Halo
发布于 2022-11-01 / 195 阅读 / 0 评论 / 0 点赞

lstm常见参数解释

torch.randn

torch.randn(size)
torch.randn(row, col)
torch.randn(batch_size, sequence_length, input_features)
batch_size = row
sequence_length = col
input_features = input size (number of features per time step, not the hidden size)

>>> import torch
>>> torch.randn(4)
tensor([-0.1007,  0.0667,  0.0822, -0.2458])
>>> torch.randn(2,3)
tensor([[ 0.5246, -0.2974, -1.0181],
        [-0.7873,  0.6837, -0.3721]])
>>> torch.randn(2,3,4)
tensor([[[ 0.7940,  1.1223,  0.2736,  0.5900],
         [-1.6618, -1.2831, -0.3418,  0.2202],
         [-0.0702, -0.4113,  1.4427,  0.2688]],

        [[-0.0545,  0.6486,  1.0529, -2.3303],
         [-0.3028, -0.2045,  1.7163, -1.0495],
         [ 0.2985,  0.8775, -0.2320, -0.5702]]])
>>> 

tensor和lstm关系

import torch
import torch.nn as nn

# The input built below is laid out as (seq_len, batch, input_size) = (5, 1, 2),
# so the LSTM must use the sequence-first layout. With batch_first=True the
# same tensor would be read as 5 batch items and the (1, 1, 3) initial state
# would no longer match.
lstm = nn.LSTM(input_size=2, hidden_size=3, num_layers=1, batch_first=False)  # constructor arguments
data = [torch.randn(1, 2) for _ in range(5)]
input = torch.cat(data).view(len(data), 1, -1)
# Initial hidden and cell state, each shaped (num_layers, batch, hidden_size).
(h0, c0) = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))
output, (h, c) = lstm(input, (h0, c0))  # inputs and outputs
print('data:', data)
print('input:', input)
print('output:', output)
print('h:', h)
print('c:', c)
data: [tensor([[0.4167, 0.9958]]), tensor([[-1.6619,  0.0977]]), tensor([[-0.3533, -0.0643]]), tensor([[0.5458, 0.9554]]), tensor([[-0.6359, -0.4318]])]
input: tensor([[[ 0.4167,  0.9958]],

        [[-1.6619,  0.0977]],

        [[-0.3533, -0.0643]],

        [[ 0.5458,  0.9554]],

        [[-0.6359, -0.4318]]])
output: tensor([[[ 0.5286, -0.7768,  0.1083]],

        [[ 0.1292, -0.6464,  0.3183]],

        [[ 0.1304, -0.6921,  0.3207]],

        [[ 0.1732, -0.6954,  0.2833]],

        [[ 0.0555, -0.6110,  0.2997]]], grad_fn=<StackBackward0>)
h: tensor([[[ 0.0555, -0.6110,  0.2997]]], grad_fn=<StackBackward0>)
c: tensor([[[ 0.1291, -1.2786,  0.5819]]], grad_fn=<StackBackward0>)

input reshape

X,              y
10, 20, 30      40
20, 30, 40      50
30, 40, 50      60

reshape from [samples, timesteps] into [samples, timesteps, features]

# X is expected to be a numpy array laid out as [samples, timesteps].
samples, steps = X.shape[0], X.shape[1]
# Append a trailing feature axis of length 1 -> [samples, timesteps, 1].
X = X.reshape(samples, steps, 1)

Why the reshape is needed

You always have to give a three-dimensional array as input to your LSTM network. The first dimension is the batch size, the second dimension is the number of time steps in the sequence you are feeding in, and the third dimension is the number of features in each time step. For example, the input shape looks like (batch_size, time_steps, features).

input and output relation

  • torch.nn.LSTM(input_size=1, hidden_size =32, num_layers=1, batch_first=True)
  • input: (N,L,H_{in})
  • output: (N,L,H_{out})

where:
N = batch size, i.e. the number of rows (samples)
L = sequence length, i.e. the number of columns (time steps)
H_{in} = input_size, i.e. the number of features in each time step
H_{out} = hidden_size

example for predicting stock price

import torch  
import torch.nn as nn
import numpy as np
from .normalize import *

class LSTM(nn.Module):
    """Stacked batch-first LSTM followed by a linear projection.

    The linear layer is applied to the LSTM output at every time step, so an
    input shaped (batch, seq_len, input_dim) yields an output shaped
    (batch, seq_len, output_dim).
    """

    def __init__(self, input_dim=1, hidden_dim=32, num_layers=2, output_dim=1):
        """Build the recurrent stack and the output projection.

        Args:
            input_dim: Number of features per time step.
            hidden_dim: Size of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
            output_dim: Number of values produced per time step.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        # The LSTM is created before the linear layer so that parameter
        # initialisation consumes the RNG in a fixed order.
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Run ``x`` through the LSTM and project every time step."""
        # Only the per-step outputs are used; the final (h, c) state is dropped.
        step_outputs = self.lstm(x)[0]
        return self.fc(step_outputs)
  
def lstm_predict(df, price_col_name, pre_days=5, num_epochs=100):
    """Train a small LSTM on one price column and predict its next value.

    The column is z-score normalised and cut into sliding windows of
    ``pre_days`` consecutive values; the target of each window is the value
    that follows it. A fresh ``LSTM`` model is fitted with Adam on an MSE
    loss, the last ``pre_days`` values are fed to it, and the prediction is
    mapped back to the original price scale.

    Args:
        df: pandas DataFrame (or compatible object) holding the price history.
        price_col_name: Name of the column of ``df`` to learn from and predict.
        pre_days: Window length, i.e. how many consecutive values form one
            input.
        num_epochs: Number of full-batch optimisation steps. It also sets the
            learning rate to ``1 / num_epochs``.

    Returns:
        The predicted next value on the original scale, rounded to two
        decimals.

    Raises:
        ValueError: If the column does not contain more than ``pre_days``
            values, so no (window, target) pair can be built.
    """
    # Normalise only the requested column: statistics over the whole frame
    # are unnecessary and fail when other columns are non-numeric.
    prices = df[price_col_name]
    mean = prices.mean()
    std = prices.std()
    data = ((prices - mean) / std).values

    size = len(data)
    if size <= pre_days:
        raise ValueError(
            f"need more than {pre_days} values in '{price_col_name}' "
            f"to build a training window, got {size}"
        )

    # Window i covers data[i:i + pre_days]; its target is data[i + pre_days].
    n_windows = size - pre_days
    windows = np.array([data[i:i + pre_days] for i in range(n_windows)])
    targets = np.asarray(data[pre_days:])

    # The model receives a single batch item whose time steps are the
    # successive windows, each window being one feature vector.
    x_train = torch.tensor(windows, dtype=torch.float32)
    x_train = torch.reshape(x_train, (1, n_windows, pre_days))
    y_train = torch.tensor(targets, dtype=torch.float32)
    y_train = torch.reshape(y_train, (1, n_windows, 1))

    model = LSTM(input_dim=pre_days, output_dim=1)
    loss_fn = torch.nn.MSELoss()
    # Heuristic kept from the original: more epochs means smaller steps.
    lr_val = 1 / num_epochs
    optimiser = torch.optim.Adam(model.parameters(), lr=lr_val)
    for _ in range(num_epochs):
        optimiser.zero_grad()
        loss = loss_fn(model(x_train), y_train)
        loss.backward()
        optimiser.step()

    x_test = torch.tensor(np.asarray(data[-pre_days:]), dtype=torch.float32)
    x_test = torch.reshape(x_test, (1, 1, pre_days))
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        prediction = model(x_test).item()

    # De-normalise first and round once at the end; rounding the normalised
    # value would inject up to 0.005 * std of error into the result.
    return round(prediction * std + mean, 2)

评论