torch::randn
torch.randn(size)
torch.randn(row, col)
torch.randn(batch_size, sequence_length, input_features)
batch_size = row
sequence_length = col
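input_features = input size (number of features per time step; the hidden size is the size of the LSTM output, not of its input)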
>>> import torch
>>> torch.randn(4)
tensor([-0.1007, 0.0667, 0.0822, -0.2458])
>>> torch.randn(2,3)
tensor([[ 0.5246, -0.2974, -1.0181],
[-0.7873, 0.6837, -0.3721]])
>>> torch.randn(2,3,4)
tensor([[[ 0.7940, 1.1223, 0.2736, 0.5900],
[-1.6618, -1.2831, -0.3418, 0.2202],
[-0.0702, -0.4113, 1.4427, 0.2688]],
[[-0.0545, 0.6486, 1.0529, -2.3303],
[-0.3028, -0.2045, 1.7163, -1.0495],
[ 0.2985, 0.8775, -0.2320, -0.5702]]])
>>>
tensor和lstm关系
import torch
import torch.nn as nn

# batch_first is left at its default (False), so the LSTM expects input shaped
# (seq_len, batch, input_size) and states shaped (num_layers, batch, hidden_size).
# That matches the tensors built below: a 5-step sequence with batch size 1.
lstm = nn.LSTM(input_size=2, hidden_size=3, num_layers=1)  # constructor arguments
data = [torch.randn(1, 2) for _ in range(5)]
# Stack the five (1, 2) tensors into one (seq_len=5, batch=1, input_size=2) tensor.
inputs = torch.cat(data).view(len(data), 1, -1)
# Random initial hidden and cell states: (num_layers=1, batch=1, hidden_size=3).
(h0, c0) = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))
# output holds the hidden state of every time step; h and c are the final states.
output, (h, c) = lstm(inputs, (h0, c0))  # inputs and outputs
print('data:', data)
print('input:', inputs)
print('output:', output)
print('h:', h)
print('c:', c)
data: [tensor([[0.4167, 0.9958]]), tensor([[-1.6619, 0.0977]]), tensor([[-0.3533, -0.0643]]), tensor([[0.5458, 0.9554]]), tensor([[-0.6359, -0.4318]])]
input: tensor([[[ 0.4167, 0.9958]],
[[-1.6619, 0.0977]],
[[-0.3533, -0.0643]],
[[ 0.5458, 0.9554]],
[[-0.6359, -0.4318]]])
output: tensor([[[ 0.5286, -0.7768, 0.1083]],
[[ 0.1292, -0.6464, 0.3183]],
[[ 0.1304, -0.6921, 0.3207]],
[[ 0.1732, -0.6954, 0.2833]],
[[ 0.0555, -0.6110, 0.2997]]], grad_fn=<StackBackward0>)
h: tensor([[[ 0.0555, -0.6110, 0.2997]]], grad_fn=<StackBackward0>)
c: tensor([[[ 0.1291, -1.2786, 0.5819]]], grad_fn=<StackBackward0>)
input reshape
X, y
10, 20, 30 40
20, 30, 40 50
30, 40, 50 60
reshape from [samples, timesteps] into [samples, timesteps, features]
# X is expected to be a 2-D numpy array laid out as [samples, timesteps].
samples, steps = X.shape[0], X.shape[1]
# Add a trailing axis of length 1: [samples, timesteps] -> [samples, timesteps, 1].
X = X.reshape((samples, steps, 1))
Why the reshape is needed
An LSTM network always takes a three-dimensional array as input. The first dimension is the batch size (number of samples), the second dimension is the number of time steps in each sequence, and the third dimension is the number of features at each time step. For example, the input shape looks like (batch_size, time_steps, features).
input and output relation
- torch.nn.LSTM(input_size=1, hidden_size =32, num_layers=1, batch_first=True)
- input: (N,L,H_{in})
- output: (N,L,H_{out})
where:
N = batch size, i.e. the number of rows (samples)
L = sequence length, i.e. the number of columns (time steps)
H_{in} = input_size, i.e. the number of elements (features) at each time step
H_{out} = hidden_size
example for predicting stock price
import torch
import torch.nn as nn
import numpy as np
from .normalize import *
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied at every time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_dim: Number of values the linear layer emits per time step.
    """

    def __init__(
        self,
        input_dim: int = 1,
        hidden_dim: int = 32,
        num_layers: int = 2,
        output_dim: int = 1,
    ) -> None:
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        # batch_first=True: tensors are laid out as (batch, seq_len, features).
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map ``x`` of shape (batch, seq_len, input_dim) to (batch, seq_len, output_dim)."""
        # No initial state is passed, so the LSTM starts from zero states; the
        # final (h, c) pair is not needed and is discarded.
        out, _ = self.lstm(x)
        return self.fc(out)
def lstm_predict(df, price_col_name, pre_days=5, num_epochs=100):
    """Fit a small LSTM on one price column and predict the next value.

    The column is z-score normalised and cut into sliding windows of
    ``pre_days`` consecutive values, each paired with the value that follows
    it. A fresh ``LSTM`` model is trained on all windows at once for
    ``num_epochs`` Adam steps with an MSE loss. The last ``pre_days`` values
    are then fed to the model and the prediction is mapped back to the
    original price scale.

    Args:
        df: DataFrame-like object; only ``df[price_col_name]`` is read, and it
            must support ``.mean()`` and ``.std()``.
        price_col_name: Name of the column holding the price series.
        pre_days: Number of consecutive values in each input window.
        num_epochs: Number of training steps. The learning rate is set to
            ``1 / num_epochs``.

    Returns:
        The predicted next price, rounded to 2 decimal places.

    Raises:
        ValueError: If ``pre_days`` or ``num_epochs`` is below 1, or the
            series has no more than ``pre_days`` values (no training window
            can be formed).
    """
    if pre_days < 1:
        raise ValueError("pre_days must be at least 1")
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")

    # Normalise only the requested column, so unrelated (possibly non-numeric)
    # columns cannot break the mean/std computation.
    prices = df[price_col_name]
    mean = prices.mean()
    std = prices.std()
    data = np.asarray((prices - mean) / std, dtype=np.float64)

    n_windows = len(data) - pre_days
    if n_windows < 1:
        raise ValueError(
            f"need more than {pre_days} values in '{price_col_name}', "
            f"got {len(data)}"
        )

    # Window i covers data[i:i + pre_days]; its target is data[i + pre_days].
    windows = np.array([data[i:i + pre_days] for i in range(n_windows)])
    # The model is batch-first: one batch whose sequence is the ordered list of
    # windows, each window being the feature vector of one step.
    x_train = torch.tensor(windows, dtype=torch.float32).reshape(
        1, n_windows, pre_days
    )
    y_train = torch.tensor(data[pre_days:], dtype=torch.float32).reshape(
        1, n_windows, 1
    )

    model = LSTM(input_dim=pre_days, output_dim=1)
    loss_fn = torch.nn.MSELoss()
    optimiser = torch.optim.Adam(model.parameters(), lr=1 / num_epochs)

    for _ in range(num_epochs):
        optimiser.zero_grad()
        loss = loss_fn(model(x_train), y_train)
        loss.backward()
        optimiser.step()

    x_test = torch.tensor(data[-pre_days:], dtype=torch.float32).reshape(
        1, 1, pre_days
    )
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        prediction = model(x_test).item()

    # De-normalise first and round last; rounding in normalised space would
    # scale the rounding error by the standard deviation.
    return round(float(prediction * std + mean), 2)