我想使用 PyTorch 中的神经网络预测特定股票的趋势。我按照指南¹来了解该类型程序的基本结构。但是,本指南仅适用于基于 x 过去一天的股票价值的单日预测(回顾)。
我的目标是看看这些预测是否可以在未来更进一步,所以过去的单日预测。因此,我修改了程序以根据神经网络先前预测的值进行递归预测。本质上,我从进行一天的预测开始,将值附加到进行前一个预测的回溯数组中,并使用第一天的预测值和给定的值对第二天进行新的预测前几天。
程序本身运行良好,但预测值似乎接近某个值。将预测值绘制成指数图(见下图)。
我正在寻找关于为什么观察到这种行为的解释和/或关于预测过去一天未来值的更好算法的建议。我很可能犯了一些明显的逻辑错误,因为这对我来说是全新的领域。
笔记
我正在使用 AlphaVantage 提供的数据集。示例中使用的股票数据集是 AMZN 股票。
代码
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plot
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import math
import time
import datetime
# ALGORITHM FOR FUTURE PREDICTIONS (this is where the issue lies)
def forward_future(model, mse, past, length):
results = np.zeros(length)
approach = past
for i in range(length):
pred_y = model(approach)
approach = torch.cat((approach, pred_y.unsqueeze(2)), dim=1)
#approach = torch.from_numpy(np.append(approach[:, 1:, :].detach().numpy(), pred_y.detach().numpy()[np.newaxis, :, :], axis=1)).type(torch.Tensor)
results[i] = pred_y
return results
# CONSTS
PATH_FILES = os.path.join(os.path.dirname(os.path.abspath(__file__)), "stockdata/raw")
FILE = "stock_AMZN.csv"
PATH = os.path.join(PATH_FILES, FILE)
lookback = 20
# DATA PROCESSING
df = pd.read_csv(PATH, lineterminator="|", usecols=["timestamp", "open", "high", "low", "close", "volume"]).sort_values("timestamp")[1:]
dates = df.loc[:, "timestamp"].to_numpy()
p_HIGH = df.loc[:, "high"].to_numpy()
p_LOW = df.loc[:, "low"].to_numpy()
p_MID = (p_HIGH + p_LOW) / 2.0
scaler = MinMaxScaler(feature_range=(-1, 1))
p_MID = scaler.fit_transform(pd.Series(p_MID).values.reshape(-1, 1))
# PREPARING DATA
def split(price, lookback):
d_RAW = price
d_CLEAN = []
for i in range(len(d_RAW) - lookback):
d_CLEAN.append(d_RAW[i: i + lookback])
d_CLEAN = np.array(d_CLEAN)
s_TRAIN_size = d_CLEAN.shape[0]
s_TRAIN_x = d_CLEAN[:s_TRAIN_size,:-1:]
s_TRAIN_y = d_CLEAN[:s_TRAIN_size, -1,:]
s_PRED = s_TRAIN_x[-1]
s_PRED = s_PRED[np.newaxis, :, :]
return [s_TRAIN_x, s_TRAIN_y, s_PRED]
s_TRAIN_x, s_TRAIN_y, s_PRED = split(p_MID, lookback)
s_TRAIN_x = torch.from_numpy(s_TRAIN_x).type(torch.Tensor)
s_TRAIN_y = torch.from_numpy(s_TRAIN_y).type(torch.Tensor)
s_PRED = torch.from_numpy(s_PRED).type(torch.Tensor)
# DEFINITION OF THE NEURAL NETWORK
dim_INPUT = 1
dim_HIDDEN = 32
dim_OUTPUT = 1
lay_NUM = 2
epo_NUM = 100
class GRU(nn.Module):
def __init__(self, dim_INPUT, dim_HIDDEN, lay_NUM, dim_OUTPUT):
super(GRU, self).__init__()
self.dim_HIDDEN = dim_HIDDEN
self.lay_NUM = lay_NUM
self.gru = nn.GRU(dim_INPUT, dim_HIDDEN, lay_NUM, batch_first = True)
self.fc = nn.Linear(dim_HIDDEN, dim_OUTPUT)
def forward(self, x):
h0 = torch.zeros(self.lay_NUM, x.size(0), self.dim_HIDDEN).requires_grad_()
out, (hn) = self.gru(x, (h0.detach()))
out = self.fc(out[:, -1, :])
return out
model = GRU(dim_INPUT = dim_INPUT, dim_HIDDEN = dim_HIDDEN, dim_OUTPUT = dim_OUTPUT, lay_NUM = lay_NUM)
criterion = torch.nn.MSELoss(reduction = "mean")
optimiser = torch.optim.Adam(model.parameters(), lr = 0.01)
# TRAINING
hist = np.zeros(epo_NUM)
t_initial = time.time()
for t in range(epo_NUM):
pred_TRAIN_y = model(s_TRAIN_x)
print(pred_TRAIN_y)
loss = criterion(pred_TRAIN_y, s_TRAIN_y)
print("Epoch %s\nMSE: %s"%(str(t), str(loss.item())))
hist[t] = loss.item()
optimiser.zero_grad()
loss.backward()
optimiser.step()
t_delta = time.time() - t_initial
print("Training Time: {}".format(t_delta))
# CALL TO MAKE FUTURE PREDICTIONS
prediction_size = 30
predictions = forward_future(model, hist[-1], s_PRED, prediction_size)
prediction_plot_x = range(len(p_MID) - prediction_size, len(p_MID))
# PREPARATION FOR PLOTTING
vfunc = np.vectorize(lambda x: round(x, 3))
p_MID = scaler.inverse_transform(p_MID)
predictions = scaler.inverse_transform(predictions[:, np.newaxis])
# PLOTTING
fig, (ax1, ax2) = plot.subplots(2)
ax1.plot(range(len(p_MID)), p_MID, color="blue", label="True")
ax1.plot(prediction_plot_x, predictions, color="green", label="Predictions")
plot.sca(ax1)
plot.xticks(range(len(p_MID)), dates, rotation="vertical")
plot.setp(ax1.get_xticklabels()[::1], visible=False)
ax1.grid(False)
ax1.legend()
ax2.plot(range(epo_NUM), hist)
ax2.set_ylabel("Loss")
ax2.set_xlabel("Epochs")
ax2.grid()
plot.show()
参考
¹ 使用 PyTorch 进行股票价格预测,Medium,https: //medium.com/swlh/stock-price-prediction-with-pytorch-37f52ae84632
