
An LSTM Algorithm Based on the PyTorch Framework (Part 2): Multi-Feature Single-Step Prediction

1. Project Overview

We select two features, Close and Low: each sliding window of time_steps steps contains both features, and the model predicts the next day's value of the single Close feature.

With batch_first=True, the LSTM expects inputs of shape (batch_size, time_steps, input_size), where

batch_size = the number of sliding-window samples, roughly len(data) - time_steps (the whole set is fed as one batch)
time_steps = the sliding-window length, controlled by lookback in this project
input_size = 2 (because the Close and Low features are selected)

A minimal shape sketch follows below.
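As a standalone illustration of this shape convention (made-up sizes, not part of the project code), an nn.LSTM created with batch_first=True consumes a (batch_size, time_steps, input_size) tensor directly:

import torch
import torch.nn as nn

# Toy shapes only: 16 window samples, 4 time steps, 2 features (Close, Low).
demo_lstm = nn.LSTM(input_size=2, hidden_size=8, num_layers=1, batch_first=True)
demo_batch = torch.randn(16, 4, 2)    # (batch_size, time_steps, input_size)
demo_out, _ = demo_lstm(demo_batch)
print(demo_out.shape)                 # torch.Size([16, 4, 8])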

2. Dataset

Reference: the dataset used in https://blog.csdn.net/qq_38633279/article/details/134245512?spm=1001.2014.3001.5501

3. Data Preprocessing

3.1 Reading the Data

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import math, time
from sklearn.metrics import mean_squared_error


filepath = './data/rlData.csv'
data = pd.read_csv(filepath)
data = data.sort_values('Date')
data.head()
data.shape

sns.set_style("darkgrid")
plt.figure(figsize = (15,9))
plt.plot(data[['Close']])
plt.xticks(range(0,data.shape[0],20), data['Date'].loc[::20], rotation=45)
plt.title("****** Stock Price",fontsize=18, fontweight='bold')
plt.xlabel('Date',fontsize=18)
plt.ylabel('Close Price (USD)',fontsize=18)
plt.show()

3.2 Selecting the Close and Low Features

price = data[['Close', 'Low']].copy()  # copy so we do not modify a view of data

3.3 Normalizing the Data

# Fit one scaler per column, so that Close predictions can later be
# inverse-transformed with the scaler that was fitted on Close.
scaler_close = MinMaxScaler(feature_range=(-1, 1))
scaler_low = MinMaxScaler(feature_range=(-1, 1))
price['Close'] = scaler_close.fit_transform(price['Close'].values.reshape(-1, 1))
price['Low'] = scaler_low.fit_transform(price['Low'].values.reshape(-1, 1))
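As an optional sanity check (assuming the two-scaler setup above), inverting the Close transform should recover the raw closing prices:

# Round-trip check: inverse_transform should give back the original Close values.
restored_close = scaler_close.inverse_transform(price['Close'].values.reshape(-1, 1))
print(np.allclose(restored_close.ravel(), data['Close'].values))  # expected: True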

3.4 Building the Dataset [batch_size, time_steps, input_size]

Two features are selected as the input here, so input_size = 2.
x_train.shape = [batch_size, time_steps, input_size]
y_train.shape = [batch_size, 1]

1. The input takes the Close and Low columns as a multi-feature input, so the first and second columns of the windowed data become x_train (hence input_size = 2).
2. The output is the Close column, so only the first column of the windowed data becomes y_train (i.e. Close is the label). A toy example of this windowing is sketched below.
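Before the real split_data function below, here is a small self-contained sketch (illustrative only; the names are made up) showing what the sliding windows look like for 8 fake rows and a window length of 3:

import numpy as np

# 8 fake (Close, Low) rows; a window length of 3 yields 8 - 3 = 5 windows.
toy = np.arange(16, dtype=float).reshape(8, 2)
toy_lookback = 3
windows = np.array([toy[i:i + toy_lookback] for i in range(len(toy) - toy_lookback)])

x = windows[:, :-1, :]   # all but the last row of each window, both features -> (5, 2, 2)
y = windows[:, -1, 0:1]  # last row of each window, Close column only         -> (5, 1)
print(windows.shape, x.shape, y.shape)  # (5, 3, 2) (5, 2, 2) (5, 1)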

# 2. Building the dataset
def split_data(stock, lookback):
    data_raw = stock.to_numpy()
    data = []
    # Collect sliding windows of `lookback` consecutive rows (both features).
    for index in range(len(data_raw) - lookback):
        data.append(data_raw[index: index + lookback])

    data = np.array(data)
    test_set_size = int(np.round(0.2 * data.shape[0]))
    train_set_size = data.shape[0] - test_set_size

    x_train = data[:train_set_size, :-1, :]   # x_train.shape = (198, 4, 2)
    y_train = data[:train_set_size, -1, 0:1]  # y_train.shape = (198, 1)

    x_test = data[train_set_size:, :-1, :]    # x_test.shape = (49, 4, 2)
    y_test = data[train_set_size:, -1, 0:1]   # y_test.shape = (49, 1)

    return [torch.Tensor(x_train), torch.Tensor(y_train), torch.Tensor(x_test), torch.Tensor(y_test)]
   
lookback = 5
x_train, y_train, x_test, y_test = split_data(price, lookback)
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ',y_train.shape)
print('x_test.shape = ',x_test.shape)
print('y_test.shape = ',y_test.shape)

4. The LSTM Model

The LSTM model here is exactly the same as the one used for single-feature, single-step prediction; the only difference is the input that the dataset construction feeds into it (input_size is now 2 instead of 1).

class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :])  # use only the last time step's output
        return out
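A quick way to confirm the wiring (an optional check, not in the original post) is to push a dummy batch of shape (batch_size, time_steps, 2) through the model and verify that a (batch_size, 1) prediction comes out:

# Optional shape check with random data (illustrative only).
check_model = LSTM(input_dim=2, hidden_dim=32, num_layers=2, output_dim=1)
dummy_batch = torch.randn(8, 4, 2)     # (batch_size=8, time_steps=4, input_size=2)
print(check_model(dummy_batch).shape)  # expected: torch.Size([8, 1])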

5. Training

input_dim = 2
hidden_dim = 32
num_layers = 2
output_dim = 1
num_epochs = 100

model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
criterion = torch.nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

hist = np.zeros(num_epochs)
lstm = []

start_time = time.time()

for t in range(num_epochs):
    y_train_pred = model(x_train)

    loss = criterion(y_train_pred, y_train)
    hist[t] = loss.item()
    # print("Epoch ", t, "MSE: ", loss.item())
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()

training_time = time.time() - start_time
print("Training time: {:.2f} s".format(training_time))

6. Plotting Predictions vs. Ground Truth, and the Training Loss

predict = pd.DataFrame(scaler_close.inverse_transform(y_train_pred.detach().numpy()))
original = pd.DataFrame(scaler_close.inverse_transform(y_train.detach().numpy()))


sns.set_style("darkgrid")    

fig = plt.figure()
fig.subplots_adjust(hspace=0.2, wspace=0.2)

plt.subplot(1, 2, 1)
ax = sns.lineplot(x = original.index, y = original[0], label="Data", color='royalblue')
ax = sns.lineplot(x = predict.index, y = predict[0], label="Training Prediction (LSTM)", color='tomato')
ax.set_title('Stock price', size = 14, fontweight='bold')
ax.set_xlabel("Days", size = 14)
ax.set_ylabel("Cost (USD)", size = 14)
ax.set_xticklabels('', size=10)

plt.subplot(1, 2, 2)
ax = sns.lineplot(data=hist, color='royalblue')
ax.set_xlabel("Epoch", size = 14)
ax.set_ylabel("Loss", size = 14)
ax.set_title("Training Loss", size = 14, fontweight='bold')
fig.set_figheight(6)
fig.set_figwidth(16)


# make predictions
y_test_pred = model(x_test)

# invert predictions
y_train_pred = scaler_close.inverse_transform(y_train_pred.detach().numpy())
y_train = scaler_close.inverse_transform(y_train.detach().numpy())
y_test_pred = scaler_close.inverse_transform(y_test_pred.detach().numpy())
y_test = scaler_close.inverse_transform(y_test.detach().numpy())

# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(y_train[:,0], y_train_pred[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(y_test[:,0], y_test_pred[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
lstm.append(trainScore)
lstm.append(testScore)
lstm.append(training_time)

Complete Code

Problem description:
We select two features, Close and Low: each sliding window of time_steps steps contains both features, and the model predicts the next day's value of the single Close feature.
With batch_first=True, the LSTM expects inputs = (batch_size, time_steps, input_size)
batch_size = the number of sliding-window samples, roughly len(data) - time_steps
time_steps = the sliding-window length, controlled by lookback in this project
input_size = 2 (because the Close and Low features are selected)
#%%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import math, time
from sklearn.metrics import mean_squared_error


filepath = './data/rlData.csv'
data = pd.read_csv(filepath)
data = data.sort_values('Date')
data.head()
data.shape

sns.set_style("darkgrid")
plt.figure(figsize = (15,9))
plt.plot(data[['Close']])
plt.xticks(range(0,data.shape[0],20), data['Date'].loc[::20], rotation=45)
plt.title("****** Stock Price",fontsize=18, fontweight='bold')
plt.xlabel('Date',fontsize=18)
plt.ylabel('Close Price (USD)',fontsize=18)
plt.show()


# 1. Select the two input features
price = data[['Close', 'Low']].copy()  # copy so we do not modify a view of data

# One scaler per column, so Close predictions can later be inverse-transformed
# with the scaler fitted on Close.
scaler_close = MinMaxScaler(feature_range=(-1, 1))
scaler_low = MinMaxScaler(feature_range=(-1, 1))
price['Close'] = scaler_close.fit_transform(price['Close'].values.reshape(-1, 1))
price['Low'] = scaler_low.fit_transform(price['Low'].values.reshape(-1, 1))

# 2. Building the dataset
def split_data(stock, lookback):
    data_raw = stock.to_numpy()
    data = []
    # Collect sliding windows of `lookback` consecutive rows (both features).
    for index in range(len(data_raw) - lookback):
        data.append(data_raw[index: index + lookback])

    data = np.array(data)
    test_set_size = int(np.round(0.2 * data.shape[0]))
    train_set_size = data.shape[0] - test_set_size

    x_train = data[:train_set_size, :-1, :]   # x_train.shape = (198, 4, 2)
    y_train = data[:train_set_size, -1, 0:1]  # y_train.shape = (198, 1)

    x_test = data[train_set_size:, :-1, :]    # x_test.shape = (49, 4, 2)
    y_test = data[train_set_size:, -1, 0:1]   # y_test.shape = (49, 1)

    return [torch.Tensor(x_train), torch.Tensor(y_train), torch.Tensor(x_test), torch.Tensor(y_test)]
   
lookback = 5
x_train, y_train, x_test, y_test = split_data(price, lookback)
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ',y_train.shape)
print('x_test.shape = ',x_test.shape)
print('y_test.shape = ',y_test.shape)



class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :]) 
        return out

input_dim = 2
hidden_dim = 32
num_layers = 2
output_dim = 1
num_epochs = 100

model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
criterion = torch.nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

hist = np.zeros(num_epochs)
lstm = []

start_time = time.time()

for t in range(num_epochs):
    y_train_pred = model(x_train)

    loss = criterion(y_train_pred, y_train)
    hist[t] = loss.item()
    # print("Epoch ", t, "MSE: ", loss.item())
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()

training_time = time.time() - start_time
print("Training time: {:.2f} s".format(training_time))
    
predict = pd.DataFrame(scaler_close.inverse_transform(y_train_pred.detach().numpy()))
original = pd.DataFrame(scaler_close.inverse_transform(y_train.detach().numpy()))


sns.set_style("darkgrid")    

fig = plt.figure()
fig.subplots_adjust(hspace=0.2, wspace=0.2)

plt.subplot(1, 2, 1)
ax = sns.lineplot(x = original.index, y = original[0], label="Data", color='royalblue')
ax = sns.lineplot(x = predict.index, y = predict[0], label="Training Prediction (LSTM)", color='tomato')
ax.set_title('Stock price', size = 14, fontweight='bold')
ax.set_xlabel("Days", size = 14)
ax.set_ylabel("Cost (USD)", size = 14)
ax.set_xticklabels('', size=10)

plt.subplot(1, 2, 2)
ax = sns.lineplot(data=hist, color='royalblue')
ax.set_xlabel("Epoch", size = 14)
ax.set_ylabel("Loss", size = 14)
ax.set_title("Training Loss", size = 14, fontweight='bold')
fig.set_figheight(6)
fig.set_figwidth(16)


# make predictions
y_test_pred = model(x_test)

# invert predictions
y_train_pred = scaler_close.inverse_transform(y_train_pred.detach().numpy())
y_train = scaler_close.inverse_transform(y_train.detach().numpy())
y_test_pred = scaler_close.inverse_transform(y_test_pred.detach().numpy())
y_test = scaler_close.inverse_transform(y_test.detach().numpy())

# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(y_train[:,0], y_train_pred[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(y_test[:,0], y_test_pred[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
lstm.append(trainScore)
lstm.append(testScore)
lstm.append(training_time)

Reference: https://gitee.com/qiangchen_sh/stock-prediction/blob/master/%E4%BB%A3%E7%A0%81/LSTM%E4%BB%8E%E7%90%86%E8%AE%BA%E5%9F%BA%E7%A1%80%E5%88%B0%E4%BB%A3%E7%A0%81%E5%AE%9E%E6%88%98%204%20%E5%A4%9A%E7%BB%B4%E7%89%B9%E5%BE%81%E8%82%A1%E7%A5%A8%E4%BB%B7%E6%A0%BC%E9%A2%84%E6%B5%8B_Pytorch.ipynb

