代码

  1. import copy
  2. import os
  3. import torch
  4. import torch.nn as nn
  5. import numpy as np
  6. import pandas as pd
  7. from sklearn.preprocessing import MinMaxScaler
  8. import datetime
  9. import joblib
  10. import matplotlib.pyplot as plt
  class LSTMRegression(nn.Module):
      """Recurrent regressor for time-series forecasting.

      A stacked ``nn.LSTM`` produces one hidden vector per time step and a
      linear layer projects each of them to ``output_size`` values.

      Besides the network weights, the module carries two bookkeeping
      attributes that the surrounding training code fills in and stores in
      the checkpoint: ``last_time`` and ``seq_step``.
      """

      def __init__(self, input_size, hidden_size, num_layers, output_size=1):
          super().__init__()
          # Recurrent part; nn.RNN takes the same three arguments if a plain
          # RNN is wanted instead.
          self.lstm = nn.LSTM(
              input_size=input_size,
              hidden_size=hidden_size,
              num_layers=num_layers,
          )
          # Projection from hidden state to the regression target.
          self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)
          # Filled in by the trainer; both start out empty.
          self.last_time = None
          self.seq_step = []

      def forward(self, _x):
          """Run the network on ``_x`` of size (seq_len, batch, input_size).

          Returns the linear projection of the LSTM output at every time
          step, i.e. a tensor of size (seq_len, batch, output_size).
          """
          hidden_per_step, _final_state = self.lstm(_x)
          return self.fc(hidden_per_step)
  class LSTMProduct:
      """Train, persist and run an LSTM forecaster for an hourly ``level`` series.

      The model weights, the last scaled input window and the last observed
      timestamp are stored together in ``PARAM_PATH``; the fitted
      ``MinMaxScaler`` is stored separately in ``SCALER_PATH``.
      """

      # File that holds the checkpoint dict written by ``train_model``.
      PARAM_PATH = "./parameter.pkl"
      # File that holds the fitted scaler written by ``data_parser``.
      SCALER_PATH = "scaler.model"

      def __init__(self, seq_step=5, hidden_size=12, num_layers=2):
          """Build the network and restore a saved checkpoint when one exists.

          :param seq_step: number of past samples fed to the model per prediction
          :param hidden_size: LSTM hidden size
          :param num_layers: number of stacked LSTM layers
          """
          self.seq_step = seq_step
          # Checkpoint dict with keys "model", "last_seq" and "last_time",
          # or None when nothing has been trained or loaded yet.  The
          # attribute name is kept as-is for compatibility.
          self.sate = None
          self.lstm_model = LSTMRegression(
              seq_step, hidden_size=hidden_size, num_layers=num_layers
          )
          if os.path.exists(self.PARAM_PATH):
              # NOTE(review): torch.load unpickles arbitrary objects, so only
              # load checkpoint files you trust.  The checkpoint also stores a
              # timestamp object; PyTorch releases that default to
              # weights_only=True may need weights_only=False here - confirm
              # against the installed version.
              self.sate = torch.load(self.PARAM_PATH)
              self.lstm_model.load_state_dict(self.sate["model"])

      def load_data(self, path):
          """Read a CSV with ``time`` and ``level`` columns as an hourly series.

          Rows are sorted by time and ``level`` is averaged into 1-hour bins.

          :param path: path of the CSV file
          :return: ``(index, values)`` - the hourly index and the mean level
              per hour as a numpy array
          """
          df = pd.read_csv(path)
          df["time"] = pd.to_datetime(df["time"])
          by_time = df.sort_values(by="time", ascending=True).set_index("time")
          # Only ``level`` is used downstream, so resample that column alone;
          # this also avoids averaging unrelated columns.
          hourly = by_time["level"].resample("1h").mean()
          return hourly.index, hourly.values

      def create_dataset(self, data):
          """Cut a series into sliding windows and their next-step targets.

          :param data: ``(index, values)`` pair
          :return: ``(x, y, y_index)`` where ``x[i]`` is ``values[i:i+seq_step]``,
              ``y[i]`` is ``values[i+seq_step]`` and ``y_index[i]`` is the index
              entry of that target
          """
          index, value = data
          starts = range(len(value) - self.seq_step)
          dataset_x = [value[i:i + self.seq_step] for i in starts]
          dataset_y = [value[i + self.seq_step] for i in starts]
          dataset_y_index = [index[i + self.seq_step] for i in starts]
          return np.array(dataset_x), np.array(dataset_y), dataset_y_index

      def data_parser(self, data):
          """Scale the values into [-1, 1] and persist the fitted scaler.

          Without normalisation the training loss becomes very large.

          :param data: ``(index, values)`` pair
          :return: ``(index, scaled_values)``
          """
          index, value = data
          scaler = MinMaxScaler(feature_range=(-1, 1))
          scaler_value = scaler.fit_transform(value.reshape(-1, 1)).reshape(-1)
          joblib.dump(scaler, self.SCALER_PATH)
          return index, scaler_value

      def result_data_parser(self, out):
          """Map a tensor of scaled predictions back to the original scale.

          :param out: tensor of model outputs
          :return: array of shape (n, 1) in original units
          """
          scaler = joblib.load(self.SCALER_PATH)
          flat = np.array(out.view(-1).tolist()).reshape(-1, 1)
          return scaler.inverse_transform(flat)

      def product(self, train_x):
          """Predict for ``train_x`` and return the result in both scales.

          :param train_x: float tensor accepted by the model
          :return: ``(values, scaled)`` - predictions in original units as a
              1-D array, and the raw scaled predictions as a flat list
          """
          # Inference only: no autograd graph is needed.
          with torch.no_grad():
              out = self.lstm_model(train_x)
          scaled = out.view(-1).tolist()
          scaler = joblib.load(self.SCALER_PATH)
          values = scaler.inverse_transform(np.array(scaled).reshape(-1, 1))
          return values.reshape(-1), scaled

      def train_model(self, train_data, epoch=5000, lr=0.01, tol=1e-8, patience=3):
          """Fit the model with Adam on an MSE loss and save the best checkpoint.

          :param train_data: ``(train_x, train_y)`` float tensors
          :param epoch: maximum number of full-batch iterations
          :param lr: Adam learning rate
          :param tol: an improvement of at most this much counts as stalled
          :param patience: stop after this many consecutive stalled improvements
          """
          train_x, train_y = train_data
          loss_function = nn.MSELoss(reduction="mean")
          optimizer = torch.optim.Adam(self.lstm_model.parameters(), lr=lr)
          # Start at infinity so the first finite loss is always recorded.
          min_loss = float("inf")
          stalled = 0
          improved = False
          for i in range(epoch):
              optimizer.zero_grad()
              out = self.lstm_model(train_x)
              loss = loss_function(out, train_y)
              loss_value = loss.item()
              if (i + 1) % 100 == 0:
                  print('Epoch: {}, Loss:{:.6f}'.format(i + 1, loss_value))
              if loss_value < min_loss:
                  # Snapshot BEFORE the optimizer step so the weights match the
                  # loss just measured, and deep-copy because state_dict()
                  # returns references that later steps would overwrite.
                  self.sate = {
                      "model": copy.deepcopy(self.lstm_model.state_dict()),
                      "last_seq": self.lstm_model.seq_step,
                      "last_time": self.lstm_model.last_time,
                  }
                  improved = True
                  stalled = stalled + 1 if min_loss - loss_value <= tol else 0
                  min_loss = loss_value
                  if stalled >= patience:
                      # Loss has effectively converged; stop early.
                      break
              loss.backward()
              optimizer.step()
          # Never write an empty checkpoint: it could not be loaded again.
          if improved:
              torch.save(self.sate, self.PARAM_PATH)

      def train(self, data_path='./train_data.csv'):
          """Load a CSV, scale it, build the windows and train the model.

          :param data_path: path of the training CSV
          :return: ``self``
          :raises ValueError: if the series has no complete window plus target
          """
          data = self.load_data(data_path)
          if len(data[1]) <= self.seq_step:
              raise ValueError(
                  "need more than seq_step=%d samples, got %d"
                  % (self.seq_step, len(data[1]))
              )
          parser_data = self.data_parser(data)
          # Remember where the series ends so forecast() can continue from it.
          self.lstm_model.last_time = parser_data[0][-1]
          self.lstm_model.seq_step = parser_data[-1][-self.seq_step:].tolist()
          dataset = self.create_dataset(parser_data)
          # The recurrent layer reads (seq_len, batch, feature): every window
          # becomes one batch entry with seq_step features and seq_len 1.
          train_x = dataset[0].reshape(1, -1, self.seq_step)
          train_y = dataset[1].reshape(1, -1, 1)
          train_x = torch.from_numpy(train_x).to(torch.float32)
          train_y = torch.from_numpy(train_y).to(torch.float32)
          self.train_model((train_x, train_y))
          return self

      def get_mse(self, data_1, data_2):
          """Return the mean squared error between two equally long sequences.

          Inputs are converted to float64 first so integer sequences work.

          :return: the MSE as a Python float
          """
          input_ = torch.from_numpy(np.asarray(data_1, dtype=np.float64))
          output = torch.from_numpy(np.asarray(data_2, dtype=np.float64))
          return nn.MSELoss(reduction="mean")(input_, output).item()

      def forecast(self, num=5):
          """Predict the next ``num`` hourly values after the training data.

          Each prediction is appended to the input window, so later steps are
          based on earlier predictions.

          :param num: number of hourly steps to predict
          :return: list of ``(timestamp string, value)`` tuples
          :raises RuntimeError: if no trained or loaded checkpoint is available
          """
          if self.sate is None:
              raise RuntimeError(
                  "no trained state available: call train() first or provide "
                  + self.PARAM_PATH
              )
          # Copy so the stored checkpoint window is not extended in place.
          history = list(self.sate.get("last_seq"))
          last_time = self.sate.get("last_time")
          forecast_list = []
          for _ in range(num):
              window = np.array(history[-self.seq_step:]).reshape(1, -1, self.seq_step)
              train_x = torch.from_numpy(window).to(torch.float32)
              product_data, scalar_product_data = self.product(train_x)
              last_time = last_time + np.timedelta64(1, 'h')
              forecast_list.append(
                  (last_time.strftime("%Y-%m-%d %H:%M:%S"), product_data[0])
              )
              history.extend(scalar_product_data)
          return forecast_list

      def test(self):
          """Print the MSE of a 24-hour forecast against fixed reference values."""
          # Reference values for the following 24 hours, used to compute the
          # test error.
          data_1 = [165, 142, 136, 147, 170, 196, 217, 226, 224, 217, 213, 217,
                    233, 262, 302, 347, 387, 415, 423, 410, 377, 333, 283, 235]
          data = [item[-1] for item in self.forecast(24)]
          print(self.get_mse(data, data_1))
  if __name__ == '__main__':
      # Script entry point: build the forecaster, make sure it has trained
      # weights, then report the test error and a 24-hour forecast.
      lstm_model = LSTMProduct(seq_step=60, hidden_size=2, num_layers=1)
      # The constructor leaves ``sate`` as None when no saved checkpoint was
      # found; forecasting needs one, so train from the CSV in that case.
      if lstm_model.sate is None:
          lstm_model.train(data_path="./train_data.csv")
      lstm_model.test()
      print(lstm_model.forecast(24))

数据

train_data.csv