题目:03年到19年第一季度分季度的数据,13年之前只有传统汽车的销量,13年之后是传统汽车+新能源汽车的销量,需要预测未来三期传统汽车的销量~
  1. import pandas as pd
  2. import matplotlib.pyplot as plt
  3. data = pd.read_excel("C:\orange_credit\crdt\时序数据.xlsx", sheet_name='Sheet2', index_col=u'日期')
  4. #先画出日期和传统车型销量的图形,可以判断是非平稳的
  5. data.plot()
  6. plt.show()

image.jpeg

所使用的源代码:

  1. def test_stationarity(timeseries):
  2. # 滚动平均
  3. rolmean = pd.rolling_mean(timeseries, window=12)
  4. rolstd = pd.rolling_std(timeseries, window=12)
  5. ts_diff = timeseries - timeseries.shift()
  6. orig = timeseries.plot(color='blue', label='Original')
  7. mean = rolmean.plot(color='red', label='Rolling Mean')
  8. std = rolstd.plot(color='black', label='Rolling Std')
  9. diff = ts_diff.plot(color='green', label='Diff 1')
  10. plt.legend(loc='best')
  11. plt.title('Rolling Mean & Standard Deviation')
  12. plt.show(block=False)
  13. # adf检验
  14. print('Result of Dickry-Fuller test')
  15. dftest = adfuller(timeseries, autolag='AIC')
  16. dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of observations Used'])
  17. for key, value in dftest[4].items():
  18. dfoutput['Critical value(%s)' % key] = value
  19. print(dfoutput)
  20. #test_stationarity(ts)
  21. def test_stationarity(timeseries):
  22. # 滚动平均#差分#标准差
  23. rolmean = pd.rolling_mean(timeseries, window=12)
  24. ts_diff = timeseries - timeseries.shift()
  25. rolstd = pd.rolling_std(timeseries, window=12)
  26. orig = timeseries.plot(color='blue', label='Original')
  27. mean = rolmean.plot(color='red', label='Rolling 12 Mean')
  28. std = rolstd.plot(color='black', label='Rolling 12 Std')
  29. diff = ts_diff.plot(color='green', label='Diff 1')
  30. plt.legend(loc='best')
  31. plt.title('Rolling Mean and Standard Deviation and Diff 1')
  32. l1 = plt.axhline(y=0, linewidth=1, color='yellow')
  33. plt.show(block=False)
  34. # adf--AIC检验
  35. print('Result of Augment Dickry-Fuller test--AIC')
  36. dftest = adfuller(timeseries, autolag='AIC')
  37. dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', 'Lags Used',
  38. 'Number of observations Used'])
  39. for key, value in dftest[4].items():
  40. dfoutput['Critical value(%s)' % key] = value
  41. print(dfoutput)
  42. # adf--BIC检验
  43. print('-------------------------------------------')
  44. print('Result of Augment Dickry-Fuller test--BIC')
  45. dftest = adfuller(timeseries, autolag='BIC')
  46. dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', 'Lags Used',
  47. 'Number of observations Used'])
  48. for key, value in dftest[4].items():
  49. dfoutput['Critical value(%s)' % key] = value
  50. print(dfoutput)
  51. test_stationarity(ts)
  52. # 不取对数,直接减掉趋势
  53. ts.index = ts.index.to_timestamp()
  54. moving_avg = pd.rolling_mean(ts, 12)
  55. plt.plot(ts)
  56. plt.plot(moving_avg, color='red')
  57. plt.show()
  58. ts_moving_avg_diff = ts - moving_avg
  59. ts_moving_avg_diff.head(12)
  60. # 再一次确定平稳性
  61. ts_moving_avg_diff.dropna(inplace=True)
  62. test_stationarity(ts_moving_avg_diff)
  63. expwighted_avg = pd.ewma(ts, halflife=12)
  64. plt.plot(ts)
  65. plt.plot(expwighted_avg, color='red')
  66. plt.show()
  67. # 减掉指数平滑法
  68. ts_ewma_diff = ts - expwighted_avg
  69. test_stationarity(ts_ewma_diff)
  70. # 差分法1
  71. ts_diff = ts - ts.shift()
  72. plt.plot(ts_diff)
  73. plt.show()
  74. # 差分方法2
  75. fig = plt.figure(figsize=(12, 8))
  76. ax1 = fig.add_subplot(111)
  77. diff1 = ts.diff(1)
  78. # diff2 = diff1.diff(1)
  79. diff1.plot(ax=ax1)
  80. plt.show()
  81. ts_diff.dropna(inplace=True)
  82. test_stationarity(ts_diff)
  83. plt.show()
  84. from statsmodels.tsa.seasonal import seasonal_decompose
  85. decomposition = seasonal_decompose(ts)
  86. trend = decomposition.trend
  87. seasonal = decomposition.seasonal
  88. residual = decomposition.resid
  89. plt.subplot(411)
  90. plt.plot(ts, label='Original')
  91. plt.legend(loc='best')
  92. plt.subplot(412)
  93. plt.plot(trend, label='Trend')
  94. plt.legend(loc='best')
  95. plt.subplot(413)
  96. plt.plot(seasonal, label='Seasonality')
  97. plt.legend(loc='best')
  98. plt.subplot(414)
  99. plt.plot(residual, label='Residuals')
  100. plt.legend(loc='best')
  101. plt.tight_layout()
  102. plt.show()
  103. # 直接对残差进行分析,我们检查残差的稳定性
  104. ts_decompose = residual
  105. ts_decompose.dropna(inplace=True)
  106. test_stationarity(ts_decompose)
  107. fig = plt.figure(figsize=(12, 8))
  108. ax1 = fig.add_subplot(211)
  109. fig = sm.graphics.tsa.plot_acf(ts_diff, lags=20, ax=ax1)
  110. ax1.xaxis.set_ticks_position('bottom')
  111. fig.tight_layout();
  112. ax2 = fig.add_subplot(212)
  113. fig = sm.graphics.tsa.plot_pacf(ts_diff, lags=20, ax=ax2)
  114. ax2.xaxis.set_ticks_position('bottom')
  115. plt.show()
  116. fig.tight_layout();
  117. # trend
  118. fig = plt.figure(figsize=(12, 8))
  119. ax1 = fig.add_subplot(211)
  120. fig = sm.graphics.tsa.plot_acf(trend, lags=20, ax=ax1)
  121. ax1.xaxis.set_ticks_position('bottom')
  122. fig.tight_layout();
  123. ax2 = fig.add_subplot(212)
  124. fig = sm.graphics.tsa.plot_pacf(trend, lags=20, ax=ax2)
  125. ax2.xaxis.set_ticks_position('bottom')
  126. plt.show()
  127. fig.tight_layout();
  128. # seasonal
  129. fig = plt.figure(figsize=(12, 8))
  130. ax1 = fig.add_subplot(211)
  131. fig = sm.graphics.tsa.plot_acf(seasonal, lags=20, ax=ax1)
  132. ax1.xaxis.set_ticks_position('bottom')
  133. fig.tight_layout();
  134. ax2 = fig.add_subplot(212)
  135. fig = sm.graphics.tsa.plot_pacf(seasonal, lags=20, ax=ax2)
  136. ax2.xaxis.set_ticks_position('bottom')
  137. plt.show()
  138. fig.tight_layout();
  139. import itertools
  140. # Define the p, d and q parameters to take any value between 0 and 2
  141. p = d = q = range(0, 3)
  142. # Generate all different combinations of p, q and q triplets
  143. pdq = list(itertools.product(p, d, q))
  144. print('p,d,q', pdq, '\n')
  145. for param in pdq:
  146. try:
  147. model = ARIMA(ts_diff, order=param)
  148. # model=ARIMA(ts_log,order=(2,1,2))
  149. result_ARIMA = model.fit(disp=-1)
  150. print('ARIMA{}-- AIC:{} -- BIC:{} --HQIC:{}'.format(
  151. param, result_ARIMA.aic, result_ARIMA.bic, result_ARIMA.hqic))
  152. except:
  153. continue
  154. # AR model
  155. model = ARIMA(seasonal, order=(2, 0, 0))
  156. result_AR = model.fit(disp=-1)
  157. plt.plot(seasonal)
  158. plt.plot(result_AR.fittedvalues, color='blue')
  159. plt.title('RSS:%.4f' % sum(result_AR.fittedvalues - seasonal) ** 2)
  160. plt.show()
  161. # AR model
  162. model = ARIMA(ts_diff, order=(2, 0, 0))
  163. result_AR = model.fit(disp=-1)
  164. plt.plot(ts_diff)
  165. plt.plot(result_AR.fittedvalues, color='blue')
  166. plt.title('RSS:%.4f' % sum(result_AR.fittedvalues - ts_diff) ** 2)
  167. plt.show()
  168. # MA model
  169. model = ARIMA(ts_diff, order=(0, 0, 2))
  170. result_MA = model.fit(disp=-1)
  171. plt.plot(ts_diff)
  172. plt.plot(result_MA.fittedvalues, color='blue')
  173. plt.title('RSS:%.4f' % sum(result_MA.fittedvalues - ts_diff) ** 2)
  174. plt.show()
  175. # ARIMA 将两个结合起来 效果更好
  176. # warnings.filterwarnings("ignore") # specify to ignore warning messages
  177. import itertools
  178. # Define the p, d and q parameters to take any value between 0 and 2
  179. p = d = q = range(0, 3)
  180. # Generate all different combinations of p, q and q triplets
  181. pdq = list(itertools.product(p, d, q))
  182. print('p,d,q', pdq, '\n')
  183. for param in pdq:
  184. try:
  185. model = ARIMA(ts_diff, order=param)
  186. # model=ARIMA(ts_log,order=(2,1,2))
  187. result_ARIMA = model.fit(disp=-1)
  188. print('ARIMA{}- AIC:{} - BIC:{} - HQIC:{}'.format(param, result_ARIMA.aic, result_ARIMA.bic, result_ARIMA.hqic))
  189. except:
  190. continue
  191. # MA
  192. # model=ARIMA(ts_log,order=(2,1,2))
  193. model = ARIMA(ts_diff, order=(0, 0, 2))
  194. result_ARIMA_diff = model.fit(disp=-1)
  195. plt.plot(ts_diff)
  196. plt.plot(result_ARIMA_diff.fittedvalues, color='blue')
  197. plt.title('RSS:%.4f' % sum(result_ARIMA_diff.fittedvalues - ts_diff) ** 2)
  198. plt.show()
  199. # ARMA
  200. # model=ARIMA(ts_log,order=(2,1,2))
  201. model = ARIMA(ts_diff, order=(1, 0, 2))
  202. result_ARIMA_diff = model.fit(disp=-1)
  203. plt.plot(ts_diff)
  204. plt.plot(result_ARIMA_diff.fittedvalues, color='blue')
  205. plt.title('RSS:%.4f' % sum(result_ARIMA_diff.fittedvalues - ts_diff) ** 2)
  206. plt.show()
  207. # 检验部分
  208. fig = plt.figure(figsize=(12, 8))
  209. ax1 = fig.add_subplot(211)
  210. fig = sm.graphics.tsa.plot_acf(result_ARIMA_diff.resid.values.squeeze(), lags=40, ax=ax1)
  211. ax2 = fig.add_subplot(212)
  212. fig = sm.graphics.tsa.plot_pacf(result_ARIMA_diff.resid, lags=40, ax=ax2)
  213. plt.show()
  214. import statsmodels.api as sm
  215. print(sm.stats.durbin_watson(result_ARIMA_diff.resid.values))
  216. # 2附近即不存在一阶自相关
  217. # 检验结果是2.02424743723,说明不存在自相关性。
  218. resid = result_ARIMA_diff.resid # 残差
  219. fig = plt.figure(figsize=(12, 8))
  220. ax = fig.add_subplot(111)
  221. fig = qqplot(resid, line='q', ax=ax, fit=True)
  222. plt.show()
  223. # LJ检验
  224. r, q, p = sm.tsa.acf(result_ARIMA_diff.resid.values.squeeze(), qstat=True)
  225. data = np.c_[range(1, 41), r[1:], q, p]
  226. table = pd.DataFrame(data, columns=['lag', "AC", "Q", "Prob(>Q)"])
  227. print(table.set_index('lag'))
  228. # model=ARIMA(ts_log,order=(2,1,2))
  229. model_results = ARIMA(ts_diff, order=(1, 0, 2))
  230. result_ARIMA = model_results.fit(disp=-1)
  231. # fig, ax = plt.subplots(figsize=(12, 8))
  232. # ax = ts.ix['1962':].plot(ax=ax)
  233. # fig = result_ARIMA.plot_predict('1976-01', '1976-12', dynamic=True, ax=ax, plot_insample=False)
  234. result_ARIMA.plot_predict()
  235. result_ARIMA.forecast()
  236. plt.show()
  237. predictions_ARIMA_diff = pd.Series(result_ARIMA_diff.fittedvalues, copy=True)
  238. print(predictions_ARIMA_diff.head())
  239. print('------------------------')
  240. predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
  241. print(predictions_ARIMA_diff_cumsum.head())
  242. predictions_ARIMA = pd.Series(ts.ix[0], index=ts.index)
  243. predictions_ARIMA_cus = predictions_ARIMA.add(predictions_ARIMA_diff_cumsum, fill_value=0)
  244. predictions_ARIMA_cus.head()
  245. plt.plot(ts)
  246. plt.plot(predictions_ARIMA_cus)
  247. plt.title('RMSE: %.4f' % np.sqrt(sum((predictions_ARIMA_cus - ts) ** 2) / len(ts)))
  248. plt.show()
  249. # 对的--加上了预测的值
  250. predict_dta = result_ARIMA_diff.predict('1976-01', '1976-12', dynamic=True)
  251. AA = predictions_ARIMA_diff.append(predict_dta)
  252. predictions_ARIMA = pd.Series(ts.ix[0], index=pd.period_range('196202', '197612', freq='M'))
  253. AA.index = pd.period_range('196202', '197612', freq='M')
  254. predictions_ARIMA2 = predictions_ARIMA.add(AA.cumsum(), fill_value=0)
  255. ts.plot()
  256. predictions_ARIMA2.plot()
  257. plt.show()