PyTorch没有官方的高阶API,一般需要用户自己实现训练循环、验证循环和预测循环。
作者通过仿照tf.keras.Model的功能对PyTorch的nn.Module进行了封装,
实现了 fit、evaluate、predict、summary 方法,相当于用户自定义高阶API。
并在其基础上实现线性回归模型和DNN二分类模型。

  1. import os
  2. import datetime
  3. #打印时间
  4. def printbar():
  5. nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  6. print("\n"+"=========="*8 + "%s"%nowtime)
  7. #mac系统上pytorch和matplotlib在jupyter中同时跑需要更改环境变量
  8. os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

线性回归模型

准备数据

  1. import numpy as np
  2. import pandas as pd
  3. from matplotlib import pyplot as plt
  4. import torch
  5. from torch import nn
  6. import torch.nn.functional as F
  7. from torch.utils.data import Dataset,DataLoader,TensorDataset
  8. #样本数量
  9. n = 400
  10. # 生成测试用数据集
  11. X = 10*torch.rand([n,2])-5.0 #torch.rand是均匀分布
  12. w0 = torch.tensor([[2.0],[-3.0]])
  13. b0 = torch.tensor([[10.0]])
  14. Y = X@w0 + b0 + torch.normal( 0.0,2.0,size = [n,1]) # @表示矩阵乘法,增加正态扰动
  1. # 数据可视化
  2. %matplotlib inline
  3. %config InlineBackend.figure_format = 'svg'
  4. plt.figure(figsize = (12,5))
  5. ax1 = plt.subplot(121)
  6. ax1.scatter(X[:,0],Y[:,0], c = "b",label = "samples")
  7. ax1.legend()
  8. plt.xlabel("x1")
  9. plt.ylabel("y",rotation = 0)
  10. ax2 = plt.subplot(122)
  11. ax2.scatter(X[:,1],Y[:,0], c = "g",label = "samples")
  12. ax2.legend()
  13. plt.xlabel("x2")
  14. plt.ylabel("y",rotation = 0)
  15. plt.show()
  16. # 数据可视化
  17. %matplotlib inline
  18. %config InlineBackend.figure_format = 'svg'
  19. plt.figure(figsize = (12,5))
  20. ax1 = plt.subplot(121)
  21. ax1.scatter(X[:,0].numpy(),Y[:,0].numpy(), c = "b",label = "samples")
  22. ax1.legend()
  23. plt.xlabel("x1")
  24. plt.ylabel("y",rotation = 0)
  25. ax2 = plt.subplot(122)
  26. ax2.scatter(X[:,1].numpy(),Y[:,0].numpy(), c = "g",label = "samples")
  27. ax2.legend()
  28. plt.xlabel("x2")
  29. plt.ylabel("y",rotation = 0)
  30. plt.show()

image.png

  1. #构建输入数据管道
  2. ds = TensorDataset(X,Y)
  3. ds_train,ds_valid = torch.utils.data.random_split(ds,[int(400*0.7),400-int(400*0.7)])
  4. dl_train = DataLoader(ds_train,batch_size = 10,shuffle=True,num_workers=2)
  5. dl_valid = DataLoader(ds_valid,batch_size = 10,num_workers=2)

定义模型

  1. # 继承用户自定义模型
  2. from torchkeras import Model
  3. class LinearRegression(Model):
  4. def __init__(self):
  5. super(LinearRegression, self).__init__()
  6. self.fc = nn.Linear(2,1)
  7. def forward(self,x):
  8. return self.fc(x)
  9. model = LinearRegression()
  1. model.summary(input_shape = (2,))

image.png

训练模型

  1. ### 使用fit方法进行训练
  2. def mean_absolute_error(y_pred,y_true):
  3. return torch.mean(torch.abs(y_pred-y_true))
  4. def mean_absolute_percent_error(y_pred,y_true):
  5. absolute_percent_error = (torch.abs(y_pred-y_true)+1e-7)/(torch.abs(y_true)+1e-7)
  6. return torch.mean(absolute_percent_error)
  7. model.compile(loss_func = nn.MSELoss(),
  8. optimizer= torch.optim.Adam(model.parameters(),lr = 0.01),
  9. metrics_dict={"mae":mean_absolute_error,"mape":mean_absolute_percent_error})
  10. dfhistory = model.fit(200,dl_train = dl_train, dl_val = dl_valid,log_step_freq = 20)

image.png

  1. # 结果可视化
  2. %matplotlib inline
  3. %config InlineBackend.figure_format = 'svg'
  4. w,b = model.state_dict()["fc.weight"],model.state_dict()["fc.bias"]
  5. plt.figure(figsize = (12,5))
  6. ax1 = plt.subplot(121)
  7. ax1.scatter(X[:,0],Y[:,0], c = "b",label = "samples")
  8. ax1.plot(X[:,0],w[0,0]*X[:,0]+b[0],"-r",linewidth = 5.0,label = "model")
  9. ax1.legend()
  10. plt.xlabel("x1")
  11. plt.ylabel("y",rotation = 0)
  12. ax2 = plt.subplot(122)
  13. ax2.scatter(X[:,1],Y[:,0], c = "g",label = "samples")
  14. ax2.plot(X[:,1],w[0,1]*X[:,1]+b[0],"-r",linewidth = 5.0,label = "model")
  15. ax2.legend()
  16. plt.xlabel("x2")
  17. plt.ylabel("y",rotation = 0)
  18. plt.show()

image.png

评估模型

  1. dfhistory.tail()

image.png

  1. %matplotlib inline
  2. %config InlineBackend.figure_format = 'svg'
  3. import matplotlib.pyplot as plt
  4. def plot_metric(dfhistory, metric):
  5. train_metrics = dfhistory[metric]
  6. val_metrics = dfhistory['val_'+metric]
  7. epochs = range(1, len(train_metrics) + 1)
  8. plt.plot(epochs, train_metrics, 'bo--')
  9. plt.plot(epochs, val_metrics, 'ro-')
  10. plt.title('Training and validation '+ metric)
  11. plt.xlabel("Epochs")
  12. plt.ylabel(metric)
  13. plt.legend(["train_"+metric, 'val_'+metric])
  14. plt.show()
  1. plot_metric(dfhistory,"loss")

image.png

  1. plot_metric(dfhistory,"mape")

image.png

  1. # 评估
  2. model.evaluate(dl_valid)

image.png

使用模型

  1. # 预测
  2. dl = DataLoader(TensorDataset(X))
  3. model.predict(dl)[0:10]

image.png


DNN二分类模型

此范例我们不再继承 Model,而是先定义一个普通的 nn.Module(Net),再用 torchkeras.Model 对其进行封装,实现DNN二分类模型。

准备数据

  1. import numpy as np
  2. import pandas as pd
  3. from matplotlib import pyplot as plt
  4. import torch
  5. from torch import nn
  6. %matplotlib inline
  7. %config InlineBackend.figure_format = 'svg'
  8. #正负样本数量
  9. n_positive,n_negative = 2000,2000
  10. #生成正样本, 小圆环分布
  11. r_p = 5.0 + torch.normal(0.0,1.0,size = [n_positive,1])
  12. theta_p = 2*np.pi*torch.rand([n_positive,1])
  13. Xp = torch.cat([r_p*torch.cos(theta_p),r_p*torch.sin(theta_p)],axis = 1)
  14. Yp = torch.ones_like(r_p)
  15. #生成负样本, 大圆环分布
  16. r_n = 8.0 + torch.normal(0.0,1.0,size = [n_negative,1])
  17. theta_n = 2*np.pi*torch.rand([n_negative,1])
  18. Xn = torch.cat([r_n*torch.cos(theta_n),r_n*torch.sin(theta_n)],axis = 1)
  19. Yn = torch.zeros_like(r_n)
  20. #汇总样本
  21. X = torch.cat([Xp,Xn],axis = 0)
  22. Y = torch.cat([Yp,Yn],axis = 0)
  23. #可视化
  24. plt.figure(figsize = (6,6))
  25. plt.scatter(Xp[:,0].numpy(),Xp[:,1].numpy(),c = "r")
  26. plt.scatter(Xn[:,0].numpy(),Xn[:,1].numpy(),c = "g")
  27. plt.legend(["positive","negative"]);

image.png

  1. ds = TensorDataset(X,Y)
  2. ds_train,ds_valid = torch.utils.data.random_split(ds,[int(len(ds)*0.7),len(ds)-int(len(ds)*0.7)])
  3. dl_train = DataLoader(ds_train,batch_size = 100,shuffle=True,num_workers=2)
  4. dl_valid = DataLoader(ds_valid,batch_size = 100,num_workers=2)

定义模型

  1. class Net(nn.Module):
  2. def __init__(self):
  3. super().__init__()
  4. self.fc1 = nn.Linear(2,4)
  5. self.fc2 = nn.Linear(4,8)
  6. self.fc3 = nn.Linear(8,1)
  7. def forward(self,x):
  8. x = F.relu(self.fc1(x))
  9. x = F.relu(self.fc2(x))
  10. y = nn.Sigmoid()(self.fc3(x))
  11. return y
  12. model = torchkeras.Model(Net())
  13. model.summary(input_shape =(2,))

image.png

训练模型

  1. # 准确率
  2. def accuracy(y_pred,y_true):
  3. y_pred = torch.where(y_pred>0.5,torch.ones_like(y_pred,dtype = torch.float32),torch.zeros_like(y_pred,dtype = torch.float32))
  4. acc = torch.mean(1-torch.abs(y_true-y_pred))
  5. return acc
  6. model.compile(loss_func = nn.BCELoss(),optimizer= torch.optim.Adam(model.parameters(),lr = 0.01),metrics_dict={"accuracy":accuracy})
  7. dfhistory = model.fit(100,dl_train = dl_train,dl_val = dl_valid,log_step_freq = 10)

image.png

  1. # 结果可视化
  2. fig, (ax1,ax2) = plt.subplots(nrows=1,ncols=2,figsize = (12,5))
  3. ax1.scatter(Xp[:,0],Xp[:,1], c="r")
  4. ax1.scatter(Xn[:,0],Xn[:,1],c = "g")
  5. ax1.legend(["positive","negative"]);
  6. ax1.set_title("y_true");
  7. Xp_pred = X[torch.squeeze(model.forward(X)>=0.5)]
  8. Xn_pred = X[torch.squeeze(model.forward(X)<0.5)]
  9. ax2.scatter(Xp_pred[:,0],Xp_pred[:,1],c = "r")
  10. ax2.scatter(Xn_pred[:,0],Xn_pred[:,1],c = "g")
  11. ax2.legend(["positive","negative"]);
  12. ax2.set_title("y_pred");

image.png

评估模型

  1. %matplotlib inline
  2. %config InlineBackend.figure_format = 'svg'
  3. import matplotlib.pyplot as plt
  4. def plot_metric(dfhistory, metric):
  5. train_metrics = dfhistory[metric]
  6. val_metrics = dfhistory['val_'+metric]
  7. epochs = range(1, len(train_metrics) + 1)
  8. plt.plot(epochs, train_metrics, 'bo--')
  9. plt.plot(epochs, val_metrics, 'ro-')
  10. plt.title('Training and validation '+ metric)
  11. plt.xlabel("Epochs")
  12. plt.ylabel(metric)
  13. plt.legend(["train_"+metric, 'val_'+metric])
  14. plt.show()
  1. plot_metric(dfhistory,"loss")

image.png

  1. plot_metric(dfhistory,"accuracy")

image.png

  1. model.evaluate(dl_valid)

image.png

使用模型

  1. model.predict(dl_valid)[0:10]

image.png