import os
import datetime


def printbar():
    """Print a separator bar of 80 '=' characters followed by the current time."""
    stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"\n{'=' * 80}{stamp}")


# On macOS, PyTorch and matplotlib can clash over duplicate OpenMP runtimes
# inside Jupyter; this env var works around the duplicate-lib abort.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

准备数据

cifar2数据集为cifar10数据集的子集,只包括前两种类别airplane和automobile。
训练集有airplane和automobile图片各5000张,测试集有airplane和automobile图片各1000张。
cifar2任务的目标是训练一个模型来对飞机airplane和机动车automobile两种图片进行分类。

在PyTorch中构建图片数据管道通常有两种方法。
第一种是使用 torchvision中的datasets.ImageFolder来读取图片然后用 DataLoader来并行加载。
第二种是通过继承 torch.utils.data.Dataset 实现用户自定义读取逻辑然后用 DataLoader来并行加载。第二种方法是读取用户自定义数据集的通用方法,既可以读取图片数据集,也可以读取文本数据集。
本篇我们介绍第一种方法。

  1. import torch
  2. from torch import nn
  3. from torch.utils.data import Dataset,DataLoader
  4. from torchvision import transforms,datasets
  5. transform_train = transforms.Compose(
  6. [transforms.ToTensor()])
  7. transform_valid = transforms.Compose(
  8. [transforms.ToTensor()])
  9. ds_train = datasets.ImageFolder("../data/cifar2/train/",
  10. transform = transform_train,target_transform= lambda t:torch.tensor([t]).float())
  11. ds_valid = datasets.ImageFolder("../data/cifar2/test/",
  12. transform = transform_train,target_transform= lambda t:torch.tensor([t]).float())
  13. print(ds_train.class_to_idx)
  14. print(ds_valid.class_to_idx)
  15. dl_train = DataLoader(ds_train,batch_size = 50,shuffle = True,num_workers=0,drop_last=True)
  16. dl_valid = DataLoader(ds_valid,batch_size = 50,shuffle = True,num_workers=0,drop_last=True)

image.png

  1. %matplotlib inline
  2. %config InlineBackend.figure_format = 'svg'
  3. #查看部分样本
  4. from matplotlib import pyplot as plt
  5. plt.figure(figsize=(8,8))
  6. for i in range(9):
  7. img,label = ds_train[i]
  8. img = img.permute(1,2,0)
  9. ax=plt.subplot(3,3,i+1)
  10. ax.imshow(img.numpy())
  11. ax.set_title("label = %d"%label.item())
  12. ax.set_xticks([])
  13. ax.set_yticks([])
  14. plt.show()

image.png

  1. # # Pytorch的图片默认顺序是 Batch,Channel,Width,Height
  2. for x,y in dl_train:
  3. print(x.shape,y.shape)
  4. break
  5. for x,y in dl_valid:
  6. print(x.shape,y.shape)
  7. # print(y)
  8. break

image.png


定义模型

使用PyTorch通常有三种方式构建模型:

  1. 使用nn.Sequential按层顺序构建模型;
  2. 继承nn.Module基类构建自定义模型;
  3. 继承nn.Module基类构建模型并辅助应用模型容器(nn.Sequential,nn.ModuleList,nn.ModuleDict)进行封装。

此处选择通过继承nn.Module基类构建自定义模型。

  1. #测试AdaptiveMaxPool2d的效果
  2. pool = nn.AdaptiveMaxPool2d((1,1))
  3. t = torch.randn(10,8,32,32)
  4. pool(t).shape

image.png

  1. class Net(nn.Module):
  2. def __init__(self):
  3. super(Net, self).__init__()
  4. self.conv1 = nn.Conv2d(in_channels=3,out_channels=32,kernel_size = 3)
  5. self.pool = nn.MaxPool2d(kernel_size = 2,stride = 2)
  6. self.conv2 = nn.Conv2d(in_channels=32,out_channels=64,kernel_size = 5)
  7. self.dropout = nn.Dropout2d(p = 0.1)
  8. self.adaptive_pool = nn.AdaptiveMaxPool2d((1,1))
  9. self.flatten = nn.Flatten()
  10. self.linear1 = nn.Linear(64,32)
  11. self.relu = nn.ReLU()
  12. self.linear2 = nn.Linear(32,1)
  13. self.sigmoid = nn.Sigmoid()
  14. def forward(self,x):
  15. x = self.conv1(x)
  16. x = self.pool(x)
  17. x = self.conv2(x)
  18. x = self.pool(x)
  19. x = self.dropout(x)
  20. x = self.adaptive_pool(x)
  21. x = self.flatten(x)
  22. x = self.linear1(x)
  23. x = self.relu(x)
  24. x = self.linear2(x)
  25. y = self.sigmoid(x)
  26. return y
  27. net = Net()
  28. print(net)

image.png


训练模型

PyTorch通常需要用户编写自定义训练循环,训练循环的代码风格因人而异。
有3类典型的训练循环代码风格:

  1. 脚本形式训练循环;
  2. 函数形式训练循环;
  3. 类形式训练循环。

此处介绍一种较通用的函数形式训练循环。

  1. import pandas as pd
  2. from sklearn.metrics import roc_auc_score
  3. model = net
  4. model.optimizer = torch.optim.SGD(model.parameters(),lr = 0.01)
  5. model.loss_func = torch.nn.BCELoss()
  6. model.metric_func = lambda y_pred,y_true: roc_auc_score(y_true.data.numpy(),y_pred.data.numpy())
  7. model.metric_name = "auc"
  8. def train_step(model,features,labels):
  9. # 训练模式,dropout层发生作用
  10. model.train()
  11. # 梯度清零
  12. model.optimizer.zero_grad()
  13. # 正向传播求损失
  14. predictions = model(features)
  15. loss = model.loss_func(predictions,labels)
  16. # print(labels.shape)
  17. metric = model.metric_func(predictions,labels)
  18. # 反向传播求梯度
  19. loss.backward()
  20. model.optimizer.step()
  21. return loss.item(),metric.item()
  22. def valid_step(model,features,labels):
  23. # 预测模式,dropout层不发生作用
  24. model.eval()
  25. # 关闭梯度计算
  26. with torch.no_grad():
  27. predictions = model(features)
  28. loss = model.loss_func(predictions,labels)
  29. metric = model.metric_func(predictions,labels)
  30. return loss.item(), metric.item()
  31. # 测试train_step效果
  32. features,labels = next(iter(dl_train))
  33. train_step(model,features,labels)

image.png

def train_model(model, epochs, dl_train, dl_valid, log_step_freq):
    """Generic training loop.

    Runs `epochs` passes over dl_train (logging every `log_step_freq`
    batches), evaluates on dl_valid after each epoch, and returns a
    DataFrame with one row of averaged metrics per epoch.
    """
    metric_name = model.metric_name
    history = pd.DataFrame(
        columns=["epoch", "loss", metric_name, "val_loss", "val_" + metric_name])
    print("Start Training...")
    stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("=" * 80 + stamp)

    for epoch in range(1, epochs + 1):

        # 1. Training pass ------------------------------------------------
        loss_sum, metric_sum, step = 0.0, 0.0, 1
        for step, (features, labels) in enumerate(dl_train, 1):
            loss, metric = train_step(model, features, labels)
            loss_sum += loss
            metric_sum += metric
            # Batch-level log: running averages so far this epoch.
            if step % log_step_freq == 0:
                print(f"[step = {step}] loss: {loss_sum / step:.3f}, "
                      f"{metric_name}: {metric_sum / step:.3f}")

        # 2. Validation pass ----------------------------------------------
        val_loss_sum, val_metric_sum, val_step = 0.0, 0.0, 1
        for val_step, (features, labels) in enumerate(dl_valid, 1):
            val_loss, val_metric = valid_step(model, features, labels)
            val_loss_sum += val_loss
            val_metric_sum += val_metric

        # 3. Record + epoch-level log -------------------------------------
        info = (epoch, loss_sum / step, metric_sum / step,
                val_loss_sum / val_step, val_metric_sum / val_step)
        history.loc[epoch - 1] = info
        print(f"\nEPOCH = {info[0]}, loss = {info[1]:.3f},{metric_name}"
              f"  = {info[2]:.3f}, val_loss = {info[3]:.3f}, "
              f"val_{metric_name} = {info[4]:.3f}")
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("\n" + "=" * 80 + stamp)

    print('Finished Training...')

    return history
# Train for 20 epochs, logging every 50 batches; keep the metric history.
epochs = 20

dfhistory = train_model(model,epochs,dl_train,dl_valid,log_step_freq = 50)

image.png


评估模型

# Per-epoch training history recorded by train_model.
dfhistory

image.png

%matplotlib inline
%config InlineBackend.figure_format = 'svg'

import matplotlib.pyplot as plt

def plot_metric(dfhistory, metric):
    train_metrics = dfhistory[metric]
    val_metrics = dfhistory['val_'+metric]
    epochs = range(1, len(train_metrics) + 1)
    plt.plot(epochs, train_metrics, 'bo--')
    plt.plot(epochs, val_metrics, 'ro-')
    plt.title('Training and validation '+ metric)
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend(["train_"+metric, 'val_'+metric])
    plt.show()
plot_metric(dfhistory,"loss")

image.png

# AUC curves for train vs. validation.
plot_metric(dfhistory,"auc")

image.png


使用模型

def predict(model, dl):
    """Run `model` over every batch in `dl` in eval mode, without gradients,
    and return the concatenated predictions as a detached tensor."""
    model.eval()
    outputs = []
    with torch.no_grad():
        for batch in dl:
            outputs.append(model.forward(batch[0]))
    return torch.cat(outputs).data
# Predicted probabilities for the validation set (sigmoid outputs in [0, 1]).
y_pred_probs = predict(model,dl_valid)
y_pred_probs

image.png

# Predicted classes: threshold the probabilities at 0.5 (1 = automobile, 0 = airplane
# per class_to_idx printed earlier -- verify against the dataset mapping).
y_pred = torch.where(y_pred_probs>0.5,
        torch.ones_like(y_pred_probs),torch.zeros_like(y_pred_probs))
y_pred

image.png


保存模型

推荐使用保存参数方式保存Pytorch模型。

# Inspect the parameter/buffer names that state_dict will serialize.
print(model.state_dict().keys())

image.png

# Persist only the learned parameters (recommended over pickling the module).
param_path = "../data/1_2_model_parameter.pkl"
torch.save(model.state_dict(), param_path)

# Rebuild an identical architecture and restore the weights into it.
net_clone = Net()
net_clone.load_state_dict(torch.load(param_path))

# The clone should reproduce the original model's predictions.
predict(net_clone, dl_valid)

image.png