训练模型代码流程

Pytorch通常需要用户编写自定义训练循环,训练循环的代码风格因人而异。

有3类典型的训练循环代码风格:脚本形式训练循环,函数形式训练循环,类形式训练循环。

1. 脚本循环风格

  # Script-style model definition: layers are registered on an empty
  # nn.Sequential one at a time, each under an explicit name.
  from torch import nn

  net = nn.Sequential()
  # The first convolution takes single-channel input.
  net.add_module("conv1", nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3))
  net.add_module("pool1", nn.MaxPool2d(kernel_size=2, stride=2))
  net.add_module("conv2", nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5))
  net.add_module("pool2", nn.MaxPool2d(kernel_size=2, stride=2))
  net.add_module("dropout", nn.Dropout2d(p=0.1))
  # Pooling every channel down to 1x1 leaves exactly 64 values after
  # flattening, which is what linear1 expects as its input size.
  net.add_module("adaptive_pool", nn.AdaptiveMaxPool2d((1, 1)))
  net.add_module("flatten", nn.Flatten())
  net.add_module("linear1", nn.Linear(64, 32))
  net.add_module("relu", nn.ReLU())
  # The final layer emits 10 raw scores per sample (no softmax applied here).
  net.add_module("linear2", nn.Linear(32, 10))
  print(net)
  import datetime

  import numpy as np
  import pandas as pd
  import torch
  from sklearn.metrics import accuracy_score
  from torch import nn
  def accuracy(y_pred, y_true):
      """Return the fraction of samples whose arg-max class equals the label.

      Args:
          y_pred: Tensor of per-class scores; the class axis is dim 1.
          y_true: Class indices with one entry per sample (a tensor or
              anything ``torch.as_tensor`` accepts).

      Returns:
          The accuracy as a ``numpy.float64``. That type is a ``float``
          subclass that also provides ``.item()``, so both plain float
          arithmetic and ``metric.item()`` work on the result.
      """
      # Softmax is monotonic along the class axis, so the arg-max of the raw
      # scores equals the arg-max of the probabilities; it is skipped here.
      predicted = torch.argmax(y_pred.detach(), dim=1)
      # Bring the labels onto the same device and shape as the predictions so
      # the comparison is element-wise and never broadcasts by accident.
      target = torch.as_tensor(y_true, device=predicted.device).reshape(predicted.shape)
      return np.float64((predicted == target).double().mean().item())
  # Script-style training: everything runs at module level and relies on
  # `net`, `dl_train`, `dl_valid` and `accuracy` being defined beforehand.
  loss_func = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(params=net.parameters(), lr=0.01)
  metric_func = accuracy
  metric_name = "accuracy"

  epochs = 3
  log_step_freq = 100

  # One row per epoch is appended to this table.
  dfhistory = pd.DataFrame(columns=["epoch", "loss", metric_name, "val_loss", "val_" + metric_name])
  print("Start Training...")
  nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  print("==========" * 8 + "%s" % nowtime)

  for epoch in range(1, epochs + 1):

      # 1. Training loop ------------------------------------------------
      net.train()
      loss_sum = 0.0
      metric_sum = 0.0
      # Pre-set so the averages below stay defined (0 / 1) if the loader
      # yields no batches.
      step = 1

      for step, (features, labels) in enumerate(dl_train, 1):

          # Reset gradients left over from the previous batch.
          optimizer.zero_grad()

          # Forward pass: compute loss and metric.
          predictions = net(features)
          loss = loss_func(predictions, labels)
          metric = metric_func(predictions, labels)

          # Backward pass and parameter update.
          loss.backward()
          optimizer.step()

          # Batch-level log of the running averages.
          loss_sum += loss.item()
          # float() accepts 0-dim tensors, NumPy scalars and plain Python
          # floats; `.item()` would fail on a plain float.
          metric_sum += float(metric)
          if step % log_step_freq == 0:
              print(("[step = %d] loss: %.3f, " + metric_name + ": %.3f") %
                    (step, loss_sum / step, metric_sum / step))

      # 2. Validation loop ----------------------------------------------
      net.eval()
      val_loss_sum = 0.0
      val_metric_sum = 0.0
      val_step = 1

      # No gradients are needed for evaluation.
      with torch.no_grad():
          for val_step, (features, labels) in enumerate(dl_valid, 1):
              predictions = net(features)
              val_loss = loss_func(predictions, labels)
              val_metric = metric_func(predictions, labels)

              val_loss_sum += val_loss.item()
              val_metric_sum += float(val_metric)

      # 3. Record the epoch ---------------------------------------------
      info = (epoch, loss_sum / step, metric_sum / step,
              val_loss_sum / val_step, val_metric_sum / val_step)
      dfhistory.loc[epoch - 1] = info

      # Epoch-level log.
      print(("\nEPOCH = %d, loss = %.3f," + metric_name +
             " = %.3f, val_loss = %.3f, " + "val_" + metric_name + " = %.3f")
            % info)
      nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
      print("\n" + "==========" * 8 + "%s" % nowtime)

  print('Finished Training...')

2. 函数模式

class Net(nn.Module):
    """Small CNN whose layers are stored, in execution order, in a ModuleList.

    The network takes single-channel image batches and produces 10 raw
    scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolution / pooling stage; ends with a global 1x1 max-pool so the
        # flattened feature vector has exactly 64 entries.
        feature_stage = [
            nn.Conv2d(1, 32, 3),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 5),
            nn.MaxPool2d(2, 2),
            nn.Dropout2d(0.1),
            nn.AdaptiveMaxPool2d((1, 1)),
        ]
        # Fully connected head applied to the flattened features.
        head_stage = [
            nn.Flatten(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
        ]
        self.layers = nn.ModuleList(feature_stage + head_stage)

    def forward(self, x):
        """Feed ``x`` through every stored layer in order and return the result."""
        out = x
        for block in self.layers:
            out = block(out)
        return out


net = Net()
def train_step(model, features, labels):
    """Run one optimisation step on a single batch.

    Args:
        model: Module that carries ``optimizer``, ``loss_func`` and
            ``metric_func`` attributes; they are read from the model itself.
        features: Batch passed to ``model(...)``.
        labels: Targets passed to ``loss_func`` and ``metric_func``.

    Returns:
        Tuple ``(loss, metric)`` of Python floats for this batch.
    """
    # Training mode, so layers such as dropout are active.
    model.train()

    # Clear gradients accumulated by the previous step.
    model.optimizer.zero_grad()

    # Forward pass: compute loss and metric.
    predictions = model(features)
    loss = model.loss_func(predictions, labels)
    metric = model.metric_func(predictions, labels)

    # Backward pass and parameter update.
    loss.backward()
    model.optimizer.step()

    # float() accepts 0-dim tensors, NumPy scalars and plain Python floats,
    # whereas `.item()` raises AttributeError on a plain float metric.
    return loss.item(), float(metric)

@torch.no_grad()
def valid_step(model, features, labels):
    """Evaluate a single batch without tracking gradients.

    Args:
        model: Module that carries ``loss_func`` and ``metric_func``
            attributes; they are read from the model itself.
        features: Batch passed to ``model(...)``.
        labels: Targets passed to ``loss_func`` and ``metric_func``.

    Returns:
        Tuple ``(loss, metric)`` of Python floats for this batch.
    """
    # Evaluation mode, so layers such as dropout are inactive.
    model.eval()

    predictions = model(features)
    loss = model.loss_func(predictions, labels)
    metric = model.metric_func(predictions, labels)

    # float() accepts 0-dim tensors, NumPy scalars and plain Python floats,
    # whereas `.item()` raises AttributeError on a plain float metric.
    return loss.item(), float(metric)
def train_model(model, epochs, dl_train, dl_valid, log_step_freq):
    """Train ``model`` for ``epochs`` epochs and return the per-epoch history.

    Each epoch runs ``train_step`` over every batch of ``dl_train`` and then
    ``valid_step`` over every batch of ``dl_valid``, printing progress along
    the way.

    Args:
        model: Module passed to ``train_step`` / ``valid_step``; it must also
            expose a ``metric_name`` attribute used for column and log names.
        epochs: Number of epochs to run.
        dl_train: Iterable yielding ``(features, labels)`` training batches.
        dl_valid: Iterable yielding ``(features, labels)`` validation batches.
        log_step_freq: Print the running averages every this many training
            steps. ``0`` or ``None`` disables the batch-level log.

    Returns:
        ``pandas.DataFrame`` with one row per epoch and the columns
        ``epoch``, ``loss``, ``<metric_name>``, ``val_loss`` and
        ``val_<metric_name>``.
    """
    metric_name = model.metric_name
    dfhistory = pd.DataFrame(columns=["epoch", "loss", metric_name, "val_loss", "val_" + metric_name])
    print("Start Training...")
    nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("==========" * 8 + "%s" % nowtime)

    for epoch in range(1, epochs + 1):

        # 1. Training loop ------------------------------------------------
        loss_sum = 0.0
        metric_sum = 0.0
        # Pre-set so the averages below stay defined (0 / 1) if the loader
        # yields no batches.
        step = 1

        for step, (features, labels) in enumerate(dl_train, 1):

            loss, metric = train_step(model, features, labels)

            # Batch-level log of the running averages.
            loss_sum += loss
            metric_sum += metric
            # A falsy frequency would otherwise make the modulo raise
            # (ZeroDivisionError for 0, TypeError for None).
            if log_step_freq and step % log_step_freq == 0:
                print(("[step = %d] loss: %.3f, " + metric_name + ": %.3f") %
                      (step, loss_sum / step, metric_sum / step))

        # 2. Validation loop ----------------------------------------------
        val_loss_sum = 0.0
        val_metric_sum = 0.0
        val_step = 1

        for val_step, (features, labels) in enumerate(dl_valid, 1):

            val_loss, val_metric = valid_step(model, features, labels)

            val_loss_sum += val_loss
            val_metric_sum += val_metric

        # 3. Record the epoch ---------------------------------------------
        info = (epoch, loss_sum / step, metric_sum / step,
                val_loss_sum / val_step, val_metric_sum / val_step)
        dfhistory.loc[epoch - 1] = info

        # Epoch-level log.
        print(("\nEPOCH = %d, loss = %.3f," + metric_name +
               "  = %.3f, val_loss = %.3f, " + "val_" + metric_name + " = %.3f")
              % info)
        nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("\n" + "==========" * 8 + "%s" % nowtime)

    print('Finished Training...')
    return dfhistory

3. Torchkeras

https://github.com/lyhue1991

这位大佬将pytorch进行了进一步封装,使得torch训练可以像keras一样简单,源码也基本可以读懂。本Torch系列也来自他的20天eatpytorch系列,表示敬意💪!

torchkeras 是在pytorch上实现的仿keras的高层次Model接口。有了它,你可以像Keras那样,对pytorch构建的模型进行summary,compile,fit,evaluate , predict五连击。一切都像行云流水般自然。

不过我这里必须要提醒的是,简便的同时,往往带来了更多的复杂。免费的往往是最贵的。

用到的话去github查就行,这里就不写了,还是建议用原生态。