import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # input channels: 1; output channels: 6; 5x5 kernel
        # output-size formula (stride 1, no padding): W_out = W - K + 1
        self.conv1 = nn.Conv2d(1, 6, 5)  # Conv2d takes (in_channels, out_channels, kernel_size); the *input tensor* it expects is 4-D: (batch_size, channel, height, width)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # affine maps: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # backward() is defined automatically by autograd
    def forward(self, x):
        # 2x2 max pooling; the default stride equals the pooling window size
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))  # 32 -> 28 -> 14
        # a square pooling window can be given as a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # 14 -> 10 -> 5, i.e. 16*5*5 features
        x = x.view(-1, self.num_flat_features(x))  # flatten everything but the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
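'''
Sanity check of the size formula: each 5x5 conv gives W - K + 1 and each 2x2
pool halves it, so 32 -> 28 -> 14 -> 10 -> 5, which is why fc1 expects 16*5*5
input features. A minimal shape-check sketch (fresh throwaway layers):
'''
_t = torch.randn(1, 1, 32, 32)
_t = F.max_pool2d(F.relu(nn.Conv2d(1, 6, 5)(_t)), 2)
print(_t.shape)  # torch.Size([1, 6, 14, 14])
_t = F.max_pool2d(F.relu(nn.Conv2d(6, 16, 5)(_t)), 2)
print(_t.shape)  # torch.Size([1, 16, 5, 5])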
net = Net()
params = list(net.parameters())  # len(params) = 10: a weight and a bias for each of conv1, conv2, fc1, fc2, fc3
input = torch.randn(1, 1, 32, 32)
out = net(input)
'''
Loss function
'''
target = torch.randn(10)  # a dummy random target, for demonstration
target = target.view(1, -1)  # reshape the target to match the output's shape (1, 10)
criterion = nn.MSELoss()
loss = criterion(out, target)
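'''
MSELoss against a random target is only a demonstration; the CIFAR-10 example
further below uses CrossEntropyLoss, which fits classification. A toy check of
what MSELoss computes (the mean of the squared errors):
'''
print(nn.MSELoss()(torch.tensor([1., 2.]), torch.tensor([0., 0.])))  # (1 + 4) / 2 = 2.5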
'''
Backpropagation
'''
# print("conv1.bias.grad before zero\n", net.conv1.bias.grad)  # None
net.zero_grad()  # zero the gradient buffers of all parameters
# print("conv1.bias.grad before backward\n", net.conv1.bias.grad)  # None
loss.backward()  # backpropagate the loss
# print("conv1.bias.grad after backward\n", net.conv1.bias.grad)  # a 6-element tensor
'''
The computation graph traversed by backward():
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> view -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss
'''
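'''
This graph is recorded in grad_fn: each node's next_functions points at the
nodes that produced its inputs. A minimal inspection sketch (the printed class
names vary slightly across PyTorch versions):
'''
print(loss.grad_fn)                                             # MseLossBackward0
print(loss.grad_fn.next_functions[0][0])                        # AddmmBackward0 (fc3)
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])   # AccumulateGrad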
'''
Updating the weights:
weight = weight - learning_rate * gradient
'''
learning_rate = 0.01
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
optimizer.zero_grad()  # zero the gradient buffers (otherwise gradients accumulate across batches)
out = net(input)
loss = criterion(out, target)
loss.backward()  # step() needs fresh gradients; in the original note step() ran right after zero_grad(), so the weights did not change
optimizer.step()  # update the parameters
# print("after update conv1's .weight---\n", params[0])  # now visibly changed
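'''
The rule weight = weight - learning_rate * gradient can also be applied by
hand, without torch.optim; a minimal sketch (left commented out so it does
not apply a second update here):
'''
# for f in net.parameters():
#     f.data.sub_(f.grad.data * learning_rate)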
Overall workflow:
load the dataset ->> build the model ->> define the loss function and the optimizer ->> train: compute the loss, backpropagate, update the parameters, repeat ->> test (typically one training pass and one evaluation per epoch)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])  # per-channel (mean, std); maps images from [0, 1] to [-1, 1]
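# Normalize computes (x - mean) / std per channel; with mean = std = 0.5 the
# [0, 1] range from ToTensor maps to [-1, 1]. Toy check:
# print((torch.tensor([0.0, 1.0]) - 0.5) / 0.5)  # tensor([-1., 1.])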
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)  # download=True fetches CIFAR-10 on the first run
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=0)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=0)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
def imshow(img):
    img = img / 2 + 0.5  # undo the normalization: back from [-1, 1] to [0, 1]
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))  # CHW -> HWC, the layout matplotlib expects
    plt.show()
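# A minimal usage sketch: preview one training batch with its labels
# (uncomment to run; plt.show() in imshow blocks until the window is closed):
# dataiter = iter(trainloader)
# images, labels = next(dataiter)
# imshow(torchvision.utils.make_grid(images))
# print(' '.join('%5s' % classes[labels[j]] for j in range(4)))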
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
net = Net()
criterion = nn.CrossEntropyLoss()  # expects raw logits, so the model has no final softmax
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)  # momentum keeps a running velocity of past gradients, smoothing the updates
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)
def train():
    for epoch in range(4):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # zero the parameter gradients: training uses mini-batches, and without
            # this the gradients would accumulate on top of the previous batch's
            optimizer.zero_grad()
            # forward pass + backward pass + optimization
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()   # autograd computes the gradient of the loss w.r.t. every parameter
            optimizer.step()  # one gradient-descent step using the gradients just computed
            running_loss += loss.item()
            if i % 2000 == 1999:  # print the average loss every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
    print("Finished training")

    correct = 0
    total = 0
    # gradients are not needed for evaluation, which saves memory and computation
    with torch.no_grad():
        for data in testloader:
            # images.size() = [4, 3, 32, 32]; labels.size() = [4]
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)  # a 4x10 tensor: batch_size x num_classes
            _, predicted = torch.max(outputs, 1)  # max over dim 1 returns (values, indices); the indices are the predicted classes
            total += labels.size(0)
            # predicted and labels each hold 4 entries because batch_size = 4;
            # predicted == labels is e.g. tensor([True, True, False, False], device='cuda:0')
            correct += (predicted == labels).sum().item()  # .sum() counts the True entries
    print("Accuracy of the network on the 10000 test images: %d %%" % (100 * correct / total))
if __name__ == '__main__':
    train()