Summary

:::info A 10-class classifier for the MNIST handwritten-digit dataset, implemented in PyTorch :::

Preface

:::info I won't introduce the MNIST dataset at length.
The essentials: every image has shape=(28, 28), and the labels are the 10 digits 0~9. This post uses a neural network to build a model that recognizes those 10 classes. :::
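
To make that concrete, here is a minimal sketch that inspects one sample (it assumes torchvision is installed and will download the data to ./data):

from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

sample_img, sample_label = MNIST(root='./data', train=True, download=True, transform=ToTensor())[0]
print(sample_img.shape)   # torch.Size([1, 28, 28]): one channel, 28 x 28 pixels
print(sample_label)       # an integer in 0..9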

Import the libraries

import torch
import numpy as np
import matplotlib.pyplot as plt
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
from torch import optim
import torch.nn as nn
import torch.nn.functional as F

Define the hyperparameters

# hyperparameters
batch_size = 200
learning_rate = 0.001
epochs = 20

Load the dataset

:::tips This loads MNIST through torchvision, so the first run needs an internet connection to download the data :::

# Load the datasets
train_loader = torch.utils.data.DataLoader(
    MNIST(root='./data', train=True, download=True, transform=transforms.Compose([
        transforms.ToTensor(),
        # Standardize: subtract the mean, then divide by the standard deviation,
        # giving data with mean 0 and variance 1
        transforms.Normalize((0.1307,), (0.3081,))  # mean and std of the MNIST training pixels
    ])),
    batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    MNIST(root='./data', train=False, download=True, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))  # same statistics as the training set
    ])),
    batch_size=batch_size, shuffle=True
)
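
Where do 0.1307 and 0.3081 come from? They are the mean and standard deviation of the MNIST training pixels after ToTensor() scales them to [0, 1]. A quick sketch to verify them, assuming the dataset is already downloaded to ./data:

stats_set = MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
pixels = torch.stack([img for img, _ in stats_set])  # (60000, 1, 28, 28), roughly 180 MB in memory
print(pixels.mean().item(), pixels.std().item())     # ~0.1307, ~0.3081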

Define the model

class mnist_model(nn.Module):
    def __init__(self):
        super(mnist_model, self).__init__()
        # Random weight initialization; wrapping the tensors in nn.Parameter
        # registers them with the module (requires_grad=True by default),
        # so model.parameters() can hand them to the optimizer later.
        # Each weight is (out_features, in_features): w1 is 200 x 784, etc.
        self.w1, self.b1 = nn.Parameter(torch.randn(200, 784)), nn.Parameter(torch.randn(200))
        self.w2, self.b2 = nn.Parameter(torch.randn(200, 200)), nn.Parameter(torch.randn(200))
        self.w3, self.b3 = nn.Parameter(torch.randn(10, 200)), nn.Parameter(torch.zeros(10))  # biases all zero

        # Kaiming initialization; without it the gradients can stall later in training
        torch.nn.init.kaiming_normal_(self.w1)
        torch.nn.init.kaiming_normal_(self.w2)
        torch.nn.init.kaiming_normal_(self.w3)

    def forward(self, x):
        # w.t() is equivalent to w.T: both give the transpose
        x = x @ self.w1.t() + self.b1
        # F.relu() zeroes out negative entries and leaves positive ones unchanged
        x = F.relu(x)
        x = x @ self.w2.t() + self.b2
        x = F.relu(x)
        x = x @ self.w3.t() + self.b3
        # note: a ReLU on the output layer is normally omitted before
        # CrossEntropyLoss; it is kept here to match the logged run below
        x = F.relu(x)
        return x
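
For comparison, the same three-layer MLP can be written with nn.Linear, which creates and registers its weights automatically; a sketch (nn.Linear applies its own default initialization rather than the explicit Kaiming init above):

model_alt = nn.Sequential(
    nn.Linear(784, 200), nn.ReLU(),
    nn.Linear(200, 200), nn.ReLU(),
    nn.Linear(200, 10),  # no final ReLU: CrossEntropyLoss expects raw logits
)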

Define the cross-entropy loss

:::success A note on CrossEntropyLoss():
done step by step, the computation would be
1. LogSoftmax() over the logits (a softmax followed by a log)
2. NLLLoss() on the resulting log-probabilities
CrossEntropyLoss() fuses these steps into a single call :::

criteon = nn.CrossEntropyLoss()
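
The fusion is easy to check numerically; a minimal sketch with random logits:

demo_logits = torch.randn(4, 10)
demo_targets = torch.randint(0, 10, (4,))
fused = criteon(demo_logits, demo_targets)
manual = nn.NLLLoss()(F.log_softmax(demo_logits, dim=1), demo_targets)
print(torch.allclose(fused, manual))  # True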

Define a helper to display an image

def show_img(img_data):
    if not isinstance(img_data, torch.Tensor):
        raise ValueError('instance error')
    if img_data.shape != (28, 28):
        raise ValueError('shape error')
    plt.imshow(img_data, cmap='gray')  # single-channel digit, so use a gray colormap
    plt.show()
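
A usage sketch: the loaders above yield normalized tensors of shape (1, 28, 28), so take a raw sample and drop the channel dimension first (assumes the dataset is already downloaded):

raw_test_set = MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
img, label = raw_test_set[0]
show_img(img.squeeze(0))  # (1, 28, 28) -> (28, 28)
print('label:', label)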

Create the model instance

# create the model instance
model = mnist_model()

Create the optimizer

# model.parameters() yields the six nn.Parameter tensors registered in __init__
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
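
For plain SGD without momentum, optimizer.step() boils down to one subtraction per parameter; roughly this (an illustrative sketch of the update rule, not a replacement for the optimizer):

with torch.no_grad():
    for p in model.parameters():
        if p.grad is not None:
            p -= learning_rate * p.grad  # p <- p - lr * dL/dp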

Write the training code

:::tips After each epoch the model is evaluated on the test set.
If the test accuracy ever exceeds 95%, the current weights are printed and the epoch loop breaks early; otherwise all 20 epochs run. :::

for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        # flatten each image; -1 lets PyTorch infer the batch dimension
        data = data.view(-1, 28*28)
        # forward pass: logits has shape (batch_size, 10)
        logits = model(data)
        # cross-entropy between the predicted logits and the true labels
        loss = criteon(logits, target)
        # gradient descent: clear the old gradients first
        optimizer.zero_grad()
        # loss depends on logits, which depend on w1..w3 and b1..b3 (all with
        # requires_grad=True); backward() computes d(loss)/d(param) into .grad
        loss.backward()
        # take one SGD step using the gradients just computed
        optimizer.step()
        # log every 100 batches (batch_size=200, 300 batches per epoch, 60000 samples)
        if (batch_idx+1) % 100 == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)] \t Loss:{:.06f}".format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()
            ))
    test_loss = 0
    correct = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for data, target in test_loader:
            data = data.view(-1, 28 * 28)
            logits = model(data)
            # .item() extracts the Python number; this accumulates per-batch mean
            # losses and divides by the dataset size below, hence the small values
            test_loss += criteon(logits, target).item()
            # logits has shape (batch_size, 10); max(1) returns (values, indices),
            # so [1] is the index of the most likely class for each sample
            pred = logits.max(1)[1]
            correct += pred.eq(target).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\n Test set: Average loss : {:.4f}, Accuracy:{}/{} ({:.04f}%) \n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)
    ))
    # stop early once test accuracy exceeds 95%
    if 100. * correct / len(test_loader.dataset) > 95:
        print('Current w1 & w2 & w3 is \n{} \n{} \n{}'.format(model.w1, model.w2, model.w3))
        break
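
Once training finishes (or breaks early), the model can be used for prediction directly; a minimal sketch on one test batch:

with torch.no_grad():
    images, labels = next(iter(test_loader))
    preds = model(images.view(-1, 28*28)).argmax(dim=1)
print('predicted:', preds[:10].tolist())
print('actual   :', labels[:10].tolist())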

Train Epoch: 0 [19800/60000 (33%)] Loss:0.510757
Train Epoch: 0 [39800/60000 (66%)] Loss:0.489801
Train Epoch: 0 [59800/60000 (100%)] Loss:0.397195

Test set: Average loss : 0.0019, Accuracy:8849/10000 (88.4900%)

Train Epoch: 1 [19800/60000 (33%)] Loss:0.283614
Train Epoch: 1 [39800/60000 (66%)] Loss:0.533774
Train Epoch: 1 [59800/60000 (100%)] Loss:0.394449

Test set: Average loss : 0.0018, Accuracy:8919/10000 (89.1900%)

Train Epoch: 2 [19800/60000 (33%)] Loss:0.431440
Train Epoch: 2 [39800/60000 (66%)] Loss:0.325441
Train Epoch: 2 [59800/60000 (100%)] Loss:0.360078

Test set: Average loss : 0.0018, Accuracy:8972/10000 (89.7200%)

Train Epoch: 3 [19800/60000 (33%)] Loss:0.329010
Train Epoch: 3 [39800/60000 (66%)] Loss:0.328927
Train Epoch: 3 [59800/60000 (100%)] Loss:0.267577

Test set: Average loss : 0.0017, Accuracy:9001/10000 (90.0100%)

Train Epoch: 4 [19800/60000 (33%)] Loss:0.298385
Train Epoch: 4 [39800/60000 (66%)] Loss:0.276086
Train Epoch: 4 [59800/60000 (100%)] Loss:0.322347

Test set: Average loss : 0.0017, Accuracy:9044/10000 (90.4400%)

Train Epoch: 5 [19800/60000 (33%)] Loss:0.275157
Train Epoch: 5 [39800/60000 (66%)] Loss:0.406938
Train Epoch: 5 [59800/60000 (100%)] Loss:0.260176

Test set: Average loss : 0.0016, Accuracy:9069/10000 (90.6900%)

Train Epoch: 6 [19800/60000 (33%)] Loss:0.286559
Train Epoch: 6 [39800/60000 (66%)] Loss:0.317441
Train Epoch: 6 [59800/60000 (100%)] Loss:0.317349

Test set: Average loss : 0.0016, Accuracy:9089/10000 (90.8900%)

Train Epoch: 7 [19800/60000 (33%)] Loss:0.348282
Train Epoch: 7 [39800/60000 (66%)] Loss:0.310945
Train Epoch: 7 [59800/60000 (100%)] Loss:0.318187

Test set: Average loss : 0.0015, Accuracy:9113/10000 (91.1300%)

Train Epoch: 8 [19800/60000 (33%)] Loss:0.303592
Train Epoch: 8 [39800/60000 (66%)] Loss:0.242741
Train Epoch: 8 [59800/60000 (100%)] Loss:0.306747

Test set: Average loss : 0.0015, Accuracy:9125/10000 (91.2500%)

Train Epoch: 9 [19800/60000 (33%)] Loss:0.242279
Train Epoch: 9 [39800/60000 (66%)] Loss:0.357418
Train Epoch: 9 [59800/60000 (100%)] Loss:0.361650

Test set: Average loss : 0.0015, Accuracy:9141/10000 (91.4100%)

Train Epoch: 10 [19800/60000 (33%)] Loss:0.296708
Train Epoch: 10 [39800/60000 (66%)] Loss:0.252936
Train Epoch: 10 [59800/60000 (100%)] Loss:0.325649

Test set: Average loss : 0.0015, Accuracy:9154/10000 (91.5400%)

Train Epoch: 11 [19800/60000 (33%)] Loss:0.323194
Train Epoch: 11 [39800/60000 (66%)] Loss:0.351030
Train Epoch: 11 [59800/60000 (100%)] Loss:0.372997

Test set: Average loss : 0.0014, Accuracy:9173/10000 (91.7300%)

Train Epoch: 12 [19800/60000 (33%)] Loss:0.307830
Train Epoch: 12 [39800/60000 (66%)] Loss:0.249295
Train Epoch: 12 [59800/60000 (100%)] Loss:0.294375

Test set: Average loss : 0.0014, Accuracy:9185/10000 (91.8500%)

Train Epoch: 13 [19800/60000 (33%)] Loss:0.328178
Train Epoch: 13 [39800/60000 (66%)] Loss:0.310206
Train Epoch: 13 [59800/60000 (100%)] Loss:0.250517

Test set: Average loss : 0.0014, Accuracy:9209/10000 (92.0900%)

Train Epoch: 14 [19800/60000 (33%)] Loss:0.247009
Train Epoch: 14 [39800/60000 (66%)] Loss:0.207665
Train Epoch: 14 [59800/60000 (100%)] Loss:0.379712

Test set: Average loss : 0.0014, Accuracy:9214/10000 (92.1400%)

Train Epoch: 15 [19800/60000 (33%)] Loss:0.160634
Train Epoch: 15 [39800/60000 (66%)] Loss:0.228721
Train Epoch: 15 [59800/60000 (100%)] Loss:0.233944

Test set: Average loss : 0.0013, Accuracy:9230/10000 (92.3000%)

Train Epoch: 16 [19800/60000 (33%)] Loss:0.229239
Train Epoch: 16 [39800/60000 (66%)] Loss:0.287494
Train Epoch: 16 [59800/60000 (100%)] Loss:0.252327

Test set: Average loss : 0.0013, Accuracy:9233/10000 (92.3300%)

Train Epoch: 17 [19800/60000 (33%)] Loss:0.188378
Train Epoch: 17 [39800/60000 (66%)] Loss:0.188356
Train Epoch: 17 [59800/60000 (100%)] Loss:0.288556

Test set: Average loss : 0.0013, Accuracy:9246/10000 (92.4600%)

Train Epoch: 18 [19800/60000 (33%)] Loss:0.172678
Train Epoch: 18 [39800/60000 (66%)] Loss:0.289051
Train Epoch: 18 [59800/60000 (100%)] Loss:0.259598

Test set: Average loss : 0.0013, Accuracy:9258/10000 (92.5800%)

Train Epoch: 19 [19800/60000 (33%)] Loss:0.241649
Train Epoch: 19 [39800/60000 (66%)] Loss:0.216921
Train Epoch: 19 [59800/60000 (100%)] Loss:0.203279

Test set: Average loss : 0.0013, Accuracy:9260/10000 (92.6000%)

:::info As you can see, the accuracy barely improves late in training. I suspect the model design is partly to blame, or perhaps something else.
That said, the course instructor mentioned that neural networks have long since pushed MNIST recognition accuracy to around 99.9%... :::
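
Part of the gap is the fully connected architecture itself; a small convolutional network trained with the same loop typically gets MNIST well past 98%. A sketch of such a model (layer sizes are my own illustrative choices, untuned; it consumes the raw (N, 1, 28, 28) images, so the .view(-1, 28*28) flattening must be dropped):

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(32 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):                           # x: (N, 1, 28, 28)
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)  # -> (N, 16, 14, 14)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # -> (N, 32, 7, 7)
        x = F.relu(self.fc1(x.flatten(1)))
        return self.fc2(x)                          # raw logits for CrossEntropyLoss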

Complete code

:::tips The full script keeps the weights and the forward pass as module-level globals rather than the nn.Module class shown above; the two forms compute the same thing :::

"""
p39
多分类的一个实战
"""

import torch
import numpy as np
import matplotlib.pyplot as plt  # needed by show_img below
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
from torch import nn
import torch.nn.functional as F
from torch import optim

# hyperparameters
batch_size = 200
learning_rate = 0.001
epochs = 20
# Load the datasets
train_loader = torch.utils.data.DataLoader(
    MNIST(root='./data', train=True, download=True, transform=transforms.Compose([
        transforms.ToTensor(),
        # Standardize: subtract the mean, then divide by the standard deviation,
        # giving data with mean 0 and variance 1
        transforms.Normalize((0.1307,), (0.3081,))  # mean and std of the MNIST training pixels
    ])),
    batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    MNIST(root='./data', train=False, download=True, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))  # same statistics as the training set
    ])),
    batch_size=batch_size, shuffle=True
)
# Random weight initialization
# Each weight is (out_features, in_features): w1 is 200 x 784, etc.
w1, b1 = torch.randn(200, 784, requires_grad=True), torch.randn(200, requires_grad=True)
w2, b2 = torch.randn(200, 200, requires_grad=True), torch.randn(200, requires_grad=True)
w3, b3 = torch.randn(10, 200, requires_grad=True), torch.zeros(10, requires_grad=True)  # biases all zero

# Kaiming initialization; without it the gradients can stall later in training
torch.nn.init.kaiming_normal_(w1)
torch.nn.init.kaiming_normal_(w2)
torch.nn.init.kaiming_normal_(w3)

print('Init w1 & w2 & w3 is \n{} \n{} \n{}'.format(w1, w2, w3))

def forward(x):
    # w.t() is equivalent to w.T: both give the transpose
    x = x @ w1.t() + b1
    # F.relu() zeroes out negative entries and leaves positive ones unchanged
    x = F.relu(x)
    x = x @ w2.t() + b2
    x = F.relu(x)
    x = x @ w3.t() + b3
    # note: a ReLU on the output layer is normally omitted before
    # CrossEntropyLoss; it is kept here to match the logged run above
    x = F.relu(x)
    return x


optimizer = optim.SGD([w1, b1, w2, b2, w3, b3], lr=learning_rate)
"""
    解释一下CrossEntropyLoss()
    正常的过程应该是
    1. 计算SoftMax()
    2. 计算LogSoftMax()
    3. 计算NLLLoss()
    而CrossEntropyLoss()将这三步融合了, 只需要调用一次就好了
"""
criteon = nn.CrossEntropyLoss()

def show_img(img_data):
    if not isinstance(img_data, torch.Tensor):
        raise ValueError('instance error')
    if img_data.shape != (28, 28):
        raise ValueError('shape error')
    plt.imshow(img_data, cmap='gray')  # single-channel digit, so use a gray colormap
    plt.show()


for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        # flatten each image; -1 lets PyTorch infer the batch dimension
        data = data.view(-1, 28*28)
        # forward pass: logits has shape (batch_size, 10)
        logits = forward(data)
        # cross-entropy between the predicted logits and the true labels
        loss = criteon(logits, target)
        # gradient descent: clear the old gradients first
        optimizer.zero_grad()
        # loss depends on logits, which depend on w1..w3 and b1..b3 (all with
        # requires_grad=True); backward() computes d(loss)/d(param) into .grad
        loss.backward()
        # take one SGD step using the gradients just computed
        optimizer.step()
        # log every 100 batches (batch_size=200, 300 batches per epoch, 60000 samples)
        if (batch_idx+1) % 100 == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)] \t Loss:{:.06f}".format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()
            ))
    test_loss = 0
    correct = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for data, target in test_loader:
            data = data.view(-1, 28 * 28)
            logits = forward(data)
            # accumulate per-batch mean losses; dividing by the dataset size
            # below is why the reported average loss looks so small
            test_loss += criteon(logits, target).item()
            # max(1) returns (values, indices); [1] is the predicted class per sample
            pred = logits.max(1)[1]
            correct += pred.eq(target).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\n Test set: Average loss : {:.4f}, Accuracy:{}/{} ({:.04f}%) \n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)
    ))
    # stop early once test accuracy exceeds 95%
    if 100. * correct / len(test_loader.dataset) > 95:
        print('Current w1 & w2 & w3 is \n{} \n{} \n{}'.format(w1, w2, w3))
        break
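
Instead of printing the raw weight tensors, it is usually more useful to save them; a sketch with torch.save (the file name mnist_weights.pt is arbitrary):

torch.save({'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2, 'w3': w3, 'b3': b3}, 'mnist_weights.pt')
# later: restore the weights and reuse the same forward() function
state = torch.load('mnist_weights.pt')
w1, b1, w2, b2, w3, b3 = (state[k] for k in ('w1', 'b1', 'w2', 'b2', 'w3', 'b3'))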