import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # input image channels: 1; output channels: 6; 5x5 convolution kernel
        # output-size formula: W - K + 1 (kernel size K, stride 1, no padding)
        self.conv1 = nn.Conv2d(1, 6, 5)  # Conv2d expects 4D input: (batch_size, channels, height, width)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # affine mapping: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # the backward function is defined automatically by autograd
    def forward(self, x):
        # 2x2 max pooling
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))  # pool stride defaults to the window size; 32 -> 28 -> 14
        # a square pooling window can be given as a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # 14 -> 10 -> 5, so 16*5*5 features
        x = x.view(-1, self.num_flat_features(x))  # flatten everything except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
params = list(net.parameters())  # len(params) == 10: weight + bias for each of the 5 layers
input = torch.randn(1, 1, 32, 32)
out = net(input)

'''Loss function'''
target = torch.randn(10)
target = target.view(1, -1)  # reshape the target to match the output's shape
criterion = nn.MSELoss()
loss = criterion(out, target)

'''Backpropagation'''
# print("conv1.bias.grad before zero\n", net.conv1.bias.grad)  # None
net.zero_grad()  # clear the gradient buffers of all parameters
# print("conv1.bias.grad before backward\n", net.conv1.bias.grad)  # None
loss.backward()  # backpropagate the loss
# print("conv1.bias.grad after backward\n", net.conv1.bias.grad)  # 1x6 tensor

'''
Computation graph that the gradients flow back through:
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> view -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss
'''

'''
Weight update:
weight = weight - learning_rate * gradient
'''
learning_rate = 0.01
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
# note: zero_grad() must come BEFORE backward(); calling it between backward()
# and step() wipes the gradients, so step() would leave the weights unchanged
optimizer.zero_grad()
loss = criterion(net(input), target)
loss.backward()
optimizer.step()  # update the parameters
# print("after update conv1's .weight---\n", params[0])  # now changed
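As a quick check of the size comments in forward(), here is a small sketch (the helper name conv_out is made up for illustration) that traces the 32 -> 28 -> 14 -> 10 -> 5 chain using the standard output-size formula:

def conv_out(w, k, stride=1, padding=0):
    # standard conv/pool output-size formula: floor((W - K + 2P) / S) + 1
    return (w - k + 2 * padding) // stride + 1

w = 32
w = conv_out(w, 5)             # conv1 (5x5, stride 1): 32 -> 28
w = conv_out(w, 2, stride=2)   # 2x2 max pool:          28 -> 14
w = conv_out(w, 5)             # conv2 (5x5, stride 1): 14 -> 10
w = conv_out(w, 2, stride=2)   # 2x2 max pool:          10 -> 5
print(16 * w * w)              # 400, the in_features of fc1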
Process framework:
Load the dataset ->> build the model ->> define the loss function and optimizer ->> train: compute the loss, backpropagate, update the parameters, repeat ->> test (one training pass and one evaluation per epoch is the common pattern; see the sketch below)
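To make the framework concrete before the full CIFAR-10 script below, here is a minimal sketch of the loop, assuming model, trainloader, testloader, criterion, optimizer, and num_epochs have already been built (all names are placeholders):

for epoch in range(num_epochs):
    model.train()                          # training pass
    for inputs, labels in trainloader:
        optimizer.zero_grad()              # clear gradients from the previous batch
        loss = criterion(model(inputs), labels)
        loss.backward()                    # backpropagate
        optimizer.step()                   # update parameters

    model.eval()                           # evaluation pass, once per epoch
    correct = total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            preds = model(inputs).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    print(f"epoch {epoch + 1}: test accuracy {correct / total:.3f}")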
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # per-channel mean and std: maps [0, 1] pixels to [-1, 1]
])

# set download=True on the first run if the data is not yet in ./data
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=0)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=0)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


def imshow(img):
    img = img / 2 + 0.5  # unnormalize back to [0, 1]
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))  # CHW -> HWC for matplotlib
    plt.show()


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
criterion = nn.CrossEntropyLoss()
# momentum accumulates a decaying average of past gradients, which damps oscillation and speeds up SGD
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)


def train():
    for epoch in range(4):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # zero the parameter gradients; with mini-batch training they
            # would otherwise accumulate across batches
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()   # autograd computes the gradients of all parameters
            optimizer.step()  # one gradient-descent update using those gradients
            running_loss += loss.item()
            if i % 2000 == 1999:
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    correct = 0
    total = 0
    # no gradients are needed for evaluation, which saves memory and computation
    with torch.no_grad():
        for data in testloader:
            # images.size() = [4, 3, 32, 32]; labels.size() = [4]
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)  # a 4x10 tensor: one score per class per image
            _, predicted = torch.max(outputs, 1)  # (max value, index) along the class dimension
            total += labels.size(0)
            # predicted and labels each hold 4 entries because batch_size = 4;
            # predicted == labels gives e.g. tensor([True, True, False, False], device='cuda:0')
            correct += (predicted == labels).sum().item()  # .sum() counts the True entries
    print("Accuracy of the network on the 10000 test images: %d %%" % (100 * correct / total))
    print("Finished training")


if __name__ == '__main__':
    train()
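The imshow helper above is defined but never called. A short usage sketch: preview one training batch and persist the trained weights (the checkpoint path cifar_net.pth is an arbitrary choice):

# Preview one mini-batch of training images with their labels.
dataiter = iter(trainloader)
images, labels = next(dataiter)              # images: [4, 3, 32, 32]
imshow(torchvision.utils.make_grid(images))  # tile the 4 images into one grid
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

# Save the trained parameters; restore later with net.load_state_dict(torch.load(PATH)).
torch.save(net.state_dict(), './cifar_net.pth')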
