Added the correct way to load a custom dataset (the expected directory layout is sketched below);
    Added K-fold cross-validation (a note on per-fold model re-initialization follows the code)
    Accuracy is around 65%
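
    The dataset below is loaded with torchvision's datasets.ImageFolder, which infers class labels from the sub-directory names under the root folder. A minimal sketch of the layout assumed for ./shallow/ (the class-folder names are illustrative, not from the original script):

    shallow/
        class_a/
            img_001.jpg
            ...
        class_b/
            img_101.jpg
            ...

    ImageFolder sorts the class-folder names and assigns them integer labels 0, 1, ...; once full_data is created below, the mapping can be inspected via full_data.class_to_idx.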

# Import modules
import random
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torchvision import datasets, models, transforms, utils

num_workers = 0
# Training-time augmentation (defined here but not applied below; full_data uses tf2)
tf = transforms.Compose([
    # transforms.ToPILImage(),
    transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Plain preprocessing: tensor conversion + ImageNet normalization
tf2 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
full_data = datasets.ImageFolder('./shallow/', transform=tf2)
# Load the whole dataset as a single batch so X and Y end up as plain tensors
loader = DataLoader(full_data, batch_size=len(full_data), shuffle=True, num_workers=num_workers)
# How do we know the loader can be unpacked into two parts?
# Each item yielded by the DataLoader is an (images, labels) tuple, because
# ImageFolder.__getitem__ returns (image, label) and the default collate
# function stacks each part into a tensor.
examples = enumerate(loader)
idx, (examples_data, examples_target) = next(examples)
examples_target = torch.unsqueeze(examples_target, 1)
print(examples_target.size())
print(examples_data.size())
X = examples_data
Y = examples_target

net = models.resnet18(pretrained=True)

# Create a dummy dataset
# X = torch.rand(500, 3, 32, 32)
# Y = torch.rand(500, 1)
print(Y.size())
# print(X)
# Randomly shuffle the samples before splitting into folds
index = [i for i in range(len(X))]
random.shuffle(index)
X = X[index]
Y = Y[index]
print(Y.size())
def get_kfold_data(k, i, X, y):
    # Return the training and validation data for fold i+1 (i = 0 .. k-1);
    # X_train is the training split, X_valid the validation split
    fold_size = X.shape[0] // k  # samples per fold: total samples // number of folds
    val_start = i * fold_size
    if i != k - 1:
        val_end = (i + 1) * fold_size
        X_valid, y_valid = X[val_start:val_end], y[val_start:val_end]
        X_train = torch.cat((X[0:val_start], X[val_end:]), dim=0)
        y_train = torch.cat((y[0:val_start], y[val_end:]), dim=0)
    else:  # last fold
        # If the samples do not divide evenly, the remainder goes into the last fold
        X_valid, y_valid = X[val_start:], y[val_start:]
        X_train = X[0:val_start]
        y_train = y[0:val_start]
    return X_train, y_train, X_valid, y_valid
def traink(model, X_train, y_train, X_val, y_val, BATCH_SIZE, learning_rate, TOTAL_EPOCHS):
    train_loader = DataLoader(TensorDataset(X_train, y_train), BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), BATCH_SIZE, shuffle=True)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

    losses = []
    val_losses = []
    train_acc = []
    val_acc = []

    for epoch in range(TOTAL_EPOCHS):
        model.train()
        correct = 0  # count correct predictions; print accuracy after each epoch
        for i, (images, labels) in enumerate(train_loader):
            images = images.float()
            # targets arrive with shape (N, 1); CrossEntropyLoss expects int64 of shape (N,)
            labels = torch.squeeze(labels.type(torch.LongTensor))
            optimizer.zero_grad()  # reset gradients
            outputs = model(images)
            # Compute the loss
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

            # Compute training accuracy from the same forward pass
            pred = outputs.max(1, keepdim=True)[1]
            correct += pred.eq(labels.view_as(pred)).sum().item()
            if (i + 1) % 10 == 0:
                # Print the loss every 10 batches
                print('Epoch : %d/%d, Iter : %d/%d, Loss: %.4f' % (epoch + 1, TOTAL_EPOCHS,
                                                                   i + 1, len(X_train) // BATCH_SIZE,
                                                                   loss.item()))
        accuracy = 100. * correct / len(X_train)
        print('Epoch: {}, Loss: {:.5f}, Training set accuracy: {}/{} ({:.3f}%)'.format(
            epoch + 1, loss.item(), correct, len(X_train), accuracy))
        train_acc.append(accuracy)

        # Evaluate on the validation set after every epoch
        model.eval()
        val_loss = 0
        correct = 0
        with torch.no_grad():
            for i, (images, labels) in enumerate(val_loader):
                images = images.float()
                labels = torch.squeeze(labels.type(torch.LongTensor))
                y_hat = model(images)
                loss = criterion(y_hat, labels).item()  # batch average loss
                val_loss += loss * len(labels)  # sum up batch loss
                pred = y_hat.max(1, keepdim=True)[1]  # index of the max logit
                correct += pred.eq(labels.view_as(pred)).sum().item()

        val_loss /= len(X_val)  # average loss per validation sample
        val_losses.append(val_loss)
        accuracy = 100. * correct / len(X_val)
        print('Validation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
            val_loss, correct, len(X_val), accuracy))
        val_acc.append(accuracy)

    return losses, val_losses, train_acc, val_acc
def k_fold(k, X_train, y_train, num_epochs=3, learning_rate=0.0001, batch_size=16):
    train_loss_sum, valid_loss_sum = 0, 0
    train_acc_sum, valid_acc_sum = 0, 0

    for i in range(k):
        print('*' * 25, 'Fold', i + 1, '*' * 25)
        data = get_kfold_data(k, i, X_train, y_train)  # training/validation split for this fold
        # net = net()  # instantiate the model (some model class defined elsewhere)
        # Train on this fold
        train_loss, val_loss, train_acc, val_acc = traink(net, *data, batch_size, learning_rate, num_epochs)

        print('train_loss:{:.5f}, train_acc:{:.3f}%'.format(train_loss[-1], train_acc[-1]))
        print('valid loss:{:.5f}, valid_acc:{:.3f}%\n'.format(val_loss[-1], val_acc[-1]))

        train_loss_sum += train_loss[-1]
        valid_loss_sum += val_loss[-1]
        train_acc_sum += train_acc[-1]
        valid_acc_sum += val_acc[-1]

    print('\n', '#' * 10, 'Final k-fold cross-validation results', '#' * 10)
    print('average train loss:{:.4f}, average train accuracy:{:.3f}%'.format(train_loss_sum / k, train_acc_sum / k))
    print('average valid loss:{:.4f}, average valid accuracy:{:.3f}%'.format(valid_loss_sum / k, valid_acc_sum / k))
    return


k_fold(10, X, Y, num_epochs=3, learning_rate=0.0001, batch_size=16)
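
One caveat in the script above: traink is always called with the single, globally created net, so weights trained in fold i carry over into fold i+1, which makes the later per-fold validation scores optimistic; the commented-out net = net() line points at the fix. A minimal sketch of re-creating the model for every fold (the make_model helper and the use of full_data.classes to size the classification head are assumptions, not part of the original script):

def make_model(num_classes):
    # Fresh pretrained backbone per fold, so the folds stay independent
    model = models.resnet18(pretrained=True)
    # Replace the 1000-class ImageNet head with one sized for this dataset
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

# Inside the k_fold loop, instead of reusing the global net:
# net = make_model(num_classes=len(full_data.classes))
# train_loss, val_loss, train_acc, val_acc = traink(net, *data, batch_size, learning_rate, num_epochs)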