PyTorch 使用自己的数据集

  1. from __future__ import print_function
  2. import torch
  3. import torch.nn as nn
  4. import torch.nn.functional as F
  5. import torch.autograd.variable as Variable
  6. import torch.optim as optim
  7. import torchvision
  8. import torchvision.transforms as transforms
  9. import numpy as np
  10. import matplotlib.pyplot as plt
  1. %matplotlib inline

保存 CIFAR10 为图片

CIFAR10 数据集说明

  • 包含有 data_batch_1,…,data_batch_5,test_batch
  • 每一个batch文件包括一个字典,字典的元素是:
    • data:一个尺寸为 10000×3072、数据类型为 uint8 的 numpy array,每一行数据存储了一张 32×32 彩色图片的数据,前 1024 个值是图像的红色通道数据,接着依次是绿色通道和蓝色通道。
    • labels:一个包含 10000 个 0-9 数字的列表,对应 data 里每张图片的标签。

读取数据集并保存

重复执行会清空csv文件

  1. from matplotlib.image import imsave
  2. import numpy as np
  3. import csv
  4. import os
  5. def unpickle(file):
  6. import pickle
  7. with open(file, 'rb') as fo:
  8. dict = pickle.load(fo, encoding='bytes')
  9. return dict
  10. path = './data/cifar10/cifar-10-batches-py/'
  11. root = './data/cifar10/cifar10/'
  12. header = ['label', 'index']
  13. rows = []
  14. for i in range(5):
  15. file = path + 'data_batch_' + str(i+1)
  16. Xtr = unpickle(file)
  17. print('正在处理训练集:', i)
  18. for j in range(10000): # 处理10k张图片
  19. img = np.reshape(Xtr[b'data'][j], (3, 32, 32)) # R:1024,G:1024,B:1024
  20. img = img.transpose(1, 2, 0) # 转置成为图片的格式(H,W,C)
  21. picName = './data/cifar10/cifar10/train/' + \
  22. str(int(i*10000+j)) + '.jpg'
  23. if not os.path.exists(picName):
  24. imsave(picName, img)
  25. rows.append((Xtr[b'labels'][j], j))
  26. with open(root+'train/train.csv', 'w', encoding='utf-8', newline='') as f:
  27. w = csv.writer(f)
  28. w.writerow(header)
  29. w.writerows(rows)
  30. rows = []
  31. Xte = unpickle(path+'test_batch')
  32. print('正在处理测试集: 0')
  33. for j in range(10000): # 处理10k张图片
  34. img = np.reshape(Xte[b'data'][j], (3, 32, 32))
  35. img = img.transpose(1, 2, 0) # 转置成为图片的格式(H,W,C)
  36. picName = './data/cifar10/cifar10/test/' + \
  37. str(j) + '.jpg'
  38. if not os.path.exists(picName):
  39. imsave(picName, img)
  40. rows.append((Xte[b'labels'][j], j))
  41. with open(root+'test/test.csv', 'w', encoding='utf-8', newline='') as f:
  42. w = csv.writer(f)
  43. w.writerow(header)
  44. w.writerows(rows)
  45. print('处理完毕')
  1. 正在处理训练集: 0
  2. 正在处理训练集: 1
  3. 正在处理训练集: 2
  4. 正在处理训练集: 3
  5. 正在处理训练集: 4

显示一张图片

  1. from matplotlib.pyplot import imshow
  2. # show a single image
  3. img = np.reshape(Xtr[b'data'][0], (3, 32, 32)).transpose(1, 2, 0)
  4. imshow(img)
  1. <matplotlib.image.AxesImage at 0x18d183d2400>

output_9_1.png

定义自己的类

定义自己的数据集类,主要是在继承(torch.utils.data.Dataset)后重写初始化(__init__(self))、读取图片(__getitem__(self, index))和返回数据集大小(__len__(self))这三个函数

  • 训练集每张图片的命名规则为“number.jpg”,train.csv标记了各个图片的类别
  • 测试集每张图片的命名规则为“number.jpg”,test.csv标记了各个图片的类别
  1. import os
  2. import numpy as np
  3. import pandas as pd
  4. import matplotlib.pyplot as plt
  5. from torch.utils.data import Dataset
  6. # import csv
  7. # 数据保存在 ./data/cifar10/cifar10/train(test)/
  8. # root = './data/cifar10/cifar10/
  9. class myDataset(Dataset):
  10. def __init__(self, root, train=True, transform=None):
  11. '''
  12. Args:
  13. root : 根目录
  14. transform: 数据处理方式
  15. '''
  16. if train:
  17. self.root = root + 'train/'
  18. data = pd.read_csv(root + 'train/train.csv')
  19. else:
  20. self.root = root + 'test/'
  21. data = pd.read_csv(root + 'test/test.csv')
  22. self.imgs = data['index'].values
  23. self.labels = data['label'].values
  24. if transform:
  25. self.transform = transform
  26. def __getitem__(self, index):
  27. target = self.labels[index]
  28. image = plt.imread(self.root+str(int(index))+'.jpg')
  29. if self.transform is not None:
  30. image = self.transform(image)
  31. return image, target
  32. def __len__(self):
  33. return len(self.imgs)

使用自己的类

  1. from torch.utils.data import Dataset, DataLoader
  2. batch_size = 4
  3. epoch_size = 3
  4. transform = transforms.Compose(
  5. [transforms.ToTensor(),
  6. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
  7. trainset = myDataset(root='./data/cifar10/cifar10/',
  8. train=True,
  9. transform=transform)
  10. trainloader = torch.utils.data.DataLoader(trainset,
  11. batch_size=batch_size,
  12. shuffle=True,
  13. num_workers=0)
  14. testset = myDataset(root='./data/cifar10/cifar10/',
  15. train=False,
  16. transform=transform)
  17. testloader = torch.utils.data.DataLoader(testset,
  18. batch_size=batch_size,
  19. shuffle=False,
  20. num_workers=0)
  21. classes = ('plane', 'car', 'bird', 'cat',
  22. 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

BUG

当 shuffle 设置为 True 时,DataLoader 报 num_samples 相关的错误

原因

因为之前的csv文件被清空导致数据为空

  1. def imshow(img):
  2. img = img / 2 + 0.5 # 去标准化
  3. npimg = img.numpy() # 将torch.FloatTensor 转换为numpy
  4. # plt.axis("off") # 不显示坐标尺寸
  5. plt.imshow(np.transpose(npimg, (1, 2, 0))) # 进行转置
  6. plt.show() # 显示图片
  7. # get some random training images
  8. dataiter = iter(trainloader)
  9. images, labels = dataiter.next()
  10. # show images
  11. imshow(torchvision.utils.make_grid(images))
  12. # print labels
  13. print(' '.join('%11s' % classes[labels[j]] for j in range(batch_size)))

output_16_0.png

  1. plane dog truck frog

torchvision.datasets.ImageFolder

保存/加载模型

保存模型

  1. # 模型保存
  2. torch.save(net.state_dict(), './model/learn0.pt') # .pt or .pth

定义网络

  1. # 网络结构
  2. class Net(nn.Module): # nn.Module 是所有神经网络的基类,自定义的网络应该继承自它
  3. def __init__(self):
  4. super(Net, self).__init__()
  5. self.conv1 = nn.Conv2d(3, 6, 5)
  6. self.pool = nn.MaxPool2d(2, 2)
  7. self.conv2 = nn.Conv2d(6, 16, 5)
  8. self.fc1 = nn.Linear(16 * 5 * 5, 120)
  9. self.fc2 = nn.Linear(120, 84)
  10. self.fc3 = nn.Linear(84, 10)
  11. def forward(self, x):
  12. x = self.pool(F.relu(self.conv1(x)))
  13. x = self.pool(F.relu(self.conv2(x)))
  14. x = x.view(-1, 16 * 5 * 5)
  15. x = F.relu(self.fc1(x))
  16. x = F.relu(self.fc2(x))
  17. x = self.fc3(x)
  18. return x

加载模型

  1. model = Net().to('cuda')
  2. model.load_state_dict(torch.load('./model/learn0.pt'))
  3. model.eval()
  1. Net(
  2. (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  3. (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  4. (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  5. (fc1): Linear(in_features=400, out_features=120, bias=True)
  6. (fc2): Linear(in_features=120, out_features=84, bias=True)
  7. (fc3): Linear(in_features=84, out_features=10, bias=True)
  8. )

model.load_state_dict(torch.load('./model/learn0.pt')) 的返回值:

  1. IncompatibleKeys(missing_keys=[], unexpected_keys=[])

猜测是加载模型正确,没有遗漏和错误。

测试模型

  1. # 加载测试集
  2. test_set = torchvision.datasets.CIFAR10(
  3. root='./data/cifar10',
  4. train=False,
  5. transform=transforms.Compose(
  6. [transforms.ToTensor(),
  7. transforms.Normalize((0.5, 0.5, 0.5),
  8. (0.5, 0.5, 0.5))])
  9. )
  10. testloader = torch.utils.data.DataLoader(
  11. test_set,
  12. batch_size=4,
  13. shuffle=True,
  14. num_workers=2
  15. )
  16. classes = ('plane', 'car', 'bird', 'cat',
  17. 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
  1. # 测试
  2. dataiter = iter(testloader)
  3. images, labels = dataiter.next()
  4. def imshow(img):
  5. img = img / 2 + 0.5 # 去标准化
  6. npimg = img.numpy() # 将torch.FloatTensor 转换为numpy
  7. # plt.axis("off") # 不显示坐标尺寸
  8. plt.imshow(np.transpose(npimg, (1, 2, 0))) # 进行转置
  9. plt.show() # 显示图片
  10. # print images
  11. print('GroundTruth:')
  12. imshow(torchvision.utils.make_grid(images))
  13. print(' '.join('%11s' % classes[labels[j]] for j in range(4)))
  14. # 测试一次
  15. outputs = model(images.to('cuda'))
  16. _, predicted = torch.max(outputs, 1)
  17. print('Predicted:\n', ' '.join('%11s' % classes[predicted[j]]
  18. for j in range(4)))
  1. GroundTruth:

output_28_1.png

  1. dog deer dog bird
  2. Predicted:
  3. cat car cat bird

测试自己的图片

网络输入尺寸是 32x32,因此需要把自己的图片进行预处理,之后才可以进行测试

需要注意的是,如果使用 plt.imread() 读入图片,返回的是 np.ndarray 类型,此时需要用 transforms.ToPILImage() 转化为 PIL 类型

采用 PIL.Image(Pillow for Python3.x) 则无需进行转化

  1. img_path = './data/cifar10/mytest/car-1.jpg' # √
  2. # img_path = './data/cifar10/mytest/dog-1.jpg' # ×
  3. # # 采用 plt.imread() 读入图片
  4. # img = plt.imread(img_path)
  5. # print('图片尺寸:',np.array(img).shape,type(img))
  6. # transform = transforms.Compose(
  7. # [transforms.ToPILImage(),
  8. # transforms.Resize([32,32]),
  9. # transforms.ToTensor(),
  10. # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
  11. # 采用 PIL.Image 读取图片
  12. from PIL import Image
  13. img = Image.open(img_path)
  14. print('未 transform 的图片尺寸:',np.array(img).shape,' 图片类型:',type(img))
  15. plt.imshow(np.array(img))
  16. plt.show()
  17. transform = transforms.Compose(
  18. [transforms.Resize([32,32]),
  19. transforms.ToTensor(),
  20. transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
  21. img = transform(img).reshape(-1,3,32,32)
  22. print('已 transpose 的图片尺寸:',np.array(img).shape,' 图片类型:',type(img))
  23. # plt.imshow(img)
  24. imshow(torchvision.utils.make_grid(img))
  25. output = model(img.to('cuda'))
  26. _, predicted = torch.max(output, 1)
  27. print('模型输出为:',classes[predicted])
  1. transform 的图片尺寸: (435, 728, 3) 图片类型: <class 'PIL.JpegImagePlugin.JpegImageFile'>

output_31_1.png

  1. transpose 的图片尺寸: (1, 3, 32, 32) 图片类型: <class 'torch.Tensor'>

output_31_3.png

  1. 模型输出为: car
  1. !pip install Pillow
  1. Looking in indexes: https://pypi.doubanio.com/simple/
  2. Requirement already satisfied: Pillow in d:\office\office\python\python3.6.2\lib\site-packages (4.3.0)
  3. Requirement already satisfied: olefile in d:\office\office\python\python3.6.2\lib\site-packages (from Pillow) (0.44)

格式转换

  1. # !jupyter nbconvert --to html --template full learn.ipynb
  2. # !jupyter nbconvert --to markdown learn.ipynb
  3. !jupyter nbconvert --to html --template full learn-1.ipynb
  4. !jupyter nbconvert --to markdown learn-1.ipynb
  1. [NbConvertApp] Converting notebook learn-1.ipynb to html
  2. [NbConvertApp] Writing 335545 bytes to learn-1.html
  3. [NbConvertApp] Converting notebook learn-1.ipynb to markdown
  4. [NbConvertApp] Support files will be in learn-1_files\
  5. [NbConvertApp] Making directory learn-1_files
  6. [NbConvertApp] Making directory learn-1_files
  7. [NbConvertApp] Writing 8365 bytes to learn-1.md