18.train_GPU

方式一

gpu通常使用在:

模型 demo = demo.cuda()

损失函数 loss_fn = loss_fn.cuda()

数据:[训练],[验证]

  1. imgs = imgs.cuda()
  2. targets = targets.cuda()

通常是使用这种方法

  1. if torch.cuda.is_available():
  2. demo = demo.cuda()

经测试:

在使用cpu的情况下,total_train_step = 100 所使用的时间为10.69461178779602

在使用gpu的情况下,total_train_step = 100 所使用的时间为3.9713311195373535

方式二

在开头加上以下两种形式之一的代码:

对于单显卡,以下两种写法没有区别。

  1. device = torch.device("cuda")
  2. device = torch.device("cuda:0") # 多张显卡时,可以用来指定显卡 0代表第一张显卡

这种方式更加常见

  1. device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tips:

无论是哪种使用gpu的方式,在model 和 loss 处都可以不用重新赋值,但在数据(例如图片和标签处)必须重新赋值。

例如

  1. # 模型
  2. demo.to(device)
  3. demo.cuda()
  4. # loss
  5. loss_fn.to(device)
  6. loss_fn.cuda()
  7. # imgs and targets
  8. imgs = imgs.to(device)
  9. targets = targets.to(device)
  10. if torch.cuda.is_available():
  11. imgs = imgs.cuda()
  12. targets = targets.cuda()

code:

  1. import torch
  2. import torchvision
  3. from torch.utils.data import DataLoader
  4. from torch.utils.tensorboard import SummaryWriter
  5. #import time
  6. from models import *
  7. # 定义训练时使用的设备
  8. #device = torch.device("cpu")
  9. #device = torch.device("cuda")
  10. device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  11. # 准备数据集
  12. train_data = torchvision.datasets.CIFAR10(root="./dataset",train=True,transform=torchvision.transforms.ToTensor(),
  13. download=True)
  14. test_data = torchvision.datasets.CIFAR10(root="./dataset",train=False,transform=torchvision.transforms.ToTensor(),
  15. download=True)
  16. train_data_size = len(train_data)
  17. test_data_size = len(test_data)
  18. print("训练数据长度为:{}".format(train_data_size))
  19. print("测试数据长度为:{}".format(test_data_size))
  20. # 加载数据集
  21. train_dataloader = DataLoader(train_data,batch_size=64)
  22. test_dataloader = DataLoader(test_data,batch_size=64)
  23. # 创建神经网络
  24. demo = DEMO()
  25. demo.to(device)
  26. # demo = demo.to(device) 也可以的。第一种方式也是一样,但 图片 和 标签之类的数据必须重新赋值
  27. # if torch.cuda.is_available():
  28. # demo = demo.cuda()
  29. # 定义损失函数
  30. loss_fn = nn.CrossEntropyLoss()
  31. loss_fn.to(device)
  32. #loss_fn = loss_fn.to(device) 也可以。第一种方式也是一样
  33. # if torch.cuda.is_available():
  34. # loss_fn = loss_fn.cuda()
  35. # 定义优化器
  36. #learing_rate = 1e-2 = 1 × (10) ^ (-2)
  37. learning_rate = 0.01
  38. optimizer = torch.optim.SGD(demo.parameters(),lr=learning_rate)
  39. # tensorboard
  40. writer = SummaryWriter("./train_logs")
  41. # 训练
  42. # 设置一些参数
  43. total_train_step = 0
  44. total_test_step = 0
  45. epoch = 10
  46. #start_time = time.time()
  47. for i in range(epoch):
  48. print("--------------epoch:{}----------------".format(i+1))
  49. # 训练
  50. demo.train() # 在使用一些特殊层时要调用,不适用时也可以调用,实践中很常用 同 demo.eval()
  51. for data in train_dataloader:
  52. imgs, targets = data
  53. # 图片 标签之类的数据 在使用cuda是必须要重新赋值
  54. imgs = imgs.to(device)
  55. targets = targets.to(device)
  56. # if torch.cuda.is_available():
  57. # imgs = imgs.cuda()
  58. # targets = targets.cuda()
  59. output = demo(imgs)
  60. loss = loss_fn(output,targets)
  61. optimizer.zero_grad()
  62. loss.backward()
  63. optimizer.step()
  64. total_train_step += 1
  65. if total_train_step % 100 == 0:
  66. #end_time = time.time()
  67. #print("total_train_step = 100 所使用的时间为{}".format(end_time - start_time))
  68. print("训练次数:{},train_Loss = {}".format(total_train_step,loss.item()))
  69. writer.add_scalar("train_loss",loss.item(),total_train_step)
  70. # 验证
  71. demo.eval()
  72. total_test_loss = 0
  73. # 梯度为零,不优化参数
  74. total_acc = 0
  75. with torch.no_grad():
  76. for data in test_dataloader:
  77. imgs,targets = data
  78. imgs = imgs.to(device)
  79. targets = targets.to(device)
  80. # if torch.cuda.is_available():
  81. # imgs = imgs.cuda()
  82. # targets = targets.cuda()
  83. output = demo(imgs)
  84. loss = loss_fn(output, targets)
  85. total_test_loss = total_test_loss + loss
  86. pre = output.argmax(1)
  87. acc = ((pre == targets).sum())
  88. total_acc = total_acc + acc
  89. accuracy = total_acc / test_data_size
  90. print("total_test_loss = {}".format(total_test_loss))
  91. print("accuracy = {}".format(accuracy))
  92. writer.add_scalar("test_loss",total_test_loss,i+1)
  93. writer.add_scalar("accuracy",accuracy,i+1)
  94. torch.save(demo,"pretrained_demo_{}.pth".format(i+1))
  95. print("pretrained_demo已保存")
  96. writer.close()