1. GPU

1.1 Counting available GPUs

    torch.cuda.device_count()  # returns the number of available GPUs

1.2 Selecting the training device

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # torch.cuda.is_available() checks whether a GPU is available

1.3 Parallel training

    model = torch.nn.DataParallel(model)  # wrap the model for multi-GPU data-parallel training
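The three pieces above are usually combined at the start of the training script. A minimal sketch, assuming the NET class and its arguments from Section 3.1 as placeholders:

    import torch
    import torch.nn as nn

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = NET(anchors_mask=[[0, 1, 2]], num_classes=20)   # placeholder model from Section 3.1
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)                       # replicate across all visible GPUs
    model = model.to(device)                                 # move parameters to the chosen device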

2. Reading data from files

2.1 Loading the VOC dataset class names

    def get_classes(classes_path):
        # classes_path: path to the file that lists the class names
        with open(classes_path, encoding='utf-8') as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names, len(class_names)
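The class file holds one class name per line. A hedged usage example; the file path is an assumption:

    # voc_classes.txt, one name per line:
    #   aeroplane
    #   bicycle
    #   ...
    class_names, num_classes = get_classes('model_data/voc_classes.txt')  # assumed path
    print(num_classes)  # 20 for the standard VOC classes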

2.2 Loading the anchor boxes

    import numpy as np

    def get_anchors(anchors_path):
        '''loads the anchors from a file'''
        with open(anchors_path, encoding='utf-8') as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        # convert the list to a numpy array and reshape it to (n, 2)
        anchors = np.array(anchors).reshape(-1, 2)
        return anchors, len(anchors)
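The anchors file contains a single comma-separated line of width,height pairs. A hedged usage example; the file path and the anchor values are illustrative:

    # yolo_anchors.txt (one line), e.g.:
    # 12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401
    anchors, num_anchors = get_anchors('model_data/yolo_anchors.txt')  # assumed path
    print(anchors.shape)  # (9, 2)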

3. Network architecture

3.1 Model definition

    class NET(nn.Module):
        def __init__(self, anchors_mask, num_classes, pretrained=False):
            super(NET, self).__init__()
            # define the backbone, neck and detection head here

        def forward(self, x):
            # run the layers and return the raw feature-map predictions
            ...
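For reference, a minimal runnable filling of the skeleton above; the single 1x1 convolution is a placeholder for the real backbone/neck/head, not the actual detection network:

    import torch
    import torch.nn as nn

    class NET(nn.Module):
        def __init__(self, anchors_mask, num_classes, pretrained=False):
            super(NET, self).__init__()
            # placeholder head: one channel per anchor x (4 box offsets + objectness + classes)
            self.head = nn.Conv2d(3, len(anchors_mask[0]) * (5 + num_classes), kernel_size=1)

        def forward(self, x):
            return self.head(x)

    model = NET(anchors_mask=[[0, 1, 2]], num_classes=20)
    out = model(torch.randn(1, 3, 416, 416))
    print(out.shape)  # torch.Size([1, 75, 416, 416])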

3.2 Weight initialization

    def weights_init(net, init_type='normal', init_gain=0.02):
        def init_func(m):
            classname = m.__class__.__name__
            if hasattr(m, 'weight') and classname.find('Conv') != -1:
                if init_type == 'normal':
                    torch.nn.init.normal_(m.weight.data, 0.0, init_gain)
                elif init_type == 'xavier':
                    torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain)
                elif init_type == 'kaiming':
                    torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
                elif init_type == 'orthogonal':
                    torch.nn.init.orthogonal_(m.weight.data, gain=init_gain)
                else:
                    raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
            elif classname.find('BatchNorm2d') != -1:
                torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
                torch.nn.init.constant_(m.bias.data, 0.0)
        print('initialize network with %s type' % init_type)
        # apply init_func recursively to every submodule of net
        net.apply(init_func)
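A typical call right after constructing the model; the 'kaiming' init type is chosen here purely as an example:

    model = NET(anchors_mask=[[0, 1, 2]], num_classes=20, pretrained=False)
    weights_init(model, init_type='kaiming')  # prints: initialize network with kaiming type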

3.3 Loading pretrained weights

    model_dict = model.state_dict()
    # load the pretrained parameters into a dict
    pretrained_dict = torch.load(pth, map_location=device)
    # keep only the keys whose tensor shapes match the current network
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and np.shape(model_dict[k]) == np.shape(v)}
    # merge the matching parameters into the network's state dict
    model_dict.update(pretrained_dict)
    # load the merged state dict back into the model
    model.load_state_dict(model_dict)
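When debugging shape mismatches, it helps to report which keys were skipped. A small extension of the same idea; the checkpoint path is an assumption:

    pretrained_dict = torch.load('model_data/yolo_weights.pth', map_location=device)  # assumed path
    load_key, no_load_key = [], []
    for k, v in pretrained_dict.items():
        if k in model_dict and np.shape(model_dict[k]) == np.shape(v):
            load_key.append(k)
        else:
            no_load_key.append(k)
    print('loaded keys:', len(load_key), 'skipped keys:', len(no_load_key))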

3.4 Freezing parameters

    for param in model.backbone.parameters():
        param.requires_grad = False
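Frozen parameters should also be excluded from the optimizer. One common way, with an illustrative optimizer and learning rate:

    import torch.optim as optim

    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)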

4. Data processing

4.1 Mosaic data augmentation

    def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
        h, w = input_shape  # model input size, e.g. [416, 416]
        min_offset_x = self.rand(0.3, 0.7)
        min_offset_y = self.rand(0.3, 0.7)

        image_datas = []
        box_datas = []
        index = 0
        for line in annotation_line:  # four annotation lines: image path followed by labels [x1,y1,x2,y2,c]
            line_content = line.split()
            # PIL reads the image in RGB order
            image = Image.open(line_content[0])
            # same as image = image.convert('RGB'); guards against errors caused by grayscale images
            image = cvtColor(image)
            # original image size, needed to rescale the labels after resizing
            iw, ih = image.size
            # convert the box annotations to a numpy array of shape [n, 5]
            box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])

            # random horizontal flip
            flip = self.rand() < .5
            if flip and len(box) > 0:
                image = image.transpose(Image.FLIP_LEFT_RIGHT)
                box[:, [0, 2]] = iw - box[:, [2, 0]]

            # random resize with a jittered aspect ratio
            new_ar = iw / ih * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
            scale = self.rand(.4, 1)
            if new_ar < 1:
                nh = int(scale * h)
                nw = int(nh * new_ar)
            else:
                nw = int(scale * w)
                nh = int(nw / new_ar)
            image = image.resize((nw, nh), Image.BICUBIC)

            # paste each image into its quadrant of a gray canvas
            if index == 0:
                dx = int(w * min_offset_x) - nw
                dy = int(h * min_offset_y) - nh
            elif index == 1:
                dx = int(w * min_offset_x) - nw
                dy = int(h * min_offset_y)
            elif index == 2:
                dx = int(w * min_offset_x)
                dy = int(h * min_offset_y)
            elif index == 3:
                dx = int(w * min_offset_x)
                dy = int(h * min_offset_y) - nh
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image)

            index = index + 1
            box_data = []
            # apply the same flip / resize / shift to the boxes
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]
                box_data = np.zeros((len(box), 5))
                box_data[:len(box)] = box

            image_datas.append(image_data)
            box_datas.append(box_data)

        # cut the four canvases along (cutx, cuty) and stitch the quadrants together
        cutx = int(w * min_offset_x)
        cuty = int(h * min_offset_y)

        new_image = np.zeros([h, w, 3])
        new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
        new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
        new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
        new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
        new_image = np.array(new_image, np.uint8)

        # random HSV color jitter
        # draw the per-channel gains
        r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        # convert the image to HSV
        hue, sat, val = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
        dtype = new_image.dtype
        # apply the gains through lookup tables
        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
        new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)

        # merge the boxes of the four images, clipping them at the cut lines
        new_boxes = self.merge_bboxes(box_datas, cutx, cuty)
        return new_image, new_boxes
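The function relies on a few helpers that are not shown above. Hedged sketches of rand (a method of the dataset class) and cvtColor, reconstructed from how they are used in the code; merge_bboxes, which clips each tile's boxes at cutx/cuty, is omitted:

    import numpy as np

    def rand(self, a=0.0, b=1.0):
        # uniform random number in [a, b)
        return np.random.rand() * (b - a) + a

    def cvtColor(image):
        # force a 3-channel RGB image so grayscale inputs do not break the pipeline
        if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
            return image
        return image.convert('RGB')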

4.2 Box format conversion (x1, y1, x2, y2 -> x_c, y_c, w, h)

    # normalize x by the input width and y by the input height (e.g. 416)
    box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
    box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
    # convert corners (x1, y1, x2, y2) to center/size (x_c, y_c, w, h)
    box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
    box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
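A quick worked example with one box and a 416x416 input; the numbers are illustrative:

    import numpy as np

    box = np.array([[52.0, 104.0, 156.0, 312.0, 0.0]])  # x1, y1, x2, y2, class
    box[:, [0, 2]] /= 416
    box[:, [1, 3]] /= 416
    box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
    box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
    print(box)  # [[0.25 0.5  0.25 0.5  0.  ]] -> x_c, y_c, w, h, class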

4.3 Generating the feature-map grid

    # default 'ij' indexing: grid_y varies along rows, grid_x along columns
    grid_y, grid_x = torch.meshgrid([torch.arange(h), torch.arange(w)])
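A minimal sketch of the resulting shapes for a 13x13 feature map; the stacked grid gives the per-cell (x, y) offsets that are typically added to the predicted box centers (newer PyTorch versions also accept an explicit indexing='ij' argument):

    import torch

    h, w = 13, 13
    grid_y, grid_x = torch.meshgrid([torch.arange(h), torch.arange(w)])
    grid = torch.stack((grid_x, grid_y), dim=-1).float()  # shape (h, w, 2), one (x, y) offset per cell
    print(grid.shape)  # torch.Size([13, 13, 2])
    print(grid[0, 3])  # tensor([3., 0.]) -> cell in row 0, column 3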