一、mosaic

通过随机缩放、随机裁剪、随机排布的方式将四张图片拼接成一张训练图片,提升对小目标的检测效果。
image.png

1.1 主函数

  def __getitem__(self, index):
      """Return one training sample as ``(image, box)``.

      The image is passed through ``preprocess_input`` and transposed with
      axes ``(2, 0, 1)``. ``box`` is a float32 array; when it is non-empty,
      columns 2:4 are replaced by the difference between columns 2:4 and
      0:2 (a size), and columns 0:2 are then shifted by half of that size
      (a centre point).
      """
      index = index % self.length

      # Mosaic is only attempted when it is enabled, a coin flip succeeds and
      # the current epoch is still inside the mosaic portion of training.
      # The ``and`` chain short-circuits, so ``self.rand()`` is drawn only
      # when ``self.mosaic`` is truthy.
      use_mosaic = (
          self.mosaic
          and self.rand() < 0.5
          and self.epoch_now < self.epoch_length * self.mosaic_ratio
      )

      if use_mosaic:
          # Three randomly sampled annotation lines plus the requested one
          # give the four images to stitch; their order is then shuffled.
          lines = sample(self.annotation_lines, 3)
          lines.append(self.annotation_lines[index])
          shuffle(lines)
          image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
      else:
          # Single-image path: random augmentation is applied only when
          # ``self.train`` is truthy.
          image, box = self.get_random_data(
              self.annotation_lines[index], self.input_shape, random = self.train
          )

      pixels = np.array(image, dtype=np.float32)
      image = np.transpose(preprocess_input(pixels), (2, 0, 1))
      box = np.array(box, dtype=np.float32)

      if len(box):
          # Columns 2:4 become width/height, then columns 0:2 become the centre.
          box[:, 2:4] -= box[:, 0:2]
          box[:, 0:2] += box[:, 2:4] / 2
      return image, box

1.2 get_random_data_with_Mosaic函数

输入:四张图片的标签信息,输入尺寸
输出:图像和真实框坐标

  1. image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
  def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
      """Stitch several annotated images into one mosaic sample.

      Each annotation string is split on whitespace: the first field is
      opened as an image path and every remaining field is parsed as a
      comma-separated list of integers describing one box (five values per
      box: two corner points followed by one more value, which the notes
      describe as the class).

      Args:
          annotation_line: iterable of annotation strings. The stitching step
              reads the first four processed images.
          input_shape: ``(height, width)`` of the output canvas.
          jitter: half-width of the random aspect-ratio distortion range.
          hue: strength of the random gain applied to the H channel.
          sat: strength of the random gain applied to the S channel.
          val: strength of the random gain applied to the V channel.

      Returns:
          ``(new_image, new_boxes)``: the stitched, colour-jittered RGB uint8
          array and the result of ``self.merge_bboxes`` on the per-image boxes.
      """
      height, width = input_shape

      # Random split point of the mosaic, between 30% and 70% of each side.
      min_offset_x = self.rand(0.3, 0.7)
      min_offset_y = self.rand(0.3, 0.7)
      cutx = int(width * min_offset_x)
      cuty = int(height * min_offset_y)

      image_datas = []
      box_datas = []
      for slot, line in enumerate(annotation_line):
          # First field: image path. Remaining fields: one box each.
          fields = line.split()
          picture = cvtColor(Image.open(fields[0]))
          iw, ih = picture.size
          box = np.array([np.array([int(v) for v in item.split(',')]) for item in fields[1:]])

          # Horizontal flip: the coin is always drawn, but the flip is only
          # applied when the image has at least one box.
          if self.rand() < .5 and len(box) > 0:
              picture = picture.transpose(Image.FLIP_LEFT_RIGHT)
              # Mirror the x coordinates; swapping columns keeps x1 <= x2.
              box[:, [0, 2]] = iw - box[:, [2, 0]]

          # Random aspect-ratio distortion followed by a random scale.
          new_ar = iw / ih * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
          scale = self.rand(.4, 1)
          if new_ar < 1:
              nh = int(scale * height)
              nw = int(nh * new_ar)
          else:
              nw = int(scale * width)
              nh = int(nw / new_ar)
          picture = picture.resize((nw, nh), Image.BICUBIC)

          # Paste offset so the image touches the split point from its own
          # quadrant: slots 0/1 sit left of the split, 2/3 right of it;
          # slots 0/3 sit above the split, 1/2 below it.
          if slot in (0, 1):
              dx = cutx - nw
          elif slot in (2, 3):
              dx = cutx
          if slot in (0, 3):
              dy = cuty - nh
          elif slot in (1, 2):
              dy = cuty

          canvas = Image.new('RGB', (width, height), (128, 128, 128))
          canvas.paste(picture, (dx, dy))
          image_datas.append(np.array(canvas))

          # Move the boxes into canvas coordinates and drop degenerate ones.
          box_data = []
          if len(box) > 0:
              np.random.shuffle(box)
              box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
              box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
              # Clamp the corners to the canvas.
              box[:, 0:2] = np.maximum(box[:, 0:2], 0)
              box[:, 2] = np.minimum(box[:, 2], width)
              box[:, 3] = np.minimum(box[:, 3], height)
              # Keep only boxes wider and taller than one pixel.
              keep = np.logical_and(box[:, 2] - box[:, 0] > 1, box[:, 3] - box[:, 1] > 1)
              box = box[keep]
              box_data = np.zeros((len(box), 5))
              box_data[:len(box)] = box
          box_datas.append(box_data)

      # Assemble the mosaic: each source contributes only its own quadrant.
      stitched = np.zeros([height, width, 3])
      stitched[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
      stitched[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
      stitched[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
      stitched[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
      stitched = stitched.astype(np.uint8)

      # Random per-channel gains for the colour jitter, each centred on 1.
      gains = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1

      # Apply the gains in HSV space through lookup tables.
      chan_h, chan_s, chan_v = cv2.split(cv2.cvtColor(stitched, cv2.COLOR_RGB2HSV))
      out_dtype = stitched.dtype
      levels = np.arange(0, 256, dtype=gains.dtype)
      lut_hue = ((levels * gains[0]) % 180).astype(out_dtype)
      lut_sat = np.clip(levels * gains[1], 0, 255).astype(out_dtype)
      lut_val = np.clip(levels * gains[2], 0, 255).astype(out_dtype)
      jittered = cv2.merge((cv2.LUT(chan_h, lut_hue), cv2.LUT(chan_s, lut_sat), cv2.LUT(chan_v, lut_val)))
      new_image = cv2.cvtColor(jittered, cv2.COLOR_HSV2RGB)

      # Combine the per-image boxes with respect to the split point.
      new_boxes = self.merge_bboxes(box_datas, cutx, cuty)
      return new_image, new_boxes