一、mosaic
Mosaic 数据增强将四张图片通过随机缩放、随机裁剪、随机排布的方式拼接成一张训练图片,从而提升对小目标的检测效果。
1.1 主函数
def __getitem__(self, index):
    """Return one sample as ``(image, box)`` for the given dataset index.

    When ``self.mosaic`` is enabled, roughly half of the samples drawn while
    ``self.epoch_now < self.epoch_length * self.mosaic_ratio`` are built by
    mosaic augmentation from four annotation lines (three random ones plus
    the one at ``index``). All other samples go through
    ``self.get_random_data``, with random augmentation switched by
    ``self.train``.

    Args:
        index: Sample index; it is wrapped with ``index % self.length``.

    Returns:
        tuple: ``(image, box)`` where ``image`` is the float32 array returned
        by ``preprocess_input`` transposed from (H, W, C) to (C, H, W), and
        ``box`` is a float32 array whose first four columns have been
        converted from corner form (x1, y1, x2, y2) to centre form
        (cx, cy, w, h). Remaining columns are left untouched.
    """
    index = index % self.length
    annotation = self.annotation_lines[index]

    # Mosaic needs three *other* annotation lines; with fewer than three
    # available, ``sample`` would raise ValueError, so fall back to the plain
    # augmentation path instead.
    use_mosaic = (
        self.mosaic
        and len(self.annotation_lines) >= 3
        and self.rand() < 0.5
        and self.epoch_now < self.epoch_length * self.mosaic_ratio
    )

    if use_mosaic:
        # Randomly pick three annotation lines, add the current one so that
        # four images take part in the mosaic, then shuffle their order.
        lines = sample(self.annotation_lines, 3)
        lines.append(annotation)
        shuffle(lines)
        image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
    else:
        # Random augmentation is applied during training only, not at
        # inference (controlled by ``self.train``).
        image, box = self.get_random_data(annotation, self.input_shape, random=self.train)

    image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
    box = np.array(box, dtype=np.float32)
    if len(box) != 0:
        # Order matters: widths/heights must be computed first, because the
        # centre is then derived from the top-left corner plus half the size.
        box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
        box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
    return image, box
1.2 get_random_data_with_Mosaic函数
输入:四张图片的标注行(每行包含图片路径以及真实框坐标和类别)、输入尺寸 (h, w)
输出:拼接并经过色域变换后的图像,以及合并处理后的真实框坐标
# Call site in __getitem__: `lines` holds the four annotation lines (three
# randomly sampled plus the current index) and `self.input_shape` is the
# target (h, w) of the mosaic image.
image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
    """Build one mosaic training image from four annotated images.

    Each image is optionally flipped, randomly rescaled with aspect-ratio
    jitter, and pasted onto a grey canvas so that it touches a shared random
    cut point. The four canvases are then stitched along that cut point, the
    result is colour-jittered in HSV space, and the per-tile boxes are handed
    to ``self.merge_bboxes``.

    Args:
        annotation_line: Exactly four annotation strings. Each string is
            whitespace separated: the image path first, followed by one
            comma-separated group of integers per ground-truth box
            (box coordinates and class).
        input_shape: ``(h, w)`` of the output image, e.g. ``(640, 640)``.
        jitter: Relative range of the random aspect-ratio distortion.
        hue: Relative range of the random hue gain.
        sat: Relative range of the random saturation gain.
        val: Relative range of the random value (brightness) gain.

    Returns:
        tuple: ``(new_image, new_boxes)``. ``new_image`` is a uint8 RGB array
        of shape ``(h, w, 3)``; ``new_boxes`` is whatever
        ``self.merge_bboxes`` returns for the four per-tile box arrays.

    Raises:
        ValueError: If ``annotation_line`` does not contain exactly four
            entries (the mosaic layout below is defined for four tiles only).
    """
    lines = list(annotation_line)
    if len(lines) != 4:
        raise ValueError(
            "get_random_data_with_Mosaic expects exactly 4 annotation lines, got %d" % len(lines)
        )

    h, w = input_shape

    # Random position of the cut point, as a fraction of the canvas size.
    min_offset_x = self.rand(0.3, 0.7)
    min_offset_y = self.rand(0.3, 0.7)
    cutx = int(w * min_offset_x)
    cuty = int(h * min_offset_y)

    image_datas = []
    box_datas = []
    for index, line in enumerate(lines):
        # First token is the image path; the rest are the boxes.
        line_content = line.split()

        image = Image.open(line_content[0])
        # Project helper defined elsewhere; presumably converts to RGB.
        image = cvtColor(image)
        iw, ih = image.size

        # One row per box, parsed from "a,b,c,..." integer groups.
        box = np.array([np.array(list(map(int, item.split(',')))) for item in line_content[1:]])

        # Random horizontal flip. It is only applied when boxes exist, since
        # an empty box array is 1-D and cannot be indexed by column.
        flip = self.rand() < .5
        if flip and len(box) > 0:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            # Mirror the x coordinates; x1/x2 swap so that x1 <= x2 still holds.
            box[:, [0, 2]] = iw - box[:, [2, 0]]

        # Random scale plus aspect-ratio distortion.
        new_ar = iw / ih * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.4, 1)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        # Guard against a zero-sized resize for extreme aspect ratios.
        nw = max(1, nw)
        nh = max(1, nh)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Place the tile so that one of its corners sits on the cut point:
        #   index 0 -> top-left,    index 1 -> bottom-left,
        #   index 2 -> bottom-right, index 3 -> top-right.
        dx = cutx - nw if index in (0, 1) else cutx
        dy = cuty - nh if index in (0, 3) else cuty

        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image)

        # Move the boxes along with the resized/pasted tile.
        box_data = []
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            # Clip to the canvas.
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            # Drop boxes that collapsed to (almost) nothing after clipping.
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box

        image_datas.append(image_data)
        box_datas.append(box_data)

    # Stitch the four canvases together along the cut point.
    new_image = np.zeros([h, w, 3], dtype=np.uint8)
    new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
    new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
    new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
    new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

    # Random per-channel gains for the HSV colour jitter.
    gains = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1

    # Channel arrays get their own names so the hue/sat/val parameters are
    # not overwritten.
    hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
    dtype = new_image.dtype

    # Apply the gains through lookup tables; hue wraps at 180, the other two
    # channels saturate at 255.
    x = np.arange(0, 256, dtype=gains.dtype)
    lut_hue = ((x * gains[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * gains[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * gains[2], 0, 255).astype(dtype)

    new_image = cv2.merge((cv2.LUT(hue_ch, lut_hue), cv2.LUT(sat_ch, lut_sat), cv2.LUT(val_ch, lut_val)))
    new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)

    # Final box processing is delegated to merge_bboxes (defined elsewhere),
    # which receives the per-tile boxes and the cut position.
    new_boxes = self.merge_bboxes(box_datas, cutx, cuty)
    return new_image, new_boxes
