一、mosaic
- 1.1 主函数
- 1.2 get_random_data_with_Mosaic函数

一、mosaic

Mosaic 数据增强：将四张图片通过随机缩放、随机裁剪、随机排布的方式拼接成一张训练图片，以提升对小目标的检测效果。

1.1 主函数

def __getitem__(self, index):
    """Return the ``(image, box)`` pair for sample ``index``.

    ``index`` is wrapped with ``self.length``. When ``self.mosaic`` is set, a
    mosaic of four images is built with probability 0.5, but only while
    ``self.epoch_now < self.epoch_length * self.mosaic_ratio``; in every
    other case the single-image path ``get_random_data`` is used, with
    random augmentation switched by ``self.train``.

    Returns:
        image: float32 array, preprocessed by ``preprocess_input`` and
            transposed with axes ``(2, 0, 1)``.
        box: float32 array whose first four columns have been converted
            from two corner points to ``(center, size)`` form; the
            remaining columns are left untouched.

    Note:
        ``random.sample`` raises ``ValueError`` if fewer than three
        annotation lines exist when the mosaic branch is taken.
    """
    index = index % self.length
    #---------------------------------------------------#
    #   Random augmentation is applied during training
    #   and disabled for inference (random = self.train).
    #   `and` short-circuits, so self.rand() is only drawn
    #   when mosaic is enabled.
    #---------------------------------------------------#
    use_mosaic = (
        self.mosaic
        and self.rand() < 0.5
        and self.epoch_now < self.epoch_length * self.mosaic_ratio
    )
    if use_mosaic:
        # Randomly pick the annotation lines of three other images
        lines = sample(self.annotation_lines, 3)
        # Add the image for this index: four images in total for the mosaic
        lines.append(self.annotation_lines[index])
        # Randomise which quadrant each image ends up in
        shuffle(lines)
        image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
    else:
        image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random=self.train)

    #---------------------------------------------------#
    #   Post-processing applies to every sample, whichever
    #   augmentation path produced it.
    #---------------------------------------------------#
    image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
    box = np.array(box, dtype=np.float32)
    if len(box) != 0:
        # Columns 2:4 become the box size (second corner minus first corner) ...
        box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
        # ... and columns 0:2 become the box center (first corner plus half the size)
        box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
    return image, box

1.2 get_random_data_with_Mosaic函数

输入：四张图片的标签信息（annotation_line，每行包含图片路径及其真实框）和网络输入尺寸（input_shape）
输出：拼接后的图像，以及拼接图像上的真实框坐标和类别

image, box  = self.get_random_data_with_Mosaic(lines, self.input_shape)

def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
    """Build one mosaic image and its boxes from several annotated images.

    Args:
        annotation_line: iterable of annotation strings. Each string is
            split on whitespace: the first field is the image path, every
            further field is a comma-separated list of integers describing
            one box (the first four values are used as x1, y1, x2, y2; the
            original notes describe the remainder as the class).
        input_shape: ``(height, width)`` of the output canvas, e.g. (640, 640).
        jitter: half-width of the random aspect-ratio distortion range.
        hue, sat, val: strength of the random HSV gain for each channel.

    Returns:
        ``(image, boxes)``: a uint8 ``(height, width, 3)`` array and whatever
        ``self.merge_bboxes`` returns for the per-tile boxes.
    """
    canvas_h, canvas_w = input_shape

    #---------------------------------#
    #   Randomly choose the point where the four tiles meet,
    #   as a fraction of the canvas and then in pixels
    #---------------------------------#
    split_ratio_x = self.rand(0.3, 0.7)
    split_ratio_y = self.rand(0.3, 0.7)
    cutx = int(canvas_w * split_ratio_x)
    cuty = int(canvas_h * split_ratio_y)

    tiles = []
    tile_boxes = []

    for slot, record in enumerate(annotation_line):
        #---------------------------------#
        #   Split the line: image path first, then one field per box
        #---------------------------------#
        fields = record.split()

        #---------------------------------#
        #   Open the image
        #   (cvtColor is defined elsewhere; presumably it converts to RGB)
        #---------------------------------#
        picture = Image.open(fields[0])
        picture = cvtColor(picture)
        src_w, src_h = picture.size

        #---------------------------------#
        #   Box coordinates and class as an integer array
        #---------------------------------#
        boxes = np.array([np.array(list(map(int, item.split(',')))) for item in fields[1:]])

        #---------------------------------#
        #   Random horizontal flip (only applied when the image has boxes)
        #---------------------------------#
        mirrored = self.rand() < .5
        if mirrored and len(boxes) > 0:
            picture = picture.transpose(Image.FLIP_LEFT_RIGHT)
            # Mirror the x coordinates; x1/x2 swap so that x1 <= x2 still holds
            boxes[:, [0, 2]] = src_w - boxes[:, [2, 0]]

        #------------------------------------------#
        #   Random rescale with aspect-ratio distortion
        #------------------------------------------#
        aspect = src_w / src_h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        zoom = self.rand(.4, 1)
        if aspect < 1:
            new_h = int(zoom * canvas_h)
            new_w = int(new_h * aspect)
        else:
            new_w = int(zoom * canvas_w)
            new_h = int(new_w / aspect)
        picture = picture.resize((new_w, new_h), Image.BICUBIC)

        #-----------------------------------------------#
        #   Anchor the tile to the split point:
        #   slot 0 top-left, 1 bottom-left, 2 bottom-right, 3 top-right.
        #   No branch matches a slot above 3, so such a tile reuses
        #   the previous offsets.
        #-----------------------------------------------#
        if slot in (0, 1):
            off_x = cutx - new_w
        elif slot in (2, 3):
            off_x = cutx
        if slot in (0, 3):
            off_y = cuty - new_h
        elif slot in (1, 2):
            off_y = cuty

        # Paste onto a grey canvas of the full output size
        canvas = Image.new('RGB', (canvas_w, canvas_h), (128, 128, 128))
        canvas.paste(picture, (off_x, off_y))
        tiles.append(np.array(canvas))

        #---------------------------------#
        #   Map the boxes into canvas coordinates
        #---------------------------------#
        placed = []
        if len(boxes) > 0:
            np.random.shuffle(boxes)
            boxes[:, [0, 2]] = boxes[:, [0, 2]] * new_w / src_w + off_x
            boxes[:, [1, 3]] = boxes[:, [1, 3]] * new_h / src_h + off_y
            # Clamp to the canvas
            top_left = boxes[:, 0:2]
            top_left[top_left < 0] = 0
            right_edge = boxes[:, 2]
            right_edge[right_edge > canvas_w] = canvas_w
            bottom_edge = boxes[:, 3]
            bottom_edge[bottom_edge > canvas_h] = canvas_h
            # Drop boxes that are not more than one pixel wide and high
            widths = boxes[:, 2] - boxes[:, 0]
            heights = boxes[:, 3] - boxes[:, 1]
            boxes = boxes[(widths > 1) & (heights > 1)]
            placed = np.zeros((len(boxes), 5))
            placed[:len(boxes)] = boxes
        tile_boxes.append(placed)

    #---------------------------------#
    #   Cut each tile's quadrant out and stitch them together
    #---------------------------------#
    upper, lower = slice(None, cuty), slice(cuty, None)
    left, right = slice(None, cutx), slice(cutx, None)
    quadrants = ((upper, left), (lower, left), (lower, right), (upper, right))
    mosaic = np.zeros([canvas_h, canvas_w, 3])
    for tile_idx, (rows, cols) in enumerate(quadrants):
        mosaic[rows, cols, :] = tiles[tile_idx][rows, cols, :]
    mosaic = np.array(mosaic, np.uint8)

    #---------------------------------#
    #   Colour jitter: draw one random gain per HSV channel
    #---------------------------------#
    gains = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1

    #---------------------------------#
    #   Convert to HSV and split the channels
    #---------------------------------#
    hue_ch, sat_ch, val_ch = cv2.split(cv2.cvtColor(mosaic, cv2.COLOR_RGB2HSV))
    out_dtype = mosaic.dtype

    #---------------------------------#
    #   Apply the gains through lookup tables
    #   (hue wraps at 180, saturation and value are clipped to 0..255)
    #---------------------------------#
    ramp = np.arange(0, 256, dtype=gains.dtype)
    hue_table = ((ramp * gains[0]) % 180).astype(out_dtype)
    sat_table = np.clip(ramp * gains[1], 0, 255).astype(out_dtype)
    val_table = np.clip(ramp * gains[2], 0, 255).astype(out_dtype)
    mosaic = cv2.merge((cv2.LUT(hue_ch, hue_table), cv2.LUT(sat_ch, sat_table), cv2.LUT(val_ch, val_table)))
    mosaic = cv2.cvtColor(mosaic, cv2.COLOR_HSV2RGB)

    #---------------------------------#
    #   Final processing of the boxes across the cut lines
    #---------------------------------#
    merged_boxes = self.merge_bboxes(tile_boxes, cutx, cuty)
    return mosaic, merged_boxes

CV 代码笔记

YOLOX数据增强代码（图像预处理）

一、mosaic

1.1 主函数

1.2 get_random_data_with_Mosaic函数