1. GPU
1.1 Counting available GPUs
torch.cuda.device_count()  # returns the number of available GPUs
1.2 Selecting the training device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # torch.cuda.is_available() checks whether a GPU is present
1.3 Parallel training
torch.nn.DataParallel(model)  # wraps the model for data-parallel training across the visible GPUs
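Putting the three pieces together, a minimal sketch (the model here is only a placeholder, not a specific detection network):

import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = nn.Linear(10, 2)              # placeholder model for illustration
if torch.cuda.device_count() > 1:     # wrap only when more than one GPU is visible
    model = nn.DataParallel(model)
model = model.to(device)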
2. Reading data from files
2.1 Reading VOC dataset classes
def get_classes(classes_path):
    # classes_path is the path of the file that stores the class names
    with open(classes_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names, len(class_names)
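A hedged usage example, assuming a VOC-style class file with one class name per line (the path is hypothetical):

# model_data/voc_classes.txt (hypothetical path), one class name per line:
#   aeroplane
#   bicycle
#   ...
class_names, num_classes = get_classes('model_data/voc_classes.txt')
print(num_classes)   # 20 for the standard VOC classes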
2.2 Reading anchor boxes
import numpy as np

def get_anchors(anchors_path):
    '''loads the anchors from a file'''
    with open(anchors_path, encoding='utf-8') as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    # convert the list to a numpy array and reshape it to (n, 2)
    anchors = np.array(anchors).reshape(-1, 2)
    return anchors, len(anchors)
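The anchors file is expected to hold a single comma-separated line of width,height pairs; a hedged usage example (the path is hypothetical, the values shown are the widely used YOLOv3 defaults):

# model_data/yolo_anchors.txt (hypothetical path), a single line such as:
#   10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
anchors, num_anchors = get_anchors('model_data/yolo_anchors.txt')
print(anchors.shape)   # (9, 2)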
3. Network structure
3.1 Model definition
import torch.nn as nn

class Net(nn.Module):
    def __init__(self, anchors_mask, num_classes, pretrained=False):
        super(Net, self).__init__()
        # build the layers here (backbone, neck, detection head, ...)

    def forward(self, x):
        # run the forward pass and return the predictions
        return x
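A toy example of how the skeleton might be filled in; the layers below are placeholders for illustration, not the actual detection network:

import torch.nn as nn

class ToyNet(nn.Module):
    def __init__(self, anchors_mask, num_classes, pretrained=False):
        super(ToyNet, self).__init__()
        self.backbone = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        # per anchor: 4 box offsets + 1 objectness score + num_classes class scores
        self.head = nn.Conv2d(32, len(anchors_mask[0]) * (5 + num_classes), kernel_size=1)

    def forward(self, x):
        x = self.backbone(x)
        return self.head(x)

# model = ToyNet(anchors_mask=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], num_classes=20)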
3.2 Weight initialization
def weights_init(net, init_type='normal', init_gain=0.02):
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and classname.find('Conv') != -1:
            if init_type == 'normal':
                torch.nn.init.normal_(m.weight.data, 0.0, init_gain)
            elif init_type == 'xavier':
                torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain)
            elif init_type == 'kaiming':
                torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                torch.nn.init.orthogonal_(m.weight.data, gain=init_gain)
            else:
                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
        elif classname.find('BatchNorm2d') != -1:
            torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
            torch.nn.init.constant_(m.bias.data, 0.0)
    print('initialize network with %s type' % init_type)
    # apply init_func recursively to every submodule of net
    net.apply(init_func)
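Usage is a single call on a constructed model (assuming a model instance named model, as above; init_type is one of 'normal', 'xavier', 'kaiming', 'orthogonal'):

weights_init(model, init_type='kaiming')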
3.3 Loading pretrained weights
# load the model's current parameters into a dict
model_dict = model.state_dict()
# read the checkpoint's key-value pairs and keep only the keys that exist in
# the network and whose shapes match
pretrained_dict = torch.load(pth, map_location=device)
pretrained_dict = {k: v for k, v in pretrained_dict.items()
                   if k in model_dict and np.shape(model_dict[k]) == np.shape(v)}
# update the parameters
model_dict.update(pretrained_dict)
# load the parameters into the model
model.load_state_dict(model_dict)
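For debugging it can help to record which checkpoint keys were actually matched; a sketch continuing from the snippet above (the names load_key/no_load_key are assumptions, not part of the original code):

load_key, no_load_key, temp_dict = [], [], {}
for k, v in pretrained_dict.items():
    if k in model_dict and np.shape(model_dict[k]) == np.shape(v):
        temp_dict[k] = v
        load_key.append(k)
    else:
        no_load_key.append(k)
model_dict.update(temp_dict)
model.load_state_dict(model_dict)
print('loaded keys:', len(load_key), 'skipped keys:', len(no_load_key))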
3.4 Freezing parameters
for param in model.backbone.parameters():
    param.requires_grad = False
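A related sketch, assuming the usual frozen/unfrozen training stages: pass only trainable parameters to the optimizer, and flip requires_grad back when unfreezing (the optimizer choice and learning rate are assumptions):

import torch.optim as optim

# only parameters with requires_grad=True are handed to the optimizer
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)

# later, to unfreeze the backbone for the second training stage:
for param in model.backbone.parameters():
    param.requires_grad = True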
4. Data processing
4.1 Mosaic data augmentation
# method of the dataset class; relies on self.rand, self.merge_bboxes and the cvtColor helper
def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
    # input size of the model, e.g. [416, 416]
    h, w = input_shape
    min_offset_x = self.rand(0.3, 0.7)
    min_offset_y = self.rand(0.3, 0.7)

    image_datas = []
    box_datas = []
    index = 0
    # each annotation line holds an image path followed by labels [x1,y1,x2,y2,c]
    for line in annotation_line:
        line_content = line.split()
        # PIL loads images in RGB format
        image = Image.open(line_content[0])
        # same as image = image.convert('RGB') (guards against errors caused by grayscale images)
        image = cvtColor(image)
        # original image size, used to adjust the labels after resizing
        iw, ih = image.size
        # convert the box information into a numpy array of shape [n, 5]
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])

        # random horizontal flip
        flip = self.rand() < .5
        if flip and len(box) > 0:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            box[:, [0, 2]] = iw - box[:, [2, 0]]

        # random resize with aspect-ratio jitter
        new_ar = iw/ih * self.rand(1-jitter, 1+jitter) / self.rand(1-jitter, 1+jitter)
        scale = self.rand(.4, 1)
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # paste onto a gray canvas, one quadrant per image
        if index == 0:
            dx = int(w*min_offset_x) - nw
            dy = int(h*min_offset_y) - nh
        elif index == 1:
            dx = int(w*min_offset_x) - nw
            dy = int(h*min_offset_y)
        elif index == 2:
            dx = int(w*min_offset_x)
            dy = int(h*min_offset_y)
        elif index == 3:
            dx = int(w*min_offset_x)
            dy = int(h*min_offset_y) - nh

        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image)

        index = index + 1
        box_data = []
        # adjust the boxes accordingly
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]]*nw/iw + dx
            box[:, [1, 3]] = box[:, [1, 3]]*nh/ih + dy
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box

        image_datas.append(image_data)
        box_datas.append(box_data)

    # split the four images and stitch them into one mosaic
    cutx = int(w * min_offset_x)
    cuty = int(h * min_offset_y)

    new_image = np.zeros([h, w, 3])
    new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
    new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
    new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
    new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
    new_image = np.array(new_image, np.uint8)

    # color-space (HSV) jitter
    # compute the jitter factors
    r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
    # convert the image to HSV
    hue, sat, val = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
    dtype = new_image.dtype
    # apply the jitter through lookup tables
    x = np.arange(0, 256, dtype=r.dtype)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
    new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)

    # merge and clip the boxes of the four images
    new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

    return new_image, new_boxes
4.2 Box format conversion (x1, y1, x2, y2 -> x_c, y_c, w, h)
# normalize x by the input width and y by the input height (e.g. 416)
box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
# x1, y1, x2, y2 -> x_c, y_c, w, h
box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
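A small worked example of the conversion on a 416x416 input (the box values are illustrative only):

import numpy as np

# one box (x1, y1, x2, y2) = (100, 150, 200, 250) with class id 0
box = np.array([[100., 150., 200., 250., 0.]])
input_shape = (416, 416)          # (h, w)

box[:, [0, 2]] /= input_shape[1]  # x1, x2   -> 0.2404, 0.4808
box[:, [1, 3]] /= input_shape[0]  # y1, y2   -> 0.3606, 0.6010
box[:, 2:4] -= box[:, 0:2]        # w, h     -> 0.2404, 0.2404
box[:, 0:2] += box[:, 2:4] / 2    # x_c, y_c -> 0.3606, 0.4808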
4.3 Generating the feature-map grid
# grid_y/grid_x hold the row/column index of every cell on an h x w feature map
grid_y, grid_x = torch.meshgrid([torch.arange(h), torch.arange(w)])
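A sketch of how the grid is typically used when decoding predictions (on PyTorch 1.10+ the indexing='ij' keyword silences the meshgrid warning; tx/ty below are stand-ins for the network's raw x/y outputs, not actual predictions):

import torch

h, w = 13, 13   # feature-map size, example values
grid_y, grid_x = torch.meshgrid(torch.arange(h), torch.arange(w), indexing='ij')

# add the cell offsets to the sigmoid-activated x/y predictions so that
# box centers are expressed in feature-map coordinates
tx = torch.rand(h, w)
ty = torch.rand(h, w)
x_center = torch.sigmoid(tx) + grid_x
y_center = torch.sigmoid(ty) + grid_y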