1. GPU
1.1 Counting available GPUs
torch.cuda.device_count()  # returns the number of available GPUs
1.2 Selecting the training device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # torch.cuda.is_available() checks whether a GPU is present
1.3 Parallel training
torch.nn.DataParallel(model)  # wraps the model for data-parallel training across the visible GPUs
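Putting the three pieces together, a minimal sketch (the model here is only a placeholder, not a specific detection network):

import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = nn.Linear(10, 2)              # placeholder model for illustration
if torch.cuda.device_count() > 1:     # wrap only when more than one GPU is visible
    model = nn.DataParallel(model)
model = model.to(device)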
2. Reading data from files
2.1 Reading VOC dataset classes
def get_classes(classes_path):
    # classes_path is the path of the file that stores the class names
    with open(classes_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names, len(class_names)
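A hedged usage example, assuming a VOC-style class file with one class name per line (the path is hypothetical):

# model_data/voc_classes.txt (hypothetical path), one class name per line:
#   aeroplane
#   bicycle
#   ...
class_names, num_classes = get_classes('model_data/voc_classes.txt')
print(num_classes)   # 20 for the standard VOC classes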
2.2 Reading anchor boxes
import numpy as np

def get_anchors(anchors_path):
    '''loads the anchors from a file'''
    with open(anchors_path, encoding='utf-8') as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    # convert the list to a numpy array and reshape it to (n, 2)
    anchors = np.array(anchors).reshape(-1, 2)
    return anchors, len(anchors)
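The anchors file is expected to hold a single comma-separated line of width,height pairs; a hedged usage example (the path is hypothetical, the values shown are the widely used YOLOv3 defaults):

# model_data/yolo_anchors.txt (hypothetical path), a single line such as:
#   10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
anchors, num_anchors = get_anchors('model_data/yolo_anchors.txt')
print(anchors.shape)   # (9, 2)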
3. Network structure
3.1 Model definition
import torch.nn as nn

class Net(nn.Module):
    def __init__(self, anchors_mask, num_classes, pretrained=False):
        super(Net, self).__init__()
        # build the layers here (backbone, neck, detection head, ...)

    def forward(self, x):
        # run the forward pass and return the predictions
        return x
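A toy example of how the skeleton might be filled in; the layers below are placeholders for illustration, not the actual detection network:

import torch.nn as nn

class ToyNet(nn.Module):
    def __init__(self, anchors_mask, num_classes, pretrained=False):
        super(ToyNet, self).__init__()
        self.backbone = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        # per anchor: 4 box offsets + 1 objectness score + num_classes class scores
        self.head = nn.Conv2d(32, len(anchors_mask[0]) * (5 + num_classes), kernel_size=1)

    def forward(self, x):
        x = self.backbone(x)
        return self.head(x)

# model = ToyNet(anchors_mask=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], num_classes=20)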
3.2 Weight initialization
def weights_init(net, init_type='normal', init_gain=0.02):
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and classname.find('Conv') != -1:
            if init_type == 'normal':
                torch.nn.init.normal_(m.weight.data, 0.0, init_gain)
            elif init_type == 'xavier':
                torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain)
            elif init_type == 'kaiming':
                torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                torch.nn.init.orthogonal_(m.weight.data, gain=init_gain)
            else:
                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
        elif classname.find('BatchNorm2d') != -1:
            torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
            torch.nn.init.constant_(m.bias.data, 0.0)
    print('initialize network with %s type' % init_type)
    # apply init_func recursively to every submodule of net
    net.apply(init_func)
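Usage is a single call on a constructed model (assuming a model instance named model, as above; init_type is one of 'normal', 'xavier', 'kaiming', 'orthogonal'):

weights_init(model, init_type='kaiming')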
3.3 Loading pretrained weights
# load the model's current parameters into a dict
model_dict = model.state_dict()
# read the checkpoint's key-value pairs and keep only the keys that exist in
# the network and whose shapes match
pretrained_dict = torch.load(pth, map_location=device)
pretrained_dict = {k: v for k, v in pretrained_dict.items()
                   if k in model_dict and np.shape(model_dict[k]) == np.shape(v)}
# update the parameters
model_dict.update(pretrained_dict)
# load the parameters into the model
model.load_state_dict(model_dict)
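For debugging it can help to record which checkpoint keys were actually matched; a sketch continuing from the snippet above (the names load_key/no_load_key are assumptions, not part of the original code):

load_key, no_load_key, temp_dict = [], [], {}
for k, v in pretrained_dict.items():
    if k in model_dict and np.shape(model_dict[k]) == np.shape(v):
        temp_dict[k] = v
        load_key.append(k)
    else:
        no_load_key.append(k)
model_dict.update(temp_dict)
model.load_state_dict(model_dict)
print('loaded keys:', len(load_key), 'skipped keys:', len(no_load_key))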
3.4 Freezing parameters
for param in model.backbone.parameters():
    param.requires_grad = False
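A related sketch, assuming the usual frozen/unfrozen training stages: pass only trainable parameters to the optimizer, and flip requires_grad back when unfreezing (the optimizer choice and learning rate are assumptions):

import torch.optim as optim

# only parameters with requires_grad=True are handed to the optimizer
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)

# later, to unfreeze the backbone for the second training stage:
for param in model.backbone.parameters():
    param.requires_grad = True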
4. Data processing
4.1 Mosaic data augmentation
# method of the dataset class; relies on self.rand, self.merge_bboxes and the cvtColor helper
def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
    # input size of the model, e.g. [416, 416]
    h, w = input_shape
    min_offset_x = self.rand(0.3, 0.7)
    min_offset_y = self.rand(0.3, 0.7)

    image_datas = []
    box_datas = []
    index = 0
    # each annotation line holds an image path followed by labels [x1,y1,x2,y2,c]
    for line in annotation_line:
        line_content = line.split()
        # PIL loads images in RGB format
        image = Image.open(line_content[0])
        # same as image = image.convert('RGB') (guards against errors caused by grayscale images)
        image = cvtColor(image)
        # original image size, used to adjust the labels after resizing
        iw, ih = image.size
        # convert the box information into a numpy array of shape [n, 5]
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])

        # random horizontal flip
        flip = self.rand() < .5
        if flip and len(box) > 0:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            box[:, [0, 2]] = iw - box[:, [2, 0]]

        # random resize with aspect-ratio jitter
        new_ar = iw/ih * self.rand(1-jitter, 1+jitter) / self.rand(1-jitter, 1+jitter)
        scale = self.rand(.4, 1)
        if new_ar < 1:
            nh = int(scale*h)
            nw = int(nh*new_ar)
        else:
            nw = int(scale*w)
            nh = int(nw/new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # paste onto a gray canvas, one quadrant per image
        if index == 0:
            dx = int(w*min_offset_x) - nw
            dy = int(h*min_offset_y) - nh
        elif index == 1:
            dx = int(w*min_offset_x) - nw
            dy = int(h*min_offset_y)
        elif index == 2:
            dx = int(w*min_offset_x)
            dy = int(h*min_offset_y)
        elif index == 3:
            dx = int(w*min_offset_x)
            dy = int(h*min_offset_y) - nh

        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image)

        index = index + 1
        box_data = []
        # adjust the boxes accordingly
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]]*nw/iw + dx
            box[:, [1, 3]] = box[:, [1, 3]]*nh/ih + dy
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box

        image_datas.append(image_data)
        box_datas.append(box_data)

    # split the four images and stitch them into one mosaic
    cutx = int(w * min_offset_x)
    cuty = int(h * min_offset_y)

    new_image = np.zeros([h, w, 3])
    new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
    new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
    new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
    new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
    new_image = np.array(new_image, np.uint8)

    # color-space (HSV) jitter
    # compute the jitter factors
    r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
    # convert the image to HSV
    hue, sat, val = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
    dtype = new_image.dtype
    # apply the jitter through lookup tables
    x = np.arange(0, 256, dtype=r.dtype)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
    new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)

    # merge and clip the boxes of the four images
    new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

    return new_image, new_boxes
4.2 Box format conversion (x1, y1, x2, y2 -> x_c, y_c, w, h)
# normalize x by the input width and y by the input height (e.g. 416)
box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
# x1, y1, x2, y2 -> x_c, y_c, w, h
box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
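A small worked example of the conversion on a 416x416 input (the box values are illustrative only):

import numpy as np

# one box (x1, y1, x2, y2) = (100, 150, 200, 250) with class id 0
box = np.array([[100., 150., 200., 250., 0.]])
input_shape = (416, 416)          # (h, w)

box[:, [0, 2]] /= input_shape[1]  # x1, x2   -> 0.2404, 0.4808
box[:, [1, 3]] /= input_shape[0]  # y1, y2   -> 0.3606, 0.6010
box[:, 2:4] -= box[:, 0:2]        # w, h     -> 0.2404, 0.2404
box[:, 0:2] += box[:, 2:4] / 2    # x_c, y_c -> 0.3606, 0.4808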
4.3 Generating the feature-map grid
# grid_y/grid_x hold the row/column index of every cell on an h x w feature map
grid_y, grid_x = torch.meshgrid([torch.arange(h), torch.arange(w)])
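A sketch of how the grid is typically used when decoding predictions (on PyTorch 1.10+ the indexing='ij' keyword silences the meshgrid warning; tx/ty below are stand-ins for the network's raw x/y outputs, not actual predictions):

import torch

h, w = 13, 13   # feature-map size, example values
grid_y, grid_x = torch.meshgrid(torch.arange(h), torch.arange(w), indexing='ij')

# add the cell offsets to the sigmoid-activated x/y predictions so that
# box centers are expressed in feature-map coordinates
tx = torch.rand(h, w)
ty = torch.rand(h, w)
x_center = torch.sigmoid(tx) + grid_x
y_center = torch.sigmoid(ty) + grid_y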