1. GPU
1.1 Count the available GPUs
torch.cuda.device_count()  # returns the number of visible GPUs
1.2 Select the training device
device= torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# torch.cuda.is_available() reports whether a GPU can be used
1.3 Parallel training
torch.nn.DataParallel(model)
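A minimal sketch of how these pieces usually fit together; it assumes a model instance already exists, and the variable names are illustrative:

import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)                  # move the model to the chosen device first
if torch.cuda.device_count() > 1:         # wrap only when more than one GPU is visible
    model = nn.DataParallel(model)        # replicates the model and splits each batch across GPUs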
2. Reading data from files
2.1 Get the VOC dataset class names
def get_classes(classes_path):
    # classes_path is the path to a text file with one class name per line
    with open(classes_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names, len(class_names)
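A short usage sketch; the file name voc_classes.txt is only an assumption, any text file with one class name per line works:

# model_data/voc_classes.txt (assumed content, one name per line):
# aeroplane
# bicycle
# ...
class_names, num_classes = get_classes('model_data/voc_classes.txt')
print(class_names[0], num_classes)    # e.g. 'aeroplane' 20 for the VOC dataset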
2.2 Get the anchor boxes
import numpy as np

def get_anchors(anchors_path):
    '''loads the anchors from a file'''
    with open(anchors_path, encoding='utf-8') as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    # convert the list to a numpy array and reshape it to (n, 2)
    anchors = np.array(anchors).reshape(-1, 2)
    return anchors, len(anchors)
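Usage sketch, assuming a yolo_anchors.txt file that stores comma-separated width,height values; the numbers below are the common YOLOv3 anchors, used here only as an example:

# model_data/yolo_anchors.txt (assumed content, a single line):
# 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
anchors, num_anchors = get_anchors('model_data/yolo_anchors.txt')
print(anchors.shape, num_anchors)     # (9, 2) 9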
3. Network structure
3.1 Model definition
class Net(nn.Module):
    def __init__(self, anchors_mask, num_classes, pretrained=False):
        super(Net, self).__init__()
        # build the backbone, neck and detection heads here

    def forward(self, x):
        # compute and return the raw predictions here
        pass
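A minimal runnable sketch of the same pattern, only to illustrate the skeleton; the single 1x1 convolution stands in for the real backbone and detection head:

import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self, anchors_mask, num_classes, pretrained=False):
        super(Net, self).__init__()
        self.anchors_mask = anchors_mask
        self.num_classes = num_classes
        # placeholder layer; a real detector builds a backbone, neck and heads here
        self.head = nn.Conv2d(3, len(anchors_mask[0]) * (num_classes + 5), kernel_size=1)

    def forward(self, x):
        return self.head(x)

net = Net(anchors_mask=[[0, 1, 2]], num_classes=20)
out = net(torch.randn(1, 3, 416, 416))    # shape: [1, 3 * (20 + 5), 416, 416]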
3.2 Weight initialization
def weights_init(net, init_type='normal', init_gain=0.02):
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and classname.find('Conv') != -1:
            if init_type == 'normal':
                torch.nn.init.normal_(m.weight.data, 0.0, init_gain)
            elif init_type == 'xavier':
                torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain)
            elif init_type == 'kaiming':
                torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                torch.nn.init.orthogonal_(m.weight.data, gain=init_gain)
            else:
                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
        elif classname.find('BatchNorm2d') != -1:
            torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
            torch.nn.init.constant_(m.bias.data, 0.0)
    print('initialize network with %s type' % init_type)
    # apply init_func recursively to every submodule of net
    net.apply(init_func)
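Typical call, assuming the model has already been built as above:

weights_init(model, init_type='normal', init_gain=0.02)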
3.3 Loading pretrained weights
model_dict = model.state_dict()
# load the pretrained weights into a dict
pretrained_dict = torch.load(pth, map_location=device)
# keep only the keys that also exist in the model and whose shapes match
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and np.shape(model_dict[k]) == np.shape(v)}
# merge the matched weights into the model's state dict
model_dict.update(pretrained_dict)
# load the merged state dict back into the model
model.load_state_dict(model_dict)
3.4 Freezing parameters
for param in model.backbone.parameters():
    param.requires_grad = False
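After freezing, a common pattern is to hand only the parameters that still require gradients to the optimizer; the optimizer choice here is just an example:

import torch

trainable_params = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(trainable_params, lr=1e-3)   # the frozen backbone is excluded from updates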
4. Data processing
4.1 Mosaic data augmentation
def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
    h, w = input_shape  # model input size, e.g. [416, 416]
    min_offset_x = self.rand(0.3, 0.7)
    min_offset_y = self.rand(0.3, 0.7)
    image_datas = []
    box_datas = []
    index = 0
    # annotation_line holds the 4 annotation lines used to build one mosaic;
    # each line is an image path followed by boxes in the form x1,y1,x2,y2,c
    for line in annotation_line:
        line_content = line.split()
        # PIL opens images in RGB format
        image = Image.open(line_content[0])
        # cvtColor is a project helper, equivalent to image = image.convert('RGB');
        # it guards against errors caused by grayscale images
        image = cvtColor(image)
        # original image size, needed to rescale the labels after resizing
        iw, ih = image.size
        # convert the box annotations to a numpy array of shape (n, 5)
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])
        # random horizontal flip
        flip = self.rand() < .5
        if flip and len(box) > 0:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            box[:, [0, 2]] = iw - box[:, [2, 0]]
        # random resize with aspect-ratio jitter
        new_ar = iw / ih * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.4, 1)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)
        # compute the paste offset so each image lands in its own quadrant on a gray canvas
        if index == 0:
            dx = int(w * min_offset_x) - nw
            dy = int(h * min_offset_y) - nh
        elif index == 1:
            dx = int(w * min_offset_x) - nw
            dy = int(h * min_offset_y)
        elif index == 2:
            dx = int(w * min_offset_x)
            dy = int(h * min_offset_y)
        elif index == 3:
            dx = int(w * min_offset_x)
            dy = int(h * min_offset_y) - nh
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image_data = np.array(new_image)
        index = index + 1
        box_data = []
        # transform the boxes in the same way as the image
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box
        image_datas.append(image_data)
        box_datas.append(box_data)
    # crop the four images at the cut point and stitch them into one picture
    cutx = int(w * min_offset_x)
    cuty = int(h * min_offset_y)
    new_image = np.zeros([h, w, 3])
    new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
    new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
    new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
    new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]
    new_image = np.array(new_image, np.uint8)
    # random HSV color jitter
    # sample the jitter gains
    r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
    # convert the image to HSV and split the channels
    hue, sat, val = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
    dtype = new_image.dtype
    # apply the jitter through lookup tables
    x = np.arange(0, 256, dtype=r.dtype)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
    new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
    new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)
    # merge the boxes of the four sub-images
    new_boxes = self.merge_bboxes(box_datas, cutx, cuty)
    return new_image, new_boxes
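merge_bboxes itself is not listed here; a plausible reconstruction (illustrative only, not necessarily the original implementation) clips each sub-image's boxes to the quadrant it occupies in the mosaic and drops boxes that become too small:

def merge_bboxes(bboxes, cutx, cuty):
    merged = []
    for i, boxes in enumerate(bboxes):
        for box in boxes:
            x1, y1, x2, y2, c = box
            # clip the box to the region its source image occupies in the stitched picture
            if i == 0:                      # top-left quadrant
                x2, y2 = min(x2, cutx), min(y2, cuty)
            elif i == 1:                    # bottom-left quadrant
                x2, y1 = min(x2, cutx), max(y1, cuty)
            elif i == 2:                    # bottom-right quadrant
                x1, y1 = max(x1, cutx), max(y1, cuty)
            elif i == 3:                    # top-right quadrant
                x1, y2 = max(x1, cutx), min(y2, cuty)
            # keep the box only if something meaningful is left after clipping
            if x2 - x1 > 1 and y2 - y1 > 1:
                merged.append([x1, y1, x2, y2, c])
    return merged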
4.2 Box format conversion (x1, y1, x2, y2 -> x_c, y_c, w, h)
# normalize by the input size: x / 416, y / 416
box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
# x1, y1, x2, y2 -> x_c, y_c, w, h
box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
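A quick worked example with a single box and a 416 x 416 input:

# box = [x1, y1, x2, y2] = [104, 52, 312, 260]
# normalized:      [104/416, 52/416, 312/416, 260/416] = [0.25, 0.125, 0.75, 0.625]
# width / height:  w = 0.75 - 0.25 = 0.5,  h = 0.625 - 0.125 = 0.5
# center:          x_c = 0.25 + 0.5 / 2 = 0.5,  y_c = 0.125 + 0.5 / 2 = 0.375
# result (x_c, y_c, w, h) = (0.5, 0.375, 0.5, 0.5)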
4.3 Generating the feature-map grid
grid_y, grid_x = torch.meshgrid([torch.arange(h), torch.arange(w)])
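A small sketch of how the grid is typically used: the x and y indices are stacked into per-cell coordinates that the predicted offsets are added to; the default meshgrid behavior shown here corresponds to indexing='ij' in newer PyTorch versions:

import torch

h, w = 13, 13                                           # feature-map size, e.g. 416 / 32
grid_y, grid_x = torch.meshgrid([torch.arange(h), torch.arange(w)])
grid = torch.stack((grid_x, grid_y), dim=-1).float()    # (h, w, 2): each cell stores its own (x, y) index
# box centers in grid units = sigmoid(predicted x/y offsets) + grid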