github-yolov3
    models.py

    import os

    import torch
    import torch.nn as nn  # torch/nn/np are used throughout; the original file gets them via the wildcard imports below
    import numpy as np
    import torch.nn.functional as F

    from utils.parse_config import *
    from utils.utils import *

    ONNX_EXPORT = False


    def create_modules(module_defs):
        """
        Constructs module list of layer blocks from module configuration in module_defs
        """
        hyperparams = module_defs.pop(0)
        output_filters = [int(hyperparams['channels'])]
        module_list = nn.ModuleList()
        yolo_index = -1

        for i, module_def in enumerate(module_defs):
            modules = nn.Sequential()

            if module_def['type'] == 'convolutional':
                bn = int(module_def['batch_normalize'])
                filters = int(module_def['filters'])
                kernel_size = int(module_def['size'])
                pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
                modules.add_module('conv_%d' % i, nn.Conv2d(in_channels=output_filters[-1],
                                                            out_channels=filters,
                                                            kernel_size=kernel_size,
                                                            stride=int(module_def['stride']),
                                                            padding=pad,
                                                            bias=not bn))
                if bn:
                    modules.add_module('batch_norm_%d' % i, nn.BatchNorm2d(filters))
                if module_def['activation'] == 'leaky':
                    modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1, inplace=True))

            elif module_def['type'] == 'maxpool':
                kernel_size = int(module_def['size'])
                stride = int(module_def['stride'])
                if kernel_size == 2 and stride == 1:  # yolov3-tiny
                    modules.add_module('_debug_padding_%d' % i, nn.ZeroPad2d((0, 1, 0, 1)))
                maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
                modules.add_module('maxpool_%d' % i, maxpool)

            elif module_def['type'] == 'upsample':
                upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')
                modules.add_module('upsample_%d' % i, upsample)

            elif module_def['type'] == 'route':
                # 'layers' entries can be negative (relative) or non-negative (absolute) layer indices
                layers = [int(x) for x in module_def['layers'].split(',')]
                filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers])
                modules.add_module('route_%d' % i, EmptyLayer())

            elif module_def['type'] == 'shortcut':
                filters = output_filters[int(module_def['from'])]
                modules.add_module('shortcut_%d' % i, EmptyLayer())

            elif module_def['type'] == 'yolo':
                yolo_index += 1
                anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
                # Extract anchors
                anchors = [float(x) for x in module_def['anchors'].split(',')]
                anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
                anchors = [anchors[i] for i in anchor_idxs]
                nc = int(module_def['classes'])  # number of classes
                img_size = hyperparams['height']
                # Define detection layer
                modules.add_module('yolo_%d' % i, YOLOLayer(anchors, nc, img_size, yolo_index))

            # Register module list and number of output filters
            module_list.append(modules)
            output_filters.append(filters)

        return hyperparams, module_list


    class EmptyLayer(nn.Module):
        """Placeholder for 'route' and 'shortcut' layers"""

        def __init__(self):
            super(EmptyLayer, self).__init__()

        def forward(self, x):
            return x
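
    To see what create_modules consumes: parse_model_cfg returns a list of dicts of strings, one per [section] of the .cfg file, with the [net] section first. A minimal hand-built sketch (hypothetical values, just enough for one conv block):

    module_defs = [
        {'type': 'net', 'channels': '3', 'height': '416'},  # hyperparams, popped first
        {'type': 'convolutional', 'batch_normalize': '1', 'filters': '16',
         'size': '3', 'stride': '1', 'pad': '1', 'activation': 'leaky'}]
    hyperparams, module_list = create_modules(module_defs)
    print(module_list)  # ModuleList with one Sequential: conv_0 -> batch_norm_0 -> leaky_0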


    Next comes the YOLO layer, which is the important part.

    class YOLOLayer(nn.Module):
        def __init__(self, anchors, nc, img_size, yolo_index):
            super(YOLOLayer, self).__init__()

            self.anchors = torch.Tensor(anchors)
            self.na = len(anchors)  # number of anchors (3)
            self.nc = nc  # number of classes (80)
            self.nx = 0  # initialize number of x gridpoints
            self.ny = 0  # initialize number of y gridpoints

            if ONNX_EXPORT:  # grids must be computed in __init__
                stride = [32, 16, 8][yolo_index]  # stride of this layer
                nx = int(img_size[1] / stride)  # number x grid points
                ny = int(img_size[0] / stride)  # number y grid points
                create_grids(self, max(img_size), (nx, ny))

        def forward(self, p, img_size, var=None):
            if ONNX_EXPORT:
                bs = 1  # batch size
            else:
                bs, ny, nx = p.shape[0], p.shape[-2], p.shape[-1]
                if (self.nx, self.ny) != (nx, ny):
                    create_grids(self, img_size, (nx, ny), p.device)

            # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85)  # (bs, anchors, grid, grid, classes + xywh)
            p = p.view(bs, self.na, self.nc + 5, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous()  # prediction

            if self.training:
                return p

            elif ONNX_EXPORT:
                # Constants CAN NOT BE BROADCAST, ensure correct shape!
                ngu = self.ng.repeat((1, self.na * self.nx * self.ny, 1))
                grid_xy = self.grid_xy.repeat((1, self.na, 1, 1, 1)).view((1, -1, 2))
                anchor_wh = self.anchor_wh.repeat((1, 1, self.nx, self.ny, 1)).view((1, -1, 2)) / ngu

                # p = p.view(-1, 5 + self.nc)
                # xy = torch.sigmoid(p[..., 0:2]) + grid_xy[0]  # x, y
                # wh = torch.exp(p[..., 2:4]) * anchor_wh[0]  # width, height
                # p_conf = torch.sigmoid(p[:, 4:5])  # Conf
                # p_cls = F.softmax(p[:, 5:85], 1) * p_conf  # SSD-like conf
                # return torch.cat((xy / ngu[0], wh, p_conf, p_cls), 1).t()

                p = p.view(1, -1, 5 + self.nc)
                xy = torch.sigmoid(p[..., 0:2]) + grid_xy  # x, y
                wh = torch.exp(p[..., 2:4]) * anchor_wh  # width, height
                p_conf = torch.sigmoid(p[..., 4:5])  # Conf
                p_cls = p[..., 5:5 + self.nc]
                # Broadcasting only supported on first dimension in CoreML. See onnx-coreml/_operators.py
                # p_cls = F.softmax(p_cls, 2) * p_conf  # SSD-like conf
                p_cls = torch.exp(p_cls).permute((2, 1, 0))
                p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0))  # F.softmax() equivalent
                p_cls = p_cls.permute(2, 1, 0)
                return torch.cat((xy / ngu, wh, p_conf, p_cls), 2).squeeze().t()

            else:  # inference
                io = p.clone()  # inference output
                io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy  # xy
                io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh  # wh yolo method
                # io[..., 2:4] = ((torch.sigmoid(io[..., 2:4]) * 2) ** 3) * self.anchor_wh  # wh power method
                io[..., 4:] = torch.sigmoid(io[..., 4:])  # p_conf, p_cls
                # io[..., 5:] = F.softmax(io[..., 5:], dim=4)  # p_cls
                io[..., :4] *= self.stride

                if self.nc == 1:
                    io[..., 5] = 1  # single-class model https://github.com/ultralytics/yolov3/issues/235

                # reshape from [1, 3, 13, 13, 85] to [1, 507, 85]
                return io.view(bs, -1, 5 + self.nc), p

    p: (bs, 255, 13, 13) -> (bs, 3, 13, 13, 85)

    1. if self.training:
      return p
    2. else (inference):
      p is additionally decoded and flattened:
      io = p.clone()
      io: (bs, 3, 13, 13, 85) -> (bs, 507, 85), with the raw predictions decoded (xy = sigmoid(xy) + grid offset, wh = exp(wh) * anchor_wh); the 85 channels are xy, wh, the objectness score (probability that an object is present), and 80 class scores
      return io, p
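
    A quick standalone check of these two reshapes (random values; only the shapes matter):

    import torch

    bs, na, no, ny, nx = 1, 3, 85, 13, 13                               # no = nc + 5
    p = torch.randn(bs, na * no, ny, nx)                                # raw conv output (1, 255, 13, 13)
    p = p.view(bs, na, no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() # (1, 3, 13, 13, 85)
    io = p.view(bs, -1, no)                                             # (1, 507, 85): 3 * 13 * 13 = 507
    print(p.shape, io.shape)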


      The forward pass of the whole network is shown in the code below.

    class Darknet(nn.Module):
        """YOLOv3 object detection model"""
    
        def __init__(self, cfg, img_size=(416, 416)):
            super(Darknet, self).__init__()
    
            self.module_defs = parse_model_cfg(cfg)
            self.module_defs[0]['cfg'] = cfg
            self.module_defs[0]['height'] = img_size
            self.hyperparams, self.module_list = create_modules(self.module_defs)
            self.yolo_layers = get_yolo_layers(self)
    
            # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
            self.version = np.array([0, 2, 5], dtype=np.int32)  # (int32) version info: major, minor, revision
            self.seen = np.array([0], dtype=np.int64)  # (int64) number of images seen during training
    
        def forward(self, x, var=None):
            img_size = max(x.shape[-2:])
            layer_outputs = []
            output = []
    
            for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
                mtype = module_def['type']
                if mtype in ['convolutional', 'upsample', 'maxpool']:
                    x = module(x)
                elif mtype == 'route':
                    layer_i = [int(x) for x in module_def['layers'].split(',')]
                    if len(layer_i) == 1:
                        x = layer_outputs[layer_i[0]]
                    else:
                        x = torch.cat([layer_outputs[i] for i in layer_i], 1)
                elif mtype == 'shortcut':
                    layer_i = int(module_def['from'])
                    x = layer_outputs[-1] + layer_outputs[layer_i]
                elif mtype == 'yolo':
                    x = module[0](x, img_size)
                    output.append(x)
                layer_outputs.append(x)
    
            if self.training:
                return output
            elif ONNX_EXPORT:
                output = torch.cat(output, 1)  # cat 3 layers 85 x (507, 2028, 8112) to 85 x 10647
                nc = self.module_list[self.yolo_layers[0]][0].nc  # number of classes
                return output[5:5 + nc].t(), output[:4].t()  # ONNX scores, boxes
            else:
                io, p = list(zip(*output))  # inference output, training output
                return torch.cat(io, 1), p
    
        def fuse(self):
            # Fuse Conv2d + BatchNorm2d layers throughout model
            fused_list = nn.ModuleList()
            for a in list(self.children())[0]:
                for i, b in enumerate(a):
                    if isinstance(b, nn.modules.batchnorm.BatchNorm2d):
                        # fuse this bn layer with the previous conv2d layer
                        conv = a[i - 1]
                        fused = torch_utils.fuse_conv_and_bn(conv, b)
                        a = nn.Sequential(fused, *list(a.children())[i + 1:])
                        break
                fused_list.append(a)
            self.module_list = fused_list
            # model_info(self)  # yolov3-spp reduced from 225 to 152 layers
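
    torch_utils.fuse_conv_and_bn lives elsewhere in the repo; as a sketch of the standard Conv+BN folding it performs (my reconstruction under the usual formulation, not the repo's exact code):

    def fuse_conv_and_bn_sketch(conv, bn):
        # BN computes y = gamma * (x - mean) / sqrt(var + eps) + beta; applied after a conv,
        # this folds into per-output-channel rescaled weights and a shifted bias.
        fused = nn.Conv2d(conv.in_channels, conv.out_channels, kernel_size=conv.kernel_size,
                          stride=conv.stride, padding=conv.padding, bias=True)
        scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
        fused.weight.data.copy_(conv.weight.data * scale.reshape(-1, 1, 1, 1))
        conv_b = conv.bias.data if conv.bias is not None else torch.zeros(conv.out_channels)
        fused.bias.data.copy_(bn.bias.data + (conv_b - bn.running_mean) * scale)
        return fused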
    

    Let's analyze the forward code above and work out what the whole network outputs.

    1. if self.training:
      output.append(each YOLOLayer's output); in training mode a YOLO layer returns (bs, 3, ng, ng, 85), where ng is that layer's feature-map size
      return output: a list of 3 tensors p_i, each of shape (bs, 3, ng, ng, 85)
    2. else (inference):
      output.append(each YOLOLayer's output); a YOLO layer now returns the pair io (bs, ng*ng*3, 85) and p (bs, 3, ng, ng, 85)
      io, p = list(zip(*output)) separates the pairs into two tuples
      io = torch.cat(io, 1)  # concatenate along dim 1; for a 416x416 input this gives (bs, 10647, 85), since 507 + 2028 + 8112 = 10647
      return io, p, where p is a tuple of 3 tensors p_i of shape (bs, 3, ng, ng, 85) (see the smoke test below)
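
    A quick smoke test of those shapes (hypothetical local paths; assumes the standard 80-class COCO cfg):

    model = Darknet('cfg/yolov3.cfg', img_size=(416, 416))
    x = torch.zeros(1, 3, 416, 416)

    model.train()
    out = model(x)    # list of 3 tensors: (1, 3, 13, 13, 85), (1, 3, 26, 26, 85), (1, 3, 52, 52, 85)

    model.eval()
    io, p = model(x)  # io: (1, 10647, 85); p: the 3 training-shaped tensors above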


      Helper functions

    def get_yolo_layers(model):
        a = [module_def['type'] == 'yolo' for module_def in model.module_defs]
        return [i for i, x in enumerate(a) if x]  # [82, 94, 106] for yolov3
    
    
    def create_grids(self, img_size=416, ng=(13, 13), device='cpu'):
        nx, ny = ng  # x and y grid size
        self.img_size = img_size
        self.stride = img_size / max(ng)
    
        # build xy offsets
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        self.grid_xy = torch.stack((xv, yv), 2).to(device).float().view((1, 1, ny, nx, 2))
    
        # build wh gains
        self.anchor_vec = self.anchors.to(device) / self.stride
        self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device)
        self.ng = torch.Tensor(ng).to(device)
        self.nx = nx
        self.ny = ny
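
    To make the xy-offset layout concrete, here is a tiny reproduction for a 3x3 grid; grid_xy[0, 0, y, x] holds the top-left corner (x, y) of that cell, which is the offset added to sigmoid(xy):

    import torch

    yv, xv = torch.meshgrid([torch.arange(3), torch.arange(3)])
    grid_xy = torch.stack((xv, yv), 2).float().view(1, 1, 3, 3, 2)
    print(grid_xy[0, 0])
    # tensor([[[0., 0.], [1., 0.], [2., 0.]],
    #         [[0., 1.], [1., 1.], [2., 1.]],
    #         [[0., 2.], [1., 2.], [2., 2.]]])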
    

    Here is a quick look at what anchor_vec is and what shape it has; it is used later in compute_loss.

    module_def={} 
    module_def['anchors']="10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326" 
    module_def['mask']="1,2,3" 
    
    anchor_idxs = [int(x) for x in module_def['mask'].split(',')] 
    # Extract anchors 
    anchors = [float(x) for x in module_def['anchors'].split(',')] 
    anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)] 
    anchors = [anchors[i] for i in anchor_idxs] 
    print(anchors) 
    '''
    anchors:
    [(16.0, 30.0), (33.0, 23.0), (30.0, 61.0)] 
    ''' 
    self.anchors = torch.Tensor(anchors) # torch.Size([3, 2]) 
    self.anchor_vec = self.anchors.to(device) / self.stride # torch.Size([3, 2])
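
    Continuing the example with an assumed stride of 16 (the real stride depends on which YOLO layer this is: 32, 16, or 8):

    stride = 16  # assumed for illustration
    anchor_vec = torch.Tensor(anchors) / stride
    print(anchor_vec)
    '''
    tensor([[1.0000, 1.8750],
            [2.0625, 1.4375],
            [1.8750, 3.8125]])
    '''

    So anchor_vec is just the selected anchors measured in grid cells instead of pixels, which is the scale at which compute_loss compares them against the predicted wh.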
    


    The code below is also important: it implements loading and saving weights in Darknet format.
    Due to time constraints we won't walk through it line by line here; detailed comments can come later.

    
    def load_darknet_weights(self, weights, cutoff=-1):
        # Parses and loads the weights stored in 'weights'
        # cutoff: save layers between 0 and cutoff (if cutoff = -1 all are saved)
        weights_file = weights.split(os.sep)[-1]
    
        # Try to download weights if not available locally
        if not os.path.isfile(weights):
            try:
                url = 'https://pjreddie.com/media/files/' + weights_file
                print('Downloading ' + url + ' to ' + weights)
                os.system('curl ' + url + ' -o ' + weights)
            except IOError:
                print(weights + ' not found.\nTry https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI')
    
        # Establish cutoffs
        if weights_file == 'darknet53.conv.74':
            cutoff = 75
        elif weights_file == 'yolov3-tiny.conv.15':
            cutoff = 15
    
        # Read weights file
        with open(weights, 'rb') as f:
            # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
            self.version = np.fromfile(f, dtype=np.int32, count=3)  # (int32) version info: major, minor, revision
            self.seen = np.fromfile(f, dtype=np.int64, count=1)  # (int64) number of images seen during training
    
            weights = np.fromfile(f, dtype=np.float32)  # The rest are weights
    
        ptr = 0
        for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
            if module_def['type'] == 'convolutional':
                conv_layer = module[0]
                if module_def['batch_normalize']:
                    # Load BN bias, weights, running mean and running variance
                    bn_layer = module[1]
                    num_b = bn_layer.bias.numel()  # Number of biases
                    # Bias
                    bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias)
                    bn_layer.bias.data.copy_(bn_b)
                    ptr += num_b
                    # Weight
                    bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight)
                    bn_layer.weight.data.copy_(bn_w)
                    ptr += num_b
                    # Running Mean
                    bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean)
                    bn_layer.running_mean.data.copy_(bn_rm)
                    ptr += num_b
                    # Running Var
                    bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var)
                    bn_layer.running_var.data.copy_(bn_rv)
                    ptr += num_b
                else:
                    # Load conv. bias
                    num_b = conv_layer.bias.numel()
                    conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias)
                    conv_layer.bias.data.copy_(conv_b)
                    ptr += num_b
                # Load conv. weights
                num_w = conv_layer.weight.numel()
                conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight)
                conv_layer.weight.data.copy_(conv_w)
                ptr += num_w
    
        return cutoff
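
    The binary layout implied by this parser: a header of three int32 (version) and one int64 (seen), followed by a single flat float32 stream containing, for every convolutional block in cfg order, either [bn bias, bn weight, bn running_mean, bn running_var] or [conv bias], then the conv weights. Hypothetical usage (assumes both files exist locally):

    model = Darknet('cfg/yolov3.cfg')
    load_darknet_weights(model, 'weights/yolov3.weights')  # fills the model in place, returns cutoff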
    
    
    def save_weights(self, path='model.weights', cutoff=-1):
        # Converts a PyTorch model to Darknet format (*.pt to *.weights)
        # Note: Does not work if model.fuse() is applied
        with open(path, 'wb') as f:
            # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
            self.version.tofile(f)  # (int32) version info: major, minor, revision
            self.seen.tofile(f)  # (int64) number of images seen during training
    
            # Iterate through layers
            for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
                if module_def['type'] == 'convolutional':
                    conv_layer = module[0]
                    # If batch norm, load bn first
                    if module_def['batch_normalize']:
                        bn_layer = module[1]
                        bn_layer.bias.data.cpu().numpy().tofile(f)
                        bn_layer.weight.data.cpu().numpy().tofile(f)
                        bn_layer.running_mean.data.cpu().numpy().tofile(f)
                        bn_layer.running_var.data.cpu().numpy().tofile(f)
                    # Load conv bias
                    else:
                        conv_layer.bias.data.cpu().numpy().tofile(f)
                    # Load conv weights
                    conv_layer.weight.data.cpu().numpy().tofile(f)
    
    
    def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights'):
        # Converts between PyTorch and Darknet format per extension (i.e. *.weights convert to *.pt and vice versa)
        # from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights')
    
        # Initialize model
        model = Darknet(cfg)
    
        # Load weights and save
        if weights.endswith('.pt'):  # if PyTorch format
            model.load_state_dict(torch.load(weights, map_location='cpu')['model'])
            save_weights(model, path='converted.weights', cutoff=-1)
            print("Success: converted '%s' to 'converted.weights'" % weights)
    
        elif weights.endswith('.weights'):  # darknet format
            _ = load_darknet_weights(model, weights)
    
            chkpt = {'epoch': -1,
                     'best_fitness': None,
                     'training_results': None,
                     'model': model.state_dict(),
                     'optimizer': None}
    
            torch.save(chkpt, 'converted.pt')
            print("Success: converted '%s' to 'converted.pt'" % weights)
    
        else:
            print('Error: extension not supported.')