mmdet之YOLOv3源码阅读

优先级: 2

前言

https://github.com/open-mmlab/mmdetection/tree/v2.23.0

本文精读的是mmdetection v2.23.0版本中的YOLOv3实现

YOLOv3继承于mmdet的SingleStageDetector(单阶段检测器基类)

mmdet之YOLOv3源码阅读 425fcebc4b9047098e116bfb21302754 - 图1

整体网络

mmdet之YOLOv3源码阅读 425fcebc4b9047098e116bfb21302754 - 图2

Darknet53

darknet53 GitHub入口

darknet53的网络结构

YOLOv3借鉴了ResNet残差模块的思想,设计了Darknet53作为骨干网络

mmdet之YOLOv3源码阅读 425fcebc4b9047098e116bfb21302754 - 图3

mmdet之YOLOv3源码阅读 425fcebc4b9047098e116bfb21302754 - 图4

  # Dict(depth: (layers, channels)): for each supported depth, the number of
  # residual blocks per stage and the (in, out) channel pair of each stage.
  arch_settings = {
      53: ((1, 2, 8, 8, 4), ((32, 64), (64, 128), (128, 256), (256, 512),
                             (512, 1024)))
  }
  @BACKBONES.register_module()
  class Darknet(BaseModule):
      """Darknet backbone.

      Args:
          depth (int): Depth of Darknet. Currently only support 53.
          out_indices (Sequence[int]): Output from which stages.
          frozen_stages (int): Stages to be frozen (stop grad and set eval
              mode). -1 means not freezing any parameters. Default: -1.
          conv_cfg (dict): Config dict for convolution layer. Default: None.
          norm_cfg (dict): Dictionary to construct and config norm layer.
              Default: dict(type='BN', requires_grad=True)
          act_cfg (dict): Config dict for activation layer.
              Default: dict(type='LeakyReLU', negative_slope=0.1).
          norm_eval (bool): Whether to set norm layers to eval mode, namely,
              freeze running stats (mean and var). Note: Effect on Batch Norm
              and its variants only.
          pretrained (str, optional): model pretrained path. Default: None
          init_cfg (dict or list[dict], optional): Initialization config dict.
              Default: None

      Example:
          >>> from mmdet.models import Darknet
          >>> import torch
          >>> self = Darknet(depth=53)
          >>> self.eval()
          >>> inputs = torch.rand(1, 3, 416, 416)
          >>> level_outputs = self.forward(inputs)
          >>> for level_out in level_outputs:
          ...     print(tuple(level_out.shape))
          ...
          (1, 256, 52, 52)
          (1, 512, 26, 26)
          (1, 1024, 13, 13)
      """

      # Dict(depth: (layers, channels))
      arch_settings = {
          53: ((1, 2, 8, 8, 4), ((32, 64), (64, 128), (128, 256), (256, 512),
                                 (512, 1024)))
      }

      def __init__(self,
                   depth=53,
                   out_indices=(3, 4, 5),
                   frozen_stages=-1,
                   conv_cfg=None,
                   norm_cfg=dict(type='BN', requires_grad=True),
                   act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                   norm_eval=True,
                   pretrained=None,
                   init_cfg=None):
          super(Darknet, self).__init__(init_cfg)
          if depth not in self.arch_settings:
              raise KeyError(f'invalid depth {depth} for darknet')

          self.depth = depth
          self.out_indices = out_indices
          self.frozen_stages = frozen_stages
          self.layers, self.channels = self.arch_settings[depth]

          cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

          # Stem convolution: 3 -> 32 channels, 3x3 kernel.
          self.conv1 = ConvModule(3, 32, 3, padding=1, **cfg)

          # Attribute names of all stages in forward order. Index 0 is the
          # stem, so `out_indices` and `frozen_stages` index into this list.
          self.cr_blocks = ['conv1']
          for i, n_layers in enumerate(self.layers):
              layer_name = f'conv_res_block{i + 1}'
              in_c, out_c = self.channels[i]
              self.add_module(
                  layer_name,
                  self.make_conv_res_block(in_c, out_c, n_layers, **cfg))
              self.cr_blocks.append(layer_name)

          self.norm_eval = norm_eval

          assert not (init_cfg and pretrained), \
              'init_cfg and pretrained cannot be specified at the same time'
          if isinstance(pretrained, str):
              warnings.warn('DeprecationWarning: pretrained is deprecated, '
                            'please use "init_cfg" instead')
              self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
          elif pretrained is None:
              # Fall back to default initializers only when the caller gave
              # neither `pretrained` nor `init_cfg`.
              if init_cfg is None:
                  self.init_cfg = [
                      dict(type='Kaiming', layer='Conv2d'),
                      dict(
                          type='Constant',
                          val=1,
                          layer=['_BatchNorm', 'GroupNorm'])
                  ]
          else:
              raise TypeError('pretrained must be a str or None')

      def forward(self, x):
          """Run every stage in order and collect the selected outputs.

          Args:
              x: Input passed to the first stage (``conv1``).

          Returns:
              tuple: Outputs of the stages whose index in ``cr_blocks`` is
              listed in ``out_indices``, in forward order.
          """
          outs = []
          for i, layer_name in enumerate(self.cr_blocks):
              cr_block = getattr(self, layer_name)
              x = cr_block(x)
              if i in self.out_indices:
                  outs.append(x)

          return tuple(outs)

      def _freeze_stages(self):
          """Put the first ``frozen_stages`` entries of ``cr_blocks`` in eval
          mode and stop their gradients (entry 0 is ``conv1``)."""
          if self.frozen_stages >= 0:
              for i in range(self.frozen_stages):
                  m = getattr(self, self.cr_blocks[i])
                  m.eval()
                  for param in m.parameters():
                      param.requires_grad = False

      def train(self, mode=True):
          """Switch the train/eval mode, then re-apply stage freezing and,
          when ``norm_eval`` is set, keep batch-norm layers in eval mode."""
          super(Darknet, self).train(mode)
          self._freeze_stages()
          if mode and self.norm_eval:
              for m in self.modules():
                  if isinstance(m, _BatchNorm):
                      m.eval()

      @staticmethod
      def make_conv_res_block(in_channels,
                              out_channels,
                              res_repeat,
                              conv_cfg=None,
                              norm_cfg=dict(type='BN', requires_grad=True),
                              act_cfg=dict(type='LeakyReLU',
                                           negative_slope=0.1)):
          """In Darknet backbone, ConvLayer is usually followed by ResBlock. This
          function will make that. The Conv layers always have 3x3 filters with
          stride=2. The number of the filters in Conv layer is the same as the
          out channels of the ResBlock.

          Args:
              in_channels (int): The number of input channels.
              out_channels (int): The number of output channels.
              res_repeat (int): The number of ResBlocks.
              conv_cfg (dict): Config dict for convolution layer. Default: None.
              norm_cfg (dict): Dictionary to construct and config norm layer.
                  Default: dict(type='BN', requires_grad=True)
              act_cfg (dict): Config dict for activation layer.
                  Default: dict(type='LeakyReLU', negative_slope=0.1).

          Returns:
              nn.Sequential: The stride-2 conv followed by ``res_repeat``
              ResBlocks, registered as ``conv``, ``res0``, ``res1``, ...
          """

          cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

          model = nn.Sequential()
          model.add_module(
              'conv',
              ConvModule(
                  in_channels, out_channels, 3, stride=2, padding=1, **cfg))
          for idx in range(res_repeat):
              model.add_module('res{}'.format(idx),
                               ResBlock(out_channels, **cfg))
          return model

darknet在mmdet中的工程实现

mmdet中的DarkNet模块,继承于mmcv.runner.base_module.BaseModule

mmdet之YOLOv3源码阅读 425fcebc4b9047098e116bfb21302754 - 图5

mmdet之YOLOv3源码阅读 425fcebc4b9047098e116bfb21302754 - 图6

残差块的定义如下,先经过1x1的卷积通道数变为原来的一半,再经过3x3的卷积通道数变回来。

mmdet之YOLOv3源码阅读 425fcebc4b9047098e116bfb21302754 - 图7

mmdet之YOLOv3源码阅读 425fcebc4b9047098e116bfb21302754 - 图8

ResBlock的GitHub链接

  class ResBlock(BaseModule):
      """The basic residual block used in Darknet. Each ResBlock consists of two
      ConvModules and the input is added to the final output. Each ConvModule is
      composed of Conv, BN, and LeakyReLU. In YoloV3 paper, the first convLayer
      has half of the number of the filters as much as the second convLayer. The
      first convLayer has filter size of 1x1 and the second one has the filter
      size of 3x3.

      Args:
          in_channels (int): The input channels. Must be even.
          conv_cfg (dict): Config dict for convolution layer. Default: None.
          norm_cfg (dict): Dictionary to construct and config norm layer.
              Default: dict(type='BN', requires_grad=True)
          act_cfg (dict): Config dict for activation layer.
              Default: dict(type='LeakyReLU', negative_slope=0.1).
          init_cfg (dict or list[dict], optional): Initialization config dict.
              Default: None
      """

      def __init__(self,
                   in_channels,
                   conv_cfg=None,
                   norm_cfg=dict(type='BN', requires_grad=True),
                   act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                   init_cfg=None):
          super(ResBlock, self).__init__(init_cfg)
          assert in_channels % 2 == 0  # ensure the in_channels is even
          half_in_channels = in_channels // 2

          # shortcut
          cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

          # 1x1 conv halves the channel count; 3x3 conv restores it so the
          # result can be added to the input.
          self.conv1 = ConvModule(in_channels, half_in_channels, 1, **cfg)
          self.conv2 = ConvModule(
              half_in_channels, in_channels, 3, padding=1, **cfg)

      def forward(self, x):
          """Apply conv1 then conv2 and add the input back (skip connection)."""
          residual = x
          out = self.conv1(x)
          out = self.conv2(out)
          out = out + residual

          return out

YoloV3的Neck

参考