首先看 config 文件,以 Faster-RCNN 为例,backbone 的主要参数为:
# Backbone section of the Faster R-CNN config.
backbone = dict(
    type='ResNet',             # backbone class name
    depth=50,                  # network depth 50 -> ResNet-50
    num_stages=4,              # four residual stages
    out_indices=(0, 1, 2, 3),  # which stage outputs to return: 0-C2, 1-C3, 2-C4, 3-C5
    frozen_stages=1,           # freeze up to stage 1: those parameters are not trained
    style='pytorch')
'''Explanation of `style` from the source code:
If style is "pytorch", the stride-two layer is the 3x3 conv layer,
if it is "caffe", the stride-two layer is the first 1x1 conv layer.'''
然后转到 /mmdet/models/backbones/resnet.py 中查看 backbone 的构建:
class Bottleneck(nn.Module):
    """Bottleneck residual block for ResNet (conv1x1 -> conv3x3 -> conv1x1 + shortcut).

    If ``style`` is "pytorch", the stride-two layer is the 3x3 conv layer;
    if it is "caffe", the stride-two layer is the first 1x1 conv layer.
    """

    expansion = 4  # output channels = planes * expansion, e.g. 64 -> 256

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 dilation=1,           # dilation=1 is an ordinary (non-dilated) conv
                 downsample=None,
                 style='pytorch',
                 with_cp=False,        # gradient checkpointing: trade compute for memory
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 dcn=None,             # optional deformable-convolution config
                 gcb=None,             # optional global-context (ContextBlock) config
                 gen_attention=None):  # optional generalized-attention config
        """Build one bottleneck block.

        Args:
            inplanes (int): Input channel count.
            planes (int): Base channel count; output is ``planes * expansion``.
            stride (int): Stride of the stride-two layer (placement depends on style).
            dilation (int): Dilation of the 3x3 conv.
            downsample (nn.Module | None): Shortcut projection when shapes differ.
            style (str): 'pytorch' or 'caffe'; see class docstring.
            with_cp (bool): Use ``cp.checkpoint`` in forward.
            conv_cfg (dict | None): Conv layer config; ``None`` builds a plain conv.
            norm_cfg (dict): Norm layer config (BN or GN).
            dcn (dict | None): Deformable conv config for conv2.
            gcb (dict | None): ContextBlock config applied after conv3.
            gen_attention (dict | None): GeneralizedAttention config applied
                after conv2.
        """
        super(Bottleneck, self).__init__()
        assert style in ['pytorch', 'caffe']
        assert dcn is None or isinstance(dcn, dict)
        assert gcb is None or isinstance(gcb, dict)
        assert gen_attention is None or isinstance(gen_attention, dict)

        # Basic bookkeeping of the constructor arguments.
        self.inplanes = inplanes
        self.planes = planes
        self.stride = stride
        self.dilation = dilation
        self.style = style
        self.with_cp = with_cp
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.dcn = dcn
        self.with_dcn = dcn is not None
        self.gcb = gcb
        self.with_gcb = gcb is not None
        self.gen_attention = gen_attention
        self.with_gen_attention = gen_attention is not None

        # Where the stride-two layer sits depends on the style (see docstring).
        if self.style == 'pytorch':
            self.conv1_stride = 1
            self.conv2_stride = stride
        else:
            self.conv1_stride = stride
            self.conv2_stride = 1

        # One bottleneck has three conv layers and three matching norm layers.
        # build_norm_layer returns (layer name as str, the nn.Module itself).
        self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)
        self.norm3_name, norm3 = build_norm_layer(
            norm_cfg, planes * self.expansion, postfix=3)

        self.conv1 = build_conv_layer(
            conv_cfg,  # conv_cfg=None builds a regular conv
            inplanes,
            planes,
            kernel_size=1,
            stride=self.conv1_stride,
            bias=False)
        self.add_module(self.norm1_name, norm1)  # register norm1 under its name

        fallback_on_stride = False
        if self.with_dcn:
            # NOTE(review): pop() mutates the caller's dcn dict, so later blocks
            # sharing the same dict no longer see 'fallback_on_stride' — confirm
            # this matches upstream intent before changing.
            fallback_on_stride = dcn.pop('fallback_on_stride', False)
        if not self.with_dcn or fallback_on_stride:
            # Regular 3x3 conv for conv2.
            self.conv2 = build_conv_layer(
                conv_cfg,
                planes,
                planes,
                kernel_size=3,
                stride=self.conv2_stride,
                padding=dilation,
                dilation=dilation,
                bias=False)
        else:
            # Deformable 3x3 conv; a custom conv_cfg cannot be combined with DCN.
            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'
            self.conv2 = build_conv_layer(
                dcn,
                planes,
                planes,
                kernel_size=3,
                stride=self.conv2_stride,
                padding=dilation,
                dilation=dilation,
                bias=False)
        self.add_module(self.norm2_name, norm2)

        self.conv3 = build_conv_layer(
            conv_cfg,
            planes,
            planes * self.expansion,
            kernel_size=1,
            bias=False)
        self.add_module(self.norm3_name, norm3)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

        # Optional global-context block after conv3.
        if self.with_gcb:
            gcb_inplanes = planes * self.expansion
            self.context_block = ContextBlock(inplanes=gcb_inplanes, **gcb)

        # Optional generalized-attention block after conv2.
        if self.with_gen_attention:
            self.gen_attention_block = GeneralizedAttention(
                planes, **gen_attention)
将残差块搭建完成;
@property
def norm1(self):
    """nn.Module: the first norm layer, looked up by its stored name."""
    return getattr(self, self.norm1_name)

@property
def norm2(self):
    """nn.Module: the second norm layer, looked up by its stored name."""
    return getattr(self, self.norm2_name)

@property
def norm3(self):
    """nn.Module: the third norm layer, looked up by its stored name."""
    return getattr(self, self.norm3_name)
@property 装饰器负责把一个方法变成属性来调用;getattr() 函数用于返回一个对象的属性值。
# Forward pass of a single residual block.
def forward(self, x):
    """Run three convs (with optional attention/context blocks) plus the shortcut."""

    def _inner_forward(x):
        shortcut = x

        y = self.conv1(x)
        y = self.norm1(y)
        y = self.relu(y)

        y = self.conv2(y)
        y = self.norm2(y)
        y = self.relu(y)
        if self.with_gen_attention:
            y = self.gen_attention_block(y)

        y = self.conv3(y)
        y = self.norm3(y)
        if self.with_gcb:
            y = self.context_block(y)

        # Project the shortcut when the main path changed shape.
        if self.downsample is not None:
            shortcut = self.downsample(x)
        y += shortcut
        return y

    # Gradient checkpointing saves memory at the cost of recomputation.
    if self.with_cp and x.requires_grad:
        out = cp.checkpoint(_inner_forward, x)
    else:
        out = _inner_forward(x)
    return self.relu(out)
def make_res_layer(block,
                   inplanes,
                   planes,
                   blocks,
                   stride=1,
                   dilation=1,
                   style='pytorch',
                   with_cp=False,
                   conv_cfg=None,
                   norm_cfg=dict(type='BN'),
                   dcn=None,
                   gcb=None,
                   gen_attention=None,
                   gen_attention_blocks=[]):
    """Stack ``blocks`` residual blocks into one stage as an ``nn.Sequential``.

    Only the first block may change resolution (stride) or channel count, so
    only it receives a projection ``downsample`` on its shortcut.
    ``gen_attention`` is attached only to the block indices listed in
    ``gen_attention_blocks``.
    """
    out_channels = planes * block.expansion

    # Shortcut projection for the first block when the shape changes.
    downsample = None
    if stride != 1 or inplanes != out_channels:
        proj_conv = build_conv_layer(
            conv_cfg,
            inplanes,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False)
        proj_norm = build_norm_layer(norm_cfg, out_channels)[1]
        downsample = nn.Sequential(proj_conv, proj_norm)

    # First block: carries the stride and the shortcut projection.
    stage = [
        block(
            inplanes=inplanes,
            planes=planes,
            stride=stride,
            dilation=dilation,
            downsample=downsample,
            style=style,
            with_cp=with_cp,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            dcn=dcn,
            gcb=gcb,
            gen_attention=gen_attention
            if (0 in gen_attention_blocks) else None)
    ]
    # Remaining blocks: stride 1, expanded input channels.
    for idx in range(1, blocks):
        stage.append(
            block(
                inplanes=out_channels,
                planes=planes,
                stride=1,
                dilation=dilation,
                style=style,
                with_cp=with_cp,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                dcn=dcn,
                gcb=gcb,
                gen_attention=gen_attention
                if (idx in gen_attention_blocks) else None))
    return nn.Sequential(*stage)
下面是最主要的构建ResNet网络的代码块:
@BACKBONES.register_module
class ResNet(nn.Module):
    """ResNet backbone.

    Args:
        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
        in_channels (int): Number of input image channels. Normally 3.
        num_stages (int): Resnet stages, normally 4.
        strides (Sequence[int]): Strides of the first block of each stage.
        dilations (Sequence[int]): Dilation of each stage.
        out_indices (Sequence[int]): Output from which stages.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer.
        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
            -1 means not freezing any parameters.
        norm_cfg (dict): dictionary to construct and config norm layer.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
        zero_init_residual (bool): whether to use zero init for last norm layer
            in resblocks to let them behave as identity.

    Example:
        >>> from mmdet.models import ResNet
        >>> import torch
        >>> self = ResNet(depth=18)
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 32, 32)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        (1, 64, 8, 8)
        (1, 128, 4, 4)
        (1, 256, 2, 2)
        (1, 512, 1, 1)
    """

    # Per-depth architecture: (block type, number of blocks in each stage).
    arch_settings = {
        18: (BasicBlock, (2, 2, 2, 2)),
        34: (BasicBlock, (3, 4, 6, 3)),
        50: (Bottleneck, (3, 4, 6, 3)),
        101: (Bottleneck, (3, 4, 23, 3)),
        152: (Bottleneck, (3, 8, 36, 3))
    }

    def __init__(self,
                 depth,
                 in_channels=3,
                 num_stages=4,
                 strides=(1, 2, 2, 2),
                 dilations=(1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 style='pytorch',
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=True,
                 dcn=None,
                 stage_with_dcn=(False, False, False, False),
                 gcb=None,
                 stage_with_gcb=(False, False, False, False),
                 gen_attention=None,
                 stage_with_gen_attention=((), (), (), ()),
                 with_cp=False,
                 zero_init_residual=True):
        super(ResNet, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError('invalid depth {} for resnet'.format(depth))
        self.depth = depth
        self.num_stages = num_stages
        assert num_stages >= 1 and num_stages <= 4
        self.strides = strides
        self.dilations = dilations
        assert len(strides) == len(dilations) == num_stages
        self.out_indices = out_indices
        assert max(out_indices) < num_stages
        self.style = style
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.with_cp = with_cp
        self.norm_eval = norm_eval
        self.dcn = dcn
        self.stage_with_dcn = stage_with_dcn
        if dcn is not None:
            assert len(stage_with_dcn) == num_stages
        self.gen_attention = gen_attention
        self.gcb = gcb
        self.stage_with_gcb = stage_with_gcb
        if gcb is not None:
            assert len(stage_with_gcb) == num_stages
        self.zero_init_residual = zero_init_residual
        # Look up the block type and per-stage block counts for this depth.
        self.block, stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        self.inplanes = 64

        # Stem: one conv applied before the residual stages (in_channels -> 64).
        self._make_stem_layer(in_channels)

        self.res_layers = []
        for i, num_blocks in enumerate(self.stage_blocks):
            stride = strides[i]
            dilation = dilations[i]
            # Enable DCN / GCB only on the stages flagged in the config.
            dcn = self.dcn if self.stage_with_dcn[i] else None
            gcb = self.gcb if self.stage_with_gcb[i] else None
            planes = 64 * 2**i
            res_layer = make_res_layer(
                self.block,
                self.inplanes,
                planes,
                num_blocks,
                stride=stride,
                dilation=dilation,
                style=self.style,
                with_cp=with_cp,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                dcn=dcn,
                gcb=gcb,
                gen_attention=gen_attention,
                gen_attention_blocks=stage_with_gen_attention[i])
            self.inplanes = planes * self.block.expansion
            layer_name = 'layer{}'.format(i + 1)
            self.add_module(layer_name, res_layer)
            # Record the registered name of each stage's residual layer.
            self.res_layers.append(layer_name)

        self._freeze_stages()

        self.feat_dim = self.block.expansion * 64 * 2**(
            len(self.stage_blocks) - 1)

    @property
    def norm1(self):
        # The stem's norm layer, looked up by its registered name.
        return getattr(self, self.norm1_name)

    def _make_stem_layer(self, in_channels):
        # Stem = 7x7 stride-2 conv + norm + ReLU + 3x3 stride-2 max-pool.
        self.conv1 = build_conv_layer(
            self.conv_cfg,
            in_channels,
            64,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False)
        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)
        self.add_module(self.norm1_name, norm1)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Freeze the parameters of some layers so they are not trained.
    def _freeze_stages(self):
        if self.frozen_stages >= 0:
            self.norm1.eval()
            for m in [self.conv1, self.norm1]:
                for param in m.parameters():
                    param.requires_grad = False
        for i in range(1, self.frozen_stages + 1):
            m = getattr(self, 'layer{}'.format(i))
            m.eval()
            for param in m.parameters():
                param.requires_grad = False

    def init_weights(self, pretrained=None):
        # Either load a checkpoint (pretrained is a path/URL string) or apply
        # the default Kaiming/constant initialization scheme.
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                    constant_init(m, 1)
            if self.dcn is not None:
                for m in self.modules():
                    if isinstance(m, Bottleneck) and hasattr(
                            m, 'conv2_offset'):
                        constant_init(m.conv2_offset, 0)
            # Zero-init the last norm of each block so it starts as identity.
            if self.zero_init_residual:
                for m in self.modules():
                    if isinstance(m, Bottleneck):
                        constant_init(m.norm3, 0)
                    elif isinstance(m, BasicBlock):
                        constant_init(m.norm2, 0)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        # Stem, then the residual stages; collect outputs of the stages
        # listed in out_indices.
        x = self.conv1(x)
        x = self.norm1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        outs = []
        for i, layer_name in enumerate(self.res_layers):
            res_layer = getattr(self, layer_name)
            x = res_layer(x)
            if i in self.out_indices:
                outs.append(x)
        return tuple(outs)

    def train(self, mode=True):
        super(ResNet, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                # trick: eval have effect on BatchNorm only
                if isinstance(m, _BatchNorm):
                    m.eval()
