symbol_builder.py这个脚本主要包括import_module,get_symbol_train,get_symbol三个函数,后面两个函数类似,可以从get_symbol_train函数开始看,这里面调用了import_module函数。

    1. from __future__ import absolute_import
    2. import mxnet as mx
    3. # common.py脚本中的这两个函数主要用来定义新增的特征层以及生成anchor。
    4. from .common import multi_layer_feature, multibox_layer
    def import_module(module_name):
        """Helper function to import a module by name.

        The directory that contains this file is registered on ``sys.path`` so
        that sibling symbol scripts can be imported by their bare module name.

        Parameters
        ----------
        module_name : str
            name of the module to import (e.g. the name of a symbol script)

        Returns
        -------
        module
            the object returned by ``importlib.import_module(module_name)``

        Raises
        ------
        ImportError
            propagated from ``importlib.import_module`` when the module
            cannot be found
        """
        import importlib
        import os
        import sys

        here = os.path.dirname(__file__)
        # Register the directory only once: appending unconditionally would
        # add one duplicate sys.path entry per call.
        if here not in sys.path:
            sys.path.append(here)
        # module_name is just a str; importlib resolves it to the symbol script.
        return importlib.import_module(module_name)
    def get_symbol_train(network, num_classes, from_layers, num_filters, strides, pads,
                         sizes, ratios, normalizations=-1, steps=[], min_filter=128,
                         nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
        """Build network symbol for training SSD

        Parameters
        ----------
        network : str
            base network symbol name
        num_classes : int
            number of object classes not including background
        from_layers : list of str
            feature extraction layers, use '' for add extra layers
            For example:
            from_layers = ['relu4_3', 'fc7', '', '', '', '']
            which means extract feature from relu4_3 and fc7, adding 4 extra layers
            on top of fc7
        num_filters : list of int
            number of filters for extra layers, you can use -1 for extracted features,
            however, if normalization and scale is applied, the number of filter for
            that layer must be provided.
            For example:
            num_filters = [512, -1, 512, 256, 256, 256]
        strides : list of int
            strides for the 3x3 convolution appended, -1 can be used for extracted
            feature layers
        pads : list of int
            paddings for the 3x3 convolution, -1 can be used for extracted layers
        sizes : list or list of list
            [min_size, max_size] for all layers or [[], [], []...] for specific layers
        ratios : list or list of list
            [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
        normalizations : int or list of int
            use normalizations value for all layers or [...] for specific layers,
            -1 indicate no normalizations and scales
        steps : list
            specify steps for each MultiBoxPrior layer, leave empty, it will calculate
            according to layer dimensions
        min_filter : int
            minimum number of filters used in 1x1 convolution
        nms_thresh : float
            non-maximum suppression threshold
        force_suppress : boolean
            whether suppress different class objects
        nms_topk : int
            apply NMS to top K detections
        **kwargs
            forwarded unchanged to the base network's ``get_symbol``

        Returns
        -------
        mx.Symbol
            group of ``[cls_prob, loc_loss, cls_label, det]``
        """
        # NOTE(review): ``steps=[]`` is a shared mutable default. It is kept to
        # leave the signature untouched; this function only forwards it and
        # never mutates it.

        # Ground-truth input; it is fed to MultiBoxTarget below.
        label = mx.sym.Variable('label')

        # `network` is a str naming a symbol script; import_module loads that
        # script and its get_symbol() supplies the base network symbol.
        body = import_module(network).get_symbol(num_classes, **kwargs)

        # multi_layer_feature (from common.py) returns the feature layers the
        # detection heads are attached to (presumably one per `from_layers`
        # entry -- verify against common.py).
        layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
                                     min_filter=min_filter)

        # multibox_layer (from common.py) yields the localization predictions,
        # the class predictions and the anchor boxes.
        loc_preds, cls_preds, anchor_boxes = multibox_layer(
            layers, num_classes, sizes=sizes, ratios=ratios,
            normalization=normalizations, num_channels=num_filters,
            clip=False, interm_layer=0, steps=steps)

        # MultiBoxTarget computes the training targets from anchors, labels and
        # class predictions. Per the MXNet docs: overlap_threshold is the
        # anchor-GT overlap needed for a positive match, negative_mining_ratio
        # is the max negative:positive sample ratio, minimum_negative_samples
        # is the minimum number of negatives, and negative_mining_thresh is the
        # threshold used for negative mining.
        tmp = mx.contrib.symbol.MultiBoxTarget(
            *[anchor_boxes, label, cls_preds], overlap_threshold=.5,
            ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,
            negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
            name="multibox_target")
        loc_target = tmp[0]       # regression targets
        loc_target_mask = tmp[1]  # mask applied to the regression residual
        cls_target = tmp[2]       # per-anchor class target

        # Classification: softmax output over the class predictions against
        # cls_target; entries equal to ignore_label (-1) are ignored.
        cls_prob = mx.symbol.SoftmaxOutput(
            data=cls_preds, label=cls_target,
            ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True,
            normalization='valid', name="cls_prob")

        # Box regression: smooth L1 on the masked residual, so only anchors
        # selected by loc_target_mask feed the loss.
        loc_loss_ = mx.symbol.smooth_l1(
            name="loc_loss_",
            data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
        # MakeLoss turns the smooth L1 symbol into a loss output; with
        # normalization='valid' the gradient is divided by the number of valid
        # input elements (per the MXNet docs).
        loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                      normalization='valid', name="loc_loss")

        # monitoring training status
        # Both outputs below are wrapped with grad_scale=0: they are exposed
        # for monitoring/metrics only and are not meant to drive training.
        cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
        # NMS is performed inside MultiBoxDetection.
        det = mx.contrib.symbol.MultiBoxDetection(
            *[cls_prob, loc_preds, anchor_boxes],
            name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
            variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
        det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

        # group output: class probabilities, localization loss, per-anchor
        # class targets and the detection output.
        out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
        return out
    def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios,
                   strides, pads, normalizations=-1, steps=[], min_filter=128,
                   nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
        """Build network for testing SSD

        Parameters
        ----------
        network : str
            base network symbol name
        num_classes : int
            number of object classes not including background
        from_layers : list of str
            feature extraction layers, use '' for add extra layers
            For example:
            from_layers = ['relu4_3', 'fc7', '', '', '', '']
            which means extract feature from relu4_3 and fc7, adding 4 extra layers
            on top of fc7
        num_filters : list of int
            number of filters for extra layers, you can use -1 for extracted features,
            however, if normalization and scale is applied, the number of filter for
            that layer must be provided.
            For example:
            num_filters = [512, -1, 512, 256, 256, 256]
        strides : list of int
            strides for the 3x3 convolution appended, -1 can be used for extracted
            feature layers
        pads : list of int
            paddings for the 3x3 convolution, -1 can be used for extracted layers
        sizes : list or list of list
            [min_size, max_size] for all layers or [[], [], []...] for specific layers
        ratios : list or list of list
            [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
        normalizations : int or list of int
            use normalizations value for all layers or [...] for specific layers,
            -1 indicate no normalizations and scales
        steps : list
            specify steps for each MultiBoxPrior layer, leave empty, it will calculate
            according to layer dimensions
        min_filter : int
            minimum number of filters used in 1x1 convolution
        nms_thresh : float
            non-maximum suppression threshold
        force_suppress : boolean
            whether suppress different class objects
        nms_topk : int
            apply NMS to top K detections
        **kwargs
            forwarded unchanged to the base network's ``get_symbol``

        Returns
        -------
        mx.Symbol
            the ``MultiBoxDetection`` output symbol
        """
        # NOTE(review): the positional order here is (..., sizes, ratios,
        # strides, pads), which differs from get_symbol_train's
        # (..., strides, pads, sizes, ratios). ``steps=[]`` is a shared mutable
        # default that is only forwarded, never mutated, in this function.

        # Load the base network symbol from the script named by `network`.
        body = import_module(network).get_symbol(num_classes, **kwargs)
        layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
                                     min_filter=min_filter)
        loc_preds, cls_preds, anchor_boxes = multibox_layer(
            layers, num_classes, sizes=sizes, ratios=ratios,
            normalization=normalizations, num_channels=num_filters,
            clip=False, interm_layer=0, steps=steps)

        # No label at test time: a plain channel-wise softmax is used instead
        # of the SoftmaxOutput loss layer of the training symbol.
        cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel',
                                               name='cls_prob')
        # Produce the detections from the class probabilities, localization
        # predictions and anchors; NMS uses nms_thresh / nms_topk.
        out = mx.contrib.symbol.MultiBoxDetection(
            *[cls_prob, loc_preds, anchor_boxes],
            name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
            variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
        return out