Deploying fast mode in mmdetection:
Notes
mmdetection overloads the model's forward function: unlike a typical forward that takes only a Tensor, it also requires img_metas, which is incompatible with the single-input forward that EasyPruner's jit.trace step expects. This toolkit resolves the mismatch by borrowing some techniques from mmdetection's own PyTorch-to-ONNX export code.
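The core of the workaround is to bind the extra arguments with functools.partial so that forward needs only the image Tensor, then restore the original forward afterwards. A minimal sketch, assuming `model` is an already-built mmdetection detector and `one_meta` is a preprocessed image meta dict (produced as in Step 2 below):

from functools import partial

# Bind img_metas and return_loss so that forward(img) takes a single Tensor,
# which is what trace-based tooling such as EasyPruner's jit.trace step expects.
model.forward = partial(model.forward, img_metas=[[one_meta]], return_loss=False)
# ...run the trace-based tool here...
model.forward = model.forward.func  # restore the original bound forward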
Example
Take the FCOS object detector or the SCRFD face detector as an example:
Step 1: Normal training
Step 2: Prepare the finetuning code; before finetuning, load the pretrained model and insert the pruning code
After the model is built, load the weights obtained from normal training and run pruning. SCRFD does not support loading a pretrained model by default, so the weights have to be loaded manually, e.g., modify train_detector in mmdet/apis/train.py as follows:
data_loaders = [
    build_dataloader(
        ds,
        cfg.data.samples_per_gpu,
        cfg.data.workers_per_gpu,
        # cfg.gpus will be ignored if distributed
        len(cfg.gpu_ids),
        dist=distributed,
        seed=cfg.seed) for ds in dataset
]
'''
Original mmdetection code above
'''
# Modify the network so that its forward takes a single input
from easypruner import fastpruner
from easypruner.utils.rebuild import rebuild
from functools import partial
import copy
from mmdet.core.export import pytorch2onnx

imgpath = "data/retinaface/val/images/0--Parade/0_Parade_Parade_0_194.jpg"  # point this at any existing image
normalize_cfg = {'mean': [127, 127, 127], 'std': [55, 55, 55]}  # required by the input format; arbitrary values are fine
input_shape = [1, 3, 640, 640]  # required by the input format; set to your actual shape
input_config = {
    'input_shape': input_shape,
    'input_path': imgpath,
    'normalize_cfg': normalize_cfg
}
one_img, one_meta = pytorch2onnx.preprocess_example_input(input_config)
model.forward = partial(model.forward, img_metas=[[one_meta]], return_loss=False)

# Load the trained weights and prune
state_dict = torch.load("34Gmodel.pth")
device = next(model.parameters()).device
model.load_state_dict(state_dict['state_dict'], strict=True)
model.cpu()
# Both "Ratio" and "Uniform" are worth trying; note the capitalization of `method`
fastpruner.fastpruner(model, prune_factor=0.4, method="Ratio", input_dim=[3, 640, 640])
#fastpruner.fastpruner(model, prune_factor=0.5, method="Uniform", input_dim=[3, 640, 640])
#fastpruner.fastpruner(model, prune_factor=0.01, method="Order", input_dim=[3, 640, 640])
model.to(device)

# Save the pruned model weights
state_dict = state_dict['state_dict']
for k, v in model.state_dict().items():
    state_dict[k] = v
save_path = './model_pruned_34_0.4uniform.pt'
torch.save(model.state_dict(), save_path)
#torch.export()
# Restore the original forward
model.forward = model.forward.func
exit(0)  # after pruning you can finetune directly, or exit here and later rebuild the network with rebuild() before finetuning
'''
Original mmdetection code below
'''
# put model on gpus
if distributed:
    find_unused_parameters = cfg.get('find_unused_parameters', False)
    # Sets the `find_unused_parameters` parameter in
    # torch.nn.parallel.DistributedDataParallel
    model = MMDistributedDataParallel(
        model.cuda(),
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
        find_unused_parameters=find_unused_parameters)
else:
    model = MMDataParallel(
        model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
When using the Uniform method, or when measuring the model's FLOPs compression ratio, the neck and head parts must be removed from the model definition and from the forward functions (and even from the ONNX-export path). The number of bboxes produced varies between runs, so the FLOPs of the head vary as well; we therefore only count the FLOPs of the backbone. For faster_rcnn, for example, we need to modify mmdet/models/detectors/two_stage.py. Taking mmdet version 2.18.0 as an example (a FLOPs check sketch follows the code):
@DETECTORS.register_module()
class TwoStageDetector(BaseDetector):
    """Base class for two-stage detectors.

    Two-stage detectors typically consisting of a region proposal network and a
    task-specific regression head.
    """

    def __init__(self,
                 backbone,
                 neck=None,
                 rpn_head=None,
                 roi_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None,
                 init_cfg=None):
        super(TwoStageDetector, self).__init__(init_cfg)
        if pretrained:
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
            backbone.pretrained = pretrained
        self.backbone = build_backbone(backbone)
        #### comment out from here
        '''
        if neck is not None:
            self.neck = build_neck(neck)

        if rpn_head is not None:
            rpn_train_cfg = train_cfg.rpn if train_cfg is not None else None
            rpn_head_ = rpn_head.copy()
            rpn_head_.update(train_cfg=rpn_train_cfg, test_cfg=test_cfg.rpn)
            self.rpn_head = build_head(rpn_head_)

        if roi_head is not None:
            # update train and test cfg here for now
            # TODO: refactor assigner & sampler
            rcnn_train_cfg = train_cfg.rcnn if train_cfg is not None else None
            roi_head.update(train_cfg=rcnn_train_cfg)
            roi_head.update(test_cfg=test_cfg.rcnn)
            roi_head.pretrained = pretrained
            self.roi_head = build_head(roi_head)
        '''
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

    @property
    def with_rpn(self):
        """bool: whether the detector has RPN"""
        return hasattr(self, 'rpn_head') and self.rpn_head is not None

    ...

    def forward_dummy(self, img):
        """Used for computing network flops.

        See `mmdetection/tools/analysis_tools/get_flops.py`
        """
        outs = ()
        # backbone
        x = self.extract_feat(img)
        return x  # add this line
        # comment out the rest of the method
        '''
        # rpn
        if self.with_rpn:
            rpn_outs = self.rpn_head(x)
            outs = outs + (rpn_outs, )
        proposals = torch.randn(1000, 4).to(img.device)
        # roi_head
        roi_outs = self.roi_head.forward_dummy(x, proposals)
        outs = outs + (roi_outs, )
        return outs
        '''

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None,
                      **kwargs):
        """
        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): list of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
                For details on the values of these keys see
                `mmdet/datasets/pipelines/formatting.py:Collect`.
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): class indices corresponding to each box
            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
                boxes can be ignored when computing the loss.
            gt_masks (None | Tensor) : true segmentation masks for each box
                used if the architecture supports a segmentation task.
            proposals : override rpn proposals with custom proposals. Use when
                `with_rpn` is False.

        Returns:
            dict[str, Tensor]: a dictionary of loss components
        """
        x = self.extract_feat(img)
        return x  # add this line
        # comment out the rest of the method
        '''
        losses = dict()

        # RPN forward and loss
        if self.with_rpn:
            proposal_cfg = self.train_cfg.get('rpn_proposal',
                                              self.test_cfg.rpn)
            rpn_losses, proposal_list = self.rpn_head.forward_train(
                x,
                img_metas,
                gt_bboxes,
                gt_labels=None,
                gt_bboxes_ignore=gt_bboxes_ignore,
                proposal_cfg=proposal_cfg,
                **kwargs)
            losses.update(rpn_losses)
        else:
            proposal_list = proposals

        roi_losses = self.roi_head.forward_train(x, img_metas, proposal_list,
                                                 gt_bboxes, gt_labels,
                                                 gt_bboxes_ignore, gt_masks,
                                                 **kwargs)
        losses.update(roi_losses)

        return losses
        '''

    ...

    def simple_test(self, img, img_metas, proposals=None, rescale=False):
        """Test without augmentation."""
        # comment out this assert
        # assert self.with_bbox, 'Bbox head must be implemented.'
        x = self.extract_feat(img)
        return x  # add this line
        # comment out the rest of the method
        '''
        if proposals is None:
            proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)
        else:
            proposal_list = proposals

        return self.roi_head.simple_test(
            x, proposal_list, img_metas, rescale=rescale)
        '''

    ...

    def onnx_export(self, img, img_metas):
        img_shape = torch._shape_as_tensor(img)[2:]
        img_metas[0]['img_shape_for_onnx'] = img_shape
        x = self.extract_feat(img)
        return x  # add this line
        # comment out the rest of the method
        '''
        proposals = self.rpn_head.onnx_export(x, img_metas)
        if hasattr(self.roi_head, 'onnx_export'):
            return self.roi_head.onnx_export(x, proposals, img_metas)
        else:
            raise NotImplementedError(
                f'{self.__class__.__name__} can not '
                f'be exported to ONNX. Please refer to the '
                f'list of supported models,'
                f'https://mmdetection.readthedocs.io/en/latest/tutorials/pytorch2onnx.html#list-of-supported-models-exportable-to-onnx'  # noqa E501
            )
        '''

    ...
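With forward_dummy reduced to the backbone, the compression ratio can be verified with mmcv's complexity tool, mirroring what mmdetection's tools/analysis_tools/get_flops.py does. A minimal sketch, assuming `model` is the (possibly pruned) detector built from your config:

from mmcv.cnn import get_model_complexity_info

model.eval()
# get_flops.py routes FLOPs counting through forward_dummy,
# which after the modification above returns backbone features only
model.forward = model.forward_dummy
flops, params = get_model_complexity_info(model, (3, 640, 640))
print(f'FLOPs: {flops}, Params: {params}')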
Step 3: Finetune based on the normally trained model
As noted in the pruning snippet above, you can either remove the exit(0) and keep training the pruned model in the same process, or exit and later restore the pruned architecture from the saved weights before finetuning (see the sketch below).
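A minimal sketch of the second option, with placeholder paths; rebuild() reshapes the layers to the pruned widths and loads the weights in one step:

from easypruner.utils.rebuild import rebuild
import torch

state_dict = torch.load('./model_pruned_34_0.4uniform.pt')  # saved in Step 2
model = rebuild(model, state_dict)  # rebuild the pruned architecture and load its weights
# ...then continue with the normal mmdetection training loop to finetune...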
Step 4: For standalone accuracy testing, or when deploying and converting to ONNX, change the weight-loading code to load through the rebuild function, as in the following code:
Again, this modification goes in train_detector in mmdet/apis/train.py:
data_loaders = [
    build_dataloader(
        ds,
        cfg.data.samples_per_gpu,
        cfg.data.workers_per_gpu,
        # cfg.gpus will be ignored if distributed
        len(cfg.gpu_ids),
        dist=distributed,
        seed=cfg.seed) for ds in dataset
]
'''
Original mmdetection code above
'''
from easypruner.utils.rebuild import rebuild
state_dict = torch.load("model_pruned_34_0.4uniform_finetuned.pt")  # weight file of the pruned and finetuned model
model = rebuild(model, state_dict)
'''
Original mmdetection code below
'''
# put model on gpus
if distributed:
    find_unused_parameters = cfg.get('find_unused_parameters', False)
    # Sets the `find_unused_parameters` parameter in
    # torch.nn.parallel.DistributedDataParallel
    model = MMDistributedDataParallel(
        model.cuda(),
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
        find_unused_parameters=find_unused_parameters)
else:
    model = MMDataParallel(
        model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
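After rebuild, the pruned model can be exported to ONNX with the same single-input binding used in Step 2, mirroring mmdetection's tools/deployment/pytorch2onnx.py. A minimal sketch with placeholder file names, assuming `one_meta` was produced by pytorch2onnx.preprocess_example_input as before:

from functools import partial
import torch

model.cpu().eval()
# Bind the meta arguments as in Step 2, then export with a dummy image.
model.forward = partial(model.forward, img_metas=[[one_meta]], return_loss=False)
one_img = torch.randn(1, 3, 640, 640)
torch.onnx.export(model, [one_img], 'model_pruned.onnx', opset_version=11)  # placeholder output name
model.forward = model.forward.func  # restore the original forward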
