Deploying fast mode in mmdetection:
Notes
mmdetection overloads the model's forward function: unlike a typical forward that takes only a Tensor, it also requires img_metas, which is incompatible with the single-input forward that EasyPruner's jit.trace step expects. This toolkit resolves the mismatch by borrowing some techniques from mmdetection's own PyTorch-to-ONNX export code.
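The core of the workaround is to bind the extra arguments with functools.partial so that forward needs only the image Tensor, then restore the original forward afterwards. A minimal sketch, assuming `model` is an already-built mmdetection detector and `one_meta` is a preprocessed image meta dict (produced as in Step 2 below):

from functools import partial

# Bind img_metas and return_loss so that forward(img) takes a single Tensor,
# which is what trace-based tooling such as EasyPruner's jit.trace step expects.
model.forward = partial(model.forward, img_metas=[[one_meta]], return_loss=False)
# ...run the trace-based tool here...
model.forward = model.forward.func  # restore the original bound forward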
Example
Take the FCOS object detector or the SCRFD face detector as an example:
Step 1: Normal training
Step 2: Prepare the finetuning code; before finetuning, load the pretrained model and insert the pruning code
After the model is built, load the weights obtained from normal training and run pruning. SCRFD does not support loading a pretrained model by default, so the weights have to be loaded manually, e.g., modify train_detector in mmdet/apis/train.py as follows:
data_loaders = [
    build_dataloader(
        ds,
        cfg.data.samples_per_gpu,
        cfg.data.workers_per_gpu,
        # cfg.gpus will be ignored if distributed
        len(cfg.gpu_ids),
        dist=distributed,
        seed=cfg.seed) for ds in dataset
]
'''
Original mmdetection code above
'''
# Modify the network so that its forward takes a single input
from easypruner import fastpruner
from easypruner.utils.rebuild import rebuild
from functools import partial
import copy
from mmdet.core.export import pytorch2onnx

imgpath = "data/retinaface/val/images/0--Parade/0_Parade_Parade_0_194.jpg"  # point this at any existing image
normalize_cfg = {'mean': [127, 127, 127], 'std': [55, 55, 55]}  # required by the input format; arbitrary values are fine
input_shape = [1, 3, 640, 640]  # required by the input format; set to your actual shape
input_config = {
    'input_shape': input_shape,
    'input_path': imgpath,
    'normalize_cfg': normalize_cfg
}
one_img, one_meta = pytorch2onnx.preprocess_example_input(input_config)
model.forward = partial(model.forward, img_metas=[[one_meta]], return_loss=False)

# Load the trained weights and prune
state_dict = torch.load("34Gmodel.pth")
device = next(model.parameters()).device
model.load_state_dict(state_dict['state_dict'], strict=True)
model.cpu()
# Both "Ratio" and "Uniform" are worth trying; note the capitalization of `method`
fastpruner.fastpruner(model, prune_factor=0.4, method="Ratio", input_dim=[3, 640, 640])
#fastpruner.fastpruner(model, prune_factor=0.5, method="Uniform", input_dim=[3, 640, 640])
#fastpruner.fastpruner(model, prune_factor=0.01, method="Order", input_dim=[3, 640, 640])
model.to(device)

# Save the pruned model weights
state_dict = state_dict['state_dict']
for k, v in model.state_dict().items():
    state_dict[k] = v
save_path = './model_pruned_34_0.4uniform.pt'
torch.save(model.state_dict(), save_path)
#torch.export()
# Restore the original forward
model.forward = model.forward.func
exit(0)  # after pruning you can finetune directly, or exit here and later rebuild the network with rebuild() before finetuning
'''
Original mmdetection code below
'''
# put model on gpus
if distributed:
    find_unused_parameters = cfg.get('find_unused_parameters', False)
    # Sets the `find_unused_parameters` parameter in
    # torch.nn.parallel.DistributedDataParallel
    model = MMDistributedDataParallel(
        model.cuda(),
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
        find_unused_parameters=find_unused_parameters)
else:
    model = MMDataParallel(
        model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
When using the Uniform method, or when measuring the model's FLOPs compression ratio, the neck and head parts must be removed from the model definition and from the forward functions (and even from the ONNX-export path). The number of bboxes produced varies between runs, so the FLOPs of the head vary as well; we therefore only count the FLOPs of the backbone. For faster_rcnn, for example, we need to modify mmdet/models/detectors/two_stage.py. Taking mmdet version 2.18.0 as an example (a FLOPs check sketch follows the code):
@DETECTORS.register_module()
class TwoStageDetector(BaseDetector):
    """Base class for two-stage detectors.

    Two-stage detectors typically consisting of a region proposal network and a
    task-specific regression head.
    """

    def __init__(self,
                 backbone,
                 neck=None,
                 rpn_head=None,
                 roi_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None,
                 init_cfg=None):
        super(TwoStageDetector, self).__init__(init_cfg)
        if pretrained:
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
            backbone.pretrained = pretrained
        self.backbone = build_backbone(backbone)
        #### comment out from here
        '''
        if neck is not None:
            self.neck = build_neck(neck)

        if rpn_head is not None:
            rpn_train_cfg = train_cfg.rpn if train_cfg is not None else None
            rpn_head_ = rpn_head.copy()
            rpn_head_.update(train_cfg=rpn_train_cfg, test_cfg=test_cfg.rpn)
            self.rpn_head = build_head(rpn_head_)

        if roi_head is not None:
            # update train and test cfg here for now
            # TODO: refactor assigner & sampler
            rcnn_train_cfg = train_cfg.rcnn if train_cfg is not None else None
            roi_head.update(train_cfg=rcnn_train_cfg)
            roi_head.update(test_cfg=test_cfg.rcnn)
            roi_head.pretrained = pretrained
            self.roi_head = build_head(roi_head)
        '''
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

    @property
    def with_rpn(self):
        """bool: whether the detector has RPN"""
        return hasattr(self, 'rpn_head') and self.rpn_head is not None

    ...

    def forward_dummy(self, img):
        """Used for computing network flops.

        See `mmdetection/tools/analysis_tools/get_flops.py`
        """
        outs = ()
        # backbone
        x = self.extract_feat(img)
        return x  # add this line
        # comment out the rest of the method
        '''
        # rpn
        if self.with_rpn:
            rpn_outs = self.rpn_head(x)
            outs = outs + (rpn_outs, )
        proposals = torch.randn(1000, 4).to(img.device)
        # roi_head
        roi_outs = self.roi_head.forward_dummy(x, proposals)
        outs = outs + (roi_outs, )
        return outs
        '''

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None,
                      **kwargs):
        """
        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): list of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
                For details on the values of these keys see
                `mmdet/datasets/pipelines/formatting.py:Collect`.
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): class indices corresponding to each box
            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
                boxes can be ignored when computing the loss.
            gt_masks (None | Tensor) : true segmentation masks for each box
                used if the architecture supports a segmentation task.
            proposals : override rpn proposals with custom proposals. Use when
                `with_rpn` is False.

        Returns:
            dict[str, Tensor]: a dictionary of loss components
        """
        x = self.extract_feat(img)
        return x  # add this line
        # comment out the rest of the method
        '''
        losses = dict()

        # RPN forward and loss
        if self.with_rpn:
            proposal_cfg = self.train_cfg.get('rpn_proposal',
                                              self.test_cfg.rpn)
            rpn_losses, proposal_list = self.rpn_head.forward_train(
                x,
                img_metas,
                gt_bboxes,
                gt_labels=None,
                gt_bboxes_ignore=gt_bboxes_ignore,
                proposal_cfg=proposal_cfg,
                **kwargs)
            losses.update(rpn_losses)
        else:
            proposal_list = proposals

        roi_losses = self.roi_head.forward_train(x, img_metas, proposal_list,
                                                 gt_bboxes, gt_labels,
                                                 gt_bboxes_ignore, gt_masks,
                                                 **kwargs)
        losses.update(roi_losses)

        return losses
        '''

    ...

    def simple_test(self, img, img_metas, proposals=None, rescale=False):
        """Test without augmentation."""
        # comment out this assert
        # assert self.with_bbox, 'Bbox head must be implemented.'
        x = self.extract_feat(img)
        return x  # add this line
        # comment out the rest of the method
        '''
        if proposals is None:
            proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)
        else:
            proposal_list = proposals

        return self.roi_head.simple_test(
            x, proposal_list, img_metas, rescale=rescale)
        '''

    ...

    def onnx_export(self, img, img_metas):
        img_shape = torch._shape_as_tensor(img)[2:]
        img_metas[0]['img_shape_for_onnx'] = img_shape
        x = self.extract_feat(img)
        return x  # add this line
        # comment out the rest of the method
        '''
        proposals = self.rpn_head.onnx_export(x, img_metas)
        if hasattr(self.roi_head, 'onnx_export'):
            return self.roi_head.onnx_export(x, proposals, img_metas)
        else:
            raise NotImplementedError(
                f'{self.__class__.__name__} can not '
                f'be exported to ONNX. Please refer to the '
                f'list of supported models,'
                f'https://mmdetection.readthedocs.io/en/latest/tutorials/pytorch2onnx.html#list-of-supported-models-exportable-to-onnx'  # noqa E501
            )
        '''

    ...
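With forward_dummy reduced to the backbone, the compression ratio can be verified with mmcv's complexity tool, mirroring what mmdetection's tools/analysis_tools/get_flops.py does. A minimal sketch, assuming `model` is the (possibly pruned) detector built from your config:

from mmcv.cnn import get_model_complexity_info

model.eval()
# get_flops.py routes FLOPs counting through forward_dummy,
# which after the modification above returns backbone features only
model.forward = model.forward_dummy
flops, params = get_model_complexity_info(model, (3, 640, 640))
print(f'FLOPs: {flops}, Params: {params}')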
Step 3: Finetune based on the normally trained model
As noted in the pruning snippet above, you can either remove the exit(0) and keep training the pruned model in the same process, or exit and later restore the pruned architecture from the saved weights before finetuning (see the sketch below).
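A minimal sketch of the second option, with placeholder paths; rebuild() reshapes the layers to the pruned widths and loads the weights in one step:

from easypruner.utils.rebuild import rebuild
import torch

state_dict = torch.load('./model_pruned_34_0.4uniform.pt')  # saved in Step 2
model = rebuild(model, state_dict)  # rebuild the pruned architecture and load its weights
# ...then continue with the normal mmdetection training loop to finetune...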
Step 4: For standalone accuracy testing, or when deploying and converting to ONNX, change the weight-loading code to load through the rebuild function, as in the following code:
Again, this modification goes in train_detector in mmdet/apis/train.py:
data_loaders = [
    build_dataloader(
        ds,
        cfg.data.samples_per_gpu,
        cfg.data.workers_per_gpu,
        # cfg.gpus will be ignored if distributed
        len(cfg.gpu_ids),
        dist=distributed,
        seed=cfg.seed) for ds in dataset
]
'''
Original mmdetection code above
'''
from easypruner.utils.rebuild import rebuild
state_dict = torch.load("model_pruned_34_0.4uniform_finetuned.pt")  # weight file of the pruned and finetuned model
model = rebuild(model, state_dict)
'''
Original mmdetection code below
'''
# put model on gpus
if distributed:
    find_unused_parameters = cfg.get('find_unused_parameters', False)
    # Sets the `find_unused_parameters` parameter in
    # torch.nn.parallel.DistributedDataParallel
    model = MMDistributedDataParallel(
        model.cuda(),
        device_ids=[torch.cuda.current_device()],
        broadcast_buffers=False,
        find_unused_parameters=find_unused_parameters)
else:
    model = MMDataParallel(
        model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
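After rebuild, the pruned model can be exported to ONNX with the same single-input binding used in Step 2, mirroring mmdetection's tools/deployment/pytorch2onnx.py. A minimal sketch with placeholder file names, assuming `one_meta` was produced by pytorch2onnx.preprocess_example_input as before:

from functools import partial
import torch

model.cpu().eval()
# Bind the meta arguments as in Step 2, then export with a dummy image.
model.forward = partial(model.forward, img_metas=[[one_meta]], return_loss=False)
one_img = torch.randn(1, 3, 640, 640)
torch.onnx.export(model, [one_img], 'model_pruned.onnx', opset_version=11)  # placeholder output name
model.forward = model.forward.func  # restore the original forward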
