Config

  1. model = dict(
  2. type='FastRCNN',
  3. backbone=dict(
  4. type='ResNet',
  5. depth=50,
  6. num_stages=4,
  7. out_indices=(0, 1, 2, 3),
  8. frozen_stages=1,
  9. norm_cfg=dict(type='BN', requires_grad=True),
  10. norm_eval=True,
  11. style='pytorch',
  12. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
  13. neck=dict(
  14. type='FPN',
  15. in_channels=[256, 512, 1024, 2048],
  16. out_channels=256,
  17. num_outs=5),
  18. roi_head=dict(
  19. type='StandardRoIHead',
  20. bbox_roi_extractor=dict(
  21. type='SingleRoIExtractor',
  22. roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
  23. out_channels=256,
  24. featmap_strides=[4, 8, 16, 32]),
  25. bbox_head=dict(
  26. type='Shared2FCBBoxHead',
  27. in_channels=256,
  28. fc_out_channels=1024,
  29. roi_feat_size=7,
  30. num_classes=1,
  31. bbox_coder=dict(
  32. type='DeltaXYWHBBoxCoder',
  33. target_means=[0.0, 0.0, 0.0, 0.0],
  34. target_stds=[0.1, 0.1, 0.2, 0.2]),
  35. reg_class_agnostic=False,
  36. loss_cls=dict(
  37. type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
  38. loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
  39. train_cfg=dict(
  40. rcnn=dict(
  41. assigner=dict(
  42. type='MaxIoUAssigner',
  43. pos_iou_thr=0.5,
  44. neg_iou_thr=0.5,
  45. min_pos_iou=0.5,
  46. match_low_quality=False,
  47. ignore_iof_thr=-1),
  48. sampler=dict(
  49. type='RandomSampler',
  50. num=512,
  51. pos_fraction=0.25,
  52. neg_pos_ub=-1,
  53. add_gt_as_proposals=True),
  54. pos_weight=-1,
  55. debug=False)),
  56. test_cfg=dict(
  57. rcnn=dict(
  58. score_thr=0.05,
  59. nms=dict(type='nms', iou_threshold=0.5),
  60. max_per_img=100)))
  61. dataset_type = 'CocoDataset'
  62. data_root = 'data/coco/'
  63. img_norm_cfg = dict(
  64. mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
  65. train_pipeline = [
  66. dict(type='LoadImageFromFile'),
  67. dict(type='LoadProposals', num_max_proposals=2000),
  68. dict(type='LoadAnnotations', with_bbox=True),
  69. dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
  70. dict(type='RandomFlip', flip_ratio=0.5),
  71. dict(
  72. type='Normalize',
  73. mean=[123.675, 116.28, 103.53],
  74. std=[58.395, 57.12, 57.375],
  75. to_rgb=True),
  76. dict(type='Pad', size_divisor=32),
  77. dict(type='DefaultFormatBundle'),
  78. dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels'])
  79. ]
  80. test_pipeline = [
  81. dict(type='LoadImageFromFile'),
  82. dict(type='LoadProposals', num_max_proposals=None),
  83. dict(
  84. type='MultiScaleFlipAug',
  85. img_scale=(1333, 800),
  86. flip=False,
  87. transforms=[
  88. dict(type='Resize', keep_ratio=True),
  89. dict(type='RandomFlip'),
  90. dict(
  91. type='Normalize',
  92. mean=[123.675, 116.28, 103.53],
  93. std=[58.395, 57.12, 57.375],
  94. to_rgb=True),
  95. dict(type='Pad', size_divisor=32),
  96. dict(type='ImageToTensor', keys=['img']),
  97. dict(type='ToTensor', keys=['proposals']),
  98. dict(
  99. type='ToDataContainer',
  100. fields=[dict(key='proposals', stack=False)]),
  101. dict(type='Collect', keys=['img', 'proposals'])
  102. ])
  103. ]
  104. data = dict(
  105. samples_per_gpu=2,
  106. workers_per_gpu=2,
  107. train=dict(
  108. type='TableDataset',
  109. ann_file='data/icdar2019/test.json',
  110. img_prefix='data/icdar2019/test/TRACKA/',
  111. pipeline=[
  112. dict(type='LoadImageFromFile'),
  113. dict(type='LoadProposals', num_max_proposals=2000),
  114. dict(type='LoadAnnotations', with_bbox=True),
  115. dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
  116. dict(type='RandomFlip', flip_ratio=0.5),
  117. dict(
  118. type='Normalize',
  119. mean=[123.675, 116.28, 103.53],
  120. std=[58.395, 57.12, 57.375],
  121. to_rgb=True),
  122. dict(type='Pad', size_divisor=32),
  123. dict(type='DefaultFormatBundle'),
  124. dict(
  125. type='Collect',
  126. keys=['img', 'proposals', 'gt_bboxes', 'gt_labels'])
  127. ],
  128. proposal_file='work_dirs/rpn_r50_fpn_1x_icdar2019/epoch_12.pkl'),
  129. val=dict(
  130. type='TableDataset',
  131. ann_file='data/icdar2019/test.json',
  132. img_prefix='data/icdar2019/test/TRACKA/',
  133. pipeline=[
  134. dict(type='LoadImageFromFile'),
  135. dict(type='LoadProposals', num_max_proposals=None),
  136. dict(
  137. type='MultiScaleFlipAug',
  138. img_scale=(1333, 800),
  139. flip=False,
  140. transforms=[
  141. dict(type='Resize', keep_ratio=True),
  142. dict(type='RandomFlip'),
  143. dict(
  144. type='Normalize',
  145. mean=[123.675, 116.28, 103.53],
  146. std=[58.395, 57.12, 57.375],
  147. to_rgb=True),
  148. dict(type='Pad', size_divisor=32),
  149. dict(type='ImageToTensor', keys=['img']),
  150. dict(type='ToTensor', keys=['proposals']),
  151. dict(
  152. type='ToDataContainer',
  153. fields=[dict(key='proposals', stack=False)]),
  154. dict(type='Collect', keys=['img', 'proposals'])
  155. ])
  156. ],
  157. proposal_file='work_dirs/rpn_r50_fpn_1x_icdar2019/epoch_12.pkl'),
  158. test=dict(
  159. type='TableDataset',
  160. ann_file='data/icdar2019/test.json',
  161. img_prefix='data/icdar2019/test/TRACKA/',
  162. pipeline=[
  163. dict(type='LoadImageFromFile'),
  164. dict(type='LoadProposals', num_max_proposals=None),
  165. dict(
  166. type='MultiScaleFlipAug',
  167. img_scale=(1333, 800),
  168. flip=False,
  169. transforms=[
  170. dict(type='Resize', keep_ratio=True),
  171. dict(type='RandomFlip'),
  172. dict(
  173. type='Normalize',
  174. mean=[123.675, 116.28, 103.53],
  175. std=[58.395, 57.12, 57.375],
  176. to_rgb=True),
  177. dict(type='Pad', size_divisor=32),
  178. dict(type='ImageToTensor', keys=['img']),
  179. dict(type='ToTensor', keys=['proposals']),
  180. dict(
  181. type='ToDataContainer',
  182. fields=[dict(key='proposals', stack=False)]),
  183. dict(type='Collect', keys=['img', 'proposals'])
  184. ])
  185. ],
  186. proposal_file='data/coco/proposals/rpn_r50_fpn_1x_val2017.pkl'))
  187. evaluation = dict(interval=1, metric='bbox')
  188. optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
  189. optimizer_config = dict(grad_clip=None)
  190. lr_config = dict(
  191. policy='step',
  192. warmup='linear',
  193. warmup_iters=500,
  194. warmup_ratio=0.001,
  195. step=[8, 11])
  196. runner = dict(type='EpochBasedRunner', max_epochs=12)
  197. checkpoint_config = dict(interval=1)
  198. log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
  199. custom_hooks = [dict(type='NumClassCheckHook'), dict(type='UploadHook')]
  200. dist_params = dict(backend='nccl')
  201. log_level = 'INFO'
  202. load_from = None
  203. resume_from = None
  204. workflow = [('train', 1)]
  205. work_dir = './work_dirs/fast_rcnn_r50_fpn_1x_table_icdar2019'
  206. gpu_ids = range(0, 1)

前置基础知识

Config(定义了Config文件的书写格式,以及引入其他配置文件的方式)
Registry(注册器:根据Config文件中的配置来实例化对应的类)
Runner(各个类实例化之后,由Runner负责使用这些实例来执行训练和验证流程)

计算参数量

  1. python tools/analysis_tools/get_flops.py configs/tabnet/fast_rcnn_r50_fpn_1x_table_icdar2019.py

image.png