详解Transformer (Attention Is All You Need)

https://github.com/dk-liang/Awesome-Visual-Transformer

Deformable DETR: Deformable Transformers for End-to-End Object Detection

image.png

1、Compiling CUDA operators

  1. cd ./models/ops
  2. sh ./make.sh
  3. # unit test (should see all checking is True)
  4. python test.py

2、Prepare datasets and annotations

  1. python tools/convert_COCOText_to_coco.py
  2. python tools/convert_ICDAR15_to_coco.py
  3. python tools/convert_ICDAR13_to_coco.py
  4. python tools/convert_SynthText_to_coco.py
  5. python tools/convert_VISD_to_coco.py

3、Pretrain

  1. # 在COCOTEXT_v2上pretrain
  2. python3 -m torch.distributed.launch --nproc_per_node=8 --use_env main.py --output_dir ./output/Pretrain_COCOTextV2 --dataset_file coco --coco_path /share/wuweijia/Data/COCOTextV2 --batch_size 2 --with_box_refine --num_queries 500 --epochs 300 --lr_drop 150
  3. python3 -m torch.distributed.launch --nproc_per_node=8 --use_env main.py --output_dir ./output/Pretrain_COCOText --dataset_file cocotext --coco_path /share/wuweijia/Data/COCOTextV2 --batch_size 2 --with_box_refine --num_queries 300 --epochs 300 --lr_drop 150
  4. # 在COCOText上做pretrain
  5. python3 -m torch.distributed.launch --nproc_per_node=8 --use_env main.py --output_dir ./output/Pretrain_COCOText --dataset_file cocotext --coco_path /share/wuweijia/Data/COCOTextV2 --batch_size 2 --with_box_refine --num_queries 100 --epochs 300 --lr_drop 150
  6. # VISD上做pretrain
  7. python3 -m torch.distributed.launch --nproc_per_node=8 --use_env main.py --output_dir ./output/VISD --dataset_file VISD --coco_path /share/wuweijia/Data/VISD --batch_size 2 --with_box_refine --num_queries 300 --epochs 300 --lr_drop 150
  8. # optional: append the following flag to the command above to resume from a checkpoint: --resume ./output/Pretrain_COCOTextV2/checkpoint.pth
  9. # 在SynthText上pretrain
  10. python3 -m torch.distributed.launch --nproc_per_node=8 --use_env main.py --output_dir ./output/Pretrain_SynthText --dataset_file SynthText --coco_path /share/wuweijia/Data/SynthText --batch_size 2 --with_box_refine --num_queries 300 --epochs 300 --lr_drop 150
  11. # 在UnrealText上pretrain
  12. python3 -m torch.distributed.launch --nproc_per_node=8 --use_env main.py --output_dir ./output/Pretrain_UnrealText --dataset_file UnrealText --coco_path /share/wuweijia/Data/UnrealText --batch_size 2 --with_box_refine --num_queries 300 --epochs 300 --lr_drop 150
  13. python3 tools/Pretrain_model_to_finetune.py

4、Train

  1. GPUS_PER_NODE=8 ./tools/run_dist_launch.sh 8 ./configs/r50_deformable_detr.sh
  2. python3 -m torch.distributed.launch --nproc_per_node=4 --use_env main.py --output_dir ./exps/r50_TotalText --dataset_file coco --coco_path /home/wuweijia/.jupyter/Data/Total-text --batch_size 1 --with_box_refine --num_queries 500 --epochs 50 --lr_drop 30
  3. ICDAR15
  4. python3 -m torch.distributed.launch --nproc_per_node=8 --use_env main.py --output_dir ./output/ICDAR15 --dataset_file coco --coco_path /share/wuweijia/Data/ICDAR2015 --batch_size 2 --with_box_refine --num_queries 100 --epochs 500 --lr_drop 200 --resume ./output/Pretrain_COCOText/pretrain_coco.pth
  5. MOVText
  6. python3 -m torch.distributed.launch --nproc_per_node=8 --use_env main.py --output_dir ./output/MOVText --dataset_file MOVText --coco_path /share/wuweijia/MyBenchMark/relabel/Dapan_lizhuang/final_FrameAnn/MOVText --batch_size 2 --with_box_refine --num_queries 100 --epochs 40 --lr_drop 20 --resume ./output/Pretrain_COCOText/pretrain_coco.pth
  7. LSVTD
  8. python3 -m torch.distributed.launch --nproc_per_node=8 --use_env main.py --output_dir ./output/LSVTD --dataset_file LSVTD --coco_path /share/wuweijia/Data/VideoText/ICDAR21SVTS/SVTS --batch_size 2 --with_box_refine --num_queries 100 --epochs 40 --lr_drop 20 --resume ./output/Pretrain_COCOText/pretrain_coco.pth
  9. RoadText1k
  10. python3 -m torch.distributed.launch --nproc_per_node=8 --use_env main.py --output_dir ./output/RoadText1k --dataset_file RoadText1k --coco_path /share/wuweijia/Data/VideoText/RoadText1k --batch_size 2 --with_box_refine --num_queries 100 --epochs 40 --lr_drop 20 --resume ./output/Pretrain_COCOText/pretrain_coco.pth

5、Inference

  1. # ICDAR2015
  2. python3 -m torch.distributed.launch --nproc_per_node=1 --use_env main.py --output_dir ./output/ICDAR15 --dataset_file coco --coco_path /share/wuweijia/Data/ICDAR2015 --batch_size 1 --with_box_refine --num_queries 300 --resume ./output/ICDAR15/checkpoint.pth --eval --eval_dataset_file icdar15 --threshold 0.5 --show
  3. # evaluation
  4. cd tools/Evaluation_ICDAR15/
  5. python scipy.py
  6. #ICDAR13
  7. python3 -m torch.distributed.launch --nproc_per_node=1 --use_env main.py --output_dir ./output/ICDAR13 --dataset_file coco --coco_path /share/wuweijia/Data/ICDAR2013 --batch_size 1 --with_box_refine --num_queries 500 --resume ./output/Pretrain_COCOTextV2/ICDAR150.786.pth --eval --eval_dataset_file icdar13 --threshold 0.5
  8. #MOVText
  9. python3 -m torch.distributed.launch --nproc_per_node=1 --use_env main.py --output_dir ./output/MOVText --dataset_file MOVText --coco_path /share/wuweijia/MyBenchMark/relabel/Dapan_lizhuang/final_FrameAnn/MOVText --batch_size 1 --with_box_refine --num_queries 100 --resume ./output/MOVText/checkpoint.pth --eval --eval_dataset_file MOVText --threshold 0.5