选择框架:Pytorch
yolo版本:yolov5-6.0
环境:win11+pycharm2019+anaconda
github下载
文件架构
image.png

1、xml文件转换

由于数据集标注导出的格式为 VOC2007(xml 文件),需要用脚本 la.py 将其转换为 YOLO 所需的 txt 格式:
image.png
需要放成如图示的架构,修改:

  1. #需要修改两处:一是训练类别列表 classes,二是训练集所占比例 TRAIN_RATIO
  2. classes = ['crack']
  3. TRAIN_RATIO = 80
  1. import xml.etree.ElementTree as ET
  2. import pickle
  3. import os
  4. from os import listdir, getcwd
  5. from os.path import join
  6. import random
  7. from shutil import copyfile
  8. #修改你要训练的类
  9. classes = ['crack']
  10. # classes=["ball"]
  11. TRAIN_RATIO = 80
  12. def clear_hidden_files(path):
  13. dir_list = os.listdir(path)
  14. for i in dir_list:
  15. abspath = os.path.join(os.path.abspath(path), i)
  16. if os.path.isfile(abspath):
  17. if i.startswith("._"):
  18. os.remove(abspath)
  19. else:
  20. clear_hidden_files(abspath)
  21. def convert(size, box):
  22. dw = 1. / size[0]
  23. dh = 1. / size[1]
  24. x = (box[0] + box[1]) / 2.0
  25. y = (box[2] + box[3]) / 2.0
  26. w = box[1] - box[0]
  27. h = box[3] - box[2]
  28. x = x * dw
  29. w = w * dw
  30. y = y * dh
  31. h = h * dh
  32. return (x, y, w, h)
  33. def convert_annotation(image_id):
  34. in_file = open('VOCdevkit/VOC2007/Annotations/%s.xml' % image_id)
  35. out_file = open('VOCdevkit/VOC2007/YOLOLabels/%s.txt' % image_id, 'w')
  36. tree = ET.parse(in_file)
  37. root = tree.getroot()
  38. size = root.find('size')
  39. w = int(size.find('width').text)
  40. h = int(size.find('height').text)
  41. for obj in root.iter('object'):
  42. difficult = obj.find('difficult').text
  43. cls = obj.find('name').text
  44. if cls not in classes or int(difficult) == 1:
  45. continue
  46. cls_id = classes.index(cls)
  47. xmlbox = obj.find('bndbox')
  48. b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
  49. float(xmlbox.find('ymax').text))
  50. bb = convert((w, h), b)
  51. out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
  52. in_file.close()
  53. out_file.close()
  54. wd = os.getcwd()
  55. wd = os.getcwd()
  56. data_base_dir = os.path.join(wd, "VOCdevkit/")
  57. if not os.path.isdir(data_base_dir):
  58. os.mkdir(data_base_dir)
  59. work_sapce_dir = os.path.join(data_base_dir, "VOC2007/")
  60. if not os.path.isdir(work_sapce_dir):
  61. os.mkdir(work_sapce_dir)
  62. annotation_dir = os.path.join(work_sapce_dir, "Annotations/")
  63. if not os.path.isdir(annotation_dir):
  64. os.mkdir(annotation_dir)
  65. clear_hidden_files(annotation_dir)
  66. image_dir = os.path.join(work_sapce_dir, "JPEGImages/")
  67. if not os.path.isdir(image_dir):
  68. os.mkdir(image_dir)
  69. clear_hidden_files(image_dir)
  70. yolo_labels_dir = os.path.join(work_sapce_dir, "YOLOLabels/")
  71. if not os.path.isdir(yolo_labels_dir):
  72. os.mkdir(yolo_labels_dir)
  73. clear_hidden_files(yolo_labels_dir)
  74. yolov5_images_dir = os.path.join(data_base_dir, "images/")
  75. if not os.path.isdir(yolov5_images_dir):
  76. os.mkdir(yolov5_images_dir)
  77. clear_hidden_files(yolov5_images_dir)
  78. yolov5_labels_dir = os.path.join(data_base_dir, "labels/")
  79. if not os.path.isdir(yolov5_labels_dir):
  80. os.mkdir(yolov5_labels_dir)
  81. clear_hidden_files(yolov5_labels_dir)
  82. yolov5_images_train_dir = os.path.join(yolov5_images_dir, "train/")
  83. if not os.path.isdir(yolov5_images_train_dir):
  84. os.mkdir(yolov5_images_train_dir)
  85. clear_hidden_files(yolov5_images_train_dir)
  86. yolov5_images_test_dir = os.path.join(yolov5_images_dir, "val/")
  87. if not os.path.isdir(yolov5_images_test_dir):
  88. os.mkdir(yolov5_images_test_dir)
  89. clear_hidden_files(yolov5_images_test_dir)
  90. yolov5_labels_train_dir = os.path.join(yolov5_labels_dir, "train/")
  91. if not os.path.isdir(yolov5_labels_train_dir):
  92. os.mkdir(yolov5_labels_train_dir)
  93. clear_hidden_files(yolov5_labels_train_dir)
  94. yolov5_labels_test_dir = os.path.join(yolov5_labels_dir, "val/")
  95. if not os.path.isdir(yolov5_labels_test_dir):
  96. os.mkdir(yolov5_labels_test_dir)
  97. clear_hidden_files(yolov5_labels_test_dir)
  98. train_file = open(os.path.join(wd, "yolov5_train.txt"), 'w')
  99. test_file = open(os.path.join(wd, "yolov5_val.txt"), 'w')
  100. train_file.close()
  101. test_file.close()
  102. train_file = open(os.path.join(wd, "yolov5_train.txt"), 'a')
  103. test_file = open(os.path.join(wd, "yolov5_val.txt"), 'a')
  104. list_imgs = os.listdir(image_dir) # list image files
  105. prob = random.randint(1, 100)
  106. print("Probability: %d" % prob)
  107. for i in range(0, len(list_imgs)):
  108. path = os.path.join(image_dir, list_imgs[i])
  109. if os.path.isfile(path):
  110. image_path = image_dir + list_imgs[i]
  111. voc_path = list_imgs[i]
  112. (nameWithoutExtention, extention) = os.path.splitext(os.path.basename(image_path))
  113. (voc_nameWithoutExtention, voc_extention) = os.path.splitext(os.path.basename(voc_path))
  114. annotation_name = nameWithoutExtention + '.xml'
  115. annotation_path = os.path.join(annotation_dir, annotation_name)
  116. label_name = nameWithoutExtention + '.txt'
  117. label_path = os.path.join(yolo_labels_dir, label_name)
  118. prob = random.randint(1, 100)
  119. print("Probability: %d" % prob)
  120. if (prob < TRAIN_RATIO): # train dataset
  121. if os.path.exists(annotation_path):
  122. train_file.write(image_path + '\n')
  123. convert_annotation(nameWithoutExtention) # convert label
  124. copyfile(image_path, yolov5_images_train_dir + voc_path)
  125. copyfile(label_path, yolov5_labels_train_dir + label_name)
  126. else: # test dataset
  127. if os.path.exists(annotation_path):
  128. test_file.write(image_path + '\n')
  129. convert_annotation(nameWithoutExtention) # convert label
  130. copyfile(image_path, yolov5_images_test_dir + voc_path)
  131. copyfile(label_path, yolov5_labels_test_dir + label_name)
  132. train_file.close()
  133. test_file.close()

在下载下来的文件基础上进行修改

2、加入预训练权重文件

image.png

3、修改

3.1 修改文件 VOC.yaml

  1. data/VOC.yaml

内容根据实际需要修改:
路径、类别数、类名。
需要与之前数据集标注和脚本转换一致。
建议先复制一份原文件,再在副本上按以下内容修改:

  1. train: VOCdevkit/VOC2007/images/train
  2. val: VOCdevkit/VOC2007/images/val
  3. nc: 1
  4. names: ['crack']

3.2 修改文件 yolov5s1.yaml

  1. models/yolov5s1.yaml

修改:

  1. # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
  2. # Parameters
  3. nc: 1 # number of classes
  4. depth_multiple: 0.33 # model depth multiple
  5. width_multiple: 0.50 # layer channel multiple
  6. anchors:
  7. - [10,13, 16,30, 33,23] # P3/8
  8. - [30,61, 62,45, 59,119] # P4/16
  9. - [116,90, 156,198, 373,326] # P5/32
  10. # YOLOv5 v6.0 backbone
  11. backbone:
  12. # [from, number, module, args]
  13. [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
  14. [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
  15. [-1, 3, C3, [128]],
  16. [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
  17. [-1, 6, C3, [256]],
  18. [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
  19. [-1, 9, C3, [512]],
  20. [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
  21. [-1, 3, C3, [1024]],
  22. [-1, 1, SPPF, [1024, 5]], # 9
  23. ]
  24. # YOLOv5 v6.0 head
  25. head:
  26. [[-1, 1, Conv, [512, 1, 1]],
  27. [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  28. [[-1, 6], 1, Concat, [1]], # cat backbone P4
  29. [-1, 3, C3, [512, False]], # 13
  30. [-1, 1, Conv, [256, 1, 1]],
  31. [-1, 1, nn.Upsample, [None, 2, 'nearest']],
  32. [[-1, 4], 1, Concat, [1]], # cat backbone P3
  33. [-1, 3, C3, [256, False]], # 17 (P3/8-small)
  34. [-1, 1, Conv, [256, 3, 2]],
  35. [[-1, 14], 1, Concat, [1]], # cat head P4
  36. [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
  37. [-1, 1, Conv, [512, 3, 2]],
  38. [[-1, 10], 1, Concat, [1]], # cat head P5
  39. [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
  40. [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  41. ]

3.3 修改文件 train.py

首先是几个路径
image.png
紧接着是训练参数,解释如下

  1. if __name__ == '__main__':
  2. """
  3. opt模型主要参数解析:
  4. --weights:初始化的权重文件的路径地址
  5. --cfg:模型yaml文件的路径地址
  6. --data:数据yaml文件的路径地址
  7. --hyp:超参数文件路径地址
  8. --epochs:训练轮次
  9. --batch-size:喂入批次文件的多少
  10. --img-size:输入图片尺寸
  11. --rect:是否采用矩形训练,默认False
  12. --resume:接着打断训练上次的结果接着训练
  13. --nosave:不保存模型,默认False
  14. --notest:不进行test,默认False
  15. --noautoanchor:不自动调整anchor,默认False
  16. --evolve:是否进行超参数进化,默认False
  17. --bucket:谷歌云盘bucket,一般不会用到
  18. --cache-images:是否提前缓存图片到内存,以加快训练速度,默认False
  19. --image-weights:使用加权图像选择进行训练
  20. --device:训练的设备,cpu;0(表示一个gpu设备cuda:0);0,1,2,3(多个gpu设备)
  21. --multi-scale:是否进行多尺度训练,默认False
  22. --single-cls:数据集是否只有一个类别,默认False
  23. --adam:是否使用adam优化器
  24. --sync-bn:是否使用跨卡同步BN,在DDP模式使用
  25. --local_rank:DDP参数,请勿修改
  26. --workers:最大工作核心数
  27. --project:训练模型的保存位置
  28. --name:模型保存的目录名称
  29. --exist-ok:模型目录是否存在,不存在就创建
  30. """
  31. parser = argparse.ArgumentParser()
  32. parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
  33. parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
  34. parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
  35. parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
  36. parser.add_argument('--epochs', type=int, default=300)
  37. parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
  38. parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes')
  39. parser.add_argument('--rect', action='store_true', help='rectangular training')
  40. parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
  41. parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
  42. parser.add_argument('--notest', action='store_true', help='only test final epoch')
  43. parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
  44. parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
  45. parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
  46. parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
  47. parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
  48. parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
  49. parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
  50. parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
  51. parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
  52. parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
  53. parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
  54. parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
  55. parser.add_argument('--project', default='runs/train', help='save to project/name')
  56. parser.add_argument('--entity', default=None, help='W&B entity')
  57. parser.add_argument('--name', default='exp', help='save to project/name')
  58. parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
  59. parser.add_argument('--quad', action='store_true', help='quad dataloader')
  60. parser.add_argument('--linear-lr', action='store_true', help='linear LR')
  61. parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon')
  62. parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table')
  63. parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B')
  64. parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch')
  65. parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
  66. opt = parser.parse_args()

根据设备情况,选择参数:
image.png
image.png
image.png
正常编译训练:
image.png

4、参考文献

https://blog.csdn.net/didiaopao/article/details/119954291