将 VOC 格式转换成 COCO 格式

  1. import os.path as osp
  2. import xml.etree.ElementTree as ET
  3. import mmcv
  4. def voc_classes():
  5. return [
  6. 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
  7. 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
  8. 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
  9. ]
  10. from glob import glob
  11. from tqdm import tqdm
  12. from PIL import Image
  13. label_ids = {name: i + 1 for i, name in enumerate(voc_classes())}
  14. def get_segmentation(points):
  15. return [points[0], points[1], points[2] + points[0], points[1],
  16. points[2] + points[0], points[3] + points[1], points[0], points[3] + points[1]]
  17. def parse_xml(xml_path, img_id, anno_id):
  18. tree = ET.parse(xml_path)
  19. root = tree.getroot()
  20. annotation = []
  21. for obj in root.findall('object'):
  22. name = obj.find('name').text
  23. if name == 'waterweeds':
  24. continue
  25. category_id = label_ids[name]
  26. bnd_box = obj.find('bndbox')
  27. xmin = int(bnd_box.find('xmin').text)
  28. ymin = int(bnd_box.find('ymin').text)
  29. xmax = int(bnd_box.find('xmax').text)
  30. ymax = int(bnd_box.find('ymax').text)
  31. w = xmax - xmin + 1
  32. h = ymax - ymin + 1
  33. area = w*h
  34. segmentation = get_segmentation([xmin, ymin, w, h])
  35. annotation.append({
  36. "segmentation": segmentation,
  37. "area": area,
  38. "iscrowd": 0,
  39. "image_id": img_id,
  40. "bbox": [xmin, ymin, w, h],
  41. "category_id": category_id,
  42. "id": anno_id,
  43. "ignore": 0})
  44. anno_id += 1
  45. return annotation, anno_id
  46. def cvt_annotations(img_path, xml_path, out_file):
  47. images = []
  48. annotations = []
  49. # xml_paths = glob(xml_path + '/*.xml')
  50. img_id = 1
  51. anno_id = 1
  52. for img_path in tqdm(glob(img_path + '/*.jpg')):
  53. w, h = Image.open(img_path).size
  54. img_name = osp.basename(img_path)
  55. img = {"file_name": img_name, "height": int(h), "width": int(w), "id": img_id}
  56. images.append(img)
  57. xml_file_name = img_name.split('.')[0] + '.xml'
  58. xml_file_path = osp.join(xml_path, xml_file_name)
  59. annos, anno_id = parse_xml(xml_file_path, img_id, anno_id)
  60. annotations.extend(annos)
  61. img_id += 1
  62. categories = []
  63. for k,v in label_ids.items():
  64. categories.append({"name": k, "id": v})
  65. final_result = {"images": images, "annotations": annotations, "categories": categories}
  66. mmcv.dump(final_result, out_file)
  67. return annotations
  68. def main():
  69. xml_path = r'E:\VOC\VOC2007/Annotations' # xml 所在文件夹
  70. img_path = r'E:\VOC\VOC2007/JPEGImages' # 图片所在文件夹
  71. print('processing {} ...'.format("xml format annotations"))
  72. cvt_annotations(img_path, xml_path, r'E:/VOC/all.json') # 保存的结果
  73. print('Done!')
  74. if __name__ == '__main__':
  75. main()

数据分析

VOC2007 总共有 9963 张图片,30638 个标注框
官方的 VOC2007 将其进行划分:训练集(2501张图片)、验证集(2510张图片)、测试集(4952张图片)

每个类别的标注框数量如下所示,可以看到训练集和验证集上每个类别的标注框数量基本相等(官方划分保证了训练集和验证集的类别分布一致)

image.png
整个数据集(9963 张图片)的类别分布如下,可以看出,person 这个类别是最多的
image.png

data_analysis-Copy1.ipynb