YoloV1

Keras框架实现

下载原始darknet的权重

  1. http://pjreddie.com/media/files/yolov1/tiny-yolov1.weights

将darknet模型转为keras模型

  1. """
  2. Reads Darknet config and weights and creates Keras models with TF backend.
  3. Currently only supports layers in Yolov1-tiny config.
  4. """
  5. import argparse
  6. import configparser
  7. import io
  8. import os
  9. from collections import defaultdict
  10. import numpy as np
  11. import tensorflow.keras.backend as K
  12. from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout, Reshape, LeakyReLU, ReLU, BatchNormalization
  13. from tensorflow.keras import Model
  14. from tensorflow.keras.regularizers import l2
  15. def unique_config_sections(config_file):
  16. """
  17. 将配置文件所有的节点生成独一无二的名字,添加到每一个节点后边
  18. """
  19. section_counters = defaultdict(int)
  20. output_stream = io.StringIO()
  21. with open(config_file) as fin:
  22. for line in fin:
  23. if line.startswith('['):
  24. section = line.strip().strip('[]')
  25. # 添加新的后缀
  26. _section = section + '_' + str(section_counters[section])
  27. # 同一前缀的后缀加1
  28. section_counters[section] += 1
  29. # 用新的节点名字替换原有的节点名字
  30. line = line.replace(section, _section)
  31. output_stream.write(line)
  32. output_stream.seek(0)
  33. return output_stream
  34. def main(args):
  35. config_path = os.path.expanduser(args.config_path)
  36. weights_path = os.path.expanduser(args.weights_path)
  37. output_path = os.path.expanduser(args.output_path)
  38. assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(config_path)
  39. assert weights_path.endswith('.weights'), '{} is not a .weights file'.format(weights_path)
  40. assert output_path.endswith('.hdf5'), 'output path {} is not a .hdf5 file'.format(output_path)
  41. # Load weights and config.
  42. print('加载darknet的权重...')
  43. weights_file = open(weights_path, 'rb')
  44. # 读取darknet网络权重的头部信息
  45. weights_header = np.ndarray(shape=(4,), dtype='int32', buffer=weights_file.read(16))
  46. print('darknet网络权重的头部信息:', weights_header)
  47. print('解析Darknet配置文件')
  48. unique_config_file = unique_config_sections(config_path)
  49. cfg_parser = configparser.ConfigParser()
  50. cfg_parser.read_file(unique_config_file)
  51. print('开始生成Keras网络模型...')
  52. try:
  53. image_height = int(cfg_parser['crop_0']['crop_height'])
  54. image_width = int(cfg_parser['crop_0']['crop_width'])
  55. except KeyError:
  56. image_height = int(cfg_parser['net_0']['height'])
  57. image_width = int(cfg_parser['net_0']['width'])
  58. # 定义输入层
  59. prev_layer = Input(shape=(image_height, image_width, 3))
  60. all_layers = [prev_layer]
  61. # 权重衰减(目的不是为了提高精度或者提高速度,而是防止过拟合)
  62. weight_decay = float(cfg_parser['net_0']['decay']) if 'net_0' in cfg_parser.sections() else 5e-4
  63. count = 0
  64. fc_flag = False
  65. for section in cfg_parser.sections():
  66. print('正在解析节点: {}'.format(section))
  67. # 解析卷积相关
  68. if section.startswith('convolutional'):
  69. filters = int(cfg_parser[section]['filters'])
  70. size = int(cfg_parser[section]['size'])
  71. stride = int(cfg_parser[section]['stride'])
  72. pad = int(cfg_parser[section]['pad'])
  73. activation = cfg_parser[section]['activation']
  74. batch_normalize = 'batch_normalize' in cfg_parser[section]
  75. # padding='same' is equivalent to Darknet pad=1
  76. padding = 'same' if pad == 1 else 'valid'
  77. # Setting weights.
  78. # Darknet serializes convolutional weights as:
  79. # [bias/beta, [gamma, mean, variance], conv_weights]
  80. # 获取上一层的形状
  81. prev_layer_shape = K.int_shape(prev_layer)
  82. # TODO: This assumes channel last dim_ordering.
  83. # keras设计成channel last,而darknet是channel first
  84. weights_shape = (size, size, prev_layer_shape[-1], filters)
  85. darknet_w_shape = (filters, weights_shape[2], size, size)
  86. # 参数个数
  87. weights_size = np.product(weights_shape)
  88. print('conv2d', 'bn' if batch_normalize else ' ', activation, weights_shape)
  89. conv_bias = np.ndarray(shape=(filters,), dtype='float32', buffer=weights_file.read(filters * 4))
  90. count += filters
  91. bn_weight_list = []
  92. if batch_normalize:
  93. bn_weights = np.ndarray(shape=(3, filters), dtype='float32', buffer=weights_file.read(filters * 12))
  94. count += 3 * filters
  95. # TODO: Keras BatchNormalization mistakenly refers to var
  96. # as std.
  97. bn_weight_list = [
  98. bn_weights[0], # scale gamma
  99. conv_bias, # shift beta
  100. bn_weights[1], # running mean
  101. bn_weights[2] # running var
  102. ]
  103. conv_weights = np.ndarray(
  104. shape=darknet_w_shape,
  105. dtype='float32',
  106. buffer=weights_file.read(weights_size * 4))
  107. count += weights_size
  108. # DarkNet conv_weights are serialized Caffe-style:
  109. # (out_dim, in_dim, height, width)
  110. # We would like to set these to Tensorflow order:
  111. # (height, width, in_dim, out_dim)
  112. # TODO: Add check for Theano dim ordering.
  113. conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
  114. conv_weights = [conv_weights] if batch_normalize \
  115. else [conv_weights, conv_bias]
  116. # Handle activation.
  117. act_fn = None
  118. if activation == 'leaky':
  119. pass # Add advanced activation later.
  120. elif activation == 'relu':
  121. pass
  122. elif activation != 'linear':
  123. raise ValueError(
  124. 'Unknown activation function `{}` in section {}'.format(
  125. activation, section))
  126. # Create Conv2D layer
  127. conv_layer = Conv2D(
  128. filters, (size, size),
  129. strides=(stride, stride),
  130. kernel_regularizer=l2(weight_decay),
  131. use_bias=not batch_normalize,
  132. weights=conv_weights,
  133. activation=act_fn,
  134. padding=padding,
  135. name=format(section))(prev_layer)
  136. if batch_normalize:
  137. conv_layer = BatchNormalization(
  138. weights=bn_weight_list,
  139. name='bn' + format(section))(conv_layer)
  140. prev_layer = conv_layer
  141. if activation == 'linear':
  142. all_layers.append(prev_layer)
  143. elif activation == 'leaky':
  144. act_layer = LeakyReLU(alpha=0.1)(prev_layer)
  145. prev_layer = act_layer
  146. all_layers.append(act_layer)
  147. elif activation == 'relu':
  148. act_layer = ReLU()(prev_layer)
  149. prev_layer = act_layer
  150. all_layers.append(act_layer)
  151. # 解析池化相关
  152. elif section.startswith('maxpool'):
  153. size = int(cfg_parser[section]['size'])
  154. stride = int(cfg_parser[section]['stride'])
  155. all_layers.append(
  156. MaxPooling2D(
  157. padding='same',
  158. pool_size=(size, size),
  159. strides=(stride, stride))(prev_layer))
  160. prev_layer = all_layers[-1]
  161. # 解析连接相关
  162. elif section.startswith('connected'):
  163. output_size = int(cfg_parser[section]['output'])
  164. activation = cfg_parser[section]['activation']
  165. prev_layer_shape = K.int_shape(prev_layer)
  166. # TODO: This assumes channel last dim_ordering.
  167. weights_shape = (np.prod(prev_layer_shape[1:]), output_size)
  168. darknet_w_shape = (output_size, weights_shape[0])
  169. weights_size = np.product(weights_shape)
  170. print('full-connected', activation, weights_shape)
  171. fc_bias = np.ndarray(
  172. shape=(output_size,),
  173. dtype='float32',
  174. buffer=weights_file.read(output_size * 4))
  175. count += output_size
  176. fc_weights = np.ndarray(
  177. shape=darknet_w_shape,
  178. dtype='float32',
  179. buffer=weights_file.read(weights_size * 4))
  180. count += weights_size
  181. # DarkNet fc_weights are serialized Caffe-style:
  182. # (out_dim, in_dim)
  183. # We would like to set these to Tensorflow order:
  184. # (in_dim, out_dim)
  185. # TODO: Add check for Theano dim ordering.
  186. fc_weights = np.transpose(fc_weights, [1, 0])
  187. fc_weights = [fc_weights, fc_bias]
  188. # Handle activation.
  189. act_fn = None
  190. if activation == 'leaky':
  191. pass # Add advanced activation later.
  192. elif activation == 'relu':
  193. pass
  194. elif activation != 'linear':
  195. raise ValueError(
  196. 'Unknown activation function `{}` in section {}'.format(
  197. activation, section))
  198. if not fc_flag:
  199. prev_layer = Flatten()(prev_layer)
  200. fc_flag = True
  201. # Create Full-Connect layer
  202. fc_layer = Dense(
  203. output_size,
  204. kernel_regularizer=l2(weight_decay),
  205. weights=fc_weights,
  206. activation=act_fn,
  207. name=format(section))(prev_layer)
  208. prev_layer = fc_layer
  209. if activation == 'linear':
  210. all_layers.append(prev_layer)
  211. elif activation == 'leaky':
  212. act_layer = LeakyReLU(alpha=0.1)(prev_layer)
  213. prev_layer = act_layer
  214. all_layers.append(act_layer)
  215. elif activation == 'relu':
  216. act_layer = ReLU()(prev_layer)
  217. prev_layer = act_layer
  218. all_layers.append(act_layer)
  219. # 解析dropout相关
  220. elif section.startswith('dropout'):
  221. probability = float(cfg_parser[section]['probability'])
  222. dropout_layer = Dropout(probability)(prev_layer)
  223. prev_layer = dropout_layer
  224. all_layers.append(prev_layer)
  225. # 解析目标检测相关
  226. elif section.startswith('detection'):
  227. classes = int(cfg_parser[section]['classes'])
  228. coords = int(cfg_parser[section]['coords'])
  229. rescore = int(cfg_parser[section]['rescore'])
  230. side = int(cfg_parser[section]['side'])
  231. num = int(cfg_parser[section]['num'])
  232. reshape_layer = Reshape(
  233. (side, side, classes + num * (coords + rescore))
  234. )(prev_layer)
  235. prev_layer = reshape_layer
  236. all_layers.append(prev_layer)
  237. # net、crop、detection、softmax不解析
  238. elif (section.startswith('net') or
  239. section.startswith('crop') or
  240. section.startswith('detection') or
  241. section.startswith('softmax')):
  242. pass # Configs not currently handled during models definition.
  243. # 异常节点抛出
  244. else:
  245. raise ValueError('不支持节点类型: {}'.format(section))
  246. # 创建keras的model
  247. model = Model(inputs=all_layers[0], outputs=all_layers[-1])
  248. print("keras的model简要内容:")
  249. model.summary()
  250. # 保存model的权值
  251. model.save_weights('{}'.format(output_path))
  252. print('Saved Keras models to {}'.format(output_path))
  253. # 读取darknet剩余的权值
  254. remaining_weights = len(weights_file.read()) / 4
  255. # 关闭流
  256. weights_file.close()
  257. print('Read {} of {} from Darknet weights.'.format(count, count + remaining_weights))
  258. if remaining_weights > 0:
  259. print('Warning: {} unused weights'.format(remaining_weights))
  260. if __name__ == '__main__':
  261. parser = argparse.ArgumentParser(description='Darknet Yolov1-tiny To Keras Converter.')
  262. parser.add_argument('config_path', help='Path to Darknet cfg file.')
  263. parser.add_argument('weights_path', help='Path to Darknet weights file.')
  264. parser.add_argument('output_path', help='Path to output Keras models file.')
  265. # main(parser.parse_args())
  266. main(parser.parse_args(['cfg/yolov1-tiny.cfg', 'weights/tiny-yolov1.weights', 'weights/tiny-yolov1.hdf5']))

VOC数据集转为Keras适用的形式

  1. import argparse
  2. import xml.etree.ElementTree as ElTr
  3. import os
  4. # 数据集
  5. sets = [('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
  6. classes_num = {
  7. 'aeroplane': 0,
  8. 'bicycle': 1,
  9. 'bird': 2,
  10. 'boat': 3,
  11. 'bottle': 4,
  12. 'bus': 5,
  13. 'car': 6,
  14. 'cat': 7,
  15. 'chair': 8,
  16. 'cow': 9,
  17. 'diningtable': 10,
  18. 'dog': 11,
  19. 'horse': 12,
  20. 'motorbike': 13,
  21. 'person': 14,
  22. 'pottedplant': 15,
  23. 'sheep': 16,
  24. 'sofa': 17,
  25. 'train': 18,
  26. 'tvmonitor': 19
  27. }
  28. def convert_annotation(annotation_dir, year, image_id, f):
  29. # 获取文件的标注内容
  30. in_file = os.path.join(annotation_dir, 'VOC%s/Annotations/%s.xml' % (year, image_id))
  31. tree = ElTr.parse(in_file)
  32. root = tree.getroot()
  33. # 获取标注xml文件中所有的object节点
  34. for obj in root.iter('object'):
  35. # 目标识别难度
  36. difficult = obj.find('difficult').text
  37. # 类别名称
  38. cls = obj.find('name').text
  39. # 全部的类别
  40. classes = list(classes_num.keys())
  41. # 如果object不在分类类比中或者识别难度等于1,则不用于训练
  42. if cls not in classes or int(difficult) == 1:
  43. continue
  44. # 类别映射号码
  45. cls_id = classes.index(cls)
  46. # 获取object的边界
  47. xml_box = obj.find('bndbox')
  48. b = (int(xml_box.find('xmin').text), int(xml_box.find('ymin').text),
  49. int(xml_box.find('xmax').text), int(xml_box.find('ymax').text))
  50. f.write(' ' + ','.join([str(a) for a in b]) + ',' + str(cls_id))
  51. def main(args):
  52. data_dir = os.path.expanduser(args.dir)
  53. for year, image_set in sets:
  54. # 读取训练集的文件编号
  55. with open(os.path.join(data_dir, 'VOC%s/ImageSets/Main/%s.txt' % (year, image_set)), 'r') as f:
  56. image_ids = f.read().strip().split()
  57. # 获取训练数据的具体路径
  58. with open(os.path.join(data_dir, '%s_%s.txt' % (year, image_set)), 'w') as f:
  59. for image_id in image_ids:
  60. f.write('%s/VOC%s/JPEGImages/%s.jpg' % (data_dir, year, image_id))
  61. convert_annotation(data_dir, year, image_id, f)
  62. f.write('\n')
  63. if __name__ == '__main__':
  64. parser = argparse.ArgumentParser(description='Build Annotations.')
  65. parser.add_argument('dir', default='..', help='Annotations.')
  66. main(parser.parse_args(['data/VOCdevkit']))