deeplab环境安装

2021年12月24日 tf最高版本2.6,对应最新的models文件。在research文件中的deeplab代码是tf1版本 ,因此环境推荐安装

  1. tensorflow = 1.15
  2. cudatoolkit = 10.0.130
  3. cudnn = 7.6.5
  4. opencv-python = 4.5.3.56
  5. labelme = 3.9
  6. pip install labelme -i https://mirror.baidu.com/pypi/simple

剩下的缺什么装什么,tf安装好后别的库版本都会自动适配。
测试环境通过后,开始制作数据集。

制作数据集

首先对数据集进行标注,使用labelme,标注时注意标签一致(区分大小写)。
在jpg文件夹下会出现对应的同名json文件。
image.png
首先对每个类别的数据,进行重命名,同时检验标签是否一致:

  1. import os
  2. import json
  3. from PIL import Image
  4. path = "/media/neal/neal/Dataset/medical2021.11.13/YF2021.10.30"
  5. output_path = "/media/neal/neal/Dataset/medical2021.11.13/YF"
  6. class_name = "yf"
  7. count = os.listdir(path)
  8. cnt = 0
  9. for i in range(0, len(count)):
  10. if count[i].endswith("jpg"):
  11. cnt += 1
  12. jpg_path = os.path.join(path, count[i])
  13. img = Image.open(jpg_path)
  14. new_img_name = class_name + '_' + '%05d' % int(cnt) + '.jpg'
  15. img.save(output_path + '/' + new_img_name)
  16. json_path = path + "/" + count[i].split('.')[0] + '.json'
  17. with open(json_path , 'rb') as f:
  18. params = json.load(f)
  19. if params["shapes"][0]["label"] != class_name:
  20. print("dataset label has error !!!")
  21. params['imagePath'] = new_img_name
  22. new_json_name = os.path.join(output_path, new_img_name.replace("jpg", "json"))
  23. with open(new_json_name, 'w') as r:
  24. json.dump(params, r, indent = 4)
  25. r.close()
  26. f.close()

image.png
然后将所有类别的jpg和json文件一起放在jpg_json_dir文件夹中,具体文件夹格式见代码,运行代码:

  1. import os
  2. from PIL import Image
  3. import numpy as np
  4. """
  5. input : workspace_dir,jpg_json_dir ,class_name.txt
  6. output: png
  7. required pkgs: pip install labelme
  8. + workspace_dir root目录
  9. + jpg_json_dir 在root目录下的子文件夹,存放原始jpg图片和labelme标注后生成的json文件
  10. - *.jpg
  11. - *.json
  12. + labelme_output 所有图片的labelme输出文件夹
  13. + jpg 所有jpg图片
  14. + png 用于最终训练的全景图,单通道
  15. - class_name.txt 按行存放所有的标签,第一行 _background_
  16. """
  17. # 文件路径处理
  18. workspace_dir = "/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical"
  19. assert (os.path.exists(workspace_dir)), "please check workspace_dir folder"
  20. jpg_json_dir = os.path.join(workspace_dir, "jf_and_yf_jpg_json_file")
  21. assert (os.path.exists(jpg_json_dir)), "please check jpg_json_dir folder"
  22. jpg_dir = os.path.join(workspace_dir, "jpg")
  23. png_dir = os.path.join(workspace_dir, "png")
  24. class_name_path = workspace_dir + "/class_name.txt"
  25. if not os.path.exists(class_name_path): print("clsaa_name.txt is not found")
  26. if not os.path.exists(jpg_dir): os.mkdir(jpg_dir)
  27. if not os.path.exists(png_dir): os.mkdir(png_dir)
  28. def generate_labelme_output_file():
  29. json_file = os.listdir(jpg_json_dir)
  30. for file in json_file:
  31. if file.split('.')[1] == 'json':
  32. os.system("labelme_json_to_dataset %s" % (jpg_json_dir + '/' + file)) #
  33. def main():
  34. # 读取原文件夹
  35. count = os.listdir(jpg_json_dir)
  36. for i in range(0, len(count)):
  37. # 如果里的文件以jpg结尾,保存到jpg文件夹,寻找它对应的png
  38. if count[i].endswith("jpg"):
  39. jpg_path = os.path.join(jpg_json_dir, count[i])
  40. img = Image.open(jpg_path)
  41. img.save(os.path.join(jpg_dir, count[i]))
  42. # 找到对应的png
  43. labelme_png_path = jpg_json_dir + "/" + count[i].split(".")[0] + "_json/label.png"
  44. img = Image.open(labelme_png_path)
  45. with open(jpg_json_dir + "/" + count[i].split(".")[0] + "_json/label_names.txt", "r") as f:
  46. new = Image.new("P", (img.width, img.height))
  47. label = f.read().splitlines()[1]
  48. # label = f.read().splitlines()
  49. # class_txt = open(class_name_path, "r")
  50. # class_name = class_txt.read().splitlines()
  51. # # 找到局部的类在全局中的类的序号
  52. # for name in label:
  53. # # index_json是x_json文件里存在的类label_names.txt,局部类
  54. # index_json = label.index(name)
  55. # # index_all是全局的类,
  56. # index_all = class_name.index(name)
  57. # new = new + (index_all * (np.array(img) == index_json))
  58. if label == "jf":
  59. new = new + (1 * (np.array(img) == 1))
  60. elif label == "yf":
  61. new = new + (2 * (np.array(img) == 1))
  62. else:
  63. print(" label_names has error -->{}".format(labelme_png_path))
  64. # # 找到全局的类
  65. # class_txt = open(class_name_path, "r")
  66. # class_name = class_txt.read().splitlines()
  67. # print(class_name)
  68. # # ["bk","cat","dog"] 全局的类
  69. # # 打开x_json文件里面存在的类,称其为局部类
  70. # with open(output_dir + "/" + count[i].split(".")[0] + "_json/label_names.txt", "r") as f:
  71. # names = f.read().splitlines()
  72. # # ["bk","dog"] 局部的类
  73. # # 新建一张空白图片, 单通道
  74. # new = Image.new("P", (img.width, img.height))
  75. #
  76. # # 找到局部的类在全局中的类的序号
  77. # for name in names:
  78. # # index_json是x_json文件里存在的类label_names.txt,局部类
  79. # index_json = names.index(name)
  80. # # index_all是全局的类,
  81. # index_all = class_name.index(name)
  82. #
  83. # # 将局部类转换成为全局类
  84. # # 将原图img中像素点的值为index_json的像素点乘以其在全局中的像素点的所对应的类的序号 得到 其实际在数据集中像素点的值
  85. # # 比如dog,在局部类(output/x_json/label_names)中它的序号为1,dog在原图中的像素点的值也为1.
  86. # # 但是在全局的类(before/classes.txt)中其对应的序号为2,所以在新的图片中要将局部类的像素点的值*全局类的序号,从而得到标签文件
  87. # new = new + (index_all * (np.array(img) == index_json))
  88. new = Image.fromarray(np.uint8(new))
  89. new = new.convert("P")
  90. new.save(os.path.join(png_dir, count[i].replace("jpg", "png")))
  91. # 找到新的标签文件中像素点值的最大值和最小值,最大值为像素点对应的类在class_name.txt中的序号,最小值为背景,即0
  92. print(np.max(new), np.min(new), count[i].replace("jpg", "png"))
  93. if __name__ == '__main__':
  94. #generate_labelme_output_file()
  95. main()

出现对应的全局mask(或者叫全景图)训练文件,我保存在png文件夹下(注意是单通道图),
image.png

使用showimage文件可以查看单张图片中具体位置的像素值:

  1. import cv2
  2. img= cv2.imread('/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/png/jf_10.png') #定义图片位置
  3. #img= cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #转化为灰度图 因为我读取的直接是灰度标签图片就不用转化了
  4. def onmouse(event, x, y, flags, param): #标准鼠标交互函数
  5. if event==cv2.EVENT_MOUSEMOVE: #当鼠标移动时
  6. print(img[y,x]) #显示鼠标所在像素的数值,注意像素表示方法和坐标位置的不同
  7. def main():
  8. cv2.namedWindow("img") #构建窗口
  9. cv2.setMouseCallback("img", onmouse) #回调绑定窗口
  10. while True: #无限循环
  11. cv2.imshow("img",img) #显示图像
  12. if cv2.waitKey() == ord('q'):break #按下‘q'键,退出
  13. cv2.destroyAllWindows() #关闭窗口
  14. if __name__ == '__main__': #运行
  15. main()

然后,制作index标签:

  1. # 文件名写入txt
  2. import random
  3. import glob
  4. img_path = glob.glob('/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/jf_and_yf_jpg_json_file/*.jpg')
  5. all_txt = '/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/all.txt'
  6. train_txt = "/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/index/train.txt"
  7. trainval_txt = "/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/index/trainval.txt"
  8. val_txt = "/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/index/val.txt"
  9. def clear_file():
  10. with open(all_txt, 'a') as f:
  11. f.truncate(0)
  12. with open(train_txt, 'a') as f:
  13. f.truncate(0)
  14. with open(trainval_txt, 'a') as f:
  15. f.truncate(0)
  16. with open(val_txt, 'a') as f:
  17. f.truncate(0)
  18. def split_train_val():
  19. for each in img_path:
  20. with open(all_txt, 'a') as f:
  21. f.write(each.split("/")[-1][:-4] + '\n') # 切片换成自己路径对应的文件名
  22. with open('/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/all.txt', 'r') as f:
  23. lines = f.readlines()
  24. g = [i for i in range(1, len(img_path)+1)] # 设置文件总数
  25. random.shuffle(g)
  26. # 设置需要的文件数
  27. train = g[:len(img_path)*7//10]
  28. print("train num --> {}".format(len(train)))
  29. trainval = g[len(img_path)*7//10:len(img_path)*9//10]
  30. print("trainval num --> {}".format(len(trainval)))
  31. val = g[len(img_path)*9//10:]
  32. print("val num --> {}".format(len(val)))
  33. for index, line in enumerate(lines, 1):
  34. if index in train:
  35. with open(train_txt, 'a') as trainf:
  36. trainf.write(line)
  37. elif index in trainval:
  38. with open(trainval_txt, 'a') as trainvalf:
  39. trainvalf.write(line)
  40. elif index in val:
  41. with open(val_txt, 'a') as valf:
  42. valf.write(line)
  43. if __name__ == '__main__':
  44. clear_file()
  45. split_train_val()

下一步生成TFRecord,这个代码是官网带的,我在前面添加了一些注释,以及命令行运行的代码,直接复制后命令行运行。

  1. # Lint as: python2, python3
  2. # Copyright 2018 The TensorFlow Authors All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. # ==============================================================================
  16. """Converts PASCAL VOC 2012 data to TFRecord file format with Example protos.
  17. PASCAL VOC 2012 dataset is expected to have the following directory structure:
  18. + pascal_voc_seg
  19. - build_data.py
  20. - build_voc2012_data.py (current working directory).
  21. + VOCdevkit
  22. + VOC2012
  23. + JPEGImages
  24. + SegmentationClass
  25. + ImageSets
  26. + Segmentation
  27. + tfrecord
  28. Image folder:
  29. ./VOCdevkit/VOC2012/JPEGImages
  30. Semantic segmentation annotations:
  31. ./VOCdevkit/VOC2012/SegmentationClass
  32. list folder:
  33. ./VOCdevkit/VOC2012/ImageSets/Segmentation
  34. This script converts data into sharded data files and save at tfrecord folder.
  35. The Example proto contains the following fields:
  36. image/encoded: encoded image content.
  37. image/filename: image filename.
  38. image/format: image file format.
  39. image/height: image height.
  40. image/width: image width.
  41. image/channels: image channels.
  42. image/segmentation/class/encoded: encoded semantic segmentation content.
  43. image/segmentation/class/format: semantic segmentation file format.
  44. """
  45. from __future__ import absolute_import
  46. from __future__ import division
  47. from __future__ import print_function
  48. import math
  49. import os.path
  50. import sys
  51. import build_data
  52. from six.moves import range
  53. import tensorflow as tf
  54. # image_format 这个参数只能在命令行给 ,没有仔细研究,先用命令行吧
  55. """
  56. python ./build_voc2012_data.py \
  57. --image_folder="/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/jpg" \
  58. --semantic_segmentation_folder="/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/png" \
  59. --list_folder="/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/index" \
  60. --image_format="jpg" \
  61. --output_dir="/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/TFRecord"
  62. """
  63. FLAGS = tf.app.flags.FLAGS
  64. tf.app.flags.DEFINE_string('image_folder',
  65. '/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/jpg',
  66. 'Folder containing images.')
  67. tf.app.flags.DEFINE_string(
  68. 'semantic_segmentation_folder',
  69. '/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/png',
  70. 'Folder containing semantic segmentation annotations.')
  71. tf.app.flags.DEFINE_string(
  72. 'list_folder',
  73. '/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/index',
  74. 'Folder containing lists for training and validation')
  75. tf.app.flags.DEFINE_string(
  76. 'output_dir',
  77. '/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/TFRecord',
  78. 'Path to save converted SSTable of TensorFlow examples.')
  79. _NUM_SHARDS = 4
  80. def _convert_dataset(dataset_split):
  81. """Converts the specified dataset split to TFRecord format.
  82. Args:
  83. dataset_split: The dataset split (e.g., train, test).
  84. Raises:
  85. RuntimeError: If loaded image and label have different shape.
  86. """
  87. dataset = os.path.basename(dataset_split)[:-4]
  88. sys.stdout.write('Processing ' + dataset)
  89. filenames = [x.strip('\n') for x in open(dataset_split, 'r')]
  90. num_images = len(filenames)
  91. num_per_shard = int(math.ceil(num_images / _NUM_SHARDS))
  92. image_reader = build_data.ImageReader('jpeg', channels=3)
  93. label_reader = build_data.ImageReader('png', channels=1)
  94. for shard_id in range(_NUM_SHARDS):
  95. output_filename = os.path.join(
  96. FLAGS.output_dir,
  97. '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
  98. with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
  99. start_idx = shard_id * num_per_shard
  100. end_idx = min((shard_id + 1) * num_per_shard, num_images)
  101. for i in range(start_idx, end_idx):
  102. sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
  103. i + 1, len(filenames), shard_id))
  104. sys.stdout.flush()
  105. # Read the image.
  106. image_filename = os.path.join(
  107. FLAGS.image_folder, filenames[i] + '.' + FLAGS.image_format)
  108. image_data = tf.gfile.GFile(image_filename, 'rb').read()
  109. height, width = image_reader.read_image_dims(image_data)
  110. # Read the semantic segmentation annotation.
  111. seg_filename = os.path.join(
  112. FLAGS.semantic_segmentation_folder,
  113. filenames[i] + '.' + FLAGS.label_format)
  114. seg_data = tf.gfile.GFile(seg_filename, 'rb').read()
  115. seg_height, seg_width = label_reader.read_image_dims(seg_data)
  116. if height != seg_height or width != seg_width:
  117. raise RuntimeError('Shape mismatched between image and label.')
  118. # Convert to tf example.
  119. example = build_data.image_seg_to_tfexample(
  120. image_data, filenames[i], height, width, seg_data)
  121. tfrecord_writer.write(example.SerializeToString())
  122. sys.stdout.write('\n')
  123. sys.stdout.flush()
  124. def main(unused_argv):
  125. dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt'))
  126. for dataset_split in dataset_splits:
  127. _convert_dataset(dataset_split)
  128. if __name__ == '__main__':
  129. tf.app.run()

生成TFRecord文件后,数据集制作完成。

注册数据集

data_generator.py

在/models/research/deeplab/datasets 路径的data_generator.py 第93行:

  1. _MYDATA_INFORMATION = DatasetDescriptor(
  2. splits_to_sizes={
  3. 'train':44, # num of samples in images/training train.txt的行数
  4. 'val':27, # num of samples in images/validation val.txt的行数
  5. },
  6. num_classes=6, # 我的标签是5类(包括background),加上ignore_label总共六类
  7. ignore_label=255,
  8. )

然后找到 _DATASETS_INFORMATION ,加上mydata….一行

  1. _DATASETS_INFORMATION = {
  2. 'cityscapes': _CITYSCAPES_INFORMATION,
  3. 'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
  4. 'ade20k': _ADE20K_INFORMATION,
  5. 'mydata': _MYDATA_INFORMATION, #注册上面的数据集 加粗部分一致,前面的mydata随意取
  6. }

同样在models/research/deeplab/deprecated下的segmentation_dataset.py文件进行同样的操作

train_utils.py

在models/research/deeplab/utils/train_utils.py的213行左右

  1. exclude_list = ['global_step','logits'] #本来只有global_step ,现在加上 logits,表示不加载逻辑层的参数
  2. if not initialize_last_layer:
  3. exclude_list.extend(last_layers)

train.py

在models/research/deeplab/train.py里 156行左右

  1. # Set to False if one does not want to re-use the trained classifier weights.
  2. flags.DEFINE_boolean('initialize_last_layer', False, 'Initialize the last layer.') #这个本来是True设置为False
  3. flags.DEFINE_boolean('last_layers_contain_logits_only', True, 'Only consider logits as last layers or not.')#这个设置为True

image.png

input_preprocess.py

  1. models/research/deeplab/input_preprocess.py 128行左右
  2. # Randomly crop the image and label.
  3. if is_training and label is not None:
  4. processed_image, label = preprocess_utils.random_crop([processed_image, label], crop_height, crop_width)

为这个if加一个else,下面的代码显示得不好,不过在链接里也有
参考链接:https://github.com/tensorflow/models/issues/3695

  1. else:
  2. rr = tf.minimum(tf.cast(crop_height, tf.float32) / tf.cast(image_height, tf.float32), \
  3. tf.cast(crop_width, tf.float32) / tf.cast(image_width, tf.float32))
  4. newh = tf.cast(tf.cast(image_height, tf.float32) * rr, tf.int32)
  5. neww = tf.cast((tf.cast(image_width, tf.float32) * rr), tf.int32)
  6. processed_image = tf.image.resize_images(processed_image, (newh, neww), method=tf.image.ResizeMethod.BILINEAR, align_corners=True)
  7. processed_image = preprocess_utils.pad_to_bounding_box(processed_image, 0, 0, crop_height, crop_width, mean_pixel)

不加这一段在运行vis.py和eval的时候会报错
类似
InvalidArgumentError (see above for traceback): padded_shape[1]=128 is not divisible by block_shape[1]=12
Invalid argument: padded_shape[1]=69 is not divisible by block_shape[1]=2
这种错基本都是这样解决

训练

  1. python train.py \
  2. --logtostderr \
  3. --training_number_of_steps=100000 \
  4. --train_split="train" \
  5. --model_variant="xception_65" \
  6. --atrous_rates=6 \
  7. --atrous_rates=12 \
  8. --atrous_rates=18 \
  9. --output_stride=16 \
  10. --decoder_output_stride=4 \
  11. --train_crop_size="513,513" \
  12. --train_batch_size=4 \
  13. --dataset="mydata" \
  14. --tf_initial_checkpoint='/media/neal/neal/deeplab/models/research/deeplab/my_utils/pt/deeplabv3_xception_2018_01_04/xception/model.ckpt' \
  15. --train_logdir='/media/neal/neal/deeplab/models/research/deeplab/my_utils/result' \
  16. --dataset_dir='/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/TFRecord'


命令行提示找不到python模块,在python文件中导入包前加入

  1. import sys
  2. sys.path.append("项目根目录")

可视化

  1. python vis.py \
  2. --logtostderr \
  3. --vis_split="val" \
  4. --model_variant="xception_65" \
  5. --atrous_rates=6 \
  6. --atrous_rates=12 \
  7. --atrous_rates=18 \
  8. --output_stride=16 \
  9. --decoder_output_stride=4 \
  10. --vis_crop_size="513,513" \
  11. --dataset="mydata" \
  12. --colormap_type="pascal" \
  13. --checkpoint_dir='/media/neal/neal/deeplab/models/research/deeplab/my_utils/result' \
  14. --vis_logdir='/media/neal/neal/deeplab/models/research/deeplab/my_utils/result_vis' \
  15. --dataset_dir='/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/TFRecord'

验证

  1. python eval.py \
  2. --logtostderr \
  3. --eval_split="val" \
  4. --model_variant="xception_65" \
  5. --atrous_rates=6 \
  6. --atrous_rates=12 \
  7. --atrous_rates=18 \
  8. --output_stride=16 \
  9. --decoder_output_stride=4 \
  10. --eval_crop_size="513,513" \
  11. --dataset="mydata" \
  12. --checkpoint_dir='/media/neal/neal/deeplab/models/research/deeplab/my_utils/result' \
  13. --eval_logdir='/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/eval' \
  14. --dataset_dir='/media/neal/neal/deeplab/models/research/deeplab/my_utils/medical/TFRecord'

Tensorboard

  1. python /home/neal/Python/anaconda/envs/tf115/lib/python3.7/site-packages/tensorboard/main.py \
  2. --logdir="/media/neal/neal/deeplab/models/research/deeplab/my_utils/eval"
  3. python /home/neal/Python/anaconda/envs/tf115/lib/python3.7/site-packages/tensorboard/main.py \
  4. --logdir="/media/neal/neal/deeplab/models/research/deeplab/my_utils/result"
  5. http://neal:6006/

制作数据集deeplab的过程,deeplab训练自己的数据集链接1,链接2