1. import os
    2. import shutil
    3. import lxml.etree as ET
    4. from PIL import ImageDraw, Image
    5. import numpy as np
    6. import copy
    7. import cv2
    8. current_path = os.path.dirname(__file__)
    def dist(a, b):
        """
        Manhattan (L1) distance between two 2-D points, rounded to an integer.
        :param a: first point, indexable as (x, y)
        :param b: second point, indexable as (x, y)
        :return: round(|ax - bx| + |ay - by|)
        """
        delta_x = abs(a[0] - b[0])
        delta_y = abs(a[1] - b[1])
        return round(delta_x + delta_y)
    def del_contours(contours, delete_list):
        """
        Remove the contours whose positions are listed in delete_list.
        :param contours: list of contours; a list is modified in place, a tuple
                         is first converted to a new list
        :param delete_list: positions (in the original numbering) to remove;
                            may be unsorted and may contain duplicates
        :return: the list with the requested entries removed
        """
        if isinstance(contours, tuple):
            # tuples cannot be edited in place, so work on a list copy
            contours = list(contours)
        # Deleting from the highest position downwards keeps the remaining
        # positions valid, so no running offset is needed and the order of
        # delete_list no longer matters.
        for position in sorted(set(delete_list), reverse=True):
            del contours[position]
        return contours
    def xml_to_region(xml_file):
        """
        parse XML label file and get the points
        :param xml_file: xml file
        :return: (region_list, region_class); region_list holds, per region, the
                 list of vertex attribute mappings (with the 'X' and 'Y' strings),
                 region_class holds the matching region 'Type' attribute values
        """
        # '65280' is green, '255' is red and '65535' is yellow; adjust this filter
        # to your own data (or accept every annotation).
        accepted_colors = ('65280', '255', '65535')

        tree = ET.parse(xml_file)
        region_list = []
        region_class = []
        for annotation in tree.findall('.//Annotation'):
            # .get() so that an annotation without a LineColor is skipped instead
            # of aborting the whole parse with a KeyError
            if annotation.attrib.get('LineColor') not in accepted_colors:
                continue
            for region in annotation.findall('./Regions/Region'):
                region_class.append(region.attrib.get('Type'))
                # keep the raw attribute mapping of every vertex
                region_list.append(
                    [vertex.attrib for vertex in region.findall('.//Vertices/Vertex')]
                )
        return region_list, region_class
    def region_handler(im, region_list,region_class, level_downsample):
        """
        handle region label point to discrete point, and draw the region point to line
        :param im: the PIL image the region outlines are painted on (modified in place)
        :param region_list: region list, region point,
        eg : [[{'X': '27381.168113', 'Y': '37358.653791'}], [{'X': '27381.168113', 'Y': '37358.653791'}]]
        :param region_class : list, keeps the value of region.attrib.get('Type') for
        every element of region_list, eg : ['0', '0', '0', '1', '2', '3']
        :param level_downsample: slide level down sample; coordinates are divided by it
        :return: the same image object that was passed in, with the outlines drawn
        """
        dr = ImageDraw.Draw(im)
        for r_class, region in enumerate(region_list):
            shape_type = region_class[r_class]
            # only types '0' and '3' are drawn
            if shape_type not in ('0', '3'):
                continue

            point_list = [
                (int(float(point['X']) / level_downsample),
                 int(float(point['Y']) / level_downsample))
                for point in region
            ]
            if not point_list:
                # a region without vertices has no extent; min()/max() would raise
                continue

            xs = [x for x, _ in point_list]
            ys = [y for _, y in point_list]
            x_min, x_max = min(xs), max(xs)
            y_min, y_max = min(ys), max(ys)
            # mislabeled, here checked by x and y coordinate max and min difference
            if (x_max - x_min < 50) or (y_max - y_min < 50):
                continue

            if shape_type == '3':
                # arc() takes a bounding box with x1 >= x0 and y1 >= y0, so pass
                # the ordered extent of the vertices rather than the raw list
                dr.arc([(x_min, y_min), (x_max, y_max)], 0, 360, fill='#000000', width=12)
            else:
                dr.line(point_list, fill="#000000", width=12)
        return im
    def region_binary_image(tile, region_list,region_class, level_downsample):
        """
        convert the region labeled or not by doctor to binary image
        :param tile: image-like object; only its ``size`` attribute is read, to size the mask
        :param region_list: region list, region point,
        eg : [[{'X': '27381.168113', 'Y': '37358.653791'}], [{'X': '27381.168113', 'Y': '37358.653791'}]]
        :param region_class : list, keeps the value of region.attrib.get('Type') for
        every element of region_list, eg : ['0', '0', '0', '1', '2', '3']
        :param level_downsample: slide level down sample; coordinates are divided by it
        :return: uint8 numpy array of the filled regions after a morphological
                 open followed by a close
        """
        # Manhattan distance below which two end points count as "the same place".
        close_threshold = 50

        im = Image.new(mode="1", size=tile.size)
        dr = ImageDraw.Draw(im)

        # Collect the down-sampled outline of every type '0' region.
        regions_list = []
        for r_class, region in enumerate(region_list):
            if region_class[r_class] != '0':
                continue
            point_list = [
                (int(float(point['X']) / level_downsample),
                 int(float(point['Y']) / level_downsample))
                for point in region
            ]
            if point_list:
                # an outline without vertices cannot be closed or stitched
                regions_list.append(point_list)

        # Annotations drawn by the doctor are sometimes not closed, so one outline
        # ends up split into several consecutive fragments, e.g.
        #   0 (1979, 798)  (2144, 1479)
        #   1 (2139, 1483) (2319, 2162)
        #   2 (2308, 2160) (3003, 1646)
        # A proper outline starts and ends at (almost) the same point and is far
        # away from its neighbours, e.g.
        #   12 (1177, 2986) (1177, 2986)
        #   13 (1507, 2942) (1507, 2940)
        # Open fragments are therefore stitched to any other fragment that has an
        # end point close to one of their own end points.
        pin_jie_flag = set()  # indices of fragments already stitched into another one
        single_list = []      # resulting outlines
        for j, p_list in enumerate(regions_list):
            if j in pin_jie_flag:
                continue
            # ">=" so that an end-point distance of exactly the threshold is
            # handled as an open outline instead of being dropped
            if dist(p_list[0], p_list[-1]) >= close_threshold:
                for j_2, p_list_2 in enumerate(regions_list):
                    if j_2 == j or j_2 in pin_jie_flag:
                        continue
                    if dist(p_list[-1], p_list_2[0]) < close_threshold:
                        # tail meets head: append as is
                        p_list = p_list + p_list_2
                    elif dist(p_list[0], p_list_2[-1]) < close_threshold:
                        # head meets tail: prepend as is
                        p_list = p_list_2 + p_list
                    elif dist(p_list[-1], p_list_2[-1]) < close_threshold:
                        # tail meets tail: append reversed
                        p_list = p_list + p_list_2[::-1]
                    elif dist(p_list[0], p_list_2[0]) < close_threshold:
                        # head meets head: prepend reversed
                        p_list = p_list_2[::-1] + p_list
                    else:
                        continue
                    # remember the stitched fragment so it is not used again
                    pin_jie_flag.add(j_2)
            single_list.append(p_list)

        for points in single_list:
            dr.polygon(points, fill="#ffffff")

        # Besides open outlines there are also superfluous strokes; there is no
        # complete solution for those yet, so an open followed by a close from
        # OpenCV is used to patch the mask.
        kernel = np.ones((20, 20), np.uint8)
        filter_matrix = np.array(im).astype(np.uint8)
        filter_matrix = cv2.morphologyEx(filter_matrix, cv2.MORPH_OPEN, kernel)
        filter_matrix = cv2.morphologyEx(filter_matrix, cv2.MORPH_CLOSE, kernel)
        return filter_matrix
    class Region:
        """
        handle the template xml format file to insert label svs region
        """

        def __init__(self, xml_file):
            """
            open xml_file; when it does not exist yet it is first created as a
            copy of the "template.xml" found in ``current_path``
            :param xml_file: path of the xml label file
            """
            parser = ET.XMLParser(remove_blank_text=True)
            if not os.path.isfile(xml_file):
                template = os.path.join(current_path, "template.xml")
                shutil.copy(template, xml_file)
            self._xml_file = xml_file
            self._tree = ET.parse(xml_file, parser)

        def get_region(self, region_id):
            """
            get region by region id
            :param region_id: region id, 0: green, 1: yellow, 2: red, see the template.xml
            :return: the region
            """
            return self._tree.findall(".//Annotation/Regions")[region_id]

        def add(self, region_id, points):
            """
            add one region to the specified region by region id, the added region is ellipse
            and the parameter points is a rectangle bounded by an ellipse
            :param region_id: index of the target "Regions" element, see get_region
            :param points: list with two element(upper-left, bottom-right), is the rectangle
                           bounded by an ellipse; every element is a mapping of vertex
                           attributes such as {'X': ..., 'Y': ...}
            :return:
            """
            region = self.get_region(region_id)
            region_num = len(region.findall(".//Region"))
            region_attr = {
                "Id": str(region_num + 1),
                "Type": "2",
                "Zoom": "1",
                "Selected": "1",
                "ImageLocation": "",
                "ImageFocus": "0",
                "Length": "80",
                "Area": "400",
                "LengthMicrons": "20",
                "AreaMicrons": "30",
                "Text": "",
                "NegativeROA": "0",
                "InputRegionId": "0",
                "Analyze": "0",
                "DisplayId": "1",
            }
            region_tag = ET.SubElement(region, "Region", region_attr)
            # an empty <Attributes/> child precedes the vertices
            ET.SubElement(region_tag, "Attributes")
            vertices = ET.SubElement(region_tag, "Vertices")
            for point in points:
                # XML attribute values must be strings, so numeric coordinates
                # are converted here
                vertex_attr = {str(key): str(value) for key, value in point.items()}
                ET.SubElement(vertices, "Vertex", attrib=vertex_attr)

        def save(self):
            """
            save the xml file
            :return:
            """
            self._tree.write(self._xml_file, pretty_print=True)
    def color_int_to_str(color_int):
        """
        Convert an integer colour code (blue in the high byte, red in the low
        byte) into a six-digit lower-case 'rrggbb' hex string.
        :param color_int: integer in the range 0 .. 0xFFFFFF
        :return: six character hex string ordered red, green, blue
        :raises AssertionError: when color_int is negative or needs more than six hex digits
        """
        # Raised explicitly (instead of ``assert``) so the check survives
        # ``python -O``; a negative value used to slip through because the sign
        # of hex() output was sliced into the digits.
        if color_int < 0:
            raise AssertionError('Found unknow color!')
        color_str = hex(color_int)[2:]
        if len(color_str) > 6:
            raise AssertionError('Found unknow color!')
        color_str = color_str.zfill(6)
        blue, green, red = color_str[0:2], color_str[2:4], color_str[4:6]
        return red + green + blue
    def color_str_to_int(color_str):
        """
        Convert a six-digit 'rrggbb' hex string into the integer colour code
        with red in the low byte and blue in the high byte.
        :param color_str: six character hex string ordered red, green, blue
        :return: integer colour code
        :raises AssertionError: when color_str is not exactly six characters long
        :raises ValueError: when a component is not valid hexadecimal
        """
        # Raised explicitly (instead of ``assert``) so the check survives ``python -O``.
        if len(color_str) != 6:
            raise AssertionError('Found unknow color!')
        red, green, blue = color_str[0:2], color_str[2:4], color_str[4:6]
        return int(red, 16) + (int(green, 16) << 8) + (int(blue, 16) << 16)
    def contours_to_xml(savepath,contours,mode,if_add = True,level_downsample = 16,mpp= "0.252100",linecolor ="16711680",contour_area_threshold=0):
        """
        based on a mask of svs file (make sure the size of the mask equals the size of the svs
        file's level_dimensions at the level matching level_downsample) make a xml format label
        file for this svs file
        :param savepath : the xml format label file save path
        :param contours : contours list returned by cv2.findContours on the mask
        :param mode : 'fp' draws with line colour '65535', 'fn' and 'all' with '65280';
                      any other value keeps the ``linecolor`` argument
        :param if_add : when True (default) and savepath already exists, the existing
                        Annotation elements are kept and the new one is appended
        :param level_downsample : down sample factor of the mask; contour coordinates are
                                  multiplied by it before being written
        :param mpp : value written to the MicronsPerPixel attribute
        :param linecolor : decimal colour code used to draw the contours, default colour is blue
        :param contour_area_threshold : contours whose cv2.contourArea is not greater than this
                                        value are dropped. The default 0 selects the automatic
                                        threshold 10000 / level_downsample ** 2; pass a
                                        negative value to keep every contour.
        :return:
        """
        mode_colors = {'fp': '65535', 'fn': '65280', 'all': '65280'}

        ann_begin_tag = 1
        annotations_el = ET.Element('Annotations', {'MicronsPerPixel': str(mpp)})
        if if_add and os.path.exists(savepath):
            # carry the annotations of the existing file over into the new root
            existing = ET.parse(savepath).findall('.//Annotation')
            ann_begin_tag = len(existing) + 1
            for ann in existing:
                annotations_el.append(ann)

        linecolor = mode_colors.get(mode, linecolor)

        annotation_el = ET.SubElement(annotations_el, 'Annotation',
                                      {'Id': str(ann_begin_tag), 'Name': '', 'ReadOnly': '0', 'NameReadOnly': '0',
                                       'LineColorReadOnly': '0', 'Incremental': '0', 'Type': '4',
                                       'LineColor': str(linecolor), 'Visible': '1', 'Selected': '1',
                                       'MarkupImagePath': '', 'MacroName': ''})
        attributes_el = ET.SubElement(annotation_el, 'Attributes')
        ET.SubElement(attributes_el, 'Attribute', {'Name': '', 'Id': '0', 'Value': ''})
        regions_el = ET.SubElement(annotation_el, 'Regions')
        headers_el = ET.SubElement(regions_el, 'RegionAttributeHeaders')
        for header_id, header_name in (("9999", 'Region'), ("9997", 'Length'), ("9996", 'Area'),
                                       ("9998", 'Text'), ("1", 'Description')):
            ET.SubElement(headers_el, 'AttributeHeader',
                          {'Id': header_id, 'Name': header_name, 'ColumnWidth': '-1'})

        # The argument used to be overwritten unconditionally; now only the
        # default (0 / None) falls back to the automatic threshold.
        if not contour_area_threshold:
            contour_area_threshold = 10000 / (level_downsample ** 2)

        region_id = 1
        for cnt in contours:
            contour_area = cv2.contourArea(cnt)
            if contour_area <= contour_area_threshold:
                continue
            # reshape (not squeeze) so a single-point contour stays two-dimensional
            points = np.asarray(cnt).reshape(-1, 2)
            region_el = ET.SubElement(regions_el, 'Region',
                                      {'Id': str(region_id), 'Type': '0', 'Zoom': '0.011', 'Selected': '0',
                                       'ImageLocation': '', 'ImageFocus': '-1', 'Length': str(points.shape[0]),
                                       'Area': str(level_downsample**2*contour_area),
                                       'LengthMicrons': '0', 'AreaMicrons': '0', 'Text': '', 'NegativeROA': '0',
                                       'InputRegionId': '0', 'Analyze': '1', 'DisplayId': str(region_id)})
            ET.SubElement(region_el, 'Attributes')
            vertices_el = ET.SubElement(region_el, 'Vertices')
            for x, y in points:
                # scale mask coordinates by the down sample factor
                ET.SubElement(vertices_el, 'Vertex',
                              {'X': str(int(x * level_downsample)), 'Y': str(int(y * level_downsample))})
            region_id += 1

        ET.SubElement(annotation_el, 'Plots')
        doc = ET.ElementTree(annotations_el)
        # context manager so the output file is closed even when writing fails
        with open(savepath, "wb") as xml_out:
            doc.write(xml_out, pretty_print=True)
    if __name__ == '__main__':
        # Batch driver: for every slide, load its predicted heat-map mask,
        # binarise it, extract the outer contours and write them as an xml label file.
        import glob
        import os
        import sys

        sys.path.append('../')
        from utils.openslide_utils import Slide
        import matplotlib.pyplot as plt

        plt.rcParams['figure.figsize'] = 15, 15
        # alternative data set: '/media/totem_disk/totem/Data_20201014/LABIAL_SALIVARY_GLAND_BIOPSY/*.svs'
        all_svs_path = sorted(glob.glob('/media/totem_disk/totem/data_20200903/second_batch/*.svs'))
        for svs_path in all_svs_path:
            slide = Slide(svs_path)
            img_name = os.path.splitext(svs_path)[0].split('/')[-1]

            mask_path = '/media/totem_disk/totem/guozunhu/Project/lp_4_hooknet/xml_out/new_dataset_batch2/{}_hm_m.png'.format(img_name)
            draw_mask = cv2.imread(mask_path, 0)
            if draw_mask is None:
                # imread signals a missing / unreadable file by returning None
                print('mask not readable, skipped: {}'.format(mask_path))
                continue
            # binarise at 128: below -> 0, otherwise -> 255 (kept as uint8 for findContours)
            draw_mask = np.where(draw_mask < 128, 0, 255).astype(np.uint8)

            # findContours returns (contours, hierarchy) or (image, contours, hierarchy)
            # depending on the OpenCV version; the contours are second from the end in both.
            cnts = cv2.findContours(draw_mask, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE)[-2]

            savepath = '/media/totem_disk/totem/guozunhu/Project/lp_4_hooknet/xml_out/new_dataset_batch2/xml/{}.xml'.format(img_name)
            # NOTE(review): mpp is computed but never passed to contours_to_xml, so the
            # xml is written with that function's default MicronsPerPixel - confirm intent.
            mpp = str(slide.get_mpp()*1000)
            # NOTE(review): the value read from the slide is immediately replaced by
            # the hard-coded factor 8 - confirm which one is intended.
            level_downsample = slide.get_level_downsample(level=1)
            level_downsample = 8
            mode = 'all'
            contours_to_xml(savepath, cnts, mode, if_add=False, level_downsample=level_downsample)
            print(img_name)