import osimport shutilimport lxml.etree as ETfrom PIL import ImageDraw, Imageimport numpy as npimport copyimport cv2current_path = os.path.dirname(__file__)def dist(a, b): return round(abs(a[0]-b[0]) + abs(a[1]-b[1]))def del_contours(contours, delete_list): delta = 0 for i in range(len(delete_list)): del contours[delete_list[i] - delta] delta = delta + 1 return contoursdef xml_to_region(xml_file): """ parse XML label file and get the points :param xml_file: xml file :return: region list,region_class """ tree = ET.parse(xml_file) region_list = [] region_class = [] for color in tree.findall('.//Annotation'): if color.attrib['LineColor'] in ['65280','255','65535']: # '65280'是绿色,'255'是红色,可以根据自己的实际情况更改这个判断条件(或者直接if True) for region in color.findall('./Regions/Region'): vertex_list = [] #region.attrib.get('Type')=='0': region_class.append(region.attrib.get('Type')) for vertex in region.findall('.//Vertices/Vertex'): # parse the 'X' and 'Y' for the vertex vertex_list.append(vertex.attrib) region_list.append(vertex_list) return region_list,region_classdef region_handler(im, region_list,region_class, level_downsample): """ handle region label point to discrete point, and draw the region point to line :param im: the image painted in region line :param region_list: region list, region point, eg : [[{'X': '27381.168113', 'Y': '37358.653791'}], [{'X': '27381.168113', 'Y': '37358.653791'}]] :param region_class : list,keep the value of region.attrib.get('Type') in elements of region list eg : [0,0,0,1,2,3] :param level_downsample: slide level down sample :return: image painted in region line of numpy array format """ dr = ImageDraw.Draw(im) for r_class, region in enumerate(region_list): point_list = [] if region_class[r_class] == '0' or region_class[r_class] == '3': for __, point in enumerate(region): X, Y = int(float(point['X'])/level_downsample), int(float(point['Y'])/level_downsample) point_list.append((X, Y)) # points_length = len(point_list) x_max = max(point_list, key=lambda point: point[0])[0] x_min = min(point_list, key=lambda point: point[0])[0] y_max = max(point_list, key=lambda point: point[1])[1] y_min = min(point_list, key=lambda point: point[1])[1] # mislabeled, here checked by x and y coordinate max and min difference if (x_max - x_min < 50) or (y_max - y_min < 50): continue if region_class[r_class] == '3': dr.arc(point_list, 0, 360, fill='#000000', width=12) else: dr.line(point_list, fill="#000000", width=12) return imdef region_binary_image(tile, region_list,region_class, level_downsample): """ convert the region labeled or not by doctor to binary image :param tile: a return image based on the method of Slide class object in 'utils.openslide_utils' :param region_list: region list, region point, eg : [[{'X': '27381.168113', 'Y': '37358.653791'}], [{'X': '27381.168113', 'Y': '37358.653791'}]] :param region_class : list,keep the value of region.attrib.get('Type') in elements of region list eg : [0,0,0,1,2,3] :param level_downsample: slide level down sample :return: image painted in region line of numpy array format """ im = Image.new(mode="1", size=tile.size) dr = ImageDraw.Draw(im) regions_list = [] for r_class, region in enumerate(region_list): point_list = [] if region_class[r_class] == '0': for __, point in enumerate(region): X, Y = int(float(point['X'])/level_downsample), int(float(point['Y'])/level_downsample) point_list.append((X, Y)) regions_list.append(point_list) #由于医生的标注会出现不连续(非闭合)的情况,导致提取出来的标注坐标列表,会分成多段,比如: # 0 (1979, 798) (2144, 1479) # 1 (2139, 1483) (2319, 2162) # 2 (2308, 2160) (3003, 1646) # 正常情况下,标注坐标列表应该收尾闭合(前后坐标一致),上下列表之间差异应该较大,如: # 12 (1177, 2986) (1177, 2986) # 13 (1507, 2942) (1507, 2940) # 针对上述第一种情况,需要对提出出来的标注坐标列表进行循环判断,对收尾不一而且和其他标注列表的首坐标相对较近的话,进行合并处理 pin_jie_flag = [] #存储已经被拼接过的标注坐标列表序号 single_list = [] #存储新标注坐标列表的列表 for j,p_list in enumerate(regions_list): if dist(p_list[0], p_list[-1]) < 50 and j not in pin_jie_flag: #如果收尾坐标距离相差在150范围内(曼哈顿距离),且未成被拼接过,直接认为这个组坐标无须拼接,存储起来 single_list.append(p_list) elif dist(p_list[0], p_list[-1]) > 50 and j not in pin_jie_flag: #如果收尾坐标距离相差在150范围外(曼哈顿距离),且未成被拼接过,说明这组坐标是残缺非闭合的,需要对其余标注坐标进行新一轮的循环判断 for j_2,p_list_2 in enumerate(regions_list): if j_2 != j and j_2 not in pin_jie_flag: if dist(p_list[-1],p_list_2[0]) < 50 : p_list = p_list + p_list_2.copy() pin_jie_flag.append(j_2) elif dist(p_list[0],p_list_2[-1]) < 50 : p_list = p_list_2.copy() + p_list pin_jie_flag.append(j_2) elif dist(p_list[-1],p_list_2[-1]) < 50 : p_list_2_new = copy.deepcopy(p_list_2) p_list_2_new.reverse() p_list = p_list + p_list_2_new pin_jie_flag.append(j_2) elif dist(p_list[0],p_list_2[0]) < 50 : p_list_2_new = copy.deepcopy(p_list_2) p_list_2_new.reverse() p_list = p_list_2_new + p_list pin_jie_flag.append(j_2) # 当这组非闭合的尾坐标和其他组坐标的首坐标接近到一定范围时(距离是150内),就让当前的非闭合的坐标列表和该组坐标列表相加 # 处理完毕之后,将该组坐标的序号增加到已拼接坐标的列表中,确保后续循环不会再判断这个列表 single_list.append(p_list) for points in single_list: dr.polygon(points, fill="#ffffff") #由于医生的标注除了出现不连续(非闭合)的情况外,还存在多余勾画的情况,对这种情况暂时没有完整的思路予以接近,先用 # opencv中的开闭操作组合来进行修补 kernel = np.ones((20,20),np.uint8) filter_matrix = np.array(im).astype(np.uint8) filter_matrix = cv2.morphologyEx(filter_matrix, cv2.MORPH_OPEN, kernel) filter_matrix = cv2.morphologyEx(filter_matrix, cv2.MORPH_CLOSE, kernel) # plt.imshow(filter_matrix) return filter_matrixclass Region: """" handle the template xml format file to insert label svs region """ def __init__(self, xml_file): parser = ET.XMLParser(remove_blank_text=True) if not os.path.isfile(xml_file): template = os.path.join(current_path, "template.xml") shutil.copy(template, xml_file) self._xml_file = xml_file self._tree = ET.parse(xml_file, parser) def get_region(self, region_id): """ get region by region id :param region_id: region id, 0: green, 1: yellow, 2: red, see the template.xml :return: the region """ return self._tree.findall(".//Annotation/Regions")[region_id] def add(self, region_id, points): """ add one region to the specified region by region id, the added region is ellipse and the parameter points is a rectangle bounded by an ellipse :param points: list with two element(upper-left, bottom-right), is the rectangle bounded by an ellipse :return: """ region = self.get_region(region_id) region_num = len(region.findall(".//Region")) region_attr = { "Id": str(region_num+1), "Type": "2", "Zoom": "1", "Selected": "1", "ImageLocation": "", "ImageFocus": "0", "Length": "80", "Area": "400", "LengthMicrons": "20", "AreaMicrons": "30", "Text": "", "NegativeROA": "0", "InputRegionId": "0", "Analyze": "0", "DisplayId": "1" } region_tag = ET.Element("Region", region_attr) region.append(region_tag) attributes = ET.SubElement(region_tag, "Attributes") vertices = ET.Element("Vertices") region_tag.append(vertices) for point in points: # insert point ET.SubElement(vertices, "Vertex", attrib=point) def save(self): """ save the xml file :return: """ self._tree.write(self._xml_file, pretty_print=True)def color_int_to_str(color_int): color_str = hex(color_int)[2:] assert len(color_str) <= 6, 'Found unknow color!' pad_count = 6 - len(color_str) color_str = ''.join(['0'] * pad_count) + color_str b, g, r = color_str[0:2], color_str[2:4], color_str[4:6] return r+g+bdef color_str_to_int(color_str): assert len(color_str) == 6, 'Found unknow color!' r, g, b = color_str[0:2], color_str[2:4], color_str[4:6] color_int = (int(r, 16)) + (int(g, 16) << 8) + (int(b, 16) << 16) return color_intdef contours_to_xml(savepath,contours,mode,if_add = True,level_downsample = 16,mpp= "0.252100",linecolor ="16711680",contour_area_threshold=0): """ based on a mask of svs file(mask sure the size of the mask equals the size of the svs file 's level_dimensions in level 2) to make a xml format lable file for this svs file :param savepath : the xml format lable file save file path :param contours : contours list return from cv2.findContours of the mask :param if_add : Added niew Annotation to an exits xml format label file or not ,defaut False :param level_downsample : the value of slide.level_downsamples[2] :param mpp : the value of MicronsPerPixel in slide.properties['openslide.mpp-x'] :param linecolor : the value of decimal color code to draw contours in xml format lable file ,default color is blue :param contour_area_threshold : the threshold to drop small contours base on cv2.contourArea,which helps to keep the big area contours in xml format lable file :return: """ ann_begin_tag = 1 Annotations = ET.Element('Annotations', {'MicronsPerPixel': mpp}) origin_color_list = [] if if_add and os.path.exists(savepath): origin = ET.parse(savepath) ann_begin_tag = len(origin.findall('.//Annotation')) + 1 for ann in origin.findall('.//Annotation'): origin_color_list.append(ann.attrib['LineColor']) Annotations.append(ann) if mode == 'fp': linecolor = '65535' elif mode == 'fn': linecolor = "65280" elif mode == 'all': linecolor = '65280' # if linecolor in origin_color_list: linecolor = "13382297" Annotation = ET.SubElement(Annotations, 'Annotation', {'Id': str(ann_begin_tag), 'Name': '', 'ReadOnly': '0', 'NameReadOnly': '0', 'LineColorReadOnly': '0', 'Incremental': '0', 'Type': '4', 'LineColor': linecolor, 'Visible': '1', 'Selected': '1', 'MarkupImagePath': '', 'MacroName': ''}) Attributes = ET.SubElement(Annotation, 'Attributes') ET.SubElement(Attributes, 'Attribute', {'Name': '', 'Id': '0', 'Value': ''}) Regions = ET.SubElement(Annotation, 'Regions') RegionAttributeHeaders = ET.SubElement(Regions, 'RegionAttributeHeaders') ET.SubElement(RegionAttributeHeaders, 'AttributeHeader', {'Id': "9999", 'Name': 'Region', 'ColumnWidth': '-1'}) ET.SubElement(RegionAttributeHeaders, 'AttributeHeader', {'Id': "9997", 'Name': 'Length', 'ColumnWidth': '-1'}) ET.SubElement(RegionAttributeHeaders, 'AttributeHeader', {'Id': "9996", 'Name': 'Area', 'ColumnWidth': '-1'}) ET.SubElement(RegionAttributeHeaders, 'AttributeHeader', {'Id': "9998", 'Name': 'Text', 'ColumnWidth': '-1'}) ET.SubElement(RegionAttributeHeaders, 'AttributeHeader', {'Id': "1", 'Name': 'Description', 'ColumnWidth': '-1'}) i = 1 contour_area_threshold = 10000 / (level_downsample ** 2) # contour_area_threshold = 0 for cnt in contours: contour_area = cv2.contourArea(cnt) if contour_area > contour_area_threshold: Region = ET.SubElement(Regions, 'Region', {'Id': str(i), 'Type': '0', 'Zoom': '0.011', 'Selected': '0', 'ImageLocation': '', 'ImageFocus': '-1', 'Length': str(cnt.shape[0]), 'Area': str(level_downsample**2*contour_area), 'LengthMicrons': '0', 'AreaMicrons': '0', 'Text': '', 'NegativeROA': '0', 'InputRegionId': '0', 'Analyze': '1', 'DisplayId': str(i)}) ET.SubElement(Region, 'Attributes') Vertices = ET.SubElement(Region, 'Vertices') cnt = np.squeeze(np.asarray(cnt)) for j in range(cnt.shape[0]): ET.SubElement(Vertices, 'Vertex', {'X': str(int(cnt[j,0]*level_downsample)), 'Y': str(int(cnt[j,1]*level_downsample))}) i = i + 1 ET.SubElement(Annotation, 'Plots') doc = ET.ElementTree(Annotations) doc.write(open(savepath, "wb"), pretty_print=True)if __name__ == '__main__': import sys,glob,os sys.path.append('../') from utils.openslide_utils import Slide import matplotlib.pyplot as plt plt.rcParams['figure.figsize'] = 15, 15 # all_svs_path = sorted(glob.glob('/media/totem_disk/totem/Data_20201014/LABIAL_SALIVARY_GLAND_BIOPSY/*.svs')) all_svs_path = sorted(glob.glob('/media/totem_disk/totem/data_20200903/second_batch/*.svs')) for svs_path in all_svs_path: slide = Slide(svs_path) img_name = os.path.splitext(svs_path)[0].split('/')[-1] # draw_mask = cv2.imread('/media/totem_disk/totem/guozunhu/Project/lp_3_2_u2net/predict/12.18_efficient_b0_s1:d0:bce+lo_so + d6:bce_s2:d0:bce+lo_so/new_dataset_batch2/hotpic/{}_4.png'.format(img_name),0) draw_mask = cv2.imread('/media/totem_disk/totem/guozunhu/Project/lp_4_hooknet/xml_out/new_dataset_batch2/{}_hm_m.png'.format(img_name),0) draw_mask = np.where(draw_mask<128,0,draw_mask) draw_mask = np.where(draw_mask>=128,255,draw_mask) # kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3,3)) # draw_mask = cv2.morphologyEx(draw_mask,cv2.MORPH_CLOSE,kernel) region_process_mask = draw_mask if cv2.__version__[0]=='4': cnts, _ = cv2.findContours(region_process_mask,mode=cv2.RETR_EXTERNAL,method=cv2.CHAIN_APPROX_SIMPLE) else: _, cnts , _ = cv2.findContours(region_process_mask,mode=cv2.RETR_EXTERNAL,method=cv2.CHAIN_APPROX_SIMPLE) # delete_list = [] # delete_show_list = [] # for i in range(len(cnts)): # if cv2.contourArea(cnts[i]) < 1000: # # if cv2.contourArea(cnts[i]) < draw_mask.shape[0]*draw_mask.shape[1]*0.01: # delete_list.append(i) # delete_show_list.append(cnts[i]) # cnts = del_contours(cnts,delete_list) # draw_mask_bgr = draw_mask.copy() # draw_mask_bgr = cv2.cvtColor(draw_mask_bgr,cv2.COLOR_GRAY2BGR) # cv2.drawContours(draw_mask_bgr,delete_show_list,-1,(0,0,0),-1) # save_filter_mask = cv2.cvtColor(draw_mask_bgr,cv2.COLOR_BGR2GRAY) # cv2.imwrite('/media/totem_disk/totem/guozunhu/Project/lp_2/cache_ndpi_he_cc/hot_pic/c_xml/filter/{}.png'.format(img_name),save_filter_mask) savepath = '/media/totem_disk/totem/guozunhu/Project/lp_4_hooknet/xml_out/new_dataset_batch2/xml/{}.xml'.format(img_name) mpp = str(slide.get_mpp()*1000) # level_dimension = slide.get_level_dimension(level=0) level_downsample = slide.get_level_downsample(level=1) level_downsample = 8 mode = 'all' # need_cnts_list = [] # for i in range(len(cnts)): # if cv2.contourArea(cnts[i]) > 10000 / (level_downsample ** 2): # need_cnts_list.append(cnts[i]) # if img_name in ['S04','S10','S11','S13']: # level_downsample = level_downsample // 2 contours_to_xml(savepath,cnts,mode,if_add = False,level_downsample=level_downsample) print(img_name)