# @Time : 2019.10.18# @Author : Bohrium.Kwong# @Licence : bio-totemimport osimport shutilimport lxml.etree as ETfrom PIL import ImageDraw, Imageimport numpy as npimport copyimport cv2import globimport openslide_utilsimport syssys.path.append('../')from openslide_utils import Slide# from xml_generate import get_contoursimport matplotlib.pyplot as pltcurrent_path = os.path.dirname(__file__)def dist(a, b): return round(abs(a[0]-b[0]) + abs(a[1]-b[1]))def floatrange(start,end,step): floatrange_list = [] for i in range(int((end-start)/step)): floatrange_list.append(round(start+step*i,1)) return floatrange_list# xml_ori_path = 'where xml files save'def xml_change_color(xml_ori_path): for xml in glob.glob(xml_ori_path+'/*.xml*'): #print(xml) tree = ET.parse(xml) for a in tree.findall('.//Annotation'): # print(a.attrib['LineColor']) a.attrib['LineColor'] = '65535' tree.write(xml,pretty_print=True)##解析xml文件获得坐标点def xml_to_region(xml_file): """ parse XML label file and get the points :param xml_file: xml file :return: region list,region_class """ tree = ET.parse(xml_file) region_list = [] region_class = [] for color in tree.findall('.//Annotation'): if color.attrib['LineColor'] in ['65280']: #zheli shi biaoqian zhong de biaozhu xian tiao de yanse,'65280' # '65280'是绿色,'255'是红色,可以根据自己的实际情况更改这个判断条件(或者直接if True) for region in color.findall('./Regions/Region'): vertex_list = [] #region.attrib.get('Type')=='0': region_class.append(region.attrib.get('Type')) #region_class:leibie dui ying de suo yin for vertex in region.findall('.//Vertices/Vertex'): # parse the 'X' and 'Y' for the vertex vertex_list.append(vertex.attrib) region_list.append(vertex_list) return region_list,region_class##解析xml文件获得坐标点def xml_to_region_pred(xml_file): """ parse XML label file and get the points :param xml_file: xml file :return: region list,region_class """ tree = ET.parse(xml_file) region_list = [] region_class = [] for color in tree.findall('.//Annotation'): if color.attrib['LineColor'] in ['255']: #zheli shi biaoqian zhong de biaozhu xian tiao de yanse,'65280' # '65280'是绿色,'255'是红色,可以根据自己的实际情况更改这个判断条件(或者直接if True) for region in color.findall('./Regions/Region'): vertex_list = [] #region.attrib.get('Type')=='0': region_class.append(region.attrib.get('Type')) #region_class:leibie dui ying de suo yin for vertex in region.findall('.//Vertices/Vertex'): # parse the 'X' and 'Y' for the vertex vertex_list.append(vertex.attrib) region_list.append(vertex_list) return region_list,region_class##将标签点(离散)加工成连续点,并将其绘制到原图上def region_handler(im, region_list,region_class, level_downsample): """ handle region label point to discrete point, and draw the region point to line :param im: the image painted in region line :param region_list: region list, region point, eg : [[{'X': '27381.168113', 'Y': '37358.653791'}], [{'X': '27381.168113', 'Y': '37358.653791'}]] :param region_class : list,keep the value of region.attrib.get('Type') in elements of region list eg : [0,0,0,1,2,3] :param level_downsample: slide level down sample :return: image painted in region line of numpy array format """ dr = ImageDraw.Draw(im) for r_class, region in enumerate(region_list): point_list = [] if region_class[r_class] == '0' or region_class[r_class] == '3': for __, point in enumerate(region): X, Y = int(float(point['X'])/level_downsample), int(float(point['Y'])/level_downsample) point_list.append((X, Y))# points_length = len(point_list) x_max = max(point_list, key=lambda point: point[0])[0] x_min = min(point_list, key=lambda point: point[0])[0] y_max = max(point_list, key=lambda point: point[1])[1] y_min = min(point_list, key=lambda point: point[1])[1] # mislabeled, here checked by x and y coordinate max and min difference if (x_max - x_min < 50) or (y_max - y_min < 50): continue if region_class[r_class] == '3': dr.arc(point_list, 0, 360, fill='#00FF00', width=10) ##huizhi de yanse else: dr.line(point_list, fill="#00FF00", width=10) ##huizhi de yanse return im##zhe shi wei le zai yuantu shang tongshi shengcheng biaoqian he yuce xiantiao shiyong de(fei bi xu)def region_handler_pred(im, region_list, region_class, level_downsample): """ handle region label point to discrete point, and draw the region point to line :param im: the image painted in region line :param region_list: region list, region point, eg : [[{'X': '27381.168113', 'Y': '37358.653791'}], [{'X': '27381.168113', 'Y': '37358.653791'}]] :param region_class : list,keep the value of region.attrib.get('Type') in elements of region list eg : [0,0,0,1,2,3] :param level_downsample: slide level down sample :return: image painted in region line of numpy array format """ dr = ImageDraw.Draw(im) for r_class, region in enumerate(region_list): point_list = [] if region_class[r_class] == '0' or region_class[r_class] == '3': for __, point in enumerate(region): X, Y = int(float(point['X']) / level_downsample), int(float(point['Y']) / level_downsample) point_list.append((X, Y)) # points_length = len(point_list) x_max = max(point_list, key=lambda point: point[0])[0] x_min = min(point_list, key=lambda point: point[0])[0] y_max = max(point_list, key=lambda point: point[1])[1] y_min = min(point_list, key=lambda point: point[1])[1] # mislabeled, here checked by x and y coordinate max and min difference if (x_max - x_min < 50) or (y_max - y_min < 50): continue if region_class[r_class] == '3': dr.arc(point_list, 0, 360, fill='#0000FF', width=0) ##huizhi de yanse else: dr.line(point_list, fill="#0000FF", width=10) ##huizhi de yanse return imdef region_binary_image(tile, region_list, region_class, level_downsample): """ convert the region labeled or not by doctor to binary image :param tile: a return image based on the method of Slide class object in 'utils.openslide_utils' :param region_list: region list, region point, eg : [[{'X': '27381.168113', 'Y': '37358.653791'}], [{'X': '27381.168113', 'Y': '37358.653791'}]] :param region_class : list,keep the value of region.attrib.get('Type') in elements of region list eg : [0,0,0,1,2,3] :param level_downsample: slide level down sample :return: image painted in region line of numpy array format """ im = Image.new(mode="1", size=tile.size) dr = ImageDraw.Draw(im) regions_list = [] for r_class, region in enumerate(region_list): point_list = [] if region_class[r_class] == '0': for __, point in enumerate(region): X, Y = int(float(point['X']) / level_downsample), int(float(point['Y']) / level_downsample) point_list.append((X, Y)) regions_list.append(point_list) # 由于医生的标注会出现不连续(非闭合)的情况,导致提取出来的标注坐标列表,会分成多段,比如: # 0 (1979, 798) (2144, 1479) # 1 (2139, 1483) (2319, 2162) # 2 (2308, 2160) (3003, 1646) # 正常情况下,标注坐标列表应该收尾闭合(前后坐标一致),上下列表之间差异应该较大,如: # 12 (1177, 2986) (1177, 2986) # 13 (1507, 2942) (1507, 2940) # 针对上述第一种情况,需要对提出出来的标注坐标列表进行循环判断,对收尾不一而且和其他标注列表的首坐标相对较近的话,进行合并处理 pin_jie_flag = [] # 存储已经被拼接过的标注坐标列表序号 single_list = [] # 存储新标注坐标列表的列表 for j, p_list in enumerate(regions_list): if dist(p_list[0], p_list[-1]) < 50 and j not in pin_jie_flag: # 如果收尾坐标距离相差在150范围内(曼哈顿距离),且未成被拼接过,直接认为这个组坐标无须拼接,存储起来 single_list.append(p_list) elif dist(p_list[0], p_list[-1]) > 50 and j not in pin_jie_flag: # 如果收尾坐标距离相差在150范围外(曼哈顿距离),且未成被拼接过,说明这组坐标是残缺非闭合的,需要对其余标注坐标进行新一轮的循环判断 for j_2, p_list_2 in enumerate(regions_list): if j_2 != j and j_2 not in pin_jie_flag: if dist(p_list[-1], p_list_2[0]) < 50: p_list = p_list + p_list_2.copy() pin_jie_flag.append(j_2) elif dist(p_list[0], p_list_2[-1]) < 50: p_list = p_list_2.copy() + p_list pin_jie_flag.append(j_2) elif dist(p_list[-1], p_list_2[-1]) < 50: p_list_2_new = copy.deepcopy(p_list_2) p_list_2_new.reverse() p_list = p_list + p_list_2_new pin_jie_flag.append(j_2) elif dist(p_list[0], p_list_2[0]) < 50: p_list_2_new = copy.deepcopy(p_list_2) p_list_2_new.reverse() p_list = p_list_2_new + p_list pin_jie_flag.append(j_2) # 当这组非闭合的尾坐标和其他组坐标的首坐标接近到一定范围时(距离是150内),就让当前的非闭合的坐标列表和该组坐标列表相加 # 处理完毕之后,将该组坐标的序号增加到已拼接坐标的列表中,确保后续循环不会再判断这个列表 single_list.append(p_list) for points in single_list: dr.polygon(points, fill="#ffffff") # 由于医生的标注除了出现不连续(非闭合)的情况外,还存在多余勾画的情况,对这种情况暂时没有完整的思路予以接近,先用 # opencv中的开闭操作组合来进行修补 # kernel size depends on slide size,original is (20,20) kernel = np.ones((3, 3), np.uint8) filter_matrix = np.array(im).astype(np.uint8) filter_matrix = cv2.morphologyEx(filter_matrix, cv2.MORPH_OPEN, kernel) filter_matrix = cv2.morphologyEx(filter_matrix, cv2.MORPH_CLOSE, kernel) # plt.imshow(filter_matrix) return filter_matrixdef change_xml_line_color(xml_ori_path): for xml in glob.glob(xml_ori_path+'/*.xml*'): #print(xml) tree = ET.parse(xml) for a in tree.findall('.//Annotation'): # print(a.attrib['LineColor']) a.attrib['LineColor'] = '65535' tree.write(xml,pretty_print=True)def save_label_and_pred_pic(svs_path,model_mode,xml_path_label,xml_path_pred,img_name,threshold=0): slide = Slide(svs_path) tile = slide.get_thumb() # 获取2级采样下的全片截图 tile_copy = tile.copy() assert os.path.split(xml_path_label)[-1] == os.path.split(xml_path_pred)[-1] region_list_label,region_class_label = xml_to_region(xml_path_label) region_list_pred, region_class_pred = xml_to_region_pred(xml_path_pred) # #在这里使用xml_utils的方法进行指定区域提取(最终返回的是个True False矩阵) #region_process_mask = region_binary_image(tile, region_list_label, region_class_label,slide.get_level_downsample()) # # 根据上述返回的标注坐标列表生成WSI原图2级采样大小的True False矩阵 ##zhuyi: zheli shi like huizhi shangqu ,suoyi yao yong yici zhu shi yici region_xml_label_draw = region_handler(tile, region_list_label, region_class_label,slide.get_level_downsample()) # sheng cheng label huizhi de tu plt.figure(figsize=(12.4,9.6)) plt.imshow(region_xml_label_draw) plt.axis('off') plt.margins(0, 0) # plt.savefig(r'/media/totem_disk/totem/guozunhu/Project/(wen tai)projectC_cc/backup/(ds:12\18\24)/MainNet10/min_loss_model/paip/train/validation_in_train/label_pic/{}_{}_label_green.png'.format(img_name,model_mode)) plt.savefig('/media/totem_disk/totem/guozunhu/Project/new_hooknet/test_ndpi_out_he_hn_cc_ce_train/ori_label/{}'.format(img_name)) plt.close() # region_xml_pred_draw = region_handler_pred(tile_copy, region_list_pred, region_class_pred,slide.get_level_downsample()) # #sheng cheng pred huizhi de tu # plt.imshow(region_xml_pred_draw) # plt.axis('off') # plt.margins(0, 0) # # plt.savefig(r'/media/totem_disk/totem/guozunhu/Project/(wen tai)projectC_cc/backup/ds:4\8\12/MainNet10/min_loss/epoch:600/pred_pic/label_and_pred/{}_{}.png'.format(img_name, model_mode)) # # if not os.path.exists(r':600/validation_set/first_12_next_4/pred_xml_and_mask/arclength<800/{}/prec_pic'.format(threshold)): # # os.makedirs(r'/media/totem_disk/totem/guozunhu/Project/(wen tai)projectC_cc/backup/ds:4\8\12/MainNet10/halfway_hardsample/min_loss/epoch:600/validation_set/first_12_next_4/pred_xml_and_mask/arclength<800/{}/prec_pic'.format(threshold), exist_ok=True) # plt.savefig('/media/totem_disk/totem/guozunhu/Project/lp/test_ndpi_out_he_cc/{}_pred'.format(img_name)) # plt.close() # region_xml_pred_draw = region_handler_pred(tile, region_list_pred, region_class_pred,slide.get_level_downsample()) # #sheng cheng pred huizhi de tu # plt.imshow(region_xml_pred_draw) # # plt.axis('off') # plt.margins(0, 0) # plt.savefig('/media/totem_disk/totem/guozunhu/Project/lp/cache_ndpi_he_cc/hard_sample/ori_xml+hard_sample/pic/{}.png'.format(img_name)) # plt.close()class Region: """" handle the template xml format file to insert label svs region """ def __init__(self, xml_file): parser = ET.XMLParser(remove_blank_text=True) if not os.path.isfile(xml_file): template = os.path.join(current_path, "template.xml") shutil.copy(template, xml_file) self._xml_file = xml_file self._tree = ET.parse(xml_file, parser) def get_region(self, region_id): """ get region by region id :param region_id: region id, 0: green, 1: yellow, 2: red, see the template.xml :return: the region """ return self._tree.findall(".//Annotation/Regions")[region_id] def add(self, region_id, points): """ add one region to the specified region by region id, the added region is ellipse and the parameter points is a rectangle bounded by an ellipse :param points: list with two element(upper-left, bottom-right), is the rectangle bounded by an ellipse :return: """ region = self.get_region(region_id) region_num = len(region.findall(".//Region")) region_attr = { "Id": str(region_num+1), "Type": "2", "Zoom": "1", "Selected": "1", "ImageLocation": "", "ImageFocus": "0", "Length": "80", "Area": "400", "LengthMicrons": "20", "AreaMicrons": "30", "Text": "", "NegativeROA": "0", "InputRegionId": "0", "Analyze": "0", "DisplayId": "1" } region_tag = ET.Element("Region", region_attr) region.append(region_tag) attributes = ET.SubElement(region_tag, "Attributes") vertices = ET.Element("Vertices") region_tag.append(vertices) for point in points: # insert point ET.SubElement(vertices, "Vertex", attrib=point) def save(self): """ save the xml file :return: """ self._tree.write(self._xml_file, pretty_print=True)if __name__ == '__main__': if False: model_mode = 'min_loss' assert model_mode in ['min_loss','best_f1','last'] svs_path_root = '/media/totem_disk/totem/guozunhu/Project/new_hooknet/data_cc/valid' svs_path_list = glob.glob(svs_path_root+'/*.svs') # svs_path_list = sorted(glob.glob(svs_path_root+'/*.svs')) xml_path_label_root = r'/media/totem_disk/totem/guozunhu/Project/new_hooknet/data_cc/valid' xml_path_pred_root = r'/media/totem_disk/totem/guozunhu/Project/new_hooknet/data_cc/valid' for svs_path in svs_path_list: # xml_path_label = '/media/totem_disk/totem/guozunhu/Project/lp/cache_ndpi_he_cc/hard_sample/ori_xml+hard_sample' # xml_path_pred = '/media/totem_disk/totem/guozunhu/Project/lp/cache_ndpi_he_cc/hard_sample/ori_xml+hard_sample' img_name = os.path.split(svs_path)[-1].split('.svs')[0] # if 'neg' in img_name: # continue xml_path_label = glob.glob(xml_path_label_root+'/*{}.xml'.format(img_name))[0] xml_path_pred = glob.glob(xml_path_pred_root+'/*{}.xml'.format(img_name))[0] assert os.path.split(xml_path_label)[-1] == os.path.split(xml_path_pred)[-1] save_label_and_pred_pic(svs_path,model_mode,xml_path_label,xml_path_pred,img_name) print("Finished:{}".format(os.path.split(svs_path)[-1])) if True: import yaml model_mode = 'min_loss' for i in range(0,10): th = i / 10 mean_dice = {'mean_dice':0} low_dice_list = [] all_img_path = glob.glob('/media/totem_disk/totem/guozunhu/Project/new_hooknet/test_ndpi_out_he_hn_cc_ce_train/ds:12 20_new_dataset/merge_mask/{}/*.png'.format(th)) for img_path in all_img_path: img_name = os.path.splitext(img_path)[0].split('/')[-1] # svs_path = '/media/totem_disk/totem/guozunhu/Project/lp/all_svs/first_batch/all/{}.svs'.format(img_name) # xml_path_label = '/media/totem_disk/totem/guozunhu/Project/lp/all_svs/first_batch/all/{}.xml'.format(img_name) # region_list, region_class = xml_to_region(xml_path_label) # slide = Slide(svs_path) # tile = slide.get_thumb() # level_downsample = slide.get_level_downsample() # label = region_binary_image(tile,region_list,region_class,level_downsample) # label = np.where(label < 0.5, 0, label) # label = np.where(label >= 0.5, 1, label) # cv2.imwrite('/media/totem_disk/totem/guozunhu/Project/lp_2/cache_ndpi_he_cc/hot_pic/true_mask/{}_label.png'.format(img_name),label*255) # print(img_name) if 'neg' in img_name: continue label = cv2.imread('/media/totem_disk/totem/guozunhu/Project/lp/backup/before/all/true_mask/{}_label.png'.format(img_name),0) label = np.where(label < 127.5, 0, label) label = np.where(label >= 127.5, 1, label) # todo:分别计算不同ds倍率下热图的dice all_ds = [2,4,6] for ds in all_ds: out = cv2.imread('/media/totem_disk/totem/guozunhu/Project/temp_3/cache_ndpi_he_cc/M10:4_8_12(最大像素值)/hot_pic/{}_{}.png'.format(img_name,ds),0) out_2 = out.copy() out_2 = np.where(out_2 < 128,0,out_2) out_2 = np.where(out_2 >= 128,1,out_2) tp = (out_2 * label).sum(dtype=np.int) fp = out_2.sum(dtype=np.int) - tp label_p_num = label.sum(dtype=np.int) dice = 2 * tp / (label_p_num + tp + fp + 1e-8) out = cv2.cvtColor(out, cv2.COLOR_GRAY2BGR) cv2.putText(out, 'dice:{}'.format(dice), (200, 600), cv2.FONT_HERSHEY_SIMPLEX, 20,(255, 255,255), 25, cv2.LINE_AA) cv2.imwrite('/media/totem_disk/totem/guozunhu/Project/temp_3/cache_ndpi_he_cc/M10:4_8_12(最大像素值)/hot_pic/dice/{}_{}.png'.format(img_name,ds),out) # todo:计算merge_mask de dice out = cv2.imread('/media/totem_disk/totem/guozunhu/Project/lp_3_u2net/predict/merge_mask/{}/{}.png'.format(th,img_name),0) out_2 = out.copy() out_2 = np.where(out_2 < 128,0,out_2) out_2 = np.where(out_2 >= 128,1,out_2) tp = (out_2 * label).sum(dtype=np.int) fp = out_2.sum(dtype=np.int) - tp label_p_num = label.sum(dtype=np.int) dice = 2 * tp / (label_p_num + tp + fp + 1e-8) # if dice < 0.7: # low_dice_list.append(img_name) # # mean_dice['mean_dice'] += (dice / len(all_img_path)).item() # mean_dice['mean_dice'] += (dice / 28).item() # # out = cv2.cvtColor(out, cv2.COLOR_GRAY2BGR) # cv2.putText(out, 'dice:{}'.format(dice), (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 8,(255, 255,255), 10, cv2.LINE_AA) # save_root_path = '/media/totem_disk/totem/guozunhu/Project/lp_3_u2net/predict/dice' + '/' + str(th) # if not os.path.exists(save_root_path): # os.mkdir(save_root_path) # save_path = save_root_path + '/' + img_name + '.png' # cv2.imwrite(save_path,out) # mean_dice['low_dice_list'] = low_dice_list # yaml.safe_dump(mean_dice,open('/media/totem_disk/totem/guozunhu/Project/lp_3_u2net/predict/dice/{}/mean_dice.yml'.format(th),'w')) if False: import yaml mean_dice = {'mean_dice':0} low_dice_list = [] all_img_path = glob.glob('/media/totem_disk/totem/guozunhu/Project/new_hooknet/test_ndpi_out_he_hn_cc_ce_train/ds:12 20_valid/*_hm_m.png') for img_path in all_img_path: img_name = os.path.splitext(img_path)[0].split('/')[-1].split('_hm_m')[0] # if 'neg' in img_name: # continue svs_path = '/media/totem_disk/totem/guozunhu/Project/new_hooknet/data_cc/valid/{}.svs'.format(img_name) xml_path_label = '/media/totem_disk/totem/guozunhu/Project/new_hooknet/data_cc/valid/{}.xml'.format(img_name) region_list, region_class = xml_to_region(xml_path_label) slide = Slide(svs_path) tile = slide.get_thumb() level_downsample = slide.get_level_downsample() label = region_binary_image(tile,region_list,region_class,level_downsample) label = np.where(label < 0.5, 0, label) label = np.where(label >= 0.5, 1, label) print(img_name) pred = cv2.imread(img_path,0) pred = cv2.resize(pred,(label.shape[1],label.shape[0])) pred = np.where(pred < 128,0,pred) pred = np.where(pred >= 128,1,pred) tp = (pred * label).sum(dtype=np.int) fp = pred.sum(dtype=np.int) - tp label_p_num = label.sum(dtype=np.int) dice = 2 * tp / (label_p_num + tp + fp + 1e-8) if dice < 0.9: low_dice_list.append(img_name) mean_dice['mean_dice'] += (dice / len(all_img_path)).item() mean_dice['low_dice_list'] = low_dice_list yaml.safe_dump(mean_dice,open('/media/totem_disk/totem/guozunhu/Project/new_hooknet/test_ndpi_out_he_hn_cc_ce_train/ds:12 20_valid/dice/mean_dice.yml','w')) # xml_path_label = '/media/totem_disk/totem/guozunhu/Project/cc/xml_check/cc/he/pred_xml/634671.xml' # region_list, region_class = xml_to_region(xml_path_label) # slide = Slide('/media/totem_disk/totem/guozunhu/Project/lp_3_u2net/backup/valid/634671.svs') # tile = slide.get_thumb() # level_downsample = slide.get_level_downsample() # label = region_binary_image(tile,region_list,region_class,level_downsample) # label = np.where(label < 0.5, 0, label) # label = np.where(label >= 0.5, 1, label) # cv2.imwrite('/media/totem_disk/totem/guozunhu/Project/cc/xml_check/cc/he/pred_xml/634671.png',label*255) # print(img_name) # if True: # cache_paths = glob.glob('/media/totem_disk/totem/guozunhu/Project/lp_4_hooknet/cache_ndpi_he_cc/*_cache') # for cache_path in cache_paths: # svs_name = os.path.splitext(cache_path)[0].split('/')[-1].split('_cache')[0] # svs_path = '/media/totem_disk/totem/Data_20201014/LABIAL_SALIVARY_GLAND_BIOPSY/{}.svs'.format(svs_name) # slide = Slide(svs_path) # tile = slide.get_thumb() # save_path = '/media/totem_disk/totem/guozunhu/Project/lp_4_hooknet/xml_out/ori/{}.png'.format(svs_name) # if os.path.exists(save_path): # continue # tile.save(save_path) # print(svs_name)