import torch
import torch.nn as nn
import os
from torchvision.models.resnet import *
import numpy as np
import matplotlib.pyplot as plt
import time
import cv2
from visualize import visualize_grid_attention_v2
class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)
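# SpatialAttention turns an (N, C, H, W) feature map into an (N, 1, H, W) mask with
# values in (0, 1); multiplying the mask back onto the features broadcasts it across
# all C channels (CBAM-style spatial attention).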
def draw_features(width, height, x, savename):
    tic = time.time()
    fig = plt.figure(figsize=(16, 16))
    fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, wspace=0.05, hspace=0.05)
    for i in range(width * height):
        plt.subplot(height, width, i + 1)
        plt.axis('off')
        img = x[0, i, :, :]
        pmin = np.min(img)
        pmax = np.max(img)
        img = ((img - pmin) / (pmax - pmin + 0.000001)) * 255  # normalise the float map to [0, 1], then scale to 0-255
        img = img.astype(np.uint8)  # convert to uint8
        img = cv2.applyColorMap(img, cv2.COLORMAP_JET)  # apply a JET heat map
        img = img[:, :, ::-1]  # cv2 returns BGR; reverse the channel order to RGB for matplotlib
        plt.imshow(img)
        print("{}/{}".format(i, width * height))
    fig.savefig(savename, dpi=100)
    fig.clf()
    plt.close()
    print("time:{}".format(time.time() - tic))
make_pic = False
n_classes = 2
class Resnet50_att(nn.Module):
    def __init__(self, wsl_path, savepath=None):
        super(Resnet50_att, self).__init__()
        print("model: resnet50")
        checkpoint = os.path.join(wsl_path, 'resnet50_wsl.pth')
        model = resnet50()
        state_dict = torch.load(checkpoint)
        model.load_state_dict(state_dict)
        model.fc = torch.nn.Linear(2048, n_classes)
        self.model = model
        self.savepath = savepath
        # attention modules; channel attention is kept here for reference but not used
        # self.ca = ChannelAttention(self.inplanes)
        self.sa_1 = SpatialAttention()
        # a second spatial attention module intended for a deeper layer (unused in forward)
        self.sa_2 = SpatialAttention()
    def forward(self, x):
        origin_img = x
        att1 = 0
        att2 = 0
        # only the else branch below is used in training; the savepath branch just draws feature maps
        if self.savepath:  # draw features or not; call under torch.no_grad() so .numpy() works
            x = self.model.conv1(x)
            draw_features(8, 8, x.cpu().numpy(), "{}/f1_conv1.png".format(self.savepath))
            x = self.model.bn1(x)
            draw_features(8, 8, x.cpu().numpy(), "{}/f2_bn1.png".format(self.savepath))
            x = self.model.relu(x)
            draw_features(8, 8, x.cpu().numpy(), "{}/f3_relu.png".format(self.savepath))
            x = self.model.maxpool(x)
            draw_features(8, 8, x.cpu().numpy(), "{}/f4_maxpool.png".format(self.savepath))
            x = self.model.layer1(x)
            draw_features(16, 16, x.cpu().numpy(), "{}/f5_layer1.png".format(self.savepath))
            x = self.model.layer2(x)
            draw_features(16, 32, x.cpu().numpy(), "{}/f6_layer2.png".format(self.savepath))
            x = self.model.layer3(x)
            draw_features(32, 32, x.cpu().numpy(), "{}/f7_layer3.png".format(self.savepath))
            x = self.model.layer4(x)
            draw_features(32, 32, x.cpu().numpy()[:, 0:1024, :, :], "{}/f8_layer4_1.png".format(self.savepath))
            draw_features(32, 32, x.cpu().numpy()[:, 1024:2048, :, :], "{}/f8_layer4_2.png".format(self.savepath))
            x = self.model.avgpool(x)
            plt.plot(np.linspace(1, 2048, 2048), x.cpu().numpy()[0, :, 0, 0])
            plt.savefig("{}/f9_avgpool.png".format(self.savepath))
            plt.clf()
            plt.close()
            x = x.view(x.size(0), -1)
            x = self.model.fc(x)
            plt.plot(np.linspace(1, n_classes, n_classes), x.cpu().numpy()[0, :])
            plt.savefig("{}/f10_fc.png".format(self.savepath))
            plt.clf()
            plt.close()
        else:
            x = self.model.conv1(x)
            x = self.model.bn1(x)
            x = self.model.relu(x)
            att1 = self.sa_1(x)
            x = att1 * x
            # x = self.sa_1(x) * x
            x = self.model.maxpool(x)
            x = self.model.layer1(x)
            # att2 = self.sa_2(x)
            # x = att2 * x
            x = self.model.layer2(x)
            x = self.model.layer3(x)
            x = self.model.layer4(x)
            # x = self.sa_2(x) * x
            x = self.model.avgpool(x)
            x = x.view(x.size(0), -1)
            x = self.model.fc(x)
        # return x
        return x, att1
# directory that holds the pretrained weight file resnet50_wsl.pth
wsl_path = ""
model = Resnet50_att(wsl_path=wsl_path)
inputs = ""  # placeholder: a batch of images with shape (N, 3, H, W)
outputs, att = model(inputs)  # the model returns (logits, attention map)
att1 = att.detach().cpu().numpy()
save_path_1 = "/media/zhujunjie/dataset/attVision/att1"
# img_ids and dir_train_img come from the dataset that produced `inputs`
for index, image_id in enumerate(img_ids):
    img_path = os.path.join(dir_train_img, image_id + '.jpg')
    visualize_grid_attention_v2(img_path,
                                save_path=save_path_1,
                                attention_mask=att1[index][0],
                                save_image=True,
                                save_original_image=True,
                                quality=100)
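For reference, with a 224x224 input the attention map returned above is taken right after conv1/bn1/relu (stride 2), so `att` has shape (N, 1, 112, 112) and `att1[index][0]` is the 2-D mask for one image. A quick shape check on random data, assuming the weights above loaded successfully (illustrative only, not part of the original script):

# Shape check with random data (illustrative only).
dummy = torch.randn(4, 3, 224, 224)
with torch.no_grad():
    logits, att_map = model(dummy)
print(logits.shape)         # torch.Size([4, 2]): one logit per class
print(att_map.shape)        # torch.Size([4, 1, 112, 112]): one spatial mask per image
print(att_map[0, 0].shape)  # torch.Size([112, 112]): the 2-D mask passed to the visualizer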

Data cleaning

# accumulators for the validation sweep; valloader, model, device, logger,
# WORK_DIR, epoch and fold come from the surrounding training script
fake_predict = pd.DataFrame(columns=['image_id', 'p1-p0'])
prob_all, lable_all = [], []
for step, batch in enumerate(valloader):
    if step % 1000 == 0:
        logger.info('Valid step {} of {}'.format(step, len(valloader)))
    inputs = batch["image"]
    inputs = inputs.to(device, dtype=torch.float)
    outputs = model(inputs, meta_inputs)  # meta_inputs: the extra metadata input this model variant expects, prepared earlier in the script
    sfx = nn.Softmax(dim=-1)(outputs)
    sfx_cpu = sfx.detach().cpu().numpy()
    # sfx = sfx.detach().cpu().numpy()
    # column 0 holds the predicted probability of the negative class, column 1 of the positive class
    predict_per = sfx.argmax(dim=1)
    prob = sfx_cpu[:, 1]  # probability of the positive class (the original took argmax indices here, which are not probabilities)
    truel = batch['target'].detach().cpu().numpy()
    # keep the misclassified samples: p1-p0 > 0 means a negative sample was classified as positive,
    # p1-p0 < 0 means a positive sample was classified as negative
    for i in range(len(predict_per)):
        if predict_per[i] != truel[i]:
            a = {'image_id': batch['image_id'][i], 'p1-p0': sfx_cpu[i][1] - sfx_cpu[i][0]}
            # DataFrame.append works in pandas < 2.0; with newer pandas, collect rows in a list and build the frame once
            fake_predict = fake_predict.append(a, ignore_index=True)
    prob_all.extend(prob)
    lable_all.extend(truel)
probability_outfile = os.path.join(WORK_DIR,
                                   'probability/errprob_attUResnet50_ep{}_fold{}.csv'.format(epoch, fold))
fake_predict.to_csv(probability_outfile, index=False)

Open the exported probability file in Excel, sort it by the p1-p0 column, select a portion of the rows with the largest and smallest values, copy them into a new sheet, and save it as a CSV file: delImg.csv (a pandas sketch of the same step follows).
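The same selection can be done in pandas instead of Excel. A minimal sketch, assuming the error file produced above; the cut of 100 rows from each end is only an illustrative choice:

import pandas as pd

# Pick the rows with the largest and smallest p1-p0 values.
errprob = pd.read_csv('errprob_attUResnet50_ep0_fold0.csv')  # placeholder path/epoch/fold
errprob = errprob.sort_values('p1-p0')
n_extreme = 100                                              # how many rows to take from each end (assumption)
del_img = pd.concat([errprob.head(n_extreme), errprob.tail(n_extreme)])
del_img.to_csv('delImg.csv', index=False)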


# Delete selected records from the CSV

import os
import pandas as pd

dir_train_img = '/media/zhujunjie/dataset/baMelanoma_data'
path_img = '/media/zhujunjie/dataset/baMelanoma_data/train'
train_csv_path = '/media/zhujunjie/dataset/train.csv'
del_csv_path = '/media/zhujunjie/dataset/delImg.csv'
del_train = pd.read_csv(del_csv_path)
trndf = pd.read_csv(train_csv_path)
print('del_csv_img shape {} {}'.format(*del_train.shape))
print('trn_csv_img shape {} {}'.format(*trndf.shape))
del_pngs = [img_id for img_id in del_train['image_id']]
print('Count of del_pngs : {}'.format(len(del_pngs)))
# drop the selected image ids
trndf = trndf.set_index('image_id')
trndf.drop(index=del_pngs, axis=0, inplace=True)
# restore image_id as a regular column
trndf = trndf.reset_index()
# save the new label file with the selected rows removed
print('new_csv_img shape {} {}'.format(*trndf.shape))
trndf.to_csv(os.path.join(dir_train_img, "new_train.csv"), index=False)
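As a quick check that the drop worked, none of the removed image ids should remain in the new label file. A small sketch, continuing from the script above:

# Verify that no dropped id survived in new_train.csv.
check = pd.read_csv(os.path.join(dir_train_img, "new_train.csv"))
remaining = check['image_id'].isin(del_pngs).sum()
print('removed ids still present: {}'.format(remaining))  # expected: 0
assert remaining == 0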