requirements.txt
scikit-learn
pandas
efficientnet_pytorch
数据分析
# Collect the (height, width, channels) shape of every training image so the
# size distribution of the dataset can be inspected.
import os

import cv2

img_path = "./dataset/train/images/"
img_list = [img_path + name for name in os.listdir(img_path)]

shape = []
for img in img_list:
    arr = cv2.imread(img)
    if arr is None:
        # cv2.imread returns None (instead of raising) for files it cannot
        # decode, e.g. stray non-image files in the directory; skip them.
        continue
    shape.append(arr.shape)
训练
主要修改模型的结构：class VisitNet(nn.Module)。注意：训练脚本与预测脚本中的 VisitNet 必须使用相同的网络结构，否则训练保存的权重无法在预测时加载。
# -*- coding: utf-8 -*-
# K-fold training script for a 4-class image classifier.
#
# Expected layout of the training set (one sub-directory per integer label):
#
#   train_dataset/
#   ├── 0/xx.jpg ...
#   ├── 1/xx.jpg ...
#   ├── 2/xx.jpg ...
#   └── 3/xx.jpg ...
#
# For every fold the checkpoint with the best validation top-1 accuracy is
# written to ./efficientnet<fold>.pt and progress is appended to log.txt.
import argparse
import datetime
import glob
import os
import pdb
import sys
import time
import traceback

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from efficientnet_pytorch import EfficientNet
from PIL import Image
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset
from tqdm import tqdm

torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

LOG_FILE = "log.txt"
NUM_CLASSES = 4
# Fall back to the CPU so the script can at least be smoke-tested without a GPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# glob returns files in arbitrary filesystem order; sort so that the seeded
# KFold below produces the same folds on every run and machine.
train_jpg = np.array(sorted(glob.glob('./train_dataset/*/*')))
print(train_jpg)

# Start every run with a fresh log.
if os.path.exists(LOG_FILE):
    os.remove(LOG_FILE)


def _log(message):
    """Print *message* and append it as one line to the run log."""
    print(message)
    with open(LOG_FILE, "a+") as f:
        f.write(message + "\n")


class QRDataset(Dataset):
    """Image dataset whose label is the name of each file's parent directory.

    Args:
        img_path: indexable sequence of image file paths.
        transform: optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, img_path, transform=None):
        self.img_path = img_path
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at *index*."""
        path = self.img_path[index]
        img = Image.open(path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        # Take the label from the parent directory with os.path so the
        # lookup does not depend on '/' being the path separator.
        label = int(os.path.basename(os.path.dirname(path)))
        # CrossEntropyLoss requires int64 targets.
        return img, torch.tensor(label, dtype=torch.long)

    def __len__(self):
        return len(self.img_path)


def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy (in percent) for each k in *topk*.

    Args:
        output: score tensor; classes are ranked along dimension 1.
        target: tensor of class indices, one per sample.
        topk: iterable of k values.

    Returns:
        A list with one single-element tensor per requested k.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # ``correct`` comes from a transposed tensor, so for k > 1 the
            # slice is not contiguous and .view(-1) raises; reshape copies
            # when necessary.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res


class AverageMeter(object):
    """Track the latest value and the running weighted average of a metric."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record *val* as observed over *n* samples."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    """Format a set of meters as one ``[batch/total]`` progress line."""

    def __init__(self, num_batches, *meters):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = ""

    def pr2int(self, batch):
        """Print the progress line for *batch* and append it to the log."""
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        _log('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'


class VisitNet(nn.Module):
    """Pretrained backbone with its classification head resized.

    Args:
        backbone: ``'resnet18'`` or any model name accepted by
            ``EfficientNet.from_pretrained``. The default matches the
            ``efficientnet<fold>.pt`` checkpoint name written by this script.
        num_classes: size of the output layer.

    The backbone is stored as ``self.resnet`` regardless of its type so the
    state-dict key prefix stays stable.
    """

    def __init__(self, backbone='efficientnet-b4', num_classes=NUM_CLASSES):
        super(VisitNet, self).__init__()
        if backbone == 'resnet18':
            model = models.resnet18(pretrained=True)
            model.avgpool = nn.AdaptiveAvgPool2d(1)
            model.fc = nn.Linear(model.fc.in_features, num_classes)
        else:
            model = EfficientNet.from_pretrained(backbone)
            model._fc = nn.Linear(model._fc.in_features, num_classes)
        self.resnet = model

    def forward(self, img):
        return self.resnet(img)


def validate(val_loader, model, criterion):
    """Evaluate *model* on *val_loader* and log top-1 / top-2 accuracy.

    Returns:
        The ``AverageMeter`` holding top-1 accuracy; ``.avg`` is a float.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top2 = AverageMeter('Acc@2', ':6.2f')

    model.eval()
    with torch.no_grad():
        end = time.time()
        for images, target in val_loader:
            images = images.to(DEVICE)
            target = target.to(DEVICE)

            output = model(images)
            loss = criterion(output, target)

            acc1, acc2 = accuracy(output, target, topk=(1, 2))
            n = images.size(0)
            losses.update(loss.item(), n)
            # .item() keeps the meters as plain floats instead of tensors.
            top1.update(acc1.item(), n)
            top2.update(acc2.item(), n)

            batch_time.update(time.time() - end)
            end = time.time()

    # The second metric is top-2 (see topk above), so label it as such.
    _log(' * Acc@1 {top1.avg:.3f} Acc@2 {top2.avg:.3f}'.format(top1=top1, top2=top2))
    return top1


def predict(test_loader, model, tta=10):
    """Sum the raw model outputs over *tta* passes through *test_loader*.

    Returns:
        A NumPy array of summed outputs with one row per sample, or ``None``
        when ``tta`` is smaller than 1.
    """
    model.eval()

    test_pred_tta = None
    for _ in range(tta):
        batch_outputs = []
        with torch.no_grad():
            for images, _target in test_loader:
                # The original passed an undefined ``path`` as a second
                # argument here; VisitNet.forward takes the image batch only.
                output = model(images.to(DEVICE))
                batch_outputs.append(output.cpu().numpy())
        test_pred = np.vstack(batch_outputs)

        if test_pred_tta is None:
            test_pred_tta = test_pred
        else:
            test_pred_tta += test_pred

    return test_pred_tta


def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch, logging progress every 100 batches.

    ``epoch`` is accepted for call-site compatibility and is not used.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, losses, top1)

    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        images = images.to(DEVICE, non_blocking=True)
        target = target.to(DEVICE, non_blocking=True)

        output = model(images)
        loss = criterion(output, target)

        acc1, = accuracy(output, target, topk=(1,))
        n = images.size(0)
        losses.update(loss.item(), n)
        top1.update(acc1.item(), n)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % 100 == 0:
            progress.pr2int(i)


skf = KFold(n_splits=2, random_state=233, shuffle=True)
for flod_idx, (train_idx, val_idx) in enumerate(skf.split(train_jpg, train_jpg)):
    print(flod_idx, train_idx, val_idx)

    train_loader = torch.utils.data.DataLoader(
        QRDataset(
            train_jpg[train_idx],
            transforms.Compose([
                transforms.Resize((512, 512)),
                transforms.RandomAffine(10),
                transforms.ColorJitter(hue=.05, saturation=.05),
                transforms.RandomCrop((224, 224)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]),
        ),
        batch_size=2, shuffle=True, num_workers=8, pin_memory=True,
    )

    val_loader = torch.utils.data.DataLoader(
        QRDataset(
            train_jpg[val_idx],
            transforms.Compose([
                transforms.Resize((512, 512)),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]),
        ),
        batch_size=1, shuffle=False, num_workers=8, pin_memory=True,
    )

    model = VisitNet().to(DEVICE)
    criterion = nn.CrossEntropyLoss().to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters())
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.85)
    best_acc = 0.0

    for epoch in range(120):
        _log('Epoch: ' + str(epoch))

        train(train_loader, model, criterion, optimizer, epoch)
        val_acc = validate(val_loader, model, criterion)

        if val_acc.avg > best_acc:
            best_acc = val_acc.avg
            torch.save(model.state_dict(), './efficientnet{0}.pt'.format(flod_idx))

        # Step the LR schedule after the epoch's optimizer updates; stepping
        # it first skips the initial learning-rate value.
        scheduler.step()
预测
# -*- coding: utf-8 -*-
import os, sys, glob, argparse
import pandas as pd
import numpy as np
from tqdm import tqdm
import time, datetime
import pdb, traceback
import cv2
# import imagehash
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from efficientnet_pytorch import EfficientNet
# model = EfficientNet.from_pretrained('efficientnet-b4')
import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset
class QRDataset(Dataset):
    """Unlabelled image dataset used at prediction time.

    Args:
        img_path: indexable sequence of image file paths.
        transform: optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, img_path, transform=None):
        self.img_path = img_path
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, placeholder_label)`` for the sample at *index*.

        Test images have no ground truth, so a constant one-element tensor
        is returned in the label slot to keep the ``(input, target)`` batch
        structure.
        """
        img = Image.open(self.img_path[index]).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.from_numpy(np.array([1]))

    def __len__(self):
        return len(self.img_path)
class VisitNet(nn.Module):
    """EfficientNet-b4 backbone with its classification head resized.

    Args:
        num_classes: size of the output layer (default 4).

    The backbone is stored as ``self.resnet``, so state-dict keys carry the
    ``resnet.`` prefix. A checkpoint can only be loaded if it was saved from
    a ``VisitNet`` built with the same backbone and head size.
    """

    def __init__(self, num_classes=4):
        super(VisitNet, self).__init__()
        model = EfficientNet.from_pretrained('efficientnet-b4')
        # Read the head's input width from the backbone instead of
        # hard-coding it.
        model._fc = nn.Linear(model._fc.in_features, num_classes)
        self.resnet = model

    def forward(self, img):
        """Return the raw class scores for the image batch *img*."""
        return self.resnet(img)
def predict(test_loader, model, tta=10):
    """Sum the raw model outputs over *tta* passes through *test_loader*.

    Args:
        test_loader: iterable yielding ``(input, target)`` batches; the
            target is ignored.
        model: the network to evaluate; it is switched to eval mode.
        tta: number of passes over the loader. The passes only differ if the
            loader's transforms are random.

    Returns:
        A NumPy array holding the summed outputs, one row per sample, or
        ``None`` when ``tta`` is smaller than 1.

    Raises:
        ValueError: from ``np.vstack`` if the loader yields no batches.
    """
    model.eval()

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    test_pred_tta = None
    for _ in range(tta):
        batch_outputs = []
        with torch.no_grad():
            for images, _target in test_loader:
                output = model(images.to(device))
                batch_outputs.append(output.cpu().numpy())
        test_pred = np.vstack(batch_outputs)

        if test_pred_tta is None:
            test_pred_tta = test_pred
        else:
            test_pred_tta += test_pred

    return test_pred_tta
# Ensemble prediction: sum the outputs of every fold checkpoint over the test
# images listed in upload.csv and write the arg-max class per image to tmp.csv.
upload_csv_file = "./dataset/test/upload.csv"
# NOTE(review): read_csv treats the first row as a header; confirm that
# upload.csv really has one, otherwise the first image is silently dropped.
result = pd.read_csv(upload_csv_file)
test_jpg = np.array(["./dataset/test/" + name for name in result.iloc[:, 0].values])

# The loader does not depend on the checkpoint, so build it once.
test_loader = torch.utils.data.DataLoader(
    QRDataset(
        test_jpg,
        transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
    ),
    batch_size=64, shuffle=False, num_workers=16, pin_memory=True,
)

test_pred = None
for model_path in ["efficientnet0.pt", "efficientnet1.pt"]:
    print(model_path)

    model = VisitNet().cuda()
    # Deserialize onto the CPU first so the checkpoint loads regardless of
    # which GPU index it was saved from; load_state_dict then copies the
    # weights into the CUDA model.
    model.load_state_dict(torch.load(model_path, map_location="cpu"))

    # NOTE(review): the transforms above are deterministic, so the 5 passes
    # produce the same outputs; they only act as test-time augmentation if
    # random transforms are added.
    fold_pred = predict(test_loader, model, 5)
    if test_pred is None:
        test_pred = fold_pred
    else:
        test_pred += fold_pred

test_csv = pd.DataFrame()
test_csv[0] = list(result.iloc[:, 0].values)
test_csv[1] = np.argmax(test_pred, 1)
test_csv.to_csv('tmp.csv', index=False, header=False)
