这部分是重点中的重点:如何计算loss和各种评价指标。
按照我的优先级,先重点看loss计算的代码。(事实上,我在这段代码上已经花了两天时间,仍然有很多疑问。)
import glob
import random
import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
from pathlib import Path
from . import torch_utils # , google_utils
# Global display configuration, applied once when the module is imported
matplotlib.rc('font', size=11)  # default matplotlib font size

# Wide tensor / array printing with 5 significant digits
torch.set_printoptions(precision=5, linewidth=1320, profile='long')
np.set_printoptions(formatter={'float_kind': '{:11.5g}'.format}, linewidth=320)  # short-g float format

# Keep OpenCV single-threaded (so it can be used together with the PyTorch DataLoader)
cv2.setNumThreads(0)
def float3(x):
    """Return `x` formatted to three decimal places and converted back to a float."""
    return float('{:.3f}'.format(x))
def init_seeds(seed=0):
    """Seed Python's `random` and NumPy with `seed`, then delegate to `torch_utils.init_seeds`."""
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
    torch_utils.init_seeds(seed=seed)
def load_classes(path):
    """Read a `*.names` file and return its non-empty lines as a list of class names."""
    with open(path, 'r') as f:
        lines = f.read().split('\n')
    # Drop empty strings (e.g. the one produced by a trailing newline)
    return [line for line in lines if line]
def model_info(model, report='summary'):
    """
    Print a description of a PyTorch model.

    Arguments:
        model: object exposing `parameters()` and `named_parameters()` (e.g. an nn.Module).
        report: 'full' additionally prints one line per named parameter (index, name,
            requires_grad, element count, shape, mean, std); any other value prints only
            the one-line summary.
    """
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    # Compare by value: `is` tests object identity and only matches interned string literals
    if report == 'full':
        print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
        for i, (name, p) in enumerate(model.named_parameters()):
            name = name.replace('module_list.', '')
            print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
                  (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
    print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g))
def labels_to_class_weights(labels, nc=80):
    """
    Compute normalised inverse-frequency class weights from training labels.

    Arguments:
        labels: sequence of 2-D arrays whose column 0 holds the class index
            (remaining columns are xywh according to the original comment).
        nc: number of classes, i.e. the minimum length of the returned weight vector.

    Returns:
        torch.Tensor of weights that sum to 1; classes that never occur are treated as
        if they occurred once.
    """
    labels = np.concatenate(labels, 0)  # stack the labels of all images
    # Builtin int replaces the `np.int` alias, which was removed in NumPy 1.24
    classes = labels[:, 0].astype(int)  # labels = [class xywh]
    weights = np.bincount(classes, minlength=nc)  # occurrences per class
    weights[weights == 0] = 1  # replace empty bins with 1 to avoid division by zero
    weights = 1 / weights  # inverse frequency
    weights /= weights.sum()  # normalize
    return torch.Tensor(weights)
def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
    """
    Compute one sampling weight per image from per-class weights.

    Arguments:
        labels: sequence with one 2-D array per image; column 0 holds the class index.
        nc: number of classes.
        class_weights: array with `nc` elements (it is reshaped to (1, nc)); the default
            is only valid for nc == 80 and is never modified here.

    Returns:
        1-D array with, for every image, the sum of the class weights of its labels.
    """
    n = len(labels)
    # Builtin int replaces the `np.int` alias, which was removed in NumPy 1.24
    class_counts = np.array([np.bincount(labels[i][:, 0].astype(int), minlength=nc) for i in range(n)])
    image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
    # index = random.choices(range(n), weights=image_weights, k=1)  # weight image sample
    return image_weights
def coco_class_weights():  # frequency of each class in coco train2014
    """Return normalised inverse-frequency weights for the 80 hard-coded class counts."""
    counts = [
        187437, 4955, 30920, 6033, 3838, 4332, 3160, 7051, 7677, 9167,
        1316, 1372, 833, 6757, 7355, 3302, 3776, 4671, 6769, 5706,
        3908, 903, 3686, 3596, 6200, 7920, 8779, 4505, 4272, 1862,
        4698, 1962, 4403, 6659, 2402, 2689, 4012, 4175, 3411, 17048,
        5637, 14553, 3923, 5539, 4289, 10084, 7018, 4314, 3099, 4638,
        4939, 5543, 2038, 4004, 5053, 4578, 27292, 4113, 5931, 2905,
        11174, 2873, 4036, 3415, 1517, 4122, 1980, 4464, 1190, 2302,
        156, 3933, 1877, 17630, 4337, 4624, 1075, 3468, 135, 1380,
    ]
    inverse = 1 / torch.Tensor(counts)
    return inverse / inverse.sum()  # normalise so the weights sum to 1
def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
    """Return a list whose position is the 80-class index and whose value is the 91-class id."""
    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    # The mapping is 1..90 with these ten ids left out
    skipped = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}
    return [class_id for class_id in range(1, 91) if class_id not in skipped]
下面这个函数有教学意义,展示了如何做网络权重初始化,但作者实际上并没有用到它。
def weights_init_normal(m):
    """
    Re-initialise module `m` in place according to its class name.

    Classes whose name contains 'Conv' get weights drawn from N(0, 0.03); otherwise classes
    whose name contains 'BatchNorm2d' get weights from N(1, 0.03) and a zero bias.
    Any other module is left untouched.
    """
    kind = type(m).__name__
    if 'Conv' in kind:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
    elif 'BatchNorm2d' in kind:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
        torch.nn.init.constant_(m.bias.data, 0.0)
一些常用的函数
def xyxy2xywh(x):
    """
    Convert boxes from [x1, y1, x2, y2] to [center x, center y, width, height].

    `x` is a 2-D torch.Tensor or NumPy array with one box per row; the result is a new
    zero-initialised array of the same type and shape with columns 0-3 filled in.
    """
    out = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    center_x = (x[:, 0] + x[:, 2]) / 2
    center_y = (x[:, 1] + x[:, 3]) / 2
    width = x[:, 2] - x[:, 0]
    height = x[:, 3] - x[:, 1]
    out[:, 0], out[:, 1], out[:, 2], out[:, 3] = center_x, center_y, width, height
    return out
def xywh2xyxy(x):
    """
    Convert boxes from [center x, center y, width, height] to [x1, y1, x2, y2].

    `x` is a 2-D torch.Tensor or NumPy array with one box per row; the result is a new
    zero-initialised array of the same type and shape with columns 0-3 filled in.
    """
    out = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    left = x[:, 0] - x[:, 2] / 2
    top = x[:, 1] - x[:, 3] / 2
    right = x[:, 0] + x[:, 2] / 2
    bottom = x[:, 1] + x[:, 3] / 2
    out[:, 0], out[:, 1], out[:, 2], out[:, 3] = left, top, right, bottom
    return out
下面的代码暂时没用得上,没看
def scale_coords(img1_shape, coords, img0_shape):
    """
    Rescale xyxy `coords` from `img1_shape` back to `img0_shape`.

    Shapes are indexed as (height, width). `coords` is modified in place (padding removed,
    divided by the gain, then clipped with clip_coords) and also returned.
    """
    gain = max(img1_shape) / max(img0_shape)  # gain = old / new
    pad_x = (img1_shape[1] - img0_shape[1] * gain) / 2  # x padding
    pad_y = (img1_shape[0] - img0_shape[0] * gain) / 2  # y padding
    coords[:, [0, 2]] -= pad_x
    coords[:, [1, 3]] -= pad_y
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords
def clip_coords(boxes, img_shape):
    """Clamp xyxy `boxes` in place to an image of shape (height, width); returns None."""
    height, width = img_shape[0], img_shape[1]
    x_cols = boxes[:, [0, 2]]
    boxes[:, [0, 2]] = x_cols.clamp(min=0, max=width)  # clip x
    y_cols = boxes[:, [1, 3]]
    boxes[:, [1, 3]] = y_cols.clamp(min=0, max=height)  # clip y
下面是计算mAP的相关代码(需要看,但还没来得及看)。
def ap_per_class(tp, conf, pred_cls, target_cls):
    """ Compute precision, recall, average precision and F1 for every ground-truth class.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp:    True positives (NumPy array, one entry per prediction).
        conf:  Objectness value from 0-1 (NumPy array).
        pred_cls: Predicted object classes (NumPy array).
        target_cls: True object classes (NumPy array).
    # Returns
        (p, r, ap, f1, classes): per-class precision, recall, AP (via compute_ap) and F1
        as arrays, plus the unique target classes cast to int32.
    """
    # Order all predictions by decreasing objectness
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    # Classes present in the ground truth
    unique_classes = np.unique(target_cls)

    ap, p, r = [], [], []
    for cls in unique_classes:
        selected = pred_cls == cls
        n_gt = (target_cls == cls).sum()  # Number of ground truth objects
        n_p = selected.sum()  # Number of predicted objects

        if n_p == 0 and n_gt == 0:
            continue
        if n_p == 0 or n_gt == 0:
            # No predictions or no ground truth for this class: everything is zero
            ap.append(0)
            r.append(0)
            p.append(0)
            continue

        # Cumulative false / true positives along the confidence-sorted predictions
        hits = tp[selected]
        fpc = (1 - hits).cumsum()
        tpc = hits.cumsum()

        recall_curve = tpc / (n_gt + 1e-16)
        r.append(recall_curve[-1])

        precision_curve = tpc / (tpc + fpc)
        p.append(precision_curve[-1])

        # AP from recall-precision curve
        ap.append(compute_ap(recall_curve, precision_curve))

        # Plot
        # plt.plot(recall_curve, precision_curve)

    # F1 score (harmonic mean of precision and recall)
    p, r, ap = np.array(p), np.array(r), np.array(ap)
    f1 = 2 * p * r / (p + r + 1e-16)

    return p, r, ap, f1, unique_classes.astype('int32')
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The area under the monotonically decreasing precision envelope.
    """
    # Pad both curves with sentinel values
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # Precision envelope: running maximum taken from the right-hand end
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Indices at which recall changes value
    steps = np.where(mrec[1:] != mrec[:-1])[0]

    # Sum (delta recall) * precision over those steps
    return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])
iou的计算方法(包括GIoU的计算)
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False):
    """
    Return the IoU (or GIoU) of box1 to box2.

    Arguments:
        box1: tensor indexed as box1[0..3]; per the original comment a single box of 4 values.
        box2: 2-D tensor with one box per row (nx4); it is transposed before use.
        x1y1x2y2: True if boxes are corner coordinates, False if (center x, center y, w, h).
        GIoU: if True return Generalized IoU (https://arxiv.org/pdf/1902.09630.pdf).
    """
    box2 = box2.t()

    def corners(b):
        # Corner coordinates (x1, y1, x2, y2) of b in the selected input format
        if x1y1x2y2:
            return b[0], b[1], b[2], b[3]
        return b[0] - b[2] / 2, b[1] - b[3] / 2, b[0] + b[2] / 2, b[1] + b[3] / 2

    b1_x1, b1_y1, b1_x2, b1_y2 = corners(box1)
    b2_x1, b2_y1, b2_x2, b2_y2 = corners(box2)

    # Intersection area (negative overlaps clamp to 0)
    inter_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    inter_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    inter_area = inter_w * inter_h

    # Union area; the small epsilon guards against division by zero
    area1 = (b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16
    area2 = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
    union_area = area1 + area2 - inter_area

    iou = inter_area / union_area
    if not GIoU:
        return iou

    # Smallest box enclosing both boxes
    c_x1, c_x2 = torch.min(b1_x1, b2_x1), torch.max(b1_x2, b2_x2)
    c_y1, c_y2 = torch.min(b1_y1, b2_y1), torch.max(b1_y2, b2_y2)
    c_area = (c_x2 - c_x1) * (c_y2 - c_y1)  # convex area
    return iou - (c_area - union_area) / c_area  # GIoU
def wh_iou(box1, box2):
    """
    Return the IoU of wh1 to wh2, i.e. of boxes described only by (width, height).

    Per the original comment box1 holds 2 values and box2 is nx2 (one box per row);
    box2 is transposed before use so the result has one value per row of box2.
    """
    box2 = box2.t()
    w1, h1 = box1[0], box1[1]
    w2, h2 = box2[0], box2[1]

    # Overlap of the two sizes
    inter_area = torch.min(w1, w2) * torch.min(h1, h2)

    # Union; the small epsilon guards against division by zero
    area1 = w1 * h1 + 1e-16
    union_area = area1 + w2 * h2 - inter_area

    return inter_area / union_area  # iou
重中之中,计算loss的代码
yolov3论文中的loss计算方法如下图所示。
说明一下p。模型的输出分两种情况:
- if self.training:
    output.append(YOLOLayer层的输出),yolo层在训练时的输出为(bs, 3, ng, ng, 85),ng是该层的特征图尺寸
    return output,output维度(3, pi),其中pi为(bs, 3, ng, ng, 85)
- else:
    output.append(YOLOLayer层的输出),yolo层的输出包括:io(bs, ng*ng*3, 85)和p(bs, 3, ng, ng, 85)
    io, p = list(zip(*output)),做变形
    io = torch.cat(io, 1)  # 沿第1维拼接,拼接以后的维度应该是(bs, Σ(ng*ng*3), 85),例如输入为416×416时是(bs, 10647, 85)
    return io, p,p维度(3, pi),其中pi为(bs, 3, ng, ng, 85)
def compute_loss(p, targets, model, giou_loss=True):  # predictions, targets, model
    """
    Compute the YOLO training loss summed over all YOLO layers.

    Arguments:
        p: sequence of per-layer raw predictions. The last axis is used as: 0:2 xy logits,
            2:4 wh, 4 objectness logit, 5: class logits. (Original author's note: each entry
            is (bs, 3, ng, ng, 85); at test time the second model output is passed here.)
        targets: ground-truth labels, forwarded unchanged to build_targets().
            (Original author's note: (num_labels, 6) = image_id, class, x, y, w, h.)
        model: network providing the hyper-parameter dict `model.hyp`; also forwarded
            to build_targets().
        giou_loss: if True the box term is mean(1 - GIoU); otherwise MSE on xy and on wh.

    Returns:
        (loss, parts): the total loss tensor and the detached tensor
        [lxy, lwh, lobj, lcls, loss].
    """
    # Tensor constructor living on the same device type as the predictions
    make = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor
    lxy, lwh, lcls, lobj = make([0]), make([0]), make([0]), make([0])

    # One list entry per YOLO layer; see build_targets() for the meaning of each list
    txy, twh, tcls, tbox, indices, anchor_vec = build_targets(model, targets)
    hyp = model.hyp  # hyperparameters

    # Criteria: mean squared error, and BCE-with-logits with the positive weights
    # hyp['cls_pw'] (class term) and hyp['obj_pw'] (objectness term)
    mse = nn.MSELoss()
    bce_cls = nn.BCEWithLogitsLoss(pos_weight=make([hyp['cls_pw']]))
    bce_obj = nn.BCEWithLogitsLoss(pos_weight=make([hyp['obj_pw']]))
    # CE = nn.CrossEntropyLoss()  # (weight=model.class_weights)

    batch_size = p[0].shape[0]
    gain = batch_size / 64  # loss gain

    for i, layer_pred in enumerate(p):  # layer i predictions
        b, a, gj, gi = indices[i]  # image_id, anchor, gridy, gridx
        tobj = torch.zeros_like(layer_pred[..., 0])  # objectness target, 0 everywhere by default

        nb = len(b)  # number of targets assigned to this layer
        if nb:
            # Positive samples: only these contribute to the box and class terms
            matched = layer_pred[b, a, gj, gi]  # predictions at the target cells / anchors
            tobj[b, a, gj, gi] = 1.0  # obj
            # matched[..., 2:4] = torch.sigmoid(matched[..., 2:4])  # wh power loss (uncomment)

            if giou_loss:
                # Decode xy with sigmoid and wh with exp * anchor, then compare with tbox
                pbox = torch.cat((torch.sigmoid(matched[..., 0:2]),
                                  torch.exp(matched[..., 2:4]) * anchor_vec[i]), 1)  # predicted
                giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True)  # giou computation
                lxy += (gain * hyp['giou']) * (1.0 - giou).mean()  # giou loss
            else:
                # Classic YOLO box loss: MSE on xy and on wh
                lxy += (gain * hyp['xy']) * mse(torch.sigmoid(matched[..., 0:2]), txy[i])  # xy loss
                lwh += (gain * hyp['wh']) * mse(matched[..., 2:4], twh[i])  # wh yolo loss

            # One-hot class targets with the same shape as the class logits
            onehot = torch.zeros_like(matched[..., 5:])
            onehot[range(nb), tcls[i]] = 1.0
            lcls += (gain * hyp['cls']) * bce_cls(matched[..., 5:], onehot)  # cls loss (BCE)
            # lcls += (gain * hyp['cls']) * CE(matched[..., 5:], tcls[i])  # cls loss (CE)

            # Append targets to text file
            # with open('targets.txt', 'a') as file:
            #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

        # Objectness is evaluated on every cell: negatives contribute only to this term
        lobj += (gain * hyp['obj']) * bce_obj(layer_pred[..., 4], tobj)  # obj loss

    loss = lxy + lwh + lobj + lcls
    return loss, torch.cat((lxy, lwh, lobj, lcls, loss)).detach()
下面的代码是计算loss前的准备工作。它把图像的真实标注和anchor进行比对,整理成与网络输出一一对应的形式,从而用于计算loss。
def build_targets(model, targets):
    """
    Match ground-truth targets to the anchors of every YOLO layer and build the loss targets.

    Arguments:
        model: network exposing `hyp['iou_t']` (read before unwrapping), `yolo_layers`
            (indices into `module_list`) and, per YOLO layer, `ng`, `nc` and `anchor_vec`.
            A DataParallel / DistributedDataParallel wrapper is unwrapped.
        targets: 2-D tensor with one row per label, columns (image index, class, x, y, w, h).
            x, y, w, h are multiplied by `layer.ng` here, so they are presumably normalised
            to [0, 1] - TODO confirm against the data loader.

    Returns:
        Six lists with one entry per YOLO layer:
            txy        - offset of each box centre inside its grid cell
            twh        - log(target wh / anchor wh)
            tcls       - class index
            tbox       - (x offset, y offset, w, h) in grid units, used for the GIoU loss
            indices    - tuple (image index, anchor index, grid y, grid x)
            anchor_vec - the anchor (w, h) paired with each kept target

    Raises:
        AssertionError: if a class index is not smaller than `layer.nc`.
    """
    # Target/anchor pairs with an IoU at or below this threshold are discarded
    iou_thres = model.hyp['iou_t']  # hyperparameter
    if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel):
        model = model.module

    nt = len(targets)  # number of labels in the batch
    txy, twh, tcls, tbox, indices, anchor_vec = [], [], [], [], [], []
    for i in model.yolo_layers:
        layer = model.module_list[i][0]

        # iou of targets-anchors
        t, a = targets, []  # t: working copy of the targets, a: anchor index per kept target
        gwh = t[:, 4:6] * layer.ng  # ground-truth wh scaled to grid units
        if nt:
            # One row of wh-IoUs per anchor, one column per target
            iou = torch.stack([wh_iou(x, gwh) for x in layer.anchor_vec], 0)

            use_best_anchor = False
            if use_best_anchor:
                iou, a = iou.max(0)  # best iou and anchor
            else:  # use all anchors
                na = len(layer.anchor_vec)  # number of anchors
                # Pair every anchor with every target: anchor-major flattening
                a = torch.arange(na).view((-1, 1)).repeat([1, nt]).view(-1)
                t = targets.repeat([na, 1])
                gwh = gwh.repeat([na, 1])
                iou = iou.view(-1)  # use all ious

            # reject anchors below iou_thres (OPTIONAL, increases P, lowers R)
            reject = True
            if reject:
                j = iou > iou_thres  # boolean mask of the pairs to keep
                t, a, gwh = t[j], a[j], gwh[j]

        # Indices
        b, c = t[:, :2].long().t()  # target image, class
        gxy = t[:, 2:4] * layer.ng  # grid x, y
        gi, gj = gxy.long().t()  # grid x, y indices
        indices.append((b, a, gj, gi))

        # XY coordinates: keep only the fractional offset inside the grid cell
        gxy -= gxy.floor()
        txy.append(gxy)

        # GIoU
        tbox.append(torch.cat((gxy, gwh), 1))  # xywh (grids)
        anchor_vec.append(layer.anchor_vec[a])  # anchor values selected by index a

        # Width and height: compute_loss decodes wh as exp(wh) * anchor, so the target is the inverse
        twh.append(torch.log(gwh / layer.anchor_vec[a]))  # wh yolo method
        # twh.append((gwh / layer.anchor_vec[a]) ** (1 / 3) / 2)  # wh power method

        # Class
        tcls.append(c)
        if c.shape[0]:
            # Class indices are zero-based, so the largest valid index is layer.nc - 1
            assert c.max() < layer.nc, 'Target classes exceed model classes'

    return txy, twh, tcls, tbox, indices, anchor_vec
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.5):
    """
    Filter raw detections by confidence and reduce overlapping boxes per class.

    For every image in `prediction`, column 4 (object confidence) is multiplied IN PLACE by
    the best class score, rows failing the confidence / minimum-size / finiteness checks are
    dropped, boxes are converted from (center x, center y, w, h) to (x1, y1, x2, y2) and
    overlapping boxes of the same class are reduced with the strategy hard-coded in
    `nms_style` (currently 'MERGE').

    Arguments:
        prediction: iterable of per-image 2-D tensors with columns
            (center x, center y, w, h, object_conf, class scores...).
        conf_thres: a detection is kept only if its combined confidence exceeds this value.
        nms_thres: IoU threshold used to decide that two same-class boxes overlap.

    Returns:
        A list with one entry per image: None if nothing survived, otherwise a tensor with rows
        (x1, y1, x2, y2, object_conf, class_conf, class) sorted by decreasing object_conf.
    """

    min_wh = 2  # (pixels) minimum box width and height

    output = [None] * len(prediction)
    for image_i, pred in enumerate(prediction):
        # Experiment: Prior class size rejection
        # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
        # a = w * h  # area
        # ar = w / (h + 1e-16)  # aspect ratio
        # n = len(w)
        # log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
        # shape_likelihood = np.zeros((n, 60), dtype=np.float32)
        # x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
        # from scipy.stats import multivariate_normal
        # for c in range(60):
        # shape_likelihood[:, c] =
        #   multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])

        # Multiply conf by class conf to get combined confidence
        class_conf, class_pred = pred[:, 5:].max(1)
        pred[:, 4] *= class_conf

        # Select only suitable predictions: confident enough, large enough, no inf/nan
        i = (pred[:, 4] > conf_thres) & (pred[:, 2:4] > min_wh).all(1) & torch.isfinite(pred).all(1)
        pred = pred[i]

        # If none are remaining => process next image
        if len(pred) == 0:
            continue

        # Select predicted classes
        class_conf = class_conf[i]
        class_pred = class_pred[i].unsqueeze(1).float()

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        pred[:, :4] = xywh2xyxy(pred[:, :4])
        # pred[:, 4] *= class_conf  # improves mAP from 0.549 to 0.551

        # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
        pred = torch.cat((pred[:, :5], class_conf.unsqueeze(1), class_pred), 1)

        # Get detections sorted by decreasing confidence scores
        pred = pred[(-pred[:, 4]).argsort()]

        det_max = []
        nms_style = 'MERGE'  # 'OR' (default), 'AND', 'MERGE' (experimental)
        for c in pred[:, -1].unique():
            dc = pred[pred[:, -1] == c]  # select class c
            n = len(dc)
            if n == 1:
                det_max.append(dc)  # No NMS required if only 1 prediction
                continue
            elif n > 100:
                dc = dc[:100]  # limit to first 100 boxes: https://github.com/ultralytics/yolov3/issues/117

            # Non-maximum suppression
            if nms_style == 'OR':  # default
                # METHOD1
                # ind = list(range(len(dc)))
                # while len(ind):
                # j = ind[0]
                # det_max.append(dc[j:j + 1])  # save highest conf detection
                # reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
                # [ind.pop(i) for i in reversed(reject)]

                # METHOD2
                while dc.shape[0]:
                    det_max.append(dc[:1])  # save highest conf detection
                    if len(dc) == 1:  # Stop if we're at the last detection
                        break
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'AND':  # requires overlap, single boxes erased
                while len(dc) > 1:
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    # NOTE(review): overlap test uses a literal 0.5 rather than nms_thres
                    if iou.max() > 0.5:
                        det_max.append(dc[:1])
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'MERGE':  # weighted mixture box
                while len(dc):
                    if len(dc) == 1:
                        det_max.append(dc)
                        break
                    i = bbox_iou(dc[0], dc) > nms_thres  # iou with other boxes
                    weights = dc[i, 4:5]  # confidences of the overlapping boxes
                    # Replace the top box by the confidence-weighted mean of all overlapping boxes
                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
                    det_max.append(dc[:1])
                    dc = dc[i == 0]  # keep only the boxes that did not overlap

            elif nms_style == 'SOFT':  # soft-NMS https://arxiv.org/abs/1704.04503
                sigma = 0.5  # soft-nms sigma parameter
                while len(dc):
                    if len(dc) == 1:
                        det_max.append(dc)
                        break
                    det_max.append(dc[:1])
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    dc = dc[1:]
                    dc[:, 4] *= torch.exp(-iou ** 2 / sigma)  # decay confidences
                    # dc = dc[dc[:, 4] > nms_thres]  # new line per https://github.com/ultralytics/yolov3/issues/362

        if len(det_max):
            det_max = torch.cat(det_max)  # concatenate
            output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # sort

    return output
def get_yolo_layers(model):
    """Return the indices of the entries in `model.module_defs` whose 'type' is 'yolo'."""
    return [idx for idx, module_def in enumerate(model.module_defs)
            if module_def['type'] == 'yolo']  # [82, 94, 106] for yolov3
def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
    """
    Save a copy of checkpoint `filename` whose 'optimizer' entry is an empty list.

    The copy is written to `filename` with every '.pt' replaced by '_lite.pt'
    (original note: this makes the file about 2/3 smaller).
    """
    checkpoint = torch.load(filename, map_location='cpu')
    checkpoint['optimizer'] = []
    lite_path = filename.replace('.pt', '_lite.pt')
    torch.save(checkpoint, lite_path)
def coco_class_count(path='../coco/labels/train2014/'):
    """
    Build a histogram of class occurrences over all label files in `path`.

    Every file matching `<path>/*.*` is read as rows of 5 numbers with the class index in
    column 0. Progress is printed as "<file index> <file count>".

    Returns:
        int32 array of length 80 with the number of labels per class. (Previously the
        histogram was computed but never returned.)
    """
    nc = 80  # number classes
    x = np.zeros(nc, dtype='int32')
    files = sorted(glob.glob('%s/*.*' % path))
    for i, file in enumerate(files):
        labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)  # reshape also handles one-line files
        x += np.bincount(labels[:, 0].astype('int32'), minlength=nc)
        print(i, len(files))
    return x
def coco_only_people(path='../coco/labels/val2014/'):
    """
    Print "<label count> <file>" for every label file in `path` whose classes are all 0.

    Files matching `<path>/*.*` are read as rows of 5 numbers with the class index in
    column 0 (the original comment calls class 0 "people").
    """
    for file in sorted(glob.glob('%s/*.*' % path)):
        labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
        classes = labels[:, 0]
        if not (classes != 0).any():
            print(labels.shape[0], file)
def select_best_evolve(path='../../Downloads/evolve*.txt'):  # from utils.utils import *; select_best_evolve()
    """For every file matching `path`, print the file name and the row whose column 2 is largest."""
    for file in sorted(glob.glob(path)):
        rows = np.loadtxt(file, dtype=np.float32)
        best = np.argmax(rows[:, 2])
        print(file, rows[best])
def kmeans_targets(path='./data/coco_64img.txt'):  # from utils.utils import *; kmeans_targets()
    """
    Run k-means (k=9) on the label widths/heights of a dataset and print the centroids.

    Arguments:
        path: text file listing one image path per line. The label file of each image is
            derived by replacing 'images' with 'labels' and the image extension with '.txt'.

    Label rows are expected as 5 non-negative numbers with columns 1-4 not above 1. Columns
    1-4 are scaled to pixels and then by 320 / max(image width, height) before clustering on
    columns 3:5. Missing label files are skipped silently; malformed ones are skipped with a
    warning. The centroids are printed sorted by area as "w, h, " pairs on one line.

    Raises:
        AssertionError: if `path` lists no images or no labels could be read at all.
    """
    with open(path, 'r') as f:
        img_files = [line for line in f.read().splitlines() if len(line) > 0]

    # Read shapes
    n = len(img_files)
    assert n > 0, 'No images found in %s' % path
    label_files = [x.replace('images', 'labels').
                   replace('.jpeg', '.txt').
                   replace('.jpg', '.txt').
                   replace('.bmp', '.txt').
                   replace('.png', '.txt') for x in img_files]
    shapes = []
    for img_file in tqdm(img_files, desc='Reading image shapes'):
        # Context manager closes each image file as soon as its size has been read
        with Image.open(img_file) as im:
            shapes.append(im.size)
    s = np.array(shapes)  # (width, height)

    # Read targets
    labels = [np.zeros((0, 5))] * n
    for i, file in enumerate(tqdm(label_files, desc='Reading labels')):
        try:
            with open(file, 'r') as f:
                rows = [x.split() for x in f.read().splitlines()]
        except OSError:
            continue  # missing or unreadable label file: this image contributes no labels

        try:
            l = np.array(rows, dtype=np.float32)
            if l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
        except (ValueError, AssertionError) as e:
            # Still best-effort (the file is skipped), but no longer silent
            print('Warning: skipping malformed label file %s (%s)' % (file, e))
            continue

        if l.shape[0]:
            l[:, [1, 3]] *= s[i][0]  # x, w to pixels
            l[:, [2, 4]] *= s[i][1]  # y, h to pixels
            l[:, 1:] *= 320 / max(s[i])  # rescale so the longer image side is 320
            labels[i] = l
    assert len(np.concatenate(labels, 0)) > 0, 'No labels found. Incorrect label paths provided.'

    # kmeans
    from scipy import cluster
    wh = np.concatenate(labels, 0)[:, 3:5]
    k = cluster.vq.kmeans(wh, 9)[0]
    k = k[np.argsort(k.prod(1))]  # sort centroids by area
    for x in k.ravel():
        print('%.1f, ' % x, end='')
# Plotting functions ---------------------------------------------------------------------------------------------------
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """
    Draw one bounding box, and optionally a filled label tag, onto `img` in place.

    Arguments:
        x: box as (x1, y1, x2, y2); each value is converted with int().
        img: image array; its first two dimensions set the default line thickness.
        color: box colour; three random 0-255 values are used when falsy.
        label: optional text drawn above the top-left corner.
        line_thickness: line thickness; derived from the image size when falsy.
    """
    thickness = line_thickness if line_thickness else round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    box_color = color if color else [random.randint(0, 255) for _ in range(3)]
    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(img, top_left, bottom_right, box_color, thickness=thickness)
    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    text_size = cv2.getTextSize(label, 0, fontScale=thickness / 3, thickness=font_thickness)[0]
    # Opposite corner of the filled background rectangle behind the text
    tag_corner = top_left[0] + text_size[0], top_left[1] - text_size[1] - 3
    cv2.rectangle(img, top_left, tag_corner, box_color, -1)  # filled
    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, thickness / 3, [225, 255, 255],
                thickness=font_thickness, lineType=cv2.LINE_AA)
def plot_wh_methods():  # from utils.utils import *; plot_wh_methods()
    """
    Plot exp(x) against (2 * sigmoid(x)) ** 2 and ** 2.5 and save the figure as 'comparison.png'.

    Compares the methods for width-height anchor multiplication, see
    https://github.com/ultralytics/yolov3/issues/168
    """
    inputs = np.arange(-4.0, 4.0, .1)
    exp_curve = np.exp(inputs)
    sig_curve = torch.sigmoid(torch.from_numpy(inputs)).numpy() * 2

    fig = plt.figure(figsize=(6, 3), dpi=150)
    curves = (
        (exp_curve, 'yolo method'),
        (sig_curve ** 2, '^2 power method'),
        (sig_curve ** 2.5, '^2.5 power method'),
    )
    for values, name in curves:
        plt.plot(inputs, values, '.-', label=name)
    plt.xlim(left=-4, right=4)
    plt.ylim(bottom=0, top=6)
    plt.xlabel('input')
    plt.ylabel('output')
    plt.legend()
    fig.tight_layout()
    fig.savefig('comparison.png', dpi=300)
def plot_images(imgs, targets, paths=None, fname='images.jpg'):
    """
    Plot a batch of training images overlaid with their target boxes and save it to `fname`.

    Arguments:
        imgs: 4-D tensor unpacked as (batch size, channels, height, width).
        targets: 2-D tensor; column 0 is the image index within the batch and columns 2:6
            are xywh boxes, which are multiplied by the image width/height for plotting.
        paths: optional sequence of image paths; the file name (at most 40 characters)
            becomes the subplot title.
        fname: output file name.
    """
    imgs = imgs.cpu().numpy()
    targets = targets.cpu().numpy()
    # targets = targets[targets[:, 1] == 21]  # plot only one class

    fig = plt.figure(figsize=(10, 10))
    bs, _, h, w = imgs.shape  # batch size, _, height, width
    # Side of the square subplot grid; plt.subplot needs integer row/column counts
    ns = int(np.ceil(bs ** 0.5))

    for i in range(bs):
        boxes = xywh2xyxy(targets[targets[:, 0] == i, 2:6]).T
        boxes[[0, 2]] *= w
        boxes[[1, 3]] *= h
        plt.subplot(ns, ns, i + 1).imshow(imgs[i].transpose(1, 2, 0))
        # Trace each box outline: corner order (x1,y1) (x2,y1) (x2,y2) (x1,y2) (x1,y1)
        plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-')
        plt.axis('off')
        if paths is not None:
            s = Path(paths[i]).name
            plt.title(s[:min(len(s), 40)], fontdict={'size': 8})  # limit to 40 characters
    fig.tight_layout()
    fig.savefig(fname, dpi=300)
    plt.close()
def plot_test_txt():  # from utils.utils import *; plot_test()
    """
    Plot histograms of the box centres stored in 'test.txt'.

    The first four columns are read as xyxy boxes. A 2-D histogram of the centres is saved
    as 'hist2d.jpg' and the two 1-D histograms as 'hist1d.jpg'.
    """
    data = np.loadtxt('test.txt', dtype=np.float32)
    centers = xyxy2xywh(data[:, :4])
    cx = centers[:, 0]
    cy = centers[:, 1]

    # Joint distribution of the centres
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0)
    ax.set_aspect('equal')
    fig.tight_layout()
    plt.savefig('hist2d.jpg', dpi=300)

    # Marginal distributions, x on the left and y on the right
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    for axis, values in ((ax[0], cx), (ax[1], cy)):
        axis.hist(values, bins=600)
    fig.tight_layout()
    plt.savefig('hist1d.jpg', dpi=300)
def plot_targets_txt():  # from utils.utils import *; plot_targets_txt()
    """
    Plot histograms of the first four columns of 'targets.txt' and save them as 'targets.jpg'.

    Each panel is labelled with the mean and standard deviation of its column.
    """
    columns = np.loadtxt('targets.txt', dtype=np.float32).T
    titles = ['x targets', 'y targets', 'width targets', 'height targets']
    fig, ax = plt.subplots(2, 2, figsize=(8, 8))
    ax = ax.ravel()
    for i, title in enumerate(titles):
        values = columns[i]
        stats = '%.3g +/- %.3g' % (values.mean(), values.std())
        ax[i].hist(values, bins=100, label=stats)
        ax[i].legend()
        ax[i].set_title(title)
    fig.tight_layout()
    plt.savefig('targets.jpg', dpi=300)
def plot_results(start=0, stop=0):  # from utils.utils import *; plot_results()
    """
    Plot training results files 'results*.txt' and save the figure as 'results.png'.

    Files are searched in the current directory and in '../../Downloads'. Ten columns of
    every file are drawn, one per panel, for rows `start` up to `stop` (a `stop` of 0 means
    up to the last row).
    """
    # import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v3.txt')
    fig, ax = plt.subplots(2, 5, figsize=(14, 7))
    ax = ax.ravel()
    titles = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Train Loss', 'Precision', 'Recall', 'mAP',
              'F1', 'Test Loss']
    result_files = glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt')
    for f in sorted(result_files):
        results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11, 12, 13]).T
        n = results.shape[1]  # number of rows
        last = min(stop, n) if stop else n
        x = range(start, last)
        for i, title in enumerate(titles):
            ax[i].plot(x, results[i, x], marker='.', label=f.replace('.txt', ''))
            ax[i].set_title(title)
    fig.tight_layout()
    ax[4].legend()
    fig.savefig('results.png', dpi=300)