yolov3代码探究 - yolov3代码探究-utils.py - 《Notes of CV》

这边是重点中的重点了，如何计算loss和各种评价指标。
按照我的优先级，先重点看loss计算的代码。（事实上花时间在这代码上有两天了，还是有很多问题）

import glob
import random
import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
from pathlib import Path
from . import torch_utils  # , google_utils
# Use an 11-point default font for every matplotlib figure drawn after this module is imported
matplotlib.rc('font', **{'size': 11})
# Set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
cv2.setNumThreads(0)
def float3(x):
    """Return ``x`` rounded to three decimals (round-trips through a '.3f' formatted string)."""
    rounded_text = '{:.3f}'.format(x)
    return float(rounded_text)
def init_seeds(seed=0):
    """Seed Python's ``random``, NumPy's global RNG and ``torch_utils.init_seeds`` with the same value."""
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)
    torch_utils.init_seeds(seed=seed)
def load_classes(path):
    """Load a ``*.names`` file and return its non-empty lines as a list of strings."""
    with open(path, 'r') as names_file:
        raw_lines = names_file.read().split('\n')
    # Drop empty strings, e.g. the one produced by a trailing newline
    return [line for line in raw_lines if line]
def model_info(model, report='summary'):
    """Print a description of a PyTorch model's parameters.

    Args:
        model: object exposing ``parameters()`` and ``named_parameters()``
            (e.g. an ``nn.Module``).
        report: ``'full'`` additionally prints one row per parameter tensor
            (index, name, requires_grad, element count, shape, mean, std);
            any other value prints only the one-line summary.
    """
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    # Compare by value: ``is`` tests object identity and only matched through string interning
    if report == 'full':
        print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
        for i, (name, p) in enumerate(model.named_parameters()):
            name = name.replace('module_list.', '')
            print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
                  (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
    print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g))
def labels_to_class_weights(labels, nc=80):
    """Compute normalized inverse-frequency class weights from training labels.

    Args:
        labels: sequence of per-image arrays whose column 0 holds the class
            index (rows are ``[class, x, y, w, h]``).
        nc: number of classes; the histogram has at least this many bins.

    Returns:
        ``torch.Tensor`` of weights proportional to ``1 / count`` per class,
        normalized to sum to 1. Classes with no labels are treated as having
        a count of 1 to avoid division by zero.
    """
    labels = np.concatenate(labels, 0)  # labels.shape = (866643, 5) for COCO
    # Builtin ``int`` replaces the ``np.int`` alias, which was removed in NumPy 1.24
    classes = labels[:, 0].astype(int)  # labels = [class xywh]
    weights = np.bincount(classes, minlength=nc)  # occurrences per class
    weights[weights == 0] = 1  # replace empty bins with 1
    weights = 1 / weights  # inverse frequency per class
    weights /= weights.sum()  # normalize
    return torch.Tensor(weights)
def labels_to_image_weights(labels, nc=80, class_weights=None):
    """Compute one sampling weight per image from per-class weights.

    Each image's weight is the sum over classes of
    ``class_weights[c] * (number of labels of class c in that image)``.

    Args:
        labels: sequence of per-image arrays whose column 0 holds the class index.
        nc: number of classes.
        class_weights: array with ``nc`` elements; defaults to all ones, sized
            to ``nc`` (the previous fixed ``np.ones(80)`` default only worked
            for ``nc == 80``).

    Returns:
        ``np.ndarray`` with ``len(labels)`` entries.
    """
    if class_weights is None:
        class_weights = np.ones(nc)
    n = len(labels)
    if n == 0:
        return np.zeros(0)  # nothing to weight
    # Builtin ``int`` replaces the ``np.int`` alias, which was removed in NumPy 1.24
    class_counts = np.array([np.bincount(labels[i][:, 0].astype(int), minlength=nc) for i in range(n)])
    image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
    # index = random.choices(range(n), weights=image_weights, k=1)  # weight image sample
    return image_weights
def coco_class_weights():  # frequency of each class in coco train2014
    """Return normalized inverse-frequency weights for the 80 COCO classes as a ``torch.Tensor``."""
    class_counts = [
        187437, 4955, 30920, 6033, 3838, 4332, 3160, 7051, 7677, 9167, 1316, 1372, 833, 6757, 7355, 3302, 3776, 4671,
        6769, 5706, 3908, 903, 3686, 3596, 6200, 7920, 8779, 4505, 4272, 1862, 4698, 1962, 4403, 6659, 2402, 2689,
        4012, 4175, 3411, 17048, 5637, 14553, 3923, 5539, 4289, 10084, 7018, 4314, 3099, 4638, 4939, 5543, 2038, 4004,
        5053, 4578, 27292, 4113, 5931, 2905, 11174, 2873, 4036, 3415, 1517, 4122, 1980, 4464, 1190, 2302, 156, 3933,
        1877, 17630, 4337, 4624, 1075, 3468, 135, 1380,
    ]
    inverse_freq = 1 / torch.Tensor(class_counts)
    # Normalize so the weights sum to 1
    return inverse_freq / inverse_freq.sum()
def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
    """Return a list whose element ``i`` is the 91-index (paper) category id of 80-index class ``i``.

    Category list reference:
    https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    """
    # Ids in 1..90 that do not appear in the 80-class list
    unused_ids = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}
    return [category_id for category_id in range(1, 91) if category_id not in unused_ids]

下面这个函数有教学意义，演示了如何对网络权重做初始化，但作者实际上并没有用到它。

def weights_init_normal(m):
    """Re-initialize a module in place, dispatching on its class name.

    Classes whose name contains 'Conv' get weights drawn from N(0, 0.03);
    classes whose name contains 'BatchNorm2d' get weights from N(1, 0.03)
    and a zero bias. Any other module is left untouched.
    """
    layer_type = type(m).__name__
    if 'Conv' in layer_type:
        torch.nn.init.normal_(m.weight.data, mean=0.0, std=0.03)
    elif 'BatchNorm2d' in layer_type:
        torch.nn.init.normal_(m.weight.data, mean=1.0, std=0.03)
        torch.nn.init.constant_(m.bias.data, val=0.0)

一些常用的函数

def xyxy2xywh(x):
    """Convert rows of ``[x1, y1, x2, y2]`` boxes to ``[cx, cy, w, h]``.

    Accepts a 2-D ``torch.Tensor`` or NumPy array and returns a new
    container of the same kind; columns beyond the fourth are left as zero.
    """
    out = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    left, top, right, bottom = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    out[:, 0] = (left + right) / 2  # center x
    out[:, 1] = (top + bottom) / 2  # center y
    out[:, 2] = right - left  # width
    out[:, 3] = bottom - top  # height
    return out


def xywh2xyxy(x):
    """Convert rows of ``[cx, cy, w, h]`` boxes to ``[x1, y1, x2, y2]``.

    Accepts a 2-D ``torch.Tensor`` or NumPy array and returns a new
    container of the same kind; columns beyond the fourth are left as zero.
    """
    out = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    center_x, center_y = x[:, 0], x[:, 1]
    half_w, half_h = x[:, 2] / 2, x[:, 3] / 2
    out[:, 0] = center_x - half_w  # x1
    out[:, 1] = center_y - half_h  # y1
    out[:, 2] = center_x + half_w  # x2
    out[:, 3] = center_y + half_h  # y2
    return out

下面的代码暂时用不上，还没有细看。

def scale_coords(img1_shape, coords, img0_shape):
    """Rescale xyxy ``coords`` from ``img1_shape`` to ``img0_shape``, in place.

    The padding offsets are subtracted, the coordinates divided by the gain,
    and the result clipped to ``img0_shape`` via ``clip_coords``. Shapes are
    indexed as (height, width). The modified ``coords`` is also returned.
    """
    gain = max(img1_shape) / max(img0_shape)  # gain  = old / new
    pad_x = (img1_shape[1] - img0_shape[1] * gain) / 2
    pad_y = (img1_shape[0] - img0_shape[0] * gain) / 2
    coords[:, [0, 2]] -= pad_x
    coords[:, [1, 3]] -= pad_y
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def clip_coords(boxes, img_shape):
    """Clamp xyxy ``boxes`` in place to an image of shape (height, width, ...); returns nothing."""
    height, width = img_shape[0], img_shape[1]
    # x columns are bounded by the width, y columns by the height
    for columns, upper in (([0, 2], width), ([1, 3], height)):
        boxes[:, columns] = boxes[:, columns].clamp(min=0, max=upper)

下面是计算mAP的代码，需要看但目前还没看。

def ap_per_class(tp, conf, pred_cls, target_cls):
    """Compute per-class precision, recall, average precision and F1.

    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.

    # Arguments
        tp:    True-positive flags, one per prediction (NumPy array).
        conf:  Confidence per prediction, used for ranking (NumPy array).
        pred_cls: Predicted class per prediction (NumPy array).
        target_cls: Class of every ground-truth object (NumPy array).
    # Returns
        (p, r, ap, f1, unique_classes): precision, recall, AP and F1 arrays
        with one entry per class found in ``target_cls``, plus those classes
        cast to int32. AP is computed by ``compute_ap``.
    """
    # Rank predictions by decreasing confidence
    order = np.argsort(-conf)
    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]

    unique_classes = np.unique(target_cls)

    ap, p, r = [], [], []
    for c in unique_classes:
        is_class = pred_cls == c
        n_gt = (target_cls == c).sum()  # number of ground-truth objects
        n_p = is_class.sum()  # number of predicted objects

        if n_p == 0 and n_gt == 0:
            continue
        if n_p == 0 or n_gt == 0:
            # No predictions or no ground truth: every metric is zero
            ap.append(0)
            r.append(0)
            p.append(0)
            continue

        # Cumulative true and false positives along the ranking
        hits = tp[is_class]
        tpc = hits.cumsum()
        fpc = (1 - hits).cumsum()

        recall_curve = tpc / (n_gt + 1e-16)
        precision_curve = tpc / (tpc + fpc)

        # The last point of each curve is the value over all predictions
        r.append(recall_curve[-1])
        p.append(precision_curve[-1])
        ap.append(compute_ap(recall_curve, precision_curve))

    # F1 is the harmonic mean of precision and recall
    p, r, ap = np.array(p), np.array(r), np.array(ap)
    f1 = 2 * p * r / (p + r + 1e-16)

    return p, r, ap, f1, unique_classes.astype('int32')


def compute_ap(recall, precision):
    """Compute average precision as the area under the precision envelope.

    Source: https://github.com/rbgirshick/py-faster-rcnn.

    # Arguments
        recall:    The recall curve (list or array).
        precision: The precision curve (list or array).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # Sentinels: recall spans [0, 1], precision is 0 at both ends
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # Precision envelope: running maximum taken from the right-hand end
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Indices where recall changes value
    steps = np.flatnonzero(mrec[1:] != mrec[:-1])

    # Sum of (delta recall) * precision over those steps
    recall_deltas = mrec[steps + 1] - mrec[steps]
    return np.sum(recall_deltas * mpre[steps + 1])

iou的计算方法（包括GIoU的计算）

def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False):
    """Return the IoU (or GIoU) of ``box1`` against every box in ``box2``.

    Args:
        box1: a box indexable as ``box1[0..3]`` (the author's note gives it as
            a single box of 4 values).
        box2: tensor of boxes with one box per row (n x 4); it is transposed
            internally, so only its first four columns are read.
        x1y1x2y2: True if boxes are corner-format ``[x1, y1, x2, y2]``,
            False if they are center-format ``[x, y, w, h]``.
        GIoU: if True return Generalized IoU (https://arxiv.org/pdf/1902.09630.pdf).
    """
    box2 = box2.t()

    def to_corners(box):
        # Returns (x1, y1, x2, y2) for either input format
        if x1y1x2y2:
            return box[0], box[1], box[2], box[3]
        half_w, half_h = box[2] / 2, box[3] / 2
        return box[0] - half_w, box[1] - half_h, box[0] + half_w, box[1] + half_h

    b1_x1, b1_y1, b1_x2, b1_y2 = to_corners(box1)
    b2_x1, b2_y1, b2_x2, b2_y2 = to_corners(box2)

    # Intersection: overlap along each axis, clamped at zero when disjoint
    overlap_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
    overlap_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    inter_area = overlap_w * overlap_h

    # Union; the epsilon keeps the denominator non-zero
    area1 = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
    area2 = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
    union_area = (area1 + 1e-16) + area2 - inter_area

    iou = inter_area / union_area
    if not GIoU:
        return iou

    # Smallest axis-aligned box enclosing both boxes
    c_x1, c_x2 = torch.min(b1_x1, b2_x1), torch.max(b1_x2, b2_x2)
    c_y1, c_y2 = torch.min(b1_y1, b2_y1), torch.max(b1_y2, b2_y2)
    c_area = (c_x2 - c_x1) * (c_y2 - c_y1)
    return iou - (c_area - union_area) / c_area


def wh_iou(box1, box2):
    """Return the IoU of one width/height pair against n width/height pairs.

    Boxes are compared by size only. ``box1`` is indexed as ``(w, h)``;
    ``box2`` is an n x 2 tensor with one ``(w, h)`` pair per row.
    """
    box2 = box2.t()

    width_a, height_a = box1[0], box1[1]
    width_b, height_b = box2[0], box2[1]

    # Intersection is the smaller extent along each axis
    inter_area = torch.min(width_a, width_b) * torch.min(height_a, height_b)

    # Union; the epsilon keeps the denominator non-zero
    union_area = (width_a * height_a + 1e-16) + width_b * height_b - inter_area

    return inter_area / union_area

重中之重：计算loss的代码
yolov3论文中的loss计算方法如下图所示。

yolov3代码探究-utils.py - 图1

说明一下p，在模型的输出中

if self.training
output.append(YOLOLayer层的输出), yolo层的输出在训练时为(bs, 3, ng, ng, 85), ng是该层的特征图尺寸
return output, output维度(3, pi), 其中pi(bs, 3, ng, ng, 85)
else
output.append(YOLOLayer层的输出), yolo层的输出包括：io(bs, ng*ng*3, 85), p(bs, 3, ng, ng, 85)
io, p = list(zip(*output)), 做变形
io = torch.cat(io, 1) # 从左往右拼接, 拼接以后的维度应该是(bs, 3549, 85)
return io, p, p维度(3, pi), 其中pi(bs, 3, ng, ng, 85)

def compute_loss(p, targets, model, giou_loss=True):  # predictions, targets, model
    """Compute the YOLOv3 loss summed over all YOLO layers.

    Args:
        p: sequence of raw per-layer predictions. Per the author's notes each
            entry is (bs, 3, ng, ng, 85); the code reads channels 0:2 as xy,
            2:4 as wh, 4 as objectness and 5: as class logits.
        targets: ground-truth labels passed to ``build_targets``; per the
            author's notes shaped (num_labels, 6) with rows
            (image_id, class, x, y, w, h).
        model: object exposing ``hyp`` (dict with 'cls_pw', 'obj_pw', 'obj',
            'cls' and either 'giou' or 'xy'/'wh') plus whatever
            ``build_targets`` requires.
        giou_loss: if True the box loss is ``1 - GIoU`` (accumulated in the
            xy slot, wh stays 0); otherwise MSE on xy and wh separately.

    Returns:
        (loss, items): ``loss`` is the 1-element sum lxy + lwh + lobj + lcls;
        ``items`` is the detached concatenation (lxy, lwh, lobj, lcls, loss).
    """
    # Tensor constructor matching the device of the predictions
    ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor
    lxy, lwh, lcls, lobj = ft([0]), ft([0]), ft([0]), ft([0])

    # Per-layer lists of targets matched to anchors (see build_targets);
    # indices[i] is the tuple (image, anchor, grid y, grid x)
    txy, twh, tcls, tbox, indices, anchor_vec = build_targets(model, targets)
    hyp = model.hyp  # hyperparameters

    # Criteria; pos_weight rescales the positive term of each BCE loss
    mse = nn.MSELoss()
    bce_cls = nn.BCEWithLogitsLoss(pos_weight=ft([hyp['cls_pw']]))  # class loss
    bce_obj = nn.BCEWithLogitsLoss(pos_weight=ft([hyp['obj_pw']]))  # objectness loss

    batch_size = p[0].shape[0]
    gain = batch_size / 64  # loss gain

    for layer_idx, layer_out in enumerate(p):
        b, a, gj, gi = indices[layer_idx]  # image, anchor, gridy, gridx
        tobj = torch.zeros_like(layer_out[..., 0])  # objectness target, zero everywhere by default

        n_matched = len(b)
        if n_matched:
            # Only matched (positive) cells contribute box and class losses
            matched = layer_out[b, a, gj, gi]  # predictions at the matched cells
            tobj[b, a, gj, gi] = 1.0

            if giou_loss:
                pred_xy = torch.sigmoid(matched[..., 0:2])
                pred_wh = torch.exp(matched[..., 2:4]) * anchor_vec[layer_idx]
                pbox = torch.cat((pred_xy, pred_wh), 1)  # predicted xywh
                giou = bbox_iou(pbox.t(), tbox[layer_idx], x1y1x2y2=False, GIoU=True)
                lxy += (gain * hyp['giou']) * (1.0 - giou).mean()  # giou loss
            else:
                lxy += (gain * hyp['xy']) * mse(torch.sigmoid(matched[..., 0:2]), txy[layer_idx])  # xy loss
                lwh += (gain * hyp['wh']) * mse(matched[..., 2:4], twh[layer_idx])  # wh yolo loss

            # One-hot class targets with the same shape as the class logits
            class_logits = matched[..., 5:]
            tclsm = torch.zeros_like(class_logits)
            tclsm[range(n_matched), tcls[layer_idx]] = 1.0
            lcls += (gain * hyp['cls']) * bce_cls(class_logits, tclsm)  # cls loss (BCE)

        # Objectness loss is computed over every cell, matched or not
        lobj += (gain * hyp['obj']) * bce_obj(layer_out[..., 4], tobj)

    loss = lxy + lwh + lobj + lcls

    return loss, torch.cat((lxy, lwh, lobj, lcls, loss)).detach()

下面的代码是计算loss前的准备工作：它把图像的真实标注与anchor进行比对，并整理成与网络输出一致的形式，以便用来计算loss。

def build_targets(model, targets):
    """Match ground-truth labels to anchors for every YOLO layer.

    Args:
        model: object exposing ``hyp['iou_t']``, ``yolo_layers`` (indices into
            ``module_list``) and ``module_list``; each YOLO layer provides
            ``ng``, ``anchor_vec`` and ``nc``. DataParallel and
            DistributedDataParallel wrappers are unwrapped via ``.module``.
        targets: tensor of labels; per the author's notes shaped
            (num_labels, 6) with rows (image_id, class, x, y, w, h) and
            x, y, w, h normalized to [0, 1].

    Returns:
        (txy, twh, tcls, tbox, indices, anchor_vec): lists with one entry per
        YOLO layer. For a layer with n kept target/anchor pairs:
        txy (n, 2) xy offset inside the grid cell; twh (n, 2)
        log(gwh / anchor); tcls (n,) class index; tbox (n, 4) xywh in grid
        units with xy relative to the cell; indices the tuple
        (image, anchor, grid y, grid x); anchor_vec (n, 2) the matched anchors.

    Raises:
        AssertionError: if a target class index is not below ``layer.nc``.
    """
    # Target/anchor pairs whose wh-IoU is not above this threshold are dropped
    iou_thres = model.hyp['iou_t']  # hyperparameter
    # isinstance also unwraps subclasses of the parallel wrappers
    if isinstance(model, (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)):
        model = model.module

    nt = len(targets)  # number of labels in the batch
    txy, twh, tcls, tbox, indices, anchor_vec = [], [], [], [], [], []

    for i in model.yolo_layers:
        layer = model.module_list[i][0]

        # iou of targets-anchors
        t, a = targets, []  # t: (possibly repeated/filtered) targets, a: matching anchor indices
        gwh = t[:, 4:6] * layer.ng  # ground-truth wh scaled from [0, 1] to grid units
        if nt:
            # One row of wh-IoUs per anchor: (num_anchors, nt)
            iou = torch.stack([wh_iou(x, gwh) for x in layer.anchor_vec], 0)

            use_best_anchor = False
            if use_best_anchor:
                iou, a = iou.max(0)  # best iou and anchor
            else:  # use all anchors
                na = len(layer.anchor_vec)  # number of anchors
                # Anchor index for every (anchor, target) pair, flattened anchor-major.
                # Built on the targets' device so it can be masked by ``j`` below.
                a = torch.arange(na, device=targets.device).view((-1, 1)).repeat([1, nt]).view(-1)
                t = targets.repeat([na, 1])  # (na * nt, 6)
                gwh = gwh.repeat([na, 1])  # (na * nt, 2)
                iou = iou.view(-1)  # use all ious

            # reject anchors below iou_thres (OPTIONAL, increases P, lowers R)
            reject = True
            if reject:
                j = iou > iou_thres  # boolean mask over the candidate pairs
                t, a, gwh = t[j], a[j], gwh[j]

        # Indices
        b, c = t[:, :2].long().t()  # target image, class
        gxy = t[:, 2:4] * layer.ng  # grid x, y
        gi, gj = gxy.long().t()  # grid x, y indices
        indices.append((b, a, gj, gi))

        # XY coordinates: keep only the offset inside the grid cell
        gxy -= gxy.floor()
        txy.append(gxy)

        # Box used by the GIoU loss: cell-relative xy plus wh in grid units
        tbox.append(torch.cat((gxy, gwh), 1))  # xywh (grids)
        anchor_vec.append(layer.anchor_vec[a])

        # Width and height: inverse of wh = exp(t) * anchor_wh
        twh.append(torch.log(gwh / layer.anchor_vec[a]))  # wh yolo method
        # twh.append((gwh / layer.anchor_vec[a]) ** (1 / 3) / 2)  # wh power method

        # Class
        tcls.append(c)
        if c.shape[0]:
            # Class indices are zero-based, so the largest valid index is nc - 1
            assert c.max() < layer.nc, 'Target classes exceed model classes'

    return txy, twh, tcls, tbox, indices, anchor_vec

def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.5, nms_style='MERGE'):
    """
    Filter raw detections by confidence and apply per-class non-maximum suppression.

    Args:
        prediction: per-image detections; each item is a 2-D tensor with rows
            (x, y, w, h, object_conf, class scores...). It is no longer
            modified in place.
        conf_thres: minimum combined confidence (object_conf * best class score).
        nms_thres: IoU threshold used by the suppression step.
        nms_style: 'OR', 'AND', 'MERGE' (default, experimental) or 'SOFT'.

    Returns:
        A list with one entry per image: ``None`` if nothing survived,
        otherwise a tensor with rows
        (x1, y1, x2, y2, object_conf * class_conf, class_conf, class)
        sorted by decreasing confidence.

    Raises:
        ValueError: if ``nms_style`` is not one of the supported styles.
    """
    if nms_style not in ('OR', 'AND', 'MERGE', 'SOFT'):
        raise ValueError("nms_style must be one of 'OR', 'AND', 'MERGE', 'SOFT', got %r" % (nms_style,))

    min_wh = 2  # (pixels) minimum box width and height

    output = [None] * len(prediction)
    for image_i, pred in enumerate(prediction):
        # Multiply conf by class conf to get combined confidence.
        # Computed out of place so the caller's tensor is left untouched.
        class_conf, class_pred = pred[:, 5:].max(1)
        conf = pred[:, 4] * class_conf

        # Select only suitable predictions
        i = (conf > conf_thres) & (pred[:, 2:4] > min_wh).all(1) & \
            torch.isfinite(pred).all(1) & torch.isfinite(conf)
        pred = pred[i]  # boolean indexing returns a copy

        # If none are remaining => process next image
        if len(pred) == 0:
            continue

        pred[:, 4] = conf[i]  # store the combined confidence on the copy

        # Select predicted classes
        class_conf = class_conf[i]
        class_pred = class_pred[i].unsqueeze(1).float()

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        pred[:, :4] = xywh2xyxy(pred[:, :4])

        # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
        pred = torch.cat((pred[:, :5], class_conf.unsqueeze(1), class_pred), 1)

        # Get detections sorted by decreasing confidence scores
        pred = pred[(-pred[:, 4]).argsort()]

        det_max = []
        for c in pred[:, -1].unique():
            dc = pred[pred[:, -1] == c]  # select class c
            n = len(dc)
            if n == 1:
                det_max.append(dc)  # No NMS required if only 1 prediction
                continue
            elif n > 100:
                dc = dc[:100]  # limit to first 100 boxes: https://github.com/ultralytics/yolov3/issues/117

            # Non-maximum suppression
            if nms_style == 'OR':  # keep the best box, drop everything overlapping it
                while dc.shape[0]:
                    det_max.append(dc[:1])  # save highest conf detection
                    if len(dc) == 1:  # Stop if we're at the last detection
                        break
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'AND':  # requires overlap, single boxes erased
                while len(dc) > 1:
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    # NOTE(review): hard-coded 0.5 rather than nms_thres - confirm intent
                    if iou.max() > 0.5:
                        det_max.append(dc[:1])
                    dc = dc[1:][iou < nms_thres]  # remove ious > threshold

            elif nms_style == 'MERGE':  # weighted mixture box
                while len(dc):
                    if len(dc) == 1:
                        det_max.append(dc)
                        break
                    i = bbox_iou(dc[0], dc) > nms_thres  # boxes overlapping the best one
                    weights = dc[i, 4:5]
                    # Replace the best box by the confidence-weighted mean of its overlapping group
                    dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
                    det_max.append(dc[:1])
                    dc = dc[~i]

            elif nms_style == 'SOFT':  # soft-NMS https://arxiv.org/abs/1704.04503
                sigma = 0.5  # soft-nms sigma parameter
                while len(dc):
                    if len(dc) == 1:
                        det_max.append(dc)
                        break
                    det_max.append(dc[:1])
                    iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                    dc = dc[1:]
                    dc[:, 4] *= torch.exp(-iou ** 2 / sigma)  # decay confidences
                    # dc = dc[dc[:, 4] > nms_thres]  # new line per https://github.com/ultralytics/yolov3/issues/362

        if len(det_max):
            det_max = torch.cat(det_max)  # concatenate
            output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # sort

    return output


def get_yolo_layers(model):
    """Return the indices of ``model.module_defs`` entries whose 'type' is 'yolo'."""
    # e.g. [82, 94, 106] for yolov3
    return [idx for idx, module_def in enumerate(model.module_defs) if module_def['type'] == 'yolo']


def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
    """Save a copy of a ``*.pt`` checkpoint with its 'optimizer' entry emptied.

    The checkpoint is loaded on the CPU and written next to the original with
    '.pt' replaced by '_lite.pt' in the file name.
    """
    checkpoint = torch.load(filename, map_location='cpu')
    checkpoint['optimizer'] = []
    lite_filename = filename.replace('.pt', '_lite.pt')
    torch.save(checkpoint, lite_filename)


def coco_class_count(path='../coco/labels/train2014/'):
    """Count label occurrences per class over every label file in ``path``.

    Each file matching ``<path>/*.*`` is read as rows of five floats with the
    class index in column 0. Progress is printed per file.

    Returns:
        int32 ``np.ndarray`` of length 80 with the number of labels per class
        (previously the histogram was computed and then discarded).
    """
    nc = 80  # number classes
    x = np.zeros(nc, dtype='int32')
    files = sorted(glob.glob('%s/*.*' % path))
    for i, file in enumerate(files):
        labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
        x += np.bincount(labels[:, 0].astype('int32'), minlength=nc)
        print(i, len(files))
    return x


def coco_only_people(path='../coco/labels/val2014/'):
    """Print the label count and name of every label file in ``path`` whose labels are all class 0."""
    label_files = sorted(glob.glob('%s/*.*' % path))
    for file in label_files:
        labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
        class_ids = labels[:, 0]
        if (class_ids == 0).all():
            print(labels.shape[0], file)


def select_best_evolve(path='../../Downloads/evolve*.txt'):  # from utils.utils import *; select_best_evolve()
    """For each file matching ``path``, print its name and the row with the largest value in column 2."""
    for file in sorted(glob.glob(path)):
        runs = np.loadtxt(file, dtype=np.float32)
        best_row = runs[:, 2].argmax()
        print(file, runs[best_row])


def kmeans_targets(path='./data/coco_64img.txt'):  # from utils.utils import *; kmeans_targets()
    """Run k-means (k=9) on label widths/heights to derive anchor sizes.

    ``path`` is a text file listing image paths, one per line. Label paths are
    derived by replacing 'images' with 'labels' and the image extension with
    '.txt'. Label coordinates are scaled to pixels and then so that the longer
    image side equals 320. The nine cluster centers, sorted by area, are
    printed and returned.

    Missing label files are skipped silently; label files that cannot be
    parsed or that fail validation are skipped with a warning.

    Returns:
        ``np.ndarray`` of shape (9, 2) with the (width, height) cluster centers.

    Raises:
        AssertionError: if ``path`` lists no images or no labels were found.
    """
    with open(path, 'r') as f:
        img_files = [x for x in f.read().splitlines() if len(x) > 0]

    # Read shapes
    n = len(img_files)
    assert n > 0, 'No images found in %s' % path
    label_files = [x.replace('images', 'labels').
                       replace('.jpeg', '.txt').
                       replace('.jpg', '.txt').
                       replace('.bmp', '.txt').
                       replace('.png', '.txt') for x in img_files]
    s = np.array([Image.open(f).size for f in tqdm(img_files, desc='Reading image shapes')])  # (width, height)

    # Read targets
    labels = [np.zeros((0, 5))] * n
    progress = tqdm(label_files, desc='Reading labels')  # renamed from ``iter`` to avoid shadowing the builtin
    for i, file in enumerate(progress):
        try:
            with open(file, 'r') as f:
                l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
            if l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                l[:, [1, 3]] *= s[i][0]  # x, w to pixels
                l[:, [2, 4]] *= s[i][1]  # y, h to pixels
                l[:, 1:] *= 320 / max(s[i])  # longer image side -> 320
                labels[i] = l
        except OSError:
            continue  # missing label file
        except (AssertionError, ValueError) as e:
            # Stay best-effort, but report the problem instead of hiding it
            print('Warning: skipping labels in %s: %s' % (file, e))
    assert len(np.concatenate(labels, 0)) > 0, 'No labels found. Incorrect label paths provided.'

    # kmeans
    from scipy import cluster
    wh = np.concatenate(labels, 0)[:, 3:5]
    k = cluster.vq.kmeans(wh, 9)[0]
    k = k[np.argsort(k.prod(1))]  # sort by area
    for x in k.ravel():
        print('%.1f, ' % x, end='')
    return k


# Plotting functions ---------------------------------------------------------------------------------------------------
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw one xyxy bounding box, and optionally a filled label tag, onto ``img`` in place.

    When ``line_thickness`` is falsy the thickness is derived from the image
    size; when ``color`` is falsy a random color is drawn.
    """
    thickness = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1
    color = color or [random.randint(0, 255) for _ in range(3)]
    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(img, top_left, bottom_right, color, thickness=thickness)
    if not label:
        return

    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0][:2]
    # Filled background sitting just above the box's top-left corner
    tag_corner = top_left[0] + text_w, top_left[1] - text_h - 3
    cv2.rectangle(img, top_left, tag_corner, color, -1)
    cv2.putText(img, label, (top_left[0], top_left[1] - 2), 0, font_scale, [225, 255, 255],
                thickness=font_thickness, lineType=cv2.LINE_AA)


def plot_wh_methods():  # from utils.utils import *; plot_wh_methods()
    """Plot exp(x) against the (2 * sigmoid(x)) ** p alternatives and save 'comparison.png'.

    Compares the width-height anchor multiplication methods discussed in
    https://github.com/ultralytics/yolov3/issues/168
    """
    inputs = np.arange(-4.0, 4.0, .1)
    exp_curve = np.exp(inputs)
    sigmoid_curve = torch.sigmoid(torch.from_numpy(inputs)).numpy() * 2

    fig = plt.figure(figsize=(6, 3), dpi=150)
    plt.plot(inputs, exp_curve, '.-', label='yolo method')
    for power, legend in ((2, '^2 power method'), (2.5, '^2.5 power method')):
        plt.plot(inputs, sigmoid_curve ** power, '.-', label=legend)
    plt.xlim(left=-4, right=4)
    plt.ylim(bottom=0, top=6)
    plt.xlabel('input')
    plt.ylabel('output')
    plt.legend()
    fig.tight_layout()
    fig.savefig('comparison.png', dpi=300)


def plot_images(imgs, targets, paths=None, fname='images.jpg'):
    """Plot a batch of training images overlaid with their target boxes and save to ``fname``.

    Args:
        imgs: tensor unpacked as (batch, channels, height, width).
        targets: tensor whose column 0 is the image index within the batch and
            columns 2:6 are normalized xywh boxes.
        paths: optional sequence of image paths; the file name (first 40
            characters) is used as the subplot title.
        fname: output image file.
    """
    imgs = imgs.cpu().numpy()
    targets = targets.cpu().numpy()
    # targets = targets[targets[:, 1] == 21]  # plot only one class

    fig = plt.figure(figsize=(10, 10))
    bs, _, h, w = imgs.shape  # batch size, _, height, width
    # Side of the square subplot grid; plt.subplot requires integers
    ns = int(np.ceil(bs ** 0.5))

    for i in range(bs):
        boxes = xywh2xyxy(targets[targets[:, 0] == i, 2:6]).T
        boxes[[0, 2]] *= w  # x to pixels
        boxes[[1, 3]] *= h  # y to pixels
        plt.subplot(ns, ns, i + 1).imshow(imgs[i].transpose(1, 2, 0))
        # Trace each box outline as a closed polyline
        plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-')
        plt.axis('off')
        if paths is not None:
            s = Path(paths[i]).name
            plt.title(s[:min(len(s), 40)], fontdict={'size': 8})  # limit to 40 characters
    fig.tight_layout()
    fig.savefig(fname, dpi=300)
    plt.close()


def plot_test_txt():  # from utils.utils import *; plot_test()
    """Plot histograms of the box centers stored in 'test.txt'.

    The first four columns are read as xyxy boxes. A 2-D histogram of the
    centers is saved to 'hist2d.jpg' and the two 1-D histograms to 'hist1d.jpg'.
    """
    data = np.loadtxt('test.txt', dtype=np.float32)
    centers = xyxy2xywh(data[:, :4])
    cx = centers[:, 0]
    cy = centers[:, 1]

    # Joint distribution of the centers
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0)
    ax.set_aspect('equal')
    fig.tight_layout()
    plt.savefig('hist2d.jpg', dpi=300)

    # Marginal distributions
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    for axis, values in zip(ax, (cx, cy)):
        axis.hist(values, bins=600)
    fig.tight_layout()
    plt.savefig('hist1d.jpg', dpi=300)


def plot_targets_txt():  # from utils.utils import *; plot_targets_txt()
    """Plot histograms of the first four columns of 'targets.txt' and save them to 'targets.jpg'.

    Each panel's legend shows the mean and standard deviation of that column.
    """
    columns = np.loadtxt('targets.txt', dtype=np.float32).T

    titles = ['x targets', 'y targets', 'width targets', 'height targets']
    fig, ax = plt.subplots(2, 2, figsize=(8, 8))
    panels = ax.ravel()
    for idx, title in enumerate(titles):
        values = columns[idx]
        stats_label = '%.3g +/- %.3g' % (values.mean(), values.std())
        panels[idx].hist(values, bins=100, label=stats_label)
        panels[idx].legend()
        panels[idx].set_title(title)
    fig.tight_layout()
    plt.savefig('targets.jpg', dpi=300)


def plot_results(start=0, stop=0):  # from utils.utils import *; plot_results()
    """Plot the training curves from every 'results*.txt' file and save them to 'results.png'.

    Files are searched in the working directory and in '../../Downloads'.
    Rows ``start`` up to ``stop`` are plotted; a ``stop`` of 0 means up to the
    last row.
    """
    # import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v3.txt')

    fig, ax = plt.subplots(2, 5, figsize=(14, 7))
    ax = ax.ravel()
    titles = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Train Loss', 'Precision', 'Recall', 'mAP',
              'F1', 'Test Loss']
    result_files = sorted(glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt'))
    for f in result_files:
        results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11, 12, 13]).T
        n = results.shape[1]  # number of rows
        last = min(stop, n) if stop else n
        x = range(start, last)
        legend_name = f.replace('.txt', '')
        for i, title in enumerate(titles):
            ax[i].plot(x, results[i, x], marker='.', label=legend_name)
            ax[i].set_title(title)
    fig.tight_layout()
    ax[4].legend()
    fig.savefig('results.png', dpi=300)