github-yolov3
    utils.py

    This is the most important part of all: how the loss and the various evaluation metrics are computed.
    By my priorities, the loss-computation code deserves the closest reading. (In fact I have already spent two days on this code, and quite a few questions remain.)

    import glob
    import random
    import cv2
    import matplotlib
    import matplotlib.pyplot as plt
    import numpy as np
    import torch
    import torch.nn as nn
    from PIL import Image
    from tqdm import tqdm
    from pathlib import Path

    from . import torch_utils  # , google_utils

    matplotlib.rc('font', **{'size': 11})

    # Set printoptions
    torch.set_printoptions(linewidth=1320, precision=5, profile='long')
    np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5

    # Prevent OpenCV from multithreading (to use PyTorch DataLoader)
    cv2.setNumThreads(0)


    def float3(x):  # format floats to 3 decimals
        return float(format(x, '.3f'))


    def init_seeds(seed=0):
        random.seed(seed)
        np.random.seed(seed)
        torch_utils.init_seeds(seed=seed)


    def load_classes(path):
        # Loads *.names file at 'path'
        with open(path, 'r') as f:
            names = f.read().split('\n')
        return list(filter(None, names))  # filter removes empty strings (such as last line)


    def model_info(model, report='summary'):
        # Prints a line-by-line description of a PyTorch model
        n_p = sum(x.numel() for x in model.parameters())  # number parameters
        n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
        if report == 'full':  # '==', not 'is': identity comparison against a string literal is a bug
            print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
            for i, (name, p) in enumerate(model.named_parameters()):
                name = name.replace('module_list.', '')
                print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
                      (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
        print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g))


    def labels_to_class_weights(labels, nc=80):
        # Get class weights (inverse frequency) from training labels
        labels = np.concatenate(labels, 0)  # labels.shape = (866643, 5) for COCO
        classes = labels[:, 0].astype(int)  # labels = [class xywh] (np.int in the original; removed in newer NumPy)
        weights = np.bincount(classes, minlength=nc)  # occurrences per class
        weights[weights == 0] = 1  # replace empty bins with 1
        weights = 1 / weights  # number of targets per class
        weights /= weights.sum()  # normalize
        return torch.Tensor(weights)


    def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
        # Produces image weights based on class mAPs
        n = len(labels)
        class_counts = np.array([np.bincount(labels[i][:, 0].astype(int), minlength=nc) for i in range(n)])
        image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
        # index = random.choices(range(n), weights=image_weights, k=1)  # weight image sample
        return image_weights


    def coco_class_weights():  # frequency of each class in coco train2014
        n = [187437, 4955, 30920, 6033, 3838, 4332, 3160, 7051, 7677, 9167, 1316, 1372, 833, 6757, 7355, 3302, 3776, 4671,
             6769, 5706, 3908, 903, 3686, 3596, 6200, 7920, 8779, 4505, 4272, 1862, 4698, 1962, 4403, 6659, 2402, 2689,
             4012, 4175, 3411, 17048, 5637, 14553, 3923, 5539, 4289, 10084, 7018, 4314, 3099, 4638, 4939, 5543, 2038, 4004,
             5053, 4578, 27292, 4113, 5931, 2905, 11174, 2873, 4036, 3415, 1517, 4122, 1980, 4464, 1190, 2302, 156, 3933,
             1877, 17630, 4337, 4624, 1075, 3468, 135, 1380]
        weights = 1 / torch.Tensor(n)
        weights /= weights.sum()
        return weights


    def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
        # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
        # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
        # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
        # x = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
        x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
             35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
             64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
        return x
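
    A quick illustration of labels_to_class_weights on toy labels (my own example, with nc=3 instead of 80):

    import numpy as np

    # two images' labels, rows = [class, x, y, w, h]; class 0 appears three times, class 2 once
    labels = [np.array([[0, .5, .5, .1, .1], [0, .2, .2, .1, .1]]),
              np.array([[0, .7, .7, .2, .2], [2, .4, .4, .3, .3]])]
    print(labels_to_class_weights(labels, nc=3))  # tensor([0.1429, 0.4286, 0.4286]); rarer classes get more weight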


    An instructive example of how to initialize network weights, though the author never actually uses it:

    def weights_init_normal(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            torch.nn.init.normal_(m.weight.data, 0.0, 0.03)
        elif classname.find('BatchNorm2d') != -1:
            torch.nn.init.normal_(m.weight.data, 1.0, 0.03)
            torch.nn.init.constant_(m.bias.data, 0.0)
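
    A sketch of how such an initializer would typically be applied (the toy model below is my own, not from the repo):

    import torch.nn as nn

    # any nn.Module works; .apply() calls the function on every submodule recursively
    model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.ReLU())
    model.apply(weights_init_normal)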
    


    Some commonly used utility functions:

    def xyxy2xywh(x):
        # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
        y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
        y[:, 0] = (x[:, 0] + x[:, 2]) / 2
        y[:, 1] = (x[:, 1] + x[:, 3]) / 2
        y[:, 2] = x[:, 2] - x[:, 0]
        y[:, 3] = x[:, 3] - x[:, 1]
        return y
    
    
    def xywh2xyxy(x):
        # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
        y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
        y[:, 0] = x[:, 0] - x[:, 2] / 2
        y[:, 1] = x[:, 1] - x[:, 3] / 2
        y[:, 2] = x[:, 0] + x[:, 2] / 2
        y[:, 3] = x[:, 1] + x[:, 3] / 2
        return y
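
    A minimal round-trip check (my own snippet) to make the two box conventions concrete:

    import torch

    boxes_xyxy = torch.tensor([[10., 20., 50., 80.]])  # corners: x1, y1, x2, y2
    boxes_xywh = xyxy2xywh(boxes_xyxy)                 # tensor([[30., 50., 40., 60.]]): center x, center y, w, h
    assert torch.allclose(xywh2xyxy(boxes_xywh), boxes_xyxy)  # the two conversions are exact inverses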
    


    I haven't needed the code below yet, so I haven't studied it closely:

    def scale_coords(img1_shape, coords, img0_shape):
        # Rescale coords (xyxy) from img1_shape to img0_shape
        gain = max(img1_shape) / max(img0_shape)  # gain  = old / new
        coords[:, [0, 2]] -= (img1_shape[1] - img0_shape[1] * gain) / 2  # x padding
        coords[:, [1, 3]] -= (img1_shape[0] - img0_shape[0] * gain) / 2  # y padding
        coords[:, :4] /= gain
        clip_coords(coords, img0_shape)
        return coords
    
    
    def clip_coords(boxes, img_shape):
        # Clip bounding xyxy bounding boxes to image shape (height, width)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clamp(min=0, max=img_shape[1])  # clip x
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clamp(min=0, max=img_shape[0])  # clip y
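
    A small sanity check I added for scale_coords (shapes are (height, width); the values are fabricated): a box spanning the whole letterboxed 416x416 network input should map back onto the full original image.

    import torch

    coords = torch.tensor([[0., 69.3333, 416., 346.6667]])  # xyxy in 416x416 network coordinates
    print(scale_coords((416, 416), coords, (320, 480)))     # ~[[0., 0., 480., 320.]], the full 480x320 image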
    


    The mAP-computation code, which I still need to go through:

    def ap_per_class(tp, conf, pred_cls, target_cls):
        """ Compute the average precision, given the recall and precision curves.
        Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
        # Arguments
            tp:    True positives (list).
            conf:  Objectness value from 0-1 (list).
            pred_cls: Predicted object classes (list).
            target_cls: True object classes (list).
        # Returns
            The average precision as computed in py-faster-rcnn.
        """
    
        # Sort by objectness
        i = np.argsort(-conf)
        tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
    
        # Find unique classes
        unique_classes = np.unique(target_cls)
    
        # Create Precision-Recall curve and compute AP for each class
        ap, p, r = [], [], []
        for c in unique_classes:
            i = pred_cls == c
            n_gt = (target_cls == c).sum()  # Number of ground truth objects
            n_p = i.sum()  # Number of predicted objects
    
            if n_p == 0 and n_gt == 0:
                continue
            elif n_p == 0 or n_gt == 0:
                ap.append(0)
                r.append(0)
                p.append(0)
            else:
                # Accumulate FPs and TPs
                fpc = (1 - tp[i]).cumsum()
                tpc = (tp[i]).cumsum()
    
                # Recall
                recall_curve = tpc / (n_gt + 1e-16)
                r.append(recall_curve[-1])
    
                # Precision
                precision_curve = tpc / (tpc + fpc)
                p.append(precision_curve[-1])
    
                # AP from recall-precision curve
                ap.append(compute_ap(recall_curve, precision_curve))
    
                # Plot
                # plt.plot(recall_curve, precision_curve)
    
        # Compute F1 score (harmonic mean of precision and recall)
        p, r, ap = np.array(p), np.array(r), np.array(ap)
        f1 = 2 * p * r / (p + r + 1e-16)
    
        return p, r, ap, f1, unique_classes.astype('int32')
    
    
    def compute_ap(recall, precision):
        """ Compute the average precision, given the recall and precision curves.
        Source: https://github.com/rbgirshick/py-faster-rcnn.
        # Arguments
            recall:    The recall curve (list).
            precision: The precision curve (list).
        # Returns
            The average precision as computed in py-faster-rcnn.
        """
        # correct AP calculation
        # first append sentinel values at the end
    
        mrec = np.concatenate(([0.], recall, [1.]))
        mpre = np.concatenate(([0.], precision, [0.]))
    
        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    
        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]
    
        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
        return ap
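
    A tiny worked example for compute_ap (numbers mine): two detections that reach recall 1.0 while precision falls to 0.5.

    import numpy as np

    recall = np.array([0.5, 1.0])
    precision = np.array([1.0, 0.5])
    print(compute_ap(recall, precision))  # 0.75 = 0.5*1.0 + 0.5*0.5, the area under the stepped envelope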
    


    How IoU is computed (including GIoU):

    def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False):
        # Note what the author's comment already says: box1 has shape (4,), while box2 can carry several boxes, shape (n, 4)
        # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
        box2 = box2.t()
    
        # Get the coordinates of bounding boxes
        if x1y1x2y2:
            # x1, y1, x2, y2 = box1
            b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
            b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
        else:
            # x, y, w, h = box1
            b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
            b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
            b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
            b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
    
        # Intersection area
        inter_area = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
                     (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    
        # Union Area
        union_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1) + 1e-16) + \
                     (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter_area
    
        iou = inter_area / union_area  # iou
        if GIoU:  # Generalized IoU https://arxiv.org/pdf/1902.09630.pdf
            c_x1, c_x2 = torch.min(b1_x1, b2_x1), torch.max(b1_x2, b2_x2)
            c_y1, c_y2 = torch.min(b1_y1, b2_y1), torch.max(b1_y2, b2_y2)
            c_area = (c_x2 - c_x1) * (c_y2 - c_y1)  # convex area
            return iou - (c_area - union_area) / c_area  # GIoU
    
        return iou
    
    
    def wh_iou(box1, box2):
        # As above: box1 has shape (2,), while box2 can carry several boxes, shape (n, 2)
        # Returns the IoU of wh1 to wh2. wh1 is 2, wh2 is nx2
        box2 = box2.t()
    
        # w, h = box1
        w1, h1 = box1[0], box1[1]
        w2, h2 = box2[0], box2[1]
    
        # Intersection area
        inter_area = torch.min(w1, w2) * torch.min(h1, h2)
    
        # Union Area
        union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
    
        return inter_area / union_area  # iou
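
    A quick check of both behaviors (boxes made up by me):

    import torch

    box = torch.tensor([0., 0., 10., 10.])                             # one box, xyxy
    others = torch.tensor([[0., 0., 10., 20.], [20., 0., 30., 10.]])   # one overlapping, one disjoint
    print(bbox_iou(box, others))             # tensor([0.5000, 0.0000])
    print(bbox_iou(box, others, GIoU=True))  # tensor([ 0.5000, -0.3333]); GIoU goes negative for disjoint boxes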
    


    The heart of the matter: the loss-computation code.
    The loss formulation from the YOLOv3 paper is shown in the figure below.

    [Figure 1: the loss terms from the YOLOv3 paper]

    A note on p. In the model's forward pass:

    1. if self.training
      output.append(each YOLOLayer's output); in training a yolo layer outputs (bs, 3, ng, ng, 85), where ng is that layer's feature-map size
      return output, where output is (3, pi) with each pi of shape (bs, 3, ng, ng, 85)
    2. else
      output.append(each YOLOLayer's output); at inference a yolo layer outputs both io (bs, ng*ng*3, 85) and p (bs, 3, ng, ng, 85)
      io, p = list(zip(*output)), which regroups the per-layer outputs
      io = torch.cat(io, 1)  # concatenate along dim 1; for a 416x416 input this gives (bs, 10647, 85), since (13*13 + 26*26 + 52*52) * 3 = 10647
      return io, p, where p is (3, pi) with each pi of shape (bs, 3, ng, ng, 85)
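    A quick sanity check on that concatenated dimension (my own snippet, assuming the default 416x416 input with 3 anchors per scale):

    strides = [32, 16, 8]              # the three YOLO layers' downsampling factors
    ng = [416 // s for s in strides]   # feature-map sizes: [13, 26, 52]
    print(sum(n * n * 3 for n in ng))  # 10647 predictions per image
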
    def compute_loss(p, targets, model, giou_loss=True):  # predictions, targets, model
        '''
        During training p is passed in directly; at test time the model returns io and p, and it is the second
        output that gets passed here. Either way p is (3, pi), where each pi is (bs, 3, ng, ng, 85).
        targets comes from the ground-truth annotations, shape (num_of_labels(a batch), 6); the six values in the
        second dimension are (image_id, class, x, y, w, h).
        '''
        # ft pins down the tensor type (CUDA vs CPU float)
        ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor
        lxy, lwh, lcls, lobj = ft([0]), ft([0]), ft([0]), ft([0])
        '''
        txy: (3, num_of_usedAnchors, 2)
        twh: (3, num_of_usedAnchors, 2)
        tcls: (after all three loop iterations) (3, num_of_usedAnchors)
        tbox: (3, num_of_usedAnchors, 4)
        indices: (3, (b,a,gj,gi)); b:(num_of_usedAnchors, ), a:(num_of_usedAnchors, ), gj:(num_of_usedAnchors, ), gi:(num_of_usedAnchors, )
        anchor_vec: (3, num_of_usedAnchors, 2)
        '''
        txy, twh, tcls, tbox, indices, anchor_vec = build_targets(model, targets)
        # fetch the model's hyperparameters
        h = model.hyp  # hyperparameters

        # Define criteria
        # MSELoss is mean squared error: MSELoss(x, y) = (x - y)^2
        MSE = nn.MSELoss()
        '''
        h['cls_pw']: 1.957,  # cls BCELoss positive_weight
        h['obj_pw']: 2.894,  # obj BCELoss positive_weight
        '''
        BCEcls = nn.BCEWithLogitsLoss(pos_weight=ft([h['cls_pw']]))  # class loss
        BCEobj = nn.BCEWithLogitsLoss(pos_weight=ft([h['obj_pw']]))  # objectness loss
        # CE = nn.CrossEntropyLoss()  # (weight=model.class_weights)

        # Compute losses
        bs = p[0].shape[0]  # batch size
        k = bs / 64  # loss gain
        for i, pi0 in enumerate(p):  # layer i predictions, i
            # p is (3, pi0), where pi0 is (bs, 3, ng, ng, 85)
            # b:(num_of_usedAnchors, ), a:(num_of_usedAnchors, ), gj:(num_of_usedAnchors, ), gi:(num_of_usedAnchors, )
            b, a, gj, gi = indices[i]  # image_id, anchor, gridy, gridx
            # tobj: (bs, 3, ng, ng)
            tobj = torch.zeros_like(pi0[..., 0])  # target obj

            # Compute losses
            # number of ground-truth labels this layer is responsible for
            nb = len(b)
            if nb:  # number of targets
                # If this layer matches any ground-truth labels, i.e. there are positives, we also need
                # lxy + lwh + lcls; negatives skip those terms entirely.

                # pi0 is the i-th yolo layer's output; pi gathers the predictions at image b, anchor a,
                # grid cell (gj, gi): shape (num_of_usedAnchors, 85)
                pi = pi0[b, a, gj, gi]  # predictions closest to anchors
                # tobj: objectness target at image b, anchor a, grid cell (gj, gi)
                tobj[b, a, gj, gi] = 1.0  # obj
                # pi[..., 2:4] = torch.sigmoid(pi[..., 2:4])  # wh power loss (uncomment)

                if giou_loss:
                    pbox = torch.cat((torch.sigmoid(pi[..., 0:2]), torch.exp(pi[..., 2:4]) * anchor_vec[i]), 1)  # predicted
                    # compute the GIoU values
                    giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True)  # giou computation
                    # lxy += k * weight * mean(1 - giou)
                    lxy += (k * h['giou']) * (1.0 - giou).mean()  # giou loss
                else:
                    # the standard yolo loss without GIoU: lxy + lwh + lobj + lcls
                    # lxy += k * weight * MSE of xy
                    lxy += (k * h['xy']) * MSE(torch.sigmoid(pi[..., 0:2]), txy[i])  # xy loss
                    # lwh += k * weight * MSE of wh
                    lwh += (k * h['wh']) * MSE(pi[..., 2:4], twh[i])  # wh yolo loss

                # tclsm holds the ground-truth classes one-hot: (num_of_usedAnchors, 80)
                tclsm = torch.zeros_like(pi[..., 5:])
                # tcls[i]: (num_of_usedAnchors, ), the class indices
                tclsm[range(nb), tcls[i]] = 1.0
                # class loss: lcls += k * weight * BCEcls over the 80 classes
                lcls += (k * h['cls']) * BCEcls(pi[..., 5:], tclsm)  # cls loss (BCE)
                # lcls += (k * h['cls']) * CE(pi[..., 5:], tcls[i])  # cls loss (CE)

                # Append targets to text file
                # with open('targets.txt', 'a') as file:
                #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

            # objectness loss: lobj += k * weight * BCEobj over all positions
            lobj += (k * h['obj']) * BCEobj(pi0[..., 4], tobj)  # obj loss

            '''
            To spell it out: a position that is a negative sample only contributes to lobj;
            a positive sample additionally contributes to lxy + lwh + lcls.
            '''
        loss = lxy + lwh + lobj + lcls

        return loss, torch.cat((lxy, lwh, lobj, lcls, loss)).detach()
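
    To see what pos_weight does in those BCE terms, here is a tiny illustration (logits made up by me):

    import torch
    import torch.nn as nn

    logits = torch.tensor([2.0, -1.0])  # raw scores; sigmoid is applied inside the loss
    target = torch.tensor([1.0, 0.0])   # one positive, one negative

    plain = nn.BCEWithLogitsLoss()(logits, target)
    weighted = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([2.894]))(logits, target)
    print(plain, weighted)  # only the positive target's term is scaled by 2.894, so weighted > plain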
    


    The code below does the preparation work before the loss can be computed: it matches the ground-truth annotations against the anchors and puts them in the same form as the network's output, so the two can be compared.

    def build_targets(model, targets):
        # targets comes from the ground-truth annotations, shape (num_of_labels(a batch), 6); the six values in
        # the second dimension are (image_id, class, x, y, w, h)

        # the IoU threshold; anchor-target pairs scoring below it are discarded (see the reject block below)
        iou_thres = model.hyp['iou_t']  # hyperparameter
        if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel):
            model = model.module

        nt = len(targets)  # num_of_labels(a batch)
        # these are the outputs we are after; initialize them first
        txy, twh, tcls, tbox, indices, anchor_vec = [], [], [], [], [], []

        for i in model.yolo_layers:
            # visit every yolo layer; there are three, so the loop runs three times
            layer = model.module_list[i][0]

            # iou of targets-anchors
            t, a = targets, []  # t is a working copy of targets; a will hold the matched anchor indices
            gwh = t[:, 4:6] * layer.ng  # ground-truth wh scaled from [0,1] up to this layer's feature-map size, (num_of_labels(a batch), 2)
            # the author guards this, but in practice nt == 0 should not occur
            if nt:
                # layer.anchor_vec is (3, 2)
                # wh_iou(box1, box2) computes the IoU of one box against many via broadcasting; each call returns
                # (num_of_labels(a batch), ), so after torch.stack the result is (num_of_anchors, num_of_labels(a batch))
                iou = torch.stack([wh_iou(x, gwh) for x in layer.anchor_vec], 0)

                use_best_anchor = False
                if use_best_anchor:
                    iou, a = iou.max(0)  # best iou and anchor
                else:  # use all anchors
                    # number of anchors, normally 3
                    na = len(layer.anchor_vec)
                    # a: (3, )->(3,1)->(3, num_of_labels(a batch))->(3*num_of_labels(a batch), ), i.e. (num_of_labels(a batch)*num_of_anchors, )
                    a = torch.arange(na).view((-1, 1)).repeat([1, nt]).view(-1)
                    # t: (num_of_labels(a batch), 6)->(num_of_labels(a batch)*num_of_anchors, 6)
                    t = targets.repeat([na, 1])
                    # gwh: (num_of_labels(a batch), 2)->(num_of_labels(a batch)*num_of_anchors, 2)
                    gwh = gwh.repeat([na, 1])
                    # iou: (num_of_anchors, num_of_labels(a batch))->(num_of_labels(a batch)*num_of_anchors, )
                    iou = iou.view(-1)  # use all ious

                # reject anchors below iou_thres (OPTIONAL, increases P, lowers R)
                reject = True
                if reject:
                    # j: (num_of_labels(a batch)*num_of_anchors, ), a boolean mask
                    j = iou > iou_thres
                    # a nice detail: boolean-mask indexing keeps only the rows with iou > iou_thres; call the
                    # number that survive num_of_usedAnchors
                    # t: (num_of_usedAnchors, 6), a: (num_of_usedAnchors, ), gwh: (num_of_usedAnchors, 2)
                    t, a, gwh = t[j], a[j], gwh[j]

            # Indices
            # tensor.long() casts to a long (int64) tensor
            # b: img_id, (num_of_usedAnchors, ); c: class, (num_of_usedAnchors, )
            b, c = t[:, :2].long().t()  # target image, class
            # gxy: center coordinates on this feature map, (num_of_usedAnchors, 2)
            gxy = t[:, 2:4] * layer.ng  # grid x, y
            # gi, gj: (num_of_usedAnchors, 2) -> two tensors of (num_of_usedAnchors, ), the grid-cell indices
            gi, gj = gxy.long().t()  # grid x, y indices
            # indices: (after all three loop iterations) (3, (b,a,gj,gi))
            # b:(num_of_usedAnchors, ), a:(num_of_usedAnchors, ), gj:(num_of_usedAnchors, ), gi:(num_of_usedAnchors, )
            indices.append((b, a, gj, gi))

            # XY coordinates
            # gxy: the center's offset inside its grid cell, (num_of_usedAnchors, 2)
            gxy -= gxy.floor()
            # txy: (after all three loop iterations) (3, num_of_usedAnchors, 2)
            txy.append(gxy)

            # GIoU, the new metric from CVPR 2019
            # xywh (grids): the grid-relative box targets, which are what the network is trained to produce
            # tbox: (after all three loop iterations) (3, num_of_usedAnchors, 4)
            tbox.append(torch.cat((gxy, gwh), 1))  # xywh (grids)
            # a holds the indices of anchors with iou > iou_thres, (num_of_usedAnchors, ); layer.anchor_vec is (3, 2)
            # layer.anchor_vec[a]: (num_of_usedAnchors, 2)
            # anchor_vec: the matched anchors themselves (values rather than indices); (after all three loop iterations) (3, num_of_usedAnchors, 2)
            anchor_vec.append(layer.anchor_vec[a])

            # Width and height
            # twh: (after all three loop iterations) (3, num_of_usedAnchors, 2)
            '''
            A word about the next line. Recall how the network's raw xywh gets decoded into the values we
            actually want:
                xy = sigmoid(xy) + offset
                wh = exp(wh) * anchor_wh
            So the twh target we build here must be the inverse of that transform.
            '''
            twh.append(torch.log(gwh / layer.anchor_vec[a]))  # wh yolo method
            # twh.append((gwh / layer.anchor_vec[a]) ** (1 / 3) / 2)  # wh power method

            # Class
            # tcls: (after all three loop iterations) (3, num_of_usedAnchors)
            tcls.append(c)
            if c.shape[0]:
                assert c.max() <= layer.nc, 'Target classes exceed model classes'

        # txy: (3, num_of_usedAnchors, 2)
        # twh: (3, num_of_usedAnchors, 2)
        # tcls: (3, num_of_usedAnchors)
        # tbox: (3, num_of_usedAnchors, 4)
        # indices: (3, (b,a,gj,gi)); b:(num_of_usedAnchors, ), a:(num_of_usedAnchors, ), gj:(num_of_usedAnchors, ), gi:(num_of_usedAnchors, )
        # anchor_vec: (3, num_of_usedAnchors, 2)
        return txy, twh, tcls, tbox, indices, anchor_vec
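
    A tiny numeric check (mine; the anchor value is made up) that the twh encoding really inverts the decode step wh = exp(wh) * anchor_wh:

    import torch

    anchor_wh = torch.tensor([3.625, 2.8125])  # one anchor, in grid units
    gwh = torch.tensor([5.0, 4.0])             # a ground-truth box, in grid units
    twh = torch.log(gwh / anchor_wh)           # the encoding used in build_targets
    assert torch.allclose(torch.exp(twh) * anchor_wh, gwh)  # decoding recovers the ground truth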
    
    def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.5):
        """
        Removes detections with lower object confidence score than 'conf_thres'
        Non-Maximum Suppression to further filter detections.
        Returns detections with shape:
            (x1, y1, x2, y2, object_conf, class_conf, class)
        """
    
        min_wh = 2  # (pixels) minimum box width and height
    
        output = [None] * len(prediction)
        for image_i, pred in enumerate(prediction):
            # Experiment: Prior class size rejection
            # x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3]
            # a = w * h  # area
            # ar = w / (h + 1e-16)  # aspect ratio
            # n = len(w)
            # log_w, log_h, log_a, log_ar = torch.log(w), torch.log(h), torch.log(a), torch.log(ar)
            # shape_likelihood = np.zeros((n, 60), dtype=np.float32)
            # x = np.concatenate((log_w.reshape(-1, 1), log_h.reshape(-1, 1)), 1)
            # from scipy.stats import multivariate_normal
            # for c in range(60):
            # shape_likelihood[:, c] =
            #   multivariate_normal.pdf(x, mean=mat['class_mu'][c, :2], cov=mat['class_cov'][c, :2, :2])
    
            # Multiply conf by class conf to get combined confidence
            class_conf, class_pred = pred[:, 5:].max(1)
            pred[:, 4] *= class_conf
    
            # Select only suitable predictions
            i = (pred[:, 4] > conf_thres) & (pred[:, 2:4] > min_wh).all(1) & torch.isfinite(pred).all(1)
            pred = pred[i]
    
            # If none are remaining => process next image
            if len(pred) == 0:
                continue
    
            # Select predicted classes
            class_conf = class_conf[i]
            class_pred = class_pred[i].unsqueeze(1).float()
    
            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            pred[:, :4] = xywh2xyxy(pred[:, :4])
            # pred[:, 4] *= class_conf  # improves mAP from 0.549 to 0.551
    
            # Detections ordered as (x1y1x2y2, obj_conf, class_conf, class_pred)
            pred = torch.cat((pred[:, :5], class_conf.unsqueeze(1), class_pred), 1)
    
            # Get detections sorted by decreasing confidence scores
            pred = pred[(-pred[:, 4]).argsort()]
    
            det_max = []
            nms_style = 'MERGE'  # 'OR' (default), 'AND', 'MERGE' (experimental)
            for c in pred[:, -1].unique():
                dc = pred[pred[:, -1] == c]  # select class c
                n = len(dc)
                if n == 1:
                    det_max.append(dc)  # No NMS required if only 1 prediction
                    continue
                elif n > 100:
                    dc = dc[:100]  # limit to first 100 boxes: https://github.com/ultralytics/yolov3/issues/117
    
                # Non-maximum suppression
                if nms_style == 'OR':  # default
                    # METHOD1
                    # ind = list(range(len(dc)))
                    # while len(ind):
                    # j = ind[0]
                    # det_max.append(dc[j:j + 1])  # save highest conf detection
                    # reject = (bbox_iou(dc[j], dc[ind]) > nms_thres).nonzero()
                    # [ind.pop(i) for i in reversed(reject)]
    
                    # METHOD2
                    while dc.shape[0]:
                        det_max.append(dc[:1])  # save highest conf detection
                        if len(dc) == 1:  # Stop if we're at the last detection
                            break
                        iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                        dc = dc[1:][iou < nms_thres]  # remove ious > threshold
    
                elif nms_style == 'AND':  # requires overlap, single boxes erased
                    while len(dc) > 1:
                        iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                        if iou.max() > 0.5:
                            det_max.append(dc[:1])
                        dc = dc[1:][iou < nms_thres]  # remove ious > threshold
    
                elif nms_style == 'MERGE':  # weighted mixture box
                    while len(dc):
                        if len(dc) == 1:
                            det_max.append(dc)
                            break
                        i = bbox_iou(dc[0], dc) > nms_thres  # iou with other boxes
                        weights = dc[i, 4:5]
                        dc[0, :4] = (weights * dc[i, :4]).sum(0) / weights.sum()
                        det_max.append(dc[:1])
                        dc = dc[i == 0]
    
                elif nms_style == 'SOFT':  # soft-NMS https://arxiv.org/abs/1704.04503
                    sigma = 0.5  # soft-nms sigma parameter
                    while len(dc):
                        if len(dc) == 1:
                            det_max.append(dc)
                            break
                        det_max.append(dc[:1])
                        iou = bbox_iou(dc[0], dc[1:])  # iou with other boxes
                        dc = dc[1:]
                        dc[:, 4] *= torch.exp(-iou ** 2 / sigma)  # decay confidences
                        # dc = dc[dc[:, 4] > nms_thres]  # new line per https://github.com/ultralytics/yolov3/issues/362
    
            if len(det_max):
                det_max = torch.cat(det_max)  # concatenate
                output[image_i] = det_max[(-det_max[:, 4]).argsort()]  # sort
    
        return output
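
    A minimal usage sketch (inputs fabricated by me): two heavily overlapping detections of the same class collapse into a single output row.

    import torch

    # one image, two detections; each row is (x, y, w, h, obj_conf, 80 class scores)
    prediction = torch.zeros(1, 2, 85)
    prediction[0, 0, :5] = torch.tensor([50., 50., 20., 20., 0.9])
    prediction[0, 1, :5] = torch.tensor([52., 52., 20., 20., 0.8])
    prediction[0, :, 5] = torch.tensor([0.8, 0.7])  # both predict class 0

    out = non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.5)
    print(out[0].shape)  # torch.Size([1, 7]): (x1, y1, x2, y2, obj_conf, class_conf, class)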
    
    
    def get_yolo_layers(model):
        bool_vec = [x['type'] == 'yolo' for x in model.module_defs]
        return [i for i, x in enumerate(bool_vec) if x]  # [82, 94, 106] for yolov3
    
    
    def strip_optimizer_from_checkpoint(filename='weights/best.pt'):
        # Strip optimizer from *.pt files for lighter files (reduced by 2/3 size)
        a = torch.load(filename, map_location='cpu')
        a['optimizer'] = []
        torch.save(a, filename.replace('.pt', '_lite.pt'))
    
    
    def coco_class_count(path='../coco/labels/train2014/'):
        # Histogram of occurrences per class
        nc = 80  # number classes
        x = np.zeros(nc, dtype='int32')
        files = sorted(glob.glob('%s/*.*' % path))
        for i, file in enumerate(files):
            labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
            x += np.bincount(labels[:, 0].astype('int32'), minlength=nc)
            print(i, len(files))
    
    
    def coco_only_people(path='../coco/labels/val2014/'):
        # Find images with only people
        files = sorted(glob.glob('%s/*.*' % path))
        for i, file in enumerate(files):
            labels = np.loadtxt(file, dtype=np.float32).reshape(-1, 5)
            if all(labels[:, 0] == 0):
                print(labels.shape[0], file)
    
    
    def select_best_evolve(path='../../Downloads/evolve*.txt'):  # from utils.utils import *; select_best_evolve()
        # Find best evolved mutation
        for file in sorted(glob.glob(path)):
            x = np.loadtxt(file, dtype=np.float32)
            print(file, x[x[:, 2].argmax()])
    
    
    def kmeans_targets(path='./data/coco_64img.txt'):  # from utils.utils import *; kmeans_targets()
        with open(path, 'r') as f:
            img_files = f.read().splitlines()
            img_files = list(filter(lambda x: len(x) > 0, img_files))
    
        # Read shapes
        n = len(img_files)
        assert n > 0, 'No images found in %s' % path
        label_files = [x.replace('images', 'labels').
                           replace('.jpeg', '.txt').
                           replace('.jpg', '.txt').
                           replace('.bmp', '.txt').
                           replace('.png', '.txt') for x in img_files]
        s = np.array([Image.open(f).size for f in tqdm(img_files, desc='Reading image shapes')])  # (width, height)
    
        # Read targets
        labels = [np.zeros((0, 5))] * n
        iter = tqdm(label_files, desc='Reading labels')
        for i, file in enumerate(iter):
            try:
                with open(file, 'r') as f:
                    l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)
                    if l.shape[0]:
                        assert l.shape[1] == 5, '> 5 label columns: %s' % file
                        assert (l >= 0).all(), 'negative labels: %s' % file
                        assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                        l[:, [1, 3]] *= s[i][0]
                        l[:, [2, 4]] *= s[i][1]
                        l[:, 1:] *= 320 / max(s[i])
                        labels[i] = l
            except:
                pass  # print('Warning: missing labels for %s' % self.img_files[i])  # missing label file
        assert len(np.concatenate(labels, 0)) > 0, 'No labels found. Incorrect label paths provided.'
    
        # kmeans
        from scipy import cluster
        wh = np.concatenate(labels, 0)[:, 3:5]
        k = cluster.vq.kmeans(wh, 9)[0]
        k = k[np.argsort(k.prod(1))]
        for x in k.ravel():
            print('%.1f, ' % x, end='')
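
    For reference, the kmeans step at the core of it, run on synthetic data (mine):

    import numpy as np
    from scipy import cluster

    wh = np.abs(np.random.randn(1000, 2) * 30 + 60)  # fake box widths/heights in pixels
    k = cluster.vq.kmeans(wh, 9)[0]                  # 9 centroids = 9 candidate anchors
    print(k[np.argsort(k.prod(1))].round(1))         # sorted by area, small to large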
    
    
    # Plotting functions ---------------------------------------------------------------------------------------------------
    def plot_one_box(x, img, color=None, label=None, line_thickness=None):
        # Plots one bounding box on image img
        tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line thickness
        color = color or [random.randint(0, 255) for _ in range(3)]
        c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
        cv2.rectangle(img, c1, c2, color, thickness=tl)
        if label:
            tf = max(tl - 1, 1)  # font thickness
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
            cv2.rectangle(img, c1, c2, color, -1)  # filled
            cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
    
    
    def plot_wh_methods():  # from utils.utils import *; plot_wh_methods()
        # Compares the two methods for width-height anchor multiplication
        # https://github.com/ultralytics/yolov3/issues/168
        x = np.arange(-4.0, 4.0, .1)
        ya = np.exp(x)
        yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2
    
        fig = plt.figure(figsize=(6, 3), dpi=150)
        plt.plot(x, ya, '.-', label='yolo method')
        plt.plot(x, yb ** 2, '.-', label='^2 power method')
        plt.plot(x, yb ** 2.5, '.-', label='^2.5 power method')
        plt.xlim(left=-4, right=4)
        plt.ylim(bottom=0, top=6)
        plt.xlabel('input')
        plt.ylabel('output')
        plt.legend()
        fig.tight_layout()
        fig.savefig('comparison.png', dpi=300)
    
    
    def plot_images(imgs, targets, paths=None, fname='images.jpg'):
        # Plots training images overlaid with targets
        imgs = imgs.cpu().numpy()
        targets = targets.cpu().numpy()
        # targets = targets[targets[:, 1] == 21]  # plot only one class
    
        fig = plt.figure(figsize=(10, 10))
        bs, _, h, w = imgs.shape  # batch size, _, height, width
        ns = int(np.ceil(bs ** 0.5))  # number of subplots (plt.subplot expects ints)
    
        for i in range(bs):
            boxes = xywh2xyxy(targets[targets[:, 0] == i, 2:6]).T
            boxes[[0, 2]] *= w
            boxes[[1, 3]] *= h
            plt.subplot(ns, ns, i + 1).imshow(imgs[i].transpose(1, 2, 0))
            plt.plot(boxes[[0, 2, 2, 0, 0]], boxes[[1, 1, 3, 3, 1]], '.-')
            plt.axis('off')
            if paths is not None:
                s = Path(paths[i]).name
                plt.title(s[:min(len(s), 40)], fontdict={'size': 8})  # limit to 40 characters
        fig.tight_layout()
        fig.savefig(fname, dpi=300)
        plt.close()
    
    
    def plot_test_txt():  # from utils.utils import *; plot_test_txt()
        # Plot test.txt histograms
        x = np.loadtxt('test.txt', dtype=np.float32)
        box = xyxy2xywh(x[:, :4])
        cx, cy = box[:, 0], box[:, 1]
    
        fig, ax = plt.subplots(1, 1, figsize=(6, 6))
        ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0)
        ax.set_aspect('equal')
        fig.tight_layout()
        plt.savefig('hist2d.jpg', dpi=300)
    
        fig, ax = plt.subplots(1, 2, figsize=(12, 6))
        ax[0].hist(cx, bins=600)
        ax[1].hist(cy, bins=600)
        fig.tight_layout()
        plt.savefig('hist1d.jpg', dpi=300)
    
    
    def plot_targets_txt():  # from utils.utils import *; plot_targets_txt()
        # Plot targets.txt histograms
        x = np.loadtxt('targets.txt', dtype=np.float32)
        x = x.T
    
        s = ['x targets', 'y targets', 'width targets', 'height targets']
        fig, ax = plt.subplots(2, 2, figsize=(8, 8))
        ax = ax.ravel()
        for i in range(4):
            ax[i].hist(x[i], bins=100, label='%.3g +/- %.3g' % (x[i].mean(), x[i].std()))
            ax[i].legend()
            ax[i].set_title(s[i])
        fig.tight_layout()
        plt.savefig('targets.jpg', dpi=300)
    
    
    def plot_results(start=0, stop=0):  # from utils.utils import *; plot_results()
        # Plot training results files 'results*.txt'
        # import os; os.system('wget https://storage.googleapis.com/ultralytics/yolov3/results_v3.txt')
    
        fig, ax = plt.subplots(2, 5, figsize=(14, 7))
        ax = ax.ravel()
        s = ['X + Y', 'Width + Height', 'Confidence', 'Classification', 'Train Loss', 'Precision', 'Recall', 'mAP', 'F1',
             'Test Loss']
        for f in sorted(glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt')):
            results = np.loadtxt(f, usecols=[2, 3, 4, 5, 6, 9, 10, 11, 12, 13]).T
            n = results.shape[1]  # number of rows
            x = range(start, min(stop, n) if stop else n)
            for i in range(10):
                ax[i].plot(x, results[i, x], marker='.', label=f.replace('.txt', ''))
                ax[i].set_title(s[i])
        fig.tight_layout()
        ax[4].legend()
        fig.savefig('results.png', dpi=300)