一、Loss函数

  def forward(self, l, input, targets):
      """Compute the YOLOv3 training loss for one detection head.

      NOTE(review): the ``def`` line is absent from the original listing; this
      signature is reconstructed from the free names the body uses (``self``,
      ``l``, ``input``, ``targets``). Confirm it against the real module.

      Args:
          l: Index of the feature level being processed; used to index
              ``self.anchors_mask`` and ``self.balance``.
          input: Raw head output. It is viewed below as
              ``(bs, len(self.anchors_mask[l]), self.bbox_attrs, in_h, in_w)``,
              so dimension 1 must hold that many channels (the original notes
              give 13x13 / 26x26 / 52x52 maps as examples).
          targets: Per-image ground-truth boxes, forwarded unchanged to
              ``self.get_target`` and ``self.get_ignore``.

      Returns:
          The weighted sum of the localisation, classification and
          confidence losses for this feature level.
      """
      # Batch size and spatial size of this feature map.
      bs, in_h, in_w = input.size(0), input.size(2), input.size(3)

      # Stride: how many input-image pixels one feature cell covers
      # (e.g. 32 / 16 / 8 for 13x13 / 26x26 / 52x52 maps of a 416 input).
      stride_h = self.input_shape[0] / in_h
      stride_w = self.input_shape[1] / in_w

      # Anchor sizes expressed in feature-map cells instead of pixels.
      scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]

      # (bs, A * attrs, H, W) -> (bs, A, H, W, attrs)
      prediction = input.view(
          bs, len(self.anchors_mask[l]), self.bbox_attrs, in_h, in_w
      ).permute(0, 1, 3, 4, 2).contiguous()

      # Centre offsets inside the cell.
      x = torch.sigmoid(prediction[..., 0])
      y = torch.sigmoid(prediction[..., 1])
      # Raw width / height adjustment terms.
      w = prediction[..., 2]
      h = prediction[..., 3]
      # Objectness confidence.
      conf = torch.sigmoid(prediction[..., 4])
      # Per-class confidence.
      pred_cls = torch.sigmoid(prediction[..., 5:])

      # What the network should have predicted.
      y_true, noobj_mask, box_loss_scale = self.get_target(l, targets, scaled_anchors, in_h, in_w)

      # Decode the predictions and drop cells that already overlap a ground
      # truth well from the negative set; they are poor negative samples.
      noobj_mask, pred_boxes = self.get_ignore(
          l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask
      )

      if self.cuda:
          y_true = y_true.type_as(x)
          noobj_mask = noobj_mask.type_as(x)
          box_loss_scale = box_loss_scale.type_as(x)

      # box_loss_scale is the (normalised) box area, so 2 - area gives small
      # boxes a larger weight than large ones.
      box_loss_scale = 2 - box_loss_scale

      loss = 0
      obj_mask = y_true[..., 4] == 1
      n = torch.sum(obj_mask)
      if n != 0:
          if self.giou:
              # GIoU between decoded predictions and ground-truth boxes.
              giou = self.box_giou(pred_boxes, y_true[..., :4]).type_as(x)
              loss_loc = torch.mean((1 - giou)[obj_mask])
          else:
              # Centre offsets use BCE, width / height use MSE.
              loss_x = torch.mean(self.BCELoss(x[obj_mask], y_true[..., 0][obj_mask]) * box_loss_scale[obj_mask])
              loss_y = torch.mean(self.BCELoss(y[obj_mask], y_true[..., 1][obj_mask]) * box_loss_scale[obj_mask])
              loss_w = torch.mean(self.MSELoss(w[obj_mask], y_true[..., 2][obj_mask]) * box_loss_scale[obj_mask])
              loss_h = torch.mean(self.MSELoss(h[obj_mask], y_true[..., 3][obj_mask]) * box_loss_scale[obj_mask])
              loss_loc = (loss_x + loss_y + loss_h + loss_w) * 0.1

          loss_cls = torch.mean(self.BCELoss(pred_cls[obj_mask], y_true[..., 5:][obj_mask]))
          loss += loss_loc * self.box_ratio + loss_cls * self.cls_ratio

      # Confidence loss over positives plus the negatives that were not ignored.
      loss_conf = torch.mean(self.BCELoss(conf, obj_mask.type_as(conf))[noobj_mask.bool() | obj_mask])
      loss += loss_conf * self.balance[l] * self.obj_ratio
      return loss

二、获取正样本

  def get_target(self, l, targets, anchors, in_h, in_w):
      """Build the regression / classification targets for one feature level.

      Args:
          l: Index of the feature level; selects ``self.anchors_mask[l]``.
          targets: Sequence with one tensor per image. Each row is read as
              ``[cx, cy, w, h, class]`` with the first four values multiplied
              by ``in_w`` / ``in_h`` below, i.e. they are expected to be
              normalised to the image size.
          anchors: All anchors as ``(w, h)`` pairs in feature-map cells.
          in_h, in_w: Height and width of the feature map.

      Returns:
          ``(y_true, noobj_mask, box_loss_scale)`` where, with
          ``A = len(self.anchors_mask[l])``:
          ``y_true`` is ``(bs, A, in_h, in_w, self.bbox_attrs)``,
          ``noobj_mask`` is ``(bs, A, in_h, in_w)`` (1 = no object, 0 = object),
          ``box_loss_scale`` is ``(bs, A, in_h, in_w)`` holding box area divided
          by the feature-map area.
      """
      level_mask = self.anchors_mask[l]
      num_level_anchors = len(level_mask)
      bs = len(targets)

      # 1 everywhere; cleared for every cell / anchor that owns an object.
      noobj_mask = torch.ones(bs, num_level_anchors, in_h, in_w, requires_grad=False)
      # Relative box area, later turned into a per-box loss weight.
      box_loss_scale = torch.zeros(bs, num_level_anchors, in_h, in_w, requires_grad=False)
      y_true = torch.zeros(bs, num_level_anchors, in_h, in_w, self.bbox_attrs, requires_grad=False)

      # Anchors as (0, 0, w, h) boxes. Loop-invariant, so built once.
      anchor_shapes = torch.cat((torch.zeros((len(anchors), 2)), torch.FloatTensor(anchors)), 1)

      for b in range(bs):
          if len(targets[b]) == 0:
              continue

          # Ground-truth boxes of this image in feature-map coordinates.
          batch_target = torch.zeros_like(targets[b])
          batch_target[:, [0, 2]] = targets[b][:, [0, 2]] * in_w
          batch_target[:, [1, 3]] = targets[b][:, [1, 3]] * in_h
          batch_target[:, 4] = targets[b][:, 4]
          batch_target = batch_target.cpu()

          # Ground truths as (0, 0, w, h) so the IoU only compares shapes.
          gt_box = torch.cat((torch.zeros((batch_target.size(0), 2)), batch_target[:, 2:4]), 1).float()

          # Best-matching anchor (over ALL anchors) for every ground truth.
          best_ns = torch.argmax(self.calculate_iou(gt_box, anchor_shapes), dim=-1)

          for t, best_n in enumerate(best_ns.tolist()):
              # The best anchor may belong to another feature level.
              if best_n not in level_mask:
                  continue
              # Position of that anchor among this level's anchors.
              k = level_mask.index(best_n)

              # Grid cell containing the box centre. Clamped so that a centre
              # on the right / bottom border does not index out of range and a
              # negative centre does not silently wrap to the last row / column.
              i = min(max(int(torch.floor(batch_target[t, 0])), 0), in_w - 1)
              j = min(max(int(torch.floor(batch_target[t, 1])), 0), in_h - 1)
              # Class index of the box.
              c = int(batch_target[t, 4])

              noobj_mask[b, k, j, i] = 0

              if not self.giou:
                  # Offsets inside the cell and log-ratios against the anchor.
                  y_true[b, k, j, i, 0] = batch_target[t, 0] - i
                  y_true[b, k, j, i, 1] = batch_target[t, 1] - j
                  y_true[b, k, j, i, 2] = math.log(batch_target[t, 2] / anchors[best_n][0])
                  y_true[b, k, j, i, 3] = math.log(batch_target[t, 3] / anchors[best_n][1])
              else:
                  # GIoU mode compares boxes directly, so store them as-is.
                  y_true[b, k, j, i, 0] = batch_target[t, 0]
                  y_true[b, k, j, i, 1] = batch_target[t, 1]
                  y_true[b, k, j, i, 2] = batch_target[t, 2]
                  y_true[b, k, j, i, 3] = batch_target[t, 3]
              y_true[b, k, j, i, 4] = 1
              y_true[b, k, j, i, c + 5] = 1

              # Box area relative to the feature map; large boxes end up with
              # a smaller loss weight, small boxes with a larger one.
              box_loss_scale[b, k, j, i] = batch_target[t, 2] * batch_target[t, 3] / in_w / in_h

      return y_true, noobj_mask, box_loss_scale

三、计算真实框和先验框的IOU

  def calculate_iou(self, _box_a, _box_b):
      """Return the pairwise IoU between two sets of centre-format boxes.

      Args:
          _box_a: Tensor of shape ``(A, 4)`` with rows ``[cx, cy, w, h]``.
          _box_b: Tensor of shape ``(B, 4)`` with rows ``[cx, cy, w, h]``.

      Returns:
          Tensor of shape ``(A, B)``; entry ``[i, j]`` is the IoU of
          ``_box_a[i]`` and ``_box_b[j]``. Pairs whose union is zero yield 0
          instead of NaN.
      """
      def _corners(boxes):
          # (cx, cy, w, h) -> top-left and bottom-right corners.
          half_wh = boxes[:, 2:4] / 2
          return boxes[:, :2] - half_wh, boxes[:, :2] + half_wh

      a_min, a_max = _corners(_box_a)
      b_min, b_max = _corners(_box_b)

      # Broadcast (A, 1, 2) against (1, B, 2) to get every pair at once.
      inter_max = torch.min(a_max[:, None, :], b_max[None, :, :])
      inter_min = torch.max(a_min[:, None, :], b_min[None, :, :])
      inter_wh = torch.clamp(inter_max - inter_min, min=0)
      inter = inter_wh[..., 0] * inter_wh[..., 1]

      area_a = (a_max - a_min).prod(dim=-1)[:, None]
      area_b = (b_max - b_min).prod(dim=-1)[None, :]
      union = area_a + area_b - inter

      # Guard the division: two degenerate boxes would otherwise give 0 / 0.
      return inter / torch.clamp(union, min=1e-9)

四、获取负样本

  def get_ignore(self, l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask):
      """Decode the predicted boxes and drop well-matched cells from the negatives.

      Args:
          l: Index of the feature level; selects ``self.anchors_mask[l]``.
          x, y: Predicted centre offsets, indexed as ``(bs, A, in_h, in_w)``.
          h, w: Predicted height / width adjustment terms, same shape.
              Note the order: ``h`` comes before ``w`` in the signature.
          targets: Sequence with one tensor per image; the first four columns
              are scaled by ``in_w`` / ``in_h`` below.
          scaled_anchors: All anchors as ``(w, h)`` pairs in feature-map cells.
          in_h, in_w: Height and width of the feature map.
          noobj_mask: ``(bs, A, in_h, in_w)`` mask, modified in place.

      Returns:
          ``(noobj_mask, pred_boxes)``. ``pred_boxes`` has shape
          ``(bs, A, in_h, in_w, 4)`` holding ``[cx, cy, w, h]`` in feature-map
          cells and stays attached to the autograd graph.
      """
      bs = len(targets)

      # Cell coordinates, broadcast over batch and anchors.
      grid_x = torch.arange(in_w).type_as(x).view(1, 1, 1, in_w)
      grid_y = torch.arange(in_h).type_as(x).view(1, 1, in_h, 1)

      # Width / height of the anchors that belong to this feature level.
      level_anchors = torch.Tensor(np.array(scaled_anchors)[self.anchors_mask[l]]).type_as(x)
      anchor_w = level_anchors[:, 0].view(1, -1, 1, 1)
      anchor_h = level_anchors[:, 1].view(1, -1, 1, 1)

      # YOLO decoding: centre = offset + cell index, size = exp(t) * anchor.
      pred_boxes = torch.stack(
          [x + grid_x, y + grid_y, torch.exp(w) * anchor_w, torch.exp(h) * anchor_h],
          dim=-1,
      )

      for b in range(bs):
          if len(targets[b]) == 0:
              continue

          # Ground-truth boxes of this image in feature-map coordinates.
          to_cells = targets[b].new_tensor([in_w, in_h, in_w, in_h])
          batch_target = (targets[b][:, :4] * to_cells).type_as(x)

          # The mask only needs the IoU values, so keep autograd out of it.
          pred_boxes_for_ignore = pred_boxes[b].detach().reshape(-1, 4)

          # (num_true_box, num_anchors) -> best overlap per predicted box.
          anch_ious = self.calculate_iou(batch_target, pred_boxes_for_ignore)
          anch_ious_max, _ = torch.max(anch_ious, dim=0)
          anch_ious_max = anch_ious_max.view(pred_boxes[b].size()[:3])

          # A prediction that already overlaps a ground truth this well is
          # not a suitable negative sample.
          ignore = (anch_ious_max > self.ignore_threshold).to(noobj_mask.device)
          noobj_mask[b][ignore] = 0

      return noobj_mask, pred_boxes

五、GIOU计算

  def box_giou(self, b1, b2):
      """Return the element-wise GIoU of two sets of centre-format boxes.

      Args:
          b1: Tensor of shape ``(..., 4)`` with ``[cx, cy, w, h]`` in the
              last dimension (the predicted boxes in the loss).
          b2: Tensor broadcastable with ``b1``, same layout (the ground-truth
              boxes in the loss).

      Returns:
          Tensor with the leading shape of the inputs and no trailing box
          dimension. Degenerate pairs whose union or enclosing area is zero
          yield finite values instead of NaN.
      """
      eps = 1e-9

      # Corners of the first set of boxes.
      b1_xy = b1[..., :2]
      b1_wh = b1[..., 2:4]
      b1_wh_half = b1_wh / 2.
      b1_mins = b1_xy - b1_wh_half
      b1_maxes = b1_xy + b1_wh_half

      # Corners of the second set of boxes.
      b2_xy = b2[..., :2]
      b2_wh = b2[..., 2:4]
      b2_wh_half = b2_wh / 2.
      b2_mins = b2_xy - b2_wh_half
      b2_maxes = b2_xy + b2_wh_half

      # Plain IoU.
      intersect_mins = torch.max(b1_mins, b2_mins)
      intersect_maxes = torch.min(b1_maxes, b2_maxes)
      intersect_wh = torch.clamp(intersect_maxes - intersect_mins, min=0)
      intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
      b1_area = b1_wh[..., 0] * b1_wh[..., 1]
      b2_area = b2_wh[..., 0] * b2_wh[..., 1]
      union_area = b1_area + b2_area - intersect_area
      iou = intersect_area / torch.clamp(union_area, min=eps)

      # Smallest axis-aligned box that encloses both boxes.
      enclose_mins = torch.min(b1_mins, b2_mins)
      enclose_maxes = torch.max(b1_maxes, b2_maxes)
      enclose_wh = torch.clamp(enclose_maxes - enclose_mins, min=0)
      enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]

      # GIoU penalises the part of the enclosing box not covered by the union.
      giou = iou - (enclose_area - union_area) / torch.clamp(enclose_area, min=eps)
      return giou

六、BCEloss(二分类交叉熵函数)

$$\mathrm{BCE}(p, t) = -\,t \log p - (1 - t)\log(1 - p)$$

  def BCELoss(self, pred, target):
      """Return the element-wise binary cross-entropy (no reduction).

      Args:
          pred: Predicted probabilities; clipped to ``[1e-7, 1 - 1e-7]`` so
              neither logarithm sees 0.
          target: Target values broadcastable with ``pred``. Boolean or
              integer tensors are cast to ``pred``'s dtype first.

      Returns:
          ``-target * log(pred) - (1 - target) * log(1 - pred)`` per element.
      """
      epsilon = 1e-7
      pred = self.clip_by_tensor(pred, epsilon, 1.0 - epsilon)
      # Negating / subtracting a bool tensor is rejected by torch, so masks
      # have to be turned into floats before the arithmetic below.
      if not torch.is_floating_point(target):
          target = target.to(pred.dtype)
      output = - target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred)
      return output
  def clip_by_tensor(self, t, t_min, t_max):
      """Clip ``t`` to the closed interval ``[t_min, t_max]`` as float32.

      Args:
          t: Tensor to clip; converted with ``.float()`` first.
          t_min: Lower bound (number or broadcastable tensor).
          t_max: Upper bound (number or broadcastable tensor).

      Returns:
          Float tensor with every value below ``t_min`` replaced by ``t_min``
          and every value above ``t_max`` replaced by ``t_max``.
      """
      t = t.float()
      lower = torch.as_tensor(t_min, dtype=t.dtype, device=t.device)
      upper = torch.as_tensor(t_max, dtype=t.dtype, device=t.device)
      # Select rather than multiply by 0/1 masks: a mask product turns
      # infinite inputs into NaN because 0 * inf is NaN.
      result = torch.where(t < lower, lower, t)
      result = torch.where(result > upper, upper, result)
      return result