The image is read, preprocessed, and fed into the network, which outputs three sets of features: box regression values, confidence scores, and face landmark values. The network outputs are then decoded.
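For reference, here is a minimal shape sketch of those three outputs. It assumes the usual RetinaFace head layout of 4 box values, 2 class scores and 10 landmark values per anchor; the anchor count used below is only illustrative, not taken from the code in this post:

    import torch

    # Assumed typical RetinaFace head layout, for a batch of one image and N anchors:
    #   loc    -> (1, N, 4)   box regression offsets
    #   conf   -> (1, N, 2)   background / face scores
    #   landms -> (1, N, 10)  five (x, y) landmark offsets
    N = 16800                       # illustrative anchor count only
    loc    = torch.zeros(1, N, 4)
    conf   = torch.zeros(1, N, 2)
    landms = torch.zeros(1, N, 10)
    print(loc.shape, conf.shape, landms.shape)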

1. Inference Code

    def detect_image(self, image):
        #---------------------------------------------------#
        #   Keep a copy of the input image; it is used for drawing later.
        #---------------------------------------------------#
        old_image = image.copy()
        #---------------------------------------------------#
        #   Convert the image to a numpy array.
        #---------------------------------------------------#
        image = np.array(image, np.float32)
        #---------------------------------------------------#
        #   Get the height and width of the input image.
        #---------------------------------------------------#
        im_height, im_width, _ = np.shape(image)
        #---------------------------------------------------#
        #   Compute the scales used to map the predicted boxes and
        #   landmarks back to the original image size.
        #---------------------------------------------------#
        scale = [
            np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0]
        ]
        scale_for_landmarks = [
            np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
            np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
            np.shape(image)[1], np.shape(image)[0]
        ]
        #---------------------------------------------------------#
        #   letterbox_image pads the image with gray bars so it can be
        #   resized to the input shape without distortion.
        #---------------------------------------------------------#
        if self.letterbox_image:
            image = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
        else:
            #   Without letterboxing, the anchors are regenerated for the
            #   original image size.
            self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        with torch.no_grad():
            #-----------------------------------------------------------#
            #   Preprocess and normalize the image, then convert it to a
            #   (1, C, H, W) float tensor.
            #-----------------------------------------------------------#
            image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0).type(torch.FloatTensor)
            if self.cuda:
                self.anchors = self.anchors.cuda()
                image = image.cuda()

            #---------------------------------------------------------#
            #   Run the network to get the predictions.
            #---------------------------------------------------------#
            loc, conf, landms = self.net(image)

            #-----------------------------------------------------------#
            #   Decode the predicted boxes.
            #-----------------------------------------------------------#
            boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
            #-----------------------------------------------------------#
            #   Take the face confidence of each prediction.
            #-----------------------------------------------------------#
            conf = conf.data.squeeze(0)[:, 1:2]
            #-----------------------------------------------------------#
            #   Decode the face landmarks.
            #-----------------------------------------------------------#
            landms = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])

            #-----------------------------------------------------------#
            #   Stack boxes, confidences and landmarks, then apply
            #   non-maximum suppression.
            #-----------------------------------------------------------#
            boxes_conf_landms = torch.cat([boxes, conf, landms], -1)
            boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)
            if len(boxes_conf_landms) <= 0:
                return old_image

            #---------------------------------------------------------#
            #   If letterbox_image was used, remove the offset introduced
            #   by the gray bars.
            #---------------------------------------------------------#
            if self.letterbox_image:
                boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms, \
                    np.array([self.input_shape[0], self.input_shape[1]]), np.array([im_height, im_width]))

        #---------------------------------------------------------#
        #   Map the normalized boxes and landmarks back to pixel
        #   coordinates in the original image.
        #---------------------------------------------------------#
        boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale
        boxes_conf_landms[:, 5:] = boxes_conf_landms[:, 5:] * scale_for_landmarks

        for b in boxes_conf_landms:
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            #---------------------------------------------------#
            #   b[0]-b[3] are the box coordinates, b[4] is the score.
            #---------------------------------------------------#
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
            print(b[0], b[1], b[2], b[3], b[4])
            #---------------------------------------------------#
            #   b[5]-b[14] are the five landmark coordinates.
            #---------------------------------------------------#
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
        return old_image
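For context, a minimal usage sketch of detect_image. The wrapper class name Retinaface, its construction, and the image path are assumptions for illustration and do not appear in the code above:

    import cv2

    retinaface = Retinaface()                           # hypothetical wrapper class owning detect_image()
    image = cv2.imread("img/test.jpg")                  # illustrative path
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)      # the drawing code works on an RGB numpy image
    r_image = retinaface.detect_image(image)
    r_image = cv2.cvtColor(r_image, cv2.COLOR_RGB2BGR)  # back to BGR for OpenCV
    cv2.imwrite("img/test_out.jpg", r_image)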

2. Decoding the Predicted Boxes

The decoding simply inverts the formulas used to encode ground-truth boxes against the anchors (priors): the predicted center offsets are multiplied by the first variance and the anchor width/height and added to the anchor center, while the predicted width/height offsets are multiplied by the second variance and passed through an exponential before scaling the anchor size. The resulting (cx, cy, w, h) box is then converted to corner form (x1, y1, x2, y2). A small worked example follows the code below.

    #-----------------------------#
    #   Decode the centers, widths and heights.
    #-----------------------------#
    def decode(loc, priors, variances):
        boxes = torch.cat((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
                           priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
        # Convert from (cx, cy, w, h) to corner form (x1, y1, x2, y2).
        boxes[:, :2] -= boxes[:, 2:] / 2
        boxes[:, 2:] += boxes[:, :2]
        return boxes
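As a quick sanity check, here is a small hand-worked example. The anchor and offsets are made-up values, and the variances [0.1, 0.2] are the values commonly used with this SSD-style encoding, not necessarily the ones in self.cfg:

    import torch

    priors    = torch.tensor([[0.50, 0.50, 0.20, 0.20]])   # one anchor in (cx, cy, w, h), normalized
    loc       = torch.tensor([[1.00, -0.50, 0.00, 0.00]])  # made-up predicted offsets
    variances = [0.1, 0.2]

    boxes = decode(loc, priors, variances)
    # center:  (0.5 + 1.0*0.1*0.2, 0.5 - 0.5*0.1*0.2) = (0.52, 0.49)
    # size:    (0.2*exp(0), 0.2*exp(0))               = (0.20, 0.20)
    # corners: (0.42, 0.39, 0.62, 0.59)
    print(boxes)   # tensor([[0.4200, 0.3900, 0.6200, 0.5900]])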

3. Decoding the Face Landmarks

    def decode_landm(pre, priors, variances):
        # Each of the five landmarks is decoded like a box center: the anchor
        # center plus the predicted offset scaled by the variance and the anchor size.
        landms = torch.cat((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
                            priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
                            priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
                            priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
                            priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
                            ), dim=1)
        return landms
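The same toy anchor can be used to check the landmark decoding (again with made-up values); with zero offsets every decoded landmark falls on the anchor center:

    import torch

    priors = torch.tensor([[0.50, 0.50, 0.20, 0.20]])
    pre    = torch.zeros(1, 10)                   # five (x, y) landmark offsets, all zero
    landms = decode_landm(pre, priors, [0.1, 0.2])
    print(landms)   # tensor([[0.5000, 0.5000, ..., 0.5000]]) -- ten values, all 0.5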

4. Non-Maximum Suppression

    def non_max_suppression(detection, conf_thres=0.5, nms_thres=0.3):
        #------------------------------------------#
        #   Keep only the boxes whose score exceeds the threshold.
        #   Filtering by score before the overlap-based filtering
        #   greatly reduces the number of boxes to process.
        #------------------------------------------#
        mask = detection[:, 4] >= conf_thres
        detection = detection[mask]
        if len(detection) <= 0:
            return []

        #------------------------------------------#
        #   Using the framework's built-in non-maximum suppression is faster!
        #------------------------------------------#
        keep = nms(
            detection[:, :4],
            detection[:, 4],
            nms_thres
        )
        best_box = detection[keep]

        # Equivalent manual implementation, kept for reference:
        # best_box = []
        # scores = detection[:, 4]
        # # 2. Sort the boxes by score in descending order.
        # arg_sort = np.argsort(scores)[::-1]
        # detection = detection[arg_sort]
        # while np.shape(detection)[0] > 0:
        #     # 3. Take the highest-scoring box, compute its overlap with all other
        #     #    boxes, and discard those whose overlap is too large.
        #     best_box.append(detection[0])
        #     if len(detection) == 1:
        #         break
        #     ious = iou(best_box[-1], detection[1:])
        #     detection = detection[1:][ious < nms_thres]
        return best_box.cpu().numpy()
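A minimal end-to-end check of the NMS step. It assumes that nms is torchvision's torchvision.ops.nms (consistent with the "built-in" comment above, but an assumption here), and the detection values are made up:

    import torch
    from torchvision.ops import nms   # assumed source of the `nms` used above

    # Two heavily overlapping boxes and one separate box; each row is
    # (x1, y1, x2, y2, score) followed by ten landmark values (zeros here).
    detection = torch.tensor([
        [ 10.0, 10.0, 110.0, 110.0, 0.95] + [0.0] * 10,
        [ 12.0, 12.0, 112.0, 112.0, 0.60] + [0.0] * 10,
        [200.0, 50.0, 280.0, 150.0, 0.80] + [0.0] * 10,
    ])
    result = non_max_suppression(detection, conf_thres=0.5, nms_thres=0.3)
    print(result.shape)   # (2, 15): the lower-scoring duplicate is suppressed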