The image is read, preprocessed, and fed into the network, which outputs three sets of features: bounding-box regressions, confidences, and facial landmark offsets. These outputs are then decoded.
1. Inference Code
def detect_image(self, image):
    #---------------------------------------------------#
    #   Keep a copy of the input image for drawing later
    #---------------------------------------------------#
    old_image = image.copy()
    #---------------------------------------------------#
    #   Convert the image into a numpy array
    #---------------------------------------------------#
    image = np.array(image, np.float32)
    #---------------------------------------------------#
    #   Record the height and width of the input image
    #---------------------------------------------------#
    im_height, im_width, _ = np.shape(image)
    #---------------------------------------------------#
    #   Compute the scales used to map the predicted boxes
    #   and landmarks back to the original image size
    #---------------------------------------------------#
    scale = [
        np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0]
    ]
    scale_for_landmarks = [
        np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
        np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
        np.shape(image)[1], np.shape(image)[0]
    ]
    #---------------------------------------------------------#
    #   letterbox_image pads the image with gray bars so it can
    #   be resized without distortion; otherwise the anchors are
    #   regenerated for the original image size.
    #---------------------------------------------------------#
    if self.letterbox_image:
        image = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
    else:
        self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

    with torch.no_grad():
        #-----------------------------------------------------------#
        #   Preprocess (normalize) the image and add a batch dimension
        #-----------------------------------------------------------#
        image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0).type(torch.FloatTensor)

        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()

        #---------------------------------------------------------#
        #   Forward the image through the network
        #---------------------------------------------------------#
        loc, conf, landms = self.net(image)
        #-----------------------------------------------------------#
        #   Decode the predicted boxes
        #-----------------------------------------------------------#
        boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
        #-----------------------------------------------------------#
        #   Take the face-class confidence of each prediction
        #-----------------------------------------------------------#
        conf = conf.data.squeeze(0)[:, 1:2]
        #-----------------------------------------------------------#
        #   Decode the facial landmarks
        #-----------------------------------------------------------#
        landms = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])
        #-----------------------------------------------------------#
        #   Stack boxes, confidences and landmarks, then run NMS
        #-----------------------------------------------------------#
        boxes_conf_landms = torch.cat([boxes, conf, landms], -1)
        boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)
        if len(boxes_conf_landms) <= 0:
            return old_image

        #---------------------------------------------------------#
        #   If letterbox_image was used, remove the gray-bar offset
        #---------------------------------------------------------#
        if self.letterbox_image:
            boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms, \
                np.array([self.input_shape[0], self.input_shape[1]]), np.array([im_height, im_width]))

    boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale
    boxes_conf_landms[:, 5:] = boxes_conf_landms[:, 5:] * scale_for_landmarks

    for b in boxes_conf_landms:
        text = "{:.4f}".format(b[4])
        b = list(map(int, b))
        #---------------------------------------------------#
        #   b[0]-b[3] are the box coordinates, b[4] is the score
        #---------------------------------------------------#
        cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
        cy = b[1] + 12
        cv2.putText(old_image, text, (cx, cy),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        print(b[0], b[1], b[2], b[3], b[4])
        #---------------------------------------------------#
        #   b[5]-b[14] are the coordinates of the five landmarks
        #---------------------------------------------------#
        cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
        cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
        cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
        cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
        cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
    return old_image
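Before moving on to the decoding details, here is a minimal driver sketch. The wrapper class name Retinaface and the image paths are assumptions made for illustration; the method itself only needs an RGB numpy image, since the drawing is done with cv2:

import cv2

retinaface = Retinaface()                           # hypothetical wrapper that owns self.net, self.anchors, ...
image = cv2.imread("img/test.jpg")                  # OpenCV reads images as BGR
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)      # convert BGR -> RGB (assumed input ordering)
r_image = retinaface.detect_image(image)            # returns the image with boxes and landmarks drawn
r_image = cv2.cvtColor(r_image, cv2.COLOR_RGB2BGR)  # back to BGR for saving/display with OpenCV
cv2.imwrite("img/result.jpg", r_image)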
2. Bounding-Box Decoding
The decoding simply inverts the formula that was used to encode the ground-truth boxes relative to the anchors (prior boxes).
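Written out, the decoding implemented by decode below amounts to the following, with (c_x^a, c_y^a, w^a, h^a) the anchor center and size, (t_x, t_y, t_w, t_h) the network's box regression, and var_0, var_1 the two entries of self.cfg['variance']:

$$
\begin{aligned}
c_x &= c_x^{a} + t_x \cdot \mathrm{var}_0 \cdot w^{a}, &\quad c_y &= c_y^{a} + t_y \cdot \mathrm{var}_0 \cdot h^{a},\\
w &= w^{a} \cdot \exp(t_w \cdot \mathrm{var}_1), &\quad h &= h^{a} \cdot \exp(t_h \cdot \mathrm{var}_1),
\end{aligned}
$$

after which the center form (c_x, c_y, w, h) is converted to corner form: x_min = c_x - w/2, y_min = c_y - h/2, x_max = x_min + w, y_max = y_min + h.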
import torch

#-----------------------------#
#   Decode centers and sizes
#-----------------------------#
def decode(loc, priors, variances):
    boxes = torch.cat((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
                       priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    #   Convert (cx, cy, w, h) into (x_min, y_min, x_max, y_max)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes
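A minimal shape check for decode (the anchor count 16800 is just an example, roughly what a 640x640 input with three feature levels and two anchors per cell produces; the variances [0.1, 0.2] follow the usual SSD/RetinaFace convention):

import torch

num_anchors = 16800
loc    = torch.randn(num_anchors, 4)        # raw box regression from the network
priors = torch.rand(num_anchors, 4)         # anchors in normalized (cx, cy, w, h) form
boxes  = decode(loc, priors, [0.1, 0.2])
print(boxes.shape)                          # torch.Size([16800, 4]) -> (x_min, y_min, x_max, y_max)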
3. Facial Landmark Decoding
def decode_landm(pre, priors, variances):
    #   Each of the five keypoints is decoded as an offset from the
    #   anchor center, scaled by variances[0] and the anchor size
    landms = torch.cat((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
                        priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
                        priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
                        priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
                        priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
                        ), dim=1)
    return landms
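The output has shape (num_anchors, 10): five (x, y) pairs, one per keypoint, which detect_image above draws as five colored circles (in the standard RetinaFace annotation order these are the two eyes, the nose tip and the two mouth corners). A quick sanity check under the same dummy-anchor assumption as before:

import torch

num_anchors = 16800
priors    = torch.rand(num_anchors, 4)       # anchors in normalized (cx, cy, w, h) form
landm_pre = torch.randn(num_anchors, 10)     # raw landmark regression from the network
landms    = decode_landm(landm_pre, priors, [0.1, 0.2])
print(landms.shape)                          # torch.Size([16800, 10]) -> five (x, y) pairs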
4. Non-Maximum Suppression
from torchvision.ops import nms   # built-in NMS used below


def non_max_suppression(detection, conf_thres=0.5, nms_thres=0.3):
    #------------------------------------------#
    #   Keep only the boxes whose score exceeds
    #   the confidence threshold. Filtering by
    #   score before the overlap test greatly
    #   reduces the number of boxes to process.
    #------------------------------------------#
    mask = detection[:, 4] >= conf_thres
    detection = detection[mask]
    if len(detection) <= 0:
        return []

    #------------------------------------------#
    #   The built-in non-maximum suppression
    #   is faster than a hand-written loop.
    #------------------------------------------#
    keep = nms(
        detection[:, :4],
        detection[:, 4],
        nms_thres
    )
    best_box = detection[keep]

    # Equivalent hand-written NMS, kept for reference:
    # best_box = []
    # scores = detection[:, 4]
    # # Sort the boxes by score in descending order.
    # arg_sort = np.argsort(scores)[::-1]
    # detection = detection[arg_sort]
    # while np.shape(detection)[0] > 0:
    #     # Take the highest-scoring box, compute its overlap with all remaining
    #     # boxes, and discard those that overlap it too much.
    #     best_box.append(detection[0])
    #     if len(detection) == 1:
    #         break
    #     ious = iou(best_box[-1], detection[1:])
    #     detection = detection[1:][ious < nms_thres]

    return best_box.cpu().numpy()
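A toy example of the behavior (hand-made numbers for illustration only): two heavily overlapping boxes with scores 0.90 and 0.80 plus one low-score box; after suppression only the 0.90 box survives. The ten trailing zeros stand in for the landmark columns.

import torch

detections = torch.tensor([
    # x_min, y_min, x_max, y_max, score, then 10 landmark values (zeros here)
    [ 10.,  10., 110., 110., 0.90] + [0.] * 10,
    [ 12.,  12., 112., 112., 0.80] + [0.] * 10,   # overlaps the first box -> suppressed
    [200., 200., 260., 260., 0.20] + [0.] * 10,   # below conf_thres -> filtered out
])
result = non_max_suppression(detections, conf_thres=0.5, nms_thres=0.3)
print(result.shape)                               # (1, 15): only the 0.90 box remains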