智能视觉模块 - 基于opencv dnn的人脸检测 - 《乐派Python速查手册》

读取模型
数据预处理
设置输入数据
执行预测
使用示例：打开摄像头，检测画面中的人脸
读取模型

net = cv2.dnn.readNetFromONNX(model_path)

从文件中加载模型
参数说明

model_path：模型文件路径
返回值

对应模型网络的对象
数据预处理

cv2.dnn.blobFromImage(img, scalefactor, size, mean)

输入数据预处理
参数说明

img：待处理图像
scalefactor：用来和图像相乘的比例因子
size：缩放后的目标尺寸（宽, 高），即模型输入图像的尺寸
mean：用来和图像相减的均值
返回值

用于模型输入的blob对象
设置输入数据

net.setInput(blob)

准备模型网络的输入数据
参数说明

blob：输入blob对象
返回值

无
执行预测

net.forward(output_list)

执行模型推理（前向传播），并返回output_list中指定网络层的输出结果
参数说明

output_list：需要获取输出的网络层名称列表，例如["boxes", "scores"]
返回值

与output_list中各网络层一一对应的输出结果
使用示例：打开摄像头，检测画面中的人脸

# coding=utf-8
import os
import time
from math import ceil
import cv2
import numpy as np
# Key code compared against cv2.waitKey() in the demo loop to leave the loop.
Key_Esc = 27
# Per-channel mean. NOTE(review): not referenced by the visible code --
# detect() passes the literal 127 to blobFromImage instead; confirm it is
# still needed.
image_mean = np.array([127, 127, 127])
# detect() uses 1 / image_std as the blobFromImage scale factor.
image_std = 128.0
# NOTE(review): predict() has its own iou_threshold default (0.3) and no
# visible caller passes this module-level value -- confirm it is still needed.
iou_threshold = 0.3
# Variances handed to convert_locations_to_boxes() by detect().
center_variance = 0.1
size_variance = 0.2
# Prior box sizes for each stride level; generate_priors() divides them by the
# input width/height to obtain relative box sizes.
min_boxes = [[10.0, 16.0, 24.0], [32.0, 48.0],
             [64.0, 96.0], [128.0, 192.0, 256.0]]
# Strides used by define_img_size() to derive the feature-map sizes; entry i
# pairs with min_boxes[i].
strides = [8.0, 16.0, 32.0, 64.0]
def define_img_size(image_size):
    """Build the prior boxes for a network input of the given size.

    Args:
        image_size: Sequence ``[width, height]`` of the network input.

    Returns:
        The array produced by ``generate_priors`` for the module-level
        ``strides`` and ``min_boxes``.
    """
    # One feature-map size per stride, computed separately for each dimension.
    feature_map_w_h_list = [
        [int(ceil(dimension / stride)) for stride in strides]
        for dimension in image_size
    ]
    # Every dimension shares the same stride list.
    shrinkage_list = [strides for _ in image_size]
    return generate_priors(
        feature_map_w_h_list, shrinkage_list, image_size, min_boxes)
def generate_priors(feature_map_list, shrinkage_list, image_size, min_boxes):
    """Generate centre-form prior boxes for every feature-map cell.

    Args:
        feature_map_list: ``[widths, heights]`` -- feature-map sizes per level.
        shrinkage_list: ``[strides_w, strides_h]`` -- strides per level.
        image_size: ``[width, height]`` of the network input.
        min_boxes: For each level, the box sizes to place on every cell.

    Returns:
        Array of ``[x_center, y_center, w, h]`` rows, relative to the image
        size and clipped to the range [0, 1].
    """
    anchors = []
    for level in range(len(feature_map_list[0])):
        # Number of cells spanning the image along each axis at this level.
        cells_w = image_size[0] / shrinkage_list[0][level]
        cells_h = image_size[1] / shrinkage_list[1][level]
        row_count = feature_map_list[1][level]
        col_count = feature_map_list[0][level]
        for row in range(row_count):
            center_y = (row + 0.5) / cells_h
            for col in range(col_count):
                center_x = (col + 0.5) / cells_w
                anchors.extend(
                    [center_x, center_y,
                     box_size / image_size[0], box_size / image_size[1]]
                    for box_size in min_boxes[level]
                )
    print("priors nums:{}".format(len(anchors)))
    return np.clip(anchors, 0.0, 1.0)
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Greedy non-maximum suppression.

    Args:
        box_scores: Array whose rows are box coordinates followed by a score
            in the last column.
        iou_threshold: Boxes whose IoU with a kept box exceeds this value are
            discarded.
        top_k: Stop once this many boxes are kept; values <= 0 keep all.
        candidate_size: Only the highest-scoring ``candidate_size`` boxes are
            considered.

    Returns:
        The kept rows of ``box_scores``, highest score first.
    """
    scores = box_scores[:, -1]
    boxes = box_scores[:, :-1]
    # Ascending by score, so the best remaining candidate is always last.
    order = np.argsort(scores)[-candidate_size:]
    keep = []
    while len(order) > 0:
        best = order[-1]
        keep.append(best)
        reached_top_k = top_k > 0 and top_k == len(keep)
        if reached_top_k or len(order) == 1:
            break
        order = order[:-1]
        overlaps = iou_of(
            boxes[order, :],
            np.expand_dims(boxes[best, :], axis=0),
        )
        # Drop every candidate that overlaps the kept box too much.
        order = order[overlaps <= iou_threshold]
    return box_scores[keep, :]
def area_of(left_top, right_bottom):
    """Return the area of rectangles given by their two corners.

    Args:
        left_top: Array whose last axis holds the left-top corner.
        right_bottom: Array whose last axis holds the right-bottom corner.

    Returns:
        Product of the two side lengths; negative side lengths count as 0.
    """
    extent = np.clip(right_bottom - left_top, 0.0, None)
    first_side = extent[..., 0]
    second_side = extent[..., 1]
    return first_side * second_side
def iou_of(boxes0, boxes1, eps=1e-5):
    """Return the intersection-over-union of corner-form boxes.

    Args:
        boxes0: Array whose last axis is ``[left, top, right, bottom]``.
        boxes1: Array of the same layout, broadcast against ``boxes0``.
        eps: Small constant added to the denominator to avoid dividing by 0.

    Returns:
        Array of IoU values.
    """
    left_top0, right_bottom0 = boxes0[..., :2], boxes0[..., 2:]
    left_top1, right_bottom1 = boxes1[..., :2], boxes1[..., 2:]
    intersection = area_of(
        np.maximum(left_top0, left_top1),
        np.minimum(right_bottom0, right_bottom1),
    )
    union = (area_of(left_top0, right_bottom0)
             + area_of(left_top1, right_bottom1)
             - intersection)
    return intersection / (union + eps)
def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
    """Filter raw detections by score and NMS, then scale them to pixels.

    Args:
        width: Width that relative x coordinates are multiplied by.
        height: Height that relative y coordinates are multiplied by.
        confidences: Batched class scores; only the first batch entry is used.
        boxes: Batched corner-form boxes; only the first batch entry is used.
        prob_threshold: Scores must be strictly greater than this to be kept.
        iou_threshold: IoU threshold forwarded to ``hard_nms``.
        top_k: Forwarded to ``hard_nms``.

    Returns:
        Tuple ``(boxes, labels, probs)``: int32 boxes, class indices and
        scores. All three are empty arrays when nothing passes the threshold.
    """
    boxes = boxes[0]
    confidences = confidences[0]
    kept_per_class = []
    kept_labels = []
    # Class index 0 is skipped; the loop starts at 1.
    for class_index in range(1, confidences.shape[1]):
        class_probs = confidences[:, class_index]
        above = class_probs > prob_threshold
        if not above.any():
            continue
        candidates = np.concatenate(
            [boxes[above, :], class_probs[above].reshape(-1, 1)], axis=1)
        survivors = hard_nms(
            candidates, iou_threshold=iou_threshold, top_k=top_k)
        kept_per_class.append(survivors)
        kept_labels.extend([class_index] * survivors.shape[0])
    if len(kept_per_class) == 0:
        return np.array([]), np.array([]), np.array([])
    merged = np.concatenate(kept_per_class)
    # Columns 0..3 are x, y, x, y in relative units; scale each to pixels.
    for column, factor in enumerate((width, height, width, height)):
        merged[:, column] *= factor
    pixel_boxes = merged[:, :4].astype(np.int32)
    return pixel_boxes, np.array(kept_labels), merged[:, 4]
def convert_locations_to_boxes(locations, priors, center_variance,
                               size_variance):
    """Decode regression outputs into centre-form boxes using the priors.

    Args:
        locations: Array whose last axis is ``[dx, dy, dw, dh]``.
        priors: Array whose last axis is ``[cx, cy, w, h]``; a leading axis
            is added when it has one dimension fewer than ``locations``.
        center_variance: Scale applied to the centre offsets.
        size_variance: Scale applied to the size offsets before ``exp``.

    Returns:
        Array shaped like ``locations`` with ``[cx, cy, w, h]`` on the last
        axis.
    """
    if priors.ndim + 1 == locations.ndim:
        priors = priors[np.newaxis, ...]
    prior_centers = priors[..., :2]
    prior_sizes = priors[..., 2:]
    centers = locations[..., :2] * center_variance * prior_sizes + prior_centers
    sizes = np.exp(locations[..., 2:] * size_variance) * prior_sizes
    return np.concatenate([centers, sizes], axis=locations.ndim - 1)
def center_form_to_corner_form(locations):
    """Convert ``[cx, cy, w, h]`` boxes to ``[left, top, right, bottom]``.

    Args:
        locations: Array whose last axis is ``[cx, cy, w, h]``.

    Returns:
        Array of the same shape holding the two corners on the last axis.
    """
    centers = locations[..., :2]
    sizes = locations[..., 2:]
    left_top = centers - sizes / 2
    right_bottom = centers + sizes / 2
    return np.concatenate([left_top, right_bottom], locations.ndim - 1)
class UltraFaceInference:
    """Face detector running ``version-RFB-320_simplified.onnx`` via OpenCV DNN.

    After each call to ``detect`` the latest results are also available on the
    instance as ``faceCount``, ``faceDetections`` and ``faceData``.
    """

    def __init__(self):
        """Load the ONNX model and set the default input size and threshold."""
        # Model path, relative to the current user's home directory.
        model_path = os.path.expanduser(
            '~') + '/Lepi_Data/ros/ultra_face_inference/models/onnx/version-RFB-320_simplified.onnx'
        # Load the ONNX model with the OpenCV DNN module.
        self.net = cv2.dnn.readNetFromONNX(model_path)
        # Detection runs on frames resized to this size: a smaller size trades
        # accuracy for speed.
        self.setResize(160, 120)
        # Minimum score a detection must exceed to be reported.
        self.threshold = 0.7
        # Results of the most recent detect() call.
        self.faceCount = 0
        self.faceDetections = []
        self.faceData = []

    def detect(self, img_ori):
        """Detect faces in a BGR image and store the results on the instance.

        Args:
            img_ori: BGR image array (it is converted to RGB for the network).

        Returns:
            Tuple ``(boxes, labels, probs)`` as returned by ``predict``; the
            boxes are scaled to the width and height of ``img_ori``.
        """
        input_size = (self.resized_witdh, self.resized_height)
        rect = cv2.resize(img_ori, input_size)
        rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
        # Prepare the network input: subtract 127 and scale by 1 / image_std.
        self.net.setInput(cv2.dnn.blobFromImage(
            rect, 1 / image_std, input_size, 127))
        # Run the forward pass and fetch the two named outputs.
        boxes, scores = self.net.forward(["boxes", "scores"])
        boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
        scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
        boxes = convert_locations_to_boxes(
            boxes, self.priors, center_variance, size_variance)
        boxes = center_form_to_corner_form(boxes)
        boxes, labels, probs = predict(
            img_ori.shape[1], img_ori.shape[0], scores, boxes, self.threshold)
        self.faceDetections = self.toFaceDetections(boxes, probs)
        self.faceData = self.toFaceData(self.faceDetections)
        # Keep the stored count in step with the stored detections.
        self.faceCount = len(self.faceDetections)
        return boxes, labels, probs

    def drawBoxes(self, img_ori, boxes):
        """Draw a green rectangle for every box onto ``img_ori`` in place.

        Args:
            img_ori: Image to draw on.
            boxes: Array of ``[left, top, right, bottom]`` rows.

        Returns:
            The same ``img_ori`` object, for call chaining.
        """
        for box in boxes:
            # Cast to plain ints: some OpenCV builds reject numpy scalars as
            # point coordinates.
            left, top, right, bottom = (int(value) for value in box[:4])
            cv2.rectangle(img_ori, (left, top),
                          (right, bottom), (0, 255, 0), 4)
        return img_ori

    def setThreshold(self, threshold):
        """Set the detection threshold.

        Values greater than 1 are interpreted as percentages and divided by
        100; other values are stored unchanged.
        """
        if threshold > 1:
            self.threshold = threshold / 100.0
        else:
            self.threshold = threshold

    def setResize(self, width, height):
        """Set the network input size and rebuild the matching prior boxes."""
        # NOTE: the attribute name "resized_witdh" (sic) is kept unchanged so
        # that existing code reading it keeps working.
        self.resized_witdh = width
        self.resized_height = height
        self.priors = define_img_size(
            [self.resized_witdh, self.resized_height])

    def toFaceDetections(self, face_locations=None, face_probs=None):
        """Pair every face box with its score.

        Args:
            face_locations: Iterable of ``(left, top, right, bottom)`` boxes.
            face_probs: Scores matching ``face_locations``. When it is empty
                or its length differs, every box gets the score 1.

        Returns:
            List of ``(prob, [left, top, right, bottom])`` tuples.
        """
        if face_locations is None:
            face_locations = []
        if face_probs is None:
            face_probs = []
        if len(face_probs) > 0 and len(face_probs) == len(face_locations):
            return [
                (prob, [left, top, right, bottom])
                for (left, top, right, bottom), prob
                in zip(face_locations, face_probs)
            ]
        if len(face_locations) > 0:
            return [
                (1, [left, top, right, bottom])
                for (left, top, right, bottom) in face_locations
            ]
        return []

    def toFaceData(self, faceDetections=None):
        """Convert detections to ``[x_center, y_center, width, height]`` lists.

        Args:
            faceDetections: Iterable of ``(prob, [left, top, right, bottom])``
                tuples, as produced by ``toFaceDetections``.

        Returns:
            List with one ``[x, y, w, h]`` entry per detection.
        """
        if faceDetections is None:
            faceDetections = []
        data = []
        for _, face in faceDetections:
            x = (face[0] + face[2]) / 2
            y = (face[1] + face[3]) / 2
            w = face[2] - face[0]
            h = face[3] - face[1]
            data.append([x, y, w, h])
        return data
if __name__ == '__main__':
    # Demo: open the default camera and show detected faces until Esc is
    # pressed.
    detector = UltraFaceInference()
    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, img_ori = cap.read()
            # A failed read yields no frame; stop instead of passing None to
            # the detector.
            if not ret or img_ori is None:
                print("failed to read a frame from the camera")
                break
            time_time = time.time()
            boxes, labels, probs = detector.detect(img_ori)
            img_ori = detector.drawBoxes(img_ori, boxes)
            print("inference time: {} s".format(
                round(time.time() - time_time, 4)))
            cv2.imshow("ultra_face_inference", np.rot90(
                cv2.resize(img_ori, (320, 240))))
            c = cv2.waitKey(2)
            if c == Key_Esc:
                break
    finally:
        # Always release the camera and close the window, even on errors.
        cv2.destroyAllWindows()
        cap.release()