读取模型
net= cv2.dnn.readNetFromONNX(model_path)
参数说明
model_path:ONNX模型文件的路径
返回值
对应模型网络的对象
数据预处理
cv2.dnn.blobFromImage(img, scalefactor, size, mean)
参数说明
img:待处理图像
scalefactor:用来和图像相乘的比例因子
size:缩放尺寸
mean:用来和图像相减的均值
返回值
用于模型输入的blob对象
设置输入数据
net.setInput(blob)
参数说明
blob:由cv2.dnn.blobFromImage生成的blob对象
返回值
无
执行预测
net.forward(output_list)
参数说明
output_list:指定需要返回的网络层
返回值
各指定网络层的输出结果
使用示例:打开摄像头,检测画面中的人脸
# coding=utf-8
import os
import time
from math import ceil
import cv2
import numpy as np
# Key code (ASCII ESC) compared against cv2.waitKey() to leave the demo loop.
Key_Esc = 27
# Per-channel mean, presumably meant to be subtracted from the input image
# before inference -- TODO confirm against the model's training pipeline.
image_mean = np.array([127, 127, 127])
# Input normalisation divisor; its reciprocal is used as the blob scale factor.
image_std = 128.0
# IoU threshold; same value as the default of predict()'s iou_threshold.
iou_threshold = 0.3
# Multipliers applied by convert_locations_to_boxes() to the regressed centre
# offsets and to the regressed (log-space) size offsets, respectively.
center_variance = 0.1
size_variance = 0.2
# Prior box side lengths, one inner list per feature-map level; each value is
# divided by the image width/height in generate_priors().
min_boxes = [[10.0, 16.0, 24.0], [32.0, 48.0],
[64.0, 96.0], [128.0, 192.0, 256.0]]
# Down-sampling factor of each feature-map level (same order as min_boxes).
strides = [8.0, 16.0, 32.0, 64.0]
def define_img_size(image_size):
    """Build the prior boxes for a network input of the given size.

    Args:
        image_size: Sequence ``[width, height]`` of the network input.

    Returns:
        The array produced by ``generate_priors`` for this input size, using
        the module-level ``strides`` and ``min_boxes`` tables.
    """
    # Feature-map extent per image dimension and per stride level.
    grid_sizes = [
        [int(ceil(extent / step)) for step in strides]
        for extent in image_size
    ]
    # Every image dimension shares the very same stride table.
    per_axis_strides = [strides] * len(image_size)
    return generate_priors(grid_sizes, per_axis_strides, image_size, min_boxes)
def generate_priors(feature_map_list, shrinkage_list, image_size, min_boxes):
    """Enumerate the prior (anchor) boxes for every feature-map cell.

    Args:
        feature_map_list: ``[widths, heights]`` where each entry lists the
            feature-map extent per level.
        shrinkage_list: ``[x_strides, y_strides]`` with one stride per level.
        image_size: ``[width, height]`` of the network input.
        min_boxes: Per-level lists of box side lengths.

    Returns:
        numpy array with one ``[x_center, y_center, w, h]`` row per prior,
        expressed relative to the image size and clipped to ``[0, 1]``.
        The number of priors is also printed.
    """
    anchors = []
    level_count = len(feature_map_list[0])
    for level in range(level_count):
        # Number of feature-map cells that span the image along each axis.
        cells_x = image_size[0] / shrinkage_list[0][level]
        cells_y = image_size[1] / shrinkage_list[1][level]
        row_count = feature_map_list[1][level]
        col_count = feature_map_list[0][level]
        for row in range(row_count):
            for col in range(col_count):
                cx = (col + 0.5) / cells_x
                cy = (row + 0.5) / cells_y
                anchors.extend(
                    [cx, cy, side / image_size[0], side / image_size[1]]
                    for side in min_boxes[level]
                )
    print("priors nums:{}".format(len(anchors)))
    return np.clip(anchors, 0.0, 1.0)
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Greedy non-maximum suppression.

    Args:
        box_scores: 2-D array; the last column is the score, the preceding
            columns describe the box.
        iou_threshold: Candidates whose IoU with an already kept box exceeds
            this value are discarded.
        top_k: Stop once this many boxes are kept; values <= 0 keep all.
        candidate_size: Only the ``candidate_size`` highest-scoring rows are
            considered.

    Returns:
        The rows of ``box_scores`` that were kept, highest score first.
    """
    coords = box_scores[:, :-1]
    # Ascending by score, so the best remaining candidate is always last.
    order = np.argsort(box_scores[:, -1])[-candidate_size:]
    kept = []
    while len(order) > 0:
        best = order[-1]
        kept.append(best)
        reached_top_k = top_k > 0 and len(kept) == top_k
        if reached_top_k or len(order) == 1:
            break
        order = order[:-1]
        overlaps = iou_of(
            coords[order, :],
            np.expand_dims(coords[best, :], axis=0),
        )
        order = order[overlaps <= iou_threshold]
    return box_scores[kept, :]
def area_of(left_top, right_bottom):
    """Return the area of rectangles given by two opposite corners.

    Args:
        left_top: Array whose last axis holds the first corner coordinates.
        right_bottom: Array whose last axis holds the opposite corner.

    Returns:
        Array of areas; a rectangle whose extent is negative along an axis
        contributes an area of zero.
    """
    extents = np.clip(right_bottom - left_top, 0.0, None)
    span_0 = extents[..., 0]
    span_1 = extents[..., 1]
    return span_0 * span_1
def iou_of(boxes0, boxes1, eps=1e-5):
    """Compute intersection-over-union between two sets of corner-form boxes.

    Args:
        boxes0: Array whose last axis is ``[x1, y1, x2, y2]``.
        boxes1: Array broadcastable against ``boxes0`` with the same layout.
        eps: Small constant added to the denominator to avoid division by
            zero.

    Returns:
        Array of IoU values with the broadcast shape of the inputs minus the
        last axis.
    """
    top_left = np.maximum(boxes0[..., :2], boxes1[..., :2])
    bottom_right = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
    intersection = area_of(top_left, bottom_right)
    union = (
        area_of(boxes0[..., :2], boxes0[..., 2:])
        + area_of(boxes1[..., :2], boxes1[..., 2:])
        - intersection
    )
    return intersection / (union + eps)
def predict(width, height, confidences, boxes, prob_threshold, iou_threshold=0.3, top_k=-1):
    """Filter raw network output into final detections.

    Only the first element of the batch is processed. Class index 0 is
    skipped (presumably the background class).

    Args:
        width: Width used to scale the x coordinates of the boxes.
        height: Height used to scale the y coordinates of the boxes.
        confidences: Batched per-class scores; ``confidences[0]`` is 2-D
            with one column per class.
        boxes: Batched corner-form boxes; ``boxes[0]`` has one row per
            candidate.
        prob_threshold: Minimum score for a candidate to be considered.
        iou_threshold: IoU threshold forwarded to ``hard_nms``.
        top_k: Maximum number of boxes kept per class (<= 0 keeps all).

    Returns:
        Tuple ``(boxes, labels, probs)``: int32 boxes scaled by
        ``width``/``height``, the class index of each box and its score.
        Three empty arrays are returned when nothing passes the threshold.
    """
    candidate_boxes = boxes[0]
    class_scores = confidences[0]
    kept_per_class = []
    kept_labels = []
    for label in range(1, class_scores.shape[1]):
        column = class_scores[:, label]
        above = column > prob_threshold
        strong = column[above]
        if strong.shape[0] == 0:
            continue
        stacked = np.concatenate(
            [candidate_boxes[above, :], strong.reshape(-1, 1)], axis=1)
        survivors = hard_nms(stacked, iou_threshold=iou_threshold, top_k=top_k)
        kept_per_class.append(survivors)
        kept_labels.extend([label] * survivors.shape[0])
    if len(kept_per_class) == 0:
        return np.array([]), np.array([]), np.array([])
    merged = np.concatenate(kept_per_class)
    # Columns 0/2 are x coordinates, columns 1/3 are y coordinates.
    for column_index, factor in enumerate((width, height, width, height)):
        merged[:, column_index] *= factor
    return merged[:, :4].astype(np.int32), np.array(kept_labels), merged[:, 4]
def convert_locations_to_boxes(locations, priors, center_variance,
                               size_variance):
    """Decode regressed offsets into centre-form boxes using the priors.

    Args:
        locations: Regression output; last axis is ``[dx, dy, dw, dh]``.
        priors: Prior boxes; last axis is ``[cx, cy, w, h]``. A leading
            batch axis is added when it has one dimension fewer than
            ``locations``.
        center_variance: Multiplier applied to the centre offsets.
        size_variance: Multiplier applied to the size offsets before
            exponentiation.

    Returns:
        Array shaped like ``locations`` holding ``[cx, cy, w, h]`` boxes.
    """
    if priors.ndim + 1 == locations.ndim:
        priors = np.expand_dims(priors, 0)
    prior_centers = priors[..., :2]
    prior_sizes = priors[..., 2:]
    centers = locations[..., :2] * center_variance * prior_sizes + prior_centers
    sizes = np.exp(locations[..., 2:] * size_variance) * prior_sizes
    return np.concatenate([centers, sizes], axis=locations.ndim - 1)
def center_form_to_corner_form(locations):
    """Convert ``[cx, cy, w, h]`` boxes to ``[x1, y1, x2, y2]`` boxes.

    Args:
        locations: Array whose last axis is ``[cx, cy, w, h]``.

    Returns:
        Array of the same shape whose last axis is ``[x1, y1, x2, y2]``.
    """
    centers = locations[..., :2]
    half_sizes = locations[..., 2:] / 2
    corners = [centers - half_sizes, centers + half_sizes]
    return np.concatenate(corners, axis=locations.ndim - 1)
class UltraFaceInference:
    """Face detector running an ONNX model through the OpenCV DNN module.

    The model file ``version-RFB-320_simplified.onnx`` is loaded from a fixed
    location below the user's home directory. Each call to ``detect`` also
    stores its result in ``faceDetections`` and ``faceData``.
    """

    def __init__(self):
        # Location of the ONNX model below the user's home directory.
        model_path = os.path.expanduser(
            '~') + '/Lepi_Data/ros/ultra_face_inference/models/onnx/version-RFB-320_simplified.onnx'
        # Load the ONNX model with the OpenCV DNN module.
        self.net = cv2.dnn.readNetFromONNX(model_path)
        # Frames are resized to this size before inference: lower accuracy,
        # higher speed.
        self.setResize(160, 120)
        # Minimum score for a detection to be reported.
        self.threshold = 0.7
        # Results of the most recent detection.
        self.faceCount = 0
        self.faceDetections = []
        self.faceData = []

    def detect(self, img_ori):
        """Detect faces in a BGR image.

        Args:
            img_ori: BGR image as returned by OpenCV.

        Returns:
            Tuple ``(boxes, labels, probs)`` as produced by ``predict``; box
            coordinates are expressed in pixels of ``img_ori``.
        """
        net_size = (self.resized_witdh, self.resized_height)
        rect = cv2.resize(img_ori, net_size)
        rect = cv2.cvtColor(rect, cv2.COLOR_BGR2RGB)
        # Pass the mean as a 3-tuple: a bare number is interpreted by the
        # OpenCV Python bindings as Scalar(127, 0, 0, 0), which would only
        # mean-centre the first channel.
        mean = tuple(float(channel) for channel in image_mean)
        self.net.setInput(cv2.dnn.blobFromImage(
            rect, 1 / image_std, net_size, mean))
        # Forward pass, fetching the two named output layers.
        boxes, scores = self.net.forward(["boxes", "scores"])
        boxes = np.expand_dims(np.reshape(boxes, (-1, 4)), axis=0)
        scores = np.expand_dims(np.reshape(scores, (-1, 2)), axis=0)
        boxes = convert_locations_to_boxes(
            boxes, self.priors, center_variance, size_variance)
        boxes = center_form_to_corner_form(boxes)
        boxes, labels, probs = predict(
            img_ori.shape[1], img_ori.shape[0], scores, boxes, self.threshold)
        self.faceDetections = self.toFaceDetections(boxes, probs)
        self.faceData = self.toFaceData(self.faceDetections)
        return boxes, labels, probs

    def drawBoxes(self, img_ori, boxes):
        """Draw a green rectangle on ``img_ori`` for every box and return it.

        Args:
            img_ori: Image to draw on (modified in place).
            boxes: Iterable of ``[left, top, right, bottom]`` boxes.

        Returns:
            The same image object that was passed in.
        """
        for box in boxes:
            # Plain ints keep cv2.rectangle happy regardless of numpy dtype.
            left, top, right, bottom = (int(value) for value in box[:4])
            cv2.rectangle(img_ori, (left, top),
                          (right, bottom), (0, 255, 0), 4)
        return img_ori

    def setThreshold(self, threshold):
        """Set the detection threshold.

        Args:
            threshold: Either a fraction in ``[0, 1]`` or, when greater than
                1, a percentage that is divided by 100.
        """
        if threshold > 1:
            self.threshold = threshold / 100.0
        else:
            self.threshold = threshold

    def setResize(self, width, height):
        """Set the network input size and rebuild the matching priors.

        Args:
            width: Network input width in pixels.
            height: Network input height in pixels.
        """
        # NOTE: the attribute name keeps its historical spelling
        # ("witdh") so that existing users of the attribute keep working.
        self.resized_witdh = width
        self.resized_height = height
        self.priors = define_img_size(
            [self.resized_witdh, self.resized_height])

    def toFaceDetections(self, face_locations=None, face_probs=None):
        """Pair every face box with its probability.

        Args:
            face_locations: Iterable of ``(left, top, right, bottom)`` boxes;
                ``None`` is treated as empty.
            face_probs: Probabilities matching ``face_locations``; ``None``
                is treated as empty. When empty or of a different length,
                every box gets the probability ``1``.

        Returns:
            List of ``(prob, [left, top, right, bottom])`` tuples.
        """
        if face_locations is None:
            face_locations = []
        if face_probs is None:
            face_probs = []
        if len(face_probs) > 0 and len(face_probs) == len(face_locations):
            return [
                (prob, [left, top, right, bottom])
                for (left, top, right, bottom), prob
                in zip(face_locations, face_probs)
            ]
        return [
            (1, [left, top, right, bottom])
            for (left, top, right, bottom) in face_locations
        ]

    def toFaceData(self, faceDetections=None):
        """Convert detections to ``[x_center, y_center, width, height]``.

        Args:
            faceDetections: Iterable of ``(prob, [left, top, right, bottom])``
                tuples; ``None`` is treated as empty.

        Returns:
            List with one ``[x, y, w, h]`` entry per detection.
        """
        if faceDetections is None:
            faceDetections = []
        data = []
        for _, face in faceDetections:
            x = (face[0] + face[2]) / 2
            y = (face[1] + face[3]) / 2
            w = face[2] - face[0]
            h = face[3] - face[1]
            data.append([x, y, w, h])
        return data
if __name__ == '__main__':
    # Demo: grab frames from the default camera, detect faces, draw the
    # boxes and show the result until ESC is pressed.
    detector = UltraFaceInference()
    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, img_ori = cap.read()
            if not ret or img_ori is None:
                # No frame available (camera missing or stream ended):
                # stop instead of crashing inside detect().
                break
            started_at = time.time()
            boxes, labels, probs = detector.detect(img_ori)
            img_ori = detector.drawBoxes(img_ori, boxes)
            print("inference time: {} s".format(
                round(time.time() - started_at, 4)))
            cv2.imshow("ultra_face_inference", np.rot90(
                cv2.resize(img_ori, (320, 240))))
            # Mask to the low byte so the comparison also works on platforms
            # where waitKey() returns extra high bits.
            if cv2.waitKey(2) & 0xFF == Key_Esc:
                break
    finally:
        # Always free the camera and the preview window, even on errors.
        cap.release()
        cv2.destroyAllWindows()