The image is read, preprocessed, and fed into the network, which outputs three sets of features: bounding-box regressions, confidences, and facial landmark offsets. These outputs are then decoded.
1. Inference Code
def detect_image(self, image):
    #---------------------------------------------------#
    #   Keep a copy of the input image for drawing later
    #---------------------------------------------------#
    old_image = image.copy()
    #---------------------------------------------------#
    #   Convert the image into a numpy array
    #---------------------------------------------------#
    image = np.array(image, np.float32)
    #---------------------------------------------------#
    #   Record the height and width of the input image
    #---------------------------------------------------#
    im_height, im_width, _ = np.shape(image)
    #---------------------------------------------------#
    #   Compute the scales used to map the predicted boxes
    #   and landmarks back to the original image size
    #---------------------------------------------------#
    scale = [
        np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0]
    ]
    scale_for_landmarks = [
        np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
        np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
        np.shape(image)[1], np.shape(image)[0]
    ]
    #---------------------------------------------------------#
    #   letterbox_image pads the image with gray bars so it can
    #   be resized without distortion; otherwise the anchors are
    #   regenerated for the original image size.
    #---------------------------------------------------------#
    if self.letterbox_image:
        image = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
    else:
        self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

    with torch.no_grad():
        #-----------------------------------------------------------#
        #   Preprocess (normalize) the image and add a batch dimension
        #-----------------------------------------------------------#
        image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0).type(torch.FloatTensor)

        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()

        #---------------------------------------------------------#
        #   Forward the image through the network
        #---------------------------------------------------------#
        loc, conf, landms = self.net(image)
        #-----------------------------------------------------------#
        #   Decode the predicted boxes
        #-----------------------------------------------------------#
        boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
        #-----------------------------------------------------------#
        #   Take the face-class confidence of each prediction
        #-----------------------------------------------------------#
        conf = conf.data.squeeze(0)[:, 1:2]
        #-----------------------------------------------------------#
        #   Decode the facial landmarks
        #-----------------------------------------------------------#
        landms = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])
        #-----------------------------------------------------------#
        #   Stack boxes, confidences and landmarks, then run NMS
        #-----------------------------------------------------------#
        boxes_conf_landms = torch.cat([boxes, conf, landms], -1)
        boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)
        if len(boxes_conf_landms) <= 0:
            return old_image

        #---------------------------------------------------------#
        #   If letterbox_image was used, remove the gray-bar offset
        #---------------------------------------------------------#
        if self.letterbox_image:
            boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms, \
                np.array([self.input_shape[0], self.input_shape[1]]), np.array([im_height, im_width]))

    boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale
    boxes_conf_landms[:, 5:] = boxes_conf_landms[:, 5:] * scale_for_landmarks

    for b in boxes_conf_landms:
        text = "{:.4f}".format(b[4])
        b = list(map(int, b))
        #---------------------------------------------------#
        #   b[0]-b[3] are the box coordinates, b[4] is the score
        #---------------------------------------------------#
        cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
        cy = b[1] + 12
        cv2.putText(old_image, text, (cx, cy),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        print(b[0], b[1], b[2], b[3], b[4])
        #---------------------------------------------------#
        #   b[5]-b[14] are the coordinates of the five landmarks
        #---------------------------------------------------#
        cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
        cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
        cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
        cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
        cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
    return old_image
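Before moving on to the decoding details, here is a minimal driver sketch. The wrapper class name Retinaface and the image paths are assumptions made for illustration; the method itself only needs an RGB numpy image, since the drawing is done with cv2:

import cv2

retinaface = Retinaface()                           # hypothetical wrapper that owns self.net, self.anchors, ...
image = cv2.imread("img/test.jpg")                  # OpenCV reads images as BGR
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)      # convert BGR -> RGB (assumed input ordering)
r_image = retinaface.detect_image(image)            # returns the image with boxes and landmarks drawn
r_image = cv2.cvtColor(r_image, cv2.COLOR_RGB2BGR)  # back to BGR for saving/display with OpenCV
cv2.imwrite("img/result.jpg", r_image)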
2. Bounding-Box Decoding
The decoding simply inverts the formula that was used to encode the ground-truth boxes relative to the anchors (prior boxes).
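Written out, the decoding implemented by decode below amounts to the following, with (c_x^a, c_y^a, w^a, h^a) the anchor center and size, (t_x, t_y, t_w, t_h) the network's box regression, and var_0, var_1 the two entries of self.cfg['variance']:

$$
\begin{aligned}
c_x &= c_x^{a} + t_x \cdot \mathrm{var}_0 \cdot w^{a}, &\quad c_y &= c_y^{a} + t_y \cdot \mathrm{var}_0 \cdot h^{a},\\
w &= w^{a} \cdot \exp(t_w \cdot \mathrm{var}_1), &\quad h &= h^{a} \cdot \exp(t_h \cdot \mathrm{var}_1),
\end{aligned}
$$

after which the center form (c_x, c_y, w, h) is converted to corner form: x_min = c_x - w/2, y_min = c_y - h/2, x_max = x_min + w, y_max = y_min + h.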
import torch

#-----------------------------#
#   Decode centers and sizes
#-----------------------------#
def decode(loc, priors, variances):
    boxes = torch.cat((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
                       priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    #   Convert (cx, cy, w, h) into (x_min, y_min, x_max, y_max)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes
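A minimal shape check for decode (the anchor count 16800 is just an example, roughly what a 640x640 input with three feature levels and two anchors per cell produces; the variances [0.1, 0.2] follow the usual SSD/RetinaFace convention):

import torch

num_anchors = 16800
loc    = torch.randn(num_anchors, 4)        # raw box regression from the network
priors = torch.rand(num_anchors, 4)         # anchors in normalized (cx, cy, w, h) form
boxes  = decode(loc, priors, [0.1, 0.2])
print(boxes.shape)                          # torch.Size([16800, 4]) -> (x_min, y_min, x_max, y_max)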
3. Facial Landmark Decoding
def decode_landm(pre, priors, variances):
    #   Each of the five keypoints is decoded as an offset from the
    #   anchor center, scaled by variances[0] and the anchor size
    landms = torch.cat((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
                        priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
                        priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
                        priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
                        priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
                        ), dim=1)
    return landms
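The output has shape (num_anchors, 10): five (x, y) pairs, one per keypoint, which detect_image above draws as five colored circles (in the standard RetinaFace annotation order these are the two eyes, the nose tip and the two mouth corners). A quick sanity check under the same dummy-anchor assumption as before:

import torch

num_anchors = 16800
priors    = torch.rand(num_anchors, 4)       # anchors in normalized (cx, cy, w, h) form
landm_pre = torch.randn(num_anchors, 10)     # raw landmark regression from the network
landms    = decode_landm(landm_pre, priors, [0.1, 0.2])
print(landms.shape)                          # torch.Size([16800, 10]) -> five (x, y) pairs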
4. Non-Maximum Suppression
from torchvision.ops import nms   # built-in NMS used below


def non_max_suppression(detection, conf_thres=0.5, nms_thres=0.3):
    #------------------------------------------#
    #   Keep only the boxes whose score exceeds
    #   the confidence threshold. Filtering by
    #   score before the overlap test greatly
    #   reduces the number of boxes to process.
    #------------------------------------------#
    mask = detection[:, 4] >= conf_thres
    detection = detection[mask]
    if len(detection) <= 0:
        return []

    #------------------------------------------#
    #   The built-in non-maximum suppression
    #   is faster than a hand-written loop.
    #------------------------------------------#
    keep = nms(
        detection[:, :4],
        detection[:, 4],
        nms_thres
    )
    best_box = detection[keep]

    # Equivalent hand-written NMS, kept for reference:
    # best_box = []
    # scores = detection[:, 4]
    # # Sort the boxes by score in descending order.
    # arg_sort = np.argsort(scores)[::-1]
    # detection = detection[arg_sort]
    # while np.shape(detection)[0] > 0:
    #     # Take the highest-scoring box, compute its overlap with all remaining
    #     # boxes, and discard those that overlap it too much.
    #     best_box.append(detection[0])
    #     if len(detection) == 1:
    #         break
    #     ious = iou(best_box[-1], detection[1:])
    #     detection = detection[1:][ious < nms_thres]

    return best_box.cpu().numpy()
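A toy example of the behavior (hand-made numbers for illustration only): two heavily overlapping boxes with scores 0.90 and 0.80 plus one low-score box; after suppression only the 0.90 box survives. The ten trailing zeros stand in for the landmark columns.

import torch

detections = torch.tensor([
    # x_min, y_min, x_max, y_max, score, then 10 landmark values (zeros here)
    [ 10.,  10., 110., 110., 0.90] + [0.] * 10,
    [ 12.,  12., 112., 112., 0.80] + [0.] * 10,   # overlaps the first box -> suppressed
    [200., 200., 260., 260., 0.20] + [0.] * 10,   # below conf_thres -> filtered out
])
result = non_max_suppression(detections, conf_thres=0.5, nms_thres=0.3)
print(result.shape)                               # (1, 15): only the 0.90 box remains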