将图片读取并进行预处理后输入神经网络,网络会输出三类结果,分别为预测框的回归信息、置信度和人脸关键点的回归信息。之后需要结合先验框对网络的输出进行解码,并通过置信度筛选和非极大值抑制得到最终的检测结果。
一、推理代码
def detect_image(self, image):
    """Detect faces in ``image`` and return a copy with the results drawn on it.

    The image is preprocessed and passed through ``self.net``; the three
    network outputs (box regressions, class confidences and landmark
    regressions) are decoded against ``self.anchors``, filtered with
    ``non_max_suppression`` and finally drawn with OpenCV.

    Args:
        image: Input image. It must provide ``.copy()`` and convert through
            ``np.array`` into an array with three axes
            (height, width, channels).

    Returns:
        The copy of the input taken before preprocessing. For every kept
        detection it carries one rectangle, one score label and five
        landmark dots. If nothing survives the confidence filter / NMS, the
        copy is returned without any drawing.
    """
    # Back up the input; all drawing below happens on this copy.
    old_image = image.copy()

    # Work on a float32 numpy array and remember the original height/width.
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)

    # Multipliers that convert the predictions to the original image size:
    # (w, h, w, h) for the box corners, (w, h) repeated for the five
    # landmark points.
    scale = [im_width, im_height] * 2
    scale_for_landmarks = [im_width, im_height] * 5

    if self.letterbox_image:
        # letterbox_image pads the image with gray bars so that it can be
        # resized without distortion.
        image = letterbox_image(image, [self.input_shape[1], self.input_shape[0]])
    else:
        # No letterboxing: build the anchors for this image's own size.
        self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

    with torch.no_grad():
        # Preprocess / normalize, move the last axis to the front and add a
        # leading batch axis.
        normalized = preprocess_input(image).transpose(2, 0, 1)
        image = torch.from_numpy(normalized).unsqueeze(0).type(torch.FloatTensor)

        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()

        # Forward pass through the network.
        loc, conf, landms = self.net(image)

        # Decode the predicted boxes.
        boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])

        # Keep column 1 of the class scores as the confidence of each box.
        conf = conf.data.squeeze(0)[:, 1:2]

        # Decode the facial landmarks.
        landms = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])

        # Stack everything as [box(4), score(1), landmarks(10)] per row and
        # filter by confidence + NMS.
        boxes_conf_landms = torch.cat([boxes, conf, landms], -1)
        boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)

        if len(boxes_conf_landms) <= 0:
            return old_image

        if self.letterbox_image:
            # When letterboxing was used, the gray-bar area has to be
            # removed from the coordinates again.
            boxes_conf_landms = retinaface_correct_boxes(
                boxes_conf_landms,
                np.array([self.input_shape[0], self.input_shape[1]]),
                np.array([im_height, im_width]),
            )

    boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale
    boxes_conf_landms[:, 5:] = boxes_conf_landms[:, 5:] * scale_for_landmarks

    # One colour per landmark, in the order the landmarks are stored.
    landmark_colors = (
        (0, 0, 255),
        (0, 255, 255),
        (255, 0, 255),
        (0, 255, 0),
        (255, 0, 0),
    )

    for detection in boxes_conf_landms:
        # Format the score before the row is truncated to integers.
        text = "{:.4f}".format(detection[4])
        b = [int(value) for value in detection]

        # b[0]-b[3] are the face box corners, b[4] is the (truncated) score.
        cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
        cy = b[1] + 12
        cv2.putText(old_image, text, (cx, cy),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

        print(*b[:5])

        # b[5]-b[14] hold the five landmark points as (x, y) pairs.
        for point_index, color in enumerate(landmark_colors):
            x_pos = 5 + 2 * point_index
            cv2.circle(old_image, (b[x_pos], b[x_pos + 1]), 1, color, 4)

    return old_image
一、预测框解码
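根据先验框编码的公式反推,即可得到解码公式:预测框中心 = 先验框中心 + 中心偏移量 × variance[0] × 先验框宽高;预测框宽高 = 先验框宽高 × exp(宽高偏移量 × variance[1])。得到中心和宽高后,再将其转换为左上角和右下角坐标的形式。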
# -----------------------------#
#   Decode centers and sizes
# -----------------------------#
def decode(loc, priors, variances):
    """Turn box regressions into corner-form boxes using the prior boxes.

    Args:
        loc: 2-D tensor of regressions; columns 0-1 are the center offsets
            and columns 2 onward the size offsets.
        priors: 2-D tensor of prior boxes; columns 0-1 are used as centers
            and columns 2 onward as sizes.
        variances: Indexable pair; ``variances[0]`` scales the center
            offsets and ``variances[1]`` scales the size offsets.

    Returns:
        A new tensor whose first two columns are ``center - size / 2`` and
        whose remaining columns are ``center + size / 2`` (top-left and
        bottom-right corners).
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    # Center: shift the prior center by the offset scaled with the prior size.
    centers = prior_centers + loc[:, :2] * variances[0] * prior_sizes
    # Size: scale the prior size exponentially.
    sizes = prior_sizes * torch.exp(loc[:, 2:] * variances[1])

    # Convert (center, size) into (top-left, bottom-right).
    top_left = centers - sizes / 2
    bottom_right = top_left + sizes
    return torch.cat((top_left, bottom_right), 1)
二、人脸关键点解码
def decode_landm(pre, priors, variances):
    """Decode five landmark regressions into points using the prior boxes.

    Each landmark is a pair of columns in ``pre``. It is decoded the same
    way as a box center: prior center plus the offset scaled by
    ``variances[0]`` and by the prior size.

    Args:
        pre: 2-D tensor whose columns 0-9 hold five (x, y) offset pairs.
        priors: 2-D tensor of prior boxes; columns 0-1 are used as centers
            and columns 2 onward as sizes.
        variances: Indexable; only ``variances[0]`` is used.

    Returns:
        A tensor with the five decoded (x, y) pairs concatenated along
        dimension 1.
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    # Columns (0, 1), (2, 3), ..., (8, 9) each hold one landmark offset.
    points = [
        prior_centers + pre[:, start:start + 2] * variances[0] * prior_sizes
        for start in range(0, 10, 2)
    ]
    return torch.cat(points, dim=1)
三、非极大值抑制
def non_max_suppression(detection, conf_thres=0.5, nms_thres=0.3):
    """Filter detections by score, then apply non-maximum suppression.

    Args:
        detection: 2-D tensor with one detection per row; columns 0-3 are
            the box and column 4 is the score. Further columns are carried
            along unchanged.
        conf_thres: Rows whose score is below this value are discarded.
        nms_thres: Overlap threshold passed to ``nms``.

    Returns:
        An empty list when no row reaches ``conf_thres``; otherwise a numpy
        array containing the rows selected by ``nms``.
    """
    # Thresholding on the score before the overlap filtering greatly reduces
    # the number of boxes NMS has to look at.
    candidates = detection[detection[:, 4] >= conf_thres]
    if len(candidates) <= 0:
        return []

    # The library-provided NMS is faster than a hand-written one. The manual
    # equivalent sorts the boxes by score in descending order, then
    # repeatedly keeps the best remaining box and drops every other box
    # whose IoU with it is not below nms_thres.
    kept_indices = nms(candidates[:, :4], candidates[:, 4], nms_thres)
    return candidates[kept_indices].cpu().numpy()
