image.png
    预测过程中的 detect_image 函数,函数内容如下面三图所示
    #保留原始图片信息
    old_width = image_shape[1]
    old_height = image_shape[0]
    old_image = copy.deepcopy(image)
    #对图片修剪,并进行归一化
    width,height = get_new_img_size(old_width,old_height)
    image = image.resize([width,height])
    photo = np.array(image,dtype = np.float32)/255
    photo = np.transpose(photo, (2, 0, 1))
    #把图片传入到模型中进行预测
    images = []
    images.append(photo)
    images = np.asarray(images)
    images = torch.from_numpy(images).cuda()
    image.pngimage.pngimage.png

    在该函数中调用了 DecodeBox() 函数,该函数如下面两图所示
    #防止框超出边缘
    # clip bounding box
    cls_bbox[..., 0] = (cls_bbox[..., 0]).clamp(min=0, max=width)
    cls_bbox[..., 2] = (cls_bbox[..., 2]).clamp(min=0, max=width)
    cls_bbox[..., 1] = (cls_bbox[..., 1]).clamp(min=0, max=height)
    cls_bbox[..., 3] = (cls_bbox[..., 3]).clamp(min=0, max=height)
    #计算每一个类的概率(包含背景类 如:数据集有4个类,加上背景总共便为五个类)
    prob = F.softmax(torch.tensor(roi_scores), dim=1)
    raw_cls_bbox = cls_bbox.cpu().numpy()
    raw_prob = prob.cpu().numpy()
    #取出属于这一类的框
    cls_bbox_l = raw_cls_bbox[:, l, :]
    prob_l = raw_prob[:, l]
    #判断是否大于score_thresh,大于则保留
    mask = prob_l > score_thresh
    cls_bbox_l = cls_bbox_l[mask]

    image.pngimage.png
    画框的过程

    for i, c in enumerate(label):
        predicted_class = self.class_names[int(c)]
        score = conf[i]
        left, top, right, bottom = bbox[i]
        top = top - 5
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
        right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))
        # 画框框
        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)
        label = label.encode('utf-8')
        print(label)
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])
        for i in range(thickness):
            draw.rectangle(
                [left + i, top + i, right - i, bottom - i],
                outline=self.colors[int(c)])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[int(c)])
        draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
        del draw
    print("time:",time.time()-start_time)
    return image