检测与标注
Supervision 提供了一个无缝的流程,用于标注由各种物体检测和分割模型生成的预测结果。本指南展示了如何使用 Inference、Ultralytics 或 Transformers 等包进行推理。接着,你将学习如何将这些预测导入 Supervision 并用于标注原始图像。

运行检测
首先,你需要从你的物体检测或分割模型中获取预测结果。
“Inference”
import cv2from inference import get_modelmodel = get_model(model_id="yolov8n-640")image = cv2.imread(<SOURCE_IMAGE_PATH>)results = model.infer(image)[0]
“Ultralytics”
import cv2from ultralytics import YOLOmodel = YOLO("yolov8n.pt")image = cv2.imread(<SOURCE_IMAGE_PATH>)results = model(image)[0]
“Transformers”
import torchfrom PIL import Imagefrom transformers import DetrImageProcessor, DetrForObjectDetectionprocessor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")image = Image.open(<SOURCE_IMAGE_PATH>)inputs = processor(images=image, return_tensors="pt")with torch.no_grad():outputs = model(**inputs)width, height = image.sizetarget_size = torch.tensor([[height, width]])results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_size)[0]
将预测结果加载到 Supervision
现在我们有了模型的预测结果,可以将它们加载到 Supervision 中。
“Inference”
我们可以使用 sv.Detections.from_inference 方法加载,该方法支持检测和分割模型的结果。
import cv2import supervision as svfrom inference import get_modelmodel = get_model(model_id="yolov8n-640")image = cv2.imread(<SOURCE_IMAGE_PATH>)results = model.infer(image)[0]detections = sv.Detections.from_inference(results)
“Ultralytics”
使用 sv.Detections.from_ultralytics 方法加载,同样支持检测和分割模型结果。
import cv2import supervision as svfrom ultralytics import YOLOmodel = YOLO("yolov8n.pt")image = cv2.imread(<SOURCE_IMAGE_PATH>)results = model(image)[0]detections = sv.Detections.from_ultralytics(results)
“Transformers”
使用 sv.Detections.from_transformers 方法加载,支持检测和分割模型结果。
import torchimport supervision as svfrom PIL import Imagefrom transformers import DetrImageProcessor, DetrForObjectDetectionprocessor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")image = Image.open(<SOURCE_IMAGE_PATH>)inputs = processor(images=image, return_tensors="pt")with torch.no_grad():outputs = model(**inputs)width, height = image.sizetarget_size = torch.tensor([[height, width]])results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_size)[0]detections = sv.Detections.from_transformers(transformers_results=results,id2label=model.config.id2label)
你也可以使用以下方法加载来自其他计算机视觉框架和库的预测:
- from_deepsparse (DeepSparse)
- from_detectron2 (Detectron2)
- from_mmdetection (MMDetection)
- from_sam (Segment Anything Model)
- from_yolo_nas (YOLO-NAS)
使用检测结果标注图像
最后,我们用预测结果对图像进行标注。因为我们使用的是物体检测模型,我们将用到 sv.BoxAnnotator 和 sv.LabelAnnotator 这两个类。
“Inference”
import cv2import supervision as svfrom inference import get_modelmodel = get_model(model_id="yolov8n-640")image = cv2.imread(<SOURCE_IMAGE_PATH>)results = model.infer(image)[0]detections = sv.Detections.from_inference(results)box_annotator = sv.BoxAnnotator()label_annotator = sv.LabelAnnotator()annotated_image = box_annotator.annotate(scene=image, detections=detections)annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections)
“Ultralytics”
import cv2import supervision as svfrom ultralytics import YOLOmodel = YOLO("yolov8n.pt")image = cv2.imread(<SOURCE_IMAGE_PATH>)results = model(image)[0]detections = sv.Detections.from_ultralytics(results)box_annotator = sv.BoxAnnotator()label_annotator = sv.LabelAnnotator()annotated_image = box_annotator.annotate(scene=image, detections=detections)annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections)
“Transformers”
import torchimport supervision as svfrom PIL import Imagefrom transformers import DetrImageProcessor, DetrForObjectDetectionprocessor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")image = Image.open(<SOURCE_IMAGE_PATH>)inputs = processor(images=image, return_tensors="pt")with torch.no_grad():outputs = model(**inputs)width, height = image.sizetarget_size = torch.tensor([[height, width]])results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_size)[0]detections = sv.Detections.from_transformers(transformers_results=results,id2label=model.config.id2label)box_annotator = sv.BoxAnnotator()label_annotator = sv.LabelAnnotator()annotated_image = box_annotator.annotate(scene=image, detections=detections)annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections)

显示自定义标签
默认情况下,sv.LabelAnnotator 会使用检测结果的 class_name(如果有)或 class_id 作为标签。你可以通过向 annotate 方法传入自定义的 labels 列表来覆盖这一行为。
“Inference”
import cv2import supervision as svfrom inference import get_modelmodel = get_model(model_id="yolov8n-640")image = cv2.imread(<SOURCE_IMAGE_PATH>)results = model.infer(image)[0]detections = sv.Detections.from_inference(results)box_annotator = sv.BoxAnnotator()label_annotator = sv.LabelAnnotator()labels = [f"{class_name} {confidence:.2f}"for class_name, confidencein zip(detections['class_name'], detections.confidence)]annotated_image = box_annotator.annotate(scene=image, detections=detections)annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections, labels=labels)
“Ultralytics”
import cv2import supervision as svfrom ultralytics import YOLOmodel = YOLO("yolov8n.pt")image = cv2.imread(<SOURCE_IMAGE_PATH>)results = model(image)[0]detections = sv.Detections.from_ultralytics(results)box_annotator = sv.BoxAnnotator()label_annotator = sv.LabelAnnotator()labels = [f"{class_name} {confidence:.2f}"for class_name, confidencein zip(detections['class_name'], detections.confidence)]annotated_image = box_annotator.annotate(scene=image, detections=detections)annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections, labels=labels)
“Transformers”
import torchimport supervision as svfrom PIL import Imagefrom transformers import DetrImageProcessor, DetrForObjectDetectionprocessor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")image = Image.open(<SOURCE_IMAGE_PATH>)inputs = processor(images=image, return_tensors="pt")with torch.no_grad():outputs = model(**inputs)width, height = image.sizetarget_size = torch.tensor([[height, width]])results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_size)[0]detections = sv.Detections.from_transformers(transformers_results=results,id2label=model.config.id2label)box_annotator = sv.BoxAnnotator()label_annotator = sv.LabelAnnotator()labels = [f"{class_name} {confidence:.2f}"for class_name, confidencein zip(detections['class_name'], detections.confidence)]annotated_image = box_annotator.annotate(scene=image, detections=detections)annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections, labels=labels)

使用分割结果标注图像
如果你使用的是分割模型,sv.MaskAnnotator 可以替代 sv.BoxAnnotator,用来绘制掩码而不是边框。
“Inference”
import cv2import supervision as svfrom inference import get_modelmodel = get_model(model_id="yolov8n-seg-640")image = cv2.imread(<SOURCE_IMAGE_PATH>)results = model.infer(image)[0]detections = sv.Detections.from_inference(results)mask_annotator = sv.MaskAnnotator()label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)annotated_image = mask_annotator.annotate(scene=image, detections=detections)annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections)
“Ultralytics”
import cv2import supervision as svfrom ultralytics import YOLOmodel = YOLO("yolov8n-seg.pt")image = cv2.imread(<SOURCE_IMAGE_PATH>)results = model(image)[0]detections = sv.Detections.from_ultralytics(results)mask_annotator = sv.MaskAnnotator()label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)annotated_image = mask_annotator.annotate(scene=image, detections=detections)annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections)
“Transformers”
import torchimport supervision as svfrom PIL import Imagefrom transformers import DetrImageProcessor, DetrForSegmentationprocessor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50-panoptic")model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic")image = Image.open(<SOURCE_IMAGE_PATH>)inputs = processor(images=image, return_tensors="pt")with torch.no_grad():outputs = model(**inputs)width, height = image.sizetarget_size = torch.tensor([[height, width]])results = processor.post_process_segmentation(outputs=outputs, target_sizes=target_size)[0]detections = sv.Detections.from_transformers(transformers_results=results,id2label=model.config.id2label)mask_annotator = sv.MaskAnnotator()label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER_OF_MASS)labels = [f"{class_name} {confidence:.2f}"for class_name, confidencein zip(detections['class_name'], detections.confidence)]annotated_image = mask_annotator.annotate(scene=image, detections=detections)annotated_image = label_annotator.annotate(scene=annotated_image, detections=detections, labels=labels)

