跳至内容

追踪对象

利用Supervision的高级功能,通过无缝跟踪由多种目标检测、分割和关键点模型识别的对象,来增强您的视频分析能力。本综合指南将带您了解如何通过Inference或Ultralytics包使用YOLOv8模型执行推理。随后,您将学习如何高效跟踪这些对象并为视频内容添加注释,以便进行更深入的分析。

目标检测与分割

为了让您更容易跟随我们的教程,请下载我们将用作示例的视频。您可以使用supervision[assets]扩展来完成此操作。

from supervision.assets import download_assets, VideoAssets

download_assets(VideoAssets.PEOPLE_WALKING)

运行推理

首先,您需要从目标检测或分割模型获取预测结果。本教程以YOLOv8模型为例进行说明。不过,Supervision具有通用性,可与多种模型兼容。如需了解如何接入其他模型,请参考此链接。

我们将定义一个callback函数,该函数将通过获取模型预测结果并根据这些预测结果对视频帧进行标注来处理视频的每一帧。这个callback函数在本教程的后续步骤中至关重要,因为它将被修改以包含跟踪、标签和轨迹标注功能。

提示

支持目标检测和分割模型。试试 yolov8n.pt 或 yolov8n-640-seg!

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect objects in one video frame and draw their bounding boxes.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with the detections drawn as boxes.
    """
    # The model call returns a sequence; use its first element.
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)
    # Draw on a copy so the frame passed in is not annotated in place.
    return box_annotator.annotate(frame.copy(), detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect objects in one video frame and draw their bounding boxes.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with the detections drawn as boxes.
    """
    # `infer` returns a sequence; use its first element.
    results = model.infer(frame)[0]
    detections = sv.Detections.from_inference(results)
    # Draw on a copy so the frame passed in is not annotated in place.
    return box_annotator.annotate(frame.copy(), detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)

追踪

在运行推理并获得预测结果后,下一步是跟踪视频中检测到的对象。利用Supervision的sv.ByteTrack功能,每个检测到的对象都会被分配一个唯一的追踪ID,从而能够持续跟踪对象在不同帧之间的运动路径。

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect objects in one frame, run them through the tracker, and draw boxes.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with the tracked detections drawn as boxes.
    """
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)
    # The module-level tracker is shared across calls; it returns the
    # detections that are annotated below.
    detections = tracker.update_with_detections(detections)
    return box_annotator.annotate(frame.copy(), detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect objects in one frame, run them through the tracker, and draw boxes.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with the tracked detections drawn as boxes.
    """
    results = model.infer(frame)[0]
    detections = sv.Detections.from_inference(results)
    # The module-level tracker is shared across calls; it returns the
    # detections that are annotated below.
    detections = tracker.update_with_detections(detections)
    return box_annotator.annotate(frame.copy(), detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)

用追踪ID标注视频

在视频中标注跟踪ID有助于清晰区分和追踪每个对象。借助Supervision中的sv.LabelAnnotator,我们可以在检测到的对象上叠加跟踪ID和类别标签,从而直观展示每个对象的类别及其唯一标识符。

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect and track objects in one frame, then draw boxes and ID labels.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with boxes and "#<tracker_id> <class name>" labels.
    """
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)
    detections = tracker.update_with_detections(detections)

    # One label per tracked detection; `results.names` maps a class id to
    # its class name.
    labels = [
        f"#{tracker_id} {results.names[class_id]}"
        for class_id, tracker_id
        in zip(detections.class_id, detections.tracker_id)
    ]

    # Draw on a copy so the frame passed in is not annotated in place.
    annotated_frame = box_annotator.annotate(
        frame.copy(), detections=detections)
    return label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect and track objects in one frame, then draw boxes and ID labels.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with boxes and "#<tracker_id> <class name>" labels.
    """
    results = model.infer(frame)[0]
    detections = sv.Detections.from_inference(results)
    detections = tracker.update_with_detections(detections)

    # An Inference response has no `names` lookup (that attribute belongs to
    # Ultralytics results). `from_inference` stores the class names in
    # `detections.data["class_name"]`, so read them from there. Fall back to
    # an empty list so a frame with no tracked objects yields no labels
    # instead of failing on a missing key.
    class_names = detections.data.get("class_name", [])
    labels = [
        f"#{tracker_id} {class_name}"
        for class_name, tracker_id
        in zip(class_names, detections.tracker_id)
    ]

    # Draw on a copy so the frame passed in is not annotated in place.
    annotated_frame = box_annotator.annotate(
        frame.copy(), detections=detections)
    return label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)

使用轨迹标注视频

在视频中添加轨迹线涉及叠加检测到的物体的历史路径。这一功能由sv.TraceAnnotator提供支持,可以可视化物体的运动轨迹,有助于理解视频中物体的运动模式和交互情况。

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect and track objects in one frame, then draw boxes, labels and traces.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with boxes, "#<tracker_id> <class name>" labels
        and trace annotations.
    """
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)
    detections = tracker.update_with_detections(detections)

    # One label per tracked detection; `results.names` maps a class id to
    # its class name.
    labels = [
        f"#{tracker_id} {results.names[class_id]}"
        for class_id, tracker_id
        in zip(detections.class_id, detections.tracker_id)
    ]

    # Annotators are chained: each one draws onto the previous one's output.
    annotated_frame = box_annotator.annotate(
        frame.copy(), detections=detections)
    annotated_frame = label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)
import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(model_id="yolov8n-640", api_key=<ROBOFLOW API KEY>)
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect and track objects in one frame, then draw boxes, labels and traces.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with boxes, "#<tracker_id> <class name>" labels
        and trace annotations.
    """
    results = model.infer(frame)[0]
    detections = sv.Detections.from_inference(results)
    detections = tracker.update_with_detections(detections)

    # An Inference response has no `names` lookup (that attribute belongs to
    # Ultralytics results). `from_inference` stores the class names in
    # `detections.data["class_name"]`, so read them from there. Fall back to
    # an empty list so a frame with no tracked objects yields no labels
    # instead of failing on a missing key.
    class_names = detections.data.get("class_name", [])
    labels = [
        f"#{tracker_id} {class_name}"
        for class_name, tracker_id
        in zip(class_names, detections.tracker_id)
    ]

    # Annotators are chained: each one draws onto the previous one's output.
    annotated_frame = box_annotator.annotate(
        frame.copy(), detections=detections)
    annotated_frame = label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="people-walking.mp4",
    target_path="result.mp4",
    callback=callback
)

关键点

模型不仅限于目标检测和分割。关键点检测允许对身体关节和连接进行详细分析,对于人体姿态估计等应用尤其有价值。本节将介绍关键点跟踪。我们将逐步讲解如何标注关键点,将其转换为与ByteTrack兼容的边界框检测,并应用检测平滑以增强稳定性。

为了让您更容易跟随我们的教程,让我们下载将用作示例的视频。您可以使用supervision[assets]扩展来完成此操作。

from supervision.assets import download_assets, VideoAssets

download_assets(VideoAssets.SKIING)

关键点检测

首先,您需要从关键点检测模型获取预测结果。本教程以YOLOv8模型为例。不过,Supervision具有通用性,可与多种模型兼容。如需了解如何接入其他模型,请参考此链接。

我们将定义一个callback函数,该函数将通过获取模型预测结果来处理视频的每一帧,然后基于这些预测结果对帧进行标注。

让我们立即使用EdgeAnnotator和VertexAnnotator来可视化结果。

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8m-pose.pt")
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Run the pose model on one frame and draw the key point skeleton.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with edge and vertex annotations drawn.
    """
    results = model(frame)[0]
    key_points = sv.KeyPoints.from_ultralytics(results)

    # Edges are drawn first on a copy of the frame, then vertices on top.
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    return vertex_annotator.annotate(
        annotated_frame, key_points=key_points)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(
    model_id="yolov8m-pose-640", api_key=<ROBOFLOW API KEY>)
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Run the pose model on one frame and draw the key point skeleton.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with edge and vertex annotations drawn.
    """
    results = model.infer(frame)[0]
    key_points = sv.KeyPoints.from_inference(results)

    # Edges are drawn first on a copy of the frame, then vertices on top.
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    return vertex_annotator.annotate(
        annotated_frame, key_points=key_points)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)

转换为检测结果

关键点追踪目前支持通过将KeyPoints转换为Detections来实现。这可以通过KeyPoints.as_detections()函数完成。

让我们转换为检测结果并使用我们的BoxAnnotator来可视化结果。

提示

您可以使用selected_keypoint_indices参数来指定要转换的关键点子集。这在某些关键点可能被遮挡时非常有用。例如:一个人可能会挥动手臂,导致肘部有时会被躯干遮挡。

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8m-pose.pt")
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Draw key points for one frame plus boxes derived from those key points.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with edge, vertex and box annotations drawn.
    """
    results = model(frame)[0]
    key_points = sv.KeyPoints.from_ultralytics(results)
    # Convert the key points into a Detections object for the box annotator.
    detections = key_points.as_detections()

    # Annotators are chained: each one draws onto the previous one's output.
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    return box_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(
    model_id="yolov8m-pose-640", api_key=<ROBOFLOW API KEY>)
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Draw key points for one frame plus boxes derived from those key points.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with edge, vertex and box annotations drawn.
    """
    results = model.infer(frame)[0]
    key_points = sv.KeyPoints.from_inference(results)
    # Convert the key points into a Detections object for the box annotator.
    detections = key_points.as_detections()

    # Annotators are chained: each one draws onto the previous one's output.
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    return box_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)

关键点追踪

现在我们有了一个Detections对象,可以在整个视频中对其进行跟踪。利用Supervision的sv.ByteTrack功能,每个检测到的对象都会被分配一个唯一的跟踪ID,从而能够跨不同帧持续追踪该对象的运动路径。我们将使用TraceAnnotator来可视化结果。

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8m-pose.pt")
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()

tracker = sv.ByteTrack()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Track key-point-derived detections in one frame and draw the results.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with edge, vertex, box and trace annotations drawn.
    """
    results = model(frame)[0]
    key_points = sv.KeyPoints.from_ultralytics(results)
    # The tracker is fed Detections, so convert the key points first.
    detections = key_points.as_detections()
    detections = tracker.update_with_detections(detections)

    # Annotators are chained: each one draws onto the previous one's output.
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    annotated_frame = box_annotator.annotate(
        annotated_frame, detections=detections)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(
    model_id="yolov8m-pose-640", api_key=<ROBOFLOW API KEY>)
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()

tracker = sv.ByteTrack()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Track key-point-derived detections in one frame and draw the results.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with edge, vertex, box and trace annotations drawn.
    """
    results = model.infer(frame)[0]
    key_points = sv.KeyPoints.from_inference(results)
    # The tracker is fed Detections, so convert the key points first.
    detections = key_points.as_detections()
    detections = tracker.update_with_detections(detections)

    # Annotators are chained: each one draws onto the previous one's output.
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    annotated_frame = box_annotator.annotate(
        annotated_frame, detections=detections)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)

额外功能:平滑处理

我们可以在此停止,因为我们已经成功跟踪了关键点模型检测到的对象。不过,我们可以通过应用DetectionsSmoother进一步增强边界框的稳定性。这个工具通过平滑跨帧的边界框坐标来帮助稳定边界框,使用起来非常简单:

import numpy as np
import supervision as sv
from ultralytics import YOLO

model = YOLO("yolov8m-pose.pt")
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()

tracker = sv.ByteTrack()
smoother = sv.DetectionsSmoother()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Track and smooth key-point-derived detections, then draw the results.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with edge, vertex, box and trace annotations drawn.
    """
    results = model(frame)[0]
    key_points = sv.KeyPoints.from_ultralytics(results)
    detections = key_points.as_detections()
    # Order matters: the smoother receives the tracker's output.
    detections = tracker.update_with_detections(detections)
    detections = smoother.update_with_detections(detections)

    # Key points are drawn from the raw model output; boxes and traces use
    # the tracked, smoothed detections.
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    annotated_frame = box_annotator.annotate(
        annotated_frame, detections=detections)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)
import numpy as np
import supervision as sv
from inference.models.utils import get_roboflow_model

model = get_roboflow_model(
    model_id="yolov8m-pose-640", api_key=<ROBOFLOW API KEY>)
edge_annotator = sv.EdgeAnnotator()
vertex_annotator = sv.VertexAnnotator()
box_annotator = sv.BoxAnnotator()

tracker = sv.ByteTrack()
smoother = sv.DetectionsSmoother()
trace_annotator = sv.TraceAnnotator()

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Track and smooth key-point-derived detections, then draw the results.

    Args:
        frame: Video frame to run the model on.
        _: Unused second argument supplied by the caller.

    Returns:
        A copy of ``frame`` with edge, vertex, box and trace annotations drawn.
    """
    results = model.infer(frame)[0]
    key_points = sv.KeyPoints.from_inference(results)
    detections = key_points.as_detections()
    # Order matters: the smoother receives the tracker's output.
    detections = tracker.update_with_detections(detections)
    detections = smoother.update_with_detections(detections)

    # Key points are drawn from the raw model output; boxes and traces use
    # the tracked, smoothed detections.
    annotated_frame = edge_annotator.annotate(
        frame.copy(), key_points=key_points)
    annotated_frame = vertex_annotator.annotate(
        annotated_frame, key_points=key_points)
    annotated_frame = box_annotator.annotate(
        annotated_frame, detections=detections)
    return trace_annotator.annotate(
        annotated_frame, detections=detections)

sv.process_video(
    source_path="skiing.mp4",
    target_path="result.mp4",
    callback=callback
)

这份结构化指南将详细介绍如何利用Supervision的各项功能(包括目标追踪和轨迹标注)来高效标注视频。

评论