# Source code for torch.profiler.profiler

import gzip
import json
import os
import tempfile
from abc import ABC, abstractmethod
from enum import Enum
from functools import partial
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
from warnings import warn

from typing_extensions import Self

import torch
import torch.autograd.profiler as prof
from torch._C import _get_privateuse1_backend_name
from torch._C._profiler import (
    _add_execution_trace_observer,
    _disable_execution_trace_observer,
    _enable_execution_trace_observer,
    _ExperimentalConfig,
    _remove_execution_trace_observer,
)
from torch.autograd import kineto_available, ProfilerActivity
from torch.profiler._memory_profiler import MemoryProfile, MemoryProfileTimeline


# Public API of this module: the names exported by a star-import.
# NOTE(review): only ``supported_activities`` is defined in the visible part
# of the file; the remaining names are expected to be defined further down.
__all__ = [
    "supported_activities",
    "ProfilerAction",
    "schedule",
    "tensorboard_trace_handler",
    "profile",
    "ExecutionTraceObserver",
]
# Module-level string constant.
# NOTE(review): presumably the label used for per-step profiler events --
# its consumers are outside the visible part of the file; confirm there.
PROFILER_STEP_NAME = "ProfilerStep"


def supported_activities():
    """
    Return the set of profiler tracing activities that are supported.

    Note: the profiler uses the CUPTI library to trace on-device CUDA
    kernels. When CUDA is enabled but CUPTI is not available, passing
    ``ProfilerActivity.CUDA`` to the profiler makes it fall back to the
    legacy CUDA profiling code (the same as in the legacy
    ``torch.autograd.profiler``). In that case CUDA time is reported in
    the profiler table output, but not in the JSON trace.
    """
    # Thin wrapper: the answer comes straight from the autograd bindings.
    available = torch.autograd._supported_activities()
    return available


class _ITraceObserver(ABC):
    """Abstract interface for a trace observer.

    The interface consists of three methods: ``start``, ``stop`` and
    ``cleanup``. All three are abstract, so a concrete subclass has to
    override each of them before it can be instantiated.
    """

    @abstractmethod
    def start(self):
        """Start the observer. Must be overridden by subclasses."""

    @abstractmethod
    def stop(self):
        """Stop the observer. Must be overridden by subclasses."""

    @abstractmethod
    def cleanup(self):
        """Clean up the observer. Must be overridden by subclasses."""


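class _KinetoProfile:
    """Low-level profiler that wraps the autograd profiler.

    Args:
        activities (iterable): list of activity groups (CPU, CUDA) to use in profiling, supported values:
            ``torch.profiler.ProfilerActivity.CPU``, ``torch.profiler.ProfilerActivity.CUDA``.
            Default value: ProfilerActivity.CPU and (when available) ProfilerActivity.CUDA.
        record_shapes (bool): save information about the operators' input shapes.
        profile_memory (bool): track tensor memory allocation/deallocation (see ``export_memory_timeline``
            for more details).
        with_stack (bool): record source information (file and line number) for the ops.
        with_flops (bool): use formulas to estimate the FLOPS of specific operators
            (matrix multiplication and 2D convolution).
        with_modules (bool): record the module hierarchy (including function names)
            corresponding to the call stack of the op. For example, if module A's forward calls
            module B's forward, which contains an aten::add op,
            then the module hierarchy of aten::add is A.B.
            Note: this is currently supported only for TorchScript models,
            not for eager mode models.
        experimental_config (_ExperimentalConfig) : a set of experimental options
            for Kineto library features. Note that backward compatibility is not guaranteed.
        execution_trace_observer (ExecutionTraceObserver) : a PyTorch Execution Trace Observer object.
            PyTorch Execution Traces offer a graph-based
            representation of AI/ML workloads and enable replay benchmarks, simulators, and emulators.
            When this argument is included, the observer's start() and stop() will be called for the
            same time window as the PyTorch profiler.

    .. note::
        This API is experimental and subject to change in the future.

        Enabling shape and stack tracing results in additional overhead.
        When record_shapes=True is specified, the profiler will temporarily hold references to the tensors;
        that may further prevent certain optimizations that depend on the reference count and introduce
        extra tensor copies.
    """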

    def __init__(
        self,
        *,
        activities: Optional[Iterable[ProfilerActivity]] = None,
        record_shapes: bool = False,
        profile_memory: bool = False,
        with_stack: bool = False,
        with_flops: bool = False,
        with_modules: bool = False,
        experimental_config: Optional[_ExperimentalConfig] = None,
        execution_trace_observer: Optional[_ITraceObserver] = None,
    ):
        self.activities = set(activities) if activities else supported_activities()
        self.record_shapes = record_shapes
        self.with_flops = with_flops
        self.profile_memory = profile_memory
        self.with_stack = with_stack
        self.with_modules = with_modules
        self.experimental_config = experimental_config
        self.execution_trace_observer = execution_trace_observer
        self.profiler: Optional[prof.profile] = None
        self.mem_tl: Optional[MemoryProfileTimeline] = None
        self.use_device = None
        privateuse1_backend = _get_privateuse1_backend_name()
        if privateuse1_backend != "privateuseone":
            self.use_device = privateuse