# Source code for torch.ao.quantization.backend_config.backend_config

from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Optional, Type, Union

import torch
from torch.ao.quantization.utils import Pattern
from enum import Enum


# Names exported by ``from <this module> import *``; everything else in the
# module (e.g. the ``*_DICT_KEY`` constants below) is left out of the star
# import.
__all__ = [
    "BackendConfig",
    "BackendPatternConfig",
    "DTypeConfig",
    "DTypeWithConstraints",
    "ObservationType",
]


# DTypeConfig dict keys
INPUT_DTYPE_DICT_KEY = "input_dtype"
OUTPUT_DTYPE_DICT_KEY = "output_dtype"
WEIGHT_DTYPE_DICT_KEY = "weight_dtype"
BIAS_DTYPE_DICT_KEY = "bias_dtype"
IS_DYNAMIC_DICT_KEY = "is_dynamic"

# BackendConfig dict keys
NAME_DICT_KEY = "name"
CONFIGS_DICT_KEY = "configs"

# BackendPatternConfig dict keys
PATTERN_DICT_KEY = "pattern"
PATTERN_COMPLEX_FORMAT_DICT_KEY = "pattern_complex_format"
OBSERVATION_TYPE_DICT_KEY = "observation_type"
DTYPE_CONFIGS_DICT_KEY = "dtype_configs"
ROOT_MODULE_DICT_KEY = "root_module"
QAT_MODULE_DICT_KEY = "qat_module"
REFERENCE_QUANTIZED_MODULE_DICT_KEY = "reference_quantized_module_for_root"
FUSED_MODULE_DICT_KEY = "fused_module"
FUSER_METHOD_DICT_KEY = "fuser_method"
ROOT_NODE_GETTER_DICT_KEY = "root_node_getter"
EXTRA_INPUTS_GETTER_DICT_KEY = "extra_inputs_getter"
NUM_TENSOR_ARGS_TO_OBSERVATION_TYPE_DICT_KEY = "num_tensor_args_to_observation_type"
INPUT_TYPE_TO_INDEX_DICT_KEY = "input_type_to_index"


# TODO: maybe rename this to something that is not tied to observers,
# e.g. QParamsType
class ObservationType(Enum):
    """An enum that represents the different ways an operator/operator
    pattern should be observed.
    """

    OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT = 0
    """This means the input and output are observed with different observers,
    based on qconfig.activation.
    example: conv, linear, softmax
    """

    OUTPUT_SHARE_OBSERVER_WITH_INPUT = 1
    """This means the output will use the same observer instance as the input,
    based on qconfig.activation.
    example: torch.cat, maxpool
    """

    INPUT_OUTPUT_NOT_OBSERVED = 2
    """This means the input and output are never observed.
    example: x.shape, x.size
    """


@dataclass
class DTypeWithConstraints:
    """
    Config for specifying additional constraints for a given dtype, such as
    quantization value ranges, scale value ranges, and fixed quantization
    params, to be used in
    :class:`~torch.ao.quantization.backend_config.DTypeConfig`.

    The constraints currently supported are:

    * `quant_min_lower_bound` and `quant_max_upper_bound`: Lower and upper
      bounds for the minimum and maximum quantized values respectively. If
      the QConfig's `quant_min` and `quant_max` fall outside this range,
      then the QConfig will be ignored.

    * `scale_min_lower_bound` and `scale_max_upper_bound`: Lower and upper
      bounds for the minimum and maximum scale values respectively. If the
      QConfig's minimum scale value (currently exposed as `eps`) falls below
      the lower bound, then the QConfig will be ignored. Note that the upper
      bound is currently not enforced.

    * `scale_exact_match` and `zero_point_exact_match`: Exact match
      requirements for scale and zero point, to be used for operators with
      fixed quantization parameters such as sigmoid and tanh. If the observer
      specified in the QConfig is neither `FixedQParamsObserver` nor
      `FixedQParamsFakeQuantize`, or if the quantization parameters don't
      match, then the QConfig will be ignored.
    """
    # Every field defaults to None.
    dtype: Optional[torch.dtype] = None
    quant_min_lower_bound: Union[int, float, None] = None
    quant_max_upper_bound: Union[int, float, None] = None
    scale_min_lower_bound: Union[int, float, None] = None
    scale_max_upper_bound: Union[int, float, None] = None
    scale_exact_match: Optional[float] = None
    zero_point_exact_match: Optional[int] = None


[docs]@dataclass
class DTypeConfig:
    """
    指定在参考模型规范中作为参数传递给量化操作的受支持数据类型的配置对象，
    用于输入和输出激活、权重和偏置。

    例如，考虑以下参考模型：

      quant1 - [dequant1 - fp32_linear - quant2] - dequant2

    方括号中的模式指的是静态量化线性操作的参考模式。在 DTypeConfig 中将输入数据类型
    设置为 `torch.quint8` 意味着我们将 `torch.quint8` 作为数据类型参数传递给第一个
    量化操作（quant1）。同样，将输出数据类型设置为 `torch.quint8` 意味着我们将
    `torch.quint8` 作为数据类型参数传递给第二个量化操作（quant2）。

    请注意，此处的数据类型不指操作的接口数据类型。例如，此处的“输入数据类型”不是
    传递给量化线性操作的输入张量的数据类型。尽管它仍然可以与接口数据类型相同，但这
    并不总是如此，例如在动态量化中，接口数据类型是 fp32，但 DTypeConfig 中指定的
    “输入数据类型”仍然是 quint8。此处的数据类型语义与观察者中指定的数据类型语义相同。

    这些数据类型将与用户 QConfig 中指定的数据类型进行匹配。如果有匹配，并且 QConfig
    满足 DTypeConfig 中指定的约束（如果有），那么我们将使用此 DTypeConfig 量化给定
    模式。否则，QConfig 将被忽略，模式将不会被量化。

    示例用法::

        >>> # xdoctest: +SKIP(failing)
        >>> dtype_config1 = DTypeConfig(
        ...     input_dtype=torch.quint8,
        ...     output_dtype=torch.quint8,
        ...     weight_dtype=torch.qint8,
        ...     bias_dtype=torch.float)

        >>> dtype_config2 = DTypeConfig(
        ...     input_dtype=DTypeWithConstraints(
        ...         dtype=torch.quint8,
        ...         quant_min_lower_bound=0,
        ...         quant_max_upper_bound=255,
        ...     ),
        ...     output_dtype=DTypeWithConstraints(
        ...         dtype=torch.quint8,
        ...         quant_min_lower_bound=0,
<