Source code for torch.autograd

"""
``torch.autograd`` provides classes and functions implementing automatic differentiation of arbitrary scalar-valued functions. It requires minimal changes to the existing code - you only need to declare :class:`Tensor` s for which gradients should be computed with the ``requires_grad=True`` keyword.
As of now, we only support autograd for floating point :class:`Tensor` types (half, float, double and bfloat16) and complex :class:`Tensor` types (cfloat, cdouble).
"""
import warnings
from typing import Any, Callable, cast, List, Optional, Sequence, Tuple, Union

import torch

from torch.types import _size, _TensorOrTensors, _TensorOrTensorsOrGradEdge
from .. import _vmap_internals
from ..overrides import handle_torch_function, has_torch_function, is_tensor_like
from . import forward_ad, functional, graph
from .anomaly_mode import detect_anomaly, set_detect_anomaly
from .function import Function, NestedIOFunction
from .grad_mode import (
    _force_original_view_tracking,
    _unsafe_preserve_version_counter,
    enable_grad,
    inference_mode,
    no_grad,
    set_grad_enabled,
    set_multithreading_enabled,
)
from .gradcheck import gradcheck, gradgradcheck
from .graph import _engine_run_backward

from .variable import Variable

__all__ = ["Variable", "Function", "backward", "grad_mode"]

_OptionalTensor = Optional[torch.Tensor]
_ShapeorNestedShape = Union[_size, Sequence[_size], torch.Tensor]


def _calculate_shape(
    output: torch.Tensor, grad: torch.Tensor, is_grads_batched: bool
) -> Tuple[_ShapeorNestedShape, _ShapeorNestedShape]:
    # is_same_size ensures that both tensors are either nested or non-nested
    # circular import
    from torch.nested._internal.nested_tensor import NestedTensor

    if output.is_nested and not isinstance(output, NestedTensor):
        if is_grads_batched:
            raise RuntimeError("Batched grads are not supported with Nested Tensor.")
        out_shape = output._nested_tensor_size()
        grad_shape = grad._nested_tensor_size()

        return out_shape, grad_shape

    reg_out_shape = output.shape
    reg_grad_shape = grad.shape if not is_grads_batched else grad.shape[1:]
    return reg_out_shape, reg_grad_shape
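# Shape-comparison sketch (illustrative, not part of this module): for a
# non-nested output with ``is_grads_batched=True``, the leading dimension of
# the grad is treated as the batch dimension, so only ``grad.shape[1:]`` is
# compared against the output's shape. The tensors below are hypothetical.
#
# >>> out = torch.randn(3, 4)               # regular (non-nested) output
# >>> batched_grad = torch.randn(5, 3, 4)   # 5 batched grad_outputs
# >>> _calculate_shape(out, batched_grad, is_grads_batched=True)
# (torch.Size([3, 4]), torch.Size([3, 4]))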


def _make_grads(
    outputs: Sequence[torch.Tensor],
    grads: Sequence[_OptionalTensor],
    is_grads_batched: bool,
) -> Tuple[_OptionalTensor, ...]:
    new_grads: List[_OptionalTensor] = []
    for out, grad in zip(outputs, grads):
        if isinstance(grad, torch.Tensor):
            from torch.fx.experimental.symbolic_shapes import expect_true, sym_eq

            first_grad = grad if not is_grads_batched else grad[0]
            # TODO: We can remove this conditional once we uniformly use a singleton int to represent the ragged dimension, so that size() calls on nested tensors work
            if out.is_nested or first_grad.is_nested:
                shape_matches = torch.is_same_size(out, first_grad)
            else:
                # We need to do a regular size check, without going through the operator, to be able to handle unbacked symints
                # (expect_true ensures we can deal with the unbacked case)
                shape_matches = expect_true(sym_eq(out.size(), first_grad.size()))
            if not shape_matches:
                out_shape, grad_shape = _calculate_shape(
                    out, first_grad, is_grads_batched
                )
                if is_grads_batched:
                    raise RuntimeError(
                        "If `is_grads_batched=True`, we interpret the first "
                        "dimension of each grad_output as the batch dimension. "
                        "The sizes of the remaining dimensions are expected to match "
                        "the shape of corresponding output, but a mismatch "
                        "was detected: grad_output["
                        + str(grads.index(grad))
                        + "] has a shape of "
                        + str(grad_shape)
                        <span class="o
优云智算