# Source code for torch.nn.modules.linear

import math
from typing import Any

import torch
from torch import Tensor
from torch.nn.parameter import Parameter, UninitializedParameter
from .. import functional as F
from .. import init
from .module import Module
from .lazy import LazyModuleMixin


# Names exported by ``from <this module> import *``.
__all__ = [
    'Bilinear',
    'Identity',
    'LazyLinear',
    'Linear',
]


class Identity(Module):
    r"""A placeholder identity operator that ignores its constructor arguments.

    Args:
        args: any positional arguments (unused)
        kwargs: any keyword arguments (unused)

    Shape:
        - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
        - Output: :math:`(*)`, same shape as the input.

    Examples::

        >>> m = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 20])

    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Accept and discard any arguments; only the base ``Module`` is initialized."""
        super().__init__()

    def forward(self, input: Tensor) -> Tensor:
        """Return ``input`` itself, unchanged (the same object, not a copy)."""
        return input


class Linear(Module):
    r"""对输入数据应用线性变换: :math:`y = xA^T + b`。

    该模块支持 :ref:`TensorFloat32`。

    在某些 ROCm 设备上，当使用 float16 输入时，该模块将在反向传播中使用 :ref:`不同的精度`。

    参数:
        in_features: 每个输入样本的大小
        out_features: 每个输出样本的大小
        bias: 如果设置为 ``False``，则该层不会学习加性偏置。
            默认值: ``True``

    形状:
        - 输入: :math:`(*, H_{in})` 其中 :math:`*` 表示任意数量的维度，包括无，且 :math:`H_{in} = \text{in\_features}`。
        - 输出: :math:`(*, H_{out})` 其中除最后一个维度外，其余维度与输入形状相同，且 :math:`H_{out} = \text{out\_features}`。

    属性:
        weight: 模块的可学习权重，形状为 :math:`(\text{out\_features}, \text{in\_features})`。
            值从 :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` 初始化，其中 :math:`k = \frac{1}{\text{in\_features}}`
        bias: 模块的可学习偏置，形状为 :math:`(\text{out\_features})`。
            如果 :attr:`bias` 为 ``True``，则值从 :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` 初始化，其中 :math:`k = \frac{1}{\text{in\_features}}`

    示例::

        >>> m = nn.Linear(20, 30)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 30])
    """

    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
        if bias:
            self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        # 在 kaiming_uniform 中设置 a=sqrt(5) 与从
        # uniform(-1/sqrt(in_features), 1/sqrt(in_features)) 初始化相同。有关详细信息，请参阅
        # https://github.com/pytorch/pytorch/issues/57109
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input: Tensor) -> <span class