torch.cuda.streams 的源代码

```python
import ctypes

import torch
from torch._streambase import _EventBase, _StreamBase
from .._utils import _dummy_type


if not hasattr(torch._C, "_CudaStreamBase"):
    # 定义虚拟基类
    torch._C.__dict__["_CudaStreamBase"] = _dummy_type("_CudaStreamBase")
    torch._C.__dict__["_CudaEventBase"] = _dummy_type("_CudaEventBase")


[docs]class Stream(torch._C._CudaStreamBase, _StreamBase):
    r"""CUDA 流包装器。

    CUDA 流是属于特定设备的线性执行序列，独立于其他流。有关详细信息，请参阅 :ref:`cuda-semantics`。

    参数:
        device(torch.device 或 int, 可选): 分配流的设备。如果 :attr:`device` 为 ``None`` (默认) 或负整数，则使用当前设备。
        priority(int, 可选): 流的优先级，应为 0 或负数，负数表示高优先级。默认情况下，流的优先级为 0。

    """

    def __new__(cls, device=None, priority=0, **kwargs):
        # 设置设备管理器开销较大，因此除非必要，否则我们避免使用它
        if device is None or ("stream_id" in kwargs and "device_index" in kwargs):
            return super().__new__(cls, priority=priority, **kwargs)
        else:
            with torch.cuda.device(device):
                return super().__new__(cls, priority=priority, **kwargs)

[docs]    def wait_event(self, event):
        r"""使提交到流的所有未来工作等待事件。

        参数:
            event (torch.cuda.Event): 等待的事件。

        .. 注意:: 这是 ``cudaStreamWaitEvent()`` 的包装器: 有关更多信息，请参阅 `CUDA 流文档`_。

           此函数在等待 :attr:`event` 之前返回: 只有未来的操作会受到影响。

        .. _CUDA 流文档:
           https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html
        """
        event.wait(self)

[docs]    def wait_stream(self, stream):
        r"""与另一个流同步。

        提交到此流的所有未来工作将等待直到在调用时提交到给定流的所有内核完成。

        参数:
            stream (Stream): 要同步的流。

        .. 注意:: 此函数在等待 :attr:`stream` 中当前排队的内核之前返回: 只有未来的操作会受到影响。
        """
        self.wait_event(stream.record_event())

[docs]    def record_event(self, event=None):
        r"""记录事件。

        参数:
            event (torch.cuda.Event, 可选): 要记录的事件。如果未给出，将分配一个新的事件。

        返回:
            记录的事件。
        """
        if event is None:
            event = Event()
        event.record(self)
        return event

[docs]    def query(self):
        r"""检查所有提交的工作是否已完成。

        返回:
            一个布尔值，指示此流中的所有内核是否已完成。
        """
        return super().query()

[docs]    def synchronize(self):
        r"""等待此流中的所有内核完成。

        .. 注意:: 这是 ``cudaStreamSynchronize()`` 的包装器: 有关更多信息，请参阅 `CUDA 流文档`_。
        """
        super().synchronize()

    @property
    def _as_parameter_(self):
        return ctypes.c_void_p(self.cuda_stream)

    def __eq__(self, o):
        if isinstance(o, Stream):
            return super().__eq__(o)
        return False

    def __hash__(self):
        return hash((self.cuda_stream, self.device))

    def __repr__(self):
        return f"{self.device} cuda_stream={self.cuda_stream:#x}>"


[docs]class ExternalStream(Stream):
    r"""外部分配的 CUDA 流包装器。

    此类用于包装在其他库中分配的流，以便促进数据交换和多库交互。