# Source code for torch.cuda.memory

r"""此包增加了对在CUDA中实现的设备内存管理的支持。"""

import collections
import contextlib
import ctypes
import pickle
import sys
import warnings
from inspect import signature

from typing import Any, Dict, Optional, Tuple, Union

import torch
from torch import _C

from torch.types import Device
from .._utils import _dummy_type
from . import _get_device_index, _get_nvml_device_index, _lazy_init, is_initialized

from ._memory_viz import memory as _memory, segments as _segments

# Names exported by ``from <this module> import *``; this list declares the
# module's public API.
__all__ = [
    "caching_allocator_alloc",
    "caching_allocator_delete",
    "set_per_process_memory_fraction",
    "empty_cache",
    "memory_stats",
    "memory_stats_as_nested_dict",
    "reset_accumulated_memory_stats",
    "reset_peak_memory_stats",
    "reset_max_memory_allocated",
    "reset_max_memory_cached",
    "memory_allocated",
    "max_memory_allocated",
    "memory_reserved",
    "max_memory_reserved",
    "memory_cached",
    "max_memory_cached",
    "memory_snapshot",
    "memory_summary",
    "list_gpu_processes",
    "mem_get_info",
    "get_allocator_backend",
    "CUDAPluggableAllocator",
    "change_current_allocator",
]


if not hasattr(torch._C, "_cuda_CUDAAllocator"):
    # Define a dummy base class so the name exists on torch._C even when the
    # extension module does not provide it.
    torch._C.__dict__["_cuda_CUDAAllocator"] = _dummy_type("_cuda_CUDAAllocator")


def _host_allocator():
    """Return the object produced by ``torch._C._cuda_cudaHostAllocator()``.

    ``_lazy_init()`` is called first, before the binding is invoked.
    """
    _lazy_init()
    host_allocator = torch._C._cuda_cudaHostAllocator()
    return host_allocator


@contextlib.contextmanager
def _free_mutex():
    """Context manager that holds the mutex exposed by ``torch._C`` for its body.

    ``torch._C._cuda_lock_mutex()`` is called on entry, and
    ``torch._C._cuda_unlock_mutex()`` is always called on exit, including when
    the body raises.
    """
    torch._C._cuda_lock_mutex()
    with contextlib.ExitStack() as release_on_exit:
        # The callback runs when the ExitStack unwinds, on normal exit or on error.
        release_on_exit.callback(lambda: torch._C._cuda_unlock_mutex())
        yield


def caching_allocator_alloc(size, device: Union[Device, int] = None, stream=None):
    r"""Perform a memory allocation using the CUDA memory allocator.

    Memory is allocated for a given device and stream. This function is
    intended for interoperability with other frameworks. Allocated memory
    is released through :func:`~torch.cuda.caching_allocator_delete`.

    Args:
        size (int): number of bytes to allocate.
        device (torch.device or int, optional): selected device. If ``None``,
            the device returned by ``torch.cuda.current_device()`` is used.
        stream (torch.cuda.Stream or int, optional): selected stream. If
            ``None``, the stream returned by
            ``torch.cuda.current_stream(device)`` is used.

    Returns:
        The value returned by the raw allocation binding (presumably the
        address of the allocated memory -- confirm against the C++ binding).

    Raises:
        TypeError: if ``stream`` is neither a ``torch.cuda.Stream`` nor an
            ``int``.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    if device is None:
        device = torch.cuda.current_device()
    device = _get_device_index(device)
    if stream is None:
        stream = torch.cuda.current_stream(device)
    if isinstance(stream, torch.cuda.streams.Stream):
        # Stream objects are reduced to their integer ``cuda_stream`` value.
        stream = stream.cuda_stream
    if not isinstance(stream, int):
        raise TypeError(
            "流参数的类型无效，必须是 `torch.cuda.Stream` 或 `int`，表示现有流的指针"
        )
    # Make ``device`` current for the duration of the raw allocation call.
    with torch.cuda.device(device):
        return torch._C._cuda_cudaCachingAllocator_raw_alloc(size, stream)


def caching_allocator_delete(mem_ptr):
    r"""Delete memory allocated using the CUDA memory allocator.

    Memory allocated with :func:`~torch.cuda.caching_allocator_alloc` is
    freed here. The associated device and stream are tracked inside the
    allocator.

    Args:
        mem_ptr (int): memory address to be freed by the allocator.

    .. note::
        See :ref:`cuda-memory-management` for more details about GPU memory
        management.
    """
    torch._C._cuda_cudaCachingAllocator_raw_delete(mem_ptr)


[docs]def set_per_process_memory_fraction(
    fraction, device: Union[Device, int] = None
) -> None:
    r"""为进程设置内存分数。

    该分数用于限制缓存分配器在CUDA设备上分配的内存。
    允许的值等于总可见内存乘以分数。
    如果在进程中尝试分配超过允许值的内存，分配器将引发内存不足错误。

    参数:
        fraction(float): 范围: 0~1。允许的内存等于 total_memory * fraction。
        device (torch.device 或 int, 可选): 选定的设备。如果为 ``None``，则使用默认的CUDA设备。
    .. 注意::
        通常，总可用空闲内存小于总容量