6.36. CUDA运行时使用的数据类型
类
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- union
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
定义
- #define CUDA_EGL_MAX_PLANES 3
- #define CUDA_IPC_HANDLE_SIZE 64
- #define cudaArrayColorAttachment 0x20
- #define cudaArrayCubemap 0x04
- #define cudaArrayDefault 0x00
- #define cudaArrayDeferredMapping 0x80
- #define cudaArrayLayered 0x01
- #define cudaArraySparse 0x40
- #define cudaArraySparsePropertiesSingleMipTail 0x1
- #define cudaArraySurfaceLoadStore 0x02
- #define cudaArrayTextureGather 0x08
- #define cudaCooperativeLaunchMultiDeviceNoPostSync 0x02
- #define cudaCooperativeLaunchMultiDeviceNoPreSync 0x01
- #define cudaCpuDeviceId ((int)-1)
- #define cudaDeviceBlockingSync 0x04
- #define cudaDeviceLmemResizeToMax 0x10
- #define cudaDeviceMapHost 0x08
- #define cudaDeviceMask 0xff
- #define cudaDeviceScheduleAuto 0x00
- #define cudaDeviceScheduleBlockingSync 0x04
- #define cudaDeviceScheduleMask 0x07
- #define cudaDeviceScheduleSpin 0x01
- #define cudaDeviceScheduleYield 0x02
- #define cudaDeviceSyncMemops 0x80
- #define cudaEventBlockingSync 0x01
- #define cudaEventDefault 0x00
- #define cudaEventDisableTiming 0x02
- #define cudaEventInterprocess 0x04
- #define cudaEventRecordDefault 0x00
- #define cudaEventRecordExternal 0x01
- #define cudaEventWaitDefault 0x00
- #define cudaEventWaitExternal 0x01
- #define cudaExternalMemoryDedicated 0x1
- #define cudaExternalSemaphoreSignalSkipNvSciBufMemSync 0x01
- #define cudaExternalSemaphoreWaitSkipNvSciBufMemSync 0x02
- #define cudaGraphKernelNodePortDefault 0
- #define cudaGraphKernelNodePortLaunchCompletion 2
- #define cudaGraphKernelNodePortProgrammatic 1
- #define cudaHostAllocDefault 0x00
- #define cudaHostAllocMapped 0x02
- #define cudaHostAllocPortable 0x01
- #define cudaHostAllocWriteCombined 0x04
- #define cudaHostRegisterDefault 0x00
- #define cudaHostRegisterIoMemory 0x04
- #define cudaHostRegisterMapped 0x02
- #define cudaHostRegisterPortable 0x01
- #define cudaHostRegisterReadOnly 0x08
- #define cudaInitDeviceFlagsAreValid 0x01
- #define cudaInvalidDeviceId ((int)-2)
- #define cudaIpcMemLazyEnablePeerAccess 0x01
- #define cudaMemAttachGlobal 0x01
- #define cudaMemAttachHost 0x02
- #define cudaMemAttachSingle 0x04
- #define cudaMemPoolCreateUsageHwDecompress 0x2
- #define cudaNvSciSyncAttrSignal 0x1
- #define cudaNvSciSyncAttrWait 0x2
- #define cudaOccupancyDefault 0x00
- #define cudaOccupancyDisableCachingOverride 0x01
- #define cudaPeerAccessDefault 0x00
- #define cudaStreamDefault 0x00
- #define cudaStreamLegacy ((cudaStream_t)0x1)
- #define cudaStreamNonBlocking 0x01
- #define cudaStreamPerThread ((cudaStream_t)0x2)
类型定义
- typedef cudaArray * cudaArray_const_t
- typedef cudaArray * cudaArray_t
- typedef cudaAsyncCallbackEntry * cudaAsyncCallbackHandle_t
- typedef CUeglStreamConnection_st * cudaEglStreamConnection
- typedef enum cudaError cudaError_t
- typedef CUevent_st * cudaEvent_t
- typedef CUexternalMemory_st * cudaExternalMemory_t
- typedef CUexternalSemaphore_st * cudaExternalSemaphore_t
- typedef CUfunc_st * cudaFunction_t
- typedef unsigned long long cudaGraphConditionalHandle
- typedef CUgraphDeviceUpdatableNode_st * cudaGraphDeviceNode_t
- typedef CUgraphExec_st * cudaGraphExec_t
- typedef CUgraphNode_st * cudaGraphNode_t
- typedef CUgraph_st * cudaGraph_t
- typedef cudaGraphicsResource * cudaGraphicsResource_t
- typedef void(CUDART_CB* cudaHostFn_t )( void* userData )
- typedef CUkern_st * cudaKernel_t
- typedef CUlib_st * cudaLibrary_t
- typedef CUmemPoolHandle_st * cudaMemPool_t
- typedef cudaMipmappedArray * cudaMipmappedArray_const_t
- typedef cudaMipmappedArray * cudaMipmappedArray_t
- typedef CUstream_st * cudaStream_t
- typedef unsigned long long cudaSurfaceObject_t
- typedef unsigned long long cudaTextureObject_t
- typedef CUuserObject_st * cudaUserObject_t
枚举
- enum cudaAccessProperty
- enum cudaAsyncNotificationType
- enum cudaCGScope
- enum cudaChannelFormatKind
- enum cudaClusterSchedulingPolicy
- enum cudaComputeMode
- enum cudaDeviceAttr
- enum cudaDeviceNumaConfig
- enum cudaDeviceP2PAttr
- enum cudaDriverEntryPointQueryResult
- enum cudaEglColorFormat
- enum cudaEglFrameType
- enum cudaEglResourceLocationFlags
- enum cudaError
- enum cudaExternalMemoryHandleType
- enum cudaExternalSemaphoreHandleType
- enum cudaFlushGPUDirectRDMAWritesOptions
- enum cudaFlushGPUDirectRDMAWritesScope
- enum cudaFlushGPUDirectRDMAWritesTarget
- enum cudaFuncAttribute
- enum cudaFuncCache
- enum cudaGPUDirectRDMAWritesOrdering
- enum cudaGetDriverEntryPointFlags
- enum cudaGraphConditionalNodeType
- enum cudaGraphDebugDotFlags
- enum cudaGraphDependencyType
- enum cudaGraphExecUpdateResult
- enum cudaGraphInstantiateFlags
- enum cudaGraphInstantiateResult
- enum cudaGraphKernelNodeField
- enum cudaGraphMemAttributeType
- enum cudaGraphNodeType
- enum cudaGraphicsCubeFace
- enum cudaGraphicsMapFlags
- enum cudaGraphicsRegisterFlags
- enum cudaJitOption
- enum cudaJit_CacheMode
- enum cudaJit_Fallback
- enum cudaLaunchAttributeID
- enum cudaLaunchMemSyncDomain
- enum cudaLibraryOption
- enum cudaLimit
- enum cudaMemAccessFlags
- enum cudaMemAllocationHandleType
- enum cudaMemAllocationType
- enum cudaMemLocationType
- enum cudaMemPoolAttr
- enum cudaMemRangeAttribute
- enum cudaMemcpy3DOperandType
- enum cudaMemcpyFlags
- enum cudaMemcpyKind
- enum cudaMemoryAdvise
- enum cudaMemoryType
- enum cudaResourceType
- enum cudaResourceViewFormat
- enum cudaSharedCarveout
- enum cudaSharedMemConfig
- enum cudaStreamCaptureMode
- enum cudaStreamCaptureStatus
- enum cudaStreamUpdateCaptureDependenciesFlags
- enum cudaSurfaceBoundaryMode
- enum cudaSurfaceFormatMode
- enum cudaTextureAddressMode
- enum cudaTextureFilterMode
- enum cudaTextureReadMode
- enum cudaUserObjectFlags
- enum cudaUserObjectRetainFlags
定义
- #define CUDA_EGL_MAX_PLANES 3
-
每帧最大平面数
- #define CUDA_IPC_HANDLE_SIZE 64
-
CUDA IPC 句柄大小
- #define cudaArrayColorAttachment 0x20
-
如果mipmapped数组在图形API中用作颜色目标,则必须在cudaExternalMemoryGetMappedMipmappedArray中设置
- #define cudaArrayCubemap 0x04
-
必须在cudaMalloc3DArray中设置以创建立方体贴图CUDA数组
- #define cudaArrayDefault 0x00
-
默认的CUDA数组分配标志
- #define cudaArrayDeferredMapping 0x80
-
必须在cudaMallocArray、cudaMalloc3DArray或cudaMallocMipmappedArray中设置,以创建延迟映射的CUDA数组或CUDA mipmapped数组
- #define cudaArrayLayered 0x01
-
必须在cudaMalloc3DArray中设置以创建分层CUDA数组
- #define cudaArraySparse 0x40
-
必须在cudaMallocArray、cudaMalloc3DArray或cudaMallocMipmappedArray中设置,以创建稀疏CUDA数组或CUDA mipmapped数组
- #define cudaArraySparsePropertiesSingleMipTail 0x1
-
表示分层稀疏CUDA数组或CUDA mipmapped数组的所有层共享一个mip尾部区域
- #define cudaArraySurfaceLoadStore 0x02
-
必须在cudaMallocArray或cudaMalloc3DArray中设置,以便将表面绑定到CUDA数组
- #define cudaArrayTextureGather 0x08
-
必须在cudaMallocArray或cudaMalloc3DArray中设置,以便在CUDA数组上执行纹理收集操作
- #define cudaCooperativeLaunchMultiDeviceNoPostSync 0x02
-
如果设置此选项,任何后续推送到参与调用cudaLaunchCooperativeKernelMultiDevice的流中的工作,将仅等待与该流对应的GPU上启动的内核完成后才开始执行。
- #define cudaCooperativeLaunchMultiDeviceNoPreSync 0x01
-
如果设置此项,作为cudaLaunchCooperativeKernelMultiDevice一部分启动的每个内核,仅等待对应GPU流中的先前工作完成后才开始执行内核。
- #define cudaCpuDeviceId ((int)-1)
-
代表CPU的设备ID
- #define cudaDeviceBlockingSync 0x04
- #define cudaDeviceLmemResizeToMax 0x10
-
设备标志 - 启动后保留本地内存分配
- #define cudaDeviceMapHost 0x08
-
设备标志 - 支持映射固定内存分配
- #define cudaDeviceMask 0xff
-
设备标志掩码
- #define cudaDeviceScheduleAuto 0x00
-
设备标志 - 自动调度
- #define cudaDeviceScheduleBlockingSync 0x04
-
设备标志 - 使用阻塞同步
- #define cudaDeviceScheduleMask 0x07
-
设备调度标志掩码
- #define cudaDeviceScheduleSpin 0x01
-
设备标志 - 默认自旋调度
- #define cudaDeviceScheduleYield 0x02
-
设备标志 - 默认让出(yield)调度
- #define cudaDeviceSyncMemops 0x80
-
设备标志 - 确保此上下文上的同步内存操作将同步
- #define cudaEventBlockingSync 0x01
-
事件使用阻塞同步
- #define cudaEventDefault 0x00
-
默认事件标志
- #define cudaEventDisableTiming 0x02
-
事件将不会记录时间数据
- #define cudaEventInterprocess 0x04
-
事件适用于进程间使用。必须设置cudaEventDisableTiming
- #define cudaEventRecordDefault 0x00
-
默认事件记录标志
- #define cudaEventRecordExternal 0x01
-
在执行流捕获时,事件在图中被捕获为外部事件节点
- #define cudaEventWaitDefault 0x00
-
默认事件等待标志
- #define cudaEventWaitExternal 0x01
-
在执行流捕获时,事件在图中被捕获为外部事件节点
- #define cudaExternalMemoryDedicated 0x1
-
表示外部内存对象是一个专用资源
- #define cudaExternalSemaphoreSignalSkipNvSciBufMemSync 0x01
-
当cudaExternalSemaphoreSignalParams的flags参数包含此标志时,表示对外部信号量对象进行信号通知时应跳过对所有以cudaExternalMemoryHandleTypeNvSciBuf方式导入的外部内存对象执行适当的内存同步操作,默认情况下会执行这些操作以确保与同一NvSciBuf内存对象的其他导入者的数据一致性。
- #define cudaExternalSemaphoreWaitSkipNvSciBufMemSync 0x02
-
当cudaExternalSemaphoreWaitParams的flags参数包含此标志时,表示等待外部信号量对象时应跳过对所有以cudaExternalMemoryHandleTypeNvSciBuf方式导入的外部内存对象执行适当的内存同步操作,默认情况下会执行这些操作以确保与同一NvSciBuf内存对象的其他导入者的数据一致性。
- #define cudaGraphKernelNodePortDefault 0
-
当内核完成执行时,该端口将被激活。
- #define cudaGraphKernelNodePortLaunchCompletion 2
-
当内核的所有块开始执行时,此端口将被激活。另请参阅cudaLaunchAttributeLaunchCompletionEvent。
- #define cudaGraphKernelNodePortProgrammatic 1
-
当内核的所有块都执行了cudaTriggerProgrammaticLaunchCompletion()或已终止时,此端口将被激活。它必须与边类型cudaGraphDependencyTypeProgrammatic一起使用。另请参阅cudaLaunchAttributeProgrammaticEvent。
- #define cudaHostAllocDefault 0x00
-
默认的页面锁定分配标志
- #define cudaHostAllocMapped 0x02
-
将分配映射到设备空间
- #define cudaHostAllocPortable 0x01
-
所有CUDA上下文均可访问的固定内存
- #define cudaHostAllocWriteCombined 0x04
-
写合并内存
- #define cudaHostRegisterDefault 0x00
-
默认主机内存注册标志
- #define cudaHostRegisterIoMemory 0x04
-
内存映射I/O空间
- #define cudaHostRegisterMapped 0x02
-
将已注册的内存映射到设备空间
- #define cudaHostRegisterPortable 0x01
-
所有CUDA上下文均可访问的固定内存
- #define cudaHostRegisterReadOnly 0x08
-
内存映射只读
- #define cudaInitDeviceFlagsAreValid 0x01
-
告知CUDA运行时,在cudaInitDevice调用中正在设置DeviceFlags
- #define cudaInvalidDeviceId ((int)-2)
-
表示无效设备的设备ID
- #define cudaIpcMemLazyEnablePeerAccess 0x01
-
根据需要自动启用远程设备之间的对等访问
- #define cudaMemAttachGlobal 0x01
-
内存可以被任何设备上的任何流访问
- #define cudaMemAttachHost 0x02
-
任何设备上的任何流都无法访问该内存
- #define cudaMemAttachSingle 0x04
-
内存只能由关联设备上的单个流访问
- #define cudaMemPoolCreateUsageHwDecompress 0x2
-
如果设置了此标志,表示该内存将用作硬件加速解压缩的缓冲区。
- #define cudaNvSciSyncAttrSignal 0x1
-
当cudaDeviceGetNvSciSyncAttributes的flags参数被设置为该值时,表示应用程序需要由cudaDeviceGetNvSciSyncAttributes填充特定于信号发送者的NvSciSyncAttr属性。
- #define cudaNvSciSyncAttrWait 0x2
-
当cudaDeviceGetNvSciSyncAttributes的flags参数被设置为该值时,表示应用程序需要由cudaDeviceGetNvSciSyncAttributes填充特定于等待者的NvSciSyncAttr属性。
- #define cudaOccupancyDefault 0x00
-
默认行为
- #define cudaOccupancyDisableCachingOverride 0x01
-
假设全局缓存已启用且无法自动关闭
- #define cudaPeerAccessDefault 0x00
-
默认对等节点寻址启用标志
- #define cudaStreamDefault 0x00
-
默认流标志
- #define cudaStreamLegacy ((cudaStream_t)0x1)
- #define cudaStreamNonBlocking 0x01
-
流不与流0(NULL流)同步
- #define cudaStreamPerThread ((cudaStream_t)0x2)
类型定义
- typedef cudaArray * cudaArray_const_t
-
CUDA数组(作为源复制参数)
- typedef cudaArray * cudaArray_t
-
CUDA数组
- typedef cudaAsyncCallbackEntry * cudaAsyncCallbackHandle_t
-
CUDA异步回调句柄
- typedef CUeglStreamConnection_st * cudaEglStreamConnection
-
CUDA EGL流连接
- typedef enum cudaError cudaError_t
-
CUDA 错误类型
- typedef CUevent_st * cudaEvent_t
-
CUDA事件类型
- typedef CUexternalMemory_st * cudaExternalMemory_t
-
CUDA 外部内存
- typedef CUexternalSemaphore_st * cudaExternalSemaphore_t
-
CUDA外部信号量
- typedef CUfunc_st * cudaFunction_t
-
CUDA 函数
- typedef unsigned long long cudaGraphConditionalHandle
-
用于条件图节点的CUDA句柄
- typedef CUgraphDeviceUpdatableNode_st * cudaGraphDeviceNode_t
-
用于设备端节点更新的CUDA设备节点句柄
- typedef CUgraphExec_st * cudaGraphExec_t
-
CUDA可执行(可启动)图
- typedef CUgraphNode_st * cudaGraphNode_t
-
CUDA 图节点。
- typedef CUgraph_st * cudaGraph_t
-
CUDA图
- typedef cudaGraphicsResource * cudaGraphicsResource_t
-
CUDA图形资源类型
- typedef void(CUDART_CB* cudaHostFn_t )( void* userData )
-
CUDA 主机函数
- userData
- Argument value passed to the function
- typedef CUkern_st * cudaKernel_t
-
CUDA内核
- typedef CUlib_st * cudaLibrary_t
-
CUDA库
- typedef CUmemPoolHandle_st * cudaMemPool_t
-
CUDA 内存池
- typedef cudaMipmappedArray * cudaMipmappedArray_const_t
-
CUDA mipmapped数组(作为源参数)
- typedef cudaMipmappedArray * cudaMipmappedArray_t
-
CUDA mipmapped数组
- typedef CUstream_st * cudaStream_t
-
CUDA流
- typedef unsigned long long cudaSurfaceObject_t
-
一个表示CUDA Surface对象的不透明值
- typedef unsigned long long cudaTextureObject_t
-
一个表示CUDA纹理对象的不透明值
- typedef CUuserObject_st * cudaUserObject_t
-
用于图的CUDA用户对象
参数
枚举
- enum cudaAccessProperty
-
指定cudaAccessPolicyWindow中hitProp和missProp成员所使用的性能提示。
数值
- cudaAccessPropertyNormal = 0
- Normal cache persistence.
- cudaAccessPropertyStreaming = 1
- Streaming access is less likely to persist in cache.
- cudaAccessPropertyPersisting = 2
- Persisting access is more likely to persist in cache.
- enum cudaAsyncNotificationType
-
可能发生的异步通知类型
数值
- cudaAsyncNotificationTypeOverBudget = 0x1
- enum cudaCGScope
-
CUDA协作组作用域
数值
- cudaCGScopeInvalid = 0
- Invalid cooperative group scope
- cudaCGScopeGrid = 1
- Scope represented by a grid_group
- cudaCGScopeMultiGrid = 2
- Scope represented by a multi_grid_group
- enum cudaChannelFormatKind
-
通道格式类型
数值
- cudaChannelFormatKindSigned = 0
- Signed channel format
- cudaChannelFormatKindUnsigned = 1
- Unsigned channel format
- cudaChannelFormatKindFloat = 2
- Float channel format
- cudaChannelFormatKindNone = 3
- No channel format
- cudaChannelFormatKindNV12 = 4
- Unsigned 8-bit integers, planar 4:2:0 YUV format
- cudaChannelFormatKindUnsignedNormalized8X1 = 5
- 1 channel unsigned 8-bit normalized integer
- cudaChannelFormatKindUnsignedNormalized8X2 = 6
- 2 channel unsigned 8-bit normalized integer
- cudaChannelFormatKindUnsignedNormalized8X4 = 7
- 4 channel unsigned 8-bit normalized integer
- cudaChannelFormatKindUnsignedNormalized16X1 = 8
- 1 channel unsigned 16-bit normalized integer
- cudaChannelFormatKindUnsignedNormalized16X2 = 9
- 2 channel unsigned 16-bit normalized integer
- cudaChannelFormatKindUnsignedNormalized16X4 = 10
- 4 channel unsigned 16-bit normalized integer
- cudaChannelFormatKindSignedNormalized8X1 = 11
- 1 channel signed 8-bit normalized integer
- cudaChannelFormatKindSignedNormalized8X2 = 12
- 2 channel signed 8-bit normalized integer
- cudaChannelFormatKindSignedNormalized8X4 = 13
- 4 channel signed 8-bit normalized integer
- cudaChannelFormatKindSignedNormalized16X1 = 14
- 1 channel signed 16-bit normalized integer
- cudaChannelFormatKindSignedNormalized16X2 = 15
- 2 channel signed 16-bit normalized integer
- cudaChannelFormatKindSignedNormalized16X4 = 16
- 4 channel signed 16-bit normalized integer
- cudaChannelFormatKindUnsignedBlockCompressed1 = 17
- 4 channel unsigned normalized block-compressed (BC1 compression) format
- cudaChannelFormatKindUnsignedBlockCompressed1SRGB = 18
- 4 channel unsigned normalized block-compressed (BC1 compression) format with sRGB encoding
- cudaChannelFormatKindUnsignedBlockCompressed2 = 19
- 4 channel unsigned normalized block-compressed (BC2 compression) format
- cudaChannelFormatKindUnsignedBlockCompressed2SRGB = 20
- 4 channel unsigned normalized block-compressed (BC2 compression) format with sRGB encoding
- cudaChannelFormatKindUnsignedBlockCompressed3 = 21
- 4 channel unsigned normalized block-compressed (BC3 compression) format
- cudaChannelFormatKindUnsignedBlockCompressed3SRGB = 22
- 4 channel unsigned normalized block-compressed (BC3 compression) format with sRGB encoding
- cudaChannelFormatKindUnsignedBlockCompressed4 = 23
- 1 channel unsigned normalized block-compressed (BC4 compression) format
- cudaChannelFormatKindSignedBlockCompressed4 = 24
- 1 channel signed normalized block-compressed (BC4 compression) format
- cudaChannelFormatKindUnsignedBlockCompressed5 = 25
- 2 channel unsigned normalized block-compressed (BC5 compression) format
- cudaChannelFormatKindSignedBlockCompressed5 = 26
- 2 channel signed normalized block-compressed (BC5 compression) format
- cudaChannelFormatKindUnsignedBlockCompressed6H = 27
- 3 channel unsigned half-float block-compressed (BC6H compression) format
- cudaChannelFormatKindSignedBlockCompressed6H = 28
- 3 channel signed half-float block-compressed (BC6H compression) format
- cudaChannelFormatKindUnsignedBlockCompressed7 = 29
- 4 channel unsigned normalized block-compressed (BC7 compression) format
- cudaChannelFormatKindUnsignedBlockCompressed7SRGB = 30
- 4 channel unsigned normalized block-compressed (BC7 compression) format with sRGB encoding
- cudaChannelFormatKindUnsignedNormalized1010102 = 31
- 4 channel unsigned normalized (10-bit, 10-bit, 10-bit, 2-bit) format
- enum cudaClusterSchedulingPolicy
-
集群调度策略。这些可以传递给cudaFuncSetAttribute
数值
- cudaClusterSchedulingPolicyDefault = 0
- the default policy
- cudaClusterSchedulingPolicySpread = 1
- spread the blocks within a cluster to the SMs
- cudaClusterSchedulingPolicyLoadBalancing = 2
- allow the hardware to load-balance the blocks in a cluster to the SMs
- enum cudaComputeMode
-
CUDA设备计算模式
数值
- cudaComputeModeDefault = 0
- Default compute mode (Multiple threads can use cudaSetDevice() with this device)
- cudaComputeModeExclusive = 1
- Compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice() with this device)
- cudaComputeModeProhibited = 2
- Compute-prohibited mode (No threads can use cudaSetDevice() with this device)
- cudaComputeModeExclusiveProcess = 3
- Compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice() with this device)
- enum cudaDeviceAttr
-
CUDA设备属性
数值
- cudaDevAttrMaxThreadsPerBlock = 1
- Maximum number of threads per block
- cudaDevAttrMaxBlockDimX = 2
- Maximum block dimension X
- cudaDevAttrMaxBlockDimY = 3
- Maximum block dimension Y
- cudaDevAttrMaxBlockDimZ = 4
- Maximum block dimension Z
- cudaDevAttrMaxGridDimX = 5
- Maximum grid dimension X
- cudaDevAttrMaxGridDimY = 6
- Maximum grid dimension Y
- cudaDevAttrMaxGridDimZ = 7
- Maximum grid dimension Z
- cudaDevAttrMaxSharedMemoryPerBlock = 8
- Maximum shared memory available per block in bytes
- cudaDevAttrTotalConstantMemory = 9
- Memory available on device for __constant__ variables in a CUDA C kernel in bytes
- cudaDevAttrWarpSize = 10
- Warp size in threads
- cudaDevAttrMaxPitch = 11
- Maximum pitch in bytes allowed by memory copies
- cudaDevAttrMaxRegistersPerBlock = 12
- Maximum number of 32-bit registers available per block
- cudaDevAttrClockRate = 13
- Peak clock frequency in kilohertz
- cudaDevAttrTextureAlignment = 14
- Alignment requirement for textures
- cudaDevAttrGpuOverlap = 15
- Device can possibly copy memory and execute a kernel concurrently
- cudaDevAttrMultiProcessorCount = 16
- Number of multiprocessors on device
- cudaDevAttrKernelExecTimeout = 17
- Specifies whether there is a run time limit on kernels
- cudaDevAttrIntegrated = 18
- Device is integrated with host memory
- cudaDevAttrCanMapHostMemory = 19
- Device can map host memory into CUDA address space
- cudaDevAttrComputeMode = 20
- Compute mode (See cudaComputeMode for details)
- cudaDevAttrMaxTexture1DWidth = 21
- Maximum 1D texture width
- cudaDevAttrMaxTexture2DWidth = 22
- Maximum 2D texture width
- cudaDevAttrMaxTexture2DHeight = 23
- Maximum 2D texture height
- cudaDevAttrMaxTexture3DWidth = 24
- Maximum 3D texture width
- cudaDevAttrMaxTexture3DHeight = 25
- Maximum 3D texture height
- cudaDevAttrMaxTexture3DDepth = 26
- Maximum 3D texture depth
- cudaDevAttrMaxTexture2DLayeredWidth = 27
- Maximum 2D layered texture width
- cudaDevAttrMaxTexture2DLayeredHeight = 28
- Maximum 2D layered texture height
- cudaDevAttrMaxTexture2DLayeredLayers = 29
- Maximum layers in a 2D layered texture
- cudaDevAttrSurfaceAlignment = 30
- Alignment requirement for surfaces
- cudaDevAttrConcurrentKernels = 31
- Device can possibly execute multiple kernels concurrently
- cudaDevAttrEccEnabled = 32
- Device has ECC support enabled
- cudaDevAttrPciBusId = 33
- PCI bus ID of the device
- cudaDevAttrPciDeviceId = 34
- PCI device ID of the device
- cudaDevAttrTccDriver = 35
- Device is using TCC driver model
- cudaDevAttrMemoryClockRate = 36
- Peak memory clock frequency in kilohertz
- cudaDevAttrGlobalMemoryBusWidth = 37
- Global memory bus width in bits
- cudaDevAttrL2CacheSize = 38
- Size of L2 cache in bytes
- cudaDevAttrMaxThreadsPerMultiProcessor = 39
- Maximum resident threads per multiprocessor
- cudaDevAttrAsyncEngineCount = 40
- Number of asynchronous engines
- cudaDevAttrUnifiedAddressing = 41
- Device shares a unified address space with the host
- cudaDevAttrMaxTexture1DLayeredWidth = 42
- Maximum 1D layered texture width
- cudaDevAttrMaxTexture1DLayeredLayers = 43
- Maximum layers in a 1D layered texture
- cudaDevAttrMaxTexture2DGatherWidth = 45
- Maximum 2D texture width if cudaArrayTextureGather is set
- cudaDevAttrMaxTexture2DGatherHeight = 46
- Maximum 2D texture height if cudaArrayTextureGather is set
- cudaDevAttrMaxTexture3DWidthAlt = 47
- Alternate maximum 3D texture width
- cudaDevAttrMaxTexture3DHeightAlt = 48
- Alternate maximum 3D texture height
- cudaDevAttrMaxTexture3DDepthAlt = 49
- Alternate maximum 3D texture depth
- cudaDevAttrPciDomainId = 50
- PCI domain ID of the device
- cudaDevAttrTexturePitchAlignment = 51
- Pitch alignment requirement for textures
- cudaDevAttrMaxTextureCubemapWidth = 52
- Maximum cubemap texture width/height
- cudaDevAttrMaxTextureCubemapLayeredWidth = 53
- Maximum cubemap layered texture width/height
- cudaDevAttrMaxTextureCubemapLayeredLayers = 54
- Maximum layers in a cubemap layered texture
- cudaDevAttrMaxSurface1DWidth = 55
- Maximum 1D surface width
- cudaDevAttrMaxSurface2DWidth = 56
- Maximum 2D surface width
- cudaDevAttrMaxSurface2DHeight = 57
- Maximum 2D surface height
- cudaDevAttrMaxSurface3DWidth = 58
- Maximum 3D surface width
- cudaDevAttrMaxSurface3DHeight = 59
- Maximum 3D surface height
- cudaDevAttrMaxSurface3DDepth = 60
- Maximum 3D surface depth
- cudaDevAttrMaxSurface1DLayeredWidth = 61
- Maximum 1D layered surface width
- cudaDevAttrMaxSurface1DLayeredLayers = 62
- Maximum layers in a 1D layered surface
- cudaDevAttrMaxSurface2DLayeredWidth = 63
- Maximum 2D layered surface width
- cudaDevAttrMaxSurface2DLayeredHeight = 64
- Maximum 2D layered surface height
- cudaDevAttrMaxSurface2DLayeredLayers = 65
- Maximum layers in a 2D layered surface
- cudaDevAttrMaxSurfaceCubemapWidth = 66
- Maximum cubemap surface width
- cudaDevAttrMaxSurfaceCubemapLayeredWidth = 67
- Maximum cubemap layered surface width
- cudaDevAttrMaxSurfaceCubemapLayeredLayers = 68
- Maximum layers in a cubemap layered surface
- cudaDevAttrMaxTexture1DLinearWidth = 69
- Maximum 1D linear texture width
- cudaDevAttrMaxTexture2DLinearWidth = 70
- Maximum 2D linear texture width
- cudaDevAttrMaxTexture2DLinearHeight = 71
- Maximum 2D linear texture height
- cudaDevAttrMaxTexture2DLinearPitch = 72
- Maximum 2D linear texture pitch in bytes
- cudaDevAttrMaxTexture2DMipmappedWidth = 73
- Maximum mipmapped 2D texture width
- cudaDevAttrMaxTexture2DMipmappedHeight = 74
- Maximum mipmapped 2D texture height
- cudaDevAttrComputeCapabilityMajor = 75
- Major compute capability version number
- cudaDevAttrComputeCapabilityMinor = 76
- Minor compute capability version number
- cudaDevAttrMaxTexture1DMipmappedWidth = 77
- Maximum mipmapped 1D texture width
- cudaDevAttrStreamPrioritiesSupported = 78
- Device supports stream priorities
- cudaDevAttrGlobalL1CacheSupported = 79
- Device supports caching globals in L1
- cudaDevAttrLocalL1CacheSupported = 80
- Device supports caching locals in L1
- cudaDevAttrMaxSharedMemoryPerMultiprocessor = 81
- Maximum shared memory available per multiprocessor in bytes
- cudaDevAttrMaxRegistersPerMultiprocessor = 82
- Maximum number of 32-bit registers available per multiprocessor
- cudaDevAttrManagedMemory = 83
- Device can allocate managed memory on this system
- cudaDevAttrIsMultiGpuBoard = 84
- Device is on a multi-GPU board
- cudaDevAttrMultiGpuBoardGroupID = 85
- Unique identifier for a group of devices on the same multi-GPU board
- cudaDevAttrHostNativeAtomicSupported = 86
- Link between the device and the host supports native atomic operations
- cudaDevAttrSingleToDoublePrecisionPerfRatio = 87
- Ratio of single precision performance (in floating-point operations per second) to double precision performance
- cudaDevAttrPageableMemoryAccess = 88
- Device supports coherently accessing pageable memory without calling cudaHostRegister on it
- cudaDevAttrConcurrentManagedAccess = 89
- Device can coherently access managed memory concurrently with the CPU
- cudaDevAttrComputePreemptionSupported = 90
- Device supports Compute Preemption
- cudaDevAttrCanUseHostPointerForRegisteredMem = 91
- Device can access host registered memory at the same virtual address as the CPU
- cudaDevAttrReserved92 = 92
- cudaDevAttrReserved93 = 93
- cudaDevAttrReserved94 = 94
- cudaDevAttrCooperativeLaunch = 95
- Device supports launching cooperative kernels via cudaLaunchCooperativeKernel
- cudaDevAttrCooperativeMultiDeviceLaunch = 96
- Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated.
- cudaDevAttrMaxSharedMemoryPerBlockOptin = 97
- The maximum optin shared memory per block. This value may vary by chip. See cudaFuncSetAttribute
- cudaDevAttrCanFlushRemoteWrites = 98
- Device supports flushing of outstanding remote writes.
- cudaDevAttrHostRegisterSupported = 99
- Device supports host memory registration via cudaHostRegister.
- cudaDevAttrPageableMemoryAccessUsesHostPageTables = 100
- Device accesses pageable memory via the host's page tables.
- cudaDevAttrDirectManagedMemAccessFromHost = 101
- Host can directly access managed memory on the device without migration.
- cudaDevAttrMaxBlocksPerMultiprocessor = 106
- Maximum number of blocks per multiprocessor
- cudaDevAttrMaxPersistingL2CacheSize = 108
- Maximum L2 persisting lines capacity setting in bytes.
- cudaDevAttrMaxAccessPolicyWindowSize = 109
- Maximum value of cudaAccessPolicyWindow::num_bytes.
- cudaDevAttrReservedSharedMemoryPerBlock = 111
- Shared memory reserved by CUDA driver per block in bytes
- cudaDevAttrSparseCudaArraySupported = 112
- Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
- cudaDevAttrHostRegisterReadOnlySupported = 113
- Device supports using the cudaHostRegister flag cudaHostRegisterReadOnly to register memory that must be mapped as read-only to the GPU
- cudaDevAttrTimelineSemaphoreInteropSupported = 114
- External timeline semaphore interop is supported on the device
- cudaDevAttrMaxTimelineSemaphoreInteropSupported = 114
- Deprecated, External timeline semaphore interop is supported on the device
- cudaDevAttrMemoryPoolsSupported = 115
- Device supports using the cudaMallocAsync and cudaMemPool family of APIs
- cudaDevAttrGPUDirectRDMASupported = 116
- Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
- cudaDevAttrGPUDirectRDMAFlushWritesOptions = 117
- The returned attribute shall be interpreted as a bitmask, where the individual bits are listed in the cudaFlushGPUDirectRDMAWritesOptions enum
- cudaDevAttrGPUDirectRDMAWritesOrdering = 118
- GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See cudaGPUDirectRDMAWritesOrdering for the numerical values returned here.
- cudaDevAttrMemoryPoolSupportedHandleTypes = 119
- Handle types supported with mempool based IPC
- cudaDevAttrClusterLaunch = 120
- Indicates device supports cluster launch
- cudaDevAttrDeferredMappingCudaArraySupported = 121
- Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays
- cudaDevAttrReserved122 = 122
- cudaDevAttrReserved123 = 123
- cudaDevAttrReserved124 = 124
- cudaDevAttrIpcEventSupport = 125
- Device supports IPC Events.
- cudaDevAttrMemSyncDomainCount = 126
- Number of memory synchronization domains the device supports.
- cudaDevAttrReserved127 = 127
- cudaDevAttrReserved128 = 128
- cudaDevAttrReserved129 = 129
- cudaDevAttrNumaConfig = 130
- NUMA configuration of a device: value is of type cudaDeviceNumaConfig enum
- cudaDevAttrNumaId = 131
- NUMA node ID of the GPU memory
- cudaDevAttrReserved132 = 132
- cudaDevAttrMpsEnabled = 133
- Contexts created on this device will be shared via MPS
- cudaDevAttrHostNumaId = 134
- NUMA ID of the host node closest to the device or -1 when system does not support NUMA
- cudaDevAttrD3D12CigSupported = 135
- Device supports CIG with D3D12.
- cudaDevAttrGpuPciDeviceId = 139
- The combined 16-bit PCI device ID and 16-bit PCI vendor ID.
- cudaDevAttrGpuPciSubsystemId = 140
- The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem vendor ID.
- cudaDevAttrHostNumaMultinodeIpcSupported = 143
- Device supports HostNuma location IPC between nodes in a multi-node system.
- cudaDevAttrMax
- enum cudaDeviceNumaConfig
-
CUDA设备NUMA配置
数值
- cudaDeviceNumaConfigNone = 0
- The GPU is not a NUMA node
- cudaDeviceNumaConfigNumaNode
- The GPU is a NUMA node, cudaDevAttrNumaId contains its NUMA ID
- enum cudaDeviceP2PAttr
-
CUDA设备点对点属性
数值
- cudaDevP2PAttrPerformanceRank = 1
- A relative value indicating the performance of the link between two devices
- cudaDevP2PAttrAccessSupported = 2
- Peer access is enabled
- cudaDevP2PAttrNativeAtomicSupported = 3
- Native atomic operation over the link supported
- cudaDevP2PAttrCudaArrayAccessSupported = 4
- Accessing CUDA arrays over the link supported
- enum cudaDriverEntryPointQueryResult
-
表示获取驱动程序入口点时所返回状态的枚举,与cudaApiGetDriverEntryPoint配合使用
数值
- cudaDriverEntryPointSuccess = 0
- Search for symbol found a match
- cudaDriverEntryPointSymbolNotFound = 1
- Search for symbol was not found
- cudaDriverEntryPointVersionNotSufficent = 2
- Search for symbol was found but version wasn't great enough
- enum cudaEglColorFormat
-
CUDA EGL颜色格式 - 当前CUDA_EGL互操作支持的不同平面和多平面格式。
数值
- cudaEglColorFormatYUV420Planar = 0
- Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYUV420SemiPlanar = 1
- Y, UV in two surfaces (UV as one surface) with VU byte ordering, width, height ratio same as YUV420Planar.
- cudaEglColorFormatYUV422Planar = 2
- Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V height = Y height.
- cudaEglColorFormatYUV422SemiPlanar = 3
- Y, UV in two surfaces with VU byte ordering, width, height ratio same as YUV422Planar.
- cudaEglColorFormatARGB = 6
- R/G/B/A four channels in one surface with BGRA byte ordering.
- cudaEglColorFormatRGBA = 7
- R/G/B/A four channels in one surface with ABGR byte ordering.
- cudaEglColorFormatL = 8
- single luminance channel in one surface.
- cudaEglColorFormatR = 9
- single color channel in one surface.
- cudaEglColorFormatYUV444Planar = 10
- Y, U, V in three surfaces, each in a separate surface, U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatYUV444SemiPlanar = 11
- Y, UV in two surfaces (UV as one surface) with VU byte ordering, width, height ratio same as YUV444Planar.
- cudaEglColorFormatYUYV422 = 12
- Y, U, V in one surface, interleaved as UYVY in one channel.
- cudaEglColorFormatUYVY422 = 13
- Y, U, V in one surface, interleaved as YUYV in one channel.
- cudaEglColorFormatABGR = 14
- R/G/B/A four channels in one surface with RGBA byte ordering.
- cudaEglColorFormatBGRA = 15
- R/G/B/A four channels in one surface with ARGB byte ordering.
- cudaEglColorFormatA = 16
- Alpha color format - one channel in one surface.
- cudaEglColorFormatRG = 17
- R/G color format - two channels in one surface with GR byte ordering
- cudaEglColorFormatAYUV = 18
- Y, U, V, A four channels in one surface, interleaved as VUYA.
- cudaEglColorFormatYVU444SemiPlanar = 19
- Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatYVU422SemiPlanar = 20
- Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
- cudaEglColorFormatYVU420SemiPlanar = 21
- Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatY10V10U10_444SemiPlanar = 22
- Y10, V10U10 in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatY10V10U10_420SemiPlanar = 23
- Y10, V10U10 in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatY12V12U12_444SemiPlanar = 24
- Y12, V12U12 in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatY12V12U12_420SemiPlanar = 25
- Y12, V12U12 in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatVYUY_ER = 26
- Extended Range Y, U, V in one surface, interleaved as YVYU in one channel.
- cudaEglColorFormatUYVY_ER = 27
- Extended Range Y, U, V in one surface, interleaved as YUYV in one channel.
- cudaEglColorFormatYUYV_ER = 28
- Extended Range Y, U, V in one surface, interleaved as UYVY in one channel.
- cudaEglColorFormatYVYU_ER = 29
- Extended Range Y, U, V in one surface, interleaved as VYUY in one channel.
- cudaEglColorFormatYUVA_ER = 31
- Extended Range Y, U, V, A four channels in one surface, interleaved as AVUY.
- cudaEglColorFormatAYUV_ER = 32
- Extended Range Y, U, V, A four channels in one surface, interleaved as VUYA.
- cudaEglColorFormatYUV444Planar_ER = 33
- Extended Range Y, U, V in three surfaces, U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatYUV422Planar_ER = 34
- Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width, U/V height = Y height.
- cudaEglColorFormatYUV420Planar_ER = 35
- Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYUV444SemiPlanar_ER = 36
- Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatYUV422SemiPlanar_ER = 37
- Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
- cudaEglColorFormatYUV420SemiPlanar_ER = 38
- Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYVU444Planar_ER = 39
- Extended Range Y, V, U in three surfaces, U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatYVU422Planar_ER = 40
- Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width, U/V height = Y height.
- cudaEglColorFormatYVU420Planar_ER = 41
- Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYVU444SemiPlanar_ER = 42
- Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatYVU422SemiPlanar_ER = 43
- Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
- cudaEglColorFormatYVU420SemiPlanar_ER = 44
- Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatBayerRGGB = 45
- Bayer format - one channel in one surface with interleaved RGGB ordering.
- cudaEglColorFormatBayerBGGR = 46
- Bayer format - one channel in one surface with interleaved BGGR ordering.
- cudaEglColorFormatBayerGRBG = 47
- Bayer format - one channel in one surface with interleaved GRBG ordering.
- cudaEglColorFormatBayerGBRG = 48
- Bayer format - one channel in one surface with interleaved GBRG ordering.
- cudaEglColorFormatBayer10RGGB = 49
- Bayer10 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 10 bits used 6 bits No-op.
- cudaEglColorFormatBayer10BGGR = 50
- Bayer10 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 10 bits used 6 bits No-op.
- cudaEglColorFormatBayer10GRBG = 51
- Bayer10 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 10 bits used 6 bits No-op.
- cudaEglColorFormatBayer10GBRG = 52
- Bayer10 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 10 bits used 6 bits No-op.
- cudaEglColorFormatBayer12RGGB = 53
- Bayer12 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- cudaEglColorFormatBayer12BGGR = 54
- Bayer12 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- cudaEglColorFormatBayer12GRBG = 55
- Bayer12 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- cudaEglColorFormatBayer12GBRG = 56
- Bayer12 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- cudaEglColorFormatBayer14RGGB = 57
- Bayer14 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 14 bits used 2 bits No-op.
- cudaEglColorFormatBayer14BGGR = 58
- Bayer14 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 14 bits used 2 bits No-op.
- cudaEglColorFormatBayer14GRBG = 59
- Bayer14 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 14 bits used 2 bits No-op.
- cudaEglColorFormatBayer14GBRG = 60
- Bayer14 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 14 bits used 2 bits No-op.
- cudaEglColorFormatBayer20RGGB = 61
- Bayer20 format - one channel in one surface with interleaved RGGB ordering. Out of 32 bits, 20 bits used 12 bits No-op.
- cudaEglColorFormatBayer20BGGR = 62
- Bayer20 format - one channel in one surface with interleaved BGGR ordering. Out of 32 bits, 20 bits used 12 bits No-op.
- cudaEglColorFormatBayer20GRBG = 63
- Bayer20 format - one channel in one surface with interleaved GRBG ordering. Out of 32 bits, 20 bits used 12 bits No-op.
- cudaEglColorFormatBayer20GBRG = 64
- Bayer20 format - one channel in one surface with interleaved GBRG ordering. Out of 32 bits, 20 bits used 12 bits No-op.
- cudaEglColorFormatYVU444Planar = 65
- Y, V, U in three surfaces, each in a separate surface, U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatYVU422Planar = 66
- Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = Y height.
- cudaEglColorFormatYVU420Planar = 67
- Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatBayerIspRGGB = 68
- Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved RGGB ordering and mapped to opaque integer datatype.
- cudaEglColorFormatBayerIspBGGR = 69
- Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved BGGR ordering and mapped to opaque integer datatype.
- cudaEglColorFormatBayerIspGRBG = 70
- Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved GRBG ordering and mapped to opaque integer datatype.
- cudaEglColorFormatBayerIspGBRG = 71
- Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved GBRG ordering and mapped to opaque integer datatype.
- cudaEglColorFormatBayerBCCR = 72
- Bayer format - one channel in one surface with interleaved BCCR ordering.
- cudaEglColorFormatBayerRCCB = 73
- Bayer format - one channel in one surface with interleaved RCCB ordering.
- cudaEglColorFormatBayerCRBC = 74
- Bayer format - one channel in one surface with interleaved CRBC ordering.
- cudaEglColorFormatBayerCBRC = 75
- Bayer format - one channel in one surface with interleaved CBRC ordering.
- cudaEglColorFormatBayer10CCCC = 76
- Bayer10 format - one channel in one surface with interleaved CCCC ordering. Out of 16 bits, 10 bits used 6 bits No-op.
- cudaEglColorFormatBayer12BCCR = 77
- Bayer12 format - one channel in one surface with interleaved BCCR ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- cudaEglColorFormatBayer12RCCB = 78
- Bayer12 format - one channel in one surface with interleaved RCCB ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- cudaEglColorFormatBayer12CRBC = 79
- Bayer12 format - one channel in one surface with interleaved CRBC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- cudaEglColorFormatBayer12CBRC = 80
- Bayer12 format - one channel in one surface with interleaved CBRC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- cudaEglColorFormatBayer12CCCC = 81
- Bayer12 format - one channel in one surface with interleaved CCCC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- cudaEglColorFormatY = 82
- Color format for single Y plane.
- cudaEglColorFormatYUV420SemiPlanar_2020 = 83
- Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYVU420SemiPlanar_2020 = 84
- Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYUV420Planar_2020 = 85
- Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYVU420Planar_2020 = 86
- Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYUV420SemiPlanar_709 = 87
- Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYVU420SemiPlanar_709 = 88
- Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYUV420Planar_709 = 89
- Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatYVU420Planar_709 = 90
- Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatY10V10U10_420SemiPlanar_709 = 91
- Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatY10V10U10_420SemiPlanar_2020 = 92
- Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatY10V10U10_422SemiPlanar_2020 = 93
- Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
- cudaEglColorFormatY10V10U10_422SemiPlanar = 94
- Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
- cudaEglColorFormatY10V10U10_422SemiPlanar_709 = 95
- Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
- cudaEglColorFormatY_ER = 96
- Extended Range Color format for single Y plane.
- cudaEglColorFormatY_709_ER = 97
- Extended Range Color format for single Y plane.
- cudaEglColorFormatY10_ER = 98
- Extended Range Color format for single Y10 plane.
- cudaEglColorFormatY10_709_ER = 99
- Extended Range Color format for single Y10 plane.
- cudaEglColorFormatY12_ER = 100
- Extended Range Color format for single Y12 plane.
- cudaEglColorFormatY12_709_ER = 101
- Extended Range Color format for single Y12 plane.
- cudaEglColorFormatYUVA = 102
- Y, U, V, A four channels in one surface, interleaved as AVUY.
- cudaEglColorFormatYVYU = 104
- Y, U, V in one surface, interleaved as YVYU in one channel.
- cudaEglColorFormatVYUY = 105
- Y, U, V in one surface, interleaved as VYUY in one channel.
- cudaEglColorFormatY10V10U10_420SemiPlanar_ER = 106
- Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER = 107
- Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatY10V10U10_444SemiPlanar_ER = 108
- Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER = 109
- Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatY12V12U12_420SemiPlanar_ER = 110
- Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER = 111
- Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- cudaEglColorFormatY12V12U12_444SemiPlanar_ER = 112
- Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER = 113
- Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
- cudaEglColorFormatUYVY709 = 114
- Y, U, V in one surface, interleaved as UYVY in one channel.
- cudaEglColorFormatUYVY709_ER = 115
- Extended Range Y, U, V in one surface, interleaved as UYVY in one channel.
- cudaEglColorFormatUYVY2020 = 116
- Y, U, V in one surface, interleaved as UYVY in one channel.
- enum cudaEglFrameType
-
CUDA EglFrame类型 - 数组或指针
数值
- cudaEglFrameTypeArray = 0
- Frame type CUDA array
- cudaEglFrameTypePitch = 1
- Frame type CUDA pointer
- enum cudaEglResourceLocationFlags
-
资源位置标志 - 系统内存(sysmem)或显存(vidmem)
对于iGPU上的CUDA上下文,由于显存和系统内存是等效的,这些标志不会对执行产生任何影响。
对于dGPU上的CUDA上下文,应用程序可以使用标志cudaEglResourceLocationFlags来提示所需的位置。
cudaEglResourceLocationSysmem - 帧数据驻留在系统内存中以便CUDA访问。
cudaEglResourceLocationVidmem - 帧数据驻留在专用显存中,供CUDA访问。
如果帧是在不同的内存上生成的,可能会由于新的分配和数据迁移而产生额外的延迟。
数值
- cudaEglResourceLocationSysmem = 0x00
- Resource location sysmem
- cudaEglResourceLocationVidmem = 0x01
- Resource location vidmem
- enum cudaError
-
CUDA错误类型
数值
- cudaSuccess = 0
- The API call returned with no errors. In the case of query calls, this also means that the operation being queried is complete (see cudaEventQuery() and cudaStreamQuery()).
- cudaErrorInvalidValue = 1
- This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values.
- cudaErrorMemoryAllocation = 2
- The API call failed because it was unable to allocate enough memory or other resources to perform the requested operation.
- cudaErrorInitializationError = 3
- The API call failed because the CUDA driver and runtime could not be initialized.
- cudaErrorCudartUnloading = 4
- This indicates that a CUDA Runtime API call cannot be executed because it is being called during process shut down, at a point in time after CUDA driver has been unloaded.
- cudaErrorProfilerDisabled = 5
- This indicates profiler is not initialized for this run. This can happen when the application is running with external profiling tools like visual profiler.
- cudaErrorProfilerNotInitialized = 6
-
已弃用
从CUDA 5.0开始,此错误返回已被弃用。现在即使未初始化,尝试通过cudaProfilerStart或cudaProfilerStop启用/禁用性能分析也不再被视为错误。
- cudaErrorProfilerAlreadyStarted = 7
-
已弃用
自CUDA 5.0起,此错误返回已被弃用。当性能分析已启用时调用cudaProfilerStart()不再被视为错误。
- cudaErrorProfilerAlreadyStopped = 8
-
已弃用
从CUDA 5.0开始,此错误返回已被弃用。当性能分析已禁用时调用cudaProfilerStop()不再被视为错误。
- cudaErrorInvalidConfiguration = 9
- This indicates that a kernel launch is requesting resources that can never be satisfied by the current device. Requesting more shared memory per block than the device supports will trigger this error, as will requesting too many threads or blocks. See cudaDeviceProp for more device limitations.
- cudaErrorInvalidPitchValue = 12
- This indicates that one or more of the pitch-related parameters passed to the API call is not within the acceptable range for pitch.
- cudaErrorInvalidSymbol = 13
- This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier.
- cudaErrorInvalidHostPointer = 16
-
已弃用
自 CUDA 10.1 起,此错误返回已被弃用。
这表明传递给API调用的至少一个主机指针不是有效的主机指针。
- cudaErrorInvalidDevicePointer = 17
-
已弃用
自 CUDA 10.1 起,此错误返回已被弃用。
这表明至少有一个传递给API调用的设备指针不是有效的设备指针。
- cudaErrorInvalidTexture = 18
- This indicates that the texture passed to the API call is not a valid texture.
- cudaErrorInvalidTextureBinding = 19
- This indicates that the texture binding is not valid. This occurs if you call cudaGetTextureAlignmentOffset() with an unbound texture.
- cudaErrorInvalidChannelDescriptor = 20
- This indicates that the channel descriptor passed to the API call is not valid. This occurs if the format is not one of the formats specified by cudaChannelFormatKind, or if one of the dimensions is invalid.
- cudaErrorInvalidMemcpyDirection = 21
- This indicates that the direction of the memcpy passed to the API call is not one of the types specified by cudaMemcpyKind.
- cudaErrorAddressOfConstant = 22
-
已弃用
自CUDA 3.1起,此错误返回已被弃用。现在运行时可以通过cudaGetSymbolAddress()获取常量内存中变量的地址。
这表明用户获取了一个常量变量的地址,这一操作在CUDA 3.1版本发布之前是被禁止的。
- cudaErrorTextureFetchFailed = 23
-
已弃用
自CUDA 3.1版本起,此错误返回已被弃用。设备仿真模式在CUDA 3.1发布时已移除。
这表明无法执行纹理获取操作。此错误以前用于设备仿真模式下的纹理操作。
- cudaErrorTextureNotBound = 24
-
已弃用
自CUDA 3.1起,此错误返回已被弃用。设备仿真模式在CUDA 3.1发布时已移除。
这表明纹理未被绑定以供访问。此错误以前用于设备仿真模式下的纹理操作。
- cudaErrorSynchronizationError = 25
-
已弃用
自CUDA 3.1版本起,此错误返回已被弃用。设备仿真模式在CUDA 3.1发布时已移除。
这表明同步操作失败。此错误以前用于某些设备仿真功能。
- cudaErrorInvalidFilterSetting = 26
- This indicates that a non-float texture was being accessed with linear filtering. This is not supported by CUDA.
- cudaErrorInvalidNormSetting = 27
- This indicates that an attempt was made to read an unsupported data type as a normalized float. This is not supported by CUDA.
- cudaErrorMixedDeviceExecution = 28
-
已弃用
自CUDA 3.1版本起,此错误返回已被弃用。设备仿真模式在CUDA 3.1发布时已移除。
不允许混合使用设备代码和设备仿真代码。
- cudaErrorNotYetImplemented = 31
-
已弃用
自 CUDA 4.1 起,此错误返回已被弃用。
这表明该API调用尚未实现。CUDA的生产版本永远不会返回此错误。
- cudaErrorMemoryValueTooLarge = 32
-
已弃用
自CUDA 3.1版本起,此错误返回已被弃用。设备仿真模式在CUDA 3.1发布时已移除。
这表明一个仿真设备指针超出了32位地址范围。
- cudaErrorStubLibrary = 34
- This indicates that the CUDA driver that the application has loaded is a stub library. Applications that run with the stub rather than a real driver loaded will result in CUDA API returning this error.
- cudaErrorInsufficientDriver = 35
- This indicates that the installed NVIDIA CUDA driver is older than the CUDA runtime library. This is not a supported configuration. Users should install an updated NVIDIA display driver to allow the application to run.
- cudaErrorCallRequiresNewerDriver = 36
- This indicates that the API call requires a newer CUDA driver than the one currently installed. Users should install an updated NVIDIA CUDA driver to allow the API call to succeed.
- cudaErrorInvalidSurface = 37
- This indicates that the surface passed to the API call is not a valid surface.
- cudaErrorDuplicateVariableName = 43
- This indicates that multiple global or constant variables (across separate CUDA source files in the application) share the same string name.
- cudaErrorDuplicateTextureName = 44
- This indicates that multiple textures (across separate CUDA source files in the application) share the same string name.
- cudaErrorDuplicateSurfaceName = 45
- This indicates that multiple surfaces (across separate CUDA source files in the application) share the same string name.
- cudaErrorDevicesUnavailable = 46
- This indicates that all CUDA devices are busy or unavailable at the current time. Devices are often busy/unavailable due to use of cudaComputeModeProhibited, cudaComputeModeExclusiveProcess, or when long running CUDA kernels have filled up the GPU and are blocking new work from starting. They can also be unavailable due to memory constraints on a device that already has active CUDA work being performed.
- cudaErrorIncompatibleDriverContext = 49
- This indicates that the current context is not compatible with the CUDA Runtime. This can only occur if you are using CUDA Runtime/Driver interoperability and have created an existing Driver context using the driver API. The Driver context may be incompatible either because the Driver context was created using an older version of the API, because the Runtime API call expects a primary driver context and the Driver context is not primary, or because the Driver context has been destroyed. Please see "Interactions with the CUDA Driver API" for more information.
- cudaErrorMissingConfiguration = 52
- The device function being invoked (usually via cudaLaunchKernel()) was not previously configured via the cudaConfigureCall() function.
- cudaErrorPriorLaunchFailure = 53
-
已弃用
自CUDA 3.1版本起,此错误返回已被弃用。设备仿真模式在CUDA 3.1发布时已移除。
这表明之前的内核启动失败。这曾用于内核启动的设备仿真。
- cudaErrorLaunchMaxDepthExceeded = 65
- This error indicates that a device runtime grid launch did not occur because the depth of the child grid would exceed the maximum supported number of nested grid launches.
- cudaErrorLaunchFileScopedTex = 66
- This error indicates that a grid launch did not occur because the kernel uses file-scoped textures which are unsupported by the device runtime. Kernels launched via the device runtime only support textures created with the Texture Object APIs.
- cudaErrorLaunchFileScopedSurf = 67
- This error indicates that a grid launch did not occur because the kernel uses file-scoped surfaces which are unsupported by the device runtime. Kernels launched via the device runtime only support surfaces created with the Surface Object APIs.
- cudaErrorSyncDepthExceeded = 68
- This error indicates that a call to cudaDeviceSynchronize made from the device runtime failed because the call was made at grid depth greater than either the default (2 levels of grids) or user specified device limit cudaLimitDevRuntimeSyncDepth. To be able to synchronize on launched grids at a greater depth successfully, the maximum nested depth at which cudaDeviceSynchronize will be called must be specified with the cudaLimitDevRuntimeSyncDepth limit to the cudaDeviceSetLimit api before the host-side launch of a kernel using the device runtime. Keep in mind that additional levels of sync depth require the runtime to reserve large amounts of device memory that cannot be used for user allocations. Note that cudaDeviceSynchronize made from device runtime is only supported on devices of compute capability < 9.0.
- cudaErrorLaunchPendingCountExceeded = 69
- This error indicates that a device runtime grid launch failed because the launch would exceed the limit cudaLimitDevRuntimePendingLaunchCount. For this launch to proceed successfully, cudaDeviceSetLimit must be called to set the cudaLimitDevRuntimePendingLaunchCount to be higher than the upper bound of outstanding launches that can be issued to the device runtime. Keep in mind that raising the limit of pending device runtime launches will require the runtime to reserve device memory that cannot be used for user allocations.
- cudaErrorInvalidDeviceFunction = 98
- The requested device function does not exist or is not compiled for the proper device architecture.
- cudaErrorNoDevice = 100
- This indicates that no CUDA-capable devices were detected by the installed CUDA driver.
- cudaErrorInvalidDevice = 101
- This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA device or that the action requested is invalid for the specified device.
- cudaErrorDeviceNotLicensed = 102
- This indicates that the device doesn't have a valid Grid License.
- cudaErrorSoftwareValidityNotEstablished = 103
- By default, the CUDA runtime may perform a minimal set of self-tests, as well as CUDA driver tests, to establish the validity of both. Introduced in CUDA 11.2, this error return indicates that at least one of these tests has failed and the validity of either the runtime or the driver could not be established.
- cudaErrorStartupFailure = 127
- This indicates an internal startup failure in the CUDA runtime.
- cudaErrorInvalidKernelImage = 200
- This indicates that the device kernel image is invalid.
- cudaErrorDeviceUninitialized = 201
- This most frequently indicates that there is no context bound to the current thread. This can also be returned if the context passed to an API call is not a valid handle (such as a context that has had cuCtxDestroy() invoked on it). This can also be returned if a user mixes different API versions (i.e. 3010 context with 3020 API calls). See cuCtxGetApiVersion() for more details.
- cudaErrorMapBufferObjectFailed = 205
- This indicates that the buffer object could not be mapped.
- cudaErrorUnmapBufferObjectFailed = 206
- This indicates that the buffer object could not be unmapped.
- cudaErrorArrayIsMapped = 207
- This indicates that the specified array is currently mapped and thus cannot be destroyed.
- cudaErrorAlreadyMapped = 208
- This indicates that the resource is already mapped.
- cudaErrorNoKernelImageForDevice = 209
- This indicates that there is no kernel image available that is suitable for the device. This can occur when a user specifies code generation options for a particular CUDA source file that do not include the corresponding device configuration.
- cudaErrorAlreadyAcquired = 210
- This indicates that a resource has already been acquired.
- cudaErrorNotMapped = 211
- This indicates that a resource is not mapped.
- cudaErrorNotMappedAsArray = 212
- This indicates that a mapped resource is not available for access as an array.
- cudaErrorNotMappedAsPointer = 213
- This indicates that a mapped resource is not available for access as a pointer.
- cudaErrorECCUncorrectable = 214
- This indicates that an uncorrectable ECC error was detected during execution.
- cudaErrorUnsupportedLimit = 215
- This indicates that the cudaLimit passed to the API call is not supported by the active device.
- cudaErrorDeviceAlreadyInUse = 216
- This indicates that a call tried to access an exclusive-thread device that is already in use by a different thread.
- cudaErrorPeerAccessUnsupported = 217
- This error indicates that P2P access is not supported across the given devices.
- cudaErrorInvalidPtx = 218
- A PTX compilation failed. The runtime may fall back to compiling PTX if an application does not contain a suitable binary for the current device.
- cudaErrorInvalidGraphicsContext = 219
- This indicates an error with the OpenGL or DirectX context.
- cudaErrorNvlinkUncorrectable = 220
- This indicates that an uncorrectable NVLink error was detected during the execution.
- cudaErrorJitCompilerNotFound = 221
- This indicates that the PTX JIT compiler library was not found. The JIT Compiler library is used for PTX compilation. The runtime may fall back to compiling PTX if an application does not contain a suitable binary for the current device.
- cudaErrorUnsupportedPtxVersion = 222
- This indicates that the provided PTX was compiled with an unsupported toolchain. The most common reason for this is that the PTX was generated by a compiler newer than what is supported by the CUDA driver and PTX JIT compiler.
- cudaErrorJitCompilationDisabled = 223
- This indicates that the JIT compilation was disabled. The JIT compilation compiles PTX. The runtime may fall back to compiling PTX if an application does not contain a suitable binary for the current device.
- cudaErrorUnsupportedExecAffinity = 224
- This indicates that the provided execution affinity is not supported by the device.
- cudaErrorUnsupportedDevSideSync = 225
- This indicates that the code to be compiled by the PTX JIT contains an unsupported call to cudaDeviceSynchronize.
- cudaErrorContained = 226
- This indicates that an exception occurred on the device that is now contained by the GPU's error containment capability. Common causes are: (a) certain types of invalid accesses of peer GPU memory over NVLink; (b) certain classes of hardware errors. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorInvalidSource = 300
- This indicates that the device kernel source is invalid.
- cudaErrorFileNotFound = 301
- This indicates that the file specified was not found.
- cudaErrorSharedObjectSymbolNotFound = 302
- This indicates that a link to a shared object failed to resolve.
- cudaErrorSharedObjectInitFailed = 303
- This indicates that initialization of a shared object failed.
- cudaErrorOperatingSystem = 304
- This error indicates that an OS call failed.
- cudaErrorInvalidResourceHandle = 400
- This indicates that a resource handle passed to the API call was not valid. Resource handles are opaque types like cudaStream_t and cudaEvent_t.
- cudaErrorIllegalState = 401
- This indicates that a resource required by the API call is not in a valid state to perform the requested operation.
- cudaErrorLossyQuery = 402
- This indicates an attempt was made to introspect an object in a way that would discard semantically important information. This is either due to the object using functionality newer than the API version used to introspect it or omission of optional return arguments.
- cudaErrorSymbolNotFound = 500
- This indicates that a named symbol was not found. Examples of symbols are global/constant variable names, driver function names, texture names, and surface names.
- cudaErrorNotReady = 600
- This indicates that asynchronous operations issued previously have not completed yet. This result is not actually an error, but must be indicated differently than cudaSuccess (which indicates completion). Calls that may return this value include cudaEventQuery() and cudaStreamQuery().
- cudaErrorIllegalAddress = 700
- The device encountered a load or store instruction on an invalid memory address. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorLaunchOutOfResources = 701
- This indicates that a launch did not occur because it did not have appropriate resources. Although this error is similar to cudaErrorInvalidConfiguration, this error usually indicates that the user has attempted to pass too many arguments to the device kernel, or the kernel launch specifies too many threads for the kernel's register count.
- cudaErrorLaunchTimeout = 702
- This indicates that the device kernel took too long to execute. This can only occur if timeouts are enabled - see the device property kernelExecTimeoutEnabled for more information. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorLaunchIncompatibleTexturing = 703
- This error indicates a kernel launch that uses an incompatible texturing mode.
- cudaErrorPeerAccessAlreadyEnabled = 704
- This error indicates that a call to cudaDeviceEnablePeerAccess() is trying to re-enable peer addressing from a context which has already had peer addressing enabled.
- cudaErrorPeerAccessNotEnabled = 705
- This error indicates that cudaDeviceDisablePeerAccess() is trying to disable peer addressing which has not been enabled yet via cudaDeviceEnablePeerAccess().
- cudaErrorSetOnActiveProcess = 708
- This indicates that the user has called cudaSetValidDevices(), cudaSetDeviceFlags(), cudaD3D9SetDirect3DDevice(), cudaD3D10SetDirect3DDevice(), cudaD3D11SetDirect3DDevice(), or cudaVDPAUSetVDPAUDevice() after initializing the CUDA runtime by calling non-device management operations (allocating memory and launching kernels are examples of non-device management operations). This error can also be returned if using runtime/driver interoperability and there is an existing CUcontext active on the host thread.
- cudaErrorContextIsDestroyed = 709
- This error indicates that the context current to the calling thread has been destroyed using cuCtxDestroy, or is a primary context which has not yet been initialized.
- cudaErrorAssert = 710
- An assert triggered in device code during kernel execution. The device cannot be used again. All existing allocations are invalid. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorTooManyPeers = 711
- This error indicates that the hardware resources required to enable peer access have been exhausted for one or more of the devices passed to cudaDeviceEnablePeerAccess().
- cudaErrorHostMemoryAlreadyRegistered = 712
- This error indicates that the memory range passed to cudaHostRegister() has already been registered.
- cudaErrorHostMemoryNotRegistered = 713
- This error indicates that the pointer passed to cudaHostUnregister() does not correspond to any currently registered memory region.
- cudaErrorHardwareStackError = 714
- Device encountered an error in the call stack during kernel execution, possibly due to stack corruption or exceeding the stack size limit. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorIllegalInstruction = 715
- The device encountered an illegal instruction during kernel execution. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorMisalignedAddress = 716
- The device encountered a load or store instruction on a memory address which is not aligned. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorInvalidAddressSpace = 717
- While executing a kernel, the device encountered an instruction which can only operate on memory locations in certain address spaces (global, shared, or local), but was supplied a memory address not belonging to an allowed address space. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorInvalidPc = 718
- The device encountered an invalid program counter. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorLaunchFailure = 719
- An exception occurred on the device while executing a kernel. Common causes include dereferencing an invalid device pointer and accessing out of bounds shared memory. Less common cases can be system specific - more information about these cases can be found in the system specific user guide. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorCooperativeLaunchTooLarge = 720
- This error indicates that the number of blocks launched per grid for a kernel that was launched via either cudaLaunchCooperativeKernel or cudaLaunchCooperativeKernelMultiDevice exceeds the maximum number of blocks as allowed by cudaOccupancyMaxActiveBlocksPerMultiprocessor or cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors as specified by the device attribute cudaDevAttrMultiProcessorCount.
- cudaErrorTensorMemoryLeak = 721
- An exception occurred on the device while exiting a kernel using tensor memory: the tensor memory was not completely deallocated. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorNotPermitted = 800
- This error indicates the attempted operation is not permitted.
- cudaErrorNotSupported = 801
- This error indicates the attempted operation is not supported on the current system or device.
- cudaErrorSystemNotReady = 802
- This error indicates that the system is not yet ready to start any CUDA work. To continue using CUDA, verify the system configuration is in a valid state and all required driver daemons are actively running. More information about this error can be found in the system specific user guide.
- cudaErrorSystemDriverMismatch = 803
- This error indicates that there is a mismatch between the versions of the display driver and the CUDA driver. Refer to the compatibility documentation for supported versions.
- cudaErrorCompatNotSupportedOnDevice = 804
- This error indicates that the system was upgraded to run with forward compatibility but the visible hardware detected by CUDA does not support this configuration. Refer to the compatibility documentation for the supported hardware matrix or ensure that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES environment variable.
- cudaErrorMpsConnectionFailed = 805
- This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.
- cudaErrorMpsRpcFailure = 806
- This error indicates that the remote procedural call between the MPS server and the MPS client failed.
- cudaErrorMpsServerNotReady = 807
- This error indicates that the MPS server is not ready to accept new MPS client requests. This error can be returned when the MPS server is in the process of recovering from a fatal failure.
- cudaErrorMpsMaxClientsReached = 808
- This error indicates that the hardware resources required to create MPS client have been exhausted.
- cudaErrorMpsMaxConnectionsReached = 809
- This error indicates that the hardware resources required to support device connections have been exhausted.
- cudaErrorMpsClientTerminated = 810
- This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorCdpNotSupported = 811
- This error indicates that the program is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.
- cudaErrorCdpVersionMismatch = 812
- This error indicates that the program contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.
- cudaErrorStreamCaptureUnsupported = 900
- The operation is not permitted when the stream is capturing.
- cudaErrorStreamCaptureInvalidated = 901
- The current capture sequence on the stream has been invalidated due to a previous error.
- cudaErrorStreamCaptureMerge = 902
- The operation would have resulted in a merge of two independent capture sequences.
- cudaErrorStreamCaptureUnmatched = 903
- The capture was not initiated in this stream.
- cudaErrorStreamCaptureUnjoined = 904
- The capture sequence contains a fork that was not joined to the primary stream.
- cudaErrorStreamCaptureIsolation = 905
- A dependency would have been created which crosses the capture sequence boundary. Only implicit in-stream ordering dependencies are allowed to cross the boundary.
- cudaErrorStreamCaptureImplicit = 906
- The operation would have resulted in a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy.
- cudaErrorCapturedEvent = 907
- The operation is not permitted on an event which was last recorded in a capturing stream.
- cudaErrorStreamCaptureWrongThread = 908
- A stream capture sequence not initiated with the cudaStreamCaptureModeRelaxed argument to cudaStreamBeginCapture was passed to cudaStreamEndCapture in a different thread.
- cudaErrorTimeout = 909
- This indicates that the wait operation has timed out.
- cudaErrorGraphExecUpdateFailure = 910
- This error indicates that the graph update was not performed because it included changes which violated constraints specific to instantiated graph update.
- cudaErrorExternalDevice = 911
- This indicates that an async error has occurred in a device outside of CUDA. If CUDA was waiting for an external device's signal before consuming shared data, the external device signaled an error indicating that the data is not valid for consumption. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- cudaErrorInvalidClusterSize = 912
- This indicates that a kernel launch error has occurred due to cluster misconfiguration.
- cudaErrorFunctionNotLoaded = 913
- Indicates a function handle is not loaded when calling an API that requires a loaded function.
- cudaErrorInvalidResourceType = 914
- This error indicates one or more resources passed in are not valid resource types for the operation.
- cudaErrorInvalidResourceConfiguration = 915
- This error indicates one or more resources are insufficient or non-applicable for the operation.
- cudaErrorUnknown = 999
- This indicates that an unknown internal error has occurred.
- cudaErrorApiFailureBase = 10000
- enum cudaExternalMemoryHandleType
-
外部内存句柄类型
数值
- cudaExternalMemoryHandleTypeOpaqueFd = 1
- Handle is an opaque file descriptor
- cudaExternalMemoryHandleTypeOpaqueWin32 = 2
- Handle is an opaque shared NT handle
- cudaExternalMemoryHandleTypeOpaqueWin32Kmt = 3
- Handle is an opaque, globally shared handle
- cudaExternalMemoryHandleTypeD3D12Heap = 4
- Handle is a D3D12 heap object
- cudaExternalMemoryHandleTypeD3D12Resource = 5
- Handle is a D3D12 committed resource
- cudaExternalMemoryHandleTypeD3D11Resource = 6
- Handle is a shared NT handle to a D3D11 resource
- cudaExternalMemoryHandleTypeD3D11ResourceKmt = 7
- Handle is a globally shared handle to a D3D11 resource
- cudaExternalMemoryHandleTypeNvSciBuf = 8
- Handle is an NvSciBuf object
- enum cudaExternalSemaphoreHandleType
-
外部信号量句柄类型
数值
- cudaExternalSemaphoreHandleTypeOpaqueFd = 1
- Handle is an opaque file descriptor
- cudaExternalSemaphoreHandleTypeOpaqueWin32 = 2
- Handle is an opaque shared NT handle
- cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt = 3
- Handle is an opaque, globally shared handle
- cudaExternalSemaphoreHandleTypeD3D12Fence = 4
- Handle is a shared NT handle referencing a D3D12 fence object
- cudaExternalSemaphoreHandleTypeD3D11Fence = 5
- Handle is a shared NT handle referencing a D3D11 fence object
- cudaExternalSemaphoreHandleTypeNvSciSync = 6
- Opaque handle to NvSciSync Object
- cudaExternalSemaphoreHandleTypeKeyedMutex = 7
- Handle is a shared NT handle referencing a D3D11 keyed mutex object
- cudaExternalSemaphoreHandleTypeKeyedMutexKmt = 8
- Handle is a shared KMT handle referencing a D3D11 keyed mutex object
- cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd = 9
- Handle is an opaque handle file descriptor referencing a timeline semaphore
- cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32 = 10
- Handle is an opaque shared NT handle referencing a timeline semaphore
- enum cudaFlushGPUDirectRDMAWritesOptions
-
设备支持的CUDA GPUDirect RDMA写入刷新API
数值
- cudaFlushGPUDirectRDMAWritesOptionHost = 1<<0
- cudaDeviceFlushGPUDirectRDMAWrites() and its CUDA Driver API counterpart are supported on the device.
- cudaFlushGPUDirectRDMAWritesOptionMemOps = 1<<1
- The CU_STREAM_WAIT_VALUE_FLUSH flag and the CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the CUDA device.
- enum cudaFlushGPUDirectRDMAWritesScope
-
CUDA GPUDirect RDMA 刷新写入作用域
数值
- cudaFlushGPUDirectRDMAWritesToOwner = 100
- Blocks until remote writes are visible to the CUDA device context owning the data.
- cudaFlushGPUDirectRDMAWritesToAllDevices = 200
- Blocks until remote writes are visible to all CUDA device contexts.
- enum cudaFlushGPUDirectRDMAWritesTarget
-
CUDA GPUDirect RDMA 刷新写入目标
数值
- cudaFlushGPUDirectRDMAWritesTargetCurrentDevice
- Sets the target for cudaDeviceFlushGPUDirectRDMAWrites() to the currently active CUDA device context.
- enum cudaFuncAttribute
-
可以使用cudaFuncSetAttribute设置的CUDA函数属性
数值
- cudaFuncAttributeMaxDynamicSharedMemorySize = 8
- Maximum dynamic shared memory size
- cudaFuncAttributePreferredSharedMemoryCarveout = 9
- Preferred shared memory-L1 cache split
- cudaFuncAttributeClusterDimMustBeSet = 10
- Indicator to enforce valid cluster dimension specification on kernel launch
- cudaFuncAttributeRequiredClusterWidth = 11
- Required cluster width
- cudaFuncAttributeRequiredClusterHeight = 12
- Required cluster height
- cudaFuncAttributeRequiredClusterDepth = 13
- Required cluster depth
- cudaFuncAttributeNonPortableClusterSizeAllowed = 14
- Whether non-portable cluster scheduling policy is supported
- cudaFuncAttributeClusterSchedulingPolicyPreference = 15
- Required cluster scheduling policy preference
- cudaFuncAttributeMax
- enum cudaFuncCache
-
CUDA函数缓存配置
数值
- cudaFuncCachePreferNone = 0
- Default function cache configuration, no preference
- cudaFuncCachePreferShared = 1
- Prefer larger shared memory and smaller L1 cache
- cudaFuncCachePreferL1 = 2
- Prefer larger L1 cache and smaller shared memory
- cudaFuncCachePreferEqual = 3
- Prefer equal size L1 cache and shared memory
- enum cudaGPUDirectRDMAWritesOrdering
-
CUDA GPUDirect RDMA 设备刷新写入顺序特性
数值
- cudaGPUDirectRDMAWritesOrderingNone = 0
- The device does not natively support ordering of GPUDirect RDMA writes. cudaDeviceFlushGPUDirectRDMAWrites() can be leveraged if supported.
- cudaGPUDirectRDMAWritesOrderingOwner = 100
- Natively, the device can consistently consume GPUDirect RDMA writes, although other CUDA devices may not.
- cudaGPUDirectRDMAWritesOrderingAllDevices = 200
- Any CUDA device in the system can consistently consume GPUDirect RDMA writes to this device.
- enum cudaGetDriverEntryPointFlags
-
用于指定搜索选项的标志,与cudaGetDriverEntryPoint一起使用。更多详情请参阅cuGetProcAddress
数值
- cudaEnableDefault = 0x0
- Default search mode for driver symbols.
- cudaEnableLegacyStream = 0x1
- Search for legacy versions of driver symbols.
- cudaEnablePerThreadDefaultStream = 0x2
- Search for per-thread versions of driver symbols.
- enum cudaGraphConditionalNodeType
-
CUDA条件节点类型
数值
- cudaGraphCondTypeIf = 0
- Conditional 'if/else' Node. Body[0] executed if condition is non-zero. If size == 2, an optional ELSE graph is created and this is executed if the condition is zero.
- cudaGraphCondTypeWhile = 1
- Conditional 'while' Node. Body executed repeatedly while condition value is non-zero.
- cudaGraphCondTypeSwitch = 2
- Conditional 'switch' Node. Body[n] is executed once, where 'n' is the value of the condition. If the condition does not match a body index, no body is launched.
- enum cudaGraphDebugDotFlags
-
CUDA Graph调试写入选项
数值
- cudaGraphDebugDotFlagsVerbose = 1<<0
- Output all debug data as if every debug flag is enabled
- cudaGraphDebugDotFlagsKernelNodeParams = 1<<2
- Adds cudaKernelNodeParams to output
- cudaGraphDebugDotFlagsMemcpyNodeParams = 1<<3
- Adds cudaMemcpy3DParms to output
- cudaGraphDebugDotFlagsMemsetNodeParams = 1<<4
- Adds cudaMemsetParams to output
- cudaGraphDebugDotFlagsHostNodeParams = 1<<5
- Adds cudaHostNodeParams to output
- cudaGraphDebugDotFlagsEventNodeParams = 1<<6
- Adds cudaEvent_t handle from record and wait nodes to output
- cudaGraphDebugDotFlagsExtSemasSignalNodeParams = 1<<7
- Adds cudaExternalSemaphoreSignalNodeParams values to output
- cudaGraphDebugDotFlagsExtSemasWaitNodeParams = 1<<8
- Adds cudaExternalSemaphoreWaitNodeParams to output
- cudaGraphDebugDotFlagsKernelNodeAttributes = 1<<9
- Adds cudaKernelNodeAttrID values to output
- cudaGraphDebugDotFlagsHandles = 1<<10
- Adds node handles and every kernel function handle to output
- cudaGraphDebugDotFlagsConditionalNodeParams = 1<<15
- Adds cudaConditionalNodeParams to output
- enum cudaGraphDependencyType
-
可作为cudaGraphEdgeData一部分应用于图边的类型注解。
数值
- cudaGraphDependencyTypeDefault = 0
- This is an ordinary dependency.
- cudaGraphDependencyTypeProgrammatic = 1
- This dependency type allows the downstream node to use cudaGridDependencySynchronize(). It may only be used between kernel nodes, and must be used with either the cudaGraphKernelNodePortProgrammatic or cudaGraphKernelNodePortLaunchCompletion outgoing port.
- enum cudaGraphExecUpdateResult
-
CUDA Graph 更新错误类型
数值
- cudaGraphExecUpdateSuccess = 0x0
- The update succeeded
- cudaGraphExecUpdateError = 0x1
- The update failed for an unexpected reason which is described in the return value of the function
- cudaGraphExecUpdateErrorTopologyChanged = 0x2
- The update failed because the topology changed
- cudaGraphExecUpdateErrorNodeTypeChanged = 0x3
- The update failed because a node type changed
- cudaGraphExecUpdateErrorFunctionChanged = 0x4
- The update failed because the function of a kernel node changed (CUDA driver < 11.2)
- cudaGraphExecUpdateErrorParametersChanged = 0x5
- The update failed because the parameters changed in a way that is not supported
- cudaGraphExecUpdateErrorNotSupported = 0x6
- The update failed because something about the node is not supported
- cudaGraphExecUpdateErrorUnsupportedFunctionChange = 0x7
- The update failed because the function of a kernel node changed in an unsupported way
- cudaGraphExecUpdateErrorAttributesChanged = 0x8
- The update failed because the node attributes changed in a way that is not supported
- enum cudaGraphInstantiateFlags
-
用于实例化图的标志
数值
- cudaGraphInstantiateFlagAutoFreeOnLaunch = 1
- Automatically free memory allocated in a graph before relaunching.
- cudaGraphInstantiateFlagUpload = 2
- Automatically upload the graph after instantiation. Only supported by cudaGraphInstantiateWithParams. The upload will be performed using the stream provided in instantiateParams.
- cudaGraphInstantiateFlagDeviceLaunch = 4
- Instantiate the graph to be launchable from the device. This flag can only be used on platforms which support unified addressing. This flag cannot be used in conjunction with cudaGraphInstantiateFlagAutoFreeOnLaunch.
- cudaGraphInstantiateFlagUseNodePriority = 8
- Run the graph using the per-node priority attributes rather than the priority of the stream it is launched into.
- enum cudaGraphInstantiateResult
-
图实例化结果
数值
- cudaGraphInstantiateSuccess = 0
- Instantiation succeeded
- cudaGraphInstantiateError = 1
- Instantiation failed for an unexpected reason which is described in the return value of the function
- cudaGraphInstantiateInvalidStructure = 2
- Instantiation failed due to invalid structure, such as cycles
- cudaGraphInstantiateNodeOperationNotSupported = 3
- Instantiation for device launch failed because the graph contained an unsupported operation
- cudaGraphInstantiateMultipleDevicesNotSupported = 4
- Instantiation for device launch failed due to the nodes belonging to different contexts
- cudaGraphInstantiateConditionalHandleUnused = 5
- One or more conditional handles are not associated with conditional nodes
- enum cudaGraphKernelNodeField
-
指定从设备执行多个节点更新时要更新的字段
数值
- cudaGraphKernelNodeFieldInvalid = 0
- Invalid field
- cudaGraphKernelNodeFieldGridDim
- Grid dimension update
- cudaGraphKernelNodeFieldParam
- Kernel parameter update
- cudaGraphKernelNodeFieldEnabled
- Node enable/disable
- enum cudaGraphMemAttributeType
-
图内存属性
数值
- cudaGraphMemAttrUsedMemCurrent = 0x0
- (value type = cuuint64_t) Amount of memory, in bytes, currently associated with graphs.
- cudaGraphMemAttrUsedMemHigh = 0x1
- (value type = cuuint64_t) High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can only be reset to zero.
- cudaGraphMemAttrReservedMemCurrent = 0x2
- (value type = cuuint64_t) Amount of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
- cudaGraphMemAttrReservedMemHigh = 0x3
- (value type = cuuint64_t) High watermark of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
- enum cudaGraphNodeType
-
CUDA图节点类型
数值
- cudaGraphNodeTypeKernel = 0x00
- GPU kernel node
- cudaGraphNodeTypeMemcpy = 0x01
- Memcpy node
- cudaGraphNodeTypeMemset = 0x02
- Memset node
- cudaGraphNodeTypeHost = 0x03
- Host (executable) node
- cudaGraphNodeTypeGraph = 0x04
- Node which executes an embedded graph
- cudaGraphNodeTypeEmpty = 0x05
- Empty (no-op) node
- cudaGraphNodeTypeWaitEvent = 0x06
- External event wait node
- cudaGraphNodeTypeEventRecord = 0x07
- External event record node
- cudaGraphNodeTypeExtSemaphoreSignal = 0x08
- External semaphore signal node
- cudaGraphNodeTypeExtSemaphoreWait = 0x09
- External semaphore wait node
- cudaGraphNodeTypeMemAlloc = 0x0a
- Memory allocation node
- cudaGraphNodeTypeMemFree = 0x0b
- Memory free node
- cudaGraphNodeTypeConditional = 0x0d
- Conditional node. May be used to implement a conditional execution path or loop inside of a graph. The graph(s) contained within the body of the conditional node can be selectively executed or iterated upon based on the value of a conditional variable. Handles must be created in advance of creating the node using cudaGraphConditionalHandleCreate. The following restrictions apply to graphs which contain conditional nodes: The graph cannot be used in a child node. Only one instantiation of the graph may exist at any point in time. The graph cannot be cloned. To set the control value, supply a default value when creating the handle and/or call cudaGraphSetConditional from device code.
- cudaGraphNodeTypeCount
- enum cudaGraphicsCubeFace
-
CUDA图形互操作数组的立方体贴图索引
数值
- cudaGraphicsCubeFacePositiveX = 0x00
- Positive X face of cubemap
- cudaGraphicsCubeFaceNegativeX = 0x01
- Negative X face of cubemap
- cudaGraphicsCubeFacePositiveY = 0x02
- Positive Y face of cubemap
- cudaGraphicsCubeFaceNegativeY = 0x03
- Negative Y face of cubemap
- cudaGraphicsCubeFacePositiveZ = 0x04
- Positive Z face of cubemap
- cudaGraphicsCubeFaceNegativeZ = 0x05
- Negative Z face of cubemap
- enum cudaGraphicsMapFlags
-
CUDA图形互操作映射标志
数值
- cudaGraphicsMapFlagsNone = 0
- Default; Assume resource can be read/written
- cudaGraphicsMapFlagsReadOnly = 1
- CUDA will not write to this resource
- cudaGraphicsMapFlagsWriteDiscard = 2
- CUDA will only write to and will not read from this resource
- enum cudaGraphicsRegisterFlags
-
CUDA图形互操作注册标志
数值
- cudaGraphicsRegisterFlagsNone = 0
- Default
- cudaGraphicsRegisterFlagsReadOnly = 1
- CUDA will not write to this resource
- cudaGraphicsRegisterFlagsWriteDiscard = 2
- CUDA will only write to and will not read from this resource
- cudaGraphicsRegisterFlagsSurfaceLoadStore = 4
- CUDA will bind this resource to a surface reference
- cudaGraphicsRegisterFlagsTextureGather = 8
- CUDA will perform texture gather operations on this resource
- enum cudaJitOption
-
在线编译器和链接器选项
数值
- cudaJitMaxRegisters = 0
- Max number of registers that a thread may use. Option type: unsigned int Applies to: compiler only
- cudaJitThreadsPerBlock = 1
- IN: Specifies minimum number of threads per block to target compilation for OUT: Returns the number of threads the compiler actually targeted. This restricts the resource utilization of the compiler (e.g. max registers) such that a block with the given number of threads should be able to launch based on register limitations. Note, this option does not currently take into account any other resource limitations, such as shared memory utilization. Option type: unsigned int Applies to: compiler only
- cudaJitWallTime = 2
- Overwrites the option value with the total wall clock time, in milliseconds, spent in the compiler and linker Option type: float Applies to: compiler and linker
- cudaJitInfoLogBuffer = 3
- Pointer to a buffer in which to print any log messages that are informational in nature (the buffer size is specified via option cudaJitInfoLogBufferSizeBytes) Option type: char * Applies to: compiler and linker
- cudaJitInfoLogBufferSizeBytes = 4
- IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator) OUT: Amount of log buffer filled with messages Option type: unsigned int Applies to: compiler and linker
- cudaJitErrorLogBuffer = 5
- Pointer to a buffer in which to print any log messages that reflect errors (the buffer size is specified via option cudaJitErrorLogBufferSizeBytes) Option type: char * Applies to: compiler and linker
- cudaJitErrorLogBufferSizeBytes = 6
- IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator) OUT: Amount of log buffer filled with messages Option type: unsigned int Applies to: compiler and linker
- cudaJitOptimizationLevel = 7
- Level of optimizations to apply to generated code (0 - 4), with 4 being the default and highest level of optimizations. Option type: unsigned int Applies to: compiler only
- cudaJitFallbackStrategy = 10
- Specifies choice of fallback strategy if matching cubin is not found. Choice is based on supplied cudaJit_Fallback. Option type: unsigned int for enumerated type cudaJit_Fallback Applies to: compiler only
- cudaJitGenerateDebugInfo = 11
- Specifies whether to create debug information in output (-g) (0: false, default) Option type: int Applies to: compiler and linker
- cudaJitLogVerbose = 12
- Generate verbose log messages (0: false, default) Option type: int Applies to: compiler and linker
- cudaJitGenerateLineInfo = 13
- Generate line number information (-lineinfo) (0: false, default) Option type: int Applies to: compiler only
- cudaJitCacheMode = 14
- Specifies whether to enable caching explicitly (-dlcm) Choice is based on supplied cudaJit_CacheMode. Option type: unsigned int for enumerated type cudaJit_CacheMode Applies to: compiler only
- cudaJitPositionIndependentCode = 30
- Generate position independent code (0: false) Option type: int Applies to: compiler only
- cudaJitMinCtaPerSm = 31
- This option hints to the JIT compiler the minimum number of CTAs from the kernel’s grid to be mapped to a SM. This option is ignored when used together with cudaJitMaxRegisters or cudaJitThreadsPerBlock. Optimizations based on this option need cudaJitMaxThreadsPerBlock to be specified as well. For kernels already using PTX directive .minnctapersm, this option will be ignored by default. Use cudaJitOverrideDirectiveValues to let this option take precedence over the PTX directive. Option type: unsigned int Applies to: compiler only
- cudaJitMaxThreadsPerBlock = 32
- Maximum number of threads in a thread block, computed as the product of the maximum extent specified for each dimension of the block. This limit is guaranteed not to be exceeded in any invocation of the kernel. Exceeding the maximum number of threads results in runtime error or kernel launch failure. For kernels already using PTX directive .maxntid, this option will be ignored by default. Use cudaJitOverrideDirectiveValues to let this option take precedence over the PTX directive. Option type: int Applies to: compiler only
- cudaJitOverrideDirectiveValues = 33
- This option lets the values specified using cudaJitMaxRegisters, cudaJitThreadsPerBlock, cudaJitMaxThreadsPerBlock and cudaJitMinCtaPerSm take precedence over any PTX directives. (0: Disable, default; 1: Enable) Option type: int Applies to: compiler only
- enum cudaJit_CacheMode
-
dlcm的缓存模式
数值
- cudaJitCacheOptionNone = 0
- Compile with no -dlcm flag specified
- cudaJitCacheOptionCG
- Compile with L1 cache disabled
- cudaJitCacheOptionCA
- Compile with L1 cache enabled
- enum cudaJit_Fallback
-
Cubin匹配回退策略
数值
- cudaPreferPtx = 0
- Prefer to compile ptx if exact binary match not found
- cudaPreferBinary
- Prefer to fall back to compatible binary code if exact match not found
- enum cudaLaunchAttributeID
-
启动属性枚举;用作cudaLaunchAttribute的id字段
数值
- cudaLaunchAttributeIgnore = 0
- Ignored entry, for convenient composition
- cudaLaunchAttributeAccessPolicyWindow = 1
- Valid for streams, graph nodes, launches. See cudaLaunchAttributeValue::accessPolicyWindow.
- cudaLaunchAttributeCooperative = 2
- Valid for graph nodes, launches. See cudaLaunchAttributeValue::cooperative.
- cudaLaunchAttributeSynchronizationPolicy = 3
- Valid for streams. See cudaLaunchAttributeValue::syncPolicy.
- cudaLaunchAttributeClusterDimension = 4
- Valid for graph nodes, launches. See cudaLaunchAttributeValue::clusterDim.
- cudaLaunchAttributeClusterSchedulingPolicyPreference = 5
- Valid for graph nodes, launches. See cudaLaunchAttributeValue::clusterSchedulingPolicyPreference.
- cudaLaunchAttributeProgrammaticStreamSerialization = 6
- Valid for launches. Setting cudaLaunchAttributeValue::programmaticStreamSerializationAllowed to non-0 signals that the kernel will use programmatic means to resolve its stream dependency, so that the CUDA runtime should opportunistically allow the grid's execution to overlap with the previous kernel in the stream, if that kernel requests the overlap. The dependent launches can choose to wait on the dependency using the programmatic sync (cudaGridDependencySynchronize() or equivalent PTX instructions).
- cudaLaunchAttributeProgrammaticEvent = 7
- Valid for launches. Set cudaLaunchAttributeValue::programmaticEvent to record the event. Event recorded through this launch attribute is guaranteed to only trigger after all blocks in the associated kernel trigger the event. A block can trigger the event programmatically in a future CUDA release. A trigger can also be inserted at the beginning of each block's execution if triggerAtBlockStart is set to non-0. The dependent launches can choose to wait on the dependency using the programmatic sync (cudaGridDependencySynchronize() or equivalent PTX instructions). Note that dependents (including the CPU thread calling cudaEventSynchronize()) are not guaranteed to observe the release precisely when it is released. For example, cudaEventSynchronize() may only observe the event trigger long after the associated kernel has completed. This recording type is primarily meant for establishing programmatic dependency between device tasks. Note also this type of dependency allows, but does not guarantee, concurrent execution of tasks. The event supplied must not be an interprocess or interop event. The event must disable timing (i.e. must be created with the cudaEventDisableTiming flag set).
- cudaLaunchAttributePriority = 8
- Valid for streams, graph nodes, launches. See cudaLaunchAttributeValue::priority.
- cudaLaunchAttributeMemSyncDomainMap = 9
- Valid for streams, graph nodes, launches. See cudaLaunchAttributeValue::memSyncDomainMap.
- cudaLaunchAttributeMemSyncDomain = 10
- Valid for streams, graph nodes, launches. See cudaLaunchAttributeValue::memSyncDomain.
- cudaLaunchAttributePreferredClusterDimension = 11
- Valid for graph nodes and launches. Set cudaLaunchAttributeValue::preferredClusterDim to allow the kernel launch to specify a preferred substitute cluster dimension. Blocks may be grouped according to either the dimensions specified with this attribute (grouped into a "preferred substitute cluster"), or the one specified with cudaLaunchAttributeClusterDimension attribute (grouped into a "regular cluster"). The cluster dimensions of a "preferred substitute cluster" shall be an integer multiple greater than zero of the regular cluster dimensions. The device will attempt - on a best-effort basis - to group thread blocks into preferred clusters over grouping them into regular clusters. When it deems necessary (primarily when the device temporarily runs out of physical resources to launch the larger preferred clusters), the device may switch to launch the regular clusters instead to attempt to utilize as much of the physical device resources as possible. Each type of cluster will have its enumeration / coordinate setup as if the grid consists solely of its type of cluster. For example, if the preferred substitute cluster dimensions double the regular cluster dimensions, there might be simultaneously a regular cluster indexed at (1,0,0), and a preferred cluster indexed at (1,0,0). In this example, the preferred substitute cluster (1,0,0) replaces regular clusters (2,0,0) and (3,0,0) and groups their blocks. This attribute will only take effect when a regular cluster dimension has been specified. The preferred substitute cluster dimension must be an integer multiple greater than zero of the regular cluster dimension and must divide the grid. It must also be no more than `maxBlocksPerCluster`, if it is set in the kernel's `__launch_bounds__`. Otherwise it must be less than the maximum value the driver can support. Otherwise, setting this attribute to a value physically unable to fit on any particular device is permitted.
- cudaLaunchAttributeLaunchCompletionEvent = 12
- Valid for launches. Set cudaLaunchAttributeValue::launchCompletionEvent to record the event. Nominally, the event is triggered once all blocks of the kernel have begun execution. Currently this is a best effort. If a kernel B has a launch completion dependency on a kernel A, B may wait until A is complete. Alternatively, blocks of B may begin before all blocks of A have begun, for example if B can claim execution resources unavailable to A (e.g. they run on different GPUs) or if B is a higher priority than A. Exercise caution if such an ordering inversion could lead to deadlock. A launch completion event is nominally similar to a programmatic event with triggerAtBlockStart set except that it is not visible to cudaGridDependencySynchronize() and can be used with compute capability less than 9.0. The event supplied must not be an interprocess or interop event. The event must disable timing (i.e. must be created with the cudaEventDisableTiming flag set).
- cudaLaunchAttributeDeviceUpdatableKernelNode = 13
- Valid for graph nodes, launches. This attribute is graphs-only, and passing it to a launch in a non-capturing stream will result in an error. cudaLaunchAttributeValue::deviceUpdatableKernelNode::deviceUpdatable can only be set to 0 or 1. Setting the field to 1 indicates that the corresponding kernel node should be device-updatable. On success, a handle will be returned via cudaLaunchAttributeValue::deviceUpdatableKernelNode::devNode which can be passed to the various device-side update functions to update the node's kernel parameters from within another kernel. For more information on the types of device updates that can be made, as well as the relevant limitations thereof, see cudaGraphKernelNodeUpdatesApply. Nodes which are device-updatable have additional restrictions compared to regular kernel nodes. Firstly, device-updatable nodes cannot be removed from their graph via cudaGraphDestroyNode. Additionally, once opted-in to this functionality, a node cannot opt out, and any attempt to set the deviceUpdatable attribute to 0 will result in an error. Device-updatable kernel nodes also cannot have their attributes copied to/from another kernel node via cudaGraphKernelNodeCopyAttributes. Graphs containing one or more device-updatable nodes also do not allow multiple instantiation, and neither the graph nor its instantiated version can be passed to cudaGraphExecUpdate. If a graph contains device-updatable nodes and updates those nodes from the device from within the graph, the graph must be uploaded with cudaGraphUpload before it is launched. For such a graph, if host-side executable graph updates are made to the device-updatable nodes, the graph must be uploaded before it is launched again.
- cudaLaunchAttributePreferredSharedMemoryCarveout = 14
- Valid for launches. On devices where the L1 cache and shared memory use the same hardware resources, setting cudaLaunchAttributeValue::sharedMemCarveout to a percentage between 0-100 sets the shared memory carveout preference in percent of the total shared memory for that kernel launch. This attribute takes precedence over cudaFuncAttributePreferredSharedMemoryCarveout. This is only a hint, and the driver can choose a different configuration if required for the launch.
- enum cudaLaunchMemSyncDomain
-
内存同步域
内核可以在指定的内存同步域中启动,这将影响该内核发出的所有内存操作。 在一个域中发出的内存屏障仅会排序该域中的内存操作,从而消除因内存屏障排序无关流量而增加的延迟。
默认情况下,内核在域0中启动。使用cudaLaunchMemSyncDomainRemote启动的内核将具有不同的域ID。用户还可以通过cudaLaunchMemSyncDomainMap为特定流/图节点/内核启动修改域ID。参见cudaLaunchAttributeMemSyncDomain、cudaStreamSetAttribute、cudaLaunchKernelEx、cudaGraphKernelNodeSetAttribute。
在不同域中启动的内核所执行的内存操作被视为相隔系统作用域。换言之，即使这些内核位于同一GPU上，仅有GPU作用域的内存同步也不足以让另一个内存同步域中的内核观察到内存顺序。
数值
- cudaLaunchMemSyncDomainDefault = 0
- Launch kernels in the default domain
- cudaLaunchMemSyncDomainRemote = 1
- Launch kernels in the remote domain
- enum cudaLibraryOption
-
数值
- cudaLibraryHostUniversalFunctionAndDataTable = 0
- cudaLibraryBinaryIsPreserved = 1
- Specifies that the argument code passed to cudaLibraryLoadData() will be preserved. Specifying this option will let the driver know that code can be accessed at any point until cudaLibraryUnload(). The default behavior is for the driver to allocate and maintain its own copy of code. Note that this is only a memory usage optimization hint and the driver can choose to ignore it if required. Specifying this option with cudaLibraryLoadFromFile() is invalid and will return cudaErrorInvalidValue.
- enum cudaLimit
-
CUDA限制
数值
- cudaLimitStackSize = 0x00
- GPU thread stack size
- cudaLimitPrintfFifoSize = 0x01
- GPU printf FIFO size
- cudaLimitMallocHeapSize = 0x02
- GPU malloc heap size
- cudaLimitDevRuntimeSyncDepth = 0x03
- GPU device runtime synchronize depth
- cudaLimitDevRuntimePendingLaunchCount = 0x04
- GPU device runtime pending launch count
- cudaLimitMaxL2FetchGranularity = 0x05
- A value between 0 and 128 that indicates the maximum fetch granularity of L2 (in Bytes). This is a hint
- cudaLimitPersistingL2CacheSize = 0x06
- A size in bytes for L2 persisting lines cache size
- enum cudaMemAccessFlags
-
指定用于映射的内存保护标志。
数值
- cudaMemAccessFlagsProtNone = 0
- Default, make the address range not accessible
- cudaMemAccessFlagsProtRead = 1
- Make the address range read accessible
- cudaMemAccessFlagsProtReadWrite = 3
- Make the address range read-write accessible
- enum cudaMemAllocationHandleType
-
用于指定特定句柄类型的标志
数值
- cudaMemHandleTypeNone = 0x0
- Does not allow any export mechanism.
- cudaMemHandleTypePosixFileDescriptor = 0x1
- Allows a file descriptor to be used for exporting. Permitted only on POSIX systems. (int)
- cudaMemHandleTypeWin32 = 0x2
- Allows a Win32 NT handle to be used for exporting. (HANDLE)
- cudaMemHandleTypeWin32Kmt = 0x4
- Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE)
- cudaMemHandleTypeFabric = 0x8
- Allows a fabric handle to be used for exporting. (cudaMemFabricHandle_t)
- enum cudaMemAllocationType
-
定义可用的分配类型
数值
- cudaMemAllocationTypeInvalid = 0x0
- cudaMemAllocationTypePinned = 0x1
- This allocation type is 'pinned', i.e. cannot migrate from its current location while the application is actively using it
- cudaMemAllocationTypeMax = 0x7FFFFFFF
- enum cudaMemLocationType
-
指定位置类型
数值
- cudaMemLocationTypeInvalid = 0
- cudaMemLocationTypeDevice = 1
- Location is a device location, thus id is a device ordinal
- cudaMemLocationTypeHost = 2
- Location is host, id is ignored
- cudaMemLocationTypeHostNuma = 3
- Location is a host NUMA node, thus id is a host NUMA node id
- cudaMemLocationTypeHostNumaCurrent = 4
- Location is the host NUMA node closest to the current thread's CPU, id is ignored
- enum cudaMemPoolAttr
-
CUDA内存池属性
数值
- cudaMemPoolReuseFollowEventDependencies = 0x1
- (value type = int) Allow cudaMallocAsync to use memory asynchronously freed in another stream as long as a stream ordering dependency of the allocating stream on the free action exists. CUDA events and null stream interactions can create the required stream ordered dependencies. (default enabled)
- cudaMemPoolReuseAllowOpportunistic = 0x2
- (value type = int) Allow reuse of already completed frees when there is no dependency between the free and allocation. (default enabled)
- cudaMemPoolReuseAllowInternalDependencies = 0x3
- (value type = int) Allow cudaMallocAsync to insert new stream dependencies in order to establish the stream ordering required to reuse a piece of memory released by cudaFreeAsync (default enabled).
- cudaMemPoolAttrReleaseThreshold = 0x4
- (value type = cuuint64_t) Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS. When more than the release threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next call to stream, event or context synchronize. (default 0)
- cudaMemPoolAttrReservedMemCurrent = 0x5
- (value type = cuuint64_t) Amount of backing memory currently allocated for the mempool.
- cudaMemPoolAttrReservedMemHigh = 0x6
- (value type = cuuint64_t) High watermark of backing memory allocated for the mempool since the last time it was reset. High watermark can only be reset to zero.
- cudaMemPoolAttrUsedMemCurrent = 0x7
- (value type = cuuint64_t) Amount of memory from the pool that is currently in use by the application.
- cudaMemPoolAttrUsedMemHigh = 0x8
- (value type = cuuint64_t) High watermark of the amount of memory from the pool that was in use by the application since the last time it was reset. High watermark can only be reset to zero.
- enum cudaMemRangeAttribute
-
CUDA范围属性
数值
- cudaMemRangeAttributeReadMostly = 1
- Whether the range will mostly be read and only occasionally be written to
- cudaMemRangeAttributePreferredLocation = 2
- The preferred location of the range
- cudaMemRangeAttributeAccessedBy = 3
- Memory range has cudaMemAdviseSetAccessedBy set for specified device
- cudaMemRangeAttributeLastPrefetchLocation = 4
- The last location to which the range was prefetched
- cudaMemRangeAttributePreferredLocationType = 5
- The preferred location type of the range
- cudaMemRangeAttributePreferredLocationId = 6
- The preferred location id of the range
- cudaMemRangeAttributeLastPrefetchLocationType = 7
- The last location type to which the range was prefetched
- cudaMemRangeAttributeLastPrefetchLocationId = 8
- The last location id to which the range was prefetched
- enum cudaMemcpy3DOperandType
-
这些标志允许应用程序为cudaMemcpy3DBatchAsync中指定的各个拷贝操作传递操作数类型。
数值
- cudaMemcpyOperandTypePointer = 0x1
- Memcpy operand is a valid pointer.
- cudaMemcpyOperandTypeArray = 0x2
- Memcpy operand is a cudaArray_t.
- cudaMemcpyOperandTypeMax = 0x7FFFFFFF
- enum cudaMemcpyFlags
-
用于指定批量复制操作的标志。更多详情请参阅cudaMemcpyBatchAsync。
数值
- cudaMemcpyFlagDefault = 0x0
- cudaMemcpyFlagPreferOverlapWithCompute = 0x1
- Hint to the driver to try and overlap the copy with compute work on the SMs.
- enum cudaMemcpyKind
-
CUDA内存拷贝类型
数值
- cudaMemcpyHostToHost = 0
- Host -> Host
- cudaMemcpyHostToDevice = 1
- Host -> Device
- cudaMemcpyDeviceToHost = 2
- Device -> Host
- cudaMemcpyDeviceToDevice = 3
- Device -> Device
- cudaMemcpyDefault = 4
- Direction of the transfer is inferred from the pointer values. Requires unified virtual addressing
- enum cudaMemoryAdvise
-
CUDA内存建议值
数值
- cudaMemAdviseSetReadMostly = 1
- Data will mostly be read and only occasionally be written to
- cudaMemAdviseUnsetReadMostly = 2
- Undo the effect of cudaMemAdviseSetReadMostly
- cudaMemAdviseSetPreferredLocation = 3
- Set the preferred location for the data as the specified device
- cudaMemAdviseUnsetPreferredLocation = 4
- Clear the preferred location for the data
- cudaMemAdviseSetAccessedBy = 5
- Data will be accessed by the specified device, so prevent page faults as much as possible
- cudaMemAdviseUnsetAccessedBy = 6
- Let the Unified Memory subsystem decide on the page faulting policy for the specified device
- enum cudaMemoryType
-
CUDA内存类型
数值
- cudaMemoryTypeUnregistered = 0
- Unregistered memory
- cudaMemoryTypeHost = 1
- Host memory
- cudaMemoryTypeDevice = 2
- Device memory
- cudaMemoryTypeManaged = 3
- Managed memory
- enum cudaResourceType
-
CUDA资源类型
数值
- cudaResourceTypeArray = 0x00
- Array resource
- cudaResourceTypeMipmappedArray = 0x01
- Mipmapped array resource
- cudaResourceTypeLinear = 0x02
- Linear resource
- cudaResourceTypePitch2D = 0x03
- Pitch 2D resource
- enum cudaResourceViewFormat
-
CUDA纹理资源视图格式
数值
- cudaResViewFormatNone = 0x00
- No resource view format (use underlying resource format)
- cudaResViewFormatUnsignedChar1 = 0x01
- 1 channel unsigned 8-bit integers
- cudaResViewFormatUnsignedChar2 = 0x02
- 2 channel unsigned 8-bit integers
- cudaResViewFormatUnsignedChar4 = 0x03
- 4 channel unsigned 8-bit integers
- cudaResViewFormatSignedChar1 = 0x04
- 1 channel signed 8-bit integers
- cudaResViewFormatSignedChar2 = 0x05
- 2 channel signed 8-bit integers
- cudaResViewFormatSignedChar4 = 0x06
- 4 channel signed 8-bit integers
- cudaResViewFormatUnsignedShort1 = 0x07
- 1 channel unsigned 16-bit integers
- cudaResViewFormatUnsignedShort2 = 0x08
- 2 channel unsigned 16-bit integers
- cudaResViewFormatUnsignedShort4 = 0x09
- 4 channel unsigned 16-bit integers
- cudaResViewFormatSignedShort1 = 0x0a
- 1 channel signed 16-bit integers
- cudaResViewFormatSignedShort2 = 0x0b
- 2 channel signed 16-bit integers
- cudaResViewFormatSignedShort4 = 0x0c
- 4 channel signed 16-bit integers
- cudaResViewFormatUnsignedInt1 = 0x0d
- 1 channel unsigned 32-bit integers
- cudaResViewFormatUnsignedInt2 = 0x0e
- 2 channel unsigned 32-bit integers
- cudaResViewFormatUnsignedInt4 = 0x0f
- 4 channel unsigned 32-bit integers
- cudaResViewFormatSignedInt1 = 0x10
- 1 channel signed 32-bit integers
- cudaResViewFormatSignedInt2 = 0x11
- 2 channel signed 32-bit integers
- cudaResViewFormatSignedInt4 = 0x12
- 4 channel signed 32-bit integers
- cudaResViewFormatHalf1 = 0x13
- 1 channel 16-bit floating point
- cudaResViewFormatHalf2 = 0x14
- 2 channel 16-bit floating point
- cudaResViewFormatHalf4 = 0x15
- 4 channel 16-bit floating point
- cudaResViewFormatFloat1 = 0x16
- 1 channel 32-bit floating point
- cudaResViewFormatFloat2 = 0x17
- 2 channel 32-bit floating point
- cudaResViewFormatFloat4 = 0x18
- 4 channel 32-bit floating point
- cudaResViewFormatUnsignedBlockCompressed1 = 0x19
- Block compressed 1
- cudaResViewFormatUnsignedBlockCompressed2 = 0x1a
- Block compressed 2
- cudaResViewFormatUnsignedBlockCompressed3 = 0x1b
- Block compressed 3
- cudaResViewFormatUnsignedBlockCompressed4 = 0x1c
- Block compressed 4 unsigned
- cudaResViewFormatSignedBlockCompressed4 = 0x1d
- Block compressed 4 signed
- cudaResViewFormatUnsignedBlockCompressed5 = 0x1e
- Block compressed 5 unsigned
- cudaResViewFormatSignedBlockCompressed5 = 0x1f
- Block compressed 5 signed
- cudaResViewFormatUnsignedBlockCompressed6H = 0x20
- Block compressed 6 unsigned half-float
- cudaResViewFormatSignedBlockCompressed6H = 0x21
- Block compressed 6 signed half-float
- cudaResViewFormatUnsignedBlockCompressed7 = 0x22
- Block compressed 7
- enum cudaSharedCarveout
-
共享内存预留配置。这些可以传递给cudaFuncSetAttribute
数值
- cudaSharedmemCarveoutDefault = -1
- No preference for shared memory or L1 (default)
- cudaSharedmemCarveoutMaxShared = 100
- Prefer maximum available shared memory, minimum L1 cache
- cudaSharedmemCarveoutMaxL1 = 0
- Prefer maximum available L1 cache, minimum shared memory
- enum cudaSharedMemConfig
-
已弃用
CUDA共享内存配置
数值
- cudaSharedMemBankSizeDefault = 0
- cudaSharedMemBankSizeFourByte = 1
- cudaSharedMemBankSizeEightByte = 2
- enum cudaStreamCaptureMode
-
流捕获线程交互的可能模式。更多详情请参阅cudaStreamBeginCapture和cudaThreadExchangeStreamCaptureMode
数值
- cudaStreamCaptureModeGlobal = 0
- cudaStreamCaptureModeThreadLocal = 1
- cudaStreamCaptureModeRelaxed = 2
- enum cudaStreamCaptureStatus
-
cudaStreamIsCapturing可能返回的流捕获状态
数值
- cudaStreamCaptureStatusNone = 0
- Stream is not capturing
- cudaStreamCaptureStatusActive = 1
- Stream is actively capturing
- cudaStreamCaptureStatusInvalidated = 2
- Stream is part of a capture sequence that has been invalidated, but not terminated
- enum cudaStreamUpdateCaptureDependenciesFlags
-
数值
- cudaStreamAddCaptureDependencies = 0x0
- Add new nodes to the dependency set
- cudaStreamSetCaptureDependencies = 0x1
- Replace the dependency set with the new nodes
- enum cudaSurfaceBoundaryMode
-
CUDA表面边界模式
数值
- cudaBoundaryModeZero = 0
- Zero boundary mode
- cudaBoundaryModeClamp = 1
- Clamp boundary mode
- cudaBoundaryModeTrap = 2
- Trap boundary mode
- enum cudaSurfaceFormatMode
-
CUDA表面格式模式
数值
- cudaFormatModeForced = 0
- Forced format mode
- cudaFormatModeAuto = 1
- Auto format mode
- enum cudaTextureAddressMode
-
CUDA纹理寻址模式
数值
- cudaAddressModeWrap = 0
- Wrapping address mode
- cudaAddressModeClamp = 1
- Clamp to edge address mode
- cudaAddressModeMirror = 2
- Mirror address mode
- cudaAddressModeBorder = 3
- Border address mode
- enum cudaTextureFilterMode
-
CUDA纹理过滤模式
数值
- cudaFilterModePoint = 0
- Point filter mode
- cudaFilterModeLinear = 1
- Linear filter mode
- enum cudaTextureReadMode
-
CUDA纹理读取模式
数值
- cudaReadModeElementType = 0
- Read texture as specified element type
- cudaReadModeNormalizedFloat = 1
- Read texture as normalized float
- enum cudaUserObjectFlags
-
用于图的用户对象的标志
数值
- cudaUserObjectNoDestructorSync = 0x1
- Indicates the destructor execution is not synchronized by any CUDA handle.
- enum cudaUserObjectRetainFlags
-
用于保留图中用户对象引用的标志
数值
- cudaGraphUserObjectMove = 0x1
- Transfer references from the caller rather than creating new references.