
In a nutshell, this moves our libomptarget code to populate the offload subproject.

With this commit, users need to enable the new LLVM/Offload subproject as a runtime in their CMake configuration. No further changes are expected for downstream code.

Tests and other components still depend on OpenMP and have also not been renamed. The results below are for a build in which OpenMP and Offload are enabled runtimes. In addition to the pure `git mv`, we needed to adjust some CMake files. Nothing is intended to change semantics.

```
ninja check-offload
```
Works with the X86 and AMDGPU offload tests.

```
ninja check-openmp
```
Still works but doesn't build offload tests anymore.

```
ls install/lib
```
Shows all expected libraries, including:
- `libomptarget.devicertl.a`
- `libomptarget-nvptx-sm_90.bc`
- `libomptarget.rtl.amdgpu.so` -> `libomptarget.rtl.amdgpu.so.18git`
- `libomptarget.so` -> `libomptarget.so.18git`

Fixes: https://github.com/llvm/llvm-project/issues/75124

---------

Co-authored-by: Saiyedul Islam <Saiyedul.Islam@amd.com>
356 lines
14 KiB
C++
356 lines
14 KiB
C++
//===--- cuda/dynamic_cuda/cuda.h --------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The parts of the CUDA API that are presently in use by the OpenMP CUDA plugin
//
//===----------------------------------------------------------------------===//

#ifndef DYNAMIC_CUDA_CUDA_H_INCLUDED
#define DYNAMIC_CUDA_CUDA_H_INCLUDED
#include <cstddef>
#include <cstdint>
// Basic handle types for the CUDA API subset declared in this header.

// A device is referred to by a plain int.
typedef int CUdevice;
// Unsigned integer type wide enough to hold a pointer value (uintptr_t).
typedef uintptr_t CUdeviceptr;

// The remaining handles are pointers to struct types that this header only
// names and never defines, so they can be stored and passed around but not
// dereferenced by code that includes this header.
typedef struct CUmod_st *CUmodule;
typedef struct CUctx_st *CUcontext;
typedef struct CUfunc_st *CUfunction;
typedef struct CUstream_st *CUstream;
typedef struct CUevent_st *CUevent;

// Sentinel CUdevice value (-2).
// NOTE(review): presumably means "no valid device", as the name suggests --
// confirm against the CUDA documentation.
#define CU_DEVICE_INVALID ((CUdevice)-2)

// Handle type taken by cuMemCreate(), cuMemMap() and cuMemRelease(). It is a
// plain unsigned integer; the unversioned name is an alias of the _v1 type.
typedef unsigned long long CUmemGenericAllocationHandle_v1;
typedef CUmemGenericAllocationHandle_v1 CUmemGenericAllocationHandle;

// Selects which granularity cuMemGetAllocationGranularity() reports (it is the
// type of that function's `option` parameter).
typedef enum CUmemAllocationGranularity_flags_enum {
  CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0,
  CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1
} CUmemAllocationGranularity_flags;

// Access protection values stored in CUmemAccessDesc::flags.
typedef enum CUmemAccess_flags_enum {
  CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0,
  CU_MEM_ACCESS_FLAGS_PROT_READ = 0x1,
  // 0x3 contains the PROT_READ bit (0x1) plus bit 0x2.
  CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3,
  // Largest positive value of a 32-bit signed integer.
  CU_MEM_ACCESS_FLAGS_PROT_MAX = 0x7FFFFFFF
} CUmemAccess_flags;

// Kind of location named by a CUmemLocation (the type of its `type` field).
typedef enum CUmemLocationType_enum {
  CU_MEM_LOCATION_TYPE_INVALID = 0x0,
  CU_MEM_LOCATION_TYPE_DEVICE = 0x1,
  // Largest positive value of a 32-bit signed integer.
  CU_MEM_LOCATION_TYPE_MAX = 0x7FFFFFFF
} CUmemLocationType;

typedef struct CUmemLocation_st {
|
|
CUmemLocationType type;
|
|
int id;
|
|
} CUmemLocation_v1;
|
|
typedef CUmemLocation_v1 CUmemLocation;
|
|
|
|
typedef struct CUmemAccessDesc_st {
|
|
CUmemLocation location;
|
|
CUmemAccess_flags flags;
|
|
} CUmemAccessDesc_v1;
|
|
|
|
typedef CUmemAccessDesc_v1 CUmemAccessDesc;
|
|
|
|
// Allocation kind stored in CUmemAllocationProp::type.
typedef enum CUmemAllocationType_enum {
  CU_MEM_ALLOCATION_TYPE_INVALID = 0x0,
  CU_MEM_ALLOCATION_TYPE_PINNED = 0x1,
  // Largest positive value of a 32-bit signed integer.
  CU_MEM_ALLOCATION_TYPE_MAX = 0x7FFFFFFF
} CUmemAllocationType;

// Handle kinds stored in CUmemAllocationProp::requestedHandleTypes. The
// non-zero kinds below MAX each occupy a distinct bit (0x1, 0x2, 0x4).
typedef enum CUmemAllocationHandleType_enum {
  CU_MEM_HANDLE_TYPE_NONE = 0x0,
  CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1,
  CU_MEM_HANDLE_TYPE_WIN32 = 0x2,
  CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4,
  // Largest positive value of a 32-bit signed integer.
  CU_MEM_HANDLE_TYPE_MAX = 0x7FFFFFFF
} CUmemAllocationHandleType;

typedef struct CUmemAllocationProp_st {
|
|
CUmemAllocationType type;
|
|
CUmemAllocationHandleType requestedHandleTypes;
|
|
CUmemLocation location;
|
|
|
|
void *win32HandleMetaData;
|
|
struct {
|
|
unsigned char compressionType;
|
|
unsigned char gpuDirectRDMACapable;
|
|
unsigned short usage;
|
|
unsigned char reserved[4];
|
|
} allocFlags;
|
|
} CUmemAllocationProp_v1;
|
|
typedef CUmemAllocationProp_v1 CUmemAllocationProp;
|
|
|
|
// Status code returned by every function declared in this header. Note that
// the enum tag is cudaError_enum while the typedef name is CUresult. The
// listed values are not contiguous.
typedef enum cudaError_enum {
  CUDA_SUCCESS = 0,
  CUDA_ERROR_INVALID_VALUE = 1,
  CUDA_ERROR_NO_DEVICE = 100,
  CUDA_ERROR_INVALID_HANDLE = 400,
  CUDA_ERROR_NOT_READY = 600,
  CUDA_ERROR_TOO_MANY_PEERS = 711,
} CUresult;

// Stream creation flags.
// NOTE(review): presumably passed as the unsigned argument of
// cuStreamCreate() -- confirm at the call sites.
typedef enum CUstream_flags_enum {
  CU_STREAM_DEFAULT = 0x0,
  CU_STREAM_NON_BLOCKING = 0x1,
} CUstream_flags;

// Identifies a limit for cuCtxGetLimit() / cuCtxSetLimit().
typedef enum CUlimit_enum {
  CU_LIMIT_STACK_SIZE = 0x0,
  CU_LIMIT_PRINTF_FIFO_SIZE = 0x1,
  CU_LIMIT_MALLOC_HEAP_SIZE = 0x2,
  CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x3,
  CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x4,
  CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x5,
  CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x6,
  // No explicit value: one past the last limit above, i.e. 0x7.
  CU_LIMIT_MAX
} CUlimit;

// Identifies the device property queried through cuDeviceGetAttribute().
//
// A few enumerators deliberately share a value with the one listed just before
// them (8, 12, 27-29 and 102); both spellings select the same attribute.
typedef enum CUdevice_attribute_enum {
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
  // Same value as CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK.
  CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
  CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
  CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
  // Same value as CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK.
  CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
  CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
  CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
  CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
  CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
  CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
  CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
  CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
  // The three *_ARRAY_* names repeat the values of the *_LAYERED_* names above.
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
  CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
  CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
  CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
  CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
  CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
  CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
  CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
  CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
  CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
  CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
  CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
  CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
  CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
  CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
  CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
  CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
  CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
  CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
  CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
  CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
  CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
  CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
  CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
  CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
  CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
  CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
  CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
  CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
  CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
  CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
  CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
  CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
  CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
  // Same value as CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED.
  CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
  CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
  CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
  CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
  CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
  CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
  CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
  CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
  CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
  CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117,
  CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118,
  CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119,
  // No explicit value: one past the last attribute above, i.e. 120.
  CU_DEVICE_ATTRIBUTE_MAX,
} CUdevice_attribute;

// Identifies the function property queried through cuFuncGetAttribute().
// Only one attribute is declared here.
typedef enum CUfunction_attribute_enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
} CUfunction_attribute;

// Context flags.
// NOTE(review): presumably used with cuDevicePrimaryCtxSetFlags() /
// cuDevicePrimaryCtxGetState() -- confirm at the call sites.
typedef enum CUctx_flags_enum {
  CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
  // Covers the low three bits, which include CU_CTX_SCHED_BLOCKING_SYNC.
  CU_CTX_SCHED_MASK = 0x07,
} CUctx_flags;

// Memory attach flags; each value occupies a distinct bit.
// NOTE(review): presumably passed as the unsigned int argument of
// cuMemAllocManaged() -- confirm at the call sites.
typedef enum CUmemAttach_flags_enum {
  CU_MEM_ATTACH_GLOBAL = 0x1,
  CU_MEM_ATTACH_HOST = 0x2,
  CU_MEM_ATTACH_SINGLE = 0x4,
} CUmemAttach_flags;

// Device compute modes. Note that the enum tag is spelled CUcomputeMode_enum
// while the typedef name is CUcompute_mode, and that no enumerator with the
// value 1 is declared here.
typedef enum CUcomputeMode_enum {
  CU_COMPUTEMODE_DEFAULT = 0,
  CU_COMPUTEMODE_PROHIBITED = 2,
  CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
} CUcompute_mode;

// Event flags; the non-zero values each occupy a distinct bit.
// NOTE(review): presumably passed as the unsigned int argument of
// cuEventCreate() -- confirm at the call sites.
typedef enum CUevent_flags_enum {
  CU_EVENT_DEFAULT = 0x0,
  CU_EVENT_BLOCKING_SYNC = 0x1,
  CU_EVENT_DISABLE_TIMING = 0x2,
  CU_EVENT_INTERPROCESS = 0x4
} CUevent_flags;

// Device lookup plus device/function attribute queries. Every function
// reports its status through the returned CUresult.
CUresult cuCtxGetDevice(CUdevice *);
CUresult cuDeviceGet(CUdevice *, int);
CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
CUresult cuDeviceGetCount(int *);
CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction);

// Device info
|
|
CUresult cuDeviceGetName(char *, int, CUdevice);
|
|
CUresult cuDeviceTotalMem(size_t *, CUdevice);
|
|
CUresult cuDriverGetVersion(int *);
|
|
|
|
// Error-string lookup, initialization and kernel launch.
CUresult cuGetErrorString(CUresult, const char **);
CUresult cuInit(unsigned);
// NOTE(review): per the CUDA documentation the seven unsigned arguments are
// presumably grid x/y/z, block x/y/z and the dynamic shared memory size, and
// the two void ** arguments the kernel parameters and extra options --
// confirm before relying on this.
CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,
                        unsigned, unsigned, unsigned, CUstream, void **,
                        void **);

// Memory allocation entry points. The *Async variant additionally takes the
// stream as its last argument.
CUresult cuMemAlloc(CUdeviceptr *, size_t);
CUresult cuMemAllocHost(void **, size_t);
CUresult cuMemAllocManaged(CUdeviceptr *, size_t, unsigned int);
CUresult cuMemAllocAsync(CUdeviceptr *, size_t, CUstream);

// Memory copies. The destination is the first argument and the source the
// second (the host-to-device source is const-qualified); *Async variants take
// a trailing stream.
CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream);
CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t);
CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream);
CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t);
CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream);

// Memory release entry points, mirroring the cuMemAlloc* declarations.
CUresult cuMemFree(CUdeviceptr);
CUresult cuMemFreeHost(void *);
CUresult cuMemFreeAsync(CUdeviceptr, CUstream);

// Look up a function or a global in a module by its name string.
CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *);
CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *);

// Module unloading, stream management, current-context selection,
// primary-context management and module loading.
CUresult cuModuleUnload(CUmodule);
CUresult cuStreamCreate(CUstream *, unsigned);
CUresult cuStreamDestroy(CUstream);
CUresult cuStreamSynchronize(CUstream);
CUresult cuStreamQuery(CUstream);
CUresult cuCtxSetCurrent(CUcontext);
CUresult cuDevicePrimaryCtxRelease(CUdevice);
CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *);
CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned);
CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice);
CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *,
                            void **);

// Peer-to-peer support: capability query, enabling access, and an
// asynchronous copy between two (pointer, context) pairs.
CUresult cuDeviceCanAccessPeer(int *, CUdevice, CUdevice);
CUresult cuCtxEnablePeerAccess(CUcontext, unsigned);
CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext,
                           size_t, CUstream);

// Read or change one of the limits enumerated by CUlimit.
CUresult cuCtxGetLimit(size_t *, CUlimit);
CUresult cuCtxSetLimit(CUlimit, size_t);

// Event creation, recording, waiting and destruction.
CUresult cuEventCreate(CUevent *, unsigned int);
CUresult cuEventRecord(CUevent, CUstream);
CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int);
CUresult cuEventSynchronize(CUevent);
CUresult cuEventDestroy(CUevent);

// Address-range reservation, allocation-handle creation, mapping and access
// control, built on CUmemGenericAllocationHandle, CUmemAllocationProp and
// CUmemAccessDesc. cuMemGetInfo() writes through its `free` and `total`
// pointers.
CUresult cuMemUnmap(CUdeviceptr ptr, size_t size);
CUresult cuMemRelease(CUmemGenericAllocationHandle handle);
CUresult cuMemAddressFree(CUdeviceptr ptr, size_t size);
CUresult cuMemGetInfo(size_t *free, size_t *total);
CUresult cuMemAddressReserve(CUdeviceptr *ptr, size_t size, size_t alignment,
                             CUdeviceptr addr, unsigned long long flags);
CUresult cuMemMap(CUdeviceptr ptr, size_t size, size_t offset,
                  CUmemGenericAllocationHandle handle,
                  unsigned long long flags);
CUresult cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size,
                     const CUmemAllocationProp *prop, unsigned long long flags);
// `desc` points to `count` access descriptors.
CUresult cuMemSetAccess(CUdeviceptr ptr, size_t size,
                        const CUmemAccessDesc *desc, size_t count);
CUresult cuMemGetAllocationGranularity(size_t *granularity,
                                       const CUmemAllocationProp *prop,
                                       CUmemAllocationGranularity_flags option);

#endif