7 #include <boost/predef.h>
9 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
10 # include <cuda_runtime_api.h>
// Compile-time name of this runtime API wrapper; the value is the literal Cuda.
17 static constexpr
char name[] =
"Cuda";
// Compile-time version constant of the CUDA runtime this wrapper was built against:
// CUDART_VERSION converted with Boost.Predef's BOOST_PREDEF_MAKE_10_VVRRP macro.
// NOTE(review): presumably comparable against other Boost.Predef version numbers - confirm at call sites.
18 static constexpr
auto version = BOOST_PREDEF_MAKE_10_VVRRP(CUDART_VERSION);
94 ptr->
func_(ptr->data_);
101 ptr->
func_(ptr->data_);
109 return ::cudaDeviceGetAttribute(value, attr, device);
114 return ::cudaDeviceGetLimit(pValue, limit);
119 return ::cudaDeviceReset();
124 return ::cudaDeviceSetLimit(limit, value);
129 return ::cudaDeviceSynchronize();
134 return ::cudaEventCreate(event);
139 return ::cudaEventCreateWithFlags(event, flags);
144 return ::cudaEventDestroy(event);
149 return ::cudaEventQuery(event);
154 return ::cudaEventRecord(event, stream);
159 return ::cudaEventSynchronize(event);
164 return ::cudaFree(devPtr);
169 # if CUDART_VERSION >= 11020
170 return ::cudaFreeAsync(devPtr, stream);
179 return ::cudaFuncGetAttributes(attr, func);
186 # pragma GCC diagnostic push
187 # pragma GCC diagnostic ignored "-Wconditionally-supported"
189 return ::cudaFuncGetAttributes(attr,
reinterpret_cast<void const*
>(func));
191 # pragma GCC diagnostic pop
197 return ::cudaGetDeviceCount(count);
202 return ::cudaGetDeviceProperties(prop, device);
207 return ::cudaGetErrorName(error);
212 return ::cudaGetErrorString(error);
217 return ::cudaGetLastError();
222 return ::cudaGetSymbolAddress(devPtr, symbol);
228 return ::cudaGetSymbolAddress(devPtr, symbol);
233 return ::cudaHostGetDevicePointer(pDevice, pHost, flags);
238 return ::cudaFreeHost(ptr);
243 return ::cudaHostAlloc(ptr, size, flags);
248 return ::cudaHostRegister(ptr, size, flags);
253 return ::cudaHostUnregister(ptr);
258 # if CUDART_VERSION >= 10000
269 return ::cudaMalloc(devPtr, size);
274 return ::cudaMalloc3D(pitchedDevPtr, extent);
278 [[maybe_unused]]
void** devPtr,
279 [[maybe_unused]]
size_t size,
282 # if CUDART_VERSION >= 11020
283 return ::cudaMallocAsync(devPtr, size, stream);
292 return ::cudaMallocPitch(devPtr, pitch, width, height);
297 return ::cudaMemGetInfo(
free, total);
302 return ::cudaMemcpy(dst, src, count, kind);
315 return ::cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream);
320 return ::cudaMemcpy3DAsync(p, stream);
325 return ::cudaMemcpyAsync(dst, src, count, kind, stream);
336 return ::cudaMemset2DAsync(devPtr, pitch, value, width, height, stream);
341 return ::cudaMemset3DAsync(pitchedDevPtr, value, extent, stream);
346 return ::cudaMemsetAsync(devPtr, value, count, stream);
351 return ::cudaSetDevice(device);
356 return ::cudaStreamCreate(pStream);
361 return ::cudaStreamCreateWithFlags(pStream, flags);
366 return ::cudaStreamDestroy(stream);
371 return ::cudaStreamQuery(stream);
376 return ::cudaStreamSynchronize(stream);
381 return ::cudaStreamWaitEvent(stream, event, flags);
386 return ::make_cudaPitchedPtr(d, p, xsz, ysz);
391 return ::make_cudaPos(x, y, z);
396 return ::make_cudaExtent(w, h, d);
The alpaka accelerator library.
static void CUDART_CB streamCallback(Stream_t, Error_t, void *data)
static void CUDART_CB hostFunction(void *data)
static constexpr DeviceAttr_t deviceAttributeMaxBlockDimX
static Error_t getDeviceCount(int *count)
static Error_t hostMalloc(void **ptr, size_t size, Flag_t flags)
static constexpr Flag_t eventDefault
static Error_t deviceReset()
static constexpr Flag_t eventInterprocess
static Error_t streamWaitEvent(Stream_t stream, Event_t event, Flag_t flags)
static constexpr DeviceAttr_t deviceAttributeMultiprocessorCount
static constexpr DeviceAttr_t deviceAttributeMaxBlockDimZ
static Error_t funcGetAttributes(FuncAttributes_t *attr, T *func)
::cudaMemcpy3DParms Memcpy3DParms_t
static constexpr Flag_t eventDisableTiming
static constexpr Flag_t hostMallocPortable
static Error_t memcpyAsync(void *dst, void const *src, size_t count, MemcpyKind_t kind, Stream_t stream)
static Error_t memset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, Stream_t stream)
static constexpr Flag_t hostMallocWriteCombined
static Error_t streamDestroy(Stream_t stream)
static constexpr Flag_t eventBlockingSync
static constexpr char name[]
static Error_t getLastError()
static constexpr DeviceAttr_t deviceAttributeMaxSharedMemoryPerBlock
::cudaPitchedPtr PitchedPtr_t
static constexpr Flag_t hostMallocDefault
static Error_t malloc(void **devPtr, size_t size)
static Error_t hostUnregister(void *ptr)
static Error_t hostFree(void *ptr)
static constexpr Flag_t hostRegisterPortable
static Error_t deviceSetLimit(Limit_t limit, size_t value)
static constexpr Limit_t limitMallocHeapSize
static Error_t memGetInfo(size_t *free, size_t *total)
static constexpr Error_t errorNotReady
static Error_t eventSynchronize(Event_t event)
static Error_t mallocAsync([[maybe_unused]] void **devPtr, [[maybe_unused]] size_t size, [[maybe_unused]] Stream_t stream)
static char const * getErrorName(Error_t error)
static PitchedPtr_t makePitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
static constexpr Flag_t hostMallocNonCoherent
static constexpr Limit_t limitPrintfFifoSize
static constexpr auto version
static Error_t streamCreate(Stream_t *pStream)
static Error_t mallocPitch(void **devPtr, size_t *pitch, size_t width, size_t height)
static Error_t funcGetAttributes(FuncAttributes_t *attr, void const *func)
static constexpr Flag_t streamNonBlocking
static Error_t hostGetDevicePointer(void **pDevice, void *pHost, Flag_t flags)
static constexpr Flag_t hostMallocMapped
static Extent_t makeExtent(size_t w, size_t h, size_t d)
static Error_t deviceSynchronize()
::cudaMemcpyKind MemcpyKind_t
static constexpr Error_t success
static Error_t eventQuery(Event_t event)
static Error_t deviceGetAttribute(int *value, DeviceAttr_t attr, int device)
static char const * getErrorString(Error_t error)
static constexpr Flag_t hostRegisterDefault
static constexpr DeviceAttr_t deviceAttributeMaxThreadsPerBlock
static constexpr Error_t errorUnknown
static Error_t getSymbolAddress(void **devPtr, T const &symbol)
static Error_t memcpy2DAsync(void *dst, size_t dpitch, void const *src, size_t spitch, size_t width, size_t height, MemcpyKind_t kind, Stream_t stream)
::cudaFuncAttributes FuncAttributes_t
static Error_t free(void *devPtr)
static constexpr MemcpyKind_t memcpyDeviceToDevice
static Error_t streamSynchronize(Stream_t stream)
static Pos_t makePos(size_t x, size_t y, size_t z)
static constexpr MemcpyKind_t memcpyHostToDevice
static constexpr Flag_t hostRegisterMapped
void(*)(void *data) HostFn_t
static Error_t streamCreateWithFlags(Stream_t *pStream, Flag_t flags)
static Error_t streamQuery(Stream_t stream)
static constexpr Error_t errorHostMemoryAlreadyRegistered
static Error_t getDeviceProperties(DeviceProp_t *prop, int device)
static constexpr Flag_t hostMallocCoherent
static Error_t getSymbolAddress(void **devPtr, void const *symbol)
static Error_t memcpy3DAsync(Memcpy3DParms_t const *p, Stream_t stream)
static Error_t eventRecord(Event_t event, Stream_t stream)
static Error_t launchHostFunc(Stream_t stream, HostFn_t fn, void *userData)
static Error_t eventCreate(Event_t *event)
static constexpr Flag_t hostRegisterIoMemory
static constexpr DeviceAttr_t deviceAttributeMaxGridDimX
static constexpr Flag_t streamDefault
static Error_t hostRegister(void *ptr, size_t size, Flag_t flags)
static Error_t setDevice(int device)
static Error_t freeAsync([[maybe_unused]] void *devPtr, [[maybe_unused]] Stream_t stream)
static constexpr MemcpyKind_t memcpyDeviceToHost
::cudaDeviceAttr DeviceAttr_t
static constexpr DeviceAttr_t deviceAttributeMaxBlockDimY
static Error_t malloc3D(PitchedPtr_t *pitchedDevPtr, Extent_t extent)
static Error_t memset3DAsync(PitchedPtr_t pitchedDevPtr, int value, Extent_t extent, Stream_t stream)
::cudaDeviceProp DeviceProp_t
static Error_t memcpy(void *dst, void const *src, size_t count, MemcpyKind_t kind)
static constexpr MemcpyKind_t memcpyDefault
static constexpr DeviceAttr_t deviceAttributeMaxGridDimZ
static Error_t deviceGetLimit(size_t *pValue, Limit_t limit)
static Error_t eventCreateWithFlags(Event_t *event, Flag_t flags)
static constexpr Error_t errorHostMemoryNotRegistered
static constexpr DeviceAttr_t deviceAttributeWarpSize
static Error_t memsetAsync(void *devPtr, int value, size_t count, Stream_t stream)
static constexpr DeviceAttr_t deviceAttributeMaxGridDimY
static Error_t eventDestroy(Event_t event)
static constexpr Error_t errorUnsupportedLimit