alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
ApiCudaRt.hpp
Go to the documentation of this file.
1/* Copyright 2025 Andrea Bocci, Maria Michailidi
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
9
10#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
11# include <cuda_runtime_api.h>
12
13namespace alpaka
14{
15 struct ApiCudaRt
16 {
17 // Names
18 static constexpr char name[] = "Cuda";
19 static constexpr auto version = ALPAKA_LANG_CUDA;
20
21 // Types
22 using DeviceAttr_t = ::cudaDeviceAttr;
23 using DeviceProp_t = ::cudaDeviceProp;
24 using Error_t = ::cudaError_t;
25 using Event_t = ::cudaEvent_t;
26 using Extent_t = ::cudaExtent;
27 using Flag_t = unsigned int;
28 using FuncAttributes_t = ::cudaFuncAttributes;
29 using HostFn_t = void (*)(void* data); // same as cudaHostFn_t, without the CUDART_CB calling convention
30 using Limit_t = ::cudaLimit;
31 using Memcpy3DParms_t = ::cudaMemcpy3DParms;
32 using MemcpyKind_t = ::cudaMemcpyKind;
33 using PitchedPtr_t = ::cudaPitchedPtr;
34 using Pos_t = ::cudaPos;
35 using Stream_t = ::cudaStream_t;
36
37 // Constants
38 static constexpr Error_t success = ::cudaSuccess;
39 static constexpr Error_t errorNotReady = ::cudaErrorNotReady;
40 static constexpr Error_t errorHostMemoryAlreadyRegistered = ::cudaErrorHostMemoryAlreadyRegistered;
41 static constexpr Error_t errorHostMemoryNotRegistered = ::cudaErrorHostMemoryNotRegistered;
42 static constexpr Error_t errorUnsupportedLimit = ::cudaErrorUnsupportedLimit;
43 static constexpr Error_t errorUnknown = ::cudaErrorUnknown;
44
45 static constexpr Flag_t eventDefault = cudaEventDefault;
46 static constexpr Flag_t eventBlockingSync = cudaEventBlockingSync;
47 static constexpr Flag_t eventDisableTiming = cudaEventDisableTiming;
48 static constexpr Flag_t eventInterprocess = cudaEventInterprocess;
49
50 static constexpr Flag_t hostMallocDefault = cudaHostAllocDefault;
51 static constexpr Flag_t hostMallocMapped = cudaHostAllocMapped;
52 static constexpr Flag_t hostMallocPortable = cudaHostAllocPortable;
53 static constexpr Flag_t hostMallocWriteCombined = cudaHostAllocWriteCombined;
54 static constexpr Flag_t hostMallocCoherent = cudaHostAllocDefault; // Not supported.
55 static constexpr Flag_t hostMallocNonCoherent = cudaHostAllocDefault; // Not supported.
56
57 static constexpr Flag_t memAttachGlobal = cudaMemAttachGlobal;
58 static constexpr Flag_t memAttachHost = cudaMemAttachHost;
59
60 static constexpr Flag_t hostRegisterDefault = cudaHostRegisterDefault;
61 static constexpr Flag_t hostRegisterPortable = cudaHostRegisterPortable;
62 static constexpr Flag_t hostRegisterMapped = cudaHostRegisterMapped;
63 static constexpr Flag_t hostRegisterIoMemory = cudaHostRegisterIoMemory;
64
65 static constexpr MemcpyKind_t memcpyDefault = ::cudaMemcpyDefault;
66 static constexpr MemcpyKind_t memcpyDeviceToDevice = ::cudaMemcpyDeviceToDevice;
67 static constexpr MemcpyKind_t memcpyDeviceToHost = ::cudaMemcpyDeviceToHost;
68 static constexpr MemcpyKind_t memcpyHostToDevice = ::cudaMemcpyHostToDevice;
69
70 static constexpr Flag_t streamDefault = cudaStreamDefault;
71 static constexpr Flag_t streamNonBlocking = cudaStreamNonBlocking;
72
73 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimX = ::cudaDevAttrMaxBlockDimX;
74 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimY = ::cudaDevAttrMaxBlockDimY;
75 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimZ = ::cudaDevAttrMaxBlockDimZ;
76 static constexpr DeviceAttr_t deviceAttributeMaxGridDimX = ::cudaDevAttrMaxGridDimX;
77 static constexpr DeviceAttr_t deviceAttributeMaxGridDimY = ::cudaDevAttrMaxGridDimY;
78 static constexpr DeviceAttr_t deviceAttributeMaxGridDimZ = ::cudaDevAttrMaxGridDimZ;
79 static constexpr DeviceAttr_t deviceAttributeMaxSharedMemoryPerBlock = ::cudaDevAttrMaxSharedMemoryPerBlock;
80 static constexpr DeviceAttr_t deviceAttributeMaxThreadsPerBlock = ::cudaDevAttrMaxThreadsPerBlock;
81 static constexpr DeviceAttr_t deviceAttributeMultiprocessorCount = ::cudaDevAttrMultiProcessorCount;
82 static constexpr DeviceAttr_t deviceAttributeWarpSize = ::cudaDevAttrWarpSize;
83
84 static constexpr Limit_t limitPrintfFifoSize = ::cudaLimitPrintfFifoSize;
85 static constexpr Limit_t limitMallocHeapSize = ::cudaLimitMallocHeapSize;
86
87 // Host function helper
88 // Encapsulates the different function signatures used by cudaStreamAddCallback and cudaLaunchHostFn, and the
89 // different calling conventions used by CUDA (__stdcall on Win32) and HIP (standard).
91 {
93 void* data_;
94
95 static void CUDART_CB hostFunction(void* data)
96 {
97 auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
98 ptr->func_(ptr->data_);
99 delete ptr;
100 }
101
102 static void CUDART_CB streamCallback(Stream_t, Error_t, void* data)
103 {
104 auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
105 ptr->func_(ptr->data_);
106 delete ptr;
107 }
108 };
109
110 // Runtime API
111 static inline Error_t deviceGetAttribute(int* value, DeviceAttr_t attr, int device)
112 {
113 return ::cudaDeviceGetAttribute(value, attr, device);
114 }
115
116 static inline Error_t deviceGetLimit(size_t* pValue, Limit_t limit)
117 {
118 return ::cudaDeviceGetLimit(pValue, limit);
119 }
120
121 static inline Error_t deviceReset()
122 {
123 return ::cudaDeviceReset();
124 }
125
126 static inline Error_t deviceSetLimit(Limit_t limit, size_t value)
127 {
128 return ::cudaDeviceSetLimit(limit, value);
129 }
130
132 {
133 return ::cudaDeviceSynchronize();
134 }
135
136 static inline Error_t eventCreate(Event_t* event)
137 {
138 return ::cudaEventCreate(event);
139 }
140
141 static inline Error_t eventCreateWithFlags(Event_t* event, Flag_t flags)
142 {
143 return ::cudaEventCreateWithFlags(event, flags);
144 }
145
146 static inline Error_t eventDestroy(Event_t event)
147 {
148 return ::cudaEventDestroy(event);
149 }
150
151 static inline Error_t eventQuery(Event_t event)
152 {
153 return ::cudaEventQuery(event);
154 }
155
156 static inline Error_t eventRecord(Event_t event, Stream_t stream)
157 {
158 return ::cudaEventRecord(event, stream);
159 }
160
161 static inline Error_t eventSynchronize(Event_t event)
162 {
163 return ::cudaEventSynchronize(event);
164 }
165
166 static inline Error_t free(void* devPtr)
167 {
168 return ::cudaFree(devPtr);
169 }
170
171 static inline Error_t freeAsync([[maybe_unused]] void* devPtr, [[maybe_unused]] Stream_t stream)
172 {
173# if CUDART_VERSION >= 11020
174 return ::cudaFreeAsync(devPtr, stream);
175# else
176 // Not implemented.
177 return errorUnknown;
178# endif
179 }
180
181 static inline Error_t funcGetAttributes(FuncAttributes_t* attr, void const* func)
182 {
183 return ::cudaFuncGetAttributes(attr, func);
184 }
185
186 template<typename T>
187 static inline Error_t funcGetAttributes(FuncAttributes_t* attr, T* func)
188 {
189# if ALPAKA_COMP_GNUC
190# pragma GCC diagnostic push
191# pragma GCC diagnostic ignored "-Wconditionally-supported"
192# endif
193 return ::cudaFuncGetAttributes(attr, reinterpret_cast<void const*>(func));
194# if ALPAKA_COMP_GNUC
195# pragma GCC diagnostic pop
196# endif
197 }
198
199 static inline int getCurrentDevice()
200 {
201 int device;
202 using TApi = alpaka::ApiCudaRt;
203 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(::cudaGetDevice(&device));
204
205 return device;
206 }
207
208 static inline Error_t getDeviceCount(int* count)
209 {
210 return ::cudaGetDeviceCount(count);
211 }
212
213 static inline Error_t getDeviceProperties(DeviceProp_t* prop, int device)
214 {
215 return ::cudaGetDeviceProperties(prop, device);
216 }
217
218 static inline char const* getErrorName(Error_t error)
219 {
220 return ::cudaGetErrorName(error);
221 }
222
223 static inline char const* getErrorString(Error_t error)
224 {
225 return ::cudaGetErrorString(error);
226 }
227
228 static inline Error_t getLastError()
229 {
230 return ::cudaGetLastError();
231 }
232
233 static inline Error_t getSymbolAddress(void** devPtr, void const* symbol)
234 {
235 return ::cudaGetSymbolAddress(devPtr, symbol);
236 }
237
238 template<class T>
239 static inline Error_t getSymbolAddress(void** devPtr, T const& symbol)
240 {
241 return ::cudaGetSymbolAddress(devPtr, symbol);
242 }
243
244 static inline Error_t hostGetDevicePointer(void** pDevice, void* pHost, Flag_t flags)
245 {
246 return ::cudaHostGetDevicePointer(pDevice, pHost, flags);
247 }
248
249 static inline Error_t hostFree(void* ptr)
250 {
251 return ::cudaFreeHost(ptr);
252 }
253
254 static inline Error_t hostMalloc(void** ptr, size_t size, Flag_t flags)
255 {
256 return ::cudaHostAlloc(ptr, size, flags);
257 }
258
259 static inline Error_t hostRegister(void* ptr, size_t size, Flag_t flags)
260 {
261 return ::cudaHostRegister(ptr, size, flags);
262 }
263
264 static inline Error_t hostUnregister(void* ptr)
265 {
266 return ::cudaHostUnregister(ptr);
267 }
268
269 static inline Error_t launchHostFunc(Stream_t stream, HostFn_t fn, void* userData)
270 {
271# if CUDART_VERSION >= 10000
272 // Wrap the host function using the proper calling convention
273 return ::cudaLaunchHostFunc(stream, HostFnAdaptor::hostFunction, new HostFnAdaptor{fn, userData});
274# else
275 // Emulate cudaLaunchHostFunc using cudaStreamAddCallback with a callback adaptor.
276 return ::cudaStreamAddCallback(stream, HostFnAdaptor::streamCallback, new HostFnAdaptor{fn, userData}, 0);
277# endif
278 }
279
280 static inline Error_t malloc(void** devPtr, size_t size)
281 {
282 return ::cudaMalloc(devPtr, size);
283 }
284
285 static inline Error_t malloc3D(PitchedPtr_t* pitchedDevPtr, Extent_t extent)
286 {
287 return ::cudaMalloc3D(pitchedDevPtr, extent);
288 }
289
290 static inline Error_t mallocAsync(
291 [[maybe_unused]] void** devPtr,
292 [[maybe_unused]] size_t size,
293 [[maybe_unused]] Stream_t stream)
294 {
295# if CUDART_VERSION >= 11020
296 return ::cudaMallocAsync(devPtr, size, stream);
297# else
298 // Not implemented.
299 return errorUnknown;
300# endif
301 }
302
303 static inline Error_t mallocManaged(void** ptr, size_t size, Flag_t flags)
304 {
305 return ::cudaMallocManaged(ptr, size, flags);
306 }
307
308 static inline Error_t mallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height)
309 {
310 return ::cudaMallocPitch(devPtr, pitch, width, height);
311 }
312
313 static inline Error_t memGetInfo(size_t* free, size_t* total)
314 {
315 return ::cudaMemGetInfo(free, total);
316 }
317
318 static inline Error_t memcpy(void* dst, void const* src, size_t count, MemcpyKind_t kind)
319 {
320 return ::cudaMemcpy(dst, src, count, kind);
321 }
322
323 static inline Error_t memcpy2DAsync(
324 void* dst,
325 size_t dpitch,
326 void const* src,
327 size_t spitch,
328 size_t width,
329 size_t height,
330 MemcpyKind_t kind,
331 Stream_t stream)
332 {
333 return ::cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream);
334 }
335
336 static inline Error_t memcpy3DAsync(Memcpy3DParms_t const* p, Stream_t stream)
337 {
338 return ::cudaMemcpy3DAsync(p, stream);
339 }
340
341 static inline Error_t memcpyAsync(void* dst, void const* src, size_t count, MemcpyKind_t kind, Stream_t stream)
342 {
343 return ::cudaMemcpyAsync(dst, src, count, kind, stream);
344 }
345
346 static inline Error_t memset2DAsync(
347 void* devPtr,
348 size_t pitch,
349 int value,
350 size_t width,
351 size_t height,
352 Stream_t stream)
353 {
354 return ::cudaMemset2DAsync(devPtr, pitch, value, width, height, stream);
355 }
356
357 static inline Error_t memset3DAsync(PitchedPtr_t pitchedDevPtr, int value, Extent_t extent, Stream_t stream)
358 {
359 return ::cudaMemset3DAsync(pitchedDevPtr, value, extent, stream);
360 }
361
362 static inline Error_t memsetAsync(void* devPtr, int value, size_t count, Stream_t stream)
363 {
364 return ::cudaMemsetAsync(devPtr, value, count, stream);
365 }
366
367 static inline Error_t setDevice(int device)
368 {
369 return ::cudaSetDevice(device);
370 }
371
372 static inline Error_t streamCreate(Stream_t* pStream)
373 {
374 return ::cudaStreamCreate(pStream);
375 }
376
377 static inline Error_t streamCreateWithFlags(Stream_t* pStream, Flag_t flags)
378 {
379 return ::cudaStreamCreateWithFlags(pStream, flags);
380 }
381
382 static inline Error_t streamDestroy(Stream_t stream)
383 {
384 return ::cudaStreamDestroy(stream);
385 }
386
387 static inline Error_t streamQuery(Stream_t stream)
388 {
389 return ::cudaStreamQuery(stream);
390 }
391
392 static inline Error_t streamSynchronize(Stream_t stream)
393 {
394 return ::cudaStreamSynchronize(stream);
395 }
396
397 static inline Error_t streamWaitEvent(Stream_t stream, Event_t event, Flag_t flags)
398 {
399 return ::cudaStreamWaitEvent(stream, event, flags);
400 }
401
402 static inline PitchedPtr_t makePitchedPtr(void* d, size_t p, size_t xsz, size_t ysz)
403 {
404 return ::make_cudaPitchedPtr(d, p, xsz, ysz);
405 }
406
407 static inline Pos_t makePos(size_t x, size_t y, size_t z)
408 {
409 return ::make_cudaPos(x, y, z);
410 }
411
412 static inline Extent_t makeExtent(size_t w, size_t h, size_t d)
413 {
414 return ::make_cudaExtent(w, h, d);
415 }
416 };
417
418} // namespace alpaka
419
420#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
#define ALPAKA_LANG_CUDA
Definition Config.hpp:218
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The alpaka accelerator library.
static void CUDART_CB streamCallback(Stream_t, Error_t, void *data)
static void CUDART_CB hostFunction(void *data)
Definition ApiCudaRt.hpp:95
static constexpr DeviceAttr_t deviceAttributeMaxBlockDimX
Definition ApiCudaRt.hpp:73
static Error_t getDeviceCount(int *count)
static Error_t hostMalloc(void **ptr, size_t size, Flag_t flags)
static constexpr Flag_t eventDefault
Definition ApiCudaRt.hpp:45
static Error_t deviceReset()
static constexpr Flag_t eventInterprocess
Definition ApiCudaRt.hpp:48
static Error_t streamWaitEvent(Stream_t stream, Event_t event, Flag_t flags)
unsigned int Flag_t
Definition ApiCudaRt.hpp:27
static constexpr DeviceAttr_t deviceAttributeMultiprocessorCount
Definition ApiCudaRt.hpp:81
static constexpr DeviceAttr_t deviceAttributeMaxBlockDimZ
Definition ApiCudaRt.hpp:75
static int getCurrentDevice()
static Error_t funcGetAttributes(FuncAttributes_t *attr, T *func)
::cudaStream_t Stream_t
Definition ApiCudaRt.hpp:35
::cudaMemcpy3DParms Memcpy3DParms_t
Definition ApiCudaRt.hpp:31
static constexpr Flag_t eventDisableTiming
Definition ApiCudaRt.hpp:47
static constexpr Flag_t hostMallocPortable
Definition ApiCudaRt.hpp:52
static Error_t memcpyAsync(void *dst, void const *src, size_t count, MemcpyKind_t kind, Stream_t stream)
static Error_t memset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, Stream_t stream)
static constexpr Flag_t hostMallocWriteCombined
Definition ApiCudaRt.hpp:53
static Error_t freeAsync(void *devPtr, Stream_t stream)
static Error_t streamDestroy(Stream_t stream)
::cudaEvent_t Event_t
Definition ApiCudaRt.hpp:25
static constexpr Flag_t eventBlockingSync
Definition ApiCudaRt.hpp:46
static constexpr char name[]
Definition ApiCudaRt.hpp:18
static Error_t getLastError()
static constexpr DeviceAttr_t deviceAttributeMaxSharedMemoryPerBlock
Definition ApiCudaRt.hpp:79
::cudaPitchedPtr PitchedPtr_t
Definition ApiCudaRt.hpp:33
static constexpr Flag_t hostMallocDefault
Definition ApiCudaRt.hpp:50
static Error_t malloc(void **devPtr, size_t size)
static Error_t hostUnregister(void *ptr)
static Error_t hostFree(void *ptr)
static constexpr Flag_t hostRegisterPortable
Definition ApiCudaRt.hpp:61
static Error_t deviceSetLimit(Limit_t limit, size_t value)
static constexpr Limit_t limitMallocHeapSize
Definition ApiCudaRt.hpp:85
static Error_t memGetInfo(size_t *free, size_t *total)
static constexpr Error_t errorNotReady
Definition ApiCudaRt.hpp:39
static Error_t eventSynchronize(Event_t event)
static PitchedPtr_t makePitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
static constexpr Flag_t hostMallocNonCoherent
Definition ApiCudaRt.hpp:55
static constexpr Limit_t limitPrintfFifoSize
Definition ApiCudaRt.hpp:84
static constexpr auto version
Definition ApiCudaRt.hpp:19
static Error_t streamCreate(Stream_t *pStream)
static Error_t mallocPitch(void **devPtr, size_t *pitch, size_t width, size_t height)
static Error_t funcGetAttributes(FuncAttributes_t *attr, void const *func)
static constexpr Flag_t streamNonBlocking
Definition ApiCudaRt.hpp:71
static Error_t hostGetDevicePointer(void **pDevice, void *pHost, Flag_t flags)
static constexpr Flag_t hostMallocMapped
Definition ApiCudaRt.hpp:51
static Extent_t makeExtent(size_t w, size_t h, size_t d)
static Error_t deviceSynchronize()
::cudaMemcpyKind MemcpyKind_t
Definition ApiCudaRt.hpp:32
static constexpr Error_t success
Definition ApiCudaRt.hpp:38
static Error_t eventQuery(Event_t event)
static Error_t deviceGetAttribute(int *value, DeviceAttr_t attr, int device)
static constexpr Flag_t hostRegisterDefault
Definition ApiCudaRt.hpp:60
static constexpr Flag_t memAttachGlobal
Definition ApiCudaRt.hpp:57
static constexpr DeviceAttr_t deviceAttributeMaxThreadsPerBlock
Definition ApiCudaRt.hpp:80
::cudaError_t Error_t
Definition ApiCudaRt.hpp:24
static constexpr Error_t errorUnknown
Definition ApiCudaRt.hpp:43
static Error_t getSymbolAddress(void **devPtr, T const &symbol)
static Error_t memcpy2DAsync(void *dst, size_t dpitch, void const *src, size_t spitch, size_t width, size_t height, MemcpyKind_t kind, Stream_t stream)
::cudaFuncAttributes FuncAttributes_t
Definition ApiCudaRt.hpp:28
static Error_t free(void *devPtr)
static constexpr MemcpyKind_t memcpyDeviceToDevice
Definition ApiCudaRt.hpp:66
static char const * getErrorString(Error_t error)
static Error_t streamSynchronize(Stream_t stream)
static Pos_t makePos(size_t x, size_t y, size_t z)
static constexpr MemcpyKind_t memcpyHostToDevice
Definition ApiCudaRt.hpp:68
static constexpr Flag_t hostRegisterMapped
Definition ApiCudaRt.hpp:62
void(*)(void *data) HostFn_t
Definition ApiCudaRt.hpp:29
static Error_t mallocAsync(void **devPtr, size_t size, Stream_t stream)
static char const * getErrorName(Error_t error)
static Error_t streamCreateWithFlags(Stream_t *pStream, Flag_t flags)
static Error_t streamQuery(Stream_t stream)
static constexpr Error_t errorHostMemoryAlreadyRegistered
Definition ApiCudaRt.hpp:40
static Error_t getDeviceProperties(DeviceProp_t *prop, int device)
static constexpr Flag_t hostMallocCoherent
Definition ApiCudaRt.hpp:54
static Error_t getSymbolAddress(void **devPtr, void const *symbol)
static Error_t memcpy3DAsync(Memcpy3DParms_t const *p, Stream_t stream)
static Error_t eventRecord(Event_t event, Stream_t stream)
static Error_t launchHostFunc(Stream_t stream, HostFn_t fn, void *userData)
::cudaLimit Limit_t
Definition ApiCudaRt.hpp:30
static Error_t eventCreate(Event_t *event)
static constexpr Flag_t hostRegisterIoMemory
Definition ApiCudaRt.hpp:63
::cudaExtent Extent_t
Definition ApiCudaRt.hpp:26
static Error_t mallocManaged(void **ptr, size_t size, Flag_t flags)
static constexpr DeviceAttr_t deviceAttributeMaxGridDimX
Definition ApiCudaRt.hpp:76
static constexpr Flag_t streamDefault
Definition ApiCudaRt.hpp:70
static Error_t hostRegister(void *ptr, size_t size, Flag_t flags)
static Error_t setDevice(int device)
static constexpr MemcpyKind_t memcpyDeviceToHost
Definition ApiCudaRt.hpp:67
::cudaDeviceAttr DeviceAttr_t
Definition ApiCudaRt.hpp:22
static constexpr DeviceAttr_t deviceAttributeMaxBlockDimY
Definition ApiCudaRt.hpp:74
static Error_t malloc3D(PitchedPtr_t *pitchedDevPtr, Extent_t extent)
static Error_t memset3DAsync(PitchedPtr_t pitchedDevPtr, int value, Extent_t extent, Stream_t stream)
::cudaDeviceProp DeviceProp_t
Definition ApiCudaRt.hpp:23
static Error_t memcpy(void *dst, void const *src, size_t count, MemcpyKind_t kind)
static constexpr MemcpyKind_t memcpyDefault
Definition ApiCudaRt.hpp:65
static constexpr DeviceAttr_t deviceAttributeMaxGridDimZ
Definition ApiCudaRt.hpp:78
static Error_t deviceGetLimit(size_t *pValue, Limit_t limit)
static constexpr Flag_t memAttachHost
Definition ApiCudaRt.hpp:58
static Error_t eventCreateWithFlags(Event_t *event, Flag_t flags)
static constexpr Error_t errorHostMemoryNotRegistered
Definition ApiCudaRt.hpp:41
static constexpr DeviceAttr_t deviceAttributeWarpSize
Definition ApiCudaRt.hpp:82
static Error_t memsetAsync(void *devPtr, int value, size_t count, Stream_t stream)
static constexpr DeviceAttr_t deviceAttributeMaxGridDimY
Definition ApiCudaRt.hpp:77
static Error_t eventDestroy(Event_t event)
static constexpr Error_t errorUnsupportedLimit
Definition ApiCudaRt.hpp:42