alpaka
Abstraction Library for Parallel Kernel Acceleration
ApiCudaRt.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Andrea Bocci
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include <boost/predef.h>
8 
9 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
10 # include <cuda_runtime_api.h>
11 
12 namespace alpaka
13 {
14  struct ApiCudaRt
15  {
16  // Names
17  static constexpr char name[] = "Cuda";
18  static constexpr auto version = BOOST_PREDEF_MAKE_10_VVRRP(CUDART_VERSION);
19 
20  // Types
21  using DeviceAttr_t = ::cudaDeviceAttr;
22  using DeviceProp_t = ::cudaDeviceProp;
23  using Error_t = ::cudaError_t;
24  using Event_t = ::cudaEvent_t;
25  using Extent_t = ::cudaExtent;
26  using Flag_t = unsigned int;
27  using FuncAttributes_t = ::cudaFuncAttributes;
28  using HostFn_t = void (*)(void* data); // same as cudaHostFn_t, without the CUDART_CB calling convention
29  using Limit_t = ::cudaLimit;
30  using Memcpy3DParms_t = ::cudaMemcpy3DParms;
31  using MemcpyKind_t = ::cudaMemcpyKind;
32  using PitchedPtr_t = ::cudaPitchedPtr;
33  using Pos_t = ::cudaPos;
34  using Stream_t = ::cudaStream_t;
35 
36  // Constants
37  static constexpr Error_t success = ::cudaSuccess;
38  static constexpr Error_t errorNotReady = ::cudaErrorNotReady;
39  static constexpr Error_t errorHostMemoryAlreadyRegistered = ::cudaErrorHostMemoryAlreadyRegistered;
40  static constexpr Error_t errorHostMemoryNotRegistered = ::cudaErrorHostMemoryNotRegistered;
41  static constexpr Error_t errorUnsupportedLimit = ::cudaErrorUnsupportedLimit;
42  static constexpr Error_t errorUnknown = ::cudaErrorUnknown;
43 
44  static constexpr Flag_t eventDefault = cudaEventDefault;
45  static constexpr Flag_t eventBlockingSync = cudaEventBlockingSync;
46  static constexpr Flag_t eventDisableTiming = cudaEventDisableTiming;
47  static constexpr Flag_t eventInterprocess = cudaEventInterprocess;
48 
49  static constexpr Flag_t hostMallocDefault = cudaHostAllocDefault;
50  static constexpr Flag_t hostMallocMapped = cudaHostAllocMapped;
51  static constexpr Flag_t hostMallocPortable = cudaHostAllocPortable;
52  static constexpr Flag_t hostMallocWriteCombined = cudaHostAllocWriteCombined;
53  static constexpr Flag_t hostMallocCoherent = cudaHostAllocDefault; // Not supported.
54  static constexpr Flag_t hostMallocNonCoherent = cudaHostAllocDefault; // Not supported.
55 
56  static constexpr Flag_t hostRegisterDefault = cudaHostRegisterDefault;
57  static constexpr Flag_t hostRegisterPortable = cudaHostRegisterPortable;
58  static constexpr Flag_t hostRegisterMapped = cudaHostRegisterMapped;
59  static constexpr Flag_t hostRegisterIoMemory = cudaHostRegisterIoMemory;
60 
61  static constexpr MemcpyKind_t memcpyDefault = ::cudaMemcpyDefault;
62  static constexpr MemcpyKind_t memcpyDeviceToDevice = ::cudaMemcpyDeviceToDevice;
63  static constexpr MemcpyKind_t memcpyDeviceToHost = ::cudaMemcpyDeviceToHost;
64  static constexpr MemcpyKind_t memcpyHostToDevice = ::cudaMemcpyHostToDevice;
65 
66  static constexpr Flag_t streamDefault = cudaStreamDefault;
67  static constexpr Flag_t streamNonBlocking = cudaStreamNonBlocking;
68 
69  static constexpr DeviceAttr_t deviceAttributeMaxBlockDimX = ::cudaDevAttrMaxBlockDimX;
70  static constexpr DeviceAttr_t deviceAttributeMaxBlockDimY = ::cudaDevAttrMaxBlockDimY;
71  static constexpr DeviceAttr_t deviceAttributeMaxBlockDimZ = ::cudaDevAttrMaxBlockDimZ;
72  static constexpr DeviceAttr_t deviceAttributeMaxGridDimX = ::cudaDevAttrMaxGridDimX;
73  static constexpr DeviceAttr_t deviceAttributeMaxGridDimY = ::cudaDevAttrMaxGridDimY;
74  static constexpr DeviceAttr_t deviceAttributeMaxGridDimZ = ::cudaDevAttrMaxGridDimZ;
75  static constexpr DeviceAttr_t deviceAttributeMaxSharedMemoryPerBlock = ::cudaDevAttrMaxSharedMemoryPerBlock;
76  static constexpr DeviceAttr_t deviceAttributeMaxThreadsPerBlock = ::cudaDevAttrMaxThreadsPerBlock;
77  static constexpr DeviceAttr_t deviceAttributeMultiprocessorCount = ::cudaDevAttrMultiProcessorCount;
78  static constexpr DeviceAttr_t deviceAttributeWarpSize = ::cudaDevAttrWarpSize;
79 
80  static constexpr Limit_t limitPrintfFifoSize = ::cudaLimitPrintfFifoSize;
81  static constexpr Limit_t limitMallocHeapSize = ::cudaLimitMallocHeapSize;
82 
83  // Host function helper
84  // Encapsulates the different function signatures used by cudaStreamAddCallback and cudaLaunchHostFn, and the
85  // different calling conventions used by CUDA (__stdcall on Win32) and HIP (standard).
87  {
89  void* data_;
90 
91  static void CUDART_CB hostFunction(void* data)
92  {
93  auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
94  ptr->func_(ptr->data_);
95  delete ptr;
96  }
97 
98  static void CUDART_CB streamCallback(Stream_t, Error_t, void* data)
99  {
100  auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
101  ptr->func_(ptr->data_);
102  delete ptr;
103  }
104  };
105 
106  // Runtime API
107  static inline Error_t deviceGetAttribute(int* value, DeviceAttr_t attr, int device)
108  {
109  return ::cudaDeviceGetAttribute(value, attr, device);
110  }
111 
112  static inline Error_t deviceGetLimit(size_t* pValue, Limit_t limit)
113  {
114  return ::cudaDeviceGetLimit(pValue, limit);
115  }
116 
117  static inline Error_t deviceReset()
118  {
119  return ::cudaDeviceReset();
120  }
121 
122  static inline Error_t deviceSetLimit(Limit_t limit, size_t value)
123  {
124  return ::cudaDeviceSetLimit(limit, value);
125  }
126 
127  static inline Error_t deviceSynchronize()
128  {
129  return ::cudaDeviceSynchronize();
130  }
131 
132  static inline Error_t eventCreate(Event_t* event)
133  {
134  return ::cudaEventCreate(event);
135  }
136 
137  static inline Error_t eventCreateWithFlags(Event_t* event, Flag_t flags)
138  {
139  return ::cudaEventCreateWithFlags(event, flags);
140  }
141 
142  static inline Error_t eventDestroy(Event_t event)
143  {
144  return ::cudaEventDestroy(event);
145  }
146 
147  static inline Error_t eventQuery(Event_t event)
148  {
149  return ::cudaEventQuery(event);
150  }
151 
152  static inline Error_t eventRecord(Event_t event, Stream_t stream)
153  {
154  return ::cudaEventRecord(event, stream);
155  }
156 
157  static inline Error_t eventSynchronize(Event_t event)
158  {
159  return ::cudaEventSynchronize(event);
160  }
161 
162  static inline Error_t free(void* devPtr)
163  {
164  return ::cudaFree(devPtr);
165  }
166 
167  static inline Error_t freeAsync([[maybe_unused]] void* devPtr, [[maybe_unused]] Stream_t stream)
168  {
169 # if CUDART_VERSION >= 11020
170  return ::cudaFreeAsync(devPtr, stream);
171 # else
172  // Not implemented.
173  return errorUnknown;
174 # endif
175  }
176 
177  static inline Error_t funcGetAttributes(FuncAttributes_t* attr, void const* func)
178  {
179  return ::cudaFuncGetAttributes(attr, func);
180  }
181 
182  template<typename T>
183  static inline Error_t funcGetAttributes(FuncAttributes_t* attr, T* func)
184  {
185 # if BOOST_COMP_GNUC
186 # pragma GCC diagnostic push
187 # pragma GCC diagnostic ignored "-Wconditionally-supported"
188 # endif
189  return ::cudaFuncGetAttributes(attr, reinterpret_cast<void const*>(func));
190 # if BOOST_COMP_GNUC
191 # pragma GCC diagnostic pop
192 # endif
193  }
194 
195  static inline Error_t getDeviceCount(int* count)
196  {
197  return ::cudaGetDeviceCount(count);
198  }
199 
200  static inline Error_t getDeviceProperties(DeviceProp_t* prop, int device)
201  {
202  return ::cudaGetDeviceProperties(prop, device);
203  }
204 
205  static inline char const* getErrorName(Error_t error)
206  {
207  return ::cudaGetErrorName(error);
208  }
209 
210  static inline char const* getErrorString(Error_t error)
211  {
212  return ::cudaGetErrorString(error);
213  }
214 
215  static inline Error_t getLastError()
216  {
217  return ::cudaGetLastError();
218  }
219 
220  static inline Error_t getSymbolAddress(void** devPtr, void const* symbol)
221  {
222  return ::cudaGetSymbolAddress(devPtr, symbol);
223  }
224 
225  template<class T>
226  static inline Error_t getSymbolAddress(void** devPtr, T const& symbol)
227  {
228  return ::cudaGetSymbolAddress(devPtr, symbol);
229  }
230 
231  static inline Error_t hostGetDevicePointer(void** pDevice, void* pHost, Flag_t flags)
232  {
233  return ::cudaHostGetDevicePointer(pDevice, pHost, flags);
234  }
235 
236  static inline Error_t hostFree(void* ptr)
237  {
238  return ::cudaFreeHost(ptr);
239  }
240 
241  static inline Error_t hostMalloc(void** ptr, size_t size, Flag_t flags)
242  {
243  return ::cudaHostAlloc(ptr, size, flags);
244  }
245 
246  static inline Error_t hostRegister(void* ptr, size_t size, Flag_t flags)
247  {
248  return ::cudaHostRegister(ptr, size, flags);
249  }
250 
251  static inline Error_t hostUnregister(void* ptr)
252  {
253  return ::cudaHostUnregister(ptr);
254  }
255 
256  static inline Error_t launchHostFunc(Stream_t stream, HostFn_t fn, void* userData)
257  {
258 # if CUDART_VERSION >= 10000
259  // Wrap the host function using the proper calling convention
260  return ::cudaLaunchHostFunc(stream, HostFnAdaptor::hostFunction, new HostFnAdaptor{fn, userData});
261 # else
262  // Emulate cudaLaunchHostFunc using cudaStreamAddCallback with a callback adaptor.
263  return ::cudaStreamAddCallback(stream, HostFnAdaptor::streamCallback, new HostFnAdaptor{fn, userData}, 0);
264 # endif
265  }
266 
267  static inline Error_t malloc(void** devPtr, size_t size)
268  {
269  return ::cudaMalloc(devPtr, size);
270  }
271 
272  static inline Error_t malloc3D(PitchedPtr_t* pitchedDevPtr, Extent_t extent)
273  {
274  return ::cudaMalloc3D(pitchedDevPtr, extent);
275  }
276 
277  static inline Error_t mallocAsync(
278  [[maybe_unused]] void** devPtr,
279  [[maybe_unused]] size_t size,
280  [[maybe_unused]] Stream_t stream)
281  {
282 # if CUDART_VERSION >= 11020
283  return ::cudaMallocAsync(devPtr, size, stream);
284 # else
285  // Not implemented.
286  return errorUnknown;
287 # endif
288  }
289 
290  static inline Error_t mallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height)
291  {
292  return ::cudaMallocPitch(devPtr, pitch, width, height);
293  }
294 
295  static inline Error_t memGetInfo(size_t* free, size_t* total)
296  {
297  return ::cudaMemGetInfo(free, total);
298  }
299 
300  static inline Error_t memcpy(void* dst, void const* src, size_t count, MemcpyKind_t kind)
301  {
302  return ::cudaMemcpy(dst, src, count, kind);
303  }
304 
305  static inline Error_t memcpy2DAsync(
306  void* dst,
307  size_t dpitch,
308  void const* src,
309  size_t spitch,
310  size_t width,
311  size_t height,
312  MemcpyKind_t kind,
313  Stream_t stream)
314  {
315  return ::cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream);
316  }
317 
318  static inline Error_t memcpy3DAsync(Memcpy3DParms_t const* p, Stream_t stream)
319  {
320  return ::cudaMemcpy3DAsync(p, stream);
321  }
322 
323  static inline Error_t memcpyAsync(void* dst, void const* src, size_t count, MemcpyKind_t kind, Stream_t stream)
324  {
325  return ::cudaMemcpyAsync(dst, src, count, kind, stream);
326  }
327 
328  static inline Error_t memset2DAsync(
329  void* devPtr,
330  size_t pitch,
331  int value,
332  size_t width,
333  size_t height,
334  Stream_t stream)
335  {
336  return ::cudaMemset2DAsync(devPtr, pitch, value, width, height, stream);
337  }
338 
339  static inline Error_t memset3DAsync(PitchedPtr_t pitchedDevPtr, int value, Extent_t extent, Stream_t stream)
340  {
341  return ::cudaMemset3DAsync(pitchedDevPtr, value, extent, stream);
342  }
343 
344  static inline Error_t memsetAsync(void* devPtr, int value, size_t count, Stream_t stream)
345  {
346  return ::cudaMemsetAsync(devPtr, value, count, stream);
347  }
348 
349  static inline Error_t setDevice(int device)
350  {
351  return ::cudaSetDevice(device);
352  }
353 
354  static inline Error_t streamCreate(Stream_t* pStream)
355  {
356  return ::cudaStreamCreate(pStream);
357  }
358 
359  static inline Error_t streamCreateWithFlags(Stream_t* pStream, Flag_t flags)
360  {
361  return ::cudaStreamCreateWithFlags(pStream, flags);
362  }
363 
364  static inline Error_t streamDestroy(Stream_t stream)
365  {
366  return ::cudaStreamDestroy(stream);
367  }
368 
369  static inline Error_t streamQuery(Stream_t stream)
370  {
371  return ::cudaStreamQuery(stream);
372  }
373 
374  static inline Error_t streamSynchronize(Stream_t stream)
375  {
376  return ::cudaStreamSynchronize(stream);
377  }
378 
379  static inline Error_t streamWaitEvent(Stream_t stream, Event_t event, Flag_t flags)
380  {
381  return ::cudaStreamWaitEvent(stream, event, flags);
382  }
383 
384  static inline PitchedPtr_t makePitchedPtr(void* d, size_t p, size_t xsz, size_t ysz)
385  {
386  return ::make_cudaPitchedPtr(d, p, xsz, ysz);
387  }
388 
389  static inline Pos_t makePos(size_t x, size_t y, size_t z)
390  {
391  return ::make_cudaPos(x, y, z);
392  }
393 
394  static inline Extent_t makeExtent(size_t w, size_t h, size_t d)
395  {
396  return ::make_cudaExtent(w, h, d);
397  }
398  };
399 
400 } // namespace alpaka
401 
402 #endif // ALPAKA_ACC_GPU_CUDA_ENABLED
The alpaka accelerator library.
static void CUDART_CB streamCallback(Stream_t, Error_t, void *data)
Definition: ApiCudaRt.hpp:98
static void CUDART_CB hostFunction(void *data)
Definition: ApiCudaRt.hpp:91
static constexpr DeviceAttr_t deviceAttributeMaxBlockDimX
Definition: ApiCudaRt.hpp:69
static Error_t getDeviceCount(int *count)
Definition: ApiCudaRt.hpp:195
static Error_t hostMalloc(void **ptr, size_t size, Flag_t flags)
Definition: ApiCudaRt.hpp:241
static constexpr Flag_t eventDefault
Definition: ApiCudaRt.hpp:44
static Error_t deviceReset()
Definition: ApiCudaRt.hpp:117
static constexpr Flag_t eventInterprocess
Definition: ApiCudaRt.hpp:47
static Error_t streamWaitEvent(Stream_t stream, Event_t event, Flag_t flags)
Definition: ApiCudaRt.hpp:379
unsigned int Flag_t
Definition: ApiCudaRt.hpp:26
static constexpr DeviceAttr_t deviceAttributeMultiprocessorCount
Definition: ApiCudaRt.hpp:77
static constexpr DeviceAttr_t deviceAttributeMaxBlockDimZ
Definition: ApiCudaRt.hpp:71
static Error_t funcGetAttributes(FuncAttributes_t *attr, T *func)
Definition: ApiCudaRt.hpp:183
::cudaStream_t Stream_t
Definition: ApiCudaRt.hpp:34
::cudaMemcpy3DParms Memcpy3DParms_t
Definition: ApiCudaRt.hpp:30
static constexpr Flag_t eventDisableTiming
Definition: ApiCudaRt.hpp:46
static constexpr Flag_t hostMallocPortable
Definition: ApiCudaRt.hpp:51
static Error_t memcpyAsync(void *dst, void const *src, size_t count, MemcpyKind_t kind, Stream_t stream)
Definition: ApiCudaRt.hpp:323
static Error_t memset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, Stream_t stream)
Definition: ApiCudaRt.hpp:328
static constexpr Flag_t hostMallocWriteCombined
Definition: ApiCudaRt.hpp:52
static Error_t streamDestroy(Stream_t stream)
Definition: ApiCudaRt.hpp:364
::cudaEvent_t Event_t
Definition: ApiCudaRt.hpp:24
static constexpr Flag_t eventBlockingSync
Definition: ApiCudaRt.hpp:45
static constexpr char name[]
Definition: ApiCudaRt.hpp:17
static Error_t getLastError()
Definition: ApiCudaRt.hpp:215
static constexpr DeviceAttr_t deviceAttributeMaxSharedMemoryPerBlock
Definition: ApiCudaRt.hpp:75
::cudaPitchedPtr PitchedPtr_t
Definition: ApiCudaRt.hpp:32
static constexpr Flag_t hostMallocDefault
Definition: ApiCudaRt.hpp:49
static Error_t malloc(void **devPtr, size_t size)
Definition: ApiCudaRt.hpp:267
static Error_t hostUnregister(void *ptr)
Definition: ApiCudaRt.hpp:251
static Error_t hostFree(void *ptr)
Definition: ApiCudaRt.hpp:236
static constexpr Flag_t hostRegisterPortable
Definition: ApiCudaRt.hpp:57
static Error_t deviceSetLimit(Limit_t limit, size_t value)
Definition: ApiCudaRt.hpp:122
static constexpr Limit_t limitMallocHeapSize
Definition: ApiCudaRt.hpp:81
static Error_t memGetInfo(size_t *free, size_t *total)
Definition: ApiCudaRt.hpp:295
static constexpr Error_t errorNotReady
Definition: ApiCudaRt.hpp:38
static Error_t eventSynchronize(Event_t event)
Definition: ApiCudaRt.hpp:157
static Error_t mallocAsync([[maybe_unused]] void **devPtr, [[maybe_unused]] size_t size, [[maybe_unused]] Stream_t stream)
Definition: ApiCudaRt.hpp:277
static char const * getErrorName(Error_t error)
Definition: ApiCudaRt.hpp:205
static PitchedPtr_t makePitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
Definition: ApiCudaRt.hpp:384
static constexpr Flag_t hostMallocNonCoherent
Definition: ApiCudaRt.hpp:54
static constexpr Limit_t limitPrintfFifoSize
Definition: ApiCudaRt.hpp:80
static constexpr auto version
Definition: ApiCudaRt.hpp:18
static Error_t streamCreate(Stream_t *pStream)
Definition: ApiCudaRt.hpp:354
static Error_t mallocPitch(void **devPtr, size_t *pitch, size_t width, size_t height)
Definition: ApiCudaRt.hpp:290
static Error_t funcGetAttributes(FuncAttributes_t *attr, void const *func)
Definition: ApiCudaRt.hpp:177
static constexpr Flag_t streamNonBlocking
Definition: ApiCudaRt.hpp:67
static Error_t hostGetDevicePointer(void **pDevice, void *pHost, Flag_t flags)
Definition: ApiCudaRt.hpp:231
static constexpr Flag_t hostMallocMapped
Definition: ApiCudaRt.hpp:50
static Extent_t makeExtent(size_t w, size_t h, size_t d)
Definition: ApiCudaRt.hpp:394
static Error_t deviceSynchronize()
Definition: ApiCudaRt.hpp:127
::cudaMemcpyKind MemcpyKind_t
Definition: ApiCudaRt.hpp:31
static constexpr Error_t success
Definition: ApiCudaRt.hpp:37
::cudaPos Pos_t
Definition: ApiCudaRt.hpp:33
static Error_t eventQuery(Event_t event)
Definition: ApiCudaRt.hpp:147
static Error_t deviceGetAttribute(int *value, DeviceAttr_t attr, int device)
Definition: ApiCudaRt.hpp:107
static char const * getErrorString(Error_t error)
Definition: ApiCudaRt.hpp:210
static constexpr Flag_t hostRegisterDefault
Definition: ApiCudaRt.hpp:56
static constexpr DeviceAttr_t deviceAttributeMaxThreadsPerBlock
Definition: ApiCudaRt.hpp:76
::cudaError_t Error_t
Definition: ApiCudaRt.hpp:23
static constexpr Error_t errorUnknown
Definition: ApiCudaRt.hpp:42
static Error_t getSymbolAddress(void **devPtr, T const &symbol)
Definition: ApiCudaRt.hpp:226
static Error_t memcpy2DAsync(void *dst, size_t dpitch, void const *src, size_t spitch, size_t width, size_t height, MemcpyKind_t kind, Stream_t stream)
Definition: ApiCudaRt.hpp:305
::cudaFuncAttributes FuncAttributes_t
Definition: ApiCudaRt.hpp:27
static Error_t free(void *devPtr)
Definition: ApiCudaRt.hpp:162
static constexpr MemcpyKind_t memcpyDeviceToDevice
Definition: ApiCudaRt.hpp:62
static Error_t streamSynchronize(Stream_t stream)
Definition: ApiCudaRt.hpp:374
static Pos_t makePos(size_t x, size_t y, size_t z)
Definition: ApiCudaRt.hpp:389
static constexpr MemcpyKind_t memcpyHostToDevice
Definition: ApiCudaRt.hpp:64
static constexpr Flag_t hostRegisterMapped
Definition: ApiCudaRt.hpp:58
void(*)(void *data) HostFn_t
Definition: ApiCudaRt.hpp:28
static Error_t streamCreateWithFlags(Stream_t *pStream, Flag_t flags)
Definition: ApiCudaRt.hpp:359
static Error_t streamQuery(Stream_t stream)
Definition: ApiCudaRt.hpp:369
static constexpr Error_t errorHostMemoryAlreadyRegistered
Definition: ApiCudaRt.hpp:39
static Error_t getDeviceProperties(DeviceProp_t *prop, int device)
Definition: ApiCudaRt.hpp:200
static constexpr Flag_t hostMallocCoherent
Definition: ApiCudaRt.hpp:53
static Error_t getSymbolAddress(void **devPtr, void const *symbol)
Definition: ApiCudaRt.hpp:220
static Error_t memcpy3DAsync(Memcpy3DParms_t const *p, Stream_t stream)
Definition: ApiCudaRt.hpp:318
static Error_t eventRecord(Event_t event, Stream_t stream)
Definition: ApiCudaRt.hpp:152
static Error_t launchHostFunc(Stream_t stream, HostFn_t fn, void *userData)
Definition: ApiCudaRt.hpp:256
::cudaLimit Limit_t
Definition: ApiCudaRt.hpp:29
static Error_t eventCreate(Event_t *event)
Definition: ApiCudaRt.hpp:132
static constexpr Flag_t hostRegisterIoMemory
Definition: ApiCudaRt.hpp:59
::cudaExtent Extent_t
Definition: ApiCudaRt.hpp:25
static constexpr DeviceAttr_t deviceAttributeMaxGridDimX
Definition: ApiCudaRt.hpp:72
static constexpr Flag_t streamDefault
Definition: ApiCudaRt.hpp:66
static Error_t hostRegister(void *ptr, size_t size, Flag_t flags)
Definition: ApiCudaRt.hpp:246
static Error_t setDevice(int device)
Definition: ApiCudaRt.hpp:349
static Error_t freeAsync([[maybe_unused]] void *devPtr, [[maybe_unused]] Stream_t stream)
Definition: ApiCudaRt.hpp:167
static constexpr MemcpyKind_t memcpyDeviceToHost
Definition: ApiCudaRt.hpp:63
::cudaDeviceAttr DeviceAttr_t
Definition: ApiCudaRt.hpp:21
static constexpr DeviceAttr_t deviceAttributeMaxBlockDimY
Definition: ApiCudaRt.hpp:70
static Error_t malloc3D(PitchedPtr_t *pitchedDevPtr, Extent_t extent)
Definition: ApiCudaRt.hpp:272
static Error_t memset3DAsync(PitchedPtr_t pitchedDevPtr, int value, Extent_t extent, Stream_t stream)
Definition: ApiCudaRt.hpp:339
::cudaDeviceProp DeviceProp_t
Definition: ApiCudaRt.hpp:22
static Error_t memcpy(void *dst, void const *src, size_t count, MemcpyKind_t kind)
Definition: ApiCudaRt.hpp:300
static constexpr MemcpyKind_t memcpyDefault
Definition: ApiCudaRt.hpp:61
static constexpr DeviceAttr_t deviceAttributeMaxGridDimZ
Definition: ApiCudaRt.hpp:74
static Error_t deviceGetLimit(size_t *pValue, Limit_t limit)
Definition: ApiCudaRt.hpp:112
static Error_t eventCreateWithFlags(Event_t *event, Flag_t flags)
Definition: ApiCudaRt.hpp:137
static constexpr Error_t errorHostMemoryNotRegistered
Definition: ApiCudaRt.hpp:40
static constexpr DeviceAttr_t deviceAttributeWarpSize
Definition: ApiCudaRt.hpp:78
static Error_t memsetAsync(void *devPtr, int value, size_t count, Stream_t stream)
Definition: ApiCudaRt.hpp:344
static constexpr DeviceAttr_t deviceAttributeMaxGridDimY
Definition: ApiCudaRt.hpp:73
static Error_t eventDestroy(Event_t event)
Definition: ApiCudaRt.hpp:142
static constexpr Error_t errorUnsupportedLimit
Definition: ApiCudaRt.hpp:41