alpaka
Abstraction Library for Parallel Kernel Acceleration
ApiHipRt.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Andrea Bocci
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include <boost/predef.h>
8 
9 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
10 
11 # include <hip/hip_runtime_api.h>
12 # include <hip/hip_version.h>
13 
14 namespace alpaka
15 {
16  struct ApiHipRt
17  {
18  // Names
19  static constexpr char name[] = "Hip";
20  static constexpr auto version = BOOST_VERSION_NUMBER(HIP_VERSION_MAJOR, HIP_VERSION_MINOR, 0);
21 
22  // Types
23  using DeviceAttr_t = ::hipDeviceAttribute_t;
24  using DeviceProp_t = ::hipDeviceProp_t;
25  using Error_t = ::hipError_t;
26  using Event_t = ::hipEvent_t;
27  using Extent_t = ::hipExtent;
28  using Flag_t = unsigned int;
29  using FuncAttributes_t = ::hipFuncAttributes;
30  using HostFn_t = void (*)(void* data); // same as hipHostFn_t
31  using Limit_t = ::hipLimit_t;
32  using Memcpy3DParms_t = ::hipMemcpy3DParms;
33  using MemcpyKind_t = ::hipMemcpyKind;
34  using PitchedPtr_t = ::hipPitchedPtr;
35  using Pos_t = ::hipPos;
36  using Stream_t = ::hipStream_t;
37 
38  // Constants
39  static constexpr Error_t success = ::hipSuccess;
40  static constexpr Error_t errorNotReady = ::hipErrorNotReady;
41  static constexpr Error_t errorHostMemoryAlreadyRegistered = ::hipErrorHostMemoryAlreadyRegistered;
42  static constexpr Error_t errorHostMemoryNotRegistered = ::hipErrorHostMemoryNotRegistered;
43  static constexpr Error_t errorUnsupportedLimit = ::hipErrorUnsupportedLimit;
44  static constexpr Error_t errorUnknown = ::hipErrorUnknown;
45 
46  static constexpr Flag_t eventDefault = hipEventDefault;
47  static constexpr Flag_t eventBlockingSync = hipEventBlockingSync;
48  static constexpr Flag_t eventDisableTiming = hipEventDisableTiming;
49  static constexpr Flag_t eventInterprocess = hipEventInterprocess;
50 
51  static constexpr Flag_t hostMallocDefault = hipHostMallocDefault;
52  static constexpr Flag_t hostMallocMapped = hipHostMallocMapped;
53  static constexpr Flag_t hostMallocPortable = hipHostMallocPortable;
54  static constexpr Flag_t hostMallocWriteCombined = hipHostMallocWriteCombined;
55  static constexpr Flag_t hostMallocCoherent = hipHostMallocCoherent;
56  static constexpr Flag_t hostMallocNonCoherent = hipHostMallocNonCoherent;
57 
58  static constexpr Flag_t hostRegisterDefault = hipHostRegisterDefault;
59  static constexpr Flag_t hostRegisterPortable = hipHostRegisterPortable;
60  static constexpr Flag_t hostRegisterMapped = hipHostRegisterMapped;
61  static constexpr Flag_t hostRegisterIoMemory = hipHostRegisterIoMemory;
62 
63  static constexpr MemcpyKind_t memcpyDefault = ::hipMemcpyDefault;
64  static constexpr MemcpyKind_t memcpyDeviceToDevice = ::hipMemcpyDeviceToDevice;
65  static constexpr MemcpyKind_t memcpyDeviceToHost = ::hipMemcpyDeviceToHost;
66  static constexpr MemcpyKind_t memcpyHostToDevice = ::hipMemcpyHostToDevice;
67 
68  static constexpr Flag_t streamDefault = hipStreamDefault;
69  static constexpr Flag_t streamNonBlocking = hipStreamNonBlocking;
70 
71  static constexpr DeviceAttr_t deviceAttributeMaxBlockDimX = ::hipDeviceAttributeMaxBlockDimX;
72  static constexpr DeviceAttr_t deviceAttributeMaxBlockDimY = ::hipDeviceAttributeMaxBlockDimY;
73  static constexpr DeviceAttr_t deviceAttributeMaxBlockDimZ = ::hipDeviceAttributeMaxBlockDimZ;
74  static constexpr DeviceAttr_t deviceAttributeMaxGridDimX = ::hipDeviceAttributeMaxGridDimX;
75  static constexpr DeviceAttr_t deviceAttributeMaxGridDimY = ::hipDeviceAttributeMaxGridDimY;
76  static constexpr DeviceAttr_t deviceAttributeMaxGridDimZ = ::hipDeviceAttributeMaxGridDimZ;
77  static constexpr DeviceAttr_t deviceAttributeMaxSharedMemoryPerBlock
78  = ::hipDeviceAttributeMaxSharedMemoryPerBlock;
79  static constexpr DeviceAttr_t deviceAttributeMaxThreadsPerBlock = ::hipDeviceAttributeMaxThreadsPerBlock;
80  static constexpr DeviceAttr_t deviceAttributeMultiprocessorCount = ::hipDeviceAttributeMultiprocessorCount;
81  static constexpr DeviceAttr_t deviceAttributeWarpSize = ::hipDeviceAttributeWarpSize;
82 
83 # if HIP_VERSION >= 40'500'000
84  static constexpr Limit_t limitPrintfFifoSize = ::hipLimitPrintfFifoSize;
85 # else
86  static constexpr Limit_t limitPrintfFifoSize
87  = static_cast<Limit_t>(0x01); // Implemented only in ROCm 4.5.0 and later.
88 # endif
89  static constexpr Limit_t limitMallocHeapSize = ::hipLimitMallocHeapSize;
90 
91  // Host function helper
92  // Encapsulates the different function signatures used by hipStreamAddCallback and hipLaunchHostFn, and the
93  // different calling conventions used by CUDA (__stdcall on Win32) and HIP (standard).
94  struct HostFnAdaptor
95  {
96  HostFn_t func_;
97  void* data_;
98 
99  static void hostFunction(void* data)
100  {
101  auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
102  ptr->func_(ptr->data_);
103  delete ptr;
104  }
105 
106  static void streamCallback(Stream_t, Error_t, void* data)
107  {
108  auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
109  ptr->func_(ptr->data_);
110  delete ptr;
111  }
112  };
113 
114  // Runtime API
115  static inline Error_t deviceGetAttribute(int* value, DeviceAttr_t attr, int device)
116  {
117  return ::hipDeviceGetAttribute(value, attr, device);
118  }
119 
120  static inline Error_t deviceGetLimit(size_t* pValue, Limit_t limit)
121  {
122 # if HIP_VERSION < 40'500'000
123  if(limit == limitPrintfFifoSize)
124  {
125  // Implemented only in ROCm 4.5.0 and later.
126  return errorUnsupportedLimit;
127  }
128 # endif
129  return ::hipDeviceGetLimit(pValue, limit);
130  }
131 
132  static inline Error_t deviceReset()
133  {
134  return ::hipDeviceReset();
135  }
136 
137  static inline Error_t deviceSetLimit(Limit_t /* limit */, size_t /* value */)
138  {
139  // Not implemented.
140  return errorUnsupportedLimit;
141  }
142 
143  static inline Error_t deviceSynchronize()
144  {
145  return ::hipDeviceSynchronize();
146  }
147 
148  static inline Error_t eventCreate(Event_t* event)
149  {
150  return ::hipEventCreate(event);
151  }
152 
153  static inline Error_t eventCreateWithFlags(Event_t* event, Flag_t flags)
154  {
155  return ::hipEventCreateWithFlags(event, flags);
156  }
157 
158  static inline Error_t eventDestroy(Event_t event)
159  {
160  return ::hipEventDestroy(event);
161  }
162 
163  static inline Error_t eventQuery(Event_t event)
164  {
165  return ::hipEventQuery(event);
166  }
167 
168  static inline Error_t eventRecord(Event_t event, Stream_t stream)
169  {
170  return ::hipEventRecord(event, stream);
171  }
172 
173  static inline Error_t eventSynchronize(Event_t event)
174  {
175  return ::hipEventSynchronize(event);
176  }
177 
178  static inline Error_t free(void* devPtr)
179  {
180  return ::hipFree(devPtr);
181  }
182 
183  static inline Error_t freeAsync([[maybe_unused]] void* devPtr, [[maybe_unused]] Stream_t stream)
184  {
185  // stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later.
186 # if HIP_VERSION >= 50'300'000
187  // hipFreeAsync fails on a null pointer deallocation
188  if(devPtr)
189  {
190  return ::hipFreeAsync(devPtr, stream);
191  }
192  else
193  {
194  return ::hipSuccess;
195  }
196 # else
197  // Not implemented.
198  return errorUnknown;
199 # endif
200  }
201 
202  static inline Error_t funcGetAttributes(FuncAttributes_t* attr, void const* func)
203  {
204  return ::hipFuncGetAttributes(attr, func);
205  }
206 
207  template<typename T>
208  static inline Error_t funcGetAttributes(FuncAttributes_t* attr, T* func)
209  {
210 # if BOOST_COMP_GNUC
211 # pragma GCC diagnostic push
212 # pragma GCC diagnostic ignored "-Wconditionally-supported"
213 # endif
214  return ::hipFuncGetAttributes(attr, reinterpret_cast<void const*>(func));
215 # if BOOST_COMP_GNUC
216 # pragma GCC diagnostic pop
217 # endif
218  }
219 
220  static inline Error_t getDeviceCount(int* count)
221  {
222  return ::hipGetDeviceCount(count);
223  }
224 
225  static inline Error_t getDeviceProperties(DeviceProp_t* prop, int device)
226  {
227  return ::hipGetDeviceProperties(prop, device);
228  }
229 
230  static inline char const* getErrorName(Error_t error)
231  {
232  return ::hipGetErrorName(error);
233  }
234 
235  static inline char const* getErrorString(Error_t error)
236  {
237  return ::hipGetErrorString(error);
238  }
239 
240  static inline Error_t getLastError()
241  {
242  return ::hipGetLastError();
243  }
244 
245  static inline Error_t getSymbolAddress(void** devPtr, void const* symbol)
246  {
247  return ::hipGetSymbolAddress(devPtr, symbol);
248  }
249 
250  template<class T>
251  static inline Error_t getSymbolAddress(void** devPtr, T const& symbol)
252  {
253  return ::hipGetSymbolAddress(devPtr, symbol);
254  }
255 
256  static inline Error_t hostGetDevicePointer(void** pDevice, void* pHost, Flag_t flags)
257  {
258  return ::hipHostGetDevicePointer(pDevice, pHost, flags);
259  }
260 
261  static inline Error_t hostFree(void* ptr)
262  {
263  return ::hipHostFree(ptr);
264  }
265 
266  static inline Error_t hostMalloc(void** ptr, size_t size, Flag_t flags)
267  {
268  return ::hipHostMalloc(ptr, size, flags);
269  }
270 
271  static inline Error_t hostRegister(void* ptr, size_t size, Flag_t flags)
272  {
273  return ::hipHostRegister(ptr, size, flags);
274  }
275 
276  static inline Error_t hostUnregister(void* ptr)
277  {
278  return ::hipHostUnregister(ptr);
279  }
280 
281  static inline Error_t launchHostFunc(Stream_t stream, HostFn_t fn, void* userData)
282  {
283  // hipLaunchHostFunc is implemented only in ROCm 5.4.0 and later.
284 # if HIP_VERSION >= 50'400'000
285  // Wrap the host function using the proper calling convention.
286  return ::hipLaunchHostFunc(stream, HostFnAdaptor::hostFunction, new HostFnAdaptor{fn, userData});
287 # else
288  // Emulate hipLaunchHostFunc using hipStreamAddCallback with a callback adaptor.
289  return ::hipStreamAddCallback(stream, HostFnAdaptor::streamCallback, new HostFnAdaptor{fn, userData}, 0);
290 # endif
291  }
292 
293  static inline Error_t malloc(void** devPtr, size_t size)
294  {
295  return ::hipMalloc(devPtr, size);
296  }
297 
298  static inline Error_t malloc3D(PitchedPtr_t* pitchedDevPtr, Extent_t extent)
299  {
300  return ::hipMalloc3D(pitchedDevPtr, extent);
301  }
302 
303  static inline Error_t mallocAsync(
304  [[maybe_unused]] void** devPtr,
305  [[maybe_unused]] size_t size,
306  [[maybe_unused]] Stream_t stream)
307  {
308  // stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later.
309 # if HIP_VERSION >= 50'600'000
310  return ::hipMallocAsync(devPtr, size, stream);
311 # elif HIP_VERSION >= 50'300'000
312  // before ROCm 5.6.0, hipMallocAsync fails for an allocation of 0 bytes
313  if(size > 0)
314  {
315  return ::hipMallocAsync(devPtr, size, stream);
316  }
317  else
318  {
319  // make sure the pointer can safely be passed to hipFreeAsync
320  *devPtr = nullptr;
321  return ::hipSuccess;
322  }
323 # else
324  // Not implemented.
325  return errorUnknown;
326 # endif
327  }
328 
329  static inline Error_t mallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height)
330  {
331  return ::hipMallocPitch(devPtr, pitch, width, height);
332  }
333 
334  static inline Error_t memGetInfo(size_t* free, size_t* total)
335  {
336  return ::hipMemGetInfo(free, total);
337  }
338 
339  static inline Error_t memcpy(void* dst, void const* src, size_t count, MemcpyKind_t kind)
340  {
341  return ::hipMemcpy(dst, src, count, kind);
342  }
343 
344  static inline Error_t memcpy2DAsync(
345  void* dst,
346  size_t dpitch,
347  void const* src,
348  size_t spitch,
349  size_t width,
350  size_t height,
351  MemcpyKind_t kind,
352  Stream_t stream)
353  {
354  return ::hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream);
355  }
356 
357  static inline Error_t memcpy3DAsync(Memcpy3DParms_t const* p, Stream_t stream)
358  {
359  return ::hipMemcpy3DAsync(p, stream);
360  }
361 
362  static inline Error_t memcpyAsync(void* dst, void const* src, size_t count, MemcpyKind_t kind, Stream_t stream)
363  {
364  return ::hipMemcpyAsync(dst, src, count, kind, stream);
365  }
366 
367  static inline Error_t memset2DAsync(
368  void* devPtr,
369  size_t pitch,
370  int value,
371  size_t width,
372  size_t height,
373  Stream_t stream)
374  {
375  return ::hipMemset2DAsync(devPtr, pitch, value, width, height, stream);
376  }
377 
378  static inline Error_t memset3DAsync(PitchedPtr_t pitchedDevPtr, int value, Extent_t extent, Stream_t stream)
379  {
380  return ::hipMemset3DAsync(pitchedDevPtr, value, extent, stream);
381  }
382 
383  static inline Error_t memsetAsync(void* devPtr, int value, size_t count, Stream_t stream)
384  {
385  return ::hipMemsetAsync(devPtr, value, count, stream);
386  }
387 
388  static inline Error_t setDevice(int device)
389  {
390  return ::hipSetDevice(device);
391  }
392 
393  static inline Error_t streamCreate(Stream_t* pStream)
394  {
395  return ::hipStreamCreate(pStream);
396  }
397 
398  static inline Error_t streamCreateWithFlags(Stream_t* pStream, Flag_t flags)
399  {
400  return ::hipStreamCreateWithFlags(pStream, flags);
401  }
402 
403  static inline Error_t streamDestroy(Stream_t stream)
404  {
405  return ::hipStreamDestroy(stream);
406  }
407 
408  static inline Error_t streamQuery(Stream_t stream)
409  {
410  return ::hipStreamQuery(stream);
411  }
412 
413  static inline Error_t streamSynchronize(Stream_t stream)
414  {
415  return ::hipStreamSynchronize(stream);
416  }
417 
418  static inline Error_t streamWaitEvent(Stream_t stream, Event_t event, Flag_t flags)
419  {
420  return ::hipStreamWaitEvent(stream, event, flags);
421  }
422 
423  static inline PitchedPtr_t makePitchedPtr(void* d, size_t p, size_t xsz, size_t ysz)
424  {
425  return ::make_hipPitchedPtr(d, p, xsz, ysz);
426  }
427 
428  static inline Pos_t makePos(size_t x, size_t y, size_t z)
429  {
430  return ::make_hipPos(x, y, z);
431  }
432 
433  static inline Extent_t makeExtent(size_t w, size_t h, size_t d)
434  {
435  return ::make_hipExtent(w, h, d);
436  }
437  };
438 
439 } // namespace alpaka
440 
441 #endif // ALPAKA_ACC_GPU_HIP_ENABLED
The alpaka accelerator library.
ALPAKA_FN_HOST auto free(TAlloc const &alloc, T const *const ptr) -> void
Frees the memory identified by the given pointer.
Definition: Traits.hpp:41
ALPAKA_FN_HOST auto memcpy(TQueue &queue, alpaka::detail::DevGlobalImplGeneric< TTag, TTypeDst > &viewDst, TViewSrc const &viewSrc) -> void
ALPAKA_FN_HOST auto malloc(TAlloc const &alloc, std::size_t const &sizeElems) -> T *
Definition: Traits.hpp:33