alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
ApiHipRt.hpp
Go to the documentation of this file.
1/* Copyright 2025 Andrea Bocci, Maria Michailidi
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
9
10#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
11
12# include <hip/hip_runtime_api.h>
13# include <hip/hip_version.h>
14
15namespace alpaka
16{
17 struct ApiHipRt
18 {
19 // Names
20 static constexpr char name[] = "Hip";
21 static constexpr auto version = ALPAKA_VERSION_NUMBER(HIP_VERSION_MAJOR, HIP_VERSION_MINOR, 0);
22
23 // Types
24 using DeviceAttr_t = ::hipDeviceAttribute_t;
25 using DeviceProp_t = ::hipDeviceProp_t;
26 using Error_t = ::hipError_t;
27 using Event_t = ::hipEvent_t;
28 using Extent_t = ::hipExtent;
29 using Flag_t = unsigned int;
30 using FuncAttributes_t = ::hipFuncAttributes;
31 using HostFn_t = void (*)(void* data); // same as hipHostFn_t
32 using Limit_t = ::hipLimit_t;
33 using Memcpy3DParms_t = ::hipMemcpy3DParms;
34 using MemcpyKind_t = ::hipMemcpyKind;
35 using PitchedPtr_t = ::hipPitchedPtr;
36 using Pos_t = ::hipPos;
37 using Stream_t = ::hipStream_t;
38
39 // Constants
40 static constexpr Error_t success = ::hipSuccess;
41 static constexpr Error_t errorNotReady = ::hipErrorNotReady;
42 static constexpr Error_t errorHostMemoryAlreadyRegistered = ::hipErrorHostMemoryAlreadyRegistered;
43 static constexpr Error_t errorHostMemoryNotRegistered = ::hipErrorHostMemoryNotRegistered;
44 static constexpr Error_t errorUnsupportedLimit = ::hipErrorUnsupportedLimit;
45 static constexpr Error_t errorUnknown = ::hipErrorUnknown;
46
47 static constexpr Flag_t eventDefault = hipEventDefault;
48 static constexpr Flag_t eventBlockingSync = hipEventBlockingSync;
49 static constexpr Flag_t eventDisableTiming = hipEventDisableTiming;
50 static constexpr Flag_t eventInterprocess = hipEventInterprocess;
51
52 static constexpr Flag_t hostMallocDefault = hipHostMallocDefault;
53 static constexpr Flag_t hostMallocMapped = hipHostMallocMapped;
54 static constexpr Flag_t hostMallocPortable = hipHostMallocPortable;
55 static constexpr Flag_t hostMallocWriteCombined = hipHostMallocWriteCombined;
56 static constexpr Flag_t hostMallocCoherent = hipHostMallocCoherent;
57 static constexpr Flag_t hostMallocNonCoherent = hipHostMallocNonCoherent;
58
59 static constexpr Flag_t memAttachGlobal = hipMemAttachGlobal;
60 static constexpr Flag_t memAttachHost = hipMemAttachHost;
61
62 static constexpr Flag_t hostRegisterDefault = hipHostRegisterDefault;
63 static constexpr Flag_t hostRegisterPortable = hipHostRegisterPortable;
64 static constexpr Flag_t hostRegisterMapped = hipHostRegisterMapped;
65 static constexpr Flag_t hostRegisterIoMemory = hipHostRegisterIoMemory;
66
67 static constexpr MemcpyKind_t memcpyDefault = ::hipMemcpyDefault;
68 static constexpr MemcpyKind_t memcpyDeviceToDevice = ::hipMemcpyDeviceToDevice;
69 static constexpr MemcpyKind_t memcpyDeviceToHost = ::hipMemcpyDeviceToHost;
70 static constexpr MemcpyKind_t memcpyHostToDevice = ::hipMemcpyHostToDevice;
71
72 static constexpr Flag_t streamDefault = hipStreamDefault;
73 static constexpr Flag_t streamNonBlocking = hipStreamNonBlocking;
74
75 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimX = ::hipDeviceAttributeMaxBlockDimX;
76 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimY = ::hipDeviceAttributeMaxBlockDimY;
77 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimZ = ::hipDeviceAttributeMaxBlockDimZ;
78 static constexpr DeviceAttr_t deviceAttributeMaxGridDimX = ::hipDeviceAttributeMaxGridDimX;
79 static constexpr DeviceAttr_t deviceAttributeMaxGridDimY = ::hipDeviceAttributeMaxGridDimY;
80 static constexpr DeviceAttr_t deviceAttributeMaxGridDimZ = ::hipDeviceAttributeMaxGridDimZ;
81 static constexpr DeviceAttr_t deviceAttributeMaxSharedMemoryPerBlock
82 = ::hipDeviceAttributeMaxSharedMemoryPerBlock;
83 static constexpr DeviceAttr_t deviceAttributeMaxThreadsPerBlock = ::hipDeviceAttributeMaxThreadsPerBlock;
84 static constexpr DeviceAttr_t deviceAttributeMultiprocessorCount = ::hipDeviceAttributeMultiprocessorCount;
85 static constexpr DeviceAttr_t deviceAttributeWarpSize = ::hipDeviceAttributeWarpSize;
86
87# if HIP_VERSION >= 40'500'000
88 static constexpr Limit_t limitPrintfFifoSize = ::hipLimitPrintfFifoSize;
89# else
90 static constexpr Limit_t limitPrintfFifoSize
91 = static_cast<Limit_t>(0x01); // Implemented only in ROCm 4.5.0 and later.
92# endif
93 static constexpr Limit_t limitMallocHeapSize = ::hipLimitMallocHeapSize;
94
95 // Host function helper
96 // Encapsulates the different function signatures used by hipStreamAddCallback and hipLaunchHostFn, and the
97 // different calling conventions used by CUDA (__stdcall on Win32) and HIP (standard).
98 struct HostFnAdaptor
99 {
100 HostFn_t func_;
101 void* data_;
102
103 static void hostFunction(void* data)
104 {
105 auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
106 ptr->func_(ptr->data_);
107 delete ptr;
108 }
109
110 static void streamCallback(Stream_t, Error_t, void* data)
111 {
112 auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
113 ptr->func_(ptr->data_);
114 delete ptr;
115 }
116 };
117
118 // Runtime API
119 static inline Error_t deviceGetAttribute(int* value, DeviceAttr_t attr, int device)
120 {
121 return ::hipDeviceGetAttribute(value, attr, device);
122 }
123
124 static inline Error_t deviceGetLimit(size_t* pValue, Limit_t limit)
125 {
126# if HIP_VERSION < 40'500'000
127 if(limit == limitPrintfFifoSize)
128 {
129 // Implemented only in ROCm 4.5.0 and later.
130 return errorUnsupportedLimit;
131 }
132# endif
133 return ::hipDeviceGetLimit(pValue, limit);
134 }
135
136 static inline Error_t deviceReset()
137 {
138 return ::hipDeviceReset();
139 }
140
141 static inline Error_t deviceSetLimit(Limit_t /* limit */, size_t /* value */)
142 {
143 // Not implemented.
144 return errorUnsupportedLimit;
145 }
146
147 static inline Error_t deviceSynchronize()
148 {
149 return ::hipDeviceSynchronize();
150 }
151
152 static inline Error_t eventCreate(Event_t* event)
153 {
154 return ::hipEventCreate(event);
155 }
156
157 static inline Error_t eventCreateWithFlags(Event_t* event, Flag_t flags)
158 {
159 return ::hipEventCreateWithFlags(event, flags);
160 }
161
162 static inline Error_t eventDestroy(Event_t event)
163 {
164 return ::hipEventDestroy(event);
165 }
166
167 static inline Error_t eventQuery(Event_t event)
168 {
169 return ::hipEventQuery(event);
170 }
171
172 static inline Error_t eventRecord(Event_t event, Stream_t stream)
173 {
174 return ::hipEventRecord(event, stream);
175 }
176
177 static inline Error_t eventSynchronize(Event_t event)
178 {
179 return ::hipEventSynchronize(event);
180 }
181
182 static inline Error_t free(void* devPtr)
183 {
184 return ::hipFree(devPtr);
185 }
186
187 static inline Error_t freeAsync([[maybe_unused]] void* devPtr, [[maybe_unused]] Stream_t stream)
188 {
189 // stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later.
190# if HIP_VERSION >= 50'300'000
191 // hipFreeAsync fails on a null pointer deallocation
192 if(devPtr)
193 {
194 return ::hipFreeAsync(devPtr, stream);
195 }
196 else
197 {
198 return ::hipSuccess;
199 }
200# else
201 // Not implemented.
202 return errorUnknown;
203# endif
204 }
205
206 static inline Error_t funcGetAttributes(FuncAttributes_t* attr, void const* func)
207 {
208 return ::hipFuncGetAttributes(attr, func);
209 }
210
211 template<typename T>
212 static inline Error_t funcGetAttributes(FuncAttributes_t* attr, T* func)
213 {
214# if ALPAKA_COMP_GNUC
215# pragma GCC diagnostic push
216# pragma GCC diagnostic ignored "-Wconditionally-supported"
217# endif
218 return ::hipFuncGetAttributes(attr, reinterpret_cast<void const*>(func));
219# if ALPAKA_COMP_GNUC
220# pragma GCC diagnostic pop
221# endif
222 }
223
224 static inline int getCurrentDevice()
225 {
226 int device;
227 using TApi = alpaka::ApiHipRt;
228 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(::hipGetDevice(&device));
229
230 return device;
231 }
232
233 static inline Error_t getDeviceCount(int* count)
234 {
235 return ::hipGetDeviceCount(count);
236 }
237
238 static inline Error_t getDeviceProperties(DeviceProp_t* prop, int device)
239 {
240 return ::hipGetDeviceProperties(prop, device);
241 }
242
243 static inline char const* getErrorName(Error_t error)
244 {
245 return ::hipGetErrorName(error);
246 }
247
248 static inline char const* getErrorString(Error_t error)
249 {
250 return ::hipGetErrorString(error);
251 }
252
253 static inline Error_t getLastError()
254 {
255 return ::hipGetLastError();
256 }
257
258 static inline Error_t getSymbolAddress(void** devPtr, void const* symbol)
259 {
260 return ::hipGetSymbolAddress(devPtr, symbol);
261 }
262
263 template<class T>
264 static inline Error_t getSymbolAddress(void** devPtr, T const& symbol)
265 {
266 return ::hipGetSymbolAddress(devPtr, symbol);
267 }
268
269 static inline Error_t hostGetDevicePointer(void** pDevice, void* pHost, Flag_t flags)
270 {
271 return ::hipHostGetDevicePointer(pDevice, pHost, flags);
272 }
273
274 static inline Error_t hostFree(void* ptr)
275 {
276 return ::hipHostFree(ptr);
277 }
278
279 static inline Error_t hostMalloc(void** ptr, size_t size, Flag_t flags)
280 {
281 return ::hipHostMalloc(ptr, size, flags);
282 }
283
284 static inline Error_t hostRegister(void* ptr, size_t size, Flag_t flags)
285 {
286 return ::hipHostRegister(ptr, size, flags);
287 }
288
289 static inline Error_t hostUnregister(void* ptr)
290 {
291 return ::hipHostUnregister(ptr);
292 }
293
294 static inline Error_t launchHostFunc(Stream_t stream, HostFn_t fn, void* userData)
295 {
296 // hipLaunchHostFunc is implemented only in ROCm 5.4.0 and later.
297# if HIP_VERSION >= 50'400'000
298 // Wrap the host function using the proper calling convention.
299 return ::hipLaunchHostFunc(stream, HostFnAdaptor::hostFunction, new HostFnAdaptor{fn, userData});
300# else
301 // Emulate hipLaunchHostFunc using hipStreamAddCallback with a callback adaptor.
302 return ::hipStreamAddCallback(stream, HostFnAdaptor::streamCallback, new HostFnAdaptor{fn, userData}, 0);
303# endif
304 }
305
306 static inline Error_t malloc(void** devPtr, size_t size)
307 {
308 return ::hipMalloc(devPtr, size);
309 }
310
311 static inline Error_t malloc3D(PitchedPtr_t* pitchedDevPtr, Extent_t extent)
312 {
313 return ::hipMalloc3D(pitchedDevPtr, extent);
314 }
315
316 static inline Error_t mallocAsync(
317 [[maybe_unused]] void** devPtr,
318 [[maybe_unused]] size_t size,
319 [[maybe_unused]] Stream_t stream)
320 {
321 // stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later.
322# if HIP_VERSION >= 50'600'000
323 return ::hipMallocAsync(devPtr, size, stream);
324# elif HIP_VERSION >= 50'300'000
325 // before ROCm 5.6.0, hipMallocAsync fails for an allocation of 0 bytes
326 if(size > 0)
327 {
328 return ::hipMallocAsync(devPtr, size, stream);
329 }
330 else
331 {
332 // make sure the pointer can safely be passed to hipFreeAsync
333 *devPtr = nullptr;
334 return ::hipSuccess;
335 }
336# else
337 // Not implemented.
338 return errorUnknown;
339# endif
340 }
341
342 static inline Error_t mallocManaged(void** ptr, size_t size, Flag_t flags)
343 {
344 return ::hipMallocManaged(ptr, size, flags);
345 }
346
347 static inline Error_t mallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height)
348 {
349 return ::hipMallocPitch(devPtr, pitch, width, height);
350 }
351
352 static inline Error_t memGetInfo(size_t* free, size_t* total)
353 {
354 return ::hipMemGetInfo(free, total);
355 }
356
357 static inline Error_t memcpy(void* dst, void const* src, size_t count, MemcpyKind_t kind)
358 {
359 return ::hipMemcpy(dst, src, count, kind);
360 }
361
362 static inline Error_t memcpy2DAsync(
363 void* dst,
364 size_t dpitch,
365 void const* src,
366 size_t spitch,
367 size_t width,
368 size_t height,
369 MemcpyKind_t kind,
370 Stream_t stream)
371 {
372 return ::hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream);
373 }
374
375 static inline Error_t memcpy3DAsync(Memcpy3DParms_t const* p, Stream_t stream)
376 {
377 return ::hipMemcpy3DAsync(p, stream);
378 }
379
380 static inline Error_t memcpyAsync(void* dst, void const* src, size_t count, MemcpyKind_t kind, Stream_t stream)
381 {
382 return ::hipMemcpyAsync(dst, src, count, kind, stream);
383 }
384
385 static inline Error_t memset2DAsync(
386 void* devPtr,
387 size_t pitch,
388 int value,
389 size_t width,
390 size_t height,
391 Stream_t stream)
392 {
393 return ::hipMemset2DAsync(devPtr, pitch, value, width, height, stream);
394 }
395
396 static inline Error_t memset3DAsync(PitchedPtr_t pitchedDevPtr, int value, Extent_t extent, Stream_t stream)
397 {
398 return ::hipMemset3DAsync(pitchedDevPtr, value, extent, stream);
399 }
400
401 static inline Error_t memsetAsync(void* devPtr, int value, size_t count, Stream_t stream)
402 {
403 return ::hipMemsetAsync(devPtr, value, count, stream);
404 }
405
406 static inline Error_t setDevice(int device)
407 {
408 return ::hipSetDevice(device);
409 }
410
411 static inline Error_t streamCreate(Stream_t* pStream)
412 {
413 return ::hipStreamCreate(pStream);
414 }
415
416 static inline Error_t streamCreateWithFlags(Stream_t* pStream, Flag_t flags)
417 {
418 return ::hipStreamCreateWithFlags(pStream, flags);
419 }
420
421 static inline Error_t streamDestroy(Stream_t stream)
422 {
423 return ::hipStreamDestroy(stream);
424 }
425
426 static inline Error_t streamQuery(Stream_t stream)
427 {
428 return ::hipStreamQuery(stream);
429 }
430
431 static inline Error_t streamSynchronize(Stream_t stream)
432 {
433 return ::hipStreamSynchronize(stream);
434 }
435
436 static inline Error_t streamWaitEvent(Stream_t stream, Event_t event, Flag_t flags)
437 {
438 return ::hipStreamWaitEvent(stream, event, flags);
439 }
440
441 static inline PitchedPtr_t makePitchedPtr(void* d, size_t p, size_t xsz, size_t ysz)
442 {
443 return ::make_hipPitchedPtr(d, p, xsz, ysz);
444 }
445
446 static inline Pos_t makePos(size_t x, size_t y, size_t z)
447 {
448 return ::make_hipPos(x, y, z);
449 }
450
451 static inline Extent_t makeExtent(size_t w, size_t h, size_t d)
452 {
453 return ::make_hipExtent(w, h, d);
454 }
455 };
456
457} // namespace alpaka
458
459#endif // ALPAKA_ACC_GPU_HIP_ENABLED
#define ALPAKA_VERSION_NUMBER(major, minor, patch)
Definition PP.hpp:15
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The alpaka accelerator library.
ALPAKA_FN_HOST auto free(TAlloc const &alloc, T const *const ptr) -> void
Frees the memory identified by the given pointer.
Definition Traits.hpp:41
ALPAKA_FN_HOST auto memcpy(TQueue &queue, alpaka::detail::DevGlobalImplGeneric< TTag, TTypeDst > &viewDst, TViewSrc const &viewSrc) -> void
ALPAKA_FN_HOST auto malloc(TAlloc const &alloc, std::size_t const &sizeElems) -> T *
Definition Traits.hpp:33