alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
ApiHipRt.hpp
Go to the documentation of this file.
1/* Copyright 2022 Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
9
10#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
11
12# include <hip/hip_runtime_api.h>
13# include <hip/hip_version.h>
14
15namespace alpaka
16{
17 struct ApiHipRt
18 {
19 // Names
20 static constexpr char name[] = "Hip";
21 static constexpr auto version = ALPAKA_VERSION_NUMBER(HIP_VERSION_MAJOR, HIP_VERSION_MINOR, 0);
22
23 // Types
24 using DeviceAttr_t = ::hipDeviceAttribute_t;
25 using DeviceProp_t = ::hipDeviceProp_t;
26 using Error_t = ::hipError_t;
27 using Event_t = ::hipEvent_t;
28 using Extent_t = ::hipExtent;
29 using Flag_t = unsigned int;
30 using FuncAttributes_t = ::hipFuncAttributes;
31 using HostFn_t = void (*)(void* data); // same as hipHostFn_t
32 using Limit_t = ::hipLimit_t;
33 using Memcpy3DParms_t = ::hipMemcpy3DParms;
34 using MemcpyKind_t = ::hipMemcpyKind;
35 using PitchedPtr_t = ::hipPitchedPtr;
36 using Pos_t = ::hipPos;
37 using Stream_t = ::hipStream_t;
38
39 // Constants
40 static constexpr Error_t success = ::hipSuccess;
41 static constexpr Error_t errorNotReady = ::hipErrorNotReady;
42 static constexpr Error_t errorHostMemoryAlreadyRegistered = ::hipErrorHostMemoryAlreadyRegistered;
43 static constexpr Error_t errorHostMemoryNotRegistered = ::hipErrorHostMemoryNotRegistered;
44 static constexpr Error_t errorUnsupportedLimit = ::hipErrorUnsupportedLimit;
45 static constexpr Error_t errorUnknown = ::hipErrorUnknown;
46
47 static constexpr Flag_t eventDefault = hipEventDefault;
48 static constexpr Flag_t eventBlockingSync = hipEventBlockingSync;
49 static constexpr Flag_t eventDisableTiming = hipEventDisableTiming;
50 static constexpr Flag_t eventInterprocess = hipEventInterprocess;
51
52 static constexpr Flag_t hostMallocDefault = hipHostMallocDefault;
53 static constexpr Flag_t hostMallocMapped = hipHostMallocMapped;
54 static constexpr Flag_t hostMallocPortable = hipHostMallocPortable;
55 static constexpr Flag_t hostMallocWriteCombined = hipHostMallocWriteCombined;
56 static constexpr Flag_t hostMallocCoherent = hipHostMallocCoherent;
57 static constexpr Flag_t hostMallocNonCoherent = hipHostMallocNonCoherent;
58
59 static constexpr Flag_t hostRegisterDefault = hipHostRegisterDefault;
60 static constexpr Flag_t hostRegisterPortable = hipHostRegisterPortable;
61 static constexpr Flag_t hostRegisterMapped = hipHostRegisterMapped;
62 static constexpr Flag_t hostRegisterIoMemory = hipHostRegisterIoMemory;
63
64 static constexpr MemcpyKind_t memcpyDefault = ::hipMemcpyDefault;
65 static constexpr MemcpyKind_t memcpyDeviceToDevice = ::hipMemcpyDeviceToDevice;
66 static constexpr MemcpyKind_t memcpyDeviceToHost = ::hipMemcpyDeviceToHost;
67 static constexpr MemcpyKind_t memcpyHostToDevice = ::hipMemcpyHostToDevice;
68
69 static constexpr Flag_t streamDefault = hipStreamDefault;
70 static constexpr Flag_t streamNonBlocking = hipStreamNonBlocking;
71
72 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimX = ::hipDeviceAttributeMaxBlockDimX;
73 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimY = ::hipDeviceAttributeMaxBlockDimY;
74 static constexpr DeviceAttr_t deviceAttributeMaxBlockDimZ = ::hipDeviceAttributeMaxBlockDimZ;
75 static constexpr DeviceAttr_t deviceAttributeMaxGridDimX = ::hipDeviceAttributeMaxGridDimX;
76 static constexpr DeviceAttr_t deviceAttributeMaxGridDimY = ::hipDeviceAttributeMaxGridDimY;
77 static constexpr DeviceAttr_t deviceAttributeMaxGridDimZ = ::hipDeviceAttributeMaxGridDimZ;
78 static constexpr DeviceAttr_t deviceAttributeMaxSharedMemoryPerBlock
79 = ::hipDeviceAttributeMaxSharedMemoryPerBlock;
80 static constexpr DeviceAttr_t deviceAttributeMaxThreadsPerBlock = ::hipDeviceAttributeMaxThreadsPerBlock;
81 static constexpr DeviceAttr_t deviceAttributeMultiprocessorCount = ::hipDeviceAttributeMultiprocessorCount;
82 static constexpr DeviceAttr_t deviceAttributeWarpSize = ::hipDeviceAttributeWarpSize;
83
84# if HIP_VERSION >= 40'500'000
85 static constexpr Limit_t limitPrintfFifoSize = ::hipLimitPrintfFifoSize;
86# else
87 static constexpr Limit_t limitPrintfFifoSize
88 = static_cast<Limit_t>(0x01); // Implemented only in ROCm 4.5.0 and later.
89# endif
90 static constexpr Limit_t limitMallocHeapSize = ::hipLimitMallocHeapSize;
91
92 // Host function helper
93 // Encapsulates the different function signatures used by hipStreamAddCallback and hipLaunchHostFn, and the
94 // different calling conventions used by CUDA (__stdcall on Win32) and HIP (standard).
95 struct HostFnAdaptor
96 {
97 HostFn_t func_;
98 void* data_;
99
100 static void hostFunction(void* data)
101 {
102 auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
103 ptr->func_(ptr->data_);
104 delete ptr;
105 }
106
107 static void streamCallback(Stream_t, Error_t, void* data)
108 {
109 auto ptr = reinterpret_cast<HostFnAdaptor*>(data);
110 ptr->func_(ptr->data_);
111 delete ptr;
112 }
113 };
114
115 // Runtime API
116 static inline Error_t deviceGetAttribute(int* value, DeviceAttr_t attr, int device)
117 {
118 return ::hipDeviceGetAttribute(value, attr, device);
119 }
120
121 static inline Error_t deviceGetLimit(size_t* pValue, Limit_t limit)
122 {
123# if HIP_VERSION < 40'500'000
124 if(limit == limitPrintfFifoSize)
125 {
126 // Implemented only in ROCm 4.5.0 and later.
127 return errorUnsupportedLimit;
128 }
129# endif
130 return ::hipDeviceGetLimit(pValue, limit);
131 }
132
133 static inline Error_t deviceReset()
134 {
135 return ::hipDeviceReset();
136 }
137
138 static inline Error_t deviceSetLimit(Limit_t /* limit */, size_t /* value */)
139 {
140 // Not implemented.
141 return errorUnsupportedLimit;
142 }
143
144 static inline Error_t deviceSynchronize()
145 {
146 return ::hipDeviceSynchronize();
147 }
148
149 static inline Error_t eventCreate(Event_t* event)
150 {
151 return ::hipEventCreate(event);
152 }
153
154 static inline Error_t eventCreateWithFlags(Event_t* event, Flag_t flags)
155 {
156 return ::hipEventCreateWithFlags(event, flags);
157 }
158
159 static inline Error_t eventDestroy(Event_t event)
160 {
161 return ::hipEventDestroy(event);
162 }
163
164 static inline Error_t eventQuery(Event_t event)
165 {
166 return ::hipEventQuery(event);
167 }
168
169 static inline Error_t eventRecord(Event_t event, Stream_t stream)
170 {
171 return ::hipEventRecord(event, stream);
172 }
173
174 static inline Error_t eventSynchronize(Event_t event)
175 {
176 return ::hipEventSynchronize(event);
177 }
178
179 static inline Error_t free(void* devPtr)
180 {
181 return ::hipFree(devPtr);
182 }
183
184 static inline Error_t freeAsync([[maybe_unused]] void* devPtr, [[maybe_unused]] Stream_t stream)
185 {
186 // stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later.
187# if HIP_VERSION >= 50'300'000
188 // hipFreeAsync fails on a null pointer deallocation
189 if(devPtr)
190 {
191 return ::hipFreeAsync(devPtr, stream);
192 }
193 else
194 {
195 return ::hipSuccess;
196 }
197# else
198 // Not implemented.
199 return errorUnknown;
200# endif
201 }
202
203 static inline Error_t funcGetAttributes(FuncAttributes_t* attr, void const* func)
204 {
205 return ::hipFuncGetAttributes(attr, func);
206 }
207
208 template<typename T>
209 static inline Error_t funcGetAttributes(FuncAttributes_t* attr, T* func)
210 {
211# if ALPAKA_COMP_GNUC
212# pragma GCC diagnostic push
213# pragma GCC diagnostic ignored "-Wconditionally-supported"
214# endif
215 return ::hipFuncGetAttributes(attr, reinterpret_cast<void const*>(func));
216# if ALPAKA_COMP_GNUC
217# pragma GCC diagnostic pop
218# endif
219 }
220
221 static inline int getCurrentDevice()
222 {
223 int device;
224 using TApi = alpaka::ApiHipRt;
225 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(::hipGetDevice(&device));
226
227 return device;
228 }
229
230 static inline Error_t getDeviceCount(int* count)
231 {
232 return ::hipGetDeviceCount(count);
233 }
234
235 static inline Error_t getDeviceProperties(DeviceProp_t* prop, int device)
236 {
237 return ::hipGetDeviceProperties(prop, device);
238 }
239
240 static inline char const* getErrorName(Error_t error)
241 {
242 return ::hipGetErrorName(error);
243 }
244
245 static inline char const* getErrorString(Error_t error)
246 {
247 return ::hipGetErrorString(error);
248 }
249
250 static inline Error_t getLastError()
251 {
252 return ::hipGetLastError();
253 }
254
255 static inline Error_t getSymbolAddress(void** devPtr, void const* symbol)
256 {
257 return ::hipGetSymbolAddress(devPtr, symbol);
258 }
259
260 template<class T>
261 static inline Error_t getSymbolAddress(void** devPtr, T const& symbol)
262 {
263 return ::hipGetSymbolAddress(devPtr, symbol);
264 }
265
266 static inline Error_t hostGetDevicePointer(void** pDevice, void* pHost, Flag_t flags)
267 {
268 return ::hipHostGetDevicePointer(pDevice, pHost, flags);
269 }
270
271 static inline Error_t hostFree(void* ptr)
272 {
273 return ::hipHostFree(ptr);
274 }
275
276 static inline Error_t hostMalloc(void** ptr, size_t size, Flag_t flags)
277 {
278 return ::hipHostMalloc(ptr, size, flags);
279 }
280
281 static inline Error_t hostRegister(void* ptr, size_t size, Flag_t flags)
282 {
283 return ::hipHostRegister(ptr, size, flags);
284 }
285
286 static inline Error_t hostUnregister(void* ptr)
287 {
288 return ::hipHostUnregister(ptr);
289 }
290
291 static inline Error_t launchHostFunc(Stream_t stream, HostFn_t fn, void* userData)
292 {
293 // hipLaunchHostFunc is implemented only in ROCm 5.4.0 and later.
294# if HIP_VERSION >= 50'400'000
295 // Wrap the host function using the proper calling convention.
296 return ::hipLaunchHostFunc(stream, HostFnAdaptor::hostFunction, new HostFnAdaptor{fn, userData});
297# else
298 // Emulate hipLaunchHostFunc using hipStreamAddCallback with a callback adaptor.
299 return ::hipStreamAddCallback(stream, HostFnAdaptor::streamCallback, new HostFnAdaptor{fn, userData}, 0);
300# endif
301 }
302
303 static inline Error_t malloc(void** devPtr, size_t size)
304 {
305 return ::hipMalloc(devPtr, size);
306 }
307
308 static inline Error_t malloc3D(PitchedPtr_t* pitchedDevPtr, Extent_t extent)
309 {
310 return ::hipMalloc3D(pitchedDevPtr, extent);
311 }
312
313 static inline Error_t mallocAsync(
314 [[maybe_unused]] void** devPtr,
315 [[maybe_unused]] size_t size,
316 [[maybe_unused]] Stream_t stream)
317 {
318 // stream-ordered memory operations are fully implemented only in ROCm 5.3.0 and later.
319# if HIP_VERSION >= 50'600'000
320 return ::hipMallocAsync(devPtr, size, stream);
321# elif HIP_VERSION >= 50'300'000
322 // before ROCm 5.6.0, hipMallocAsync fails for an allocation of 0 bytes
323 if(size > 0)
324 {
325 return ::hipMallocAsync(devPtr, size, stream);
326 }
327 else
328 {
329 // make sure the pointer can safely be passed to hipFreeAsync
330 *devPtr = nullptr;
331 return ::hipSuccess;
332 }
333# else
334 // Not implemented.
335 return errorUnknown;
336# endif
337 }
338
339 static inline Error_t mallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height)
340 {
341 return ::hipMallocPitch(devPtr, pitch, width, height);
342 }
343
344 static inline Error_t memGetInfo(size_t* free, size_t* total)
345 {
346 return ::hipMemGetInfo(free, total);
347 }
348
349 static inline Error_t memcpy(void* dst, void const* src, size_t count, MemcpyKind_t kind)
350 {
351 return ::hipMemcpy(dst, src, count, kind);
352 }
353
354 static inline Error_t memcpy2DAsync(
355 void* dst,
356 size_t dpitch,
357 void const* src,
358 size_t spitch,
359 size_t width,
360 size_t height,
361 MemcpyKind_t kind,
362 Stream_t stream)
363 {
364 return ::hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream);
365 }
366
367 static inline Error_t memcpy3DAsync(Memcpy3DParms_t const* p, Stream_t stream)
368 {
369 return ::hipMemcpy3DAsync(p, stream);
370 }
371
372 static inline Error_t memcpyAsync(void* dst, void const* src, size_t count, MemcpyKind_t kind, Stream_t stream)
373 {
374 return ::hipMemcpyAsync(dst, src, count, kind, stream);
375 }
376
377 static inline Error_t memset2DAsync(
378 void* devPtr,
379 size_t pitch,
380 int value,
381 size_t width,
382 size_t height,
383 Stream_t stream)
384 {
385 return ::hipMemset2DAsync(devPtr, pitch, value, width, height, stream);
386 }
387
388 static inline Error_t memset3DAsync(PitchedPtr_t pitchedDevPtr, int value, Extent_t extent, Stream_t stream)
389 {
390 return ::hipMemset3DAsync(pitchedDevPtr, value, extent, stream);
391 }
392
393 static inline Error_t memsetAsync(void* devPtr, int value, size_t count, Stream_t stream)
394 {
395 return ::hipMemsetAsync(devPtr, value, count, stream);
396 }
397
398 static inline Error_t setDevice(int device)
399 {
400 return ::hipSetDevice(device);
401 }
402
403 static inline Error_t streamCreate(Stream_t* pStream)
404 {
405 return ::hipStreamCreate(pStream);
406 }
407
408 static inline Error_t streamCreateWithFlags(Stream_t* pStream, Flag_t flags)
409 {
410 return ::hipStreamCreateWithFlags(pStream, flags);
411 }
412
413 static inline Error_t streamDestroy(Stream_t stream)
414 {
415 return ::hipStreamDestroy(stream);
416 }
417
418 static inline Error_t streamQuery(Stream_t stream)
419 {
420 return ::hipStreamQuery(stream);
421 }
422
423 static inline Error_t streamSynchronize(Stream_t stream)
424 {
425 return ::hipStreamSynchronize(stream);
426 }
427
428 static inline Error_t streamWaitEvent(Stream_t stream, Event_t event, Flag_t flags)
429 {
430 return ::hipStreamWaitEvent(stream, event, flags);
431 }
432
433 static inline PitchedPtr_t makePitchedPtr(void* d, size_t p, size_t xsz, size_t ysz)
434 {
435 return ::make_hipPitchedPtr(d, p, xsz, ysz);
436 }
437
438 static inline Pos_t makePos(size_t x, size_t y, size_t z)
439 {
440 return ::make_hipPos(x, y, z);
441 }
442
443 static inline Extent_t makeExtent(size_t w, size_t h, size_t d)
444 {
445 return ::make_hipExtent(w, h, d);
446 }
447 };
448
449} // namespace alpaka
450
451#endif // ALPAKA_ACC_GPU_HIP_ENABLED
#define ALPAKA_VERSION_NUMBER(major, minor, patch)
Definition Config.hpp:13
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The alpaka accelerator library.
ALPAKA_FN_HOST auto free(TAlloc const &alloc, T const *const ptr) -> void
Frees the memory identified by the given pointer.
Definition Traits.hpp:41
ALPAKA_FN_HOST auto memcpy(TQueue &queue, alpaka::detail::DevGlobalImplGeneric< TTag, TTypeDst > &viewDst, TViewSrc const &viewSrc) -> void
ALPAKA_FN_HOST auto malloc(TAlloc const &alloc, std::size_t const &sizeElems) -> T *
Definition Traits.hpp:33