alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
BufUniformCudaHipRtTraits.hpp
Go to the documentation of this file.
1/* Copyright 2025 Anton Reinhard
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10
11#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
12
13namespace alpaka::trait
14{
15 //! The memory buffer type trait specialization for CUDA/HIP RT devices (DevUniformCudaHipRt<TApi>).
16 template<typename TApi, typename TElem, typename TDim, typename TIdx>
17 struct BufType<DevUniformCudaHipRt<TApi>, TElem, TDim, TIdx>
18 {
20 }; // NOTE(review): listing line 19 (the struct body) is missing from this extract.
21
22 //! The device type trait specialization for BufUniformCudaHipRt buffers.
23 template<typename TApi, typename TElem, typename TDim, typename TIdx>
24 struct DevType<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
25 {
27 }; // NOTE(review): listing line 26 (the struct body) is missing from this extract.
28
29 //! The BufUniformCudaHipRt device get trait specialization: returns the device held by the buffer implementation.
30 template<typename TApi, typename TElem, typename TDim, typename TIdx>
31 struct GetDev<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
32 { // NOTE(review): listing lines 33-34 (the function signature) are missing from this extract.
35 {
36 return buf.m_spBufImpl->m_dev; // read from the implementation object the buffer points to
37 }
38 };
39
40 //! The BufUniformCudaHipRt dimension type trait specialization: exposes TDim as `type`.
41 template<typename TApi, typename TElem, typename TDim, typename TIdx>
42 struct DimType<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
43 {
44 using type = TDim;
45 };
46
47 //! The BufUniformCudaHipRt memory element type trait specialization: exposes TElem as `type`.
48 template<typename TApi, typename TElem, typename TDim, typename TIdx>
49 struct ElemType<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
50 {
51 using type = TElem;
52 };
53
54 //! The BufUniformCudaHipRt extents get trait specialization: returns the extents stored in the buffer implementation.
55 template<typename TApi, typename TElem, typename TDim, typename TIdx>
56 struct GetExtents<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
57 { // NOTE(review): listing line 58 (the operator() signature) is missing from this extract.
59 {
60 return buf.m_spBufImpl->m_extentElements; // all dimensions at once, not only the width
61 }
62 };
63
64 //! The BufUniformCudaHipRt native pointer get trait specialization: both overloads return the buffer's m_pMem.
65 template<typename TApi, typename TElem, typename TDim, typename TIdx>
66 struct GetPtrNative<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
67 { // NOTE(review): listing lines 68 and 74 (the start of each overload's signature) are missing from this extract.
69 -> TElem const*
70 { // Read-only overload: the memory pointer is handed out as pointer-to-const.
71 return buf.m_spBufImpl->m_pMem;
72 }
73
75 { // Second overload; its signature is not visible in this listing.
76 return buf.m_spBufImpl->m_pMem;
77 }
78 };
79
80 //! The BufUniformCudaHipRt pointer on device get trait specialization: returns m_pMem if `dev` equals the buffer's device, otherwise throws std::runtime_error.
81 template<typename TApi, typename TElem, typename TDim, typename TIdx>
82 struct GetPtrDev<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>, DevUniformCudaHipRt<TApi>>
83 { // NOTE(review): listing lines 84-85 and 98-99 (the start of each overload's signature) are missing from this extract.
86 DevUniformCudaHipRt<TApi> const& dev) -> TElem const*
87 {
88 if(dev == getDev(buf)) // only the device the buffer belongs to may obtain the pointer
89 {
90 return buf.m_spBufImpl->m_pMem;
91 }
92 else
93 {
94 throw std::runtime_error("The buffer is not accessible from the given device!");
95 }
96 }
97
100 DevUniformCudaHipRt<TApi> const& dev) -> TElem*
101 {
102 if(dev == getDev(buf)) // same device check as in the const overload above
103 {
104 return buf.m_spBufImpl->m_pMem;
105 }
106 else
107 {
108 throw std::runtime_error("The buffer is not accessible from the given device!");
109 }
110 }
111 };
112
113 template<typename TApi, typename TElem, typename TDim, typename TIdx>
123
124 //! The BufUniformCudaHipRt offsets get trait specialization.
125 template<typename TApi, typename TElem, typename TDim, typename TIdx>
126 struct GetOffsets<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
127 {
133 }; // NOTE(review): listing lines 128-132 (the struct body) are missing from this extract.
134
135 //! The BufUniformCudaHipRt idx type trait specialization: exposes TIdx as `type`.
136 template<typename TApi, typename TElem, typename TDim, typename TIdx>
137 struct IdxType<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
138 {
139 using type = TIdx;
140 };
141
142 //! The BufCpu pointer on CUDA/HIP device get trait specialization: asks TApi::hostGetDevicePointer for the device pointer matching the host buffer's native pointer.
143 template<typename TApi, typename TElem, typename TDim, typename TIdx>
144 struct GetPtrDev<BufCpu<TElem, TDim, TIdx>, DevUniformCudaHipRt<TApi>>
145 { // NOTE(review): listing lines 146 and 160 (the start of each overload's signature) are missing from this extract.
147 -> TElem const*
148 {
149 // TODO: Check if the memory is mapped at all!
150 TElem* pDev(nullptr);
151
152 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::hostGetDevicePointer(
153 &pDev,
154 const_cast<void*>(reinterpret_cast<void const*>(getPtrNative(buf))), // const is cast away only to call the API
155 0));
156
157 return pDev; // converted to TElem const* by the return type
158 }
159
161 -> TElem*
162 {
163 // TODO: Check if the memory is mapped at all!
164 TElem* pDev(nullptr);
165
166 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::hostGetDevicePointer(&pDev, getPtrNative(buf), 0)); // no const_cast in this overload
167
168 return pDev;
169 }
170 };
171
172 //! The MakeConstBuf trait for CUDA/HIP buffers.
173 template<typename TApi, typename TElem, typename TDim, typename TIdx>
188
189 //! The CUDA/HIP memory allocation trait specialization: allocates with the TApi call selected by Dim (malloc for 0D/1D, mallocPitch for 2D, malloc3D for 3D).
190 template<typename TApi, typename TElem, typename Dim, typename TIdx>
191 struct BufAlloc<TElem, Dim, TIdx, DevUniformCudaHipRt<TApi>>
192 {
193 template<typename TExtent>
194 ALPAKA_FN_HOST static auto allocBuf(DevUniformCudaHipRt<TApi> const& dev, TExtent const& extent)
196 { // NOTE(review): listing lines 195 and 197 are missing from this extract.
198
199 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle())); // select `dev` before allocating
200
201 void* memPtr = nullptr;
202 std::size_t rowPitchInBytes = 0u; // only assigned in the 2D and 3D branches below
203 if(getExtentProduct(extent) != 0) // an empty extent allocates nothing; memPtr stays nullptr
204 {
205 if constexpr(Dim::value == 0)
206 {
207 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::malloc(&memPtr, sizeof(TElem))); // 0D: exactly one element
208 }
209 else if constexpr(Dim::value == 1)
210 { // NOTE(review): listing line 211 (the start of the statement below) is missing from this extract.
212 TApi::malloc(&memPtr, static_cast<std::size_t>(getWidth(extent)) * sizeof(TElem)));
213 }
214 else if constexpr(Dim::value == 2)
215 {
216 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::mallocPitch(
217 &memPtr,
218 &rowPitchInBytes,
219 static_cast<std::size_t>(getWidth(extent)) * sizeof(TElem), // width is passed in bytes
220 static_cast<std::size_t>(getHeight(extent))));
221 }
222 else if constexpr(Dim::value == 3)
223 {
224 typename TApi::Extent_t const extentVal = TApi::makeExtent(
225 static_cast<std::size_t>(getWidth(extent)) * sizeof(TElem), // width is passed in bytes
226 static_cast<std::size_t>(getHeight(extent)),
227 static_cast<std::size_t>(getDepth(extent)));
228 typename TApi::PitchedPtr_t pitchedPtrVal;
229 pitchedPtrVal.ptr = nullptr;
230 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::malloc3D(&pitchedPtrVal, extentVal));
231 memPtr = pitchedPtrVal.ptr; // unpack pointer and pitch from the pitched-pointer result
232 rowPitchInBytes = pitchedPtrVal.pitch;
233 }
234 }
235# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
236 std::cout << __func__;
237 if constexpr(Dim::value >= 1)
238 std::cout << " ew: " << getWidth(extent);
239 if constexpr(Dim::value >= 2)
240 std::cout << " eh: " << getHeight(extent);
241 if constexpr(Dim::value >= 3)
242 std::cout << " ed: " << getDepth(extent);
243 std::cout << " ptr: " << memPtr;
244 if constexpr(Dim::value >= 2)
245 std::cout << " rowpitch: " << rowPitchInBytes;
246 std::cout << std::endl;
247# endif
248 return {
249 dev,
250 reinterpret_cast<TElem*>(memPtr),
251 [](TElem* ptr) { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::free(ptr)); }, // deleter: releases the memory with TApi::free
252 extent,
253 rowPitchInBytes};
254 }
255 };
256
257 //! The CUDA/HIP stream-ordered memory allocation capability trait specialization: true only for TDim::value <= 1 (0D and 1D buffers).
258 template<typename TApi, typename TDim>
259 struct HasAsyncBufSupport<TDim, DevUniformCudaHipRt<TApi>> : std::bool_constant<TDim::value <= 1>
260 {
261 };
262
263 //! The CUDA/HIP stream-ordered memory allocation trait specialization: allocates with TApi::mallocAsync on the given queue and frees with TApi::freeAsync on the same queue.
264 template<typename TApi, typename TElem, typename TDim, typename TIdx>
265 struct AsyncBufAlloc<TElem, TDim, TIdx, DevUniformCudaHipRt<TApi>>
266 {
267 static_assert(
268 TDim::value <= 1, // NOTE(review): this also admits 0D buffers, although the message below only mentions one-dimensional ones.
269 "CUDA/HIP devices support only one-dimensional stream-ordered memory buffers.");
270
271 template<typename TQueue, typename TExtent>
272 ALPAKA_FN_HOST static auto allocAsyncBuf(TQueue queue, [[maybe_unused]] TExtent const& extent)
273 -> BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>
274 {
275 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
276
277 static_assert(TDim::value == Dim<TExtent>::value, "extent must have the same dimension as the buffer");
278 auto const width = getExtentProduct(extent); // handles 1D and 0D buffers
279
280 auto const& dev = getDev(queue);
281 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle())); // select the queue's device before allocating
282 void* memPtr = nullptr;
283 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
284 TApi::mallocAsync(&memPtr, static_cast<std::size_t>(width) * sizeof(TElem), queue.getNativeHandle()));
285
286# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
287 std::cout << __func__ << " ew: " << width << " ptr: " << memPtr << std::endl;
288# endif
289 return {
290 dev,
291 reinterpret_cast<TElem*>(memPtr),
292 [q = std::move(queue)](TElem* ptr) // the deleter owns the queue (moved into the capture) and frees on it
293 { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::freeAsync(ptr, q.getNativeHandle())); },
294 extent,
295 static_cast<std::size_t>(width) * sizeof(TElem)}; // total size in bytes; presumably the row pitch, as in BufAlloc's last argument (TODO confirm)
296 }
297 };
298
299 //! The pinned/mapped memory allocation capability trait specialization: always true for PlatformUniformCudaHipRt<TApi>.
300 template<typename TApi>
301 struct HasMappedBufSupport<PlatformUniformCudaHipRt<TApi>> : public std::true_type
302 {
303 };
304
305 //! The pinned/mapped memory allocation trait specialization for the CUDA/HIP platform: returns a BufCpu whose memory comes from TApi::hostMalloc and is released with TApi::hostFree.
306 template<typename TApi, typename TElem, typename TDim, typename TIdx>
307 struct BufAllocMapped<PlatformUniformCudaHipRt<TApi>, TElem, TDim, TIdx>
308 {
309 template<typename TExtent>
310 ALPAKA_FN_HOST static auto allocMappedBuf(
311 DevCpu const& host,
312 PlatformUniformCudaHipRt<TApi> const& /*platform*/, // unused; the parameter name is commented out
313 TExtent const& extent) -> BufCpu<TElem, TDim, TIdx>
314 {
315 ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
316
317 // Allocate CUDA/HIP page-locked memory on the host, mapped into the CUDA/HIP address space and
318 // accessible to all CUDA/HIP devices.
319 TElem* memPtr = nullptr;
320 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::hostMalloc(
321 reinterpret_cast<void**>(&memPtr),
322 sizeof(TElem) * static_cast<std::size_t>(getExtentProduct(extent)), // byte count: element size times number of elements
323 TApi::hostMallocMapped | TApi::hostMallocPortable));
324 auto deleter = [](TElem* ptr) { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::hostFree(ptr)); }; // releases the host allocation
325
326 return BufCpu<TElem, TDim, TIdx>(host, memPtr, std::move(deleter), extent);
327 }
328 };
329
330} // namespace alpaka::trait
331
332#endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
#define ALPAKA_DEBUG_FULL
The full debug level.
Definition Debug.hpp:18
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(cmd)
CUDA/HIP runtime error checking with log.
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The CPU memory buffer template implementing mutating accessors.
Definition BufCpu.hpp:24
The generic memory buffer template implementing mutating accessors.
std::shared_ptr< TBufImpl > m_spBufImpl
The CUDA/HIP RT device handle.
A n-dimensional vector.
Definition Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto zeros() -> Vec< TDim, TVal >
Zero value constructor.
Definition Vec.hpp:99
#define ALPAKA_FN_HOST
Definition Common.hpp:40
The accelerator traits.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtentProduct(T const &object) -> Idx< T >
Definition Traits.hpp:134
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getHeight(TExtent const &extent=TExtent()) -> Idx< TExtent >
Definition Traits.hpp:108
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition Traits.hpp:136
ALPAKA_FN_HOST auto getDev(T const &t)
Definition Traits.hpp:68
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getDepth(TExtent const &extent=TExtent()) -> Idx< TExtent >
Definition Traits.hpp:121
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition Traits.hpp:19
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getWidth(TExtent const &extent=TExtent()) -> Idx< TExtent >
Definition Traits.hpp:95
static ALPAKA_FN_HOST auto allocBuf(DevUniformCudaHipRt< TApi > const &dev, TExtent const &extent) -> BufUniformCudaHipRt< TApi, TElem, Dim, TIdx >
The memory allocator trait.
Definition Traits.hpp:31
The memory buffer type trait.
Definition Traits.hpp:23
The device type trait.
Definition Traits.hpp:23
The dimension getter type trait.
Definition Traits.hpp:14
The element type trait.
Definition Traits.hpp:16
static ALPAKA_FN_HOST auto getDev(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > const &buf) -> DevUniformCudaHipRt< TApi >
The device get trait.
Definition Traits.hpp:27
ALPAKA_FN_HOST auto operator()(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > const &buf)
The GetExtents trait for getting the extents of an object as an alpaka::Vec.
Definition Traits.hpp:37
ALPAKA_FN_HOST auto operator()(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > const &) const -> Vec< TDim, TIdx >
The GetOffsets trait for getting the offsets of an object as an alpaka::Vec.
Definition Traits.hpp:33
ALPAKA_FN_HOST auto operator()(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > const &buf) const -> Vec< TDim, TIdx >
Customization point for getPitchesInBytes. The default implementation uses the extent to calculate the pitches.
Definition Traits.hpp:103
static ALPAKA_FN_HOST auto getPtrDev(BufCpu< TElem, TDim, TIdx > const &buf, DevUniformCudaHipRt< TApi > const &) -> TElem const *
static ALPAKA_FN_HOST auto getPtrDev(BufCpu< TElem, TDim, TIdx > &buf, DevUniformCudaHipRt< TApi > const &) -> TElem *
static ALPAKA_FN_HOST auto getPtrDev(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > const &buf, DevUniformCudaHipRt< TApi > const &dev) -> TElem const *
static ALPAKA_FN_HOST auto getPtrDev(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > &buf, DevUniformCudaHipRt< TApi > const &dev) -> TElem *
The pointer on device get trait.
Definition Traits.hpp:58
static ALPAKA_FN_HOST auto getPtrNative(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > &buf) -> TElem *
static ALPAKA_FN_HOST auto getPtrNative(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > const &buf) -> TElem const *
The native pointer get trait.
Definition Traits.hpp:54
The stream-ordered memory allocation capability trait.
Definition Traits.hpp:40
The idx type trait.
Definition Traits.hpp:25
static ALPAKA_FN_HOST auto makeConstBuf(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > &&buf) -> ConstBufUniformCudaHipRt< TApi, TElem, TDim, TIdx >
static ALPAKA_FN_HOST auto makeConstBuf(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > const &buf) -> ConstBufUniformCudaHipRt< TApi, TElem, TDim, TIdx >
The trait to transform a mutable buffer into a constant one.
Definition Traits.hpp:55