#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
    template<typename TApi, typename TElem, typename TDim, typename TIdx>

    template<typename TApi, typename TElem, typename TDim, typename TIdx>
    template<typename TApi, typename TElem, typename TDim, typename TIdx>
    struct GetDev<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
    {
        ALPAKA_FN_HOST static auto getDev(BufUniformCudaHipRt<TApi, TElem, TDim, TIdx> const& buf)
            -> DevUniformCudaHipRt<TApi>
        {
            return buf.m_spBufImpl->m_dev;
        }
    };
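    // Usage sketch (assuming a previously allocated BufUniformCudaHipRt named `buf`; the
    // name `dev` is illustrative): the free function alpaka::getDev dispatches to the
    // GetDev trait above and returns the device the buffer lives on.
    //
    //   auto const dev = alpaka::getDev(buf);   // DevUniformCudaHipRt<TApi>
    //   int const ordinal = dev.getNativeHandle();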
    template<typename TApi, typename TElem, typename TDim, typename TIdx>

    template<typename TApi, typename TElem, typename TDim, typename TIdx>

    template<typename TApi, typename TElem, typename TDim, typename TIdx>
    template<typename TApi, typename TElem, typename TDim, typename TIdx>
    struct GetPtrNative<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
    {
        ALPAKA_FN_HOST static auto getPtrNative(BufUniformCudaHipRt<TApi, TElem, TDim, TIdx> const& buf)
            -> TElem const*
        {
            return buf.m_spBufImpl->m_pMem;
        }

        ALPAKA_FN_HOST static auto getPtrNative(BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>& buf) -> TElem*
        {
            return buf.m_spBufImpl->m_pMem;
        }
    };
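    // Usage sketch (assuming an existing device buffer `buf`; names are illustrative):
    // alpaka::getPtrNative dispatches to the GetPtrNative trait above and yields the raw
    // device pointer, e.g. for passing to a kernel.
    //
    //   float* pDevice = alpaka::getPtrNative(buf);
    //   float const* pDeviceConst = alpaka::getPtrNative(std::as_const(buf));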
    template<typename TApi, typename TElem, typename TDim, typename TIdx>
    struct GetPtrDev<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>, DevUniformCudaHipRt<TApi>>
    {
        ALPAKA_FN_HOST static auto getPtrDev(
            BufUniformCudaHipRt<TApi, TElem, TDim, TIdx> const& buf,
            DevUniformCudaHipRt<TApi> const& dev) -> TElem const*
        {
            if(dev == getDev(buf))
                return buf.m_spBufImpl->m_pMem;
            throw std::runtime_error("The buffer is not accessible from the given device!");
        }

        ALPAKA_FN_HOST static auto getPtrDev(
            BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>& buf,
            DevUniformCudaHipRt<TApi> const& dev) -> TElem*
        {
            if(dev == getDev(buf))
                return buf.m_spBufImpl->m_pMem;
            throw std::runtime_error("The buffer is not accessible from the given device!");
        }
    };
    template<typename TApi, typename TElem, typename TDim, typename TIdx>

    template<typename TApi, typename TElem, typename TDim, typename TIdx>

    template<typename TApi, typename TElem, typename TDim, typename TIdx>
    template<typename TApi, typename TElem, typename TDim, typename TIdx>
    struct GetPtrDev<BufCpu<TElem, TDim, TIdx>, DevUniformCudaHipRt<TApi>>
    {
        ALPAKA_FN_HOST static auto getPtrDev(BufCpu<TElem, TDim, TIdx> const& buf, DevUniformCudaHipRt<TApi> const&)
            -> TElem const*
        {
            TElem* pDev(nullptr);
            ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::hostGetDevicePointer(
                &pDev,
                const_cast<void*>(reinterpret_cast<void const*>(getPtrNative(buf))),
                0));
            return pDev;
        }

        ALPAKA_FN_HOST static auto getPtrDev(BufCpu<TElem, TDim, TIdx>& buf, DevUniformCudaHipRt<TApi> const&)
            -> TElem*
        {
            TElem* pDev(nullptr);
            ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::hostGetDevicePointer(&pDev, getPtrNative(buf), 0));
            return pDev;
        }
    };
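    // Usage sketch (assuming `hostBuf` is a BufCpu allocated via alpaka::allocMappedBuf,
    // see the BufAllocMapped specialization below, and `dev` is a CUDA/HIP device): a
    // mapped host allocation can be dereferenced on the GPU through its device-side
    // alias pointer.
    //
    //   float* pOnDevice = alpaka::getPtrDev(hostBuf, dev);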
    template<typename TApi, typename TElem, typename TDim, typename TIdx>
    template<typename TApi, typename TElem, typename Dim, typename TIdx>
    struct BufAlloc<TElem, Dim, TIdx, DevUniformCudaHipRt<TApi>>
    {
        template<typename TExtent>
        ALPAKA_FN_HOST static auto allocBuf(DevUniformCudaHipRt<TApi> const& dev, TExtent const& extent)
            -> BufUniformCudaHipRt<TApi, TElem, Dim, TIdx>
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

            ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));

            void* memPtr = nullptr;
            std::size_t rowPitchInBytes = 0u;
            if constexpr(Dim::value == 0)
            {
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::malloc(&memPtr, sizeof(TElem)));
            }
            else if constexpr(Dim::value == 1)
            {
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
                    TApi::malloc(&memPtr, static_cast<std::size_t>(getWidth(extent)) * sizeof(TElem)));
            }
            else if constexpr(Dim::value == 2)
            {
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::mallocPitch(
                    &memPtr,
                    &rowPitchInBytes,
                    static_cast<std::size_t>(getWidth(extent)) * sizeof(TElem),
                    static_cast<std::size_t>(getHeight(extent))));
            }
            else if constexpr(Dim::value == 3)
            {
                typename TApi::Extent_t const extentVal = TApi::makeExtent(
                    static_cast<std::size_t>(getWidth(extent)) * sizeof(TElem),
                    static_cast<std::size_t>(getHeight(extent)),
                    static_cast<std::size_t>(getDepth(extent)));
                typename TApi::PitchedPtr_t pitchedPtrVal;
                pitchedPtrVal.ptr = nullptr;
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::malloc3D(&pitchedPtrVal, extentVal));
                memPtr = pitchedPtrVal.ptr;
                rowPitchInBytes = pitchedPtrVal.pitch;
            }
#    if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
            std::cout << __func__;
            if constexpr(Dim::value >= 1)
                std::cout << " ew: " << getWidth(extent);
            if constexpr(Dim::value >= 2)
                std::cout << " eh: " << getHeight(extent);
            if constexpr(Dim::value >= 3)
                std::cout << " ed: " << getDepth(extent);
            std::cout << " ptr: " << memPtr;
            if constexpr(Dim::value >= 2)
                std::cout << " rowpitch: " << rowPitchInBytes;
            std::cout << std::endl;
#    endif
            return BufUniformCudaHipRt<TApi, TElem, Dim, TIdx>(
                dev,
                reinterpret_cast<TElem*>(memPtr),
                [](TElem* ptr) { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::free(ptr)); },
                extent,
                rowPitchInBytes);
        }
    };
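    // Usage sketch (assuming the CUDA back-end; the identifiers `platform`, `dev`,
    // `extent` and `buf` are illustrative): alpaka::allocBuf dispatches to the BufAlloc
    // specialization above and picks malloc / mallocPitch / malloc3D based on the
    // dimensionality, so 2D/3D buffers may be row-padded to rowPitchInBytes.
    //
    //   using Dim = alpaka::DimInt<2u>;
    //   using Idx = std::size_t;
    //   auto const platform = alpaka::PlatformCudaRt{};
    //   auto const dev = alpaka::getDevByIdx(platform, 0);
    //   auto const extent = alpaka::Vec<Dim, Idx>{480u, 640u};   // rows, columns
    //   auto buf = alpaka::allocBuf<float, Idx>(dev, extent);
    //   auto const pitches = alpaka::getPitchesInBytes(buf);     // row pitch may exceed 640 * sizeof(float)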
    template<typename TApi, typename TDim>
    template<typename TApi, typename TElem, typename TDim, typename TIdx>
    struct AsyncBufAlloc<TElem, TDim, TIdx, DevUniformCudaHipRt<TApi>>
    {
        static_assert(
            TDim::value <= 1,
            "CUDA/HIP devices support only one-dimensional stream-ordered memory buffers.");

        template<typename TQueue, typename TExtent>
        ALPAKA_FN_HOST static auto allocAsyncBuf(TQueue queue, [[maybe_unused]] TExtent const& extent)
            -> BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

            static_assert(TDim::value == Dim<TExtent>::value, "extent must have the same dimension as the buffer");
            auto const width = getExtentProduct(extent);

            auto const& dev = getDev(queue);
            ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
            void* memPtr = nullptr;
            ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
                TApi::mallocAsync(&memPtr, static_cast<std::size_t>(width) * sizeof(TElem), queue.getNativeHandle()));

#    if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
            std::cout << __func__ << " ew: " << width << " ptr: " << memPtr << std::endl;
#    endif
            return BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>{
                dev,
                reinterpret_cast<TElem*>(memPtr),
                [q = std::move(queue)](TElem* ptr)
                { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::freeAsync(ptr, q.getNativeHandle())); },
                extent,
                static_cast<std::size_t>(width) * sizeof(TElem)};
        }
    };
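    // Usage sketch (assuming a CUDA device `dev` with stream-ordered allocation support;
    // `queue`, `extent1D` and `buf` are illustrative): the buffer is allocated in stream
    // order on `queue` and released via freeAsync on the same queue once the last
    // reference goes away.
    //
    //   using Dim1 = alpaka::DimInt<1u>;
    //   using Idx = std::size_t;
    //   auto queue = alpaka::QueueCudaRtNonBlocking{dev};
    //   auto const extent1D = alpaka::Vec<Dim1, Idx>{1024u};
    //   auto buf = alpaka::allocAsyncBuf<float, Idx>(queue, extent1D);
    //   // ... enqueue kernels using alpaka::getPtrNative(buf) on the same queue ...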
    template<typename TApi>
    struct HasMappedBufSupport<PlatformUniformCudaHipRt<TApi>> : public std::true_type
    {
    };
    template<typename TApi, typename TElem, typename TDim, typename TIdx>
    struct BufAllocMapped<PlatformUniformCudaHipRt<TApi>, TElem, TDim, TIdx>
    {
        template<typename TExtent>
        ALPAKA_FN_HOST static auto allocMappedBuf(
            DevCpu const& host,
            PlatformUniformCudaHipRt<TApi> const&,
            TExtent const& extent) -> BufCpu<TElem, TDim, TIdx>
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

            // Allocate page-locked host memory that is mapped into the device address space.
            TElem* memPtr = nullptr;
            ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::hostMalloc(
                reinterpret_cast<void**>(&memPtr),
                sizeof(TElem) * static_cast<std::size_t>(getExtentProduct(extent)),
                TApi::hostMallocMapped | TApi::hostMallocPortable));
            auto deleter = [](TElem* ptr) { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::hostFree(ptr)); };

            return BufCpu<TElem, TDim, TIdx>(host, memPtr, std::move(deleter), extent);
        }
    };
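    // Usage sketch (assuming a CPU host device and a CUDA platform; `host`, `hostBuf`,
    // `devBuf`, `queue`, `extent` and `Idx` are illustrative): the resulting BufCpu is
    // pinned and mapped, so it works well as a staging area for copies to and from the
    // device buffers allocated above.
    //
    //   auto const platformHost = alpaka::PlatformCpu{};
    //   auto const host = alpaka::getDevByIdx(platformHost, 0);
    //   auto const platformAcc = alpaka::PlatformCudaRt{};
    //   auto hostBuf = alpaka::allocMappedBuf<float, Idx>(host, platformAcc, extent);
    //   alpaka::memcpy(queue, devBuf, hostBuf);   // pinned source enables asynchronous copies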