22 #include <type_traits>
24 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
32 template<
typename TElem,
typename TDim,
typename TIdx>
37 template<
typename TDim,
typename SFINAE =
void>
45 template<
typename TDim>
53 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
58 static_assert(!std::is_const_v<TElem>,
"The elem type of the buffer must not be const");
59 static_assert(!std::is_const_v<TIdx>,
"The idx type of the buffer must not be const!");
62 template<
typename TExtent,
typename Deleter>
67 TExtent
const& extent,
68 std::size_t pitchBytes)
72 ,
m_spMem(pMem, std::move(deleter))
78 "The dimensionality of TExtent and the dimensionality of the TDim template parameter have to be "
82 "The idx type of TExtent and the TIdx template parameter have to be identical!");
93 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
100 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
111 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
118 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
125 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
135 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
141 return buf.m_spMem.get();
146 return buf.m_spMem.get();
151 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
160 return buf.m_spMem.get();
164 throw std::runtime_error(
"The buffer is not accessible from the given device!");
174 return buf.m_spMem.get();
178 throw std::runtime_error(
"The buffer is not accessible from the given device!");
183 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
190 if constexpr(TDim::value > 0)
192 v.
back() =
sizeof(TElem);
193 if constexpr(TDim::value > 1)
195 v[TDim::value - 2] =
static_cast<TIdx
>(buf.m_rowPitchInBytes);
196 for(TIdx i = TDim::value - 2; i > 0; i--)
197 v[i - 1] = buf.m_extentElements[i] * v[i];
205 template<
typename TApi,
typename TElem,
typename Dim,
typename TIdx>
208 template<
typename TExtent>
216 void* memPtr =
nullptr;
217 std::size_t rowPitchInBytes = 0u;
220 if constexpr(Dim::value == 0)
224 else if constexpr(Dim::value == 1)
229 else if constexpr(Dim::value == 2)
234 static_cast<std::size_t
>(
getWidth(extent)) *
sizeof(TElem),
235 static_cast<std::size_t
>(
getHeight(extent))));
237 else if constexpr(Dim::value == 3)
239 typename TApi::Extent_t
const extentVal = TApi::makeExtent(
240 static_cast<std::size_t
>(
getWidth(extent)) *
sizeof(TElem),
241 static_cast<std::size_t
>(
getHeight(extent)),
242 static_cast<std::size_t
>(
getDepth(extent)));
243 typename TApi::PitchedPtr_t pitchedPtrVal;
244 pitchedPtrVal.ptr =
nullptr;
246 memPtr = pitchedPtrVal.ptr;
247 rowPitchInBytes = pitchedPtrVal.pitch;
250 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
251 std::cout << __func__;
252 if constexpr(Dim::value >= 1)
253 std::cout <<
" ew: " <<
getWidth(extent);
254 if constexpr(Dim::value >= 2)
255 std::cout <<
" eh: " <<
getHeight(extent);
256 if constexpr(Dim::value >= 3)
257 std::cout <<
" ed: " <<
getDepth(extent);
258 std::cout <<
" ptr: " << memPtr;
259 if constexpr(Dim::value >= 2)
260 std::cout <<
" rowpitch: " << rowPitchInBytes;
261 std::cout << std::endl;
265 reinterpret_cast<TElem*
>(memPtr),
273 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
276 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
278 std::is_same_v<TApi, ApiCudaRt> && TApi::version >= BOOST_VERSION_NUMBER(11, 2, 0),
279 "Support for stream-ordered memory buffers requires CUDA 11.2 or higher.");
281 # if defined(ALPAKA_ACC_GPU_HIP_ENABLED)
283 std::is_same_v<TApi, ApiHipRt> && TApi::version >= BOOST_VERSION_NUMBER(5, 3, 0),
284 "Support for stream-ordered memory buffers requires HIP/ROCm 5.3 or higher.");
288 "CUDA/HIP devices support only one-dimensional stream-ordered memory buffers.");
290 template<
typename TQueue,
typename TExtent>
296 static_assert(TDim::value ==
Dim<TExtent>::value,
"extent must have the same dimension as the buffer");
299 auto const& dev =
getDev(queue);
301 void* memPtr =
nullptr;
304 static_cast<std::size_t
>(width) *
sizeof(TElem),
305 queue.getNativeHandle()));
307 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
308 std::cout << __func__ <<
" ew: " << width <<
" ptr: " << memPtr << std::endl;
312 reinterpret_cast<TElem*
>(memPtr),
313 [q = std::move(queue)](TElem* ptr)
316 static_cast<std::size_t
>(width) *
sizeof(TElem)};
321 template<
typename TApi,
typename TDim>
323 : std::bool_constant<
326 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
327 std::is_same_v<TApi, ApiCudaRt> && TApi::version >= BOOST_VERSION_NUMBER(11, 2, 0)
328 # elif defined(ALPAKA_ACC_GPU_HIP_ENABLED)
329 std::is_same_v<TApi, ApiHipRt> && TApi::version >= BOOST_VERSION_NUMBER(5, 3, 0)
338 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
341 template<
typename TExtent>
351 TElem* memPtr =
nullptr;
353 reinterpret_cast<void**
>(&memPtr),
355 TApi::hostMallocMapped | TApi::hostMallocPortable));
363 template<
typename TApi>
369 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
380 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
387 template<
typename TApi,
typename TElem,
typename TDim,
typename TIdx>
395 TElem* pDev(
nullptr);
399 const_cast<void*
>(
reinterpret_cast<void const*
>(
getPtrNative(buf))),
409 TElem* pDev(
nullptr);
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto zeros() -> Vec< TDim, TVal >
Zero value constructor.
constexpr ALPAKA_FN_HOST_ACC auto back() -> TVal &
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtentProduct(T const &object) -> Idx< T >
ALPAKA_FN_HOST auto free(TAlloc const &alloc, T const *const ptr) -> void
Frees the memory identified by the given pointer.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getHeight(TExtent const &extent=TExtent()) -> Idx< TExtent >
ALPAKA_FN_HOST auto malloc(TAlloc const &alloc, std::size_t const &sizeElems) -> T *
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T >>
ALPAKA_FN_HOST auto allocMappedBuf(DevCpu const &host, TPlatform const &platform, TExtent const &extent=TExtent())
Allocates pinned/mapped host memory, accessible by all devices in the given platform.
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
ALPAKA_FN_HOST auto getDev(T const &t)
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getDepth(TExtent const &extent=TExtent()) -> Idx< TExtent >
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getWidth(TExtent const &extent=TExtent()) -> Idx< TExtent >
std::size_t m_rowPitchInBytes
The stream-ordered memory allocator trait.
The pinned/mapped memory allocator trait.
The memory allocator trait.
The dimension getter type trait.
ALPAKA_FN_HOST auto operator()(BufUniformCudaHipRt< TApi, TElem, TDim, TIdx > const &buffer) const
The GetExtents trait for getting the extents of an object as an alpaka::Vec.
The GetOffsets trait for getting the offsets of an object as an alpaka::Vec.
Customization point for getPitchesInBytes. The default implementation uses the extent to calculate the pitches.
The pointer on device get trait.
The native pointer get trait.
The stream-ordered memory allocation capability trait.
The pinned/mapped memory allocation capability trait.