26#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
30 template<
typename TApi>
31 class DevUniformCudaHipRt;
35 template<
typename TElem,
typename TExtent,
typename TPitchBytes>
38 template<
typename TAcc>
44 TPitchBytes pitchBytes)
const
49 std::uintptr_t offsetBytes =
static_cast<std::uintptr_t
>((pitchBytes * idx).sum());
50 TElem* elem =
reinterpret_cast<TElem*
>(
51 __builtin_assume_aligned(
reinterpret_cast<std::uint8_t*
>(ptr) + offsetBytes,
alignof(TElem)));
59 template<
typename TElem>
62 template<
typename TAcc>
75 template<
typename TDim,
typename TApi>
78 template<
typename TExtent,
typename TViewFwd,
typename TValue>
81 using View = std::remove_reference_t<TViewFwd>;
88 std::is_trivially_copyable_v<Elem>,
89 "Only trivially copyable types are supported for fill");
91 if constexpr(TDim::value == 0)
95 return alpaka::createTaskKernel<Acc>(
108 WorkDiv grid = WorkDiv(blocks, threads, elements);
112 for([[maybe_unused]]
auto pitch : pitches)
116 return alpaka::createTaskKernel<Acc>(
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto ones() -> Vec< TDim, TVal >
One value constructor.
A basic class holding the work division as grid block extent, block thread extent, and thread element extent.
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
ALPAKA_FN_ACC auto uniformElementsND(TAcc const &acc)
typename trait::AccType< T >::type Acc
The accelerator type trait alias template to remove the ::type.
ALPAKA_FN_ACC void operator()(TAcc const &acc, TElem *ptr, TElem value) const
ALPAKA_FN_ACC void operator()(TAcc const &acc, TElem *ptr, TElem value, TExtent extent, TPitchBytes pitchBytes) const