alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Fill.hpp
Go to the documentation of this file.
1/* Copyright 2025 Maria Michailidi, Anna Polova, Abdulrahman Al Marzouqi
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
13
14namespace alpaka
15{
16 class DevCpu;
17
18 namespace detail
19 {
20 //! The CPU device N-dimensional memory fill task.
21 template<typename TDim, typename TView, typename TExtent>
23 {
24 static_assert(TDim::value > 0);
25
29
30 static_assert(std::is_trivially_copyable_v<Elem>, "Only trivially copyable types supported for fill");
31
32 template<typename TViewFwd>
33 TaskFillCpu(TViewFwd&& view, Elem const& value, TExtent const& extent)
34 : m_value(value)
35 , m_extent(getExtents(extent))
36#if(!defined(NDEBUG))
37 , m_dstExtent(getExtents(view))
38#endif
39 , m_dstPitchBytes(getPitchesInBytes(view))
40 , m_dstMemNative(getPtrNative(view))
41 {
42 ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
43 if constexpr(TDim::value > 0)
44 {
45 ALPAKA_ASSERT(static_cast<std::size_t>(m_dstPitchBytes[TDim::value - 1]) >= sizeof(Elem));
46 ALPAKA_ASSERT(static_cast<std::size_t>(m_dstPitchBytes[TDim::value - 1]) % alignof(Elem) == 0);
47 }
48 if constexpr(TDim::value > 1)
49 {
50 for(int dim = TDim::value - 2; dim >= 0; --dim)
51 {
53 static_cast<std::size_t>(m_dstPitchBytes[dim])
54 >= static_cast<std::size_t>(m_dstPitchBytes[dim + 1] * m_dstExtent[dim + 1]));
55 ALPAKA_ASSERT(static_cast<std::size_t>(m_dstPitchBytes[dim]) % alignof(Elem) == 0);
56 }
57 }
58 ALPAKA_ASSERT(reinterpret_cast<std::uintptr_t>(m_dstMemNative) % alignof(Elem) == 0);
59 }
60
//! Executes the fill: assigns m_value to the element addressed by each index the lambda below receives.
//! Does nothing when the product of m_extent is zero.
61 ALPAKA_FN_HOST auto operator()() const -> void
62 {
63 if(static_cast<std::size_t>(m_extent.prod()) != 0u)
64 {
// NOTE(review): the line opening the call that takes m_extent and the lambda is missing from this listing
// (listing line 65) - presumably the ndLoopIncIdx helper; confirm against the original header.
66 m_extent,
67 [&](Vec<TDim, ExtentSize> const& idx)
68 {
69 // All elements of m_dstPitchBytes are multiples of the alignment of Elem.
// Byte offset of the element: sum over all dimensions of (index * pitch in bytes).
70 std::uintptr_t offsetBytes = static_cast<std::uintptr_t>((idx * m_dstPitchBytes).sum());
// The offset is applied on a byte pointer; the result is then declared aligned to alignof(Elem).
// NOTE(review): __builtin_assume_aligned is a compiler builtin (GCC/Clang) - confirm that every
// supported host compiler provides it.
71 Elem* elem = reinterpret_cast<Elem*>(__builtin_assume_aligned(
72 reinterpret_cast<std::uint8_t*>(m_dstMemNative) + offsetBytes,
73 alignof(Elem)));
74 *elem = m_value;
75 });
76 }
77 }
78
79 private:
80 Elem const m_value;
81 Vec<TDim, ExtentSize> const m_extent;
82#if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
83 Vec<TDim, DstSize> const m_dstExtent;
84#endif
85 Vec<TDim, DstSize> const m_dstPitchBytes;
86 Elem* const m_dstMemNative;
87 };
88
89 //! The CPU device 0-dimensional memory fill task specialisation.
//! Holds a copy of the fill value and the native destination pointer; invoking the task assigns the
//! value through that pointer.
90 template<typename TView, typename TExtent>
91 struct TaskFillCpu<DimInt<0u>, TView, TExtent>
92 {
// NOTE(review): the Elem alias used below is not visible in this listing (listing line 93 is missing) -
// presumably alpaka::Elem<TView>; confirm against the original header.
94
//! Stores the fill value and the native pointer of \p view.
//! \param view   The destination memory view.
//! \param value  The value written when the task runs.
//! \param extent Only read inside ALPAKA_ASSERT, hence [[maybe_unused]].
95 template<typename TViewFwd>
96 TaskFillCpu(TViewFwd&& view, Elem const& value, [[maybe_unused]] TExtent const& extent)
97 : m_value(value)
98 , m_dstMemNative(getPtrNative(view))
99 {
// Both the requested extent and the view must describe exactly one element.
100 ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
101 ALPAKA_ASSERT(getExtents(view).prod() == 1u);
// The destination pointer must be aligned for Elem.
102 ALPAKA_ASSERT(reinterpret_cast<std::uintptr_t>(m_dstMemNative) % alignof(Elem) == 0);
103 }
104
//! Executes the fill: a single assignment of the stored value to the destination element.
105 ALPAKA_FN_HOST auto operator()() const noexcept -> void
106 {
107 *m_dstMemNative = m_value;
108 }
109
110 private:
// Copy of the value to write.
111 Elem const m_value;
// Native pointer to the destination element.
112 Elem* const m_dstMemNative;
113 };
114 } // namespace detail
115
116 namespace trait
117 {
118 //! The memory fill task trait specialization for CPU devices.
119 template<typename TDim>
120 struct CreateTaskFill<TDim, DevCpu>
121 {
//! Builds the CPU fill task for the given view, value and extent.
//! \param view   The destination memory view; forwarded to the task constructor.
//! \param value  The value to fill with; its type is the element type of the view.
//! \param extent The extent of the region to fill.
//! \return An alpaka::detail::TaskFillCpu<TDim, TView, TExtent> constructed from the arguments.
122 template<typename TExtent, typename TViewFwd>
123 ALPAKA_FN_HOST static auto createTaskFill(
124 TViewFwd&& view,
125 alpaka::Elem<std::remove_reference_t<TViewFwd>> const& value,
126 TExtent const& extent)
127 {
// Strip the reference so the task is parameterised on the plain view type.
128 using TView = std::remove_reference_t<TViewFwd>;
// NOTE(review): the alias introducing `Elem` for the check below is missing from this listing
// (listing line 129) - presumably alpaka::Elem<TView>; confirm against the original header.
130 static_assert(
131 std::is_trivially_copyable_v<Elem>,
132 "Only trivially copyable types are supported for fill");
133
134 return alpaka::detail::TaskFillCpu<TDim, TView, TExtent>{std::forward<TViewFwd>(view), value, extent};
135 }
136 };
137 } // namespace trait
138
139} // namespace alpaka
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
Definition Assert.hpp:13
An n-dimensional vector.
Definition Vec.hpp:38
#define ALPAKA_FN_HOST
Definition Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition NdLoop.hpp:81
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
Definition Traits.hpp:199
ALPAKA_FN_HOST auto createTaskFill(TViewFwd &&view, TValue const &value, TExtent const &extent)
Definition Traits.hpp:228
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition Traits.hpp:139
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition Traits.hpp:21
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T > >
Definition Traits.hpp:59
std::integral_constant< std::size_t, N > DimInt
ALPAKA_FN_HOST auto operator()() const noexcept -> void
Definition Fill.hpp:105
TaskFillCpu(TViewFwd &&view, Elem const &value, TExtent const &extent)
Definition Fill.hpp:96
The CPU device N-dimensional memory fill task.
Definition Fill.hpp:23
ALPAKA_FN_HOST auto operator()() const -> void
Definition Fill.hpp:61
Idx< TExtent > ExtentSize
Definition Fill.hpp:26
TaskFillCpu(TViewFwd &&view, Elem const &value, TExtent const &extent)
Definition Fill.hpp:33
alpaka::Elem< TView > Elem
Definition Fill.hpp:28