alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Set.hpp
Go to the documentation of this file.
1/* Copyright 2022 Benjamin Worpitz, Erik Zenker, Matthias Werner, Andrea Bocci, Jan Stephan, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
13
14#include <cstring>
15
16namespace alpaka
17{
18 class DevCpu;
19
20 namespace detail
21 {
22 //! The CPU device ND memory set task base.
 //!
 //! Captures everything the derived set tasks read when they run: the fill byte, the requested extent,
 //! the width in bytes of one innermost row and the destination pointer viewed as raw bytes.
 //! NOTE(review): this listing extract skips lines 24, 28-30, 40, 45, 58-59, 61 and 63, so the struct
 //! header, the type aliases, the pitch initializer and some member declarations are not visible here.
23 template<typename TDim, typename TView, typename TExtent>
25 {
 // The base is only meant for one or more dimensions.
26 static_assert(TDim::value > 0);
27
31
 //! Stores the parameters of the set operation.
 //! \param view   The destination view; its native pointer is kept as a byte pointer.
 //! \param byte   The value that the derived tasks pass to std::memset.
 //! \param extent The extent of the region to set.
32 template<typename TViewFwd>
33 TaskSetCpuBase(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
34 : m_byte(byte)
35 , m_extent(getExtents(extent))
 // Bytes per row: last component of the extent times the element size.
36 , m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
 // The destination extent is only kept when asserts or full debug output are enabled.
37#if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
38 , m_dstExtent(getExtents(view))
39#endif
41 , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
42 {
 // The requested extent must not exceed the destination extent in any component.
43 ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
 // NOTE(review): the statement guarded by this `if constexpr` is on listing line 45, which is absent here.
44 if constexpr(TDim::value > 1)
46 }
47
48#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
 //! Writes the stored extent, row width, destination extent, destination pointer and destination
 //! pitches to std::cout. Only compiled at the full debug level.
49 ALPAKA_FN_HOST auto printDebug() const -> void
50 {
51 std::cout << __func__ << " e: " << this->m_extent << " ewb: " << this->m_extentWidthBytes
52 << " de: " << this->m_dstExtent << " dptr: " << reinterpret_cast<void*>(this->m_dstMemNative)
53 << " dpitchb: " << this->m_dstPitchBytes << std::endl;
54 }
55#endif
56
 //! The byte value written by the set operation.
57 std::uint8_t const m_byte;
60#if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
62#endif
 //! The destination memory, addressed byte-wise.
64 std::uint8_t* const m_dstMemNative;
65 };
66
67 //! The CPU device ND memory set task.
 //!
 //! Sets the region row by row: each std::memset call covers one row of m_extentWidthBytes bytes.
 //! NOTE(review): listing lines 72-73, 79 and 91 are absent from this extract.
68 template<typename TDim, typename TView, typename TExtent>
69 struct TaskSetCpu : public TaskSetCpuBase<TDim, TView, TExtent>
70 {
 // Dimensionality of the index space that remains once one dimension is dropped.
71 using DimMin1 = DimInt<TDim::value - 1u>;
74
 // Reuse the base class constructors unchanged.
75 using TaskSetCpuBase<TDim, TView, TExtent>::TaskSetCpuBase;
76
 //! Executes the set operation. Nothing is written when the product of the extent is zero.
77 ALPAKA_FN_HOST auto operator()() const -> void
78 {
80
81#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
82 this->printDebug();
83#endif
84 // [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
85 // iteration.
86 Vec<DimMin1, ExtentSize> const extentWithoutInnermost = subVecBegin<DimMin1>(this->m_extent);
 // The leading DimMin1 pitch components, used below to turn a row index into a byte offset.
87 Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_dstPitchBytes);
88
 // Skip all work for an empty region.
89 if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
90 {
 // NOTE(review): the call receiving these two arguments is on listing line 91, which is absent here;
 // presumably it invokes the lambda once per index within extentWithoutInnermost - confirm.
92 extentWithoutInnermost,
93 [&](Vec<DimMin1, ExtentSize> const& idx)
94 {
 // Row start = base pointer + sum of (index * pitch) products (presumably element-wise - confirm);
 // one row of m_extentWidthBytes bytes is then filled with m_byte.
95 std::memset(
96 this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost).sum(),
97 this->m_byte,
98 static_cast<std::size_t>(this->m_extentWidthBytes));
99 });
100 }
101 }
102 };
103
104 //! The CPU device 1D memory set task.
 //!
 //! Specialization for DimInt<1u>: the whole region is covered by a single std::memset call.
 //! NOTE(review): listing line 112 is absent from this extract.
105 template<typename TView, typename TExtent>
106 struct TaskSetCpu<DimInt<1u>, TView, TExtent> : public TaskSetCpuBase<DimInt<1u>, TView, TExtent>
107 {
 // Reuse the base class constructors unchanged.
108 using TaskSetCpuBase<DimInt<1u>, TView, TExtent>::TaskSetCpuBase;
109
 //! Executes the set operation. Nothing is written when the product of the extent is zero.
110 ALPAKA_FN_HOST auto operator()() const -> void
111 {
113
114#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
115 this->printDebug();
116#endif
 // Skip the memset entirely for an empty region.
117 if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
118 {
 // Fill m_extentWidthBytes bytes, starting at the destination pointer, with m_byte.
119 std::memset(
120 this->m_dstMemNative,
121 this->m_byte,
122 static_cast<std::size_t>(this->m_extentWidthBytes));
123 }
124 }
125 };
126
127 //! The CPU device scalar memory set task.
 //!
 //! Specialization for DimInt<0u>: it does not derive from TaskSetCpuBase and always sets exactly
 //! sizeof(Elem) bytes at the destination pointer.
 //! NOTE(review): listing lines 131-134, 155 and 157 are absent from this extract, so the type aliases
 //! and the declaration line of operator() are not visible here.
128 template<typename TView, typename TExtent>
129 struct TaskSetCpu<DimInt<0u>, TView, TExtent>
130 {
135
 //! Stores the fill byte and the destination pointer.
 //! \param view   The destination view; its native pointer is kept as a byte pointer.
 //! \param byte   The value passed to std::memset.
 //! \param extent Only inspected by the asserts in the body, hence [[maybe_unused]].
136 template<typename TViewFwd>
137 TaskSetCpu(TViewFwd&& view, std::uint8_t const& byte, [[maybe_unused]] TExtent const& extent)
138 : m_byte(byte)
139 , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
140 {
141 // All zero-dimensional extents are equivalent: both the requested extent and the view must have a product of exactly 1.
142 ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
143 ALPAKA_ASSERT(getExtents(view).prod() == 1u);
144 }
145
146#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
 //! Writes the task parameters to std::cout in the same field layout as the ND task; the extent,
 //! destination extent and pitch fields are printed as Scalar(). Only compiled at the full debug level.
147 ALPAKA_FN_HOST auto printDebug() const -> void
148 {
149 std::cout << __func__ << " e: " << Scalar() << " ewb: " << sizeof(Elem) << " de: " << Scalar()
150 << " dptr: " << reinterpret_cast<void*>(m_dstMemNative) << " dpitchb: " << Scalar()
151 << std::endl;
152 }
153#endif
154
 // Body of the call operator; its declaration is on listing line 155, which is absent here.
156 {
158
159#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
160 printDebug();
161#endif
 // Fill the single element, sizeof(Elem) bytes, with m_byte.
162 std::memset(m_dstMemNative, m_byte, sizeof(Elem));
163 }
164
 //! The byte value written by the set operation.
165 std::uint8_t const m_byte;
 //! The destination memory, addressed byte-wise.
166 std::uint8_t* const m_dstMemNative;
167 };
168 } // namespace detail
169
170 namespace trait
171 {
172 //! The CPU device memory set trait specialization.
 //!
 //! NOTE(review): listing lines 174, 177 and 180 are absent from this extract, so the struct header,
 //! the function name and return type, and the declaration of the `extent` parameter are not visible here.
173 template<typename TDim>
175 {
 //! Builds the set task from the forwarded view, the fill byte and the extent.
176 template<typename TExtent, typename TViewFwd>
178 TViewFwd&& view,
179 std::uint8_t const& byte,
181 {
 // Braced return: the task object is constructed directly from these three arguments.
182 return {std::forward<TViewFwd>(view), byte, extent};
183 }
184 };
185 } // namespace trait
186} // namespace alpaka
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
Definition Assert.hpp:13
#define ALPAKA_DEBUG
Set the minimum log level if it is not defined.
Definition Debug.hpp:22
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
#define ALPAKA_DEBUG_FULL
The full debug level.
Definition Debug.hpp:18
The CPU device handle.
Definition DevCpu.hpp:56
An n-dimensional vector.
Definition Vec.hpp:38
#define ALPAKA_FN_HOST
Definition Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition NdLoop.hpp:81
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
Definition Traits.hpp:196
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition Traits.hpp:136
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition Traits.hpp:21
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T > >
Definition Traits.hpp:59
std::integral_constant< std::size_t, N > DimInt
STL namespace.
The CPU device ND memory set task base.
Definition Set.hpp:25
ExtentSize const m_extentWidthBytes
Definition Set.hpp:59
ALPAKA_FN_HOST auto printDebug() const -> void
Definition Set.hpp:49
std::uint8_t const m_byte
Definition Set.hpp:57
Vec< TDim, DstSize > const m_dstExtent
Definition Set.hpp:61
Idx< TExtent > ExtentSize
Definition Set.hpp:28
std::uint8_t *const m_dstMemNative
Definition Set.hpp:64
Vec< TDim, ExtentSize > const m_extent
Definition Set.hpp:58
TaskSetCpuBase(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent)
Definition Set.hpp:33
alpaka::Elem< TView > Elem
Definition Set.hpp:30
Vec< TDim, DstSize > const m_dstPitchBytes
Definition Set.hpp:63
TaskSetCpu(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent)
Definition Set.hpp:137
ALPAKA_FN_HOST auto printDebug() const -> void
Definition Set.hpp:147
ALPAKA_FN_HOST auto operator()() const noexcept(ALPAKA_DEBUG< ALPAKA_DEBUG_FULL) -> void
Definition Set.hpp:155
ALPAKA_FN_HOST auto operator()() const -> void
Definition Set.hpp:110
The CPU device ND memory set task.
Definition Set.hpp:70
ALPAKA_FN_HOST auto operator()() const -> void
Definition Set.hpp:77
DimInt< TDim::value - 1u > DimMin1
Definition Set.hpp:71
static ALPAKA_FN_HOST auto createTaskMemset(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent) -> alpaka::detail::TaskSetCpu< TDim, std::remove_reference_t< TViewFwd >, TExtent >
Definition Set.hpp:177
The memory set task trait.
Definition Traits.hpp:114