alpaka
Abstraction Library for Parallel Kernel Acceleration
Set.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Benjamin Worpitz, Erik Zenker, Matthias Werner, Andrea Bocci, Jan Stephan, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include "alpaka/core/Assert.hpp"
11 #include "alpaka/meta/Integral.hpp"
12 #include "alpaka/meta/NdLoop.hpp"
13 
14 #include <cstring>
15 
16 namespace alpaka
17 {
18  class DevCpu;
19 
20  namespace detail
21  {
22  //! The CPU device ND memory set task base.
23  template<typename TDim, typename TView, typename TExtent>
25  {
26  static_assert(TDim::value > 0);
27 
31 
//! Constructs the memset task base from a destination view, the byte value to write and the extent to set.
//!
//! \param view   Destination memory view; its native pointer is stored as a std::uint8_t*.
//! \param byte   Byte value stored in m_byte.
//! \param extent Object whose extents (via getExtents) describe the region to set.
//!
//! NOTE(review): listing lines 40 and 45 of the original file are absent from this listing; the comments
//! below describe only what is visible here.
32  template<typename TViewFwd>
33  TaskSetCpuBase(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
34  : m_byte(byte)
35  , m_extent(getExtents(extent))
// Number of bytes covered by the last extent component: last extent entry times sizeof(Elem).
36  , m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
// The destination extent is only stored when asserts are active (NDEBUG not defined) or in full debug
// mode; it is read by the ALPAKA_ASSERT below and by printDebug().
37 #if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
38  , m_dstExtent(getExtents(view))
39 #endif
// NOTE(review): listing line 40 is missing here; presumably the initializer of m_dstPitchBytes, which is
// used by printDebug() and by the N-D task - confirm against the original file.
41  , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
42  {
// Every component of the requested extent must not exceed the corresponding destination extent.
43  ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
44  if constexpr(TDim::value > 1)
// NOTE(review): the statement guarded by this `if constexpr` (listing line 45) is missing from the listing.
46  }
47 
48 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
//! Prints the state of this task to std::cout on a single line terminated by std::endl.
//!
//! Output fields, in order: the function name, "e" (m_extent), "ewb" (m_extentWidthBytes),
//! "de" (m_dstExtent), "dptr" (destination pointer printed as void*) and "dpitchb" (m_dstPitchBytes).
//! Only compiled when ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL.
49  ALPAKA_FN_HOST auto printDebug() const -> void
50  {
51  std::cout << __func__ << " e: " << this->m_extent << " ewb: " << this->m_extentWidthBytes
52  << " de: " << this->m_dstExtent << " dptr: " << reinterpret_cast<void*>(this->m_dstMemNative)
53  << " dpitchb: " << this->m_dstPitchBytes << std::endl;
54  }
55 #endif
56 
57  std::uint8_t const m_byte;
60 #if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
62 #endif
64  std::uint8_t* const m_dstMemNative;
65  };
66 
67  //! The CPU device ND memory set task.
68  template<typename TDim, typename TView, typename TExtent>
69  struct TaskSetCpu : public TaskSetCpuBase<TDim, TView, TExtent>
70  {
// Dimensionality of the outer iteration space: one less than the task's dimensionality.
71  using DimMin1 = DimInt<TDim::value - 1u>;
// NOTE(review): listing lines 72, 73 and 75 are missing here; ExtentSize and DstSize are used below, so
// they are presumably brought into scope on those lines - confirm against the original file.
74 
76 
//! Executes the task: for every index of the extent with its last component removed, one block of
//! m_extentWidthBytes bytes is addressed at m_dstMemNative plus the pitch-weighted index offset.
//! Nothing is iterated when the product of the extents is zero.
77  ALPAKA_FN_HOST auto operator()() const -> void
78  {
// NOTE(review): listing line 79 is missing here.
80 
81 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
82  this->printDebug();
83 #endif
84  // [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
85  // iteration.
86  Vec<DimMin1, ExtentSize> const extentWithoutInnermost = subVecBegin<DimMin1>(this->m_extent);
// NOTE(review): the name says "WithoutOutmost", yet the same subVecBegin helper as for
// extentWithoutInnermost is applied - confirm that name and pitch layout agree.
87  Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_dstPitchBytes);
88 
// Skip all work for an empty region (any extent component equal to zero).
89  if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
90  {
// NOTE(review): the head of this call (listing line 91) is missing; presumably meta::ndLoopIncIdx(,
// which takes an extent vector and a function object - confirm.
92  extentWithoutInnermost,
93  [&](Vec<DimMin1, ExtentSize> const& idx)
94  {
// NOTE(review): the head of this call (listing line 95) is missing; the arguments (pointer, byte,
// byte count) match std::memset as used by the 1D task - confirm.
// The destination address is the base pointer plus the sum of index * pitch over the outer dimensions.
96  this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost).sum(),
97  this->m_byte,
98  static_cast<std::size_t>(this->m_extentWidthBytes));
99  });
100  }
101  }
102  };
103 
104  //! The CPU device 1D memory set task.
105  template<typename TView, typename TExtent>
106  struct TaskSetCpu<DimInt<1u>, TView, TExtent> : public TaskSetCpuBase<DimInt<1u>, TView, TExtent>
107  {
// Inherit the base class constructors unchanged; this specialization adds no state of its own.
108  using TaskSetCpuBase<DimInt<1u>, TView, TExtent>::TaskSetCpuBase;
109 
//! Executes the task: fills m_extentWidthBytes bytes starting at m_dstMemNative with m_byte using a
//! single std::memset call. Does nothing when the product of the extents is zero.
110  ALPAKA_FN_HOST auto operator()() const -> void
111  {
// NOTE(review): listing line 112 is missing here.
113 
114 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
115  this->printDebug();
116 #endif
// Skip the call entirely for an empty region.
117  if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
118  {
119  std::memset(
120  this->m_dstMemNative,
121  this->m_byte,
122  static_cast<std::size_t>(this->m_extentWidthBytes));
123  }
124  }
125  };
126 
127  //! The CPU device scalar memory set task.
128  template<typename TView, typename TExtent>
129  struct TaskSetCpu<DimInt<0u>, TView, TExtent>
130  {
// NOTE(review): listing lines 131-134 are missing here; Elem and Scalar are used below, so they are
// presumably declared on those lines - confirm against the original file.
135 
//! Stores the byte value and the destination pointer (as std::uint8_t*) of the view.
//!
//! \param view   Destination memory view.
//! \param byte   Byte value stored in m_byte.
//! \param extent Only inspected by the assertion below, hence [[maybe_unused]].
136  template<typename TViewFwd>
137  TaskSetCpu(TViewFwd&& view, std::uint8_t const& byte, [[maybe_unused]] TExtent const& extent)
138  : m_byte(byte)
139  , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
140  {
141  // all zero-sized extents are equivalent
// Both the requested extent and the view's extent must have a product of exactly 1.
142  ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
143  ALPAKA_ASSERT(getExtents(view).prod() == 1u);
144  }
145 
146 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
//! Prints the state of this task to std::cout in the same field layout as the N-D task's printDebug();
//! a default-constructed Scalar stands in for the extent and pitch fields and sizeof(Elem) is printed
//! as the width in bytes.
147  ALPAKA_FN_HOST auto printDebug() const -> void
148  {
149  std::cout << __func__ << " e: " << Scalar() << " ewb: " << sizeof(Elem) << " de: " << Scalar()
150  << " dptr: " << reinterpret_cast<void*>(m_dstMemNative) << " dpitchb: " << Scalar()
151  << std::endl;
152  }
153 #endif
154 
//! Executes the task. Declared noexcept only when ALPAKA_DEBUG < ALPAKA_DEBUG_FULL, i.e. when the
//! printDebug() call below is compiled out.
155  ALPAKA_FN_HOST auto operator()() const noexcept(ALPAKA_DEBUG < ALPAKA_DEBUG_FULL) -> void
156  {
// NOTE(review): listing line 157 is missing here.
158 
159 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
160  printDebug();
161 #endif
// NOTE(review): listing line 162, the statement that performs the actual set, is missing; presumably a
// std::memset of sizeof(Elem) bytes at m_dstMemNative - confirm against the original file.
163  }
164 
// Byte value to write.
165  std::uint8_t const m_byte;
// Destination address, obtained from getPtrNative(view) in the constructor.
166  std::uint8_t* const m_dstMemNative;
167  };
168  } // namespace detail
169 
170  namespace trait
171  {
172  //! The CPU device memory set trait specialization.
173  template<typename TDim>
174  struct CreateTaskMemset<TDim, DevCpu>
175  {
// Factory for the CPU memset task: forwards the view and passes byte and extent to the constructor of
// the returned task via brace initialization.
// NOTE(review): the function head (listing line 177) and the last parameter (listing line 180) are
// missing here. The reference index of this listing gives the signature as
// createTaskMemset(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
// -> alpaka::detail::TaskSetCpu<TDim, std::remove_reference_t<TViewFwd>, TExtent> - confirm.
176  template<typename TExtent, typename TViewFwd>
178  TViewFwd&& view,
179  std::uint8_t const& byte,
181  {
182  return {std::forward<TViewFwd>(view), byte, extent};
183  }
184  };
185  } // namespace trait
186 } // namespace alpaka
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
Definition: Assert.hpp:13
#define ALPAKA_DEBUG
Set the minimum log level if it is not defined.
Definition: Debug.hpp:22
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
#define ALPAKA_DEBUG_FULL
The full debug level.
Definition: Debug.hpp:18
The CPU device handle.
Definition: DevCpu.hpp:56
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition: NdLoop.hpp:81
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
Definition: Traits.hpp:114
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition: Traits.hpp:29
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T >>
Definition: Traits.hpp:59
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView >>
Definition: Traits.hpp:196
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition: Traits.hpp:136
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition: Traits.hpp:21
ALPAKA_FN_HOST auto memset(TQueue &queue, TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent) -> void
Sets the bytes of the memory of view, described by extent, to the given value.
Definition: Traits.hpp:231
std::integral_constant< std::size_t, N > DimInt
The CPU device ND memory set task base.
Definition: Set.hpp:25
ExtentSize const m_extentWidthBytes
Definition: Set.hpp:59
ALPAKA_FN_HOST auto printDebug() const -> void
Definition: Set.hpp:49
std::uint8_t const m_byte
Definition: Set.hpp:57
Vec< TDim, DstSize > const m_dstExtent
Definition: Set.hpp:61
Idx< TExtent > ExtentSize
Definition: Set.hpp:28
std::uint8_t *const m_dstMemNative
Definition: Set.hpp:64
Vec< TDim, ExtentSize > const m_extent
Definition: Set.hpp:58
TaskSetCpuBase(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent)
Definition: Set.hpp:33
alpaka::Elem< TView > Elem
Definition: Set.hpp:30
Vec< TDim, DstSize > const m_dstPitchBytes
Definition: Set.hpp:63
ALPAKA_FN_HOST auto printDebug() const -> void
Definition: Set.hpp:147
TaskSetCpu(TViewFwd &&view, std::uint8_t const &byte, [[maybe_unused]] TExtent const &extent)
Definition: Set.hpp:137
ALPAKA_FN_HOST auto operator()() const noexcept(ALPAKA_DEBUG< ALPAKA_DEBUG_FULL) -> void
Definition: Set.hpp:155
ALPAKA_FN_HOST auto operator()() const -> void
Definition: Set.hpp:110
The CPU device ND memory set task.
Definition: Set.hpp:70
ALPAKA_FN_HOST auto operator()() const -> void
Definition: Set.hpp:77
DimInt< TDim::value - 1u > DimMin1
Definition: Set.hpp:71
static ALPAKA_FN_HOST auto createTaskMemset(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent) -> alpaka::detail::TaskSetCpu< TDim, std::remove_reference_t< TViewFwd >, TExtent >
Definition: Set.hpp:177
The memory set task trait.
Definition: Traits.hpp:114