alpaka
Abstraction Library for Parallel Kernel Acceleration
Set.hpp
Go to the documentation of this file.
1 /* Copyright 2023 Jan Stephan, Luca Ferragina, Aurora Perego, Andrea Bocci
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include "alpaka/core/Debug.hpp"
8 #include "alpaka/core/Sycl.hpp"
10 #include "alpaka/dev/Traits.hpp"
12 #include "alpaka/extent/Traits.hpp"
15 #include "alpaka/meta/NdLoop.hpp"
18 #include "alpaka/queue/Traits.hpp"
19 
20 #include <cstddef>
21 #include <cstdint>
22 #include <memory>
23 
24 #ifdef ALPAKA_ACC_SYCL_ENABLED
25 
26 namespace alpaka
27 {
28 
29  namespace detail
30  {
31  //! The SYCL ND memory set task base.
32  template<typename TDim, typename TView, typename TExtent>
33  struct TaskSetSyclBase
34  {
35  using ExtentSize = Idx<TExtent>;
36  using DstSize = Idx<TView>;
37  using Elem = alpaka::Elem<TView>;
38 
39  template<typename TViewFwd>
40  TaskSetSyclBase(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
41  : m_byte(byte)
42  , m_extent(getExtents(extent))
43  , m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
44 # if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
45  , m_dstExtent(getExtents(view))
46 # endif
47 
48  , m_dstPitchBytes(getPitchesInBytes(view))
49  , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
50 
51  {
52  ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
53  if constexpr(TDim::value > 1)
54  ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes[TDim::value - 2]);
55  }
56 
57 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
58  auto printDebug() const -> void
59  {
60  std::cout << __func__ << " e: " << this->m_extent << " ewb: " << this->m_extentWidthBytes
61  << " de: " << this->m_dstExtent << " dptr: " << reinterpret_cast<void*>(this->m_dstMemNative)
62  << " dpitchb: " << this->m_dstPitchBytes << std::endl;
63  }
64 # endif
65 
66  std::uint8_t const m_byte;
67  Vec<TDim, ExtentSize> const m_extent;
68  ExtentSize const m_extentWidthBytes;
69 # if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
70  Vec<TDim, DstSize> const m_dstExtent;
71 # endif
72  Vec<TDim, DstSize> const m_dstPitchBytes;
73  std::uint8_t* const m_dstMemNative;
74  static constexpr auto is_sycl_task = true;
75  };
76 
77  //! The SYCL device ND memory set task.
78  template<typename TDim, typename TView, typename TExtent>
79  struct TaskSetSycl : public TaskSetSyclBase<TDim, TView, TExtent>
80  {
81  using DimMin1 = DimInt<TDim::value - 1u>;
82  using typename TaskSetSyclBase<TDim, TView, TExtent>::ExtentSize;
83  using typename TaskSetSyclBase<TDim, TView, TExtent>::DstSize;
84 
85  using TaskSetSyclBase<TDim, TView, TExtent>::TaskSetSyclBase;
86 
87  auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
88  {
90 
91 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
92  this->printDebug();
93 # endif
94  // [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
95  // iteration.
96  Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
97  Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_dstPitchBytes));
98 
99  // Record an event for each memcpy call
100  std::vector<sycl::event> events;
101  events.reserve(static_cast<std::size_t>(extentWithoutInnermost.prod()));
102 
103  if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
104  {
106  extentWithoutInnermost,
107  [&](Vec<DimMin1, ExtentSize> const& idx)
108  {
109  events.push_back(queue.memset(
110  this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
111  this->m_byte,
112  static_cast<std::size_t>(this->m_extentWidthBytes),
113  requirements));
114  });
115  }
116 
117  // Return an event that depends on all the events assciated to the memcpy calls
118  return queue.ext_oneapi_submit_barrier(events);
119  }
120  };
121 
122  //! The 1D SYCL memory set task.
123  template<typename TView, typename TExtent>
124  struct TaskSetSycl<DimInt<1u>, TView, TExtent> : public TaskSetSyclBase<DimInt<1u>, TView, TExtent>
125  {
126  using TaskSetSyclBase<DimInt<1u>, TView, TExtent>::TaskSetSyclBase;
127 
128  auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
129  {
131 
132 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
133  this->printDebug();
134 # endif
135  if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
136  {
137  return queue.memset(
138  reinterpret_cast<void*>(this->m_dstMemNative),
139  this->m_byte,
140  static_cast<std::size_t>(this->m_extentWidthBytes),
141  requirements);
142  }
143  else
144  {
145  return queue.ext_oneapi_submit_barrier();
146  }
147  }
148  };
149 
150  //! The SYCL device scalar memory set task.
151  template<typename TView, typename TExtent>
152  struct TaskSetSycl<DimInt<0u>, TView, TExtent>
153  {
154  using ExtentSize = Idx<TExtent>;
155  using Scalar = Vec<DimInt<0u>, ExtentSize>;
156  using DstSize = Idx<TView>;
157  using Elem = alpaka::Elem<TView>;
158 
159  template<typename TViewFwd>
160  TaskSetSycl(TViewFwd&& view, std::uint8_t const& byte, [[maybe_unused]] TExtent const& extent)
161  : m_byte(byte)
162  , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
163  {
164  // all zero-sized extents are equivalent
165  ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
166  ALPAKA_ASSERT(getExtents(view).prod() == 1u);
167  }
168 
169 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
170  auto printDebug() const -> void
171  {
172  std::cout << __func__ << " e: " << Scalar() << " ewb: " << sizeof(Elem) << " de: " << Scalar()
173  << " dptr: " << reinterpret_cast<void*>(m_dstMemNative) << " dpitchb: " << Scalar()
174  << std::endl;
175  }
176 # endif
177 
178  auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
179  {
181 
182 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
183  printDebug();
184 # endif
185  return queue.memset(reinterpret_cast<void*>(m_dstMemNative), m_byte, sizeof(Elem), requirements);
186  }
187 
188  std::uint8_t const m_byte;
189  std::uint8_t* const m_dstMemNative;
190  static constexpr auto is_sycl_task = true;
191  };
192 
193  } // namespace detail
194 
195  namespace trait
196  {
197  //! The SYCL device memory set trait specialization.
198  template<typename TDim, typename TPlatform>
199  struct CreateTaskMemset<TDim, DevGenericSycl<TPlatform>>
200  {
201  template<typename TExtent, typename TView>
202  static auto createTaskMemset(TView& view, std::uint8_t const& byte, TExtent const& extent)
203  -> detail::TaskSetSycl<TDim, TView, TExtent>
204  {
205  return detail::TaskSetSycl<TDim, TView, TExtent>(view, byte, extent);
206  }
207  };
208 
209  } // namespace trait
210 
211 } // namespace alpaka
212 #endif
#define ALPAKA_ASSERT(...)
The assert can be explicit disabled by defining NDEBUG.
Definition: Assert.hpp:13
#define ALPAKA_DEBUG
Set the minimum log level if it is not defined.
Definition: Debug.hpp:22
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
#define ALPAKA_DEBUG_FULL
The full debug level.
Definition: Debug.hpp:18
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition: NdLoop.hpp:81
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
Definition: Traits.hpp:114
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T >>
Definition: Traits.hpp:59
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView >>
Definition: Traits.hpp:196
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition: Traits.hpp:136
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition: Traits.hpp:21
std::integral_constant< std::size_t, N > DimInt
ALPAKA_FN_HOST auto createTaskMemset(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent)
Create a memory set task.
Definition: Traits.hpp:207