alpaka
Abstraction Library for Parallel Kernel Acceleration
Copy.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Jan Stephan, Bernhard Manfred Gruber, Luca Ferragina, Aurora Perego, Andrea Bocci
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include "alpaka/core/Debug.hpp"
8 #include "alpaka/core/Sycl.hpp"
9 #include "alpaka/dev/DevCpu.hpp"
12 #include "alpaka/elem/Traits.hpp"
13 #include "alpaka/extent/Traits.hpp"
16 #include "alpaka/meta/NdLoop.hpp"
19 
20 #include <memory>
21 #include <type_traits>
22 
23 #ifdef ALPAKA_ACC_SYCL_ENABLED
24 
25 # include <sycl/sycl.hpp>
26 
27 namespace alpaka::detail
28 {
29  //! The SYCL device memory copy task base.
30  template<typename TDim, typename TViewDst, typename TViewSrc, typename TExtent>
31  struct TaskCopySyclBase
32  {
33  static_assert(
34  std::is_same_v<std::remove_const_t<alpaka::Elem<TViewSrc>>, std::remove_const_t<alpaka::Elem<TViewDst>>>,
35  "The source and the destination view are required to have the same element type!");
36  using ExtentSize = Idx<TExtent>;
37  using DstSize = Idx<TViewDst>;
38  using SrcSize = Idx<TViewSrc>;
40 
41  template<typename TViewFwd>
42  TaskCopySyclBase(TViewFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
43  : m_extent(getExtents(extent))
44  , m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
45 # if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
46  , m_dstExtent(getExtents(viewDst))
47  , m_srcExtent(getExtents(viewSrc))
48 # endif
49  , m_dstPitchBytes(getPitchesInBytes(viewDst))
50  , m_srcPitchBytes(getPitchesInBytes(viewSrc))
51  , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(viewDst)))
52  , m_srcMemNative(reinterpret_cast<std::uint8_t const*>(getPtrNative(viewSrc)))
53  {
54  if constexpr(TDim::value > 0)
55  {
56  ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
57  ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).all());
58  }
59  }
60 
61 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
62  auto printDebug() const -> void
63  {
64  std::cout << __func__ << " e: " << m_extent << " ewb: " << this->m_extentWidthBytes
65  << " de: " << m_dstExtent << " dptr: " << reinterpret_cast<void*>(m_dstMemNative)
66  << " se: " << m_srcExtent << " sptr: " << reinterpret_cast<void const*>(m_srcMemNative)
67  << std::endl;
68  }
69 # endif
70 
71  Vec<TDim, ExtentSize> const m_extent;
72  ExtentSize const m_extentWidthBytes;
73 # if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
74  Vec<TDim, DstSize> const m_dstExtent;
75  Vec<TDim, SrcSize> const m_srcExtent;
76 # endif
77 
78  Vec<TDim, DstSize> const m_dstPitchBytes;
79  Vec<TDim, SrcSize> const m_srcPitchBytes;
80  std::uint8_t* const m_dstMemNative;
81  std::uint8_t const* const m_srcMemNative;
82  static constexpr auto is_sycl_task = true;
83  };
84 
85  //! The SYCL device ND memory copy task.
86  template<typename TDim, typename TViewDst, typename TViewSrc, typename TExtent>
87  struct TaskCopySycl : public TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>
88  {
89  using DimMin1 = DimInt<TDim::value - 1u>;
90  using typename TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>::ExtentSize;
91  using typename TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>::DstSize;
92  using typename TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>::SrcSize;
93 
94  using TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>::TaskCopySyclBase;
95 
96  auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
97  {
99 
100 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
101  this->printDebug();
102 # endif
103  // [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
104  // iteration.
105  Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
106  Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_dstPitchBytes));
107  Vec<DimMin1, SrcSize> const srcPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_srcPitchBytes));
108 
109  // Record an event for each memcpy call
110  std::vector<sycl::event> events;
111  events.reserve(static_cast<std::size_t>(extentWithoutInnermost.prod()));
112 
113  if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
114  {
116  extentWithoutInnermost,
117  [&](Vec<DimMin1, ExtentSize> const& idx)
118  {
119  events.push_back(queue.memcpy(
120  this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
121  this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutInnermost).sum(),
122  static_cast<std::size_t>(this->m_extentWidthBytes),
123  requirements));
124  });
125  }
126 
127  // Return an event that depends on all the events assciated to the memcpy calls
128  return queue.ext_oneapi_submit_barrier(events);
129  }
130  };
131 
132  //! The SYCL device 1D memory copy task.
133  template<typename TViewDst, typename TViewSrc, typename TExtent>
134  struct TaskCopySycl<DimInt<1u>, TViewDst, TViewSrc, TExtent>
135  : TaskCopySyclBase<DimInt<1u>, TViewDst, TViewSrc, TExtent>
136  {
137  using TaskCopySyclBase<DimInt<1u>, TViewDst, TViewSrc, TExtent>::TaskCopySyclBase;
139 
140  auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
141  {
143 
144 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
145  this->printDebug();
146 # endif
147  if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
148  {
149  return queue.memcpy(
150  this->m_dstMemNative,
151  this->m_srcMemNative,
152  sizeof(Elem) * static_cast<std::size_t>(this->m_extent.prod()),
153  requirements);
154  }
155  else
156  {
157  return queue.ext_oneapi_submit_barrier();
158  }
159  }
160  };
161 
162  //! The scalar SYCL memory copy trait.
163  template<typename TViewDst, typename TViewSrc, typename TExtent>
164  struct TaskCopySycl<DimInt<0u>, TViewDst, TViewSrc, TExtent>
165  {
166  static_assert(
167  std::is_same_v<std::remove_const_t<alpaka::Elem<TViewSrc>>, std::remove_const_t<alpaka::Elem<TViewDst>>>,
168  "The source and the destination view are required to have the same element type!");
169 
171 
172  template<typename TViewDstFwd>
173  TaskCopySycl(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, [[maybe_unused]] TExtent const& extent)
174  : m_dstMemNative(reinterpret_cast<void*>(getPtrNative(viewDst)))
175  , m_srcMemNative(reinterpret_cast<void const*>(getPtrNative(viewSrc)))
176  {
177  // all zero-sized extents are equivalent
178  ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
179  ALPAKA_ASSERT(getExtents(viewDst).prod() == 1u);
180  ALPAKA_ASSERT(getExtents(viewSrc).prod() == 1u);
181  }
182 
183  auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
184  {
185  return queue.memcpy(m_dstMemNative, m_srcMemNative, sizeof(Elem), requirements);
186  }
187 
188  void* m_dstMemNative;
189  void const* m_srcMemNative;
190  static constexpr auto is_sycl_task = true;
191  };
192 } // namespace alpaka::detail
193 
194 // Trait specializations for CreateTaskMemcpy.
195 namespace alpaka::trait
196 {
197  //! The SYCL host-to-device memory copy trait specialization.
198  template<concepts::Tag TTag, typename TDim>
199  struct CreateTaskMemcpy<TDim, DevGenericSycl<TTag>, DevCpu>
200  {
201  template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
202  static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
203  -> alpaka::detail::TaskCopySycl<TDim, std::remove_reference_t<TViewDstFwd>, TViewSrc, TExtent>
204  {
206 
207  return {std::forward<TViewDstFwd>(viewDst), viewSrc, extent};
208  }
209  };
210 
211  //! The SYCL device-to-host memory copy trait specialization.
212  template<concepts::Tag TTag, typename TDim>
213  struct CreateTaskMemcpy<TDim, DevCpu, DevGenericSycl<TTag>>
214  {
215  template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
216  static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
217  -> alpaka::detail::TaskCopySycl<TDim, std::remove_reference_t<TViewDstFwd>, TViewSrc, TExtent>
218  {
220 
221  return {std::forward<TViewDstFwd>(viewDst), viewSrc, extent};
222  }
223  };
224 
225  //! The SYCL device-to-device memory copy trait specialization.
226  template<concepts::Tag TTagDst, concepts::Tag TTagSrc, typename TDim>
227  struct CreateTaskMemcpy<TDim, DevGenericSycl<TTagDst>, DevGenericSycl<TTagSrc>>
228  {
229  template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
230  static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
231  -> alpaka::detail::TaskCopySycl<TDim, std::remove_reference_t<TViewDstFwd>, TViewSrc, TExtent>
232  {
234 
235  return {std::forward<TViewDstFwd>(viewDst), viewSrc, extent};
236  }
237  };
238 } // namespace alpaka::trait
239 
240 #endif
#define ALPAKA_ASSERT(...)
The assert can be explicit disabled by defining NDEBUG.
Definition: Assert.hpp:13
#define ALPAKA_DEBUG
Set the minimum log level if it is not defined.
Definition: Debug.hpp:22
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
Definition: Debug.hpp:62
#define ALPAKA_DEBUG_FULL
The full debug level.
Definition: Debug.hpp:18
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition: NdLoop.hpp:81
The accelerator traits.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
Definition: Traits.hpp:114
ALPAKA_FN_HOST auto createTaskMemcpy(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent)
Creates a memory copy task.
Definition: Traits.hpp:253
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T >>
Definition: Traits.hpp:59
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView >>
Definition: Traits.hpp:196
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition: Traits.hpp:136
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition: Traits.hpp:21
std::integral_constant< std::size_t, N > DimInt