alpaka
Abstraction Library for Parallel Kernel Acceleration
Copy.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Benjamin Worpitz, Erik Zenker, Matthias Werner, René Widera, Andrea Bocci, Jan Stephan, Bernhard
2  * Manfred Gruber
3  * SPDX-License-Identifier: MPL-2.0
4  */
5 
6 #pragma once
7 
8 #include "alpaka/core/Assert.hpp"
10 #include "alpaka/extent/Traits.hpp"
12 #include "alpaka/meta/Integral.hpp"
13 #include "alpaka/meta/NdLoop.hpp"
14 
15 #include <cstring>
16 
17 namespace alpaka
18 {
19  class DevCpu;
20 } // namespace alpaka
21 
22 namespace alpaka
23 {
24  namespace detail
25  {
26  //! The CPU device memory copy task base.
27  //!
28  //! Copies from CPU memory into CPU memory.
    //!
    //! The constructor records the extent of the copy, the byte width of one line along the last extent
    //! component, and the native pointers of both views reinterpreted as byte pointers.
29  template<typename TDim, typename TViewDst, typename TViewSrc, typename TExtent>
31  {
    // This base only accepts views with at least one dimension.
32  static_assert(TDim::value > 0);
33 
38 
    //! Captures the extent of the copy and the native pointers of the destination and source views.
    //!
    //! \param viewDst The destination view; its native pointer is stored as a byte pointer.
    //! \param viewSrc The source view; its native pointer is stored as a const byte pointer.
    //! \param extent The extent of the region to copy.
39  template<typename TViewFwd>
40  TaskCopyCpuBase(TViewFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
41  : m_extent(getExtents(extent))
    // Bytes per line: the last extent component multiplied by sizeof(Elem).
42  , m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
    // The extents of the views are only stored when assertions or full debug output are enabled.
43 #if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
44  , m_dstExtent(getExtents(viewDst))
45  , m_srcExtent(getExtents(viewSrc))
46 #endif
49  , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(viewDst)))
50  , m_srcMemNative(reinterpret_cast<std::uint8_t const*>(getPtrNative(viewSrc)))
51  {
    // This condition always holds because of the static_assert at the top of the struct.
52  if constexpr(TDim::value > 0)
53  {
    // The region to copy has to fit into both views in every dimension.
54  ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
55  ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).all());
56  if constexpr(TDim::value > 1)
57  {
    // With more than one dimension, one copied line must not be wider than the pitch entry at
    // index TDim - 2 of either view.
58  ALPAKA_ASSERT(static_cast<DstSize>(m_extentWidthBytes) <= m_dstPitchBytes[TDim::value - 2]);
59  ALPAKA_ASSERT(static_cast<SrcSize>(m_extentWidthBytes) <= m_srcPitchBytes[TDim::value - 2]);
60  }
61  }
62  }
63 
64 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
    //! Writes the extents, pointers and pitches of this copy to std::cout. Only compiled at full debug level.
65  ALPAKA_FN_HOST auto printDebug() const -> void
66  {
67  std::cout << __func__ << " e: " << m_extent << " ewb: " << this->m_extentWidthBytes
68  << " de: " << m_dstExtent << " dptr: " << reinterpret_cast<void*>(m_dstMemNative)
69  << " dpitchb: " << m_dstPitchBytes << " se: " << m_srcExtent
70  << " sptr: " << reinterpret_cast<void const*>(m_srcMemNative)
71  << " spitchb: " << m_srcPitchBytes << std::endl;
72  }
73 #endif
74 
77 #if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
80 #endif
83 
    // Byte pointers to the start of the destination and source memory.
84  std::uint8_t* const m_dstMemNative;
85  std::uint8_t const* const m_srcMemNative;
86  };
87 
88  //! The CPU device ND memory copy task.
    //!
    //! Copies one line of m_extentWidthBytes bytes per index of the extent without its last component.
89  template<typename TDim, typename TViewDst, typename TViewSrc, typename TExtent>
90  struct TaskCopyCpu : public TaskCopyCpuBase<TDim, TViewDst, TViewSrc, TExtent>
91  {
    // Dimensionality of the index space that remains after dropping one dimension.
92  using DimMin1 = DimInt<TDim::value - 1u>;
96 
98 
    //! Executes the copy. Nothing is copied when the product of the extent is zero.
99  ALPAKA_FN_HOST auto operator()() const -> void
100  {
102 
103 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
104  this->printDebug();
105 #endif
106  // [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
107  // iteration.
108  Vec<DimMin1, ExtentSize> const extentWithoutInnermost = subVecBegin<DimMin1>(this->m_extent);
109  Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost
110  = subVecBegin<DimMin1>(this->m_dstPitchBytes);
111  Vec<DimMin1, SrcSize> const srcPitchBytesWithoutInnermost
112  = subVecBegin<DimMin1>(this->m_srcPitchBytes);
113 
    // Skip the copy entirely for an empty region.
114  if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
115  {
    // NOTE(review): the call that receives the two arguments below is not visible here;
    // presumably meta::ndLoopIncIdx invoking the lambda once per index - confirm.
117  extentWithoutInnermost,
118  [&](Vec<DimMin1, ExtentSize> const& idx)
119  {
    // The byte offset of a line is the sum over all components of index times pitch,
    // computed separately for destination and source.
120  std::memcpy(
121  this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
122  this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutInnermost).sum(),
123  static_cast<std::size_t>(this->m_extentWidthBytes));
124  });
125  }
126  }
127  };
128 
129  //! The CPU device 1D memory copy task.
    //!
    //! Performs the whole copy with a single std::memcpy of m_extentWidthBytes bytes.
130  template<typename TViewDst, typename TViewSrc, typename TExtent>
131  struct TaskCopyCpu<DimInt<1u>, TViewDst, TViewSrc, TExtent>
132  : TaskCopyCpuBase<DimInt<1u>, TViewDst, TViewSrc, TExtent>
133  {
    // Reuse the constructors of the base class.
134  using TaskCopyCpuBase<DimInt<1u>, TViewDst, TViewSrc, TExtent>::TaskCopyCpuBase;
135 
    //! Executes the copy. Nothing is copied when the product of the extent is zero.
136  ALPAKA_FN_HOST auto operator()() const -> void
137  {
139 
140 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
141  this->printDebug();
142 #endif
    // Skip the copy entirely for an empty region.
143  if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
144  {
145  std::memcpy(
146  reinterpret_cast<void*>(this->m_dstMemNative),
147  reinterpret_cast<void const*>(this->m_srcMemNative),
148  static_cast<std::size_t>(this->m_extentWidthBytes));
149  }
150  }
151  };
152 
153  //! The CPU device scalar memory copy task.
154  //!
155  //! Copies from CPU memory into CPU memory.
    //!
    //! Copies exactly sizeof(Elem) bytes from the source pointer to the destination pointer.
156  template<typename TViewDst, typename TViewSrc, typename TExtent>
157  struct TaskCopyCpu<DimInt<0u>, TViewDst, TViewSrc, TExtent>
158  {
160 
    //! Stores the native pointers of both views as byte pointers.
    //!
    //! \param viewDst The destination view.
    //! \param viewSrc The source view.
    //! \param extent The extent; only inspected by the assertions below.
161  template<typename TViewDstFwd>
162  TaskCopyCpu(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, [[maybe_unused]] TExtent const& extent)
163  : m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(viewDst)))
164  , m_srcMemNative(reinterpret_cast<std::uint8_t const*>(getPtrNative(viewSrc)))
165  {
166  // All zero-dimensional extents are equivalent: the extent and both views are expected to have a product of 1.
167  ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
168  ALPAKA_ASSERT(getExtents(viewDst).prod() == 1u);
169  ALPAKA_ASSERT(getExtents(viewSrc).prod() == 1u);
170  }
171 
172 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
    //! Writes the element size and both pointers to std::cout, using default-constructed
    //! zero-dimensional vectors for the extent and pitch fields. Only compiled at full debug level.
173  ALPAKA_FN_HOST auto printDebug() const -> void
174  {
175  using Scalar = Vec<DimInt<0u>, Idx<TExtent>>;
176  std::cout << __func__ << " e: " << Scalar() << " ewb: " << sizeof(Elem) << " de: " << Scalar()
177  << " dptr: " << reinterpret_cast<void*>(m_dstMemNative) << " dpitchb: " << Scalar()
178  << " se: " << Scalar() << " sptr: " << reinterpret_cast<void const*>(m_srcMemNative)
179  << " spitchb: " << Scalar() << std::endl;
180  }
181 #endif
182 
    //! Executes the copy of a single element.
    //!
    //! Declared noexcept only below the full debug level, i.e. when printDebug() is not called.
183  ALPAKA_FN_HOST auto operator()() const noexcept(ALPAKA_DEBUG < ALPAKA_DEBUG_FULL) -> void
184  {
186 
187 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
188  printDebug();
189 #endif
190  std::memcpy(
191  reinterpret_cast<void*>(m_dstMemNative),
192  reinterpret_cast<void const*>(m_srcMemNative),
193  sizeof(Elem));
194  }
195 
    // Byte pointers to the destination and source element.
196  std::uint8_t* const m_dstMemNative;
197  std::uint8_t const* const m_srcMemNative;
198  };
199  } // namespace detail
200 
201  namespace trait
202  {
203  //! The CPU device memory copy trait specialization.
204  //!
205  //! Copies from CPU memory into CPU memory.
206  template<typename TDim>
208  {
    //! Creates the copy task for the given views and extent.
    //!
    //! \param viewDst The destination view, perfectly forwarded into the task.
    //! \param viewSrc The source view.
    //! \param extent The extent of the region to copy.
    //! \return The task, brace-initialized from the three arguments.
209  template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
211  TViewDstFwd&& viewDst,
212  TViewSrc const& viewSrc,
213  TExtent const& extent)
215  {
216  return {std::forward<TViewDstFwd>(viewDst), viewSrc, extent};
217  }
218  };
219  } // namespace trait
220 } // namespace alpaka
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
Definition: Assert.hpp:13
#define ALPAKA_DEBUG
Set the minimum log level if it is not defined.
Definition: Debug.hpp:22
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
#define ALPAKA_DEBUG_FULL
The full debug level.
Definition: Debug.hpp:18
The CPU device handle.
Definition: DevCpu.hpp:56
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition: NdLoop.hpp:81
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
Definition: Traits.hpp:114
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition: Traits.hpp:29
ALPAKA_FN_HOST auto memcpy(TQueue &queue, alpaka::detail::DevGlobalImplGeneric< TTag, TTypeDst > &viewDst, TViewSrc const &viewSrc) -> void
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T >>
Definition: Traits.hpp:59
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView >>
Definition: Traits.hpp:196
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition: Traits.hpp:136
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition: Traits.hpp:21
std::integral_constant< std::size_t, N > DimInt
The CPU device memory copy task base.
Definition: Copy.hpp:31
std::uint8_t const *const m_srcMemNative
Definition: Copy.hpp:85
Vec< TDim, DstSize > const m_dstPitchBytes
Definition: Copy.hpp:81
Vec< TDim, SrcSize > const m_srcPitchBytes
Definition: Copy.hpp:82
Vec< TDim, ExtentSize > const m_extent
Definition: Copy.hpp:75
Idx< TExtent > ExtentSize
Definition: Copy.hpp:34
TaskCopyCpuBase(TViewFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent)
Definition: Copy.hpp:40
ALPAKA_FN_HOST auto printDebug() const -> void
Definition: Copy.hpp:65
Vec< TDim, SrcSize > const m_srcExtent
Definition: Copy.hpp:79
Idx< TViewDst > DstSize
Definition: Copy.hpp:35
alpaka::Elem< TViewSrc > Elem
Definition: Copy.hpp:37
std::uint8_t *const m_dstMemNative
Definition: Copy.hpp:84
Vec< TDim, DstSize > const m_dstExtent
Definition: Copy.hpp:78
ExtentSize const m_extentWidthBytes
Definition: Copy.hpp:76
Idx< TViewSrc > SrcSize
Definition: Copy.hpp:36
TaskCopyCpu(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, [[maybe_unused]] TExtent const &extent)
Definition: Copy.hpp:162
ALPAKA_FN_HOST auto operator()() const noexcept(ALPAKA_DEBUG< ALPAKA_DEBUG_FULL) -> void
Definition: Copy.hpp:183
The CPU device ND memory copy task.
Definition: Copy.hpp:91
DimInt< TDim::value - 1u > DimMin1
Definition: Copy.hpp:92
ALPAKA_FN_HOST auto operator()() const -> void
Definition: Copy.hpp:99
static ALPAKA_FN_HOST auto createTaskMemcpy(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent) -> alpaka::detail::TaskCopyCpu< TDim, std::remove_reference_t< TViewDstFwd >, TViewSrc, TExtent >
Definition: Copy.hpp:210
The memory copy task trait.
Definition: Traits.hpp:120