alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Copy.hpp
Go to the documentation of this file.
1/* Copyright 2022 Benjamin Worpitz, Erik Zenker, Matthias Werner, René Widera, Andrea Bocci, Jan Stephan, Bernhard
2 * Manfred Gruber
3 * SPDX-License-Identifier: MPL-2.0
4 */
5
6#pragma once
7
14
15#include <cstring>
16
17namespace alpaka
18{
 //! Forward declaration of the CPU device handle; only the name is declared here.
19 class DevCpu;
20} // namespace alpaka
21
22namespace alpaka
23{
24 namespace detail
25 {
26 //! The CPU device memory copy task base.
27 //!
28 //! Copies from CPU memory into CPU memory.
 //!
 //! Holds the state used by the derived copy tasks: the extent to copy, the width of one innermost row in
 //! bytes and the native destination/source pointers reinterpreted as byte pointers.
 //!
 //! \tparam TDim The dimensionality of the copy; must be greater than zero (enforced by the static_assert).
 //! \tparam TViewDst The destination view type.
 //! \tparam TViewSrc The source view type.
 //! \tparam TExtent The type of the extent describing how much is copied.
29 template<typename TDim, typename TViewDst, typename TViewSrc, typename TExtent>
31 {
 // The zero-dimensional case has its own TaskCopyCpu specialization that does not derive from this base.
32 static_assert(TDim::value > 0);
33
38
 //! Captures everything that is needed to execute the copy later on.
 //!
 //! \param viewDst The destination view (forwarding reference).
 //! \param viewSrc The source view.
 //! \param extent The extent to copy.
39 template<typename TViewFwd>
40 TaskCopyCpuBase(TViewFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
41 : m_extent(getExtents(extent))
 // Bytes per innermost row: last extent component times the size of one element.
42 , m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
43#if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
 // The view extents are only stored when assertions or full debug output are compiled in.
44 , m_dstExtent(getExtents(viewDst))
45 , m_srcExtent(getExtents(viewSrc))
46#endif
 // NOTE(review): m_dstPitchBytes / m_srcPitchBytes are used below, but their initializers are not
 // visible here - confirm against the original header.
49 , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(viewDst)))
50 , m_srcMemNative(reinterpret_cast<std::uint8_t const*>(getPtrNative(viewSrc)))
51 {
 // NOTE(review): this condition is always true because of the static_assert(TDim::value > 0) above.
52 if constexpr(TDim::value > 0)
53 {
 // The requested extent must fit into the destination and the source view in every dimension.
54 ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
55 ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).all());
56 if constexpr(TDim::value > 1)
57 {
 // For multi-dimensional copies one row must not be wider than the pitch entry at index TDim - 2.
58 ALPAKA_ASSERT(static_cast<DstSize>(m_extentWidthBytes) <= m_dstPitchBytes[TDim::value - 2]);
59 ALPAKA_ASSERT(static_cast<SrcSize>(m_extentWidthBytes) <= m_srcPitchBytes[TDim::value - 2]);
60 }
61 }
62 }
63
64#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
 //! Writes the copy parameters (extent, row width in bytes, view extents, native pointers and pitches)
 //! to std::cout. Only compiled at the full debug level.
65 ALPAKA_FN_HOST auto printDebug() const -> void
66 {
67 std::cout << __func__ << " e: " << m_extent << " ewb: " << this->m_extentWidthBytes
68 << " de: " << m_dstExtent << " dptr: " << reinterpret_cast<void*>(m_dstMemNative)
69 << " dpitchb: " << m_dstPitchBytes << " se: " << m_srcExtent
70 << " sptr: " << reinterpret_cast<void const*>(m_srcMemNative)
71 << " spitchb: " << m_srcPitchBytes << std::endl;
72 }
73#endif
74
77#if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
80#endif
83
 //! The destination memory as a byte pointer.
84 std::uint8_t* const m_dstMemNative;
 //! The source memory as a read-only byte pointer.
85 std::uint8_t const* const m_srcMemNative;
86 };
87
88 //! The CPU device ND memory copy task.
 //!
 //! Copies the requested region one innermost row at a time, using one std::memcpy per row.
89 template<typename TDim, typename TViewDst, typename TViewSrc, typename TExtent>
90 struct TaskCopyCpu : public TaskCopyCpuBase<TDim, TViewDst, TViewSrc, TExtent>
91 {
 //! The dimensionality that remains after dropping the innermost dimension.
92 using DimMin1 = DimInt<TDim::value - 1u>;
96
 // Inherit the constructor of the base task.
97 using TaskCopyCpuBase<TDim, TViewDst, TViewSrc, TExtent>::TaskCopyCpuBase;
98
 //! Executes the copy.
99 ALPAKA_FN_HOST auto operator()() const -> void
100 {
102
103#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
104 this->printDebug();
105#endif
106 // [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
107 // iteration.
108 Vec<DimMin1, ExtentSize> const extentWithoutInnermost = subVecBegin<DimMin1>(this->m_extent);
 // The pitch vectors are reduced to DimMin1 components in the same way.
109 Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost
110 = subVecBegin<DimMin1>(this->m_dstPitchBytes);
111 Vec<DimMin1, SrcSize> const srcPitchBytesWithoutInnermost
112 = subVecBegin<DimMin1>(this->m_srcPitchBytes);
113
 // Nothing is copied when the product of the extent components is zero.
114 if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
115 {
 // NOTE(review): the call that drives this lambda over extentWithoutInnermost is not visible
 // here; presumably ndLoopIncIdx - confirm against the original header.
117 extentWithoutInnermost,
118 [&](Vec<DimMin1, ExtentSize> const& idx)
119 {
 // Byte offset of the row on each side: sum over the components of index times pitch.
 // Exactly one row (m_extentWidthBytes bytes) is copied per invocation.
120 std::memcpy(
121 this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
122 this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutInnermost).sum(),
123 static_cast<std::size_t>(this->m_extentWidthBytes));
124 });
125 }
126 }
127 };
128
129 //! The CPU device 1D memory copy task.
 //!
 //! Copies the whole extent with a single std::memcpy.
130 template<typename TViewDst, typename TViewSrc, typename TExtent>
131 struct TaskCopyCpu<DimInt<1u>, TViewDst, TViewSrc, TExtent>
132 : TaskCopyCpuBase<DimInt<1u>, TViewDst, TViewSrc, TExtent>
133 {
 // Inherit the constructor of the base task.
134 using TaskCopyCpuBase<DimInt<1u>, TViewDst, TViewSrc, TExtent>::TaskCopyCpuBase;
135
 //! Executes the copy.
136 ALPAKA_FN_HOST auto operator()() const -> void
137 {
139
140#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
141 this->printDebug();
142#endif
 // Nothing is copied when the product of the extent components is zero.
143 if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
144 {
 // In one dimension the row width in bytes is the size of the whole copy.
145 std::memcpy(
146 reinterpret_cast<void*>(this->m_dstMemNative),
147 reinterpret_cast<void const*>(this->m_srcMemNative),
148 static_cast<std::size_t>(this->m_extentWidthBytes));
149 }
150 }
151 };
152
153 //! The CPU device scalar memory copy task.
154 //!
155 //! Copies from CPU memory into CPU memory.
 //!
 //! Zero-dimensional specialization: exactly one element (sizeof(Elem) bytes) is copied. Unlike the other
 //! TaskCopyCpu variants it does not derive from TaskCopyCpuBase and keeps its own pointers.
156 template<typename TViewDst, typename TViewSrc, typename TExtent>
157 struct TaskCopyCpu<DimInt<0u>, TViewDst, TViewSrc, TExtent>
158 {
160
 //! Stores the native destination and source pointers as byte pointers.
 //!
 //! \param viewDst The destination view (forwarding reference).
 //! \param viewSrc The source view.
 //! \param extent The extent; it is only read by the assertion below, hence [[maybe_unused]] for builds
 //! in which ALPAKA_ASSERT is disabled.
161 template<typename TViewDstFwd>
162 TaskCopyCpu(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, [[maybe_unused]] TExtent const& extent)
163 : m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(viewDst)))
164 , m_srcMemNative(reinterpret_cast<std::uint8_t const*>(getPtrNative(viewSrc)))
165 {
166 // all zero-dimensional extents are equivalent: each of the products below is expected to be 1
167 ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
168 ALPAKA_ASSERT(getExtents(viewDst).prod() == 1u);
169 ALPAKA_ASSERT(getExtents(viewSrc).prod() == 1u);
170 }
171
172#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
 //! Writes the copy parameters to std::cout in the same field layout as TaskCopyCpuBase::printDebug.
 //! A default-constructed zero-dimensional Vec stands in for every extent and pitch field.
173 ALPAKA_FN_HOST auto printDebug() const -> void
174 {
175 using Scalar = Vec<DimInt<0u>, Idx<TExtent>>;
176 std::cout << __func__ << " e: " << Scalar() << " ewb: " << sizeof(Elem) << " de: " << Scalar()
177 << " dptr: " << reinterpret_cast<void*>(m_dstMemNative) << " dpitchb: " << Scalar()
178 << " se: " << Scalar() << " sptr: " << reinterpret_cast<void const*>(m_srcMemNative)
179 << " spitchb: " << Scalar() << std::endl;
180 }
181#endif
182
 // NOTE(review): the declaration of operator() that opens the following body is not visible here -
 // confirm its exact signature against the original header.
184 {
186
187#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
188 printDebug();
189#endif
 // Copy exactly one element.
190 std::memcpy(
191 reinterpret_cast<void*>(m_dstMemNative),
192 reinterpret_cast<void const*>(m_srcMemNative),
193 sizeof(Elem));
194 }
195
 //! The destination memory as a byte pointer.
196 std::uint8_t* const m_dstMemNative;
 //! The source memory as a read-only byte pointer.
197 std::uint8_t const* const m_srcMemNative;
198 };
199 } // namespace detail
200
201 namespace trait
202 {
203 //! The CPU device memory copy trait specialization.
204 //!
205 //! Copies from CPU memory into CPU memory.
 //!
 //! NOTE(review): the struct header and the function declaration line are not visible here; the
 //! parameter list and body below belong to the factory function that creates the copy task.
206 template<typename TDim>
208 {
 //! Creates the copy task from the destination view, the source view and the extent.
 //!
 //! \param viewDst The destination view; perfectly forwarded into the task's constructor.
 //! \param viewSrc The source view.
 //! \param extent The extent to copy.
209 template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
211 TViewDstFwd&& viewDst,
212 TViewSrc const& viewSrc,
213 TExtent const& extent)
215 {
 // Brace-initializes the return value directly from the three arguments.
216 return {std::forward<TViewDstFwd>(viewDst), viewSrc, extent};
217 }
218 };
219 } // namespace trait
220} // namespace alpaka
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
Definition Assert.hpp:13
#define ALPAKA_DEBUG
Set the minimum log level if it is not defined.
Definition Debug.hpp:22
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
#define ALPAKA_DEBUG_FULL
The full debug level.
Definition Debug.hpp:18
The CPU device handle.
Definition DevCpu.hpp:56
An n-dimensional vector.
Definition Vec.hpp:38
#define ALPAKA_FN_HOST
Definition Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition NdLoop.hpp:81
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
Definition Traits.hpp:196
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition Traits.hpp:136
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition Traits.hpp:21
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T > >
Definition Traits.hpp:59
std::integral_constant< std::size_t, N > DimInt
STL namespace.
The CPU device memory copy task base.
Definition Copy.hpp:31
std::uint8_t const *const m_srcMemNative
Definition Copy.hpp:85
Vec< TDim, DstSize > const m_dstPitchBytes
Definition Copy.hpp:81
Vec< TDim, SrcSize > const m_srcPitchBytes
Definition Copy.hpp:82
Vec< TDim, ExtentSize > const m_extent
Definition Copy.hpp:75
Idx< TExtent > ExtentSize
Definition Copy.hpp:34
TaskCopyCpuBase(TViewFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent)
Definition Copy.hpp:40
ALPAKA_FN_HOST auto printDebug() const -> void
Definition Copy.hpp:65
Vec< TDim, SrcSize > const m_srcExtent
Definition Copy.hpp:79
Idx< TViewDst > DstSize
Definition Copy.hpp:35
alpaka::Elem< TViewSrc > Elem
Definition Copy.hpp:37
std::uint8_t *const m_dstMemNative
Definition Copy.hpp:84
Vec< TDim, DstSize > const m_dstExtent
Definition Copy.hpp:78
ExtentSize const m_extentWidthBytes
Definition Copy.hpp:76
Idx< TViewSrc > SrcSize
Definition Copy.hpp:36
TaskCopyCpu(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent)
Definition Copy.hpp:162
ALPAKA_FN_HOST auto operator()() const noexcept(ALPAKA_DEBUG< ALPAKA_DEBUG_FULL) -> void
Definition Copy.hpp:183
The CPU device ND memory copy task.
Definition Copy.hpp:91
DimInt< TDim::value - 1u > DimMin1
Definition Copy.hpp:92
ALPAKA_FN_HOST auto operator()() const -> void
Definition Copy.hpp:99
static ALPAKA_FN_HOST auto createTaskMemcpy(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent) -> alpaka::detail::TaskCopyCpu< TDim, std::remove_reference_t< TViewDstFwd >, TViewSrc, TExtent >
Definition Copy.hpp:210
The memory copy task trait.
Definition Traits.hpp:120