alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Copy.hpp
Go to the documentation of this file.
1/* Copyright 2024 Jan Stephan, Bernhard Manfred Gruber, Luca Ferragina, Aurora Perego, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
19
20#include <memory>
21#include <type_traits>
22
23#ifdef ALPAKA_ACC_SYCL_ENABLED
24
25# include <sycl/sycl.hpp>
26
27namespace alpaka::detail
28{
29 //! The SYCL device memory copy task base.
30 template<typename TDim, typename TViewDst, typename TViewSrc, typename TExtent>
31 struct TaskCopySyclBase
32 {
33 static_assert(
34 std::is_same_v<std::remove_const_t<alpaka::Elem<TViewSrc>>, std::remove_const_t<alpaka::Elem<TViewDst>>>,
35 "The source and the destination view are required to have the same element type!");
36 using ExtentSize = Idx<TExtent>;
37 using DstSize = Idx<TViewDst>;
38 using SrcSize = Idx<TViewSrc>;
40
41 template<typename TViewFwd>
42 TaskCopySyclBase(TViewFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
43 : m_extent(getExtents(extent))
44 , m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
45# if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
46 , m_dstExtent(getExtents(viewDst))
47 , m_srcExtent(getExtents(viewSrc))
48# endif
49 , m_dstPitchBytes(getPitchesInBytes(viewDst))
50 , m_srcPitchBytes(getPitchesInBytes(viewSrc))
51 , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(viewDst)))
52 , m_srcMemNative(reinterpret_cast<std::uint8_t const*>(getPtrNative(viewSrc)))
53 {
54 if constexpr(TDim::value > 0)
55 {
56 ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
57 ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).all());
58 }
59 }
60
61# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
62 auto printDebug() const -> void
63 {
64 std::cout << __func__ << " e: " << m_extent << " ewb: " << this->m_extentWidthBytes
65 << " de: " << m_dstExtent << " dptr: " << reinterpret_cast<void*>(m_dstMemNative)
66 << " se: " << m_srcExtent << " sptr: " << reinterpret_cast<void const*>(m_srcMemNative)
67 << std::endl;
68 }
69# endif
70
71 Vec<TDim, ExtentSize> const m_extent;
72 ExtentSize const m_extentWidthBytes;
73# if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
74 Vec<TDim, DstSize> const m_dstExtent;
75 Vec<TDim, SrcSize> const m_srcExtent;
76# endif
77
78 Vec<TDim, DstSize> const m_dstPitchBytes;
79 Vec<TDim, SrcSize> const m_srcPitchBytes;
80 std::uint8_t* const m_dstMemNative;
81 std::uint8_t const* const m_srcMemNative;
82 static constexpr auto is_sycl_task = true;
83 };
84
85 //! The SYCL device ND memory copy task.
86 template<typename TDim, typename TViewDst, typename TViewSrc, typename TExtent>
87 struct TaskCopySycl : public TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>
88 {
89 using DimMin1 = DimInt<TDim::value - 1u>;
90 using typename TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>::ExtentSize;
91 using typename TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>::DstSize;
92 using typename TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>::SrcSize;
93
94 using TaskCopySyclBase<TDim, TViewDst, TViewSrc, TExtent>::TaskCopySyclBase;
95
96 auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
97 {
99
100# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
101 this->printDebug();
102# endif
103 // [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
104 // iteration.
105 Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
106 Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_dstPitchBytes));
107 Vec<DimMin1, SrcSize> const srcPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_srcPitchBytes));
108
109 // Record an event for each memcpy call
110 std::vector<sycl::event> events;
111 events.reserve(static_cast<std::size_t>(extentWithoutInnermost.prod()));
112
113 if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
114 {
115 meta::ndLoopIncIdx(
116 extentWithoutInnermost,
117 [&](Vec<DimMin1, ExtentSize> const& idx)
118 {
119 events.push_back(queue.memcpy(
120 this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
121 this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutInnermost).sum(),
122 static_cast<std::size_t>(this->m_extentWidthBytes),
123 requirements));
124 });
125 }
126
127 // Return an event that depends on all the events assciated to the memcpy calls
128 return queue.ext_oneapi_submit_barrier(events);
129 }
130 };
131
132 //! The SYCL device 1D memory copy task.
133 template<typename TViewDst, typename TViewSrc, typename TExtent>
134 struct TaskCopySycl<DimInt<1u>, TViewDst, TViewSrc, TExtent>
135 : TaskCopySyclBase<DimInt<1u>, TViewDst, TViewSrc, TExtent>
136 {
137 using TaskCopySyclBase<DimInt<1u>, TViewDst, TViewSrc, TExtent>::TaskCopySyclBase;
139
140 auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
141 {
143
144# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
145 this->printDebug();
146# endif
147 if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
148 {
149 return queue.memcpy(
150 this->m_dstMemNative,
151 this->m_srcMemNative,
152 sizeof(Elem) * static_cast<std::size_t>(this->m_extent.prod()),
153 requirements);
154 }
155 else
156 {
157 return queue.ext_oneapi_submit_barrier();
158 }
159 }
160 };
161
162 //! The scalar SYCL memory copy trait.
163 template<typename TViewDst, typename TViewSrc, typename TExtent>
164 struct TaskCopySycl<DimInt<0u>, TViewDst, TViewSrc, TExtent>
165 {
166 static_assert(
167 std::is_same_v<std::remove_const_t<alpaka::Elem<TViewSrc>>, std::remove_const_t<alpaka::Elem<TViewDst>>>,
168 "The source and the destination view are required to have the same element type!");
169
171
172 template<typename TViewDstFwd>
173 TaskCopySycl(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, [[maybe_unused]] TExtent const& extent)
174 : m_dstMemNative(reinterpret_cast<void*>(getPtrNative(viewDst)))
175 , m_srcMemNative(reinterpret_cast<void const*>(getPtrNative(viewSrc)))
176 {
177 // all zero-sized extents are equivalent
178 ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
179 ALPAKA_ASSERT(getExtents(viewDst).prod() == 1u);
180 ALPAKA_ASSERT(getExtents(viewSrc).prod() == 1u);
181 }
182
183 auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
184 {
185 return queue.memcpy(m_dstMemNative, m_srcMemNative, sizeof(Elem), requirements);
186 }
187
188 void* m_dstMemNative;
189 void const* m_srcMemNative;
190 static constexpr auto is_sycl_task = true;
191 };
192} // namespace alpaka::detail
193
194// Trait specializations for CreateTaskMemcpy.
195namespace alpaka::trait
196{
197 //! The SYCL host-to-device memory copy trait specialization.
198 template<concepts::Tag TTag, typename TDim>
199 struct CreateTaskMemcpy<TDim, DevGenericSycl<TTag>, DevCpu>
200 {
201 template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
202 static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
203 -> alpaka::detail::TaskCopySycl<TDim, std::remove_reference_t<TViewDstFwd>, TViewSrc, TExtent>
204 {
206
207 return {std::forward<TViewDstFwd>(viewDst), viewSrc, extent};
208 }
209 };
210
211 //! The SYCL device-to-host memory copy trait specialization.
212 template<concepts::Tag TTag, typename TDim>
213 struct CreateTaskMemcpy<TDim, DevCpu, DevGenericSycl<TTag>>
214 {
215 template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
216 static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
217 -> alpaka::detail::TaskCopySycl<TDim, std::remove_reference_t<TViewDstFwd>, TViewSrc, TExtent>
218 {
220
221 return {std::forward<TViewDstFwd>(viewDst), viewSrc, extent};
222 }
223 };
224
225 //! The SYCL device-to-device memory copy trait specialization.
226 template<concepts::Tag TTagDst, concepts::Tag TTagSrc, typename TDim>
227 struct CreateTaskMemcpy<TDim, DevGenericSycl<TTagDst>, DevGenericSycl<TTagSrc>>
228 {
229 template<typename TExtent, typename TViewSrc, typename TViewDstFwd>
230 static auto createTaskMemcpy(TViewDstFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
231 -> alpaka::detail::TaskCopySycl<TDim, std::remove_reference_t<TViewDstFwd>, TViewSrc, TExtent>
232 {
234
235 return {std::forward<TViewDstFwd>(viewDst), viewSrc, extent};
236 }
237 };
238} // namespace alpaka::trait
239
240#endif
#define ALPAKA_ASSERT(...)
The assert can be explicit disabled by defining NDEBUG.
Definition Assert.hpp:13
#define ALPAKA_DEBUG
Set the minimum log level if it is not defined.
Definition Debug.hpp:22
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
Definition Debug.hpp:62
#define ALPAKA_DEBUG_FULL
The full debug level.
Definition Debug.hpp:18
The accelerator traits.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
Definition Traits.hpp:114
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
Definition Traits.hpp:196
ALPAKA_FN_HOST auto createTaskMemcpy(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent)
Creates a memory copy task.
Definition Traits.hpp:253
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition Traits.hpp:136
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition Traits.hpp:21
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T > >
Definition Traits.hpp:59
std::integral_constant< std::size_t, N > DimInt
STL namespace.