alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Set.hpp
Go to the documentation of this file.
1/* Copyright 2023 Jan Stephan, Luca Ferragina, Aurora Perego, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10#include "alpaka/dev/Traits.hpp"
19
20#include <cstddef>
21#include <cstdint>
22#include <memory>
23
24#ifdef ALPAKA_ACC_SYCL_ENABLED
25
26namespace alpaka
27{
28
29 namespace detail
30 {
31 //! The SYCL ND memory set task base.
32 template<typename TDim, typename TView, typename TExtent>
33 struct TaskSetSyclBase
34 {
35 using ExtentSize = Idx<TExtent>;
36 using DstSize = Idx<TView>;
38
39 template<typename TViewFwd>
40 TaskSetSyclBase(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
41 : m_byte(byte)
42 , m_extent(getExtents(extent))
43 , m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
44# if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
45 , m_dstExtent(getExtents(view))
46# endif
47
48 , m_dstPitchBytes(getPitchesInBytes(view))
49 , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
50
51 {
52 ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
53 if constexpr(TDim::value > 1)
54 ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes[TDim::value - 2]);
55 }
56
57# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
58 auto printDebug() const -> void
59 {
60 std::cout << __func__ << " e: " << this->m_extent << " ewb: " << this->m_extentWidthBytes
61 << " de: " << this->m_dstExtent << " dptr: " << reinterpret_cast<void*>(this->m_dstMemNative)
62 << " dpitchb: " << this->m_dstPitchBytes << std::endl;
63 }
64# endif
65
66 std::uint8_t const m_byte;
67 Vec<TDim, ExtentSize> const m_extent;
68 ExtentSize const m_extentWidthBytes;
69# if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
70 Vec<TDim, DstSize> const m_dstExtent;
71# endif
72 Vec<TDim, DstSize> const m_dstPitchBytes;
73 std::uint8_t* const m_dstMemNative;
74 static constexpr auto is_sycl_task = true;
75 };
76
77 //! The SYCL device ND memory set task.
78 template<typename TDim, typename TView, typename TExtent>
79 struct TaskSetSycl : public TaskSetSyclBase<TDim, TView, TExtent>
80 {
81 using DimMin1 = DimInt<TDim::value - 1u>;
82 using typename TaskSetSyclBase<TDim, TView, TExtent>::ExtentSize;
83 using typename TaskSetSyclBase<TDim, TView, TExtent>::DstSize;
84
85 using TaskSetSyclBase<TDim, TView, TExtent>::TaskSetSyclBase;
86
87 auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
88 {
90
91# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
92 this->printDebug();
93# endif
94 // [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
95 // iteration.
96 Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
97 Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_dstPitchBytes));
98
99 // Record an event for each memcpy call
100 std::vector<sycl::event> events;
101 events.reserve(static_cast<std::size_t>(extentWithoutInnermost.prod()));
102
103 if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
104 {
105 meta::ndLoopIncIdx(
106 extentWithoutInnermost,
107 [&](Vec<DimMin1, ExtentSize> const& idx)
108 {
109 events.push_back(queue.memset(
110 this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
111 this->m_byte,
112 static_cast<std::size_t>(this->m_extentWidthBytes),
113 requirements));
114 });
115 }
116
117 // Return an event that depends on all the events assciated to the memcpy calls
118 return queue.ext_oneapi_submit_barrier(events);
119 }
120 };
121
122 //! The 1D SYCL memory set task.
123 template<typename TView, typename TExtent>
124 struct TaskSetSycl<DimInt<1u>, TView, TExtent> : public TaskSetSyclBase<DimInt<1u>, TView, TExtent>
125 {
126 using TaskSetSyclBase<DimInt<1u>, TView, TExtent>::TaskSetSyclBase;
127
128 auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
129 {
131
132# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
133 this->printDebug();
134# endif
135 if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
136 {
137 return queue.memset(
138 reinterpret_cast<void*>(this->m_dstMemNative),
139 this->m_byte,
140 static_cast<std::size_t>(this->m_extentWidthBytes),
141 requirements);
142 }
143 else
144 {
145 return queue.ext_oneapi_submit_barrier();
146 }
147 }
148 };
149
150 //! The SYCL device scalar memory set task.
151 template<typename TView, typename TExtent>
152 struct TaskSetSycl<DimInt<0u>, TView, TExtent>
153 {
154 using ExtentSize = Idx<TExtent>;
155 using Scalar = Vec<DimInt<0u>, ExtentSize>;
156 using DstSize = Idx<TView>;
158
159 template<typename TViewFwd>
160 TaskSetSycl(TViewFwd&& view, std::uint8_t const& byte, [[maybe_unused]] TExtent const& extent)
161 : m_byte(byte)
162 , m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
163 {
164 // all zero-sized extents are equivalent
165 ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
166 ALPAKA_ASSERT(getExtents(view).prod() == 1u);
167 }
168
169# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
170 auto printDebug() const -> void
171 {
172 std::cout << __func__ << " e: " << Scalar() << " ewb: " << sizeof(Elem) << " de: " << Scalar()
173 << " dptr: " << reinterpret_cast<void*>(m_dstMemNative) << " dpitchb: " << Scalar()
174 << std::endl;
175 }
176# endif
177
178 auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
179 {
181
182# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
183 printDebug();
184# endif
185 return queue.memset(reinterpret_cast<void*>(m_dstMemNative), m_byte, sizeof(Elem), requirements);
186 }
187
188 std::uint8_t const m_byte;
189 std::uint8_t* const m_dstMemNative;
190 static constexpr auto is_sycl_task = true;
191 };
192
193 } // namespace detail
194
195 namespace trait
196 {
197 //! The SYCL device memory set trait specialization.
198 template<typename TDim, typename TPlatform>
199 struct CreateTaskMemset<TDim, DevGenericSycl<TPlatform>>
200 {
201 template<typename TExtent, typename TView>
202 static auto createTaskMemset(TView& view, std::uint8_t const& byte, TExtent const& extent)
203 -> alpaka::detail::TaskSetSycl<TDim, TView, TExtent>
204 {
205 return alpaka::detail::TaskSetSycl<TDim, TView, TExtent>(view, byte, extent);
206 }
207 };
208
209 } // namespace trait
210
211} // namespace alpaka
212#endif
#define ALPAKA_ASSERT(...)
The assert can be explicit disabled by defining NDEBUG.
Definition Assert.hpp:13
#define ALPAKA_DEBUG
Set the minimum log level if it is not defined.
Definition Debug.hpp:22
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
#define ALPAKA_DEBUG_FULL
The full debug level.
Definition Debug.hpp:18
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
Definition Traits.hpp:114
The alpaka accelerator library.
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
Definition Traits.hpp:196
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition Traits.hpp:136
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition Traits.hpp:21
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T > >
Definition Traits.hpp:59
std::integral_constant< std::size_t, N > DimInt
ALPAKA_FN_HOST auto createTaskMemset(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent)
Create a memory set task.
Definition Traits.hpp:207
STL namespace.