alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Fill.hpp
Go to the documentation of this file.
1/* Copyright 2025 Maria Michailidi, Anna Polova, Abdulrahman Al Marzouqi
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10#include "alpaka/dev/Traits.hpp"
19
20#include <cstddef>
21#include <cstdint>
22#include <iostream>
23#include <memory>
24#include <type_traits>
25
26
27#ifdef ALPAKA_ACC_SYCL_ENABLED
28
29namespace alpaka
30{
31
32 namespace detail
33 {
34
35 template<typename TDim, typename TView, typename TExtent, typename TValue>
36 struct TaskFillSyclBase
37 {
38 using ExtentSize = Idx<TExtent>;
39 using DstSize = Idx<TView>;
41
42 template<typename TViewFwd>
43 TaskFillSyclBase(TViewFwd&& view, TValue const& value, TExtent const& extent)
44 : m_value(value)
45 , m_extent(getExtents(extent))
46 , m_extentWidth(m_extent.back())
47# if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
48 , m_dstExtent(getExtents(view))
49# endif
50 , m_dstPitchBytes(getPitchesInBytes(view))
51 , m_dstMemNative(getPtrNative(view))
52 {
53 ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
54 if constexpr(TDim::value > 1)
56 m_extentWidth * static_cast<ExtentSize>(sizeof(Elem)) <= m_dstPitchBytes[TDim::value - 2]);
57 }
58
59# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
60 auto printDebug() const -> void
61 {
62 std::cout << __func__ << " e: " << m_extent << " ew: " << m_extentWidth << " de: " << m_dstExtent
63 << " dptr: " << reinterpret_cast<void*>(m_dstMemNative) << " dpitchb: " << m_dstPitchBytes
64 << std::endl;
65 }
66# endif
67
68 TValue const m_value;
69 Vec<TDim, ExtentSize> const m_extent;
70 ExtentSize const m_extentWidth;
71# if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
72 Vec<TDim, DstSize> const m_dstExtent;
73# endif
74 Vec<TDim, DstSize> const m_dstPitchBytes;
75 Elem* const m_dstMemNative;
76
77 static constexpr auto is_sycl_task = true;
78 };
79
80 template<typename TDim, typename TView, typename TExtent, typename TValue>
81 struct TaskFillSycl : public TaskFillSyclBase<TDim, TView, TExtent, TValue>
82 {
83 using Base = TaskFillSyclBase<TDim, TView, TExtent, TValue>;
84 using Base::Base;
85 using typename Base::DstSize;
86 using typename Base::ExtentSize;
87 using DimMin1 = DimInt<TDim::value - 1u>;
88
89 auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
90 {
92# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
93 this->printDebug();
94# endif
95 Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
96 Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_dstPitchBytes));
97
98 std::vector<sycl::event> events;
99 events.reserve(static_cast<std::size_t>(extentWithoutInnermost.prod()));
100
101 if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
102 {
103 using Elem = std::remove_cvref_t<decltype(this->m_value)>;
104
105 meta::ndLoopIncIdx(
106 extentWithoutInnermost,
107 [&](Vec<DimMin1, ExtentSize> const& idx)
108 {
109 auto offsetBytes = (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum();
110 Elem* ptr = reinterpret_cast<Elem*>(
111 reinterpret_cast<std::uint8_t*>(this->m_dstMemNative) + offsetBytes);
112
113 assert(this->m_extentWidth >= 0);
114
115 events.push_back(queue.fill<TValue>(
116 ptr,
117 this->m_value,
118 static_cast<std::size_t>(this->m_extentWidth),
119 requirements));
120 });
121 }
122
123
124 return queue.ext_oneapi_submit_barrier(events);
125 }
126 };
127
128 template<typename TView, typename TExtent, typename TValue>
129 struct TaskFillSycl<DimInt<1u>, TView, TExtent, TValue>
130 : public TaskFillSyclBase<DimInt<1u>, TView, TExtent, TValue>
131 {
132 using Base = TaskFillSyclBase<DimInt<1u>, TView, TExtent, TValue>;
133 using Base::Base;
134
135 auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
136 {
138# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
139 this->printDebug();
140# endif
141 if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
142 {
143 return queue.fill(
144 this->m_dstMemNative,
145 this->m_value,
146 static_cast<std::size_t>(this->m_extentWidth),
147 requirements);
148 }
149 else
150 {
151 return queue.ext_oneapi_submit_barrier();
152 }
153 }
154 };
155
156 template<typename TView, typename TExtent, typename TValue>
157 struct TaskFillSycl<DimInt<0u>, TView, TExtent, TValue>
158 {
160
161 template<typename TViewFwd>
162 TaskFillSycl(TViewFwd&& view, TValue const& value, [[maybe_unused]] TExtent const& extent)
163 : m_value(value)
164 , m_dstMemNative(getPtrNative(view))
165 {
166 ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
167 ALPAKA_ASSERT(getExtents(view).prod() == 1u);
168 }
169
170 auto operator()(sycl::queue& queue, std::vector<sycl::event> const& requirements) const -> sycl::event
171 {
173 return queue.fill(m_dstMemNative, m_value, 1, requirements);
174 }
175
176 TValue const m_value;
177 Elem* const m_dstMemNative;
178 static constexpr auto is_sycl_task = true;
179 };
180
181 } // namespace detail
182
183 namespace trait
184 {
185 template<typename TDim, typename TPlatform>
186 struct CreateTaskFill<TDim, DevGenericSycl<TPlatform>>
187 {
188 template<typename TExtent, typename TView, typename TValue>
189 static auto createTaskFill(TView& view, TValue const& value, TExtent const& extent)
190 -> alpaka::detail::TaskFillSycl<TDim, TView, TExtent, TValue>
191 {
192 return alpaka::detail::TaskFillSycl<TDim, TView, TExtent, TValue>(view, value, extent);
193 }
194 };
195 } // namespace trait
196
197} // namespace alpaka
198#endif
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
Definition Assert.hpp:13
#define ALPAKA_DEBUG
Set the minimum log level if it is not defined.
Definition Debug.hpp:22
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
#define ALPAKA_DEBUG_FULL
The full debug level.
Definition Debug.hpp:18
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
Definition Traits.hpp:114
The alpaka accelerator library.
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
Definition Traits.hpp:199
ALPAKA_FN_HOST auto createTaskFill(TViewFwd &&view, TValue const &value, TExtent const &extent)
Definition Traits.hpp:228
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition Traits.hpp:139
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition Traits.hpp:21
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T > >
Definition Traits.hpp:59
std::integral_constant< std::size_t, N > DimInt