alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
QueueGenericSyclBase.hpp
Go to the documentation of this file.
1/* Copyright 2024 Jan Stephan, Antonio Di Pilato, Luca Ferragina, Andrea Bocci, Aurora Perego
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7#include "alpaka/acc/Tag.hpp"
13
14#include <algorithm>
15#include <exception>
16#include <memory>
17#include <mutex>
18#include <shared_mutex>
19#include <type_traits>
20#include <utility>
21#include <vector>
22
23#ifdef ALPAKA_ACC_SYCL_ENABLED
24
25# include <sycl/sycl.hpp>
26
27namespace alpaka
28{
29 template<concepts::Tag TTag>
30 class DevGenericSycl;
31
32 template<concepts::Tag TTag>
33 class EventGenericSycl;
34
35 namespace detail
36 {
//! Detection trait: `true` if and only if `T` declares a member named `is_sycl_task`.
//!
//! Primary template, selected whenever `T::is_sycl_task` does not name anything.
template<typename T, typename = void>
inline constexpr bool is_sycl_task = false;

//! Partial specialization selected when `decltype(T::is_sycl_task)` is well-formed.
//! Only the presence of the member is checked; its type and value are not inspected.
template<typename T>
inline constexpr bool is_sycl_task<T, std::void_t<decltype(T::is_sycl_task)>> = true;
42
//! Detection trait: `true` if and only if `T` declares a member named `is_sycl_kernel`.
//!
//! Primary template, selected whenever `T::is_sycl_kernel` does not name anything.
template<typename T, typename = void>
inline constexpr bool is_sycl_kernel = false;

//! Partial specialization selected when `decltype(T::is_sycl_kernel)` is well-formed.
//! Only the presence of the member is checked; its type and value are not inspected.
template<typename T>
inline constexpr bool is_sycl_kernel<T, std::void_t<decltype(T::is_sycl_kernel)>> = true;
48
49 class QueueGenericSyclImpl
50 {
51 public:
52 QueueGenericSyclImpl(sycl::context context, sycl::device device)
53 : m_queue{
54 std::move(context), // This is important. In SYCL a device can belong to multiple contexts.
55 std::move(device),
56 {sycl::property::queue::enable_profiling{}, sycl::property::queue::in_order{}}}
57 {
58 }
59
60 // This class will only exist as a pointer. We don't care about copy and move semantics.
61 QueueGenericSyclImpl(QueueGenericSyclImpl const& other) = delete;
62 auto operator=(QueueGenericSyclImpl const& rhs) -> QueueGenericSyclImpl& = delete;
63
64 QueueGenericSyclImpl(QueueGenericSyclImpl&& other) noexcept = delete;
65 auto operator=(QueueGenericSyclImpl&& rhs) noexcept -> QueueGenericSyclImpl& = delete;
66
67 ~QueueGenericSyclImpl()
68 {
69 try
70 {
71 m_queue.wait_and_throw();
72 }
73 catch(sycl::exception const& err)
74 {
75 std::cerr << "Caught SYCL exception while destructing a SYCL queue: " << err.what() << " ("
76 << err.code() << ')' << std::endl;
77 }
78 catch(std::exception const& err)
79 {
80 std::cerr << "The following runtime error(s) occured while destructing a SYCL queue:" << err.what()
81 << std::endl;
82 }
83 }
84
85 // Don't call this without locking first!
86 auto clean_dependencies() -> void
87 {
88 // Clean up completed events
89 auto const start = std::begin(m_dependencies);
90 auto const old_end = std::end(m_dependencies);
91 auto const new_end = std::remove_if(
92 start,
93 old_end,
94 [](sycl::event ev) {
95 return ev.get_info<sycl::info::event::command_execution_status>()
96 == sycl::info::event_command_status::complete;
97 });
98
99 m_dependencies.erase(new_end, old_end);
100 }
101
102 auto register_dependency(sycl::event event) -> void
103 {
104 std::lock_guard<std::shared_mutex> lock{m_mutex};
105
106 clean_dependencies();
107 m_dependencies.push_back(event);
108 }
109
110 auto empty() const -> bool
111 {
112 std::shared_lock<std::shared_mutex> lock{m_mutex};
113 return m_last_event.get_info<sycl::info::event::command_execution_status>()
114 == sycl::info::event_command_status::complete;
115 }
116
117 auto wait() -> void
118 {
119 // SYCL queues are thread-safe.
120 m_queue.wait_and_throw();
121 }
122
123 auto get_last_event() const -> sycl::event
124 {
125 std::shared_lock<std::shared_mutex> lock{m_mutex};
126 return m_last_event;
127 }
128
129 template<bool TBlocking, typename TTask>
130 auto enqueue(TTask const& task) -> void
131 {
132 {
133 std::lock_guard<std::shared_mutex> lock{m_mutex};
134
135 clean_dependencies();
136
137 // Execute task
138 if constexpr(is_sycl_task<TTask> && !is_sycl_kernel<TTask>) // Copy / Fill
139 {
140 m_last_event = task(m_queue, m_dependencies); // Will call queue.{copy, fill} internally
141 }
142 else
143 {
144 m_last_event = m_queue.submit(
145 [this, &task](sycl::handler& cgh)
146 {
147 if(!m_dependencies.empty())
148 cgh.depends_on(m_dependencies);
149
150 if constexpr(is_sycl_kernel<TTask>) // Kernel
151 task(cgh); // Will call cgh.parallel_for internally
152 else // Host
153 cgh.host_task(task);
154 });
155 }
156
157 m_dependencies.clear();
158 }
159
160 if constexpr(TBlocking)
161 wait();
162 }
163
164 [[nodiscard]] auto getNativeHandle() const noexcept
165 {
166 return m_queue;
167 }
168
169 std::vector<sycl::event> m_dependencies;
170 sycl::event m_last_event;
171 std::shared_mutex mutable m_mutex;
172
173 private:
174 sycl::queue m_queue;
175 };
176
177 template<concepts::Tag TTag, bool TBlocking>
178 class QueueGenericSyclBase
179 : public interface::Implements<ConceptCurrentThreadWaitFor, QueueGenericSyclBase<TTag, TBlocking>>
180 , public interface::Implements<ConceptQueue, QueueGenericSyclBase<TTag, TBlocking>>
181 , public interface::Implements<ConceptGetDev, QueueGenericSyclBase<TTag, TBlocking>>
182 {
183 public:
184 QueueGenericSyclBase(DevGenericSycl<TTag> const& dev)
185 : m_dev{dev}
186 , m_spQueueImpl{std::make_shared<detail::QueueGenericSyclImpl>(
187 dev.getNativeHandle().second,
188 dev.getNativeHandle().first)}
189 {
190 m_dev.m_impl->register_queue(m_spQueueImpl);
191 }
192
193 friend auto operator==(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool
194 {
195 return (lhs.m_dev == rhs.m_dev) && (lhs.m_spQueueImpl == rhs.m_spQueueImpl);
196 }
197
198 friend auto operator!=(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool
199 {
200 return !(lhs == rhs);
201 }
202
203 [[nodiscard]] auto getNativeHandle() const noexcept
204 {
205 return m_spQueueImpl->getNativeHandle();
206 }
207
208 DevGenericSycl<TTag> m_dev;
209 std::shared_ptr<detail::QueueGenericSyclImpl> m_spQueueImpl;
210 };
211 } // namespace detail
212
213 namespace trait
214 {
215 //! The SYCL blocking queue device type trait specialization.
216 template<concepts::Tag TTag, bool TBlocking>
217 struct DevType<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
218 {
219 using type = DevGenericSycl<TTag>;
220 };
221
222 //! The SYCL blocking queue device get trait specialization.
223 template<concepts::Tag TTag, bool TBlocking>
224 struct GetDev<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
225 {
226 static auto getDev(alpaka::detail::QueueGenericSyclBase<TTag, TBlocking> const& queue)
227 {
229 return queue.m_dev;
230 }
231 };
232
233 //! The SYCL blocking queue event type trait specialization.
234 template<concepts::Tag TTag, bool TBlocking>
235 struct EventType<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
236 {
237 using type = EventGenericSycl<TTag>;
238 };
239
240 //! The SYCL blocking queue enqueue trait specialization.
241 template<concepts::Tag TTag, bool TBlocking, typename TTask>
242 struct Enqueue<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>, TTask>
243 {
244 static auto enqueue(alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>& queue, TTask const& task)
245 -> void
246 {
248 queue.m_spQueueImpl->template enqueue<TBlocking>(task);
249 }
250 };
251
252 //! The SYCL blocking queue test trait specialization.
253 template<concepts::Tag TTag, bool TBlocking>
254 struct Empty<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
255 {
256 static auto empty(alpaka::detail::QueueGenericSyclBase<TTag, TBlocking> const& queue) -> bool
257 {
259 return queue.m_spQueueImpl->empty();
260 }
261 };
262
263 //! The SYCL blocking queue thread wait trait specialization.
264 //!
265 //! Blocks execution of the calling thread until the queue has finished processing all previously requested
266 //! tasks (kernels, data copies, ...)
267 template<concepts::Tag TTag, bool TBlocking>
268 struct CurrentThreadWaitFor<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
269 {
270 static auto currentThreadWaitFor(alpaka::detail::QueueGenericSyclBase<TTag, TBlocking> const& queue)
271 -> void
272 {
274 queue.m_spQueueImpl->wait();
275 }
276 };
277
278 //! The SYCL queue native handle trait specialization.
279 template<concepts::Tag TTag, bool TBlocking>
280 struct NativeHandle<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
281 {
282 [[nodiscard]] static auto getNativeHandle(
283 alpaka::detail::QueueGenericSyclBase<TTag, TBlocking> const& queue)
284 {
285 return queue.getNativeHandle();
286 }
287 };
288 } // namespace trait
289} // namespace alpaka
290#endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
constexpr ALPAKA_FN_HOST_ACC bool operator==(Complex< T > const &lhs, Complex< T > const &rhs)
Equality of two complex numbers.
Definition Complex.hpp:294
constexpr ALPAKA_FN_HOST_ACC bool operator!=(Complex< T > const &lhs, Complex< T > const &rhs)
Inequality of two complex numbers.
Definition Complex.hpp:320
The alpaka accelerator library.
decltype(getNativeHandle(std::declval< TImpl >())) NativeHandle
Alias to the type of the native handle.
Definition Traits.hpp:36
ALPAKA_FN_HOST auto getDev(T const &t)
Definition Traits.hpp:68
ALPAKA_FN_HOST auto getNativeHandle(TImpl const &impl)
Get the native handle of the alpaka object. It will return the alpaka object handle if there is any,...
Definition Traits.hpp:29
ALPAKA_FN_HOST auto empty(TQueue const &queue) -> bool
Tests if the queue is empty (all ops in the given queue have been completed).
Definition Traits.hpp:58
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition Traits.hpp:47
ALPAKA_FN_HOST auto wait(TAwaited const &awaited) -> void
Blocks the calling thread until the given awaited action has completed.
Definition Traits.hpp:34
STL namespace.