alpaka
Abstraction Library for Parallel Kernel Acceleration
QueueGenericSyclBase.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Jan Stephan, Antonio Di Pilato, Luca Ferragina, Andrea Bocci, Aurora Perego
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
#include "alpaka/acc/Tag.hpp"
#include "alpaka/dev/Traits.hpp"
#include "alpaka/queue/Traits.hpp"
#include "alpaka/traits/Traits.hpp"
#include "alpaka/wait/Traits.hpp"

#include <algorithm>
#include <exception>
#include <iostream>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <type_traits>
#include <utility>
#include <vector>
22 
23 #ifdef ALPAKA_ACC_SYCL_ENABLED
24 
25 # include <sycl/sycl.hpp>
26 
27 namespace alpaka
28 {
29  template<concepts::Tag TTag>
30  class DevGenericSycl;
31 
32  template<concepts::Tag TTag>
33  class EventGenericSycl;
34 
35  namespace detail
36  {
//! Compile-time detector: `true` when `T` exposes a member named `is_sycl_task`, `false` otherwise.
//!
//! The primary template is the fallback; the partial specialization below is selected whenever the
//! expression `T::is_sycl_task` is well-formed.
template<typename T, typename = void>
inline constexpr bool is_sycl_task = false;

template<typename T>
inline constexpr bool is_sycl_task<T, std::void_t<decltype(T::is_sycl_task)>> = true;

//! Compile-time detector: `true` when `T` exposes a member named `is_sycl_kernel`, `false` otherwise.
//!
//! Works exactly like `is_sycl_task`, but probes for `T::is_sycl_kernel`.
template<typename T, typename = void>
inline constexpr bool is_sycl_kernel = false;

template<typename T>
inline constexpr bool is_sycl_kernel<T, std::void_t<decltype(T::is_sycl_kernel)>> = true;
48 
49  class QueueGenericSyclImpl
50  {
51  public:
52  QueueGenericSyclImpl(sycl::context context, sycl::device device)
53  : m_queue{
54  std::move(context), // This is important. In SYCL a device can belong to multiple contexts.
55  std::move(device),
56  {sycl::property::queue::enable_profiling{}, sycl::property::queue::in_order{}}}
57  {
58  }
59 
60  // This class will only exist as a pointer. We don't care about copy and move semantics.
61  QueueGenericSyclImpl(QueueGenericSyclImpl const& other) = delete;
62  auto operator=(QueueGenericSyclImpl const& rhs) -> QueueGenericSyclImpl& = delete;
63 
64  QueueGenericSyclImpl(QueueGenericSyclImpl&& other) noexcept = delete;
65  auto operator=(QueueGenericSyclImpl&& rhs) noexcept -> QueueGenericSyclImpl& = delete;
66 
67  ~QueueGenericSyclImpl()
68  {
69  try
70  {
71  m_queue.wait_and_throw();
72  }
73  catch(sycl::exception const& err)
74  {
75  std::cerr << "Caught SYCL exception while destructing a SYCL queue: " << err.what() << " ("
76  << err.code() << ')' << std::endl;
77  }
78  catch(std::exception const& err)
79  {
80  std::cerr << "The following runtime error(s) occured while destructing a SYCL queue:" << err.what()
81  << std::endl;
82  }
83  }
84 
85  // Don't call this without locking first!
86  auto clean_dependencies() -> void
87  {
88  // Clean up completed events
89  auto const start = std::begin(m_dependencies);
90  auto const old_end = std::end(m_dependencies);
91  auto const new_end = std::remove_if(
92  start,
93  old_end,
94  [](sycl::event ev) {
95  return ev.get_info<sycl::info::event::command_execution_status>()
96  == sycl::info::event_command_status::complete;
97  });
98 
99  m_dependencies.erase(new_end, old_end);
100  }
101 
102  auto register_dependency(sycl::event event) -> void
103  {
104  std::lock_guard<std::shared_mutex> lock{m_mutex};
105 
106  clean_dependencies();
107  m_dependencies.push_back(event);
108  }
109 
110  auto empty() const -> bool
111  {
112  std::shared_lock<std::shared_mutex> lock{m_mutex};
113  return m_last_event.get_info<sycl::info::event::command_execution_status>()
114  == sycl::info::event_command_status::complete;
115  }
116 
117  auto wait() -> void
118  {
119  // SYCL queues are thread-safe.
120  m_queue.wait_and_throw();
121  }
122 
123  auto get_last_event() const -> sycl::event
124  {
125  std::shared_lock<std::shared_mutex> lock{m_mutex};
126  return m_last_event;
127  }
128 
129  template<bool TBlocking, typename TTask>
130  auto enqueue(TTask const& task) -> void
131  {
132  {
133  std::lock_guard<std::shared_mutex> lock{m_mutex};
134 
135  clean_dependencies();
136 
137  // Execute task
138  if constexpr(is_sycl_task<TTask> && !is_sycl_kernel<TTask>) // Copy / Fill
139  {
140  m_last_event = task(m_queue, m_dependencies); // Will call queue.{copy, fill} internally
141  }
142  else
143  {
144  m_last_event = m_queue.submit(
145  [this, &task](sycl::handler& cgh)
146  {
147  if(!m_dependencies.empty())
148  cgh.depends_on(m_dependencies);
149 
150  if constexpr(is_sycl_kernel<TTask>) // Kernel
151  task(cgh); // Will call cgh.parallel_for internally
152  else // Host
153  cgh.host_task(task);
154  });
155  }
156 
157  m_dependencies.clear();
158  }
159 
160  if constexpr(TBlocking)
161  wait();
162  }
163 
164  [[nodiscard]] auto getNativeHandle() const noexcept
165  {
166  return m_queue;
167  }
168 
169  std::vector<sycl::event> m_dependencies;
170  sycl::event m_last_event;
171  std::shared_mutex mutable m_mutex;
172 
173  private:
174  sycl::queue m_queue;
175  };
176 
177  template<concepts::Tag TTag, bool TBlocking>
178  class QueueGenericSyclBase
179  : public interface::Implements<ConceptCurrentThreadWaitFor, QueueGenericSyclBase<TTag, TBlocking>>
180  , public interface::Implements<ConceptQueue, QueueGenericSyclBase<TTag, TBlocking>>
181  , public interface::Implements<ConceptGetDev, QueueGenericSyclBase<TTag, TBlocking>>
182  {
183  public:
184  QueueGenericSyclBase(DevGenericSycl<TTag> const& dev)
185  : m_dev{dev}
186  , m_spQueueImpl{std::make_shared<detail::QueueGenericSyclImpl>(
187  dev.getNativeHandle().second,
188  dev.getNativeHandle().first)}
189  {
190  m_dev.m_impl->register_queue(m_spQueueImpl);
191  }
192 
193  friend auto operator==(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool
194  {
195  return (lhs.m_dev == rhs.m_dev) && (lhs.m_spQueueImpl == rhs.m_spQueueImpl);
196  }
197 
198  friend auto operator!=(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool
199  {
200  return !(lhs == rhs);
201  }
202 
203  [[nodiscard]] auto getNativeHandle() const noexcept
204  {
205  return m_spQueueImpl->getNativeHandle();
206  }
207 
208  DevGenericSycl<TTag> m_dev;
209  std::shared_ptr<detail::QueueGenericSyclImpl> m_spQueueImpl;
210  };
211  } // namespace detail
212 
213  namespace trait
214  {
215  //! The SYCL blocking queue device type trait specialization.
216  template<concepts::Tag TTag, bool TBlocking>
217  struct DevType<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
218  {
219  using type = DevGenericSycl<TTag>;
220  };
221 
222  //! The SYCL blocking queue device get trait specialization.
223  template<concepts::Tag TTag, bool TBlocking>
224  struct GetDev<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
225  {
226  static auto getDev(alpaka::detail::QueueGenericSyclBase<TTag, TBlocking> const& queue)
227  {
229  return queue.m_dev;
230  }
231  };
232 
233  //! The SYCL blocking queue event type trait specialization.
234  template<concepts::Tag TTag, bool TBlocking>
235  struct EventType<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
236  {
237  using type = EventGenericSycl<TTag>;
238  };
239 
240  //! The SYCL blocking queue enqueue trait specialization.
241  template<concepts::Tag TTag, bool TBlocking, typename TTask>
242  struct Enqueue<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>, TTask>
243  {
244  static auto enqueue(alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>& queue, TTask const& task)
245  -> void
246  {
248  queue.m_spQueueImpl->template enqueue<TBlocking>(task);
249  }
250  };
251 
252  //! The SYCL blocking queue test trait specialization.
253  template<concepts::Tag TTag, bool TBlocking>
254  struct Empty<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
255  {
256  static auto empty(alpaka::detail::QueueGenericSyclBase<TTag, TBlocking> const& queue) -> bool
257  {
259  return queue.m_spQueueImpl->empty();
260  }
261  };
262 
263  //! The SYCL blocking queue thread wait trait specialization.
264  //!
265  //! Blocks execution of the calling thread until the queue has finished processing all previously requested
266  //! tasks (kernels, data copies, ...)
267  template<concepts::Tag TTag, bool TBlocking>
268  struct CurrentThreadWaitFor<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
269  {
270  static auto currentThreadWaitFor(alpaka::detail::QueueGenericSyclBase<TTag, TBlocking> const& queue)
271  -> void
272  {
274  queue.m_spQueueImpl->wait();
275  }
276  };
277 
278  //! The SYCL queue native handle trait specialization.
279  template<concepts::Tag TTag, bool TBlocking>
280  struct NativeHandle<alpaka::detail::QueueGenericSyclBase<TTag, TBlocking>>
281  {
282  [[nodiscard]] static auto getNativeHandle(
283  alpaka::detail::QueueGenericSyclBase<TTag, TBlocking> const& queue)
284  {
285  return queue.getNativeHandle();
286  }
287  };
288  } // namespace trait
289 } // namespace alpaka
290 #endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
constexpr ALPAKA_FN_HOST_ACC bool operator==(Complex< T > const &lhs, Complex< T > const &rhs)
Equality of two complex numbers.
Definition: Complex.hpp:292
constexpr ALPAKA_FN_HOST_ACC bool operator!=(Complex< T > const &lhs, Complex< T > const &rhs)
Inequality of two complex numbers.
Definition: Complex.hpp:318
ALPAKA_FN_HOST auto end(TView &view) -> Iterator< TView >
Definition: Iterator.hpp:139
ALPAKA_FN_HOST auto begin(TView &view) -> Iterator< TView >
Definition: Iterator.hpp:133
The alpaka accelerator library.
decltype(getNativeHandle(std::declval< TImpl >())) NativeHandle
Alias to the type of the native handle.
Definition: Traits.hpp:36
ALPAKA_FN_HOST auto getDev(T const &t)
Definition: Traits.hpp:68
ALPAKA_FN_HOST auto getNativeHandle(TImpl const &impl)
Get the native handle of the alpaka object. It will return the alpaka object handle if there is any,...
Definition: Traits.hpp:29
ALPAKA_FN_HOST auto empty(TQueue const &queue) -> bool
Tests if the queue is empty (all ops in the given queue have been completed).
Definition: Traits.hpp:58
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition: Traits.hpp:47
ALPAKA_FN_HOST auto wait(TAwaited const &awaited) -> void
Waits the thread for the completion of the given awaited action to complete.
Definition: Traits.hpp:34