alpaka
Abstraction Library for Parallel Kernel Acceleration
QueueGenericSyclBase.hpp
Go to the documentation of this file.
1 /* Copyright 2023 Jan Stephan, Antonio Di Pilato, Luca Ferragina, Andrea Bocci, Aurora Perego
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include "alpaka/dev/Traits.hpp"
10 #include "alpaka/traits/Traits.hpp"
11 #include "alpaka/wait/Traits.hpp"
12 
13 #include <algorithm>
14 #include <exception>
15 #include <memory>
16 #include <mutex>
17 #include <shared_mutex>
18 #include <type_traits>
19 #include <utility>
20 #include <vector>
21 
22 #ifdef ALPAKA_ACC_SYCL_ENABLED
23 
24 # include <sycl/sycl.hpp>
25 
26 namespace alpaka::detail
27 {
//! Detection flag: `true` when `T` declares a member named `is_sycl_task`, `false` otherwise.
//!
//! The second template parameter exists only to drive the `std::void_t` detection and must be left defaulted.
template<typename T, typename TEnable = void>
inline constexpr bool is_sycl_task = false;

//! Selected whenever the expression `T::is_sycl_task` is well-formed.
template<typename T>
inline constexpr bool is_sycl_task<T, std::void_t<decltype(T::is_sycl_task)>> = true;

//! Detection flag: `true` when `T` declares a member named `is_sycl_kernel`, `false` otherwise.
//!
//! The second template parameter exists only to drive the `std::void_t` detection and must be left defaulted.
template<typename T, typename TEnable = void>
inline constexpr bool is_sycl_kernel = false;

//! Selected whenever the expression `T::is_sycl_kernel` is well-formed.
template<typename T>
inline constexpr bool is_sycl_kernel<T, std::void_t<decltype(T::is_sycl_kernel)>> = true;
39 
40  class QueueGenericSyclImpl
41  {
42  public:
43  QueueGenericSyclImpl(sycl::context context, sycl::device device)
44  : m_queue{
45  std::move(context), // This is important. In SYCL a device can belong to multiple contexts.
46  std::move(device),
47  {sycl::property::queue::enable_profiling{}, sycl::property::queue::in_order{}}}
48  {
49  }
50 
51  // This class will only exist as a pointer. We don't care about copy and move semantics.
52  QueueGenericSyclImpl(QueueGenericSyclImpl const& other) = delete;
53  auto operator=(QueueGenericSyclImpl const& rhs) -> QueueGenericSyclImpl& = delete;
54 
55  QueueGenericSyclImpl(QueueGenericSyclImpl&& other) noexcept = delete;
56  auto operator=(QueueGenericSyclImpl&& rhs) noexcept -> QueueGenericSyclImpl& = delete;
57 
58  ~QueueGenericSyclImpl()
59  {
60  try
61  {
62  m_queue.wait_and_throw();
63  }
64  catch(sycl::exception const& err)
65  {
66  std::cerr << "Caught SYCL exception while destructing a SYCL queue: " << err.what() << " ("
67  << err.code() << ')' << std::endl;
68  }
69  catch(std::exception const& err)
70  {
71  std::cerr << "The following runtime error(s) occured while destructing a SYCL queue:" << err.what()
72  << std::endl;
73  }
74  }
75 
76  // Don't call this without locking first!
77  auto clean_dependencies() -> void
78  {
79  // Clean up completed events
80  auto const start = std::begin(m_dependencies);
81  auto const old_end = std::end(m_dependencies);
82  auto const new_end = std::remove_if(
83  start,
84  old_end,
85  [](sycl::event ev) {
86  return ev.get_info<sycl::info::event::command_execution_status>()
87  == sycl::info::event_command_status::complete;
88  });
89 
90  m_dependencies.erase(new_end, old_end);
91  }
92 
93  auto register_dependency(sycl::event event) -> void
94  {
95  std::lock_guard<std::shared_mutex> lock{m_mutex};
96 
97  clean_dependencies();
98  m_dependencies.push_back(event);
99  }
100 
101  auto empty() const -> bool
102  {
103  std::shared_lock<std::shared_mutex> lock{m_mutex};
104  return m_last_event.get_info<sycl::info::event::command_execution_status>()
105  == sycl::info::event_command_status::complete;
106  }
107 
108  auto wait() -> void
109  {
110  // SYCL queues are thread-safe.
111  m_queue.wait_and_throw();
112  }
113 
114  auto get_last_event() const -> sycl::event
115  {
116  std::shared_lock<std::shared_mutex> lock{m_mutex};
117  return m_last_event;
118  }
119 
120  template<bool TBlocking, typename TTask>
121  auto enqueue(TTask const& task) -> void
122  {
123  {
124  std::lock_guard<std::shared_mutex> lock{m_mutex};
125 
126  clean_dependencies();
127 
128  // Execute task
129  if constexpr(is_sycl_task<TTask> && !is_sycl_kernel<TTask>) // Copy / Fill
130  {
131  m_last_event = task(m_queue, m_dependencies); // Will call queue.{copy, fill} internally
132  }
133  else
134  {
135  m_last_event = m_queue.submit(
136  [this, &task](sycl::handler& cgh)
137  {
138  if(!m_dependencies.empty())
139  cgh.depends_on(m_dependencies);
140 
141  if constexpr(is_sycl_kernel<TTask>) // Kernel
142  task(cgh); // Will call cgh.parallel_for internally
143  else // Host
144  cgh.host_task(task);
145  });
146  }
147 
148  m_dependencies.clear();
149  }
150 
151  if constexpr(TBlocking)
152  wait();
153  }
154 
155  [[nodiscard]] auto getNativeHandle() const noexcept
156  {
157  return m_queue;
158  }
159 
160  std::vector<sycl::event> m_dependencies;
161  sycl::event m_last_event;
162  std::shared_mutex mutable m_mutex;
163 
164  private:
165  sycl::queue m_queue;
166  };
167 
168  template<typename TDev, bool TBlocking>
169  class QueueGenericSyclBase
170  {
171  public:
172  QueueGenericSyclBase(TDev const& dev)
173  : m_dev{dev}
174  , m_spQueueImpl{std::make_shared<detail::QueueGenericSyclImpl>(
175  dev.getNativeHandle().second,
176  dev.getNativeHandle().first)}
177  {
178  m_dev.m_impl->register_queue(m_spQueueImpl);
179  }
180 
181  friend auto operator==(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool
182  {
183  return (lhs.m_dev == rhs.m_dev) && (lhs.m_spQueueImpl == rhs.m_spQueueImpl);
184  }
185 
186  friend auto operator!=(QueueGenericSyclBase const& lhs, QueueGenericSyclBase const& rhs) -> bool
187  {
188  return !(lhs == rhs);
189  }
190 
191  [[nodiscard]] auto getNativeHandle() const noexcept
192  {
193  return m_spQueueImpl->getNativeHandle();
194  }
195 
196  TDev m_dev;
197  std::shared_ptr<detail::QueueGenericSyclImpl> m_spQueueImpl;
198  };
199 } // namespace alpaka::detail
200 
namespace alpaka
{
    //! Forward declaration only; this header names the type but does not need its definition.
    template<typename TDev>
    class EventGenericSycl;
} // namespace alpaka
206 
207 namespace alpaka::trait
208 {
209  //! The SYCL blocking queue device type trait specialization.
210  template<typename TDev, bool TBlocking>
211  struct DevType<detail::QueueGenericSyclBase<TDev, TBlocking>>
212  {
213  using type = TDev;
214  };
215 
216  //! The SYCL blocking queue device get trait specialization.
217  template<typename TDev, bool TBlocking>
218  struct GetDev<detail::QueueGenericSyclBase<TDev, TBlocking>>
219  {
220  static auto getDev(detail::QueueGenericSyclBase<TDev, TBlocking> const& queue)
221  {
223  return queue.m_dev;
224  }
225  };
226 
227  //! The SYCL blocking queue event type trait specialization.
228  template<typename TDev, bool TBlocking>
229  struct EventType<detail::QueueGenericSyclBase<TDev, TBlocking>>
230  {
231  using type = EventGenericSycl<TDev>;
232  };
233 
234  //! The SYCL blocking queue enqueue trait specialization.
235  template<typename TDev, bool TBlocking, typename TTask>
236  struct Enqueue<detail::QueueGenericSyclBase<TDev, TBlocking>, TTask>
237  {
238  static auto enqueue(detail::QueueGenericSyclBase<TDev, TBlocking>& queue, TTask const& task) -> void
239  {
241  queue.m_spQueueImpl->template enqueue<TBlocking>(task);
242  }
243  };
244 
245  //! The SYCL blocking queue test trait specialization.
246  template<typename TDev, bool TBlocking>
247  struct Empty<detail::QueueGenericSyclBase<TDev, TBlocking>>
248  {
249  static auto empty(detail::QueueGenericSyclBase<TDev, TBlocking> const& queue) -> bool
250  {
252  return queue.m_spQueueImpl->empty();
253  }
254  };
255 
256  //! The SYCL blocking queue thread wait trait specialization.
257  //!
258  //! Blocks execution of the calling thread until the queue has finished processing all previously requested
259  //! tasks (kernels, data copies, ...)
260  template<typename TDev, bool TBlocking>
261  struct CurrentThreadWaitFor<detail::QueueGenericSyclBase<TDev, TBlocking>>
262  {
263  static auto currentThreadWaitFor(detail::QueueGenericSyclBase<TDev, TBlocking> const& queue) -> void
264  {
266  queue.m_spQueueImpl->wait();
267  }
268  };
269 
270  //! The SYCL queue native handle trait specialization.
271  template<typename TDev, bool TBlocking>
272  struct NativeHandle<detail::QueueGenericSyclBase<TDev, TBlocking>>
273  {
274  [[nodiscard]] static auto getNativeHandle(detail::QueueGenericSyclBase<TDev, TBlocking> const& queue)
275  {
276  return queue.getNativeHandle();
277  }
278  };
279 } // namespace alpaka::trait
280 
281 #endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
ALPAKA_FN_HOST auto end(TView &view) -> Iterator< TView >
Definition: Iterator.hpp:139
ALPAKA_FN_HOST auto begin(TView &view) -> Iterator< TView >
Definition: Iterator.hpp:133
The accelerator traits.
The alpaka accelerator library.
constexpr ALPAKA_FN_HOST_ACC bool operator==(Complex< T > const &lhs, Complex< T > const &rhs)
Equality of two complex numbers.
Definition: Complex.hpp:285
decltype(getNativeHandle(std::declval< TImpl >())) NativeHandle
Alias to the type of the native handle.
Definition: Traits.hpp:36
ALPAKA_FN_HOST auto getDev(T const &t)
Definition: Traits.hpp:68
ALPAKA_FN_HOST auto getNativeHandle(TImpl const &impl)
Get the native handle of the alpaka object. It will return the alpaka object handle if there is any,...
Definition: Traits.hpp:29
ALPAKA_FN_HOST auto empty(TQueue const &queue) -> bool
Tests if the queue is empty (all ops in the given queue have been completed).
Definition: Traits.hpp:58
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition: Traits.hpp:47
ALPAKA_FN_HOST auto wait(TAwaited const &awaited) -> void
Blocks the calling thread until the given awaited action has completed.
Definition: Traits.hpp:34
constexpr ALPAKA_FN_HOST_ACC bool operator!=(Complex< T > const &lhs, Complex< T > const &rhs)
Inequality of two complex numbers.
Definition: Complex.hpp:311
static auto getNativeHandle(TImpl const &)
Definition: Traits.hpp:18