alpaka
Abstraction Library for Parallel Kernel Acceleration
DevGenericSycl.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Jan Stephan, Antonio Di Pilato, Luca Ferragina, Aurora Perego, Andrea Bocci
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include "alpaka/acc/Traits.hpp"
8 #include "alpaka/core/Common.hpp"
9 #include "alpaka/core/Sycl.hpp"
10 #include "alpaka/dev/Traits.hpp"
14 #include "alpaka/queue/Traits.hpp"
16 #include "alpaka/traits/Traits.hpp"
17 #include "alpaka/wait/Traits.hpp"
18 
19 #include <algorithm>
20 #include <cstddef>
21 #include <functional>
22 #include <memory>
23 #include <mutex>
24 #include <shared_mutex>
25 #include <string>
26 #include <utility>
27 #include <vector>
28 
29 #ifdef ALPAKA_ACC_SYCL_ENABLED
30 
31 # include <sycl/sycl.hpp>
32 
33 namespace alpaka
34 {
35  template<typename TElem, typename TDim, typename TIdx, typename TDev>
36  class BufGenericSycl;
37 
38  namespace detail
39  {
40  class DevGenericSyclImpl
41  {
42  public:
43  DevGenericSyclImpl(sycl::device device, sycl::context context)
44  : m_device{std::move(device)}
45  , m_context{std::move(context)}
46  {
47  }
48 
49  // Don't call this without locking first!
50  auto clean_queues() -> void
51  {
52  // Clean up dead queues
53  auto const start = std::begin(m_queues);
54  auto const old_end = std::end(m_queues);
55  auto const new_end = std::remove_if(start, old_end, [](auto q_ptr) { return q_ptr.expired(); });
56  m_queues.erase(new_end, old_end);
57  }
58 
59  auto register_queue(std::shared_ptr<QueueGenericSyclImpl> const& queue) -> void
60  {
61  std::lock_guard<std::shared_mutex> lock{m_mutex};
62 
63  clean_queues();
64  m_queues.emplace_back(queue);
65  }
66 
67  auto register_dependency(sycl::event event) -> void
68  {
69  std::shared_lock<std::shared_mutex> lock{m_mutex};
70 
71  for(auto& q_ptr : m_queues)
72  {
73  if(auto ptr = q_ptr.lock(); ptr != nullptr)
74  ptr->register_dependency(event);
75  }
76  }
77 
78  auto wait()
79  {
80  std::shared_lock<std::shared_mutex> lock{m_mutex};
81 
82  for(auto& q_ptr : m_queues)
83  {
84  if(auto ptr = q_ptr.lock(); ptr != nullptr)
85  ptr->wait();
86  }
87  }
88 
89  auto get_device() const -> sycl::device
90  {
91  return m_device;
92  }
93 
94  auto get_context() const -> sycl::context
95  {
96  return m_context;
97  }
98 
99  private:
100  sycl::device m_device;
101  sycl::context m_context;
102  std::vector<std::weak_ptr<QueueGenericSyclImpl>> m_queues;
103  std::shared_mutex mutable m_mutex;
104  };
105  } // namespace detail
106 
107  //! The SYCL device handle.
108  template<typename TPlatform>
109  class DevGenericSycl
110  : public concepts::Implements<ConceptCurrentThreadWaitFor, DevGenericSycl<TPlatform>>
111  , public concepts::Implements<ConceptDev, DevGenericSycl<TPlatform>>
112  {
113  public:
114  DevGenericSycl(sycl::device device, sycl::context context)
115  : m_impl{std::make_shared<detail::DevGenericSyclImpl>(std::move(device), std::move(context))}
116  {
117  }
118 
119  friend auto operator==(DevGenericSycl const& lhs, DevGenericSycl const& rhs) -> bool
120  {
121  return (lhs.m_impl == rhs.m_impl);
122  }
123 
124  friend auto operator!=(DevGenericSycl const& lhs, DevGenericSycl const& rhs) -> bool
125  {
126  return !(lhs == rhs);
127  }
128 
129  [[nodiscard]] auto getNativeHandle() const -> std::pair<sycl::device, sycl::context>
130  {
131  return std::make_pair(m_impl->get_device(), m_impl->get_context());
132  }
133 
134  std::shared_ptr<detail::DevGenericSyclImpl> m_impl;
135  };
136 } // namespace alpaka
137 
138 namespace alpaka::trait
139 {
140  //! The SYCL device name get trait specialization.
141  template<typename TPlatform>
142  struct GetName<DevGenericSycl<TPlatform>>
143  {
144  static auto getName(DevGenericSycl<TPlatform> const& dev) -> std::string
145  {
146  auto const device = dev.getNativeHandle().first;
147  return device.template get_info<sycl::info::device::name>();
148  }
149  };
150 
151  //! The SYCL device available memory get trait specialization.
152  template<typename TPlatform>
153  struct GetMemBytes<DevGenericSycl<TPlatform>>
154  {
155  static auto getMemBytes(DevGenericSycl<TPlatform> const& dev) -> std::size_t
156  {
157  auto const device = dev.getNativeHandle().first;
158  return device.template get_info<sycl::info::device::global_mem_size>();
159  }
160  };
161 
162  //! The SYCL device free memory get trait specialization.
163  template<typename TPlatform>
164  struct GetFreeMemBytes<DevGenericSycl<TPlatform>>
165  {
166  static auto getFreeMemBytes(DevGenericSycl<TPlatform> const& /* dev */) -> std::size_t
167  {
168  static_assert(!sizeof(TPlatform), "Querying free device memory not supported for SYCL devices.");
169  return std::size_t{};
170  }
171  };
172 
173  //! The SYCL device warp size get trait specialization.
174  template<typename TPlatform>
175  struct GetWarpSizes<DevGenericSycl<TPlatform>>
176  {
177  static auto getWarpSizes(DevGenericSycl<TPlatform> const& dev) -> std::vector<std::size_t>
178  {
179  auto const device = dev.getNativeHandle().first;
180  std::vector<std::size_t> warp_sizes = device.template get_info<sycl::info::device::sub_group_sizes>();
181  // The CPU runtime supports a sub-group size of 64, but the SYCL implementation currently does not
182  auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64);
183  if(find64 != warp_sizes.end())
184  warp_sizes.erase(find64);
185  // Sort the warp sizes in decreasing order
186  std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{});
187  return warp_sizes;
188  }
189  };
190 
191  //! The SYCL device preferred warp size get trait specialization.
192  template<typename TPlatform>
193  struct GetPreferredWarpSize<DevGenericSycl<TPlatform>>
194  {
195  static auto getPreferredWarpSize(DevGenericSycl<TPlatform> const& dev) -> std::size_t
196  {
197  return GetWarpSizes<DevGenericSycl<TPlatform>>::getWarpSizes(dev).front();
198  }
199  };
200 
201  //! The SYCL device reset trait specialization.
202  template<typename TPlatform>
203  struct Reset<DevGenericSycl<TPlatform>>
204  {
205  static auto reset(DevGenericSycl<TPlatform> const&) -> void
206  {
207  static_assert(!sizeof(TPlatform), "Explicit device reset not supported for SYCL devices");
208  }
209  };
210 
211  //! The SYCL device native handle trait specialization.
212  template<typename TPlatform>
213  struct NativeHandle<DevGenericSycl<TPlatform>>
214  {
215  [[nodiscard]] static auto getNativeHandle(DevGenericSycl<TPlatform> const& dev)
216  {
217  return dev.getNativeHandle();
218  }
219  };
220 
221  //! The SYCL device memory buffer type trait specialization.
222  template<typename TElem, typename TDim, typename TIdx, typename TPlatform>
223  struct BufType<DevGenericSycl<TPlatform>, TElem, TDim, TIdx>
224  {
225  using type = BufGenericSycl<TElem, TDim, TIdx, TPlatform>;
226  };
227 
228  //! The SYCL device platform type trait specialization.
229  template<typename TPlatform>
230  struct PlatformType<DevGenericSycl<TPlatform>>
231  {
232  using type = TPlatform;
233  };
234 
235  //! The thread SYCL device wait specialization.
236  template<typename TPlatform>
237  struct CurrentThreadWaitFor<DevGenericSycl<TPlatform>>
238  {
239  static auto currentThreadWaitFor(DevGenericSycl<TPlatform> const& dev) -> void
240  {
241  dev.m_impl->wait();
242  }
243  };
244 
245  //! The SYCL blocking queue trait specialization.
246  template<typename TPlatform>
247  struct QueueType<DevGenericSycl<TPlatform>, Blocking>
248  {
249  using type = detail::QueueGenericSyclBase<DevGenericSycl<TPlatform>, true>;
250  };
251 
252  //! The SYCL non-blocking queue trait specialization.
253  template<typename TPlatform>
254  struct QueueType<DevGenericSycl<TPlatform>, NonBlocking>
255  {
256  using type = detail::QueueGenericSyclBase<DevGenericSycl<TPlatform>, false>;
257  };
258 } // namespace alpaka::trait
259 
260 #endif
ALPAKA_FN_HOST auto end(TView &view) -> Iterator< TView >
Definition: Iterator.hpp:139
ALPAKA_FN_HOST auto begin(TView &view) -> Iterator< TView >
Definition: Iterator.hpp:133
The accelerator traits.
The alpaka accelerator library.
ALPAKA_FN_HOST auto getName(TDev const &dev) -> std::string
Definition: Traits.hpp:87
ALPAKA_FN_HOST auto getWarpSizes(TDev const &dev) -> std::vector< std::size_t >
Definition: Traits.hpp:111
ALPAKA_FN_HOST auto reset(TDev const &dev) -> void
Resets the device. What this method does is dependent on the accelerator.
Definition: Traits.hpp:126
ALPAKA_FN_HOST auto getFreeMemBytes(TDev const &dev) -> std::size_t
Definition: Traits.hpp:104
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition: Traits.hpp:95
constexpr ALPAKA_FN_HOST_ACC bool operator==(Complex< T > const &lhs, Complex< T > const &rhs)
Equality of two complex numbers.
Definition: Complex.hpp:285
constexpr ALPAKA_FN_HOST auto getPreferredWarpSize(TDev const &dev) -> std::size_t
Definition: Traits.hpp:118
decltype(getNativeHandle(std::declval< TImpl >())) NativeHandle
Alias to the type of the native handle.
Definition: Traits.hpp:36
ALPAKA_FN_HOST auto getNativeHandle(TImpl const &impl)
Get the native handle of the alpaka object. It will return the alpaka object handle if there is any,...
Definition: Traits.hpp:29
ALPAKA_FN_HOST auto wait(TAwaited const &awaited) -> void
Waits the thread for the completion of the given awaited action to complete.
Definition: Traits.hpp:34
constexpr ALPAKA_FN_HOST_ACC bool operator!=(Complex< T > const &lhs, Complex< T > const &rhs)
Inequality of two complex numbers.
Definition: Complex.hpp:311
static auto getNativeHandle(TImpl const &)
Definition: Traits.hpp:18