alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
DevGenericSycl.hpp
Go to the documentation of this file.
1/* Copyright 2024 Jan Stephan, Antonio Di Pilato, Luca Ferragina, Aurora Perego, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7#include "alpaka/acc/Tag.hpp"
10#include "alpaka/core/Sycl.hpp"
11#include "alpaka/dev/Traits.hpp"
20
21#include <algorithm>
22#include <cstddef>
23#include <functional>
24#include <memory>
25#include <mutex>
26#include <shared_mutex>
27#include <string>
28#include <utility>
29#include <vector>
30
31#ifdef ALPAKA_ACC_SYCL_ENABLED
32
33# include <sycl/sycl.hpp>
34
35namespace alpaka
36{
37 namespace trait
38 {
//! Forward declaration of the device-by-index trait. It is only declared here, not defined;
//! DevGenericSycl names a specialization of it (for PlatformGenericSycl<TTag>) as a friend.
39 template<typename TPlatform, typename TSfinae>
40 struct GetDevByIdx;
41 } // namespace trait
42
//! Alias for detail::QueueGenericSyclBase<TTag, true>.
//! trait::QueueType<DevGenericSycl<TTag>, Blocking> exposes this alias as its `type`.
43 template<concepts::Tag TTag>
44 using QueueGenericSyclBlocking = detail::QueueGenericSyclBase<TTag, true>;
45
//! Alias for detail::QueueGenericSyclBase<TTag, false>.
//! trait::QueueType<DevGenericSycl<TTag>, NonBlocking> exposes this alias as its `type`.
46 template<concepts::Tag TTag>
47 using QueueGenericSyclNonBlocking = detail::QueueGenericSyclBase<TTag, false>;
48
//! Forward declaration of the SYCL platform type for tag TTag (declared only, not defined here).
//! It is needed below for the GetDevByIdx friend declaration, the PlatformType trait and the
//! dependent static_asserts in GetFreeMemBytes and Reset.
49 template<concepts::Tag TTag>
50 struct PlatformGenericSycl;
51
52 namespace detail
53 {
54 class DevGenericSyclImpl
55 {
56 public:
57 DevGenericSyclImpl(sycl::device device, sycl::context context)
58 : m_device{std::move(device)}
59 , m_context{std::move(context)}
60 {
61 }
62
63 // Don't call this without locking first!
64 auto clean_queues() -> void
65 {
66 // Clean up dead queues
67 auto const start = std::begin(m_queues);
68 auto const old_end = std::end(m_queues);
69 auto const new_end = std::remove_if(start, old_end, [](auto q_ptr) { return q_ptr.expired(); });
70 m_queues.erase(new_end, old_end);
71 }
72
73 auto register_queue(std::shared_ptr<QueueGenericSyclImpl> const& queue) -> void
74 {
75 std::lock_guard<std::shared_mutex> lock{m_mutex};
76
77 clean_queues();
78 m_queues.emplace_back(queue);
79 }
80
81 auto register_dependency(sycl::event event) -> void
82 {
83 std::shared_lock<std::shared_mutex> lock{m_mutex};
84
85 for(auto& q_ptr : m_queues)
86 {
87 if(auto ptr = q_ptr.lock(); ptr != nullptr)
88 ptr->register_dependency(event);
89 }
90 }
91
92 auto wait()
93 {
94 std::shared_lock<std::shared_mutex> lock{m_mutex};
95
96 for(auto& q_ptr : m_queues)
97 {
98 if(auto ptr = q_ptr.lock(); ptr != nullptr)
99 ptr->wait();
100 }
101 }
102
103 auto get_device() const -> sycl::device
104 {
105 return m_device;
106 }
107
108 auto get_context() const -> sycl::context
109 {
110 return m_context;
111 }
112
113 auto deviceProperties() -> std::optional<alpaka::DeviceProperties>&
114 {
115 std::call_once(
116 m_onceFlag,
117 [&]()
118 {
119 m_deviceProperties = std::make_optional<alpaka::DeviceProperties>();
120 auto const& device = this->get_device();
121 m_deviceProperties->name = device.template get_info<sycl::info::device::name>();
122 m_deviceProperties->totalGlobalMem
123 = device.template get_info<sycl::info::device::global_mem_size>();
124
125 std::vector<std::size_t> warp_sizes
126 = device.template get_info<sycl::info::device::sub_group_sizes>();
127 // The CPU runtime supports a sub-group size of 64, but the SYCL implementation currently
128 // does not
129 auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64);
130 if(find64 != warp_sizes.end())
131 warp_sizes.erase(find64);
132 // Sort the warp sizes in decreasing order
133 std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{});
134 m_deviceProperties->warpSizes = std::move(warp_sizes);
135 m_deviceProperties->preferredWarpSize = m_deviceProperties->warpSizes.front();
136 });
137
138 return m_deviceProperties;
139 }
140
141 private:
142 sycl::device m_device;
143 sycl::context m_context;
144 std::vector<std::weak_ptr<QueueGenericSyclImpl>> m_queues;
145 std::optional<alpaka::DeviceProperties> m_deviceProperties;
146 std::shared_mutex mutable m_mutex;
147 std::once_flag m_onceFlag;
148 };
149 } // namespace detail
150
151 //! The SYCL device handle.
152 template<concepts::Tag TTag>
153 class DevGenericSycl
154 : public interface::Implements<ConceptCurrentThreadWaitFor, DevGenericSycl<TTag>>
155 , public interface::Implements<ConceptDev, DevGenericSycl<TTag>>
156 {
157 friend struct trait::GetDevByIdx<PlatformGenericSycl<TTag>>;
158
159 public:
160 DevGenericSycl(sycl::device device, sycl::context context)
161 : m_impl{std::make_shared<detail::DevGenericSyclImpl>(std::move(device), std::move(context))}
162 {
163 }
164
165 friend auto operator==(DevGenericSycl const& lhs, DevGenericSycl const& rhs) -> bool
166 {
167 return (lhs.m_impl == rhs.m_impl);
168 }
169
170 friend auto operator!=(DevGenericSycl const& lhs, DevGenericSycl const& rhs) -> bool
171 {
172 return !(lhs == rhs);
173 }
174
175 [[nodiscard]] auto getNativeHandle() const -> std::pair<sycl::device, sycl::context>
176 {
177 return std::make_pair(m_impl->get_device(), m_impl->get_context());
178 }
179
180 std::shared_ptr<detail::DevGenericSyclImpl> m_impl;
181 };
182
183 namespace trait
184 {
185
186 //! The SYCL device name get trait specialization.
187 template<concepts::Tag TTag>
188 struct GetName<DevGenericSycl<TTag>>
189 {
190 static auto getName(DevGenericSycl<TTag> const& dev) -> std::string
191 {
192 return dev.m_impl->deviceProperties()->name;
193 }
194 };
195
196 //! The SYCL device available memory get trait specialization.
197 template<concepts::Tag TTag>
198 struct GetMemBytes<DevGenericSycl<TTag>>
199 {
200 static auto getMemBytes(DevGenericSycl<TTag> const& dev) -> std::size_t
201 {
202 return dev.m_impl->deviceProperties()->totalGlobalMem;
203 }
204 };
205
206 //! The SYCL device free memory get trait specialization.
207 template<concepts::Tag TTag>
208 struct GetFreeMemBytes<DevGenericSycl<TTag>>
209 {
210 static auto getFreeMemBytes(DevGenericSycl<TTag> const& /* dev */) -> std::size_t
211 {
212 static_assert(
213 !sizeof(PlatformGenericSycl<TTag>),
214 "Querying free device memory not supported for SYCL devices.");
215 return std::size_t{};
216 }
217 };
218
219 //! The SYCL device warp size get trait specialization.
220 template<concepts::Tag TTag>
221 struct GetWarpSizes<DevGenericSycl<TTag>>
222 {
223 static auto getWarpSizes(DevGenericSycl<TTag> const& dev) -> std::vector<std::size_t>
224 {
225 return dev.m_impl->deviceProperties()->warpSizes;
226 }
227 };
228
229 //! The SYCL device preferred warp size get trait specialization.
230 template<concepts::Tag TTag>
231 struct GetPreferredWarpSize<DevGenericSycl<TTag>>
232 {
233 static auto getPreferredWarpSize(DevGenericSycl<TTag> const& dev) -> std::size_t
234 {
235 return dev.m_impl->deviceProperties()->preferredWarpSize;
236 }
237 };
238
239 //! The SYCL device reset trait specialization.
240 template<concepts::Tag TTag>
241 struct Reset<DevGenericSycl<TTag>>
242 {
243 static auto reset(DevGenericSycl<TTag> const&) -> void
244 {
245 static_assert(
246 !sizeof(PlatformGenericSycl<TTag>),
247 "Explicit device reset not supported for SYCL devices");
248 }
249 };
250
251 //! The SYCL device native handle trait specialization.
252 template<concepts::Tag TTag>
253 struct NativeHandle<DevGenericSycl<TTag>>
254 {
255 [[nodiscard]] static auto getNativeHandle(DevGenericSycl<TTag> const& dev)
256 {
257 return dev.getNativeHandle();
258 }
259 };
260
261 //! The SYCL device platform type trait specialization.
//! Exposes PlatformGenericSycl<TTag> as the nested `type` for DevGenericSycl<TTag>.
262 template<concepts::Tag TTag>
263 struct PlatformType<DevGenericSycl<TTag>>
264 {
265 using type = PlatformGenericSycl<TTag>;
266 };
267
268 //! The thread SYCL device wait specialization.
269 template<concepts::Tag TTag>
270 struct CurrentThreadWaitFor<DevGenericSycl<TTag>>
271 {
272 static auto currentThreadWaitFor(DevGenericSycl<TTag> const& dev) -> void
273 {
274 dev.m_impl->wait();
275 }
276 };
277
278 //! The SYCL blocking queue trait specialization.
//! Exposes QueueGenericSyclBlocking<TTag> as the nested `type` for the (DevGenericSycl<TTag>, Blocking) pair.
279 template<concepts::Tag TTag>
280 struct QueueType<DevGenericSycl<TTag>, Blocking>
281 {
282 using type = QueueGenericSyclBlocking<TTag>;
283 };
284
285 //! The SYCL non-blocking queue trait specialization.
//! Exposes QueueGenericSyclNonBlocking<TTag> as the nested `type` for the (DevGenericSycl<TTag>, NonBlocking) pair.
286 template<concepts::Tag TTag>
287 struct QueueType<DevGenericSycl<TTag>, NonBlocking>
288 {
289 using type = QueueGenericSyclNonBlocking<TTag>;
290 };
291
292 } // namespace trait
293} // namespace alpaka
294
295#endif
constexpr ALPAKA_FN_HOST_ACC bool operator==(Complex< T > const &lhs, Complex< T > const &rhs)
Equality of two complex numbers.
Definition Complex.hpp:294
constexpr ALPAKA_FN_HOST_ACC bool operator!=(Complex< T > const &lhs, Complex< T > const &rhs)
Inequality of two complex numbers.
Definition Complex.hpp:320
The alpaka accelerator library.
ALPAKA_FN_HOST constexpr auto getPreferredWarpSize(TDev const &dev) -> std::size_t
Definition Traits.hpp:118
ALPAKA_FN_HOST auto getName(TDev const &dev) -> std::string
Definition Traits.hpp:87
ALPAKA_FN_HOST auto getWarpSizes(TDev const &dev) -> std::vector< std::size_t >
Definition Traits.hpp:111
ALPAKA_FN_HOST auto reset(TDev const &dev) -> void
Resets the device. What this method does is dependent on the accelerator.
Definition Traits.hpp:126
ALPAKA_FN_HOST auto getFreeMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:104
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
decltype(getNativeHandle(std::declval< TImpl >())) NativeHandle
Alias to the type of the native handle.
Definition Traits.hpp:36
ALPAKA_FN_HOST auto getNativeHandle(TImpl const &impl)
Get the native handle of the alpaka object. It will return the alpaka object handle if there is any,...
Definition Traits.hpp:29
ALPAKA_FN_HOST auto wait(TAwaited const &awaited) -> void
Blocks the calling thread until the given awaited action has completed.
Definition Traits.hpp:34
STL namespace.
static auto getNativeHandle(TImpl const &)
Definition Traits.hpp:18