alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
DevGenericSycl.hpp
Go to the documentation of this file.
1/* Copyright 2024 Jan Stephan, Antonio Di Pilato, Luca Ferragina, Aurora Perego, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7#include "alpaka/acc/Tag.hpp"
10#include "alpaka/core/Sycl.hpp"
11#include "alpaka/dev/Traits.hpp"
20
21#include <algorithm>
22#include <cstddef>
23#include <functional>
24#include <memory>
25#include <mutex>
26#include <shared_mutex>
27#include <string>
28#include <utility>
29#include <vector>
30
31#ifdef ALPAKA_ACC_SYCL_ENABLED
32
33# include <sycl/sycl.hpp>
34
35namespace alpaka
36{
37 namespace trait
38 {
39 template<typename TPlatform, typename TSfinae>
40 struct GetDevByIdx;
41 } // namespace trait
42
43 template<concepts::Tag TTag>
44 using QueueGenericSyclBlocking = detail::QueueGenericSyclBase<TTag, true>;
45
46 template<concepts::Tag TTag>
47 using QueueGenericSyclNonBlocking = detail::QueueGenericSyclBase<TTag, false>;
48
49 template<concepts::Tag TTag>
50 struct PlatformGenericSycl;
51
52 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
53 class BufGenericSycl;
54
55 namespace detail
56 {
57 class DevGenericSyclImpl
58 {
59 public:
60 DevGenericSyclImpl(sycl::device device, sycl::context context)
61 : m_device{std::move(device)}
62 , m_context{std::move(context)}
63 {
64 }
65
66 // Don't call this without locking first!
67 auto clean_queues() -> void
68 {
69 // Clean up dead queues
70 auto const start = std::begin(m_queues);
71 auto const old_end = std::end(m_queues);
72 auto const new_end = std::remove_if(start, old_end, [](auto q_ptr) { return q_ptr.expired(); });
73 m_queues.erase(new_end, old_end);
74 }
75
76 auto register_queue(std::shared_ptr<QueueGenericSyclImpl> const& queue) -> void
77 {
78 std::lock_guard<std::shared_mutex> lock{m_mutex};
79
80 clean_queues();
81 m_queues.emplace_back(queue);
82 }
83
84 auto register_dependency(sycl::event event) -> void
85 {
86 std::shared_lock<std::shared_mutex> lock{m_mutex};
87
88 for(auto& q_ptr : m_queues)
89 {
90 if(auto ptr = q_ptr.lock(); ptr != nullptr)
91 ptr->register_dependency(event);
92 }
93 }
94
95 auto wait()
96 {
97 std::shared_lock<std::shared_mutex> lock{m_mutex};
98
99 for(auto& q_ptr : m_queues)
100 {
101 if(auto ptr = q_ptr.lock(); ptr != nullptr)
102 ptr->wait();
103 }
104 }
105
106 auto get_device() const -> sycl::device
107 {
108 return m_device;
109 }
110
111 auto get_context() const -> sycl::context
112 {
113 return m_context;
114 }
115
116 auto deviceProperties() -> std::optional<alpaka::DeviceProperties>&
117 {
118 std::call_once(
119 m_onceFlag,
120 [&]()
121 {
122 m_deviceProperties = std::make_optional<alpaka::DeviceProperties>();
123 auto const& device = this->get_device();
124 m_deviceProperties->name = device.template get_info<sycl::info::device::name>();
125 m_deviceProperties->totalGlobalMem
126 = device.template get_info<sycl::info::device::global_mem_size>();
127
128 std::vector<std::size_t> warp_sizes
129 = device.template get_info<sycl::info::device::sub_group_sizes>();
130 // The CPU runtime supports a sub-group size of 64, but the SYCL implementation currently
131 // does not
132 auto find64 = std::find(warp_sizes.begin(), warp_sizes.end(), 64);
133 if(find64 != warp_sizes.end())
134 warp_sizes.erase(find64);
135 // Sort the warp sizes in decreasing order
136 std::sort(warp_sizes.begin(), warp_sizes.end(), std::greater<>{});
137 m_deviceProperties->warpSizes = std::move(warp_sizes);
138 m_deviceProperties->preferredWarpSize = m_deviceProperties->warpSizes.front();
139 });
140
141 return m_deviceProperties;
142 }
143
144 private:
145 sycl::device m_device;
146 sycl::context m_context;
147 std::vector<std::weak_ptr<QueueGenericSyclImpl>> m_queues;
148 std::optional<alpaka::DeviceProperties> m_deviceProperties;
149 std::shared_mutex mutable m_mutex;
150 std::once_flag m_onceFlag;
151 };
152 } // namespace detail
153
154 //! The SYCL device handle.
155 template<concepts::Tag TTag>
156 class DevGenericSycl
157 : public interface::Implements<ConceptCurrentThreadWaitFor, DevGenericSycl<TTag>>
158 , public interface::Implements<ConceptDev, DevGenericSycl<TTag>>
159 {
160 friend struct trait::GetDevByIdx<PlatformGenericSycl<TTag>>;
161
162 public:
163 DevGenericSycl(sycl::device device, sycl::context context)
164 : m_impl{std::make_shared<detail::DevGenericSyclImpl>(std::move(device), std::move(context))}
165 {
166 }
167
168 friend auto operator==(DevGenericSycl const& lhs, DevGenericSycl const& rhs) -> bool
169 {
170 return (lhs.m_impl == rhs.m_impl);
171 }
172
173 friend auto operator!=(DevGenericSycl const& lhs, DevGenericSycl const& rhs) -> bool
174 {
175 return !(lhs == rhs);
176 }
177
178 [[nodiscard]] auto getNativeHandle() const -> std::pair<sycl::device, sycl::context>
179 {
180 return std::make_pair(m_impl->get_device(), m_impl->get_context());
181 }
182
183 std::shared_ptr<detail::DevGenericSyclImpl> m_impl;
184 };
185
186 namespace trait
187 {
188
189 //! The SYCL device name get trait specialization.
190 template<concepts::Tag TTag>
191 struct GetName<DevGenericSycl<TTag>>
192 {
193 static auto getName(DevGenericSycl<TTag> const& dev) -> std::string
194 {
195 return dev.m_impl->deviceProperties()->name;
196 }
197 };
198
199 //! The SYCL device available memory get trait specialization.
200 template<concepts::Tag TTag>
201 struct GetMemBytes<DevGenericSycl<TTag>>
202 {
203 static auto getMemBytes(DevGenericSycl<TTag> const& dev) -> std::size_t
204 {
205 return dev.m_impl->deviceProperties()->totalGlobalMem;
206 }
207 };
208
209 //! The SYCL device free memory get trait specialization.
210 template<concepts::Tag TTag>
211 struct GetFreeMemBytes<DevGenericSycl<TTag>>
212 {
213 static auto getFreeMemBytes(DevGenericSycl<TTag> const& /* dev */) -> std::size_t
214 {
215 static_assert(
216 !sizeof(PlatformGenericSycl<TTag>),
217 "Querying free device memory not supported for SYCL devices.");
218 return std::size_t{};
219 }
220 };
221
222 //! The SYCL device warp size get trait specialization.
223 template<concepts::Tag TTag>
224 struct GetWarpSizes<DevGenericSycl<TTag>>
225 {
226 static auto getWarpSizes(DevGenericSycl<TTag> const& dev) -> std::vector<std::size_t>
227 {
228 return dev.m_impl->deviceProperties()->warpSizes;
229 }
230 };
231
232 //! The SYCL device preferred warp size get trait specialization.
233 template<concepts::Tag TTag>
234 struct GetPreferredWarpSize<DevGenericSycl<TTag>>
235 {
236 static auto getPreferredWarpSize(DevGenericSycl<TTag> const& dev) -> std::size_t
237 {
238 return dev.m_impl->deviceProperties()->preferredWarpSize;
239 }
240 };
241
242 //! The SYCL device reset trait specialization.
243 template<concepts::Tag TTag>
244 struct Reset<DevGenericSycl<TTag>>
245 {
246 static auto reset(DevGenericSycl<TTag> const&) -> void
247 {
248 static_assert(
249 !sizeof(PlatformGenericSycl<TTag>),
250 "Explicit device reset not supported for SYCL devices");
251 }
252 };
253
254 //! The SYCL device native handle trait specialization.
255 template<concepts::Tag TTag>
256 struct NativeHandle<DevGenericSycl<TTag>>
257 {
258 [[nodiscard]] static auto getNativeHandle(DevGenericSycl<TTag> const& dev)
259 {
260 return dev.getNativeHandle();
261 }
262 };
263
264 //! The SYCL device memory buffer type trait specialization.
265 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
266 struct BufType<DevGenericSycl<TTag>, TElem, TDim, TIdx>
267 {
268 using type = BufGenericSycl<TElem, TDim, TIdx, TTag>;
269 };
270
271 //! The SYCL device platform type trait specialization.
272 template<concepts::Tag TTag>
273 struct PlatformType<DevGenericSycl<TTag>>
274 {
275 using type = PlatformGenericSycl<TTag>;
276 };
277
278 //! The thread SYCL device wait specialization.
279 template<concepts::Tag TTag>
280 struct CurrentThreadWaitFor<DevGenericSycl<TTag>>
281 {
282 static auto currentThreadWaitFor(DevGenericSycl<TTag> const& dev) -> void
283 {
284 dev.m_impl->wait();
285 }
286 };
287
288 //! The SYCL blocking queue trait specialization.
289 template<concepts::Tag TTag>
290 struct QueueType<DevGenericSycl<TTag>, Blocking>
291 {
292 using type = QueueGenericSyclBlocking<TTag>;
293 };
294
295 //! The SYCL non-blocking queue trait specialization.
296 template<concepts::Tag TTag>
297 struct QueueType<DevGenericSycl<TTag>, NonBlocking>
298 {
299 using type = QueueGenericSyclNonBlocking<TTag>;
300 };
301
302 } // namespace trait
303} // namespace alpaka
304
305#endif
constexpr ALPAKA_FN_HOST_ACC bool operator==(Complex< T > const &lhs, Complex< T > const &rhs)
Equality of two complex numbers.
Definition Complex.hpp:294
constexpr ALPAKA_FN_HOST_ACC bool operator!=(Complex< T > const &lhs, Complex< T > const &rhs)
Inequality of two complex numbers.
Definition Complex.hpp:320
The alpaka accelerator library.
ALPAKA_FN_HOST constexpr auto getPreferredWarpSize(TDev const &dev) -> std::size_t
Definition Traits.hpp:118
ALPAKA_FN_HOST auto getName(TDev const &dev) -> std::string
Definition Traits.hpp:87
ALPAKA_FN_HOST auto getWarpSizes(TDev const &dev) -> std::vector< std::size_t >
Definition Traits.hpp:111
ALPAKA_FN_HOST auto reset(TDev const &dev) -> void
Resets the device. What this method does is dependent on the accelerator.
Definition Traits.hpp:126
ALPAKA_FN_HOST auto getFreeMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:104
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
decltype(getNativeHandle(std::declval< TImpl >())) NativeHandle
Alias to the type of the native handle.
Definition Traits.hpp:36
ALPAKA_FN_HOST auto getNativeHandle(TImpl const &impl)
Get the native handle of the alpaka object. It will return the alpaka object handle if there is any,...
Definition Traits.hpp:29
ALPAKA_FN_HOST auto wait(TAwaited const &awaited) -> void
Makes the thread wait until the given awaited action has completed.
Definition Traits.hpp:34
STL namespace.
static auto getNativeHandle(TImpl const &)
Definition Traits.hpp:18