alpaka
Abstraction Library for Parallel Kernel Acceleration
AtomicGenericSycl.hpp
Go to the documentation of this file.
1 /* Copyright 2023 Jan Stephan, Andrea Bocci, Luca Ferragina
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include "alpaka/atomic/Op.hpp"
11 
12 #include <cstdint>
13 #include <type_traits>
14 
15 #ifdef ALPAKA_ACC_SYCL_ENABLED
16 
17 # include <sycl/sycl.hpp>
18 
19 namespace alpaka
20 {
21  //! The SYCL accelerator atomic ops.
22  //
23  // Atomics can used in the hierarchy level grids, blocks and threads.
24  // Atomics are not guaranteed to be safe between devices
25  class AtomicGenericSycl
26  {
27  };
28 
29  namespace detail
30  {
31  template<typename THierarchy>
32  struct SyclMemoryScope
33  {
34  };
35 
36  template<>
37  struct SyclMemoryScope<hierarchy::Grids>
38  {
39  static constexpr auto value = sycl::memory_scope::device;
40  };
41 
42  template<>
43  struct SyclMemoryScope<hierarchy::Blocks>
44  {
45  static constexpr auto value = sycl::memory_scope::device;
46  };
47 
48  template<>
49  struct SyclMemoryScope<hierarchy::Threads>
50  {
51  static constexpr auto value = sycl::memory_scope::work_group;
52  };
53 
54  template<typename T>
55  inline auto get_global_ptr(T* const addr)
56  {
57  return sycl::address_space_cast<sycl::access::address_space::global_space, sycl::access::decorated::no>(
58  addr);
59  }
60 
61  template<typename T>
62  inline auto get_local_ptr(T* const addr)
63  {
64  return sycl::address_space_cast<sycl::access::address_space::local_space, sycl::access::decorated::no>(
65  addr);
66  }
67 
68  template<typename T, typename THierarchy>
69  using global_ref = sycl::atomic_ref<
70  T,
71  sycl::memory_order::relaxed,
72  SyclMemoryScope<THierarchy>::value,
73  sycl::access::address_space::global_space>;
74 
75  template<typename T, typename THierarchy>
76  using local_ref = sycl::atomic_ref<
77  T,
78  sycl::memory_order::relaxed,
79  SyclMemoryScope<THierarchy>::value,
80  sycl::access::address_space::local_space>;
81 
82  template<typename THierarchy, typename T, typename TOp>
83  inline auto callAtomicOp(T* const addr, TOp&& op)
84  {
85  if(auto ptr = get_global_ptr(addr); ptr != nullptr)
86  {
87  auto ref = global_ref<T, THierarchy>{*addr};
88  return op(ref);
89  }
90  else
91  {
92  auto ref = local_ref<T, THierarchy>{*addr};
93  return op(ref);
94  }
95  }
96 
97  template<typename TRef, typename T, typename TEval>
98  inline auto casWithCondition(T* const addr, TEval&& eval)
99  {
100  auto ref = TRef{*addr};
101  auto old_val = ref.load();
102 
103  // prefer compare_exchange_weak when in a loop, assuming that eval is not expensive
104  while(!ref.compare_exchange_weak(old_val, eval(old_val)))
105  {
106  }
107 
108  return old_val;
109  }
110  } // namespace detail
111 } // namespace alpaka
112 
113 namespace alpaka::trait
114 {
115  // Add.
116  //! The SYCL accelerator atomic operation.
117  template<typename T, typename THierarchy>
118  struct AtomicOp<AtomicAdd, AtomicGenericSycl, T, THierarchy>
119  {
120  static_assert(std::is_integral_v<T> || std::is_floating_point_v<T>, "SYCL atomics do not support this type");
121 
122  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& value) -> T
123  {
124  return alpaka::detail::callAtomicOp<THierarchy>(
125  addr,
126  [&value](auto& ref) { return ref.fetch_add(value); });
127  }
128  };
129 
130  // Sub.
131  //! The SYCL accelerator atomic operation.
132  template<typename T, typename THierarchy>
133  struct AtomicOp<AtomicSub, AtomicGenericSycl, T, THierarchy>
134  {
135  static_assert(std::is_integral_v<T> || std::is_floating_point_v<T>, "SYCL atomics do not support this type");
136 
137  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& value) -> T
138  {
139  return alpaka::detail::callAtomicOp<THierarchy>(
140  addr,
141  [&value](auto& ref) { return ref.fetch_sub(value); });
142  }
143  };
144 
145  // Min.
146  //! The SYCL accelerator atomic operation.
147  template<typename T, typename THierarchy>
148  struct AtomicOp<AtomicMin, AtomicGenericSycl, T, THierarchy>
149  {
150  static_assert(std::is_integral_v<T> || std::is_floating_point_v<T>, "SYCL atomics do not support this type");
151 
152  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& value) -> T
153  {
154  return alpaka::detail::callAtomicOp<THierarchy>(
155  addr,
156  [&value](auto& ref) { return ref.fetch_min(value); });
157  }
158  };
159 
160  // Max.
161  //! The SYCL accelerator atomic operation.
162  template<typename T, typename THierarchy>
163  struct AtomicOp<AtomicMax, AtomicGenericSycl, T, THierarchy>
164  {
165  static_assert(std::is_integral_v<T> || std::is_floating_point_v<T>, "SYCL atomics do not support this type");
166 
167  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& value) -> T
168  {
169  return alpaka::detail::callAtomicOp<THierarchy>(
170  addr,
171  [&value](auto& ref) { return ref.fetch_max(value); });
172  }
173  };
174 
175  // Exch.
176  //! The SYCL accelerator atomic operation.
177  template<typename T, typename THierarchy>
178  struct AtomicOp<AtomicExch, AtomicGenericSycl, T, THierarchy>
179  {
180  static_assert(
181  (std::is_integral_v<T> || std::is_floating_point_v<T>) &&(sizeof(T) == 4 || sizeof(T) == 8),
182  "SYCL atomics do not support this type");
183 
184  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& value) -> T
185  {
186  return alpaka::detail::callAtomicOp<THierarchy>(addr, [&value](auto& ref) { return ref.exchange(value); });
187  }
188  };
189 
190  // Inc.
191  //! The SYCL accelerator atomic operation.
192  template<typename T, typename THierarchy>
193  struct AtomicOp<AtomicInc, AtomicGenericSycl, T, THierarchy>
194  {
195  static_assert(
196  std::is_unsigned_v<T> && (sizeof(T) == 4 || sizeof(T) == 8),
197  "SYCL atomics support only 32- and 64-bits unsigned integral types");
198 
199  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& value) -> T
200  {
201  auto inc = [&value](auto old_val)
202  { return (old_val >= value) ? static_cast<T>(0) : (old_val + static_cast<T>(1)); };
203  if(auto ptr = alpaka::detail::get_global_ptr(addr); ptr != nullptr)
204  return alpaka::detail::casWithCondition<alpaka::detail::global_ref<T, THierarchy>>(addr, inc);
205  else
206  return alpaka::detail::casWithCondition<alpaka::detail::local_ref<T, THierarchy>>(addr, inc);
207  }
208  };
209 
210  // Dec.
211  //! The SYCL accelerator atomic operation.
212  template<typename T, typename THierarchy>
213  struct AtomicOp<AtomicDec, AtomicGenericSycl, T, THierarchy>
214  {
215  static_assert(
216  std::is_unsigned_v<T> && (sizeof(T) == 4 || sizeof(T) == 8),
217  "SYCL atomics support only 32- and 64-bits unsigned integral types");
218 
219  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& value) -> T
220  {
221  auto dec = [&value](auto& old_val)
222  { return ((old_val == 0) || (old_val > value)) ? value : (old_val - static_cast<T>(1)); };
223  if(auto ptr = alpaka::detail::get_global_ptr(addr); ptr != nullptr)
224  return alpaka::detail::casWithCondition<alpaka::detail::global_ref<T, THierarchy>>(addr, dec);
225  else
226  return alpaka::detail::casWithCondition<alpaka::detail::local_ref<T, THierarchy>>(addr, dec);
227  }
228  };
229 
230  // And.
231  //! The SYCL accelerator atomic operation.
232  template<typename T, typename THierarchy>
233  struct AtomicOp<AtomicAnd, AtomicGenericSycl, T, THierarchy>
234  {
235  static_assert(std::is_integral_v<T>, "Bitwise operations only supported for integral types.");
236 
237  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& value) -> T
238  {
239  return alpaka::detail::callAtomicOp<THierarchy>(
240  addr,
241  [&value](auto& ref) { return ref.fetch_and(value); });
242  }
243  };
244 
245  // Or.
246  //! The SYCL accelerator atomic operation.
247  template<typename T, typename THierarchy>
248  struct AtomicOp<AtomicOr, AtomicGenericSycl, T, THierarchy>
249  {
250  static_assert(std::is_integral_v<T>, "Bitwise operations only supported for integral types.");
251 
252  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& value) -> T
253  {
254  return alpaka::detail::callAtomicOp<THierarchy>(addr, [&value](auto& ref) { return ref.fetch_or(value); });
255  }
256  };
257 
258  // Xor.
259  //! The SYCL accelerator atomic operation.
260  template<typename T, typename THierarchy>
261  struct AtomicOp<AtomicXor, AtomicGenericSycl, T, THierarchy>
262  {
263  static_assert(std::is_integral_v<T>, "Bitwise operations only supported for integral types.");
264 
265  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& value) -> T
266  {
267  return alpaka::detail::callAtomicOp<THierarchy>(
268  addr,
269  [&value](auto& ref) { return ref.fetch_xor(value); });
270  }
271  };
272 
273  // Cas.
274  //! The SYCL accelerator atomic operation.
275  template<typename T, typename THierarchy>
276  struct AtomicOp<AtomicCas, AtomicGenericSycl, T, THierarchy>
277  {
278  static_assert(std::is_integral_v<T> || std::is_floating_point_v<T>, "SYCL atomics do not support this type");
279 
280  static auto atomicOp(AtomicGenericSycl const&, T* const addr, T const& expected, T const& desired) -> T
281  {
282  auto cas = [&expected, &desired](auto& ref)
283  {
284  auto expected_ = expected;
285  // Atomically compares the value of `ref` with the value of `expected`.
286  // If the values are equal, replaces the value of `ref` with `desired`.
287  // Otherwise updates `expected` with the value of `ref`.
288  // Returns a bool telling us if the exchange happened or not, but the Alpaka API does not make use of
289  // it.
290  ref.compare_exchange_strong(expected_, desired);
291 
292  // If the update succeded, return the previous value of `ref`.
293  // Otherwise, return the current value of `ref`.
294  return expected_;
295  };
296 
297  if(auto ptr = alpaka::detail::get_global_ptr(addr); ptr != nullptr)
298  {
299  auto ref = alpaka::detail::global_ref<T, THierarchy>{*addr};
300  return cas(ref);
301  }
302  else
303  {
304  auto ref = alpaka::detail::local_ref<T, THierarchy>{*addr};
305  return cas(ref);
306  }
307  }
308  };
309 } // namespace alpaka::trait
310 
311 #endif
boost::atomic_ref< T > atomic_ref
The accelerator traits.
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicOp(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &=THierarchy()) -> T
Executes the given operation atomically.
Definition: Traits.hpp:73