18#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
20# if !defined(ALPAKA_HOST_ONLY)
22# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !ALPAKA_LANG_CUDA
23# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
26# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !ALPAKA_LANG_HIP
27# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
40 std::conditional_t<
sizeof(T) == 8u,
unsigned long long int,
void>>;
52 return std::bit_cast<AtomicCasType<T>>(value);
64 typename TSfinae = void,
65 typename TDefer =
void>
75 using EmulatedType = std::decay_t<
decltype(*addressAsIntegralType)>;
80# if __has_builtin(__hip_atomic_load)
81 EmulatedType old{__hip_atomic_load(addressAsIntegralType, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT)};
83 EmulatedType old{__atomic_load_n(addressAsIntegralType, __ATOMIC_RELAXED)};
86 EmulatedType old{*addressAsIntegralType};
92 T v = std::bit_cast<T>(assumed);
94 using Cas = alpaka::trait::
95 AtomicOp<alpaka::AtomicCas, alpaka::AtomicUniformCudaHipBuiltIn, EmulatedType, THierarchy>;
96 old = Cas::atomicOp(ctx, addressAsIntegralType, assumed,
reinterpretValue(v));
98 }
while(assumed != old);
99 return std::bit_cast<T>(old);
104 template<
typename T,
typename THierarchy>
115 using EmulatedType = std::decay_t<
decltype(*addressAsIntegralType)>;
119 auto old = alpaka::trait::
120 AtomicOp<alpaka::AtomicCas, alpaka::AtomicUniformCudaHipBuiltIn, EmulatedType, THierarchy>::
121 atomicOp(ctx, addressAsIntegralType, reinterpretedCompare, reinterpretedValue);
123 return std::bit_cast<T>(old);
128 template<
typename T,
typename THierarchy>
142 template<
typename T,
typename THierarchy>
148 std::enable_if_t<std::is_floating_point_v<T>>>
154 "EmulateAtomic<alpaka::AtomicDec> is not supported for floating point data types!");
160 template<
typename T,
typename THierarchy>
166 std::enable_if_t<std::is_floating_point_v<T>>>
172 "EmulateAtomic<alpaka::AtomicInc> is not supported for floating point data types!");
178 template<
typename T,
typename THierarchy>
184 std::enable_if_t<std::is_floating_point_v<T>>>
190 "EmulateAtomic<alpaka::AtomicAnd> is not supported for floating point data types!");
196 template<
typename T,
typename THierarchy>
202 std::enable_if_t<std::is_floating_point_v<T>>>
208 "EmulateAtomic<alpaka::AtomicOr> is not supported for floating point data types!");
214 template<
typename T,
typename THierarchy>
220 std::enable_if_t<std::is_floating_point_v<T>>>
226 "EmulateAtomic<alpaka::AtomicXor> is not supported for floating point data types!");
238 template<
typename TOp,
typename T,
typename THierarchy>
243 [[maybe_unused]] T*
const addr,
244 [[maybe_unused]] T
const& value) -> T
247 sizeof(T) == 4u ||
sizeof(T) == 8u,
248 "atomicOp<TOp, AtomicUniformCudaHipBuiltIn, T>(atomic, addr, value) is not supported! Only 64 and "
249 "32bit atomics are supported.");
251 if constexpr(::AlpakaBuiltInAtomic<TOp, T, THierarchy>::value)
252 return ::AlpakaBuiltInAtomic<TOp, T, THierarchy>::atomic(addr, value);
254 else if constexpr(std::is_same_v<unsigned long int, T>)
256 if constexpr(
sizeof(T) == 4u && ::AlpakaBuiltInAtomic<TOp, unsigned int, THierarchy>::value)
257 return ::AlpakaBuiltInAtomic<TOp, unsigned int, THierarchy>::atomic(
258 reinterpret_cast<unsigned int*
>(addr),
259 static_cast<unsigned int>(value));
261 sizeof(T) == 8u && ::AlpakaBuiltInAtomic<TOp, unsigned long long int, THierarchy>::value)
263 return ::AlpakaBuiltInAtomic<TOp, unsigned long long int, THierarchy>::atomic(
264 reinterpret_cast<unsigned long long int*
>(addr),
265 static_cast<unsigned long long int>(value));
273 template<
typename T,
typename THierarchy>
278 [[maybe_unused]] T*
const addr,
279 [[maybe_unused]] T
const& compare,
280 [[maybe_unused]] T
const& value) -> T
283 sizeof(T) == 4u ||
sizeof(T) == 8u,
284 "atomicOp<AtomicCas, AtomicUniformCudaHipBuiltIn, T>(atomic, addr, compare, value) is not "
285 "supported! Only 64 and "
286 "32bit atomics are supported.");
288 if constexpr(::AlpakaBuiltInAtomic<AtomicCas, T, THierarchy>::value)
289 return ::AlpakaBuiltInAtomic<AtomicCas, T, THierarchy>::atomic(addr, compare, value);
291 else if constexpr(std::is_same_v<unsigned long int, T>)
293 if constexpr(
sizeof(T) == 4u && ::AlpakaBuiltInAtomic<AtomicCas, unsigned int, THierarchy>::value)
294 return ::AlpakaBuiltInAtomic<AtomicCas, unsigned int, THierarchy>::atomic(
295 reinterpret_cast<unsigned int*
>(addr),
296 static_cast<unsigned int>(compare),
297 static_cast<unsigned int>(value));
300 && ::AlpakaBuiltInAtomic<AtomicCas, unsigned long long int, THierarchy>::value)
302 return ::AlpakaBuiltInAtomic<AtomicCas, unsigned long long int, THierarchy>::atomic(
303 reinterpret_cast<unsigned long long int*
>(addr),
304 static_cast<unsigned long long int>(compare),
305 static_cast<unsigned long long int>(value));
The alpaka accelerator library.
The compare and swap function object.
The decrement function object.
The increment function object.
The subtraction function object.
The exclusive or function object.
The atomic operation trait.
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &, T *const, T const &) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &ctx, T *const addr, T const &compare, T const &value) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &, T *const, T const &) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &, T *const, T const &) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &, T *const, T const &) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &ctx, T *const addr, T const &value) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &, T *const, T const &) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &ctx, T *const addr, T const &value) -> T
std::conditional_t< sizeof(T)==4u, unsigned int, std::conditional_t< sizeof(T)==8u, unsigned long long int, void > > AtomicCasType
static __device__ auto reinterpretValue(T value) -> AtomicCasType< T > requires(sizeof(T)==4u||sizeof(T)==8u)
static __device__ auto reinterpretAddress(T *address) -> AtomicCasType< T > *requires(sizeof(T)==4u||sizeof(T)==8u)