15 #include <type_traits>
17 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
19 # if !defined(ALPAKA_HOST_ONLY)
21 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
22 # error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
25 # if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
26 # error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
36 template<
typename TAddressType>
38 -> std::enable_if_t<
sizeof(TAddressType) == 4u,
unsigned int*>
40 return reinterpret_cast<unsigned int*
>(address);
44 template<
typename TAddressType>
46 -> std::enable_if_t<
sizeof(TAddressType) == 8u,
unsigned long long int*>
48 return reinterpret_cast<unsigned long long int*
>(address);
52 template<
typename T_Type>
67 typename TSfinae = void,
68 typename TDefer =
void>
78 using EmulatedType = std::decay_t<decltype(*addressAsIntegralType)>;
83 # if __has_builtin(__hip_atomic_load)
84 EmulatedType old{__hip_atomic_load(addressAsIntegralType, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT)};
86 EmulatedType old{__atomic_load_n(addressAsIntegralType, __ATOMIC_RELAXED)};
89 EmulatedType old{*addressAsIntegralType};
95 T v = *(
reinterpret_cast<T*
>(&assumed));
97 using Cas = alpaka::trait::
98 AtomicOp<alpaka::AtomicCas, alpaka::AtomicUniformCudaHipBuiltIn, EmulatedType, THierarchy>;
101 }
while(assumed != old);
102 return *(
reinterpret_cast<T*
>(&old));
107 template<
typename T,
typename THierarchy>
118 using EmulatedType = std::decay_t<decltype(*addressAsIntegralType)>;
124 atomicOp(ctx, addressAsIntegralType, reinterpretedCompare, reinterpretedValue);
126 return *(
reinterpret_cast<T*
>(&old));
131 template<
typename T,
typename THierarchy>
145 template<
typename T,
typename THierarchy>
151 std::enable_if_t<std::is_floating_point_v<T>>>
157 "EmulateAtomic<alpaka::AtomicDec> is not supported for floating point data types!");
163 template<
typename T,
typename THierarchy>
169 std::enable_if_t<std::is_floating_point_v<T>>>
175 "EmulateAtomic<alpaka::AtomicInc> is not supported for floating point data types!");
181 template<
typename T,
typename THierarchy>
187 std::enable_if_t<std::is_floating_point_v<T>>>
193 "EmulateAtomic<alpaka::AtomicAnd> is not supported for floating point data types!");
199 template<
typename T,
typename THierarchy>
205 std::enable_if_t<std::is_floating_point_v<T>>>
211 "EmulateAtomic<alpaka::AtomicOr> is not supported for floating point data types!");
217 template<
typename T,
typename THierarchy>
223 std::enable_if_t<std::is_floating_point_v<T>>>
229 "EmulateAtomic<alpaka::AtomicXor> is not supported for floating point data types!");
241 template<
typename TOp,
typename T,
typename THierarchy>
246 [[maybe_unused]] T*
const addr,
247 [[maybe_unused]] T
const& value) -> T
250 sizeof(T) == 4u ||
sizeof(T) == 8u,
251 "atomicOp<TOp, AtomicUniformCudaHipBuiltIn, T>(atomic, addr, value) is not supported! Only 64 and "
252 "32bit atomics are supported.");
255 return ::AlpakaBuiltInAtomic<TOp, T, THierarchy>::atomic(addr, value);
257 else if constexpr(std::is_same_v<unsigned long int, T>)
260 return ::AlpakaBuiltInAtomic<TOp, unsigned int, THierarchy>::atomic(
261 reinterpret_cast<unsigned int*
>(addr),
262 static_cast<unsigned int>(value));
266 return ::AlpakaBuiltInAtomic<TOp, unsigned long long int, THierarchy>::atomic(
267 reinterpret_cast<unsigned long long int*
>(addr),
268 static_cast<unsigned long long int>(value));
276 template<
typename T,
typename THierarchy>
281 [[maybe_unused]] T*
const addr,
282 [[maybe_unused]] T
const& compare,
283 [[maybe_unused]] T
const& value) -> T
286 sizeof(T) == 4u ||
sizeof(T) == 8u,
287 "atomicOp<AtomicCas, AtomicUniformCudaHipBuiltIn, T>(atomic, addr, compare, value) is not "
288 "supported! Only 64 and "
289 "32bit atomics are supported.");
292 return ::AlpakaBuiltInAtomic<AtomicCas, T, THierarchy>::atomic(addr, compare, value);
294 else if constexpr(std::is_same_v<unsigned long int, T>)
297 return ::AlpakaBuiltInAtomic<AtomicCas, unsigned int, THierarchy>::atomic(
298 reinterpret_cast<unsigned int*
>(addr),
299 static_cast<unsigned int>(compare),
300 static_cast<unsigned int>(value));
305 return ::AlpakaBuiltInAtomic<AtomicCas, unsigned long long int, THierarchy>::atomic(
306 reinterpret_cast<unsigned long long int*
>(addr),
307 static_cast<unsigned long long int>(compare),
308 static_cast<unsigned long long int>(value));
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicOp(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &=THierarchy()) -> T
Executes the given operation atomically.
Provide an interface to builtin atomic functions.
The compare and swap function object.
The decrement function object.
The increment function object.
The subtraction function object.
The exclusive or function object.
The atomic operation trait.
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &, T *const, T const &) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &ctx, T *const addr, T const &compare, T const &value) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &, T *const, T const &) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &, T *const, T const &) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &, T *const, T const &) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &ctx, T *const addr, T const &value) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &, T *const, T const &) -> T
static __device__ auto atomic(alpaka::AtomicUniformCudaHipBuiltIn const &ctx, T *const addr, T const &value) -> T
static __device__ auto reinterpretValue(T_Type value)
reinterprets a value to be usable for the atomicCAS emulation
static __device__ auto reinterpretAddress(TAddressType *address) -> std::enable_if_t< sizeof(TAddressType)==8u, unsigned long long int * >
reinterprets a address as an 64bit value for atomicCas emulation usage
static __device__ auto reinterpretAddress(TAddressType *address) -> std::enable_if_t< sizeof(TAddressType)==4u, unsigned int * >
reinterprets an address as an 32bit value for atomicCas emulation usage