12 #include <type_traits>
14 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
27 # if !defined(ALPAKA_HOST_ONLY)
29 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
30 # error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
33 # if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
34 # error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
38 # define CLANG_CUDA_PTX_WORKAROUND \
39 (BOOST_COMP_CLANG && BOOST_LANG_CUDA && BOOST_ARCH_PTX < BOOST_VERSION_NUMBER(6, 0, 0))
59 template<
typename TOp,
typename T,
typename THierarchy,
typename TSfinae =
void>
65 template<
typename T,
typename THierarchy>
71 decltype(atomicCAS(alpaka::core::declval<T*>(), alpaka::core::declval<T>(), alpaka::core::declval<T>()))>>
74 static __device__ T
atomic(T* add, T compare, T value)
76 return atomicCAS(add, compare, value);
80 # if !CLANG_CUDA_PTX_WORKAROUND
86 typename std::void_t<decltype(atomicCAS_block(
87 alpaka::core::declval<T*>(),
88 alpaka::core::declval<T>(),
89 alpaka::core::declval<T>()))>> : std::true_type
91 static __device__ T
atomic(T* add, T compare, T value)
93 return atomicCAS_block(add, compare, value);
100 template<
typename T,
typename THierarchy>
105 typename std::void_t<decltype(atomicAdd(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
108 static __device__ T
atomic(T* add, T value)
115 # if !CLANG_CUDA_PTX_WORKAROUND
121 typename std::void_t<decltype(atomicAdd_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
124 static __device__ T
atomic(T* add, T value)
126 return atomicAdd_block(add, value);
131 # if CLANG_CUDA_PTX_WORKAROUND
133 template<
typename THierarchy>
151 template<
typename T,
typename THierarchy>
156 typename std::void_t<decltype(atomicSub(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
159 static __device__ T
atomic(T* add, T value)
165 # if !CLANG_CUDA_PTX_WORKAROUND
171 typename std::void_t<decltype(atomicSub_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
174 static __device__ T
atomic(T* add, T value)
176 return atomicSub_block(add, value);
182 template<
typename T,
typename THierarchy>
187 typename std::void_t<decltype(atomicMin(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
190 static __device__ T
atomic(T* add, T value)
196 # if !CLANG_CUDA_PTX_WORKAROUND
202 typename std::void_t<decltype(atomicMin_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
205 static __device__ T
atomic(T* add, T value)
207 return atomicMin_block(add, value);
214 template<
typename THierarchy>
224 template<
typename THierarchy>
234 # if !__has_builtin(__hip_atomic_compare_exchange_strong)
235 template<
typename THierarchy>
249 template<
typename T,
typename THierarchy>
254 typename std::void_t<decltype(atomicMax(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
257 static __device__ T
atomic(T* add, T value)
263 # if !CLANG_CUDA_PTX_WORKAROUND
269 typename std::void_t<decltype(atomicMax_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
272 static __device__ T
atomic(T* add, T value)
274 return atomicMax_block(add, value);
281 template<
typename THierarchy>
291 template<
typename THierarchy>
301 # if !__has_builtin(__hip_atomic_compare_exchange_strong)
302 template<
typename THierarchy>
317 template<
typename T,
typename THierarchy>
322 typename std::void_t<decltype(atomicExch(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
325 static __device__ T
atomic(T* add, T value)
331 # if !CLANG_CUDA_PTX_WORKAROUND
337 typename std::void_t<decltype(atomicExch_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
340 static __device__ T
atomic(T* add, T value)
342 return atomicExch_block(add, value);
349 template<
typename T,
typename THierarchy>
354 typename std::void_t<decltype(atomicInc(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
357 static __device__ T
atomic(T* add, T value)
363 # if !CLANG_CUDA_PTX_WORKAROUND
369 typename std::void_t<decltype(atomicInc_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
372 static __device__ T
atomic(T* add, T value)
374 return atomicInc_block(add, value);
381 template<
typename T,
typename THierarchy>
386 typename std::void_t<decltype(atomicDec(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
389 static __device__ T
atomic(T* add, T value)
395 # if !CLANG_CUDA_PTX_WORKAROUND
401 typename std::void_t<decltype(atomicDec_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
404 static __device__ T
atomic(T* add, T value)
406 return atomicDec_block(add, value);
413 template<
typename T,
typename THierarchy>
418 typename std::void_t<decltype(atomicAnd(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
421 static __device__ T
atomic(T* add, T value)
427 # if !CLANG_CUDA_PTX_WORKAROUND
433 typename std::void_t<decltype(atomicAnd_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
436 static __device__ T
atomic(T* add, T value)
438 return atomicAnd_block(add, value);
445 template<
typename T,
typename THierarchy>
450 typename std::void_t<decltype(atomicOr(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
453 static __device__ T
atomic(T* add, T value)
459 # if !CLANG_CUDA_PTX_WORKAROUND
465 typename std::void_t<decltype(atomicOr_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
468 static __device__ T
atomic(T* add, T value)
470 return atomicOr_block(add, value);
477 template<
typename T,
typename THierarchy>
482 typename std::void_t<decltype(atomicXor(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
485 static __device__ T
atomic(T* add, T value)
491 # if !CLANG_CUDA_PTX_WORKAROUND
497 typename std::void_t<decltype(atomicXor_block(alpaka::core::declval<T*>(), alpaka::core::declval<T>()))>>
500 static __device__ T
atomic(T* add, T value)
502 return atomicXor_block(add, value);
509 # undef CLANG_CUDA_PTX_WORKAROUND
These types must be in the global namespace for checking existence of respective functions in global ...
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicXor(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic xor operation.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicExch(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic exchange operation.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicSub(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic sub operation.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicMin(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic min operation.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicInc(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic increment operation.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicDec(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic decrement operation.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicMax(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic max operation.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicAdd(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic add operation.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicOr(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic or operation.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicAnd(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic and operation.
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T compare, T value)
static __device__ T atomic(T *add, T compare, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
static __device__ T atomic(T *add, T value)
Provide an interface to builtin atomic functions.