14#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
21# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
28# if !defined(ALPAKA_HOST_ONLY)
30# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !ALPAKA_LANG_CUDA
31# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
34# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !ALPAKA_LANG_HIP
35# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
55# if defined(__CUDA_ARCH__)
58# elif defined(__HIP_DEVICE_COMPILE__)
63# elif defined(__GFX10__) or defined(__GFX11__) or defined(__GFX12__)
68# ifdef ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
69 return ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
71# error The current AMD GPU architucture is not supported by this version of alpaka. You can define a default wavefront size setting the preprocessor macro ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
86# if defined(__CUDA_ARCH__)
89# elif defined(__HIP_DEVICE_COMPILE__)
94# elif defined(__GFX10__) or defined(__GFX11__) or defined(__GFX12__)
99# ifdef ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
100 return ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
102# error The current AMD GPU architucture is not supported by this version of alpaka. You can define a default wavefront size setting the preprocessor macro ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
118# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) \
119 || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
120 return __activemask();
131 static __device__
auto all(
133 std::int32_t predicate) -> std::int32_t
135# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) \
136 || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
137 return __all_sync(
activemask(warp), predicate);
139 return __all(predicate);
147 static __device__
auto any(
149 std::int32_t predicate) -> std::int32_t
151# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) \
152 || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
153 return __any_sync(
activemask(warp), predicate);
155 return __any(predicate);
167# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) \
168 || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
169 return __ballot_sync(
activemask(warp), predicate);
171 return __ballot(predicate);
184 std::int32_t width) -> T
186# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) \
187 || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
188 return __shfl_sync(
activemask(warp), val, srcLane, width);
190 return __shfl(val, srcLane, width);
202 std::uint32_t offset,
203 std::int32_t width) -> T
205# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) \
206 || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
207 return __shfl_up_sync(
activemask(warp), val, offset, width);
209 return __shfl_up(val, offset, width);
221 std::uint32_t offset,
222 std::int32_t width) -> T
224# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) \
225 || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
226 return __shfl_down_sync(
activemask(warp), val, offset, width);
228 return __shfl_down(val, offset, width);
241 std::int32_t width) -> T
243# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) \
244 || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
245 return __shfl_xor_sync(
activemask(warp), val, mask, width);
247 return __shfl_xor(val, mask, width);
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto activemask(TWarp const &warp) -> typename TWarp::mask_type
Returns a 32- or 64-bit unsigned integer (depending on the accelerator) whose Nth bit is set if and o...
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
The ballot warp vote trait.
The compile-time warp size trait.
The warp size upper-limit trait.
The shfl down warp swizzling trait.
The shfl up warp swizzling trait.
The shfl xor warp swizzling trait.
The shfl warp swizzling trait.