13#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
22# if !defined(ALPAKA_HOST_ONLY)
24# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
25# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
28# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
29# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
44 struct Activemask<WarpUniformCudaHipBuiltIn>
46 __device__
static auto activemask(warp::WarpUniformCudaHipBuiltIn
const& )
47# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
53# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
54 return __activemask();
63 struct All<WarpUniformCudaHipBuiltIn>
65 __device__
static auto all(
66 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn
const& warp,
67 std::int32_t predicate) -> std::int32_t
69# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
70 return __all_sync(0xffff'ffff, predicate);
72 return __all(predicate);
78 struct Any<WarpUniformCudaHipBuiltIn>
80 __device__
static auto any(
81 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn
const& warp,
82 std::int32_t predicate) -> std::int32_t
84# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
85 return __any_sync(0xffff'ffff, predicate);
87 return __any(predicate);
93 struct Ballot<WarpUniformCudaHipBuiltIn>
95 __device__
static auto ballot(
96 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn
const& warp,
97 std::int32_t predicate)
99# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
105# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
106 return __ballot_sync(0xffff'ffff, predicate);
108 return __ballot(predicate);
114 struct Shfl<WarpUniformCudaHipBuiltIn>
117 __device__
static auto shfl(
118 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn
const& warp,
121 std::int32_t width) -> T
123# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
124 return __shfl_sync(0xffff'ffff, val, srcLane, width);
126 return __shfl(val, srcLane, width);
132 struct ShflUp<WarpUniformCudaHipBuiltIn>
135 __device__
static auto shfl_up(
136 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn
const& warp,
138 std::uint32_t offset,
139 std::int32_t width) -> T
141# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
142 return __shfl_up_sync(0xffff'ffff, val, offset, width);
144 return __shfl_up(val, offset, width);
150 struct ShflDown<WarpUniformCudaHipBuiltIn>
154 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn
const& warp,
156 std::uint32_t offset,
157 std::int32_t width) -> T
159# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
160 return __shfl_down_sync(0xffff'ffff, val, offset, width);
162 return __shfl_down(val, offset, width);
168 struct ShflXor<WarpUniformCudaHipBuiltIn>
172 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn
const& warp,
175 std::int32_t width) -> T
177# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
178 return __shfl_xor_sync(0xffff'ffff, val, mask, width);
180 return __shfl_xor(val, mask, width);
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto ballot(TWarp const &warp, std::int32_t predicate)
Evaluates predicate for all non-exited threads in a warp and returns a 32- or 64-bit unsigned integer...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_up(TWarp const &warp, T value, std::uint32_t offset, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane with lower ID relative to caller....
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto activemask(TWarp const &warp) -> decltype(trait::Activemask< interface::ImplementationBase< ConceptWarp, TWarp > >::activemask(warp))
Returns a 32- or 64-bit unsigned integer (depending on the accelerator) whose Nth bit is set if and o...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto any(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_down(TWarp const &warp, T value, std::uint32_t offset, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane with higher ID relative to caller....
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto getSize(TWarp const &warp) -> std::int32_t
Returns warp size.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_xor(TWarp const &warp, T value, std::int32_t mask, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane based on bitwise XOR of own lane I...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl(TWarp const &warp, T value, std::int32_t srcLane, std::int32_t width=0)
Exchange data between threads within a warp.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...