alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
WarpUniformCudaHipBuiltIn.hpp
Go to the documentation of this file.
1/* Copyright 2023 Sergei Bastrakov, David M. Rogers, Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber, Aurora Perego
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10
11#include <cstdint>
12
13#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
14
15namespace alpaka::warp
16{
17 //! The GPU CUDA/HIP warp.
18 class WarpUniformCudaHipBuiltIn : public interface::Implements<ConceptWarp, WarpUniformCudaHipBuiltIn>
19 {
20 };
21
22# if !defined(ALPAKA_HOST_ONLY)
23
24# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
25# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
26# endif
27
28# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
29# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
30# endif
31
32 namespace trait
33 {
34 template<>
35 struct GetSize<WarpUniformCudaHipBuiltIn>
36 {
37 __device__ static auto getSize(warp::WarpUniformCudaHipBuiltIn const& /*warp*/) -> std::int32_t
38 {
39 return warpSize;
40 }
41 };
42
43 template<>
44 struct Activemask<WarpUniformCudaHipBuiltIn>
45 {
46 __device__ static auto activemask(warp::WarpUniformCudaHipBuiltIn const& /*warp*/)
47# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
48 -> std::uint32_t
49# else
50 -> std::uint64_t
51# endif
52 {
53# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
54 return __activemask();
55# else
56 // No HIP intrinsic for it, emulate via ballot
57 return __ballot(1);
58# endif
59 }
60 };
61
62 template<>
63 struct All<WarpUniformCudaHipBuiltIn>
64 {
65 __device__ static auto all(
66 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
67 std::int32_t predicate) -> std::int32_t
68 {
69# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
70 return __all_sync(0xffff'ffff, predicate);
71# else
72 return __all(predicate);
73# endif
74 }
75 };
76
77 template<>
78 struct Any<WarpUniformCudaHipBuiltIn>
79 {
80 __device__ static auto any(
81 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
82 std::int32_t predicate) -> std::int32_t
83 {
84# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
85 return __any_sync(0xffff'ffff, predicate);
86# else
87 return __any(predicate);
88# endif
89 }
90 };
91
92 template<>
93 struct Ballot<WarpUniformCudaHipBuiltIn>
94 {
95 __device__ static auto ballot(
96 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
97 std::int32_t predicate)
98 // return type is required by the compiler
99# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
100 -> std::uint32_t
101# else
102 -> std::uint64_t
103# endif
104 {
105# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
106 return __ballot_sync(0xffff'ffff, predicate);
107# else
108 return __ballot(predicate);
109# endif
110 }
111 };
112
113 template<>
114 struct Shfl<WarpUniformCudaHipBuiltIn>
115 {
116 template<typename T>
117 __device__ static auto shfl(
118 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
119 T val,
120 int srcLane,
121 std::int32_t width) -> T
122 {
123# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
124 return __shfl_sync(0xffff'ffff, val, srcLane, width);
125# else
126 return __shfl(val, srcLane, width);
127# endif
128 }
129 };
130
131 template<>
132 struct ShflUp<WarpUniformCudaHipBuiltIn>
133 {
134 template<typename T>
135 __device__ static auto shfl_up(
136 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
137 T val,
138 std::uint32_t offset,
139 std::int32_t width) -> T
140 {
141# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
142 return __shfl_up_sync(0xffff'ffff, val, offset, width);
143# else
144 return __shfl_up(val, offset, width);
145# endif
146 }
147 };
148
149 template<>
150 struct ShflDown<WarpUniformCudaHipBuiltIn>
151 {
152 template<typename T>
153 __device__ static auto shfl_down(
154 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
155 T val,
156 std::uint32_t offset,
157 std::int32_t width) -> T
158 {
159# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
160 return __shfl_down_sync(0xffff'ffff, val, offset, width);
161# else
162 return __shfl_down(val, offset, width);
163# endif
164 }
165 };
166
167 template<>
168 struct ShflXor<WarpUniformCudaHipBuiltIn>
169 {
170 template<typename T>
171 __device__ static auto shfl_xor(
172 [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
173 T val,
174 std::int32_t mask,
175 std::int32_t width) -> T
176 {
177# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
178 return __shfl_xor_sync(0xffff'ffff, val, mask, width);
179# else
180 return __shfl_xor(val, mask, width);
181# endif
182 }
183 };
184
185 } // namespace trait
186# endif
187} // namespace alpaka::warp
188
189#endif
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
Definition Traits.hpp:114
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto ballot(TWarp const &warp, std::int32_t predicate)
Evaluates predicate for all non-exited threads in a warp and returns a 32- or 64-bit unsigned integer...
Definition Traits.hpp:164
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_up(TWarp const &warp, T value, std::uint32_t offset, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane with lower ID relative to caller....
Definition Traits.hpp:236
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto activemask(TWarp const &warp) -> decltype(trait::Activemask< interface::ImplementationBase< ConceptWarp, TWarp > >::activemask(warp))
Returns a 32- or 64-bit unsigned integer (depending on the accelerator) whose Nth bit is set if and o...
Definition Traits.hpp:90
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto any(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
Definition Traits.hpp:137
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_down(TWarp const &warp, T value, std::uint32_t offset, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane with higher ID relative to caller....
Definition Traits.hpp:274
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto getSize(TWarp const &warp) -> std::int32_t
Returns warp size.
Definition Traits.hpp:65
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_xor(TWarp const &warp, T value, std::int32_t mask, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane based on bitwise XOR of own lane I...
Definition Traits.hpp:312
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl(TWarp const &warp, T value, std::int32_t srcLane, std::int32_t width=0)
Exchange data between threads within a warp.
Definition Traits.hpp:198
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15