18#ifdef ALPAKA_ACC_SYCL_ENABLED
20# include <sycl/sycl.hpp>
25 template<
typename TDim>
26 class WarpGenericSycl :
public interface::Implements<alpaka::warp::ConceptWarp, WarpGenericSycl<TDim>>
29 WarpGenericSycl(sycl::nd_item<TDim::value> my_item) : m_item_warp{my_item}
33 sycl::nd_item<TDim::value> m_item_warp;
39 template<
typename TDim>
40 struct GetSize<warp::WarpGenericSycl<TDim>>
42 static auto getSize(warp::WarpGenericSycl<TDim>
const& warp) -> std::int32_t
44 auto const sub_group = warp.m_item_warp.get_sub_group();
46 return static_cast<std::int32_t
>(sub_group.get_max_local_range()[0]);
50 template<
typename TDim>
51 struct GetSizeCompileTime<warp::WarpGenericSycl<TDim>>
60 template<
typename TDim>
61 struct GetSizeUpperLimit<warp::WarpGenericSycl<TDim>>
70 template<
typename TDim>
71 struct Activemask<warp::WarpGenericSycl<TDim>>
76 static auto activemask(warp::WarpGenericSycl<TDim>
const& ) -> std::uint32_t
78 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
79 auto const mask = sycl::ext::oneapi::group_ballot(sg,
true);
80 std::uint32_t bits = 0;
81 mask.extract_bits(bits);
86 template<
typename TDim>
87 struct All<warp::WarpGenericSycl<TDim>>
89 static auto all(warp::WarpGenericSycl<TDim>
const& , std::int32_t predicate) -> std::int32_t
91 auto activegroup = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
92 return static_cast<std::int32_t
>(sycl::all_of_group(activegroup,
static_cast<bool>(predicate)));
96 template<
typename TDim>
97 struct Any<warp::WarpGenericSycl<TDim>>
99 static auto any(warp::WarpGenericSycl<TDim>
const& , std::int32_t predicate) -> std::int32_t
101 auto activegroup = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
102 return static_cast<std::int32_t
>(sycl::any_of_group(activegroup,
static_cast<bool>(predicate)));
106 template<
typename TDim>
107 struct Ballot<warp::WarpGenericSycl<TDim>>
112 static auto ballot(warp::WarpGenericSycl<TDim>
const& , std::int32_t predicate) -> std::uint32_t
114 auto sub_group = sycl::ext::oneapi::this_work_item::get_sub_group();
115 auto const mask = sycl::ext::oneapi::group_ballot(sub_group,
static_cast<bool>(predicate));
119 std::uint32_t bits = 0;
120 mask.extract_bits(bits);
125 template<
typename TDim>
126 struct Shfl<warp::WarpGenericSycl<TDim>>
130 warp::WarpGenericSycl<TDim>
const& ,
132 std::int32_t srcLane,
144 auto actual_group = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
145 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
146 std::uint32_t
const start_index = actual_group.get_local_linear_id() / w * w;
147 return sycl::select_from_group(actual_group, value, start_index +
static_cast<std::uint32_t
>(srcLane) % w);
151 template<
typename TDim>
152 struct ShflUp<warp::WarpGenericSycl<TDim>>
156 warp::WarpGenericSycl<TDim>
const& ,
158 std::uint32_t offset,
161 auto actual_group = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
162 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
163 std::uint32_t
const id = actual_group.get_local_linear_id();
164 std::uint32_t
const start_index =
id / w * w;
165 T result = sycl::shift_group_right(actual_group, value, offset);
166 if((
id - start_index) < offset)
174 template<
typename TDim>
175 struct ShflDown<warp::WarpGenericSycl<TDim>>
179 warp::WarpGenericSycl<TDim>
const& ,
181 std::uint32_t offset,
184 auto actual_group = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
185 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
186 std::uint32_t
const id = actual_group.get_local_linear_id();
187 std::uint32_t
const end_index = (
id / w + 1) * w;
188 T result = sycl::shift_group_left(actual_group, value, offset);
189 if((
id + offset) >= end_index)
197 template<
typename TDim>
198 struct ShflXor<warp::WarpGenericSycl<TDim>>
202 warp::WarpGenericSycl<TDim>
const& ,
207 auto actual_group = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
208 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
209 std::uint32_t
const id = actual_group.get_local_linear_id();
210 std::uint32_t
const start_index =
id / w * w;
211 std::uint32_t
const target_offset = (
id % w) ^
static_cast<std::uint32_t
>(mask);
212 return sycl::select_from_group(actual_group, value, target_offset < w ? start_index + target_offset : id);
#define ALPAKA_ASSERT_ACC(...)
ALPAKA_ASSERT_ACC is an assert-like macro.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto ballot(TWarp const &warp, std::int32_t predicate)
Evaluates predicate for all non-exited threads in a warp and returns a 32- or 64-bit unsigned integer...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_up(TWarp const &warp, T value, std::uint32_t offset, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane with lower ID relative to caller....
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto activemask(TWarp const &warp) -> decltype(trait::Activemask< interface::ImplementationBase< ConceptWarp, TWarp > >::activemask(warp))
Returns a 32- or 64-bit unsigned integer (depending on the accelerator) whose Nth bit is set if and o...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC constexpr auto getSizeUpperLimit() -> std::int32_t
If the warp size is available as a compile-time constant returns its value; otherwise returns an uppe...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto any(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_down(TWarp const &warp, T value, std::uint32_t offset, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane with higher ID relative to caller....
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto getSize(TWarp const &warp) -> std::int32_t
Returns warp size.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_xor(TWarp const &warp, T value, std::int32_t mask, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane based on bitwise XOR of own lane I...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC constexpr auto getSizeCompileTime() -> std::int32_t
If the warp size is available as a compile-time constant returns its value; otherwise returns 0.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl(TWarp const &warp, T value, std::int32_t srcLane, std::int32_t width=0)
Exchange data between threads within a warp.