18#ifdef ALPAKA_ACC_SYCL_ENABLED
20# include <sycl/sycl.hpp>
25 template<
typename TDim>
26 class WarpGenericSycl :
public interface::Implements<alpaka::warp::ConceptWarp, WarpGenericSycl<TDim>>
29 using mask_type = std::uint32_t;
31 WarpGenericSycl(sycl::nd_item<TDim::value> my_item) : m_item_warp{my_item}
35 sycl::nd_item<TDim::value> m_item_warp;
42 template<
typename TDim>
43 struct GetSize<warp::WarpGenericSycl<TDim>>
45 static auto getSize(warp::WarpGenericSycl<TDim>
const& warp) -> std::int32_t
47 auto const sub_group = warp.m_item_warp.get_sub_group();
49 return static_cast<std::int32_t
>(sub_group.get_max_local_range()[0]);
53 template<
typename TDim>
54 struct GetSizeCompileTime<warp::WarpGenericSycl<TDim>>
63 template<
typename TDim>
64 struct GetSizeUpperLimit<warp::WarpGenericSycl<TDim>>
73 template<
typename TDim>
74 struct Activemask<warp::WarpGenericSycl<TDim>>
79 static auto activemask(warp::WarpGenericSycl<TDim>
const& ) -> warp::WarpGenericSycl<TDim>::mask_type
81 sycl::sub_group sg = sycl::ext::oneapi::this_work_item::get_sub_group();
82 auto const mask = sycl::ext::oneapi::group_ballot(sg,
true);
83 std::uint32_t bits = 0;
84 mask.extract_bits(bits);
89 template<
typename TDim>
90 struct All<warp::WarpGenericSycl<TDim>>
92 static auto all(warp::WarpGenericSycl<TDim>
const& , std::int32_t predicate) -> std::int32_t
94 auto activegroup = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
95 return static_cast<std::int32_t
>(sycl::all_of_group(activegroup,
static_cast<bool>(predicate)));
99 template<
typename TDim>
100 struct Any<warp::WarpGenericSycl<TDim>>
102 static auto any(warp::WarpGenericSycl<TDim>
const& , std::int32_t predicate) -> std::int32_t
104 auto activegroup = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
105 return static_cast<std::int32_t
>(sycl::any_of_group(activegroup,
static_cast<bool>(predicate)));
109 template<
typename TDim>
110 struct Ballot<warp::WarpGenericSycl<TDim>>
115 static auto ballot(warp::WarpGenericSycl<TDim>
const& , std::int32_t predicate)
116 -> warp::WarpGenericSycl<TDim>::mask_type
118 auto sub_group = sycl::ext::oneapi::this_work_item::get_sub_group();
119 auto const mask = sycl::ext::oneapi::group_ballot(sub_group,
static_cast<bool>(predicate));
123 std::uint32_t bits = 0;
124 mask.extract_bits(bits);
129 template<
typename TDim>
130 struct Shfl<warp::WarpGenericSycl<TDim>>
134 warp::WarpGenericSycl<TDim>
const& ,
136 std::int32_t srcLane,
148 auto actual_group = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
149 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
150 std::uint32_t
const start_index = actual_group.get_local_linear_id() / w * w;
151 return sycl::select_from_group(actual_group, value, start_index +
static_cast<std::uint32_t
>(srcLane) % w);
155 template<
typename TDim>
156 struct ShflUp<warp::WarpGenericSycl<TDim>>
160 warp::WarpGenericSycl<TDim>
const& ,
162 std::uint32_t offset,
165 auto actual_group = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
166 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
167 std::uint32_t
const id = actual_group.get_local_linear_id();
168 std::uint32_t
const start_index =
id / w * w;
169 T result = sycl::shift_group_right(actual_group, value, offset);
170 if((
id - start_index) < offset)
178 template<
typename TDim>
179 struct ShflDown<warp::WarpGenericSycl<TDim>>
183 warp::WarpGenericSycl<TDim>
const& ,
185 std::uint32_t offset,
188 auto actual_group = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
189 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
190 std::uint32_t
const id = actual_group.get_local_linear_id();
191 std::uint32_t
const end_index = (
id / w + 1) * w;
192 T result = sycl::shift_group_left(actual_group, value, offset);
193 if((
id + offset) >= end_index)
201 template<
typename TDim>
202 struct ShflXor<warp::WarpGenericSycl<TDim>>
206 warp::WarpGenericSycl<TDim>
const& ,
211 auto actual_group = sycl::ext::oneapi::experimental::this_kernel::get_opportunistic_group();
212 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
213 std::uint32_t
const id = actual_group.get_local_linear_id();
214 std::uint32_t
const start_index =
id / w * w;
215 std::uint32_t
const target_offset = (
id % w) ^
static_cast<std::uint32_t
>(mask);
216 return sycl::select_from_group(actual_group, value, target_offset < w ? start_index + target_offset : id);
#define ALPAKA_ASSERT_ACC(...)
ALPAKA_ASSERT_ACC is an assert-like macro.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto ballot(TWarp const &warp, std::int32_t predicate) -> typename TWarp::mask_type
Evaluates predicate for all non-exited threads in a warp and returns a 32- or 64-bit unsigned integer...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_up(TWarp const &warp, T value, std::uint32_t offset, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane with lower ID relative to caller....
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC constexpr auto getSizeUpperLimit() -> std::int32_t
If the warp size is available as a compile-time constant returns its value; otherwise returns an uppe...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto any(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_down(TWarp const &warp, T value, std::uint32_t offset, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane with higher ID relative to caller....
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto getSize(TWarp const &warp) -> std::int32_t
Returns warp size.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_xor(TWarp const &warp, T value, std::int32_t mask, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane based on bitwise XOR of own lane I...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto activemask(TWarp const &warp) -> typename TWarp::mask_type
Returns a 32- or 64-bit unsigned integer (depending on the accelerator) whose Nth bit is set if and o...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC constexpr auto getSizeCompileTime() -> std::int32_t
If the warp size is available as a compile-time constant returns its value; otherwise returns 0.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl(TWarp const &warp, T value, std::int32_t srcLane, std::int32_t width=0)
Exchange data between threads within a warp.