18 #ifdef ALPAKA_ACC_SYCL_ENABLED
20 # include <sycl/sycl.hpp>
25 template<
typename TDim>
26 class WarpGenericSycl :
public interface::Implements<alpaka::warp::ConceptWarp, WarpGenericSycl<TDim>>
29 WarpGenericSycl(sycl::nd_item<TDim::value> my_item) : m_item_warp{my_item}
33 sycl::nd_item<TDim::value> m_item_warp;
39 template<
typename TDim>
40 struct GetSize<warp::WarpGenericSycl<TDim>>
42 static auto getSize(warp::WarpGenericSycl<TDim>
const& warp) -> std::int32_t
44 auto const sub_group = warp.m_item_warp.get_sub_group();
46 return static_cast<std::int32_t
>(sub_group.get_max_local_range()[0]);
50 template<
typename TDim>
51 struct Activemask<warp::WarpGenericSycl<TDim>>
56 static auto activemask(warp::WarpGenericSycl<TDim>
const& warp) -> std::uint32_t
58 static_assert(!
sizeof(warp),
"activemask is not supported on SYCL");
73 return ~std::uint32_t{0};
77 template<
typename TDim>
78 struct All<warp::WarpGenericSycl<TDim>>
80 static auto all(warp::WarpGenericSycl<TDim>
const& warp, std::int32_t predicate) -> std::int32_t
82 auto const sub_group = warp.m_item_warp.get_sub_group();
83 return static_cast<std::int32_t
>(sycl::all_of_group(sub_group,
static_cast<bool>(predicate)));
87 template<
typename TDim>
88 struct Any<warp::WarpGenericSycl<TDim>>
90 static auto any(warp::WarpGenericSycl<TDim>
const& warp, std::int32_t predicate) -> std::int32_t
92 auto const sub_group = warp.m_item_warp.get_sub_group();
93 return static_cast<std::int32_t
>(sycl::any_of_group(sub_group,
static_cast<bool>(predicate)));
97 template<
typename TDim>
98 struct Ballot<warp::WarpGenericSycl<TDim>>
103 static auto ballot(warp::WarpGenericSycl<TDim>
const& warp, std::int32_t predicate) -> std::uint32_t
105 auto const sub_group = warp.m_item_warp.get_sub_group();
106 auto const mask = sycl::ext::oneapi::group_ballot(sub_group,
static_cast<bool>(predicate));
110 std::uint32_t bits = 0;
111 mask.extract_bits(bits);
116 template<
typename TDim>
117 struct Shfl<warp::WarpGenericSycl<TDim>>
120 static auto shfl(warp::WarpGenericSycl<TDim>
const& warp, T value, std::int32_t srcLane, std::int32_t width)
131 auto const actual_group = warp.m_item_warp.get_sub_group();
132 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
133 std::uint32_t
const start_index = actual_group.get_local_linear_id() / w * w;
134 return sycl::select_from_group(actual_group, value, start_index +
static_cast<std::uint32_t
>(srcLane) % w);
138 template<
typename TDim>
139 struct ShflUp<warp::WarpGenericSycl<TDim>>
143 warp::WarpGenericSycl<TDim>
const& warp,
148 auto const actual_group = warp.m_item_warp.get_sub_group();
149 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
150 std::uint32_t
const id = actual_group.get_local_linear_id();
151 std::uint32_t
const start_index =
id / w * w;
152 T result = sycl::shift_group_right(actual_group, value,
offset);
153 if((
id - start_index) <
offset)
161 template<
typename TDim>
162 struct ShflDown<warp::WarpGenericSycl<TDim>>
166 warp::WarpGenericSycl<TDim>
const& warp,
171 auto const actual_group = warp.m_item_warp.get_sub_group();
172 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
173 std::uint32_t
const id = actual_group.get_local_linear_id();
174 std::uint32_t
const end_index = (
id / w + 1) * w;
175 T result = sycl::shift_group_left(actual_group, value,
offset);
176 if((
id +
offset) >= end_index)
184 template<
typename TDim>
185 struct ShflXor<warp::WarpGenericSycl<TDim>>
188 static auto shfl_xor(warp::WarpGenericSycl<TDim>
const& warp, T value, std::int32_t mask, std::int32_t width)
190 auto const actual_group = warp.m_item_warp.get_sub_group();
191 std::uint32_t
const w =
static_cast<std::uint32_t
>(width);
192 std::uint32_t
const id = actual_group.get_local_linear_id();
193 std::uint32_t
const start_index =
id / w * w;
194 std::uint32_t
const target_offset = (
id % w) ^
static_cast<std::uint32_t
>(mask);
195 return sycl::select_from_group(actual_group, value, target_offset < w ? start_index + target_offset :
id);
#define ALPAKA_ASSERT_ACC(...)
ALPAKA_ASSERT_ACC is an assert-like macro.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto ballot(TWarp const &warp, std::int32_t predicate)
Evaluates predicate for all non-exited threads in a warp and returns a 32- or 64-bit unsigned integer...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_up(TWarp const &warp, T value, std::uint32_t offset, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane with lower ID relative to caller....
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto any(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_down(TWarp const &warp, T value, std::uint32_t offset, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane with higher ID relative to caller....
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto activemask(TWarp const &warp) -> decltype(trait::Activemask< interface::ImplementationBase< ConceptWarp, TWarp >>::activemask(warp))
Returns a 32- or 64-bit unsigned integer (depending on the accelerator) whose Nth bit is set if and o...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto getSize(TWarp const &warp) -> std::int32_t
Returns warp size.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl_xor(TWarp const &warp, T value, std::int32_t mask, std::int32_t width=0)
Exchange data between threads within a warp. It copies from a lane based on bitwise XOR of own lane I...
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto shfl(TWarp const &warp, T value, std::int32_t srcLane, std::int32_t width=0)
Exchange data between threads within a warp.