25 #include <type_traits>
28 #ifdef ALPAKA_ACC_SYCL_ENABLED
31 # pragma clang diagnostic push
32 # pragma clang diagnostic ignored "-Wunused-lambda-capture"
33 # pragma clang diagnostic ignored "-Wunused-parameter"
36 # include <sycl/sycl.hpp>
38 # define LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(sub_group_size) \
40 sycl::nd_range<TDim::value>{global_size, local_size}, \
41 [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args]( \
42 sycl::nd_item<TDim::value> work_item) [[intel::reqd_sub_group_size(sub_group_size)]] \
44 auto acc = TAcc{item_elements, work_item, dyn_shared_accessor, st_shared_accessor}; \
46 [k_func, &acc](typename std::decay_t<TArgs> const&... args) { k_func(acc, args...); }, \
50 # define LAUNCH_SYCL_KERNEL_WITH_DEFAULT_SUBGROUP_SIZE \
52 sycl::nd_range<TDim::value>{global_size, local_size}, \
53 [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args]( \
54 sycl::nd_item<TDim::value> work_item) \
56 auto acc = TAcc{item_elements, work_item, dyn_shared_accessor, st_shared_accessor}; \
58 [k_func, &acc](typename std::decay_t<TArgs> const&... args) { k_func(acc, args...); }, \
62 # define THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL \
63 throw sycl::exception(sycl::make_error_code(sycl::errc::kernel_not_supported)); \
65 sycl::nd_range<TDim::value>{global_size, local_size}, \
66 [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args]( \
67 sycl::nd_item<TDim::value> work_item) {});
72 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
73 class TaskKernelGenericSycl final :
public WorkDivMembers<TDim, TIdx>
76 static_assert(TDim::value > 0 && TDim::value <= 3,
"Invalid kernel dimensionality");
78 template<
typename TWorkDiv>
79 TaskKernelGenericSycl(TWorkDiv&& workDiv, TKernelFnObj
const& kernelFnObj, TArgs&&... args)
81 , m_kernelFnObj{kernelFnObj}
82 , m_args{std::forward<TArgs>(args)...}
86 auto operator()(sycl::handler& cgh)
const ->
void
92 auto const global_size = get_global_size(work_groups, group_items);
93 auto const local_size = get_local_size(group_items);
96 auto const dyn_shared_mem_bytes =
std::max(
99 [&](std::decay_t<TArgs>
const&... args) {
100 return getBlockSharedMemDynSizeBytes<TAcc>(m_kernelFnObj, group_items, item_elements, args...);
104 auto dyn_shared_accessor = sycl::local_accessor<std::byte>{sycl::range<1>{dyn_shared_mem_bytes}, cgh};
108 auto st_shared_accessor = sycl::local_accessor<std::byte>{sycl::range<1>{st_shared_mem_bytes}, cgh};
111 auto k_func = m_kernelFnObj;
112 auto k_args = m_args;
114 constexpr std::size_t sub_group_size = trait::warpSize<TKernelFnObj, TAcc>;
115 bool supported =
false;
117 if constexpr(sub_group_size == 0)
120 LAUNCH_SYCL_KERNEL_WITH_DEFAULT_SUBGROUP_SIZE
125 # if(SYCL_SUBGROUP_SIZE == 0)
127 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(sub_group_size)
131 if constexpr(sub_group_size == 4)
133 # if(SYCL_SUBGROUP_SIZE & 4)
134 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(4)
138 THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL
143 if constexpr(sub_group_size == 8)
145 # if(SYCL_SUBGROUP_SIZE & 8)
146 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(8)
150 THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL
155 if constexpr(sub_group_size == 16)
157 # if(SYCL_SUBGROUP_SIZE & 16)
158 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(16)
162 THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL
167 if constexpr(sub_group_size == 32)
169 # if(SYCL_SUBGROUP_SIZE & 32)
170 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(32)
174 THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL
179 if constexpr(sub_group_size == 64)
181 # if(SYCL_SUBGROUP_SIZE & 64)
182 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(64)
186 THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL
193 throw sycl::exception(sycl::make_error_code(sycl::errc::kernel_not_supported));
197 static constexpr
auto is_sycl_task =
true;
199 static constexpr
auto is_sycl_kernel =
true;
202 auto get_global_size(Vec<TDim, TIdx>
const& work_groups, Vec<TDim, TIdx>
const& group_items)
const
204 if constexpr(TDim::value == 1)
205 return sycl::range<1>{
static_cast<std::size_t
>(work_groups[0] * group_items[0])};
206 else if constexpr(TDim::value == 2)
207 return sycl::range<2>{
208 static_cast<std::size_t
>(work_groups[1] * group_items[1]),
209 static_cast<std::size_t
>(work_groups[0] * group_items[0])};
211 return sycl::range<3>{
212 static_cast<std::size_t
>(work_groups[2] * group_items[2]),
213 static_cast<std::size_t
>(work_groups[1] * group_items[1]),
214 static_cast<std::size_t
>(work_groups[0] * group_items[0])};
217 auto get_local_size(Vec<TDim, TIdx>
const& group_items)
const
219 if constexpr(TDim::value == 1)
220 return sycl::range<1>{
static_cast<std::size_t
>(group_items[0])};
221 else if constexpr(TDim::value == 2)
222 return sycl::range<2>{
223 static_cast<std::size_t
>(group_items[1]),
224 static_cast<std::size_t
>(group_items[0])};
226 return sycl::range<3>{
227 static_cast<std::size_t
>(group_items[2]),
228 static_cast<std::size_t
>(group_items[1]),
229 static_cast<std::size_t
>(group_items[0])};
233 TKernelFnObj m_kernelFnObj;
234 std::tuple<std::decay_t<TArgs>...> m_args;
239 # if BOOST_COMP_CLANG
240 # pragma clang diagnostic pop
246 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
247 struct AccType<TaskKernelGenericSycl<TAcc, TDim, TIdx, TKernelFnObj, TArgs...>>
253 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
254 struct DevType<TaskKernelGenericSycl<TAcc, TDim, TIdx, TKernelFnObj, TArgs...>>
256 using type =
typename DevType<TAcc>::type;
260 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
261 struct PlatformType<TaskKernelGenericSycl<TAcc, TDim, TIdx, TKernelFnObj, TArgs...>>
263 using type =
typename PlatformType<TAcc>::type;
267 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
268 struct DimType<TaskKernelGenericSycl<TAcc, TDim, TIdx, TKernelFnObj, TArgs...>>
274 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
275 struct IdxType<TaskKernelGenericSycl<TAcc, TDim, TIdx, TKernelFnObj, TArgs...>>
281 # undef LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS
#define ALPAKA_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB
Vec< TDim, TIdx > m_blockThreadExtent
Vec< TDim, TIdx > m_gridBlockExtent
Vec< TDim, TIdx > m_threadElemExtent
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric va...
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC WorkDivMembers(alpaka::Vec< TDim, TIdx > const &gridBlockExtent, alpaka::Vec< TDim, TIdx > const &blockThreadExtent, alpaka::Vec< TDim, TIdx > const &elemExtent) -> WorkDivMembers< TDim, TIdx >
Deduction guide for the constructor which can be called without explicit template type parameters.