27 #include <type_traits>
30 #ifdef ALPAKA_ACC_SYCL_ENABLED
33 # pragma clang diagnostic push
34 # pragma clang diagnostic ignored "-Wunused-lambda-capture"
35 # pragma clang diagnostic ignored "-Wunused-parameter"
38 # include <sycl/sycl.hpp>
40 # define LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(sub_group_size) \
42 sycl::nd_range<TDim::value>{global_size, local_size}, \
43 [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args]( \
44 sycl::nd_item<TDim::value> work_item) [[intel::reqd_sub_group_size(sub_group_size)]] \
46 auto acc = TAcc{item_elements, work_item, dyn_shared_accessor, st_shared_accessor}; \
48 [k_func, &acc](typename std::decay_t<TArgs> const&... args) { k_func(acc, args...); }, \
52 # define LAUNCH_SYCL_KERNEL_WITH_DEFAULT_SUBGROUP_SIZE \
54 sycl::nd_range<TDim::value>{global_size, local_size}, \
55 [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args]( \
56 sycl::nd_item<TDim::value> work_item) \
58 auto acc = TAcc{item_elements, work_item, dyn_shared_accessor, st_shared_accessor}; \
60 [k_func, &acc](typename std::decay_t<TArgs> const&... args) { k_func(acc, args...); }, \
64 # define THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL \
65 throw sycl::exception(sycl::make_error_code(sycl::errc::kernel_not_supported)); \
67 sycl::nd_range<TDim::value>{global_size, local_size}, \
68 [item_elements, dyn_shared_accessor, st_shared_accessor, k_func, k_args]( \
69 sycl::nd_item<TDim::value> work_item) {});
74 template<
concepts::Tag TTag,
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
75 class TaskKernelGenericSycl final :
public WorkDivMembers<TDim, TIdx>
78 static_assert(TDim::value > 0 && TDim::value <= 3,
"Invalid kernel dimensionality");
80 template<
typename TWorkDiv>
81 TaskKernelGenericSycl(TWorkDiv&& workDiv, TKernelFnObj
const& kernelFnObj, TArgs&&... args)
83 , m_kernelFnObj{kernelFnObj}
84 , m_args{std::forward<TArgs>(args)...}
88 auto operator()(sycl::handler& cgh)
const ->
void
94 auto const global_size = get_global_size(work_groups, group_items);
95 auto const local_size = get_local_size(group_items);
98 auto const dyn_shared_mem_bytes =
std::max(
101 [&](std::decay_t<TArgs>
const&... args) {
102 return getBlockSharedMemDynSizeBytes<TAcc>(m_kernelFnObj, group_items, item_elements, args...);
106 auto dyn_shared_accessor = sycl::local_accessor<std::byte>{sycl::range<1>{dyn_shared_mem_bytes}, cgh};
110 auto st_shared_accessor = sycl::local_accessor<std::byte>{sycl::range<1>{st_shared_mem_bytes}, cgh};
113 auto k_func = m_kernelFnObj;
114 auto k_args = m_args;
116 constexpr std::size_t sub_group_size = trait::warpSize<TKernelFnObj, TAcc>;
117 bool supported =
false;
119 if constexpr(sub_group_size == 0)
122 LAUNCH_SYCL_KERNEL_WITH_DEFAULT_SUBGROUP_SIZE
127 # if(SYCL_SUBGROUP_SIZE == 0)
129 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(sub_group_size)
133 if constexpr(sub_group_size == 4)
135 # if(SYCL_SUBGROUP_SIZE & 4)
136 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(4)
140 THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL
145 if constexpr(sub_group_size == 8)
147 # if(SYCL_SUBGROUP_SIZE & 8)
148 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(8)
152 THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL
157 if constexpr(sub_group_size == 16)
159 # if(SYCL_SUBGROUP_SIZE & 16)
160 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(16)
164 THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL
169 if constexpr(sub_group_size == 32)
171 # if(SYCL_SUBGROUP_SIZE & 32)
172 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(32)
176 THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL
181 if constexpr(sub_group_size == 64)
183 # if(SYCL_SUBGROUP_SIZE & 64)
184 LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS(64)
188 THROW_AND_LAUNCH_EMPTY_SYCL_KERNEL
195 throw sycl::exception(sycl::make_error_code(sycl::errc::kernel_not_supported));
199 static constexpr
auto is_sycl_task =
true;
201 static constexpr
auto is_sycl_kernel =
true;
204 auto get_global_size(Vec<TDim, TIdx>
const& work_groups, Vec<TDim, TIdx>
const& group_items)
const
206 if constexpr(TDim::value == 1)
207 return sycl::range<1>{
static_cast<std::size_t
>(work_groups[0] * group_items[0])};
208 else if constexpr(TDim::value == 2)
209 return sycl::range<2>{
210 static_cast<std::size_t
>(work_groups[1] * group_items[1]),
211 static_cast<std::size_t
>(work_groups[0] * group_items[0])};
213 return sycl::range<3>{
214 static_cast<std::size_t
>(work_groups[2] * group_items[2]),
215 static_cast<std::size_t
>(work_groups[1] * group_items[1]),
216 static_cast<std::size_t
>(work_groups[0] * group_items[0])};
219 auto get_local_size(Vec<TDim, TIdx>
const& group_items)
const
221 if constexpr(TDim::value == 1)
222 return sycl::range<1>{
static_cast<std::size_t
>(group_items[0])};
223 else if constexpr(TDim::value == 2)
224 return sycl::range<2>{
225 static_cast<std::size_t
>(group_items[1]),
226 static_cast<std::size_t
>(group_items[0])};
228 return sycl::range<3>{
229 static_cast<std::size_t
>(group_items[2]),
230 static_cast<std::size_t
>(group_items[1]),
231 static_cast<std::size_t
>(group_items[0])};
235 TKernelFnObj m_kernelFnObj;
236 std::tuple<std::decay_t<TArgs>...> m_args;
241 # if BOOST_COMP_CLANG
242 # pragma clang diagnostic pop
248 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
249 struct AccType<TaskKernelGenericSycl<TAcc, TDim, TIdx, TKernelFnObj, TArgs...>>
255 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
256 struct DevType<TaskKernelGenericSycl<TAcc, TDim, TIdx, TKernelFnObj, TArgs...>>
258 using type =
typename DevType<TAcc>::type;
262 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
263 struct PlatformType<TaskKernelGenericSycl<TAcc, TDim, TIdx, TKernelFnObj, TArgs...>>
265 using type =
typename PlatformType<TAcc>::type;
269 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
270 struct DimType<TaskKernelGenericSycl<TAcc, TDim, TIdx, TKernelFnObj, TArgs...>>
276 template<
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
277 struct IdxType<TaskKernelGenericSycl<TAcc, TDim, TIdx, TKernelFnObj, TArgs...>>
289 template<
concepts::Tag TTag,
typename TDev,
typename TDim,
typename TIdx,
typename TKernelFn,
typename... TArgs>
290 struct FunctionAttributes<AccGenericSycl<TTag, TDim, TIdx>, TDev, TKernelFn, TArgs...>
299 [[maybe_unused]] TKernelFn
const& kernelFn,
305 auto const& props = alpaka::getAccDevProps<AccGenericSycl<TTag, TDim, TIdx>>(dev);
306 kernelFunctionAttributes.
maxThreadsPerBlock =
static_cast<int>(props.m_blockThreadCountMax);
307 return kernelFunctionAttributes;
312 # undef LAUNCH_SYCL_KERNEL_IF_SUBGROUP_SIZE_IS
#define ALPAKA_BLOCK_SHARED_DYN_MEMBER_ALLOC_KIB
Vec< TDim, TIdx > m_blockThreadExtent
Vec< TDim, TIdx > m_gridBlockExtent
Vec< TDim, TIdx > m_threadElemExtent
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric va...
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC WorkDivMembers(alpaka::Vec< TDim, TIdx > const &gridBlockExtent, alpaka::Vec< TDim, TIdx > const &blockThreadExtent, alpaka::Vec< TDim, TIdx > const &elemExtent) -> WorkDivMembers< TDim, TIdx >
Deduction guide for the constructor which can be called without explicit template type parameters.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
static ALPAKA_FN_HOST auto getFunctionAttributes([[maybe_unused]] TDev const &dev, [[maybe_unused]] TKernelFnObj const &kernelFn, [[maybe_unused]] TArgs &&... args) -> alpaka::KernelFunctionAttributes