19 #include <type_traits>
31 typename TKernelFnObj,
42 template<
typename TKernelFnObj,
typename TAcc,
typename TSfinae =
void>
46 # pragma clang diagnostic push
47 # pragma clang diagnostic ignored \
58 # pragma clang diagnostic pop
61 template<
typename TDim,
typename... TArgs>
63 [[maybe_unused]] TKernelFnObj
const& kernelFnObj,
64 [[maybe_unused]]
Vec<TDim,
Idx<TAcc>>
const& blockThreadExtent,
65 [[maybe_unused]]
Vec<TDim,
Idx<TAcc>>
const& threadElemExtent,
66 [[maybe_unused]] TArgs
const&... args) -> std::size_t
78 template<
typename TKernelFnObj,
typename TAcc,
typename TSfinae =
void>
79 struct WarpSize : std::integral_constant<std::uint32_t, 0>
84 template<
typename TKernelFnObj,
typename TAcc>
102 template<
typename TKernelFnObj,
typename TAcc,
typename TSfinae =
void>
107 struct TraitNotSpecialized
113 # pragma clang diagnostic push
114 # pragma clang diagnostic ignored \
125 # pragma clang diagnostic pop
128 template<
typename TDim,
typename... TArgs>
130 [[maybe_unused]] TKernelFnObj
const& kernelFnObj,
131 [[maybe_unused]]
Vec<TDim,
Idx<TAcc>>
const& blockThreadExtent,
132 [[maybe_unused]]
Vec<TDim,
Idx<TAcc>>
const& threadElemExtent,
133 [[maybe_unused]] TArgs
const&... args) -> TraitNotSpecialized
135 return TraitNotSpecialized{};
141 # pragma clang diagnostic push
142 # pragma clang diagnostic ignored \
153 # pragma clang diagnostic pop
156 template<
typename TAcc,
typename TKernelFnObj,
typename TDim,
typename... TArgs>
158 TKernelFnObj
const& kernelFnObj,
161 TArgs
const&... args) -> std::size_t
171 # pragma clang diagnostic push
172 # pragma clang diagnostic ignored \
183 # pragma clang diagnostic pop
185 template<
typename TAcc,
typename TKernelFnObj,
typename TDim,
typename... TArgs>
187 TKernelFnObj
const& kernelFnObj,
190 TArgs
const&... args)
200 # pragma clang diagnostic push
201 # pragma clang diagnostic ignored \
217 template<
typename T,
typename =
void>
219 : std::bool_constant<std::is_empty_v<T> || std::is_trivially_copyable_v<T>>
231 template<
typename TAcc,
typename TSfinae =
void>
234 template<
typename TKernelFnObj,
typename... TArgs>
237 using Result = std::invoke_result_t<TKernelFnObj, TAcc
const&, TArgs
const&...>;
238 static_assert(std::is_same_v<Result, void>,
"The TKernelFnObj is required to return void!");
247 static_assert(isKernelArgumentTriviallyCopyable<T>,
"The kernel argument T must be trivially copyable!");
259 # pragma clang diagnostic pop
261 template<
typename TAcc,
typename TWorkDiv,
typename TKernelFnObj,
typename... TArgs>
269 std::is_trivially_copyable_v<TKernelFnObj> || __nv_is_extended_device_lambda_closure_type(TKernelFnObj)
270 || __nv_is_extended_host_device_lambda_closure_type(TKernelFnObj),
271 "Kernels must be trivially copyable or an extended CUDA lambda expression!");
273 static_assert(std::is_trivially_copyable_v<TKernelFnObj>,
"Kernels must be trivially copyable!");
275 (detail::assertKernelArgIsTriviallyCopyable<std::decay_t<TArgs>>(), ...);
278 "The dimensions of TAcc and TWorkDiv have to be identical!");
281 "The idx type of TAcc and the idx type of TWorkDiv have to be identical!");
283 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
284 std::cout << __func__ <<
" workDiv: " << workDiv <<
", kernelFnObj: " <<
core::demangled<decltype(kernelFnObj)>
290 std::forward<TArgs>(args)...);
294 # pragma clang diagnostic push
295 # pragma clang diagnostic ignored \
306 # pragma clang diagnostic pop
308 template<
typename TAcc,
typename TQueue,
typename TWorkDiv,
typename TKernelFnObj,
typename... TArgs>
309 ALPAKA_FN_HOST auto exec(TQueue& queue, TWorkDiv
const& workDiv, TKernelFnObj
const& kernelFnObj, TArgs&&... args)
312 enqueue(queue, createTaskKernel<TAcc>(workDiv, kernelFnObj, std::forward<TArgs>(args)...));
#define ALPAKA_FN_HOST_ACC
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: place ALPAKA_NO_HOST_ACC_WARNING directly before ALPAKA_FN_HOST_ACC in the function declaration.
const std::string demangled
void assertKernelArgIsTriviallyCopyable()
constexpr std::uint32_t warpSize
This is a shortcut for the trait defined above.
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
ALPAKA_FN_HOST auto getOmpSchedule(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc >> const &blockThreadExtent, Vec< TDim, Idx< TAcc >> const &threadElemExtent, TArgs const &... args)
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc >> const &blockThreadExtent, Vec< TDim, Idx< TAcc >> const &threadElemExtent, TArgs const &... args) -> std::size_t
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
ALPAKA_FN_HOST auto exec(TQueue &queue, TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> void
Executes the given kernel in the given queue.
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
constexpr bool isKernelArgumentTriviallyCopyable
Check if a type used as kernel argument is trivially copyable.
Check that the return type of TKernelFnObj is void.
void operator()(TKernelFnObj const &, TArgs const &...)
The trait for getting the size of the block shared dynamic memory of a kernel.
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes([[maybe_unused]] TKernelFnObj const &kernelFnObj, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &blockThreadExtent, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &threadElemExtent, [[maybe_unused]] TArgs const &... args) -> std::size_t
The kernel execution task creation trait.
The trait for getting the schedule to use when a kernel is run using the CpuOmp2Blocks accelerator.
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST auto getOmpSchedule([[maybe_unused]] TKernelFnObj const &kernelFnObj, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &blockThreadExtent, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &threadElemExtent, [[maybe_unused]] TArgs const &... args) -> TraitNotSpecialized
The trait for getting the warp size required by a kernel.