20 #include <type_traits>
32 typename TKernelFnObj,
43 template<
typename TKernelFnObj,
typename TAcc,
typename TSfinae =
void>
47 # pragma clang diagnostic push
48 # pragma clang diagnostic ignored \
59 # pragma clang diagnostic pop
62 template<
typename TDim,
typename... TArgs>
64 [[maybe_unused]] TKernelFnObj
const& kernelFnObj,
65 [[maybe_unused]]
Vec<TDim,
Idx<TAcc>>
const& blockThreadExtent,
66 [[maybe_unused]]
Vec<TDim,
Idx<TAcc>>
const& threadElemExtent,
67 [[maybe_unused]] TArgs
const&... args) -> std::size_t
77 template<
typename TAcc,
typename TDev,
typename TKernelFnObj,
typename... TArgs>
86 [[maybe_unused]] TDev
const& dev,
87 [[maybe_unused]] TKernelFnObj
const& kernelFn,
91 = std::string(__func__) +
" function is not specialised for the given arguments.\n";
92 throw std::invalid_argument{str};
102 template<
typename TKernelFnObj,
typename TAcc,
typename TSfinae =
void>
103 struct WarpSize : std::integral_constant<std::uint32_t, 0>
108 template<
typename TKernelFnObj,
typename TAcc>
126 template<
typename TKernelFnObj,
typename TAcc,
typename TSfinae =
void>
131 struct TraitNotSpecialized
137 # pragma clang diagnostic push
138 # pragma clang diagnostic ignored \
149 # pragma clang diagnostic pop
152 template<
typename TDim,
typename... TArgs>
154 [[maybe_unused]] TKernelFnObj
const& kernelFnObj,
155 [[maybe_unused]]
Vec<TDim,
Idx<TAcc>>
const& blockThreadExtent,
156 [[maybe_unused]]
Vec<TDim,
Idx<TAcc>>
const& threadElemExtent,
157 [[maybe_unused]] TArgs
const&... args) -> TraitNotSpecialized
159 return TraitNotSpecialized{};
165 # pragma clang diagnostic push
166 # pragma clang diagnostic ignored \
177 # pragma clang diagnostic pop
180 template<
typename TAcc,
typename TKernelFnObj,
typename TDim,
typename... TArgs>
182 TKernelFnObj
const& kernelFnObj,
185 TArgs
const&... args) -> std::size_t
203 template<
typename TAcc,
typename TDev,
typename TKernelFnObj,
typename... TArgs>
210 std::forward<TArgs>(args)...);
214 # pragma clang diagnostic push
215 # pragma clang diagnostic ignored \
226 # pragma clang diagnostic pop
228 template<
typename TAcc,
typename TKernelFnObj,
typename TDim,
typename... TArgs>
230 TKernelFnObj
const& kernelFnObj,
233 TArgs
const&... args)
243 # pragma clang diagnostic push
244 # pragma clang diagnostic ignored \
260 template<
typename T,
typename =
void>
262 : std::bool_constant<std::is_empty_v<T> || std::is_trivially_copyable_v<T>>
274 template<
typename TAcc,
typename TSfinae =
void>
277 template<
typename TKernelFnObj,
typename... TArgs>
280 using Result = std::invoke_result_t<TKernelFnObj, TAcc
const&, TArgs
const&...>;
281 static_assert(std::is_same_v<Result, void>,
"The TKernelFnObj is required to return void!");
290 static_assert(isKernelArgumentTriviallyCopyable<T>,
"The kernel argument T must be trivially copyable!");
304 template<
typename T,
typename =
void>
307 : std::bool_constant<
308 std::is_trivially_copyable_v<T> || __nv_is_extended_device_lambda_closure_type(T)
309 || __nv_is_extended_host_device_lambda_closure_type(T)>
311 : std::is_trivially_copyable<T>
329 # pragma clang diagnostic pop
331 template<
typename TAcc,
typename TWorkDiv,
typename TKernelFnObj,
typename... TArgs>
339 isKernelTriviallyCopyable<TKernelFnObj>,
340 "Kernels must be trivially copyable or an extended CUDA lambda expression!");
342 static_assert(isKernelTriviallyCopyable<TKernelFnObj>,
"Kernels must be trivially copyable!");
344 (detail::assertKernelArgIsTriviallyCopyable<std::decay_t<TArgs>>(), ...);
347 "The dimensions of TAcc and TWorkDiv have to be identical!");
350 "The idx type of TAcc and the idx type of TWorkDiv have to be identical!");
352 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
353 std::cout << __func__ <<
" workDiv: " << workDiv <<
", kernelFnObj: " <<
core::demangled<decltype(kernelFnObj)>
359 std::forward<TArgs>(args)...);
363 # pragma clang diagnostic push
364 # pragma clang diagnostic ignored \
375 # pragma clang diagnostic pop
377 template<
typename TAcc,
typename TQueue,
typename TWorkDiv,
typename TKernelFnObj,
typename... TArgs>
378 ALPAKA_FN_HOST auto exec(TQueue& queue, TWorkDiv
const& workDiv, TKernelFnObj
const& kernelFnObj, TArgs&&... args)
381 enqueue(queue, createTaskKernel<TAcc>(workDiv, kernelFnObj, std::forward<TArgs>(args)...));
#define ALPAKA_FN_HOST_ACC
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: ALPAKA_NO_HOST_ACC_...
const std::string demangled
void assertKernelArgIsTriviallyCopyable()
constexpr std::uint32_t warpSize
This is a shortcut for the trait defined above.
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
constexpr bool isKernelTriviallyCopyable
ALPAKA_FN_HOST auto getOmpSchedule(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc >> const &blockThreadExtent, Vec< TDim, Idx< TAcc >> const &threadElemExtent, TArgs const &... args)
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc >> const &blockThreadExtent, Vec< TDim, Idx< TAcc >> const &threadElemExtent, TArgs const &... args) -> std::size_t
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> alpaka::KernelFunctionAttributes
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
ALPAKA_FN_HOST auto exec(TQueue &queue, TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> void
Executes the given kernel in the given queue.
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
constexpr bool isKernelArgumentTriviallyCopyable
Check if a type used as a kernel argument is trivially copyable.
Check if the kernel type is trivially copyable.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
Check that the return type of TKernelFnObj is void.
void operator()(TKernelFnObj const &, TArgs const &...)
The trait for getting the size of the block shared dynamic memory of a kernel.
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes([[maybe_unused]] TKernelFnObj const &kernelFnObj, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &blockThreadExtent, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &threadElemExtent, [[maybe_unused]] TArgs const &... args) -> std::size_t
The kernel execution task creation trait.
The structure template for accessing the function attributes of a kernel function object.
static ALPAKA_FN_HOST auto getFunctionAttributes([[maybe_unused]] TDev const &dev, [[maybe_unused]] TKernelFnObj const &kernelFn, [[maybe_unused]] TArgs &&... args) -> alpaka::KernelFunctionAttributes
The trait for getting the schedule to use when a kernel is run using the CpuOmp2Blocks accelerator.
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST auto getOmpSchedule([[maybe_unused]] TKernelFnObj const &kernelFnObj, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &blockThreadExtent, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &threadElemExtent, [[maybe_unused]] TArgs const &... args) -> TraitNotSpecialized
The trait for getting the warp size required by a kernel.