alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Traits.hpp
Go to the documentation of this file.
1/* Copyright 2023 Axel Huebl, Benjamin Worpitz, René Widera, Sergei Bastrakov, Jan Stephan, Bernhard Manfred Gruber,
2 * Andrea Bocci, Aurora Perego, Mehmet Yusufoglu
3 * SPDX-License-Identifier: MPL-2.0
4 */
5
6#pragma once
7
8#include "alpaka/acc/Tag.hpp"
12#include "alpaka/core/Debug.hpp"
15#include "alpaka/dim/Traits.hpp"
16#include "alpaka/idx/Traits.hpp"
19#include "alpaka/vec/Vec.hpp"
21
22#include <type_traits>
23
24//! The alpaka accelerator library.
25namespace alpaka
26{
27 //! The kernel traits.
28 namespace trait
29 {
30 //! The kernel execution task creation trait.
31 template<
32 typename TAcc,
33 typename TWorkDiv,
34 typename TKernelFnObj,
35 typename... TArgs/*,
36 typename TSfinae = void*/>
38
39 //! The trait for getting the size of the block shared dynamic memory of a kernel.
40 //!
41 //! \tparam TKernelFnObj The kernel function object.
42 //! \tparam TAcc The accelerator.
43 //!
44 //! The default implementation returns 0.
45 template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
47 {
48#if ALPAKA_COMP_CLANG
49# pragma clang diagnostic push
50# pragma clang diagnostic ignored \
51 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
52#endif
53 //! \param kernelFnObj The kernel object for which the block shared memory size should be calculated.
54 //! \param blockThreadExtent The block thread extent.
55 //! \param threadElemExtent The thread element extent.
56 //! \tparam TArgs The kernel invocation argument types pack.
57 //! \param args,... The kernel invocation arguments.
58 //! \return The size of the shared memory allocated for a block in bytes.
59 //! The default version always returns zero.
60#if ALPAKA_COMP_CLANG
61# pragma clang diagnostic pop
62#endif
64 template<typename TDim, typename... TArgs>
66 [[maybe_unused]] TKernelFnObj const& kernelFnObj,
67 [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
68 [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& threadElemExtent,
69 [[maybe_unused]] TArgs const&... args) -> std::size_t
70 {
71 return 0u;
72 }
73 };
74
75 //! \brief The structure template to access the function attributes of a kernel function object.
76 //! \tparam TAcc The accelerator type
77 //! \tparam TKernelFnObj Kernel function object type.
78 //! \tparam TArgs Kernel function object argument types as a parameter pack.
79 template<typename TAcc, typename TDev, typename TKernelFnObj, typename... TArgs>
81 {
82 //! \param dev The device instance
83 //! \param kernelFn The kernel function object which should be executed.
84 //! \param args The kernel invocation arguments.
85 //! \return KernelFunctionAttributes data structure instance. The default version never returns normally:
86 //! it throws std::invalid_argument because the trait is not specialised for the given arguments.
88 [[maybe_unused]] TDev const& dev,
89 [[maybe_unused]] TKernelFnObj const& kernelFn,
90 [[maybe_unused]] TArgs&&... args) -> alpaka::KernelFunctionAttributes
91 {
92 std::string const str
93 = std::string(__func__) + " function is not specialised for the given arguments.\n";
94 throw std::invalid_argument{str};
95 }
96 };
97
98 //! The trait for getting the warp size required by a kernel.
99 //!
100 //! \tparam TKernelFnObj The kernel function object.
101 //! \tparam TAcc The accelerator.
102 //!
103 //! The default implementation returns 0, which lets the accelerator compiler and runtime choose the warp size.
104 template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
105 struct WarpSize : std::integral_constant<std::uint32_t, 0>
106 {
    // Deliberately empty: `value` is inherited from the std::integral_constant base, which fixes it to 0
    // for this unspecialized template.
    // NOTE(review): TSfinae is presumably a hook for constrained specializations - confirm against users.
107 };
108
109 //! Shortcut variable template: evaluates to WarpSize<TKernelFnObj, TAcc>::value, so callers do not have to
    //! spell out the trait and its ::value member.
110 template<typename TKernelFnObj, typename TAcc>
111 inline constexpr std::uint32_t warpSize = WarpSize<TKernelFnObj, TAcc>::value;
112
113 //! The trait for getting the schedule to use when a kernel is run using the CpuOmp2Blocks accelerator.
114 //!
115 //! Has no effect on other accelerators.
116 //!
117 //! A user could either specialize this trait for their kernel, or define a public static member
118 //! ompScheduleKind of type alpaka::omp::Schedule, and additionally also int member ompScheduleChunkSize. In
119 //! the latter case, alpaka never odr-uses these members.
120 //!
121 //! In case schedule kind and chunk size are compile-time constants, setting them inside the kernel may benefit
122 //! performance.
123 //!
124 //! \tparam TKernelFnObj The kernel function object.
125 //! \tparam TAcc The accelerator.
126 //!
127 //! The default implementation behaves as if the trait was not specialized.
128 template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
130 {
131 private:
132 //! Type returned when the trait is not specialized
133 struct TraitNotSpecialized
134 {
135 };
136
137 public:
138#if ALPAKA_COMP_CLANG
139# pragma clang diagnostic push
140# pragma clang diagnostic ignored \
141 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
142#endif
143 //! \param kernelFnObj The kernel object for which the schedule should be returned.
144 //! \param blockThreadExtent The block thread extent.
145 //! \param threadElemExtent The thread element extent.
146 //! \tparam TArgs The kernel invocation argument types pack.
147 //! \param args,... The kernel invocation arguments.
148 //! \return The OpenMP schedule information as an alpaka::omp::Schedule object,
149 //! returning an object of any other type is treated as if the trait is not specialized.
150#if ALPAKA_COMP_CLANG
151# pragma clang diagnostic pop
152#endif
154 template<typename TDim, typename... TArgs>
156 [[maybe_unused]] TKernelFnObj const& kernelFnObj,
157 [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
158 [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& threadElemExtent,
159 [[maybe_unused]] TArgs const&... args) -> TraitNotSpecialized
160 {
161 return TraitNotSpecialized{};
162 }
163 };
164 } // namespace trait
165
166#if ALPAKA_COMP_CLANG
167# pragma clang diagnostic push
168# pragma clang diagnostic ignored \
169 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
170#endif
171//! \tparam TAcc The accelerator type.
172//! \param kernelFnObj The kernel object for which the block shared memory size should be calculated.
173//! \param blockThreadExtent The block thread extent.
174//! \param threadElemExtent The thread element extent.
175//! \param args,... The kernel invocation arguments.
176//! \return The size of the shared memory allocated for a block in bytes.
177//! The default implementation always returns zero.
178#if ALPAKA_COMP_CLANG
179# pragma clang diagnostic pop
180#endif
182 template<typename TAcc, typename TKernelFnObj, typename TDim, typename... TArgs>
184 TKernelFnObj const& kernelFnObj,
185 Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
186 Vec<TDim, Idx<TAcc>> const& threadElemExtent,
187 TArgs const&... args) -> std::size_t
188 {
190 kernelFnObj,
191 blockThreadExtent,
192 threadElemExtent,
193 args...);
194 }
195
196 //! \tparam TAcc The accelerator type.
197 //! \tparam TDev The device type.
198 //! \param dev The device instance
199 //! \param kernelFnObj The kernel function object which should be executed.
200 //! \param args The kernel invocation arguments.
201 //! \return KernelFunctionAttributes instance. Instance is filled with values returned by the accelerator API
202 //! depending on the specific kernel. The default implementation of the corresponding trait does not return an
203 //! instance; it throws std::invalid_argument.
205 template<typename TAcc, typename TDev, typename TKernelFnObj, typename... TArgs>
206 ALPAKA_FN_HOST auto getFunctionAttributes(TDev const& dev, TKernelFnObj const& kernelFnObj, TArgs&&... args)
208 {
210 dev,
211 kernelFnObj,
212 std::forward<TArgs>(args)...);
213 }
214
215#if ALPAKA_COMP_CLANG
216# pragma clang diagnostic push
217# pragma clang diagnostic ignored \
218 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
219#endif
220//! \tparam TAcc The accelerator type.
221//! \param kernelFnObj The kernel object for which the OpenMP schedule should be returned.
222//! \param blockThreadExtent The block thread extent.
223//! \param threadElemExtent The thread element extent.
224//! \param args,... The kernel invocation arguments.
225//! \return The OpenMP schedule information as an alpaka::omp::Schedule object if the kernel specialized the
226//! OmpSchedule trait, an object of another type if the kernel didn't specialize the trait.
227#if ALPAKA_COMP_CLANG
228# pragma clang diagnostic pop
229#endif
230 template<typename TAcc, typename TKernelFnObj, typename TDim, typename... TArgs>
232 TKernelFnObj const& kernelFnObj,
233 Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
234 Vec<TDim, Idx<TAcc>> const& threadElemExtent,
235 TArgs const&... args)
236 {
238 kernelFnObj,
239 blockThreadExtent,
240 threadElemExtent,
241 args...);
242 }
243
244#if ALPAKA_COMP_CLANG
245# pragma clang diagnostic push
246# pragma clang diagnostic ignored \
247 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
248#endif
249
250
251 //! Check if a type used as kernel argument is trivially copyable
252 //!
253 //! \attention In case this trait is specialized for a user type the user should be sure that the result of calling
254 //! the copy constructor is equivalent to using memcpy to duplicate the object. An existing destructor should be
255 //! free of side effects.
256 //!
257 //! It is implementation-defined whether the closure type of a lambda is trivially copyable.
258 //! Therefore the default implementation is true for trivially copyable or empty (stateless) types.
259 //!
260 //! @tparam T type to check
261 //! @{
262 template<typename T, typename = void>
264 : std::bool_constant<std::is_empty_v<T> || std::is_trivially_copyable_v<T>>
265 {
266 };
267
268 template<typename T>
270
271 //! @}
272
273 namespace detail
274 {
275 //! Check that the return of TKernelFnObj is void
276 template<typename TAcc, typename TSfinae = void>
278 {
279 template<typename TKernelFnObj, typename... TArgs>
280 void operator()(TKernelFnObj const&, TArgs const&...)
281 {
    // Compile-time check only: both parameters are unnamed and never read; they exist so that
    // TKernelFnObj and TArgs can be deduced from the call.
    // Result is the type produced by invoking TKernelFnObj with (TAcc const&, TArgs const&...).
282 using Result = std::invoke_result_t<TKernelFnObj, TAcc const&, TArgs const&...>;
    // Compilation fails with the message below unless that result type is exactly void.
283 static_assert(std::is_same_v<Result, void>, "The TKernelFnObj is required to return void!");
284 }
285 };
286
287 // asserts that T is trivially copyable. We put this in a separate function so we can see which T would fail
288 // the test, when called from a fold expression.
289 template<typename T>
291 {
292 static_assert(isKernelArgumentTriviallyCopyable<T>, "The kernel argument T must be trivially copyable!");
293 }
294 } // namespace detail
295
296 //! Check if the kernel type is trivially copyable
297 //!
298 //! \attention In case this trait is specialized for a user type the user should be sure that the result of calling
299 //! the copy constructor is equivalent to using memcpy to duplicate the object. An existing destructor should be
300 //! free of side effects.
301 //!
302 //! The default implementation is true for trivially copyable types (or for extended lambda expressions for CUDA).
303 //!
304 //! @tparam T type to check
305 //! @{
306 template<typename T, typename = void>
309 : std::bool_constant<
310 std::is_trivially_copyable_v<T> || __nv_is_extended_device_lambda_closure_type(T)
311 || __nv_is_extended_host_device_lambda_closure_type(T)>
312#else
313 : std::is_trivially_copyable<T>
314#endif
315 {
316 };
317
318 template<typename T>
320
321//! @}
322
323//! Creates a kernel execution task.
324//!
325//! \tparam TAcc The accelerator type.
326//! \param workDiv The index domain work division.
327//! \param kernelFnObj The kernel function object which should be executed.
328//! \param args,... The kernel invocation arguments.
329//! \return The kernel execution task.
330#if ALPAKA_COMP_CLANG
331# pragma clang diagnostic pop
332#endif
333 template<typename TAcc, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
334 ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
335 {
336 // check for void return type
337 detail::CheckFnReturnType<TAcc>{}(kernelFnObj, args...);
338
339#if ALPAKA_COMP_NVCC
340 static_assert(
341 isKernelTriviallyCopyable<TKernelFnObj>,
342 "Kernels must be trivially copyable or an extended CUDA lambda expression!");
343#else
344 static_assert(isKernelTriviallyCopyable<TKernelFnObj>, "Kernels must be trivially copyable!");
345#endif
346 (detail::assertKernelArgIsTriviallyCopyable<std::decay_t<TArgs>>(), ...);
347 static_assert(
349 "The dimensions of TAcc and TWorkDiv have to be identical!");
350 static_assert(
351 std::is_same_v<Idx<std::decay_t<TWorkDiv>>, Idx<TAcc>>,
352 "The idx type of TAcc and the idx type of TWorkDiv have to be identical!");
353
354#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
355 std::cout << __func__ << " workDiv: " << workDiv << ", kernelFnObj: " << core::demangled<decltype(kernelFnObj)>
356 << std::endl;
357#endif
359 workDiv,
360 kernelFnObj,
361 std::forward<TArgs>(args)...);
362 }
363
364#if ALPAKA_COMP_CLANG
365# pragma clang diagnostic push
366# pragma clang diagnostic ignored \
367 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
368#endif
369//! Executes the given kernel in the given queue.
370//!
371//! \tparam TAcc The accelerator type.
372//! \param queue The queue to enqueue the kernel execution task into.
373//! \param workDiv The index domain work division.
374//! \param kernelFnObj The kernel function object which should be executed.
375//! \param args,... The kernel invocation arguments.
376#if ALPAKA_COMP_CLANG
377# pragma clang diagnostic pop
378#endif
379 template<concepts::Acc TAcc, typename TQueue, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
380 ALPAKA_FN_HOST auto exec(TQueue& queue, TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
381 -> void
382 {
    // Overload selected when the explicit template argument satisfies concepts::Acc.
    // Two steps in one expression: createTaskKernel<TAcc> builds the task from the work division, the
    // kernel object and the perfectly forwarded arguments; enqueue() then receives that task together
    // with the queue.
383 enqueue(queue, createTaskKernel<TAcc>(workDiv, kernelFnObj, std::forward<TArgs>(args)...));
384 }
385
386#if ALPAKA_COMP_CLANG
387# pragma clang diagnostic push
388# pragma clang diagnostic ignored \
389 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
390#endif
391//! Executes the given kernel in the given queue.
392//!
393//! \tparam TTag The tag type.
394//! \param queue The queue to enqueue the kernel execution task into.
395//! \param workDiv The index domain work division.
396//! \param kernelFnObj The kernel function object which should be executed.
397//! \param args,... The kernel invocation arguments.
398#if ALPAKA_COMP_CLANG
399# pragma clang diagnostic pop
400#endif
401 template<concepts::Tag TTag, typename TQueue, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
402 ALPAKA_FN_HOST auto exec(TQueue& queue, TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
403 -> void
404 {
    // Overload selected when the explicit template argument satisfies concepts::Tag.
405 enqueue(
406 queue,
    // The accelerator type is not given by the caller here: it is obtained from TagToAcc using the tag
    // plus the Dim and Idx of the decayed work-division type, and then used as the TAcc argument of
    // createTaskKernel.
407 createTaskKernel<TagToAcc<TTag, Dim<std::decay_t<TWorkDiv>>, Idx<std::decay_t<TWorkDiv>>>>(
408 workDiv,
409 kernelFnObj,
    // Arguments are perfectly forwarded so their value category is preserved.
410 std::forward<TArgs>(args)...));
411 }
412
413} // namespace alpaka
#define ALPAKA_COMP_NVCC
Definition Config.hpp:128
A n-dimensional vector.
Definition Vec.hpp:38
#define ALPAKA_FN_HOST
Definition Common.hpp:40
#define ALPAKA_FN_HOST_ACC
Definition Common.hpp:39
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: ALPAKA_NO_HOST_ACC_...
Definition Common.hpp:82
constexpr std::string_view demangled
void assertKernelArgIsTriviallyCopyable()
Definition Traits.hpp:290
constexpr std::uint32_t warpSize
This is a shortcut for the trait defined above.
Definition Traits.hpp:111
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
constexpr bool isKernelTriviallyCopyable
Definition Traits.hpp:319
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> alpaka::KernelFunctionAttributes
Definition Traits.hpp:206
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:334
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition Traits.hpp:47
ALPAKA_FN_HOST auto exec(TQueue &queue, TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> void
Executes the given kernel in the given queue.
Definition Traits.hpp:380
ALPAKA_FN_HOST auto getOmpSchedule(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc > > const &blockThreadExtent, Vec< TDim, Idx< TAcc > > const &threadElemExtent, TArgs const &... args)
Definition Traits.hpp:231
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition Traits.hpp:19
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc > > const &blockThreadExtent, Vec< TDim, Idx< TAcc > > const &threadElemExtent, TArgs const &... args) -> std::size_t
Definition Traits.hpp:183
constexpr bool isKernelArgumentTriviallyCopyable
Definition Traits.hpp:269
typename trait::TagToAcc< TTag, TDim, TIdx >::type TagToAcc
maps a tag type to an acc type
Definition Tag.hpp:74
Check if a type used as kernel argument is trivially copyable.
Definition Traits.hpp:265
Check if the kernel type is trivially copyable.
Definition Traits.hpp:315
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
Check that the return of TKernelFnObj is void.
Definition Traits.hpp:278
void operator()(TKernelFnObj const &, TArgs const &...)
Definition Traits.hpp:280
The trait for getting the size of the block shared dynamic memory of a kernel.
Definition Traits.hpp:47
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc > > const &blockThreadExtent, Vec< TDim, Idx< TAcc > > const &threadElemExtent, TArgs const &... args) -> std::size_t
Definition Traits.hpp:65
The kernel execution task creation trait.
Definition Traits.hpp:37
The structure template to access to the functions attributes of a kernel function object.
Definition Traits.hpp:81
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFn, TArgs &&... args) -> alpaka::KernelFunctionAttributes
Definition Traits.hpp:87
The trait for getting the schedule to use when a kernel is run using the CpuOmp2Blocks accelerator.
Definition Traits.hpp:130
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST auto getOmpSchedule(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc > > const &blockThreadExtent, Vec< TDim, Idx< TAcc > > const &threadElemExtent, TArgs const &... args) -> TraitNotSpecialized
Definition Traits.hpp:155
The trait for getting the warp size required by a kernel.
Definition Traits.hpp:106