alpaka
Abstraction Library for Parallel Kernel Acceleration
Traits.hpp
Go to the documentation of this file.
1 /* Copyright 2023 Axel Huebl, Benjamin Worpitz, René Widera, Sergei Bastrakov, Jan Stephan, Bernhard Manfred Gruber,
2  * Andrea Bocci, Aurora Perego, Mehmet Yusufoglu
3  * SPDX-License-Identifier: MPL-2.0
4  */
5 
6 #pragma once
7 
9 #include "alpaka/core/Common.hpp"
10 #include "alpaka/core/Debug.hpp"
13 #include "alpaka/dim/Traits.hpp"
14 #include "alpaka/idx/Traits.hpp"
16 #include "alpaka/queue/Traits.hpp"
17 #include "alpaka/vec/Vec.hpp"
19 
20 #include <type_traits>
21 
22 //! The alpaka accelerator library.
23 namespace alpaka
24 {
25  //! The kernel traits.
26  namespace trait
27  {
//! The kernel execution task creation trait.
//!
//! Only the primary template is declared here; no default implementation is provided.
//! NOTE(review): the declarator line was missing from the recovered listing; the name `CreateTaskKernel` is
//! restored from the alpaka API - confirm against the upstream header.
//!
//! \tparam TAcc The accelerator type.
//! \tparam TWorkDiv The work division type.
//! \tparam TKernelFnObj The kernel function object type.
//! \tparam TArgs The kernel invocation argument types pack.
template<
    typename TAcc,
    typename TWorkDiv,
    typename TKernelFnObj,
    typename... TArgs /*,
    typename TSfinae = void*/>
struct CreateTaskKernel;
36 
37  //! The trait for getting the size of the block shared dynamic memory of a kernel.
38  //!
39  //! \tparam TKernelFnObj The kernel function object.
40  //! \tparam TAcc The accelerator.
41  //!
42  //! The default implementation returns 0.
43  template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
45  {
46 #if BOOST_COMP_CLANG
47 # pragma clang diagnostic push
48 # pragma clang diagnostic ignored \
49  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
50 #endif
51  //! \param kernelFnObj The kernel object for which the block shared memory size should be calculated.
52  //! \param blockThreadExtent The block thread extent.
53  //! \param threadElemExtent The thread element extent.
54  //! \tparam TArgs The kernel invocation argument types pack.
55  //! \param args,... The kernel invocation arguments.
56  //! \return The size of the shared memory allocated for a block in bytes.
57  //! The default version always returns zero.
58 #if BOOST_COMP_CLANG
59 # pragma clang diagnostic pop
60 #endif
62  template<typename TDim, typename... TArgs>
64  [[maybe_unused]] TKernelFnObj const& kernelFnObj,
65  [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
66  [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& threadElemExtent,
67  [[maybe_unused]] TArgs const&... args) -> std::size_t
68  {
69  return 0u;
70  }
71  };
72 
73  //! \brief The structure template to access to the functions attributes of a kernel function object.
74  //! \tparam TAcc The accelerator type
75  //! \tparam TKernelFnObj Kernel function object type.
76  //! \tparam TArgs Kernel function object argument types as a parameter pack.
77  template<typename TAcc, typename TDev, typename TKernelFnObj, typename... TArgs>
79  {
80  //! \param dev The device instance
81  //! \param kernelFn The kernel function object which should be executed.
82  //! \param args The kernel invocation arguments.
83  //! \return KernelFunctionAttributes data structure instance. The default version always returns the
84  //! instance with fields which are set to zero.
86  [[maybe_unused]] TDev const& dev,
87  [[maybe_unused]] TKernelFnObj const& kernelFn,
88  [[maybe_unused]] TArgs&&... args) -> alpaka::KernelFunctionAttributes
89  {
90  std::string const str
91  = std::string(__func__) + " function is not specialised for the given arguments.\n";
92  throw std::invalid_argument{str};
93  }
94  };
95 
//! The trait for getting the warp size required by a kernel.
//!
//! \tparam TKernelFnObj The kernel function object.
//! \tparam TAcc The accelerator.
//!
//! The default implementation returns 0, which lets the accelerator compiler and runtime choose the warp size.
template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
struct WarpSize : std::integral_constant<std::uint32_t, 0>
{
};

//! Shortcut for `WarpSize<TKernelFnObj, TAcc>::value`.
//!
//! \tparam TKernelFnObj The kernel function object.
//! \tparam TAcc The accelerator.
template<typename TKernelFnObj, typename TAcc>
inline constexpr std::uint32_t warpSize = WarpSize<TKernelFnObj, TAcc>::value;
110 
111  //! The trait for getting the schedule to use when a kernel is run using the CpuOmp2Blocks accelerator.
112  //!
113  //! Has no effect on other accelerators.
114  //!
115  //! A user could either specialize this trait for their kernel, or define a public static member
116  //! ompScheduleKind of type alpaka::omp::Schedule, and additionally also int member ompScheduleChunkSize. In
117  //! the latter case, alpaka never odr-uses these members.
118  //!
119  //! In case schedule kind and chunk size are compile-time constants, setting then inside kernel may benefit
120  //! performance.
121  //!
122  //! \tparam TKernelFnObj The kernel function object.
123  //! \tparam TAcc The accelerator.
124  //!
125  //! The default implementation behaves as if the trait was not specialized.
126  template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
127  struct OmpSchedule
128  {
129  private:
130  //! Type returned when the trait is not specialized
131  struct TraitNotSpecialized
132  {
133  };
134 
135  public:
136 #if BOOST_COMP_CLANG
137 # pragma clang diagnostic push
138 # pragma clang diagnostic ignored \
139  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
140 #endif
141  //! \param kernelFnObj The kernel object for which the schedule should be returned.
142  //! \param blockThreadExtent The block thread extent.
143  //! \param threadElemExtent The thread element extent.
144  //! \tparam TArgs The kernel invocation argument types pack.
145  //! \param args,... The kernel invocation arguments.
146  //! \return The OpenMP schedule information as an alpaka::omp::Schedule object,
147  //! returning an object of any other type is treated as if the trait is not specialized.
148 #if BOOST_COMP_CLANG
149 # pragma clang diagnostic pop
150 #endif
152  template<typename TDim, typename... TArgs>
154  [[maybe_unused]] TKernelFnObj const& kernelFnObj,
155  [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
156  [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& threadElemExtent,
157  [[maybe_unused]] TArgs const&... args) -> TraitNotSpecialized
158  {
159  return TraitNotSpecialized{};
160  }
161  };
162  } // namespace trait
163 
164 #if BOOST_COMP_CLANG
165 # pragma clang diagnostic push
166 # pragma clang diagnostic ignored \
167  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
168 #endif
169 //! \tparam TAcc The accelerator type.
170 //! \param kernelFnObj The kernel object for which the block shared memory size should be calculated.
171 //! \param blockThreadExtent The block thread extent.
172 //! \param threadElemExtent The thread element extent.
173 //! \param args,... The kernel invocation arguments.
174 //! \return The size of the shared memory allocated for a block in bytes.
175 //! The default implementation always returns zero.
176 #if BOOST_COMP_CLANG
177 # pragma clang diagnostic pop
178 #endif
180  template<typename TAcc, typename TKernelFnObj, typename TDim, typename... TArgs>
182  TKernelFnObj const& kernelFnObj,
183  Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
184  Vec<TDim, Idx<TAcc>> const& threadElemExtent,
185  TArgs const&... args) -> std::size_t
186  {
188  kernelFnObj,
189  blockThreadExtent,
190  threadElemExtent,
191  args...);
192  }
193 
194  //! \tparam TAcc The accelerator type.
195  //! \tparam TDev The device type.
196  //! \param dev The device instance
197  //! \param kernelFnObj The kernel function object which should be executed.
198  //! \param args The kernel invocation arguments.
199  //! \return KernelFunctionAttributes instance. Instance is filled with values returned by the accelerator API
200  //! depending on the specific kernel. The default version always returns the instance with fields which are set to
201  //! zero.
203  template<typename TAcc, typename TDev, typename TKernelFnObj, typename... TArgs>
204  ALPAKA_FN_HOST auto getFunctionAttributes(TDev const& dev, TKernelFnObj const& kernelFnObj, TArgs&&... args)
206  {
208  dev,
209  kernelFnObj,
210  std::forward<TArgs>(args)...);
211  }
212 
213 #if BOOST_COMP_CLANG
214 # pragma clang diagnostic push
215 # pragma clang diagnostic ignored \
216  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
217 #endif
218 //! \tparam TAcc The accelerator type.
219 //! \param kernelFnObj The kernel object for which the block shared memory size should be calculated.
220 //! \param blockThreadExtent The block thread extent.
221 //! \param threadElemExtent The thread element extent.
222 //! \param args,... The kernel invocation arguments.
223 //! \return The OpenMP schedule information as an alpaka::omp::Schedule object if the kernel specialized the
224 //! OmpSchedule trait, an object of another type if the kernel didn't specialize the trait.
225 #if BOOST_COMP_CLANG
226 # pragma clang diagnostic pop
227 #endif
228  template<typename TAcc, typename TKernelFnObj, typename TDim, typename... TArgs>
230  TKernelFnObj const& kernelFnObj,
231  Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
232  Vec<TDim, Idx<TAcc>> const& threadElemExtent,
233  TArgs const&... args)
234  {
236  kernelFnObj,
237  blockThreadExtent,
238  threadElemExtent,
239  args...);
240  }
241 
242 #if BOOST_COMP_CLANG
243 # pragma clang diagnostic push
244 # pragma clang diagnostic ignored \
245  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
246 #endif
247 
248 
//! Check if a type used as kernel argument is trivially copyable
//!
//! \attention In case this trait is specialized for a user type the user should be sure that the result of calling
//! the copy constructor is equal to use memcpy to duplicate the object. An existing destructor should be free
//! of side effects.
//!
//! It is implementation-defined whether the closure type of a lambda is trivially copyable.
//! Therefore the default implementation is true for trivially copyable or empty (stateless) types.
//!
//! NOTE(review): the struct header line was missing from the recovered listing; the name
//! `IsKernelArgumentTriviallyCopyable` is restored from the alpaka API - confirm against the upstream header.
//!
//! @tparam T type to check
//! @{
template<typename T, typename = void>
struct IsKernelArgumentTriviallyCopyable
    : std::bool_constant<std::is_empty_v<T> || std::is_trivially_copyable_v<T>>
{
};

//! Shortcut for `IsKernelArgumentTriviallyCopyable<T>::value`.
template<typename T>
inline constexpr bool isKernelArgumentTriviallyCopyable = IsKernelArgumentTriviallyCopyable<T>::value;

//! @}
270 
271  namespace detail
272  {
//! Check that the return of TKernelFnObj is void
//!
//! The check is a static_assert, so a kernel with a non-void return type fails to compile; calling the
//! operator has no runtime effect.
//!
//! \tparam TAcc The accelerator type; the kernel is checked as if invoked with `TAcc const&` as first argument.
template<typename TAcc, typename TSfinae = void>
struct CheckFnReturnType
{
    //! \tparam TKernelFnObj The kernel function object type.
    //! \tparam TArgs The kernel invocation argument types pack; each is checked as `TArgs const&`.
    template<typename TKernelFnObj, typename... TArgs>
    void operator()(TKernelFnObj const&, TArgs const&...)
    {
        using Result = std::invoke_result_t<TKernelFnObj, TAcc const&, TArgs const&...>;
        static_assert(std::is_same_v<Result, void>, "The TKernelFnObj is required to return void!");
    }
};
284 
285  // asserts that T is trivially copyable. We put this in a separate function so we can see which T would fail
286  // the test, when called from a fold expression.
287  template<typename T>
289  {
290  static_assert(isKernelArgumentTriviallyCopyable<T>, "The kernel argument T must be trivially copyable!");
291  }
292  } // namespace detail
293 
//! Check if the kernel type is trivially copyable
//!
//! \attention In case this trait is specialized for a user type the user should be sure that the result of calling
//! the copy constructor is equal to use memcpy to duplicate the object. An existing destructor should be free
//! of side effects.
//!
//! The default implementation is true for trivially copyable types (or for extended lambda expressions for CUDA).
//!
//! NOTE(review): the struct header line was missing from the recovered listing; the name
//! `IsKernelTriviallyCopyable` is restored from the alpaka API - confirm against the upstream header.
//!
//! @tparam T type to check
//! @{
template<typename T, typename = void>
struct IsKernelTriviallyCopyable
#if BOOST_COMP_NVCC
    // Under nvcc, extended device / host-device lambda closure types are accepted as well.
    : std::bool_constant<
          std::is_trivially_copyable_v<T> || __nv_is_extended_device_lambda_closure_type(T)
          || __nv_is_extended_host_device_lambda_closure_type(T)>
#else
    : std::is_trivially_copyable<T>
#endif
{
};

//! Shortcut for `IsKernelTriviallyCopyable<T>::value`.
template<typename T>
inline constexpr bool isKernelTriviallyCopyable = IsKernelTriviallyCopyable<T>::value;

//! @}
320 
321 //! Creates a kernel execution task.
322 //!
323 //! \tparam TAcc The accelerator type.
324 //! \param workDiv The index domain work division.
325 //! \param kernelFnObj The kernel function object which should be executed.
326 //! \param args,... The kernel invocation arguments.
327 //! \return The kernel execution task.
328 #if BOOST_COMP_CLANG
329 # pragma clang diagnostic pop
330 #endif
331  template<typename TAcc, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
332  ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
333  {
334  // check for void return type
335  detail::CheckFnReturnType<TAcc>{}(kernelFnObj, args...);
336 
337 #if BOOST_COMP_NVCC
338  static_assert(
339  isKernelTriviallyCopyable<TKernelFnObj>,
340  "Kernels must be trivially copyable or an extended CUDA lambda expression!");
341 #else
342  static_assert(isKernelTriviallyCopyable<TKernelFnObj>, "Kernels must be trivially copyable!");
343 #endif
344  (detail::assertKernelArgIsTriviallyCopyable<std::decay_t<TArgs>>(), ...);
345  static_assert(
346  Dim<std::decay_t<TWorkDiv>>::value == Dim<TAcc>::value,
347  "The dimensions of TAcc and TWorkDiv have to be identical!");
348  static_assert(
349  std::is_same_v<Idx<std::decay_t<TWorkDiv>>, Idx<TAcc>>,
350  "The idx type of TAcc and the idx type of TWorkDiv have to be identical!");
351 
352 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
353  std::cout << __func__ << " workDiv: " << workDiv << ", kernelFnObj: " << core::demangled<decltype(kernelFnObj)>
354  << std::endl;
355 #endif
357  workDiv,
358  kernelFnObj,
359  std::forward<TArgs>(args)...);
360  }
361 
362 #if BOOST_COMP_CLANG
363 # pragma clang diagnostic push
364 # pragma clang diagnostic ignored \
365  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
366 #endif
367 //! Executes the given kernel in the given queue.
368 //!
369 //! \tparam TAcc The accelerator type.
370 //! \param queue The queue to enqueue the view copy task into.
371 //! \param workDiv The index domain work division.
372 //! \param kernelFnObj The kernel function object which should be executed.
373 //! \param args,... The kernel invocation arguments.
374 #if BOOST_COMP_CLANG
375 # pragma clang diagnostic pop
376 #endif
377  template<typename TAcc, typename TQueue, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
378  ALPAKA_FN_HOST auto exec(TQueue& queue, TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
379  -> void
380  {
381  enqueue(queue, createTaskKernel<TAcc>(workDiv, kernelFnObj, std::forward<TArgs>(args)...));
382  }
383 } // namespace alpaka
An n-dimensional vector.
Definition: Vec.hpp:38
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
#define ALPAKA_FN_HOST_ACC
Definition: Common.hpp:39
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: ALPAKA_NO_HOST_ACC_...
Definition: Common.hpp:82
const std::string demangled
void assertKernelArgIsTriviallyCopyable()
Definition: Traits.hpp:288
constexpr std::uint32_t warpSize
This is a shortcut for the trait defined above.
Definition: Traits.hpp:109
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition: Traits.hpp:29
constexpr bool isKernelTriviallyCopyable
Definition: Traits.hpp:317
ALPAKA_FN_HOST auto getOmpSchedule(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc >> const &blockThreadExtent, Vec< TDim, Idx< TAcc >> const &threadElemExtent, TArgs const &... args)
Definition: Traits.hpp:229
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc >> const &blockThreadExtent, Vec< TDim, Idx< TAcc >> const &threadElemExtent, TArgs const &... args) -> std::size_t
Definition: Traits.hpp:181
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> alpaka::KernelFunctionAttributes
Definition: Traits.hpp:204
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition: Traits.hpp:332
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition: Traits.hpp:47
ALPAKA_FN_HOST auto exec(TQueue &queue, TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> void
Executes the given kernel in the given queue.
Definition: Traits.hpp:378
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19
constexpr bool isKernelArgumentTriviallyCopyable
Definition: Traits.hpp:267
Check if a type used as kernel argument is trivially copyable.
Definition: Traits.hpp:263
Check if the kernel type is trivially copyable.
Definition: Traits.hpp:313
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
Check that the return of TKernelFnObj is void.
Definition: Traits.hpp:276
void operator()(TKernelFnObj const &, TArgs const &...)
Definition: Traits.hpp:278
The trait for getting the size of the block shared dynamic memory of a kernel.
Definition: Traits.hpp:45
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes([[maybe_unused]] TKernelFnObj const &kernelFnObj, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &blockThreadExtent, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &threadElemExtent, [[maybe_unused]] TArgs const &... args) -> std::size_t
Definition: Traits.hpp:63
The kernel execution task creation trait.
Definition: Traits.hpp:35
The structure template to access to the functions attributes of a kernel function object.
Definition: Traits.hpp:79
static ALPAKA_FN_HOST auto getFunctionAttributes([[maybe_unused]] TDev const &dev, [[maybe_unused]] TKernelFnObj const &kernelFn, [[maybe_unused]] TArgs &&... args) -> alpaka::KernelFunctionAttributes
Definition: Traits.hpp:85
The trait for getting the schedule to use when a kernel is run using the CpuOmp2Blocks accelerator.
Definition: Traits.hpp:128
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST auto getOmpSchedule([[maybe_unused]] TKernelFnObj const &kernelFnObj, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &blockThreadExtent, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &threadElemExtent, [[maybe_unused]] TArgs const &... args) -> TraitNotSpecialized
Definition: Traits.hpp:153
The trait for getting the warp size required by a kernel.
Definition: Traits.hpp:104