alpaka
Abstraction Library for Parallel Kernel Acceleration
Traits.hpp
Go to the documentation of this file.
1 /* Copyright 2023 Axel Huebl, Benjamin Worpitz, René Widera, Sergei Bastrakov, Jan Stephan, Bernhard Manfred Gruber,
2  * Andrea Bocci, Aurora Perego
3  * SPDX-License-Identifier: MPL-2.0
4  */
5 
6 #pragma once
7 
9 #include "alpaka/core/Common.hpp"
10 #include "alpaka/core/Debug.hpp"
13 #include "alpaka/dim/Traits.hpp"
14 #include "alpaka/idx/Traits.hpp"
15 #include "alpaka/queue/Traits.hpp"
16 #include "alpaka/vec/Vec.hpp"
18 
19 #include <type_traits>
20 
21 //! The alpaka accelerator library.
22 namespace alpaka
23 {
24  //! The kernel traits.
25  namespace trait
26  {
27  //! The kernel execution task creation trait.
    //!
    //! \tparam TAcc The accelerator type.
    //! \tparam TWorkDiv The work division type.
    //! \tparam TKernelFnObj The kernel function object type.
    //! \tparam TArgs The kernel invocation argument types pack.
    //
    // NOTE(review): the declaration introduced by this template header (line 34 of the original file) is not
    // part of this listing. The trailing SFINAE parameter is commented out in the parameter list below.
28  template<
29  typename TAcc,
30  typename TWorkDiv,
31  typename TKernelFnObj,
32  typename... TArgs/*,
33  typename TSfinae = void*/>
35 
36  //! The trait for getting the size of the block shared dynamic memory of a kernel.
37  //!
38  //! \tparam TKernelFnObj The kernel function object type.
39  //! \tparam TAcc The accelerator type.
    //! \tparam TSfinae Defaults to void and is not used by the default implementation.
40  //!
41  //! The default implementation returns 0.
42  template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
44  {
45 #if BOOST_COMP_CLANG
46 # pragma clang diagnostic push
47 # pragma clang diagnostic ignored \
48  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
49 #endif
    //! \tparam TDim The dimensionality of the two extent vectors.
50  //! \param kernelFnObj The kernel object for which the block shared memory size should be calculated.
51  //! \param blockThreadExtent The block thread extent.
52  //! \param threadElemExtent The thread element extent.
53  //! \tparam TArgs The kernel invocation argument types pack.
54  //! \param args,... The kernel invocation arguments.
55  //! \return The size of the shared memory allocated for a block in bytes.
56  //! The default version ignores all of its parameters and always returns zero.
57 #if BOOST_COMP_CLANG
58 # pragma clang diagnostic pop
59 #endif
61  template<typename TDim, typename... TArgs>
63  [[maybe_unused]] TKernelFnObj const& kernelFnObj,
64  [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
65  [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& threadElemExtent,
66  [[maybe_unused]] TArgs const&... args) -> std::size_t
67  {
68  return 0u;
69  }
70  };
71 
72  //! The trait for getting the warp size required by a kernel.
73  //!
74  //! \tparam TKernelFnObj The kernel function object type.
75  //! \tparam TAcc The accelerator type.
    //! \tparam TSfinae Defaults to void and is not used by the default implementation.
76  //!
77  //! The default implementation returns 0, which lets the accelerator compiler and runtime choose the warp size.
    //! The result is exposed as the compile-time constant `value` of type std::uint32_t.
78  template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
79  struct WarpSize : std::integral_constant<std::uint32_t, 0>
80  {
81  };
82 
83  //! Variable template shortcut for WarpSize<TKernelFnObj, TAcc>::value.
    //!
    //! \tparam TKernelFnObj The kernel function object type.
    //! \tparam TAcc The accelerator type.
84  template<typename TKernelFnObj, typename TAcc>
85  inline constexpr std::uint32_t warpSize = WarpSize<TKernelFnObj, TAcc>::value;
86 
87  //! The trait for getting the schedule to use when a kernel is run using the CpuOmp2Blocks accelerator.
88  //!
89  //! Has no effect on other accelerators.
90  //!
91  //! A user could either specialize this trait for their kernel, or define a public static member
92  //! ompScheduleKind of type alpaka::omp::Schedule, and additionally an int member ompScheduleChunkSize. In
93  //! the latter case, alpaka never odr-uses these members.
94  //!
95  //! In case schedule kind and chunk size are compile-time constants, setting them inside the kernel may benefit
96  //! performance.
97  //!
98  //! \tparam TKernelFnObj The kernel function object type.
99  //! \tparam TAcc The accelerator type.
100  //!
101  //! The default implementation behaves as if the trait was not specialized.
102  template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
103  struct OmpSchedule
104  {
105  private:
106  //! Empty tag type returned by the default implementation to signal that the trait is not specialized.
107  struct TraitNotSpecialized
108  {
109  };
110 
111  public:
112 #if BOOST_COMP_CLANG
113 # pragma clang diagnostic push
114 # pragma clang diagnostic ignored \
115  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
116 #endif
     //! \tparam TDim The dimensionality of the two extent vectors.
117  //! \param kernelFnObj The kernel object for which the schedule should be returned.
118  //! \param blockThreadExtent The block thread extent.
119  //! \param threadElemExtent The thread element extent.
120  //! \tparam TArgs The kernel invocation argument types pack.
121  //! \param args,... The kernel invocation arguments.
122  //! \return The OpenMP schedule information as an alpaka::omp::Schedule object,
123  //! returning an object of any other type is treated as if the trait is not specialized.
     //! The default version ignores all of its parameters and returns a TraitNotSpecialized object.
124 #if BOOST_COMP_CLANG
125 # pragma clang diagnostic pop
126 #endif
128  template<typename TDim, typename... TArgs>
130  [[maybe_unused]] TKernelFnObj const& kernelFnObj,
131  [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
132  [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& threadElemExtent,
133  [[maybe_unused]] TArgs const&... args) -> TraitNotSpecialized
134  {
135  return TraitNotSpecialized{};
136  }
137  };
138  } // namespace trait
139 
140 #if BOOST_COMP_CLANG
141 # pragma clang diagnostic push
142 # pragma clang diagnostic ignored \
143  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
144 #endif
     //! Returns the size of the block shared dynamic memory of a kernel.
     //!
145  //! \tparam TAcc The accelerator type.
     //! \tparam TKernelFnObj The kernel function object type.
     //! \tparam TDim The dimensionality of the two extent vectors.
     //! \tparam TArgs The kernel invocation argument types pack.
146  //! \param kernelFnObj The kernel object for which the block shared memory size should be calculated.
147  //! \param blockThreadExtent The block thread extent.
148  //! \param threadElemExtent The thread element extent.
149  //! \param args,... The kernel invocation arguments.
150  //! \return The size of the shared memory allocated for a block in bytes.
151  //! The default implementation always returns zero.
152 #if BOOST_COMP_CLANG
153 # pragma clang diagnostic pop
154 #endif
156  template<typename TAcc, typename TKernelFnObj, typename TDim, typename... TArgs>
158  TKernelFnObj const& kernelFnObj,
159  Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
160  Vec<TDim, Idx<TAcc>> const& threadElemExtent,
161  TArgs const&... args) -> std::size_t
162  {
     // All four arguments are passed on unchanged, in the order they were received.
164  kernelFnObj,
165  blockThreadExtent,
166  threadElemExtent,
167  args...);
168  }
169 
170 #if BOOST_COMP_CLANG
171 # pragma clang diagnostic push
172 # pragma clang diagnostic ignored \
173  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
174 #endif
     //! Returns the OpenMP schedule to use for a kernel.
     //!
175  //! \tparam TAcc The accelerator type.
     //! \tparam TKernelFnObj The kernel function object type.
     //! \tparam TDim The dimensionality of the two extent vectors.
     //! \tparam TArgs The kernel invocation argument types pack.
176  //! \param kernelFnObj The kernel object for which the schedule should be returned.
177  //! \param blockThreadExtent The block thread extent.
178  //! \param threadElemExtent The thread element extent.
179  //! \param args,... The kernel invocation arguments.
180  //! \return The OpenMP schedule information as an alpaka::omp::Schedule object if the kernel specialized the
181  //! OmpSchedule trait, an object of another type if the kernel didn't specialize the trait.
182 #if BOOST_COMP_CLANG
183 # pragma clang diagnostic pop
184 #endif
185  template<typename TAcc, typename TKernelFnObj, typename TDim, typename... TArgs>
187  TKernelFnObj const& kernelFnObj,
188  Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
189  Vec<TDim, Idx<TAcc>> const& threadElemExtent,
190  TArgs const&... args)
191  {
     // All four arguments are passed on unchanged, in the order they were received.
193  kernelFnObj,
194  blockThreadExtent,
195  threadElemExtent,
196  args...);
197  }
198 
199 #if BOOST_COMP_CLANG
200 # pragma clang diagnostic push
201 # pragma clang diagnostic ignored \
202  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
203 #endif
204 
205 
206  //! Check if a type used as kernel argument is trivially copyable.
207  //!
208  //! \attention In case this trait is specialized for a user type, the user should be sure that the result of calling
209  //! the copy constructor is equivalent to using memcpy to duplicate the object. An existing destructor should be free
210  //! of side effects.
211  //!
212  //! It is implementation-defined whether the closure type of a lambda is trivially copyable.
213  //! Therefore the default implementation is true for trivially copyable or empty (stateless) types.
214  //!
215  //! @tparam T type to check
216  //! @{
217  template<typename T, typename = void>
219  : std::bool_constant<std::is_empty_v<T> || std::is_trivially_copyable_v<T>>
220  {
221  };
222 
223  template<typename T>
225 
226  //! @}
227 
228  namespace detail
229  {
230  //! Compile-time check that invoking TKernelFnObj with (TAcc const&, TArgs const&...) returns void.
     //!
     //! \tparam TAcc The accelerator type passed as first argument to the kernel function object.
     //! \tparam TSfinae Defaults to void and is not used by this implementation.
231  template<typename TAcc, typename TSfinae = void>
233  {
     //! The parameters are unnamed and serve only to deduce TKernelFnObj and TArgs.
     //! The body contains nothing but the static_assert below.
234  template<typename TKernelFnObj, typename... TArgs>
235  void operator()(TKernelFnObj const&, TArgs const&...)
236  {
237  using Result = std::invoke_result_t<TKernelFnObj, TAcc const&, TArgs const&...>;
238  static_assert(std::is_same_v<Result, void>, "The TKernelFnObj is required to return void!");
239  }
240  };
241 
242  // Asserts at compile time that T satisfies isKernelArgumentTriviallyCopyable. This is a separate function so
243  // that, when it is called from a fold expression, we can see which T fails the test.
244  template<typename T>
246  {
247  static_assert(isKernelArgumentTriviallyCopyable<T>, "The kernel argument T must be trivially copyable!");
248  }
249  } // namespace detail
250 
251 //! Creates a kernel execution task.
252 //!
    //! The following requirements are checked at compile time:
    //! - invoking the kernel function object with (TAcc const&, args...) returns void,
    //! - the kernel function object type is trivially copyable (when BOOST_COMP_NVCC is set, an extended CUDA
    //!   lambda closure type is accepted as well),
    //! - every decayed argument type satisfies isKernelArgumentTriviallyCopyable,
    //! - the dimension and the idx type of TWorkDiv are identical to those of TAcc.
    //!
253 //! \tparam TAcc The accelerator type.
254 //! \param workDiv The index domain work division.
255 //! \param kernelFnObj The kernel function object which should be executed.
256 //! \param args,... The kernel invocation arguments.
257 //! \return The kernel execution task.
258 #if BOOST_COMP_CLANG
259 # pragma clang diagnostic pop
260 #endif
261  template<typename TAcc, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
262  ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
263  {
264  // check for void return type
265  detail::CheckFnReturnType<TAcc>{}(kernelFnObj, args...);
266 
     // The kernel function object itself has to be trivially copyable; under nvcc extended lambdas are allowed too.
267 #if BOOST_COMP_NVCC
268  static_assert(
269  std::is_trivially_copyable_v<TKernelFnObj> || __nv_is_extended_device_lambda_closure_type(TKernelFnObj)
270  || __nv_is_extended_host_device_lambda_closure_type(TKernelFnObj),
271  "Kernels must be trivially copyable or an extended CUDA lambda expression!");
272 #else
273  static_assert(std::is_trivially_copyable_v<TKernelFnObj>, "Kernels must be trivially copyable!");
274 #endif
     // One instantiation per argument type, so a failing assertion names the offending type.
275  (detail::assertKernelArgIsTriviallyCopyable<std::decay_t<TArgs>>(), ...);
     // The work division has to match the accelerator in dimensionality and index type.
276  static_assert(
277  Dim<std::decay_t<TWorkDiv>>::value == Dim<TAcc>::value,
278  "The dimensions of TAcc and TWorkDiv have to be identical!");
279  static_assert(
280  std::is_same_v<Idx<std::decay_t<TWorkDiv>>, Idx<TAcc>>,
281  "The idx type of TAcc and the idx type of TWorkDiv have to be identical!");
282 
     // Only when ALPAKA_DEBUG is at least ALPAKA_DEBUG_FULL: print the work division and the kernel type.
283 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
284  std::cout << __func__ << " workDiv: " << workDiv << ", kernelFnObj: " << core::demangled<decltype(kernelFnObj)>
285  << std::endl;
286 #endif
     // The invocation arguments are perfectly forwarded.
288  workDiv,
289  kernelFnObj,
290  std::forward<TArgs>(args)...);
291  }
292 
293 #if BOOST_COMP_CLANG
294 # pragma clang diagnostic push
295 # pragma clang diagnostic ignored \
296  "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
297 #endif
298 //! Executes the given kernel in the given queue.
299 //!
    //! Creates the kernel execution task with createTaskKernel<TAcc> and enqueues it into the queue.
    //!
300 //! \tparam TAcc The accelerator type.
301 //! \param queue The queue to enqueue the kernel execution task into.
302 //! \param workDiv The index domain work division.
303 //! \param kernelFnObj The kernel function object which should be executed.
304 //! \param args,... The kernel invocation arguments.
305 #if BOOST_COMP_CLANG
306 # pragma clang diagnostic pop
307 #endif
308  template<typename TAcc, typename TQueue, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
309  ALPAKA_FN_HOST auto exec(TQueue& queue, TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
310  -> void
311  {
312  enqueue(queue, createTaskKernel<TAcc>(workDiv, kernelFnObj, std::forward<TArgs>(args)...));
313  }
314 } // namespace alpaka
An n-dimensional vector.
Definition: Vec.hpp:38
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
#define ALPAKA_FN_HOST_ACC
Definition: Common.hpp:39
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: ALPAKA_NO_HOST_ACC_...
Definition: Common.hpp:82
const std::string demangled
void assertKernelArgIsTriviallyCopyable()
Definition: Traits.hpp:245
constexpr std::uint32_t warpSize
This is a shortcut for the trait defined above.
Definition: Traits.hpp:85
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition: Traits.hpp:29
ALPAKA_FN_HOST auto getOmpSchedule(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc >> const &blockThreadExtent, Vec< TDim, Idx< TAcc >> const &threadElemExtent, TArgs const &... args)
Definition: Traits.hpp:186
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc >> const &blockThreadExtent, Vec< TDim, Idx< TAcc >> const &threadElemExtent, TArgs const &... args) -> std::size_t
Definition: Traits.hpp:157
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition: Traits.hpp:262
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition: Traits.hpp:47
ALPAKA_FN_HOST auto exec(TQueue &queue, TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> void
Executes the given kernel in the given queue.
Definition: Traits.hpp:309
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19
constexpr bool isKernelArgumentTriviallyCopyable
Definition: Traits.hpp:224
Check if a type used as kernel argument is trivially copyable.
Definition: Traits.hpp:220
Check that the return of TKernelFnObj is void.
Definition: Traits.hpp:233
void operator()(TKernelFnObj const &, TArgs const &...)
Definition: Traits.hpp:235
The trait for getting the size of the block shared dynamic memory of a kernel.
Definition: Traits.hpp:44
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes([[maybe_unused]] TKernelFnObj const &kernelFnObj, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &blockThreadExtent, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &threadElemExtent, [[maybe_unused]] TArgs const &... args) -> std::size_t
Definition: Traits.hpp:62
The kernel execution task creation trait.
Definition: Traits.hpp:34
The trait for getting the schedule to use when a kernel is run using the CpuOmp2Blocks accelerator.
Definition: Traits.hpp:104
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST auto getOmpSchedule([[maybe_unused]] TKernelFnObj const &kernelFnObj, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &blockThreadExtent, [[maybe_unused]] Vec< TDim, Idx< TAcc >> const &threadElemExtent, [[maybe_unused]] TArgs const &... args) -> TraitNotSpecialized
Definition: Traits.hpp:129
The trait for getting the warp size required by a kernel.
Definition: Traits.hpp:80