alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Traits.hpp
Go to the documentation of this file.
1/* Copyright 2023 Axel Huebl, Benjamin Worpitz, René Widera, Sergei Bastrakov, Jan Stephan, Bernhard Manfred Gruber,
2 * Andrea Bocci, Aurora Perego, Mehmet Yusufoglu
3 * SPDX-License-Identifier: MPL-2.0
4 */
5
6#pragma once
7
10#include "alpaka/core/Debug.hpp"
13#include "alpaka/dim/Traits.hpp"
14#include "alpaka/idx/Traits.hpp"
17#include "alpaka/vec/Vec.hpp"
19
20#include <type_traits>
21
22//! The alpaka accelerator library.
23namespace alpaka
24{
25 //! The kernel traits.
26 namespace trait
27 {
28 //! The kernel execution task creation trait.
29 template<
30 typename TAcc,
31 typename TWorkDiv,
32 typename TKernelFnObj,
33 typename... TArgs/*,
34 typename TSfinae = void*/>
36
37 //! The trait for getting the size of the block shared dynamic memory of a kernel.
38 //!
39 //! \tparam TKernelFnObj The kernel function object.
40 //! \tparam TAcc The accelerator.
41 //!
42 //! The default implementation returns 0.
43 template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
45 {
46#if BOOST_COMP_CLANG
47# pragma clang diagnostic push
48# pragma clang diagnostic ignored \
49 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
50#endif
51 //! \param kernelFnObj The kernel object for which the block shared memory size should be calculated.
52 //! \param blockThreadExtent The block thread extent.
53 //! \param threadElemExtent The thread element extent.
54 //! \tparam TArgs The kernel invocation argument types pack.
55 //! \param args,... The kernel invocation arguments.
56 //! \return The size of the shared memory allocated for a block in bytes.
57 //! The default version always returns zero.
58#if BOOST_COMP_CLANG
59# pragma clang diagnostic pop
60#endif
62 template<typename TDim, typename... TArgs>
64 [[maybe_unused]] TKernelFnObj const& kernelFnObj,
65 [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
66 [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& threadElemExtent,
67 [[maybe_unused]] TArgs const&... args) -> std::size_t
68 {
69 return 0u;
70 }
71 };
72
73 //! \brief The structure template to access the function attributes of a kernel function object.
74 //! \tparam TAcc The accelerator type
75 //! \tparam TKernelFnObj Kernel function object type.
76 //! \tparam TArgs Kernel function object argument types as a parameter pack.
77 template<typename TAcc, typename TDev, typename TKernelFnObj, typename... TArgs>
79 {
80 //! \param dev The device instance
81 //! \param kernelFn The kernel function object which should be executed.
82 //! \param args The kernel invocation arguments.
83 //! \return KernelFunctionAttributes data structure instance. The default version never returns a value:
84 //! it throws std::invalid_argument because the trait is not specialised for the given arguments.
86 [[maybe_unused]] TDev const& dev,
87 [[maybe_unused]] TKernelFnObj const& kernelFn,
88 [[maybe_unused]] TArgs&&... args) -> alpaka::KernelFunctionAttributes
89 {
90 std::string const str
91 = std::string(__func__) + " function is not specialised for the given arguments.\n";
92 throw std::invalid_argument{str};
93 }
94 };
95
96 //! The trait for getting the warp size required by a kernel.
97 //!
98 //! \tparam TKernelFnObj The kernel function object.
99 //! \tparam TAcc The accelerator.
100 //!
101 //! The default implementation returns 0, which lets the accelerator compiler and runtime choose the warp size.
102 template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
103 struct WarpSize : std::integral_constant<std::uint32_t, 0>
104 {
 // Primary template: `value` is 0, meaning no particular warp size is requested for this
 // kernel/accelerator pair.
 // NOTE(review): TSfinae is an extra defaulted parameter, presumably a hook for constrained
 // specialisations - confirm against the accelerator back-ends.
105 };
106
107 //! This is a shortcut for the trait defined above
108 template<typename TKernelFnObj, typename TAcc>
 // Variable template that reads WarpSize<TKernelFnObj, TAcc>::value, so it reflects whichever definition of
 // that trait is selected for the given kernel/accelerator pair (0 for the primary template).
109 inline constexpr std::uint32_t warpSize = WarpSize<TKernelFnObj, TAcc>::value;
110
111 //! The trait for getting the schedule to use when a kernel is run using the CpuOmp2Blocks accelerator.
112 //!
113 //! Has no effect on other accelerators.
114 //!
115 //! A user could either specialize this trait for their kernel, or define a public static member
116 //! ompScheduleKind of type alpaka::omp::Schedule, and additionally also int member ompScheduleChunkSize. In
117 //! the latter case, alpaka never odr-uses these members.
118 //!
119 //! In case schedule kind and chunk size are compile-time constants, setting them inside the kernel may benefit
120 //! performance.
121 //!
122 //! \tparam TKernelFnObj The kernel function object.
123 //! \tparam TAcc The accelerator.
124 //!
125 //! The default implementation behaves as if the trait was not specialized.
126 template<typename TKernelFnObj, typename TAcc, typename TSfinae = void>
128 {
129 private:
130 //! Type returned when the trait is not specialized
131 struct TraitNotSpecialized
132 {
133 };
134
135 public:
136#if BOOST_COMP_CLANG
137# pragma clang diagnostic push
138# pragma clang diagnostic ignored \
139 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
140#endif
141 //! \param kernelFnObj The kernel object for which the schedule should be returned.
142 //! \param blockThreadExtent The block thread extent.
143 //! \param threadElemExtent The thread element extent.
144 //! \tparam TArgs The kernel invocation argument types pack.
145 //! \param args,... The kernel invocation arguments.
146 //! \return The OpenMP schedule information as an alpaka::omp::Schedule object,
147 //! returning an object of any other type is treated as if the trait is not specialized.
148#if BOOST_COMP_CLANG
149# pragma clang diagnostic pop
150#endif
152 template<typename TDim, typename... TArgs>
154 [[maybe_unused]] TKernelFnObj const& kernelFnObj,
155 [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
156 [[maybe_unused]] Vec<TDim, Idx<TAcc>> const& threadElemExtent,
157 [[maybe_unused]] TArgs const&... args) -> TraitNotSpecialized
158 {
159 return TraitNotSpecialized{};
160 }
161 };
162 } // namespace trait
163
164#if BOOST_COMP_CLANG
165# pragma clang diagnostic push
166# pragma clang diagnostic ignored \
167 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
168#endif
169//! \tparam TAcc The accelerator type.
170//! \param kernelFnObj The kernel object for which the block shared memory size should be calculated.
171//! \param blockThreadExtent The block thread extent.
172//! \param threadElemExtent The thread element extent.
173//! \param args,... The kernel invocation arguments.
174//! \return The size of the shared memory allocated for a block in bytes.
175//! The default implementation always returns zero.
176#if BOOST_COMP_CLANG
177# pragma clang diagnostic pop
178#endif
180 template<typename TAcc, typename TKernelFnObj, typename TDim, typename... TArgs>
182 TKernelFnObj const& kernelFnObj,
183 Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
184 Vec<TDim, Idx<TAcc>> const& threadElemExtent,
185 TArgs const&... args) -> std::size_t
186 {
188 kernelFnObj,
189 blockThreadExtent,
190 threadElemExtent,
191 args...);
192 }
193
194 //! \tparam TAcc The accelerator type.
195 //! \tparam TDev The device type.
196 //! \param dev The device instance
197 //! \param kernelFnObj The kernel function object which should be executed.
198 //! \param args The kernel invocation arguments.
199 //! \return KernelFunctionAttributes instance. Instance is filled with values returned by the accelerator API
200 //! depending on the specific kernel. The default (unspecialised) trait implementation does not return zeroed
201 //! fields; it throws std::invalid_argument.
203 template<typename TAcc, typename TDev, typename TKernelFnObj, typename... TArgs>
204 ALPAKA_FN_HOST auto getFunctionAttributes(TDev const& dev, TKernelFnObj const& kernelFnObj, TArgs&&... args)
206 {
208 dev,
209 kernelFnObj,
210 std::forward<TArgs>(args)...);
211 }
212
213#if BOOST_COMP_CLANG
214# pragma clang diagnostic push
215# pragma clang diagnostic ignored \
216 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
217#endif
218//! \tparam TAcc The accelerator type.
219//! \param kernelFnObj The kernel object for which the OpenMP schedule should be returned.
220//! \param blockThreadExtent The block thread extent.
221//! \param threadElemExtent The thread element extent.
222//! \param args,... The kernel invocation arguments.
223//! \return The OpenMP schedule information as an alpaka::omp::Schedule object if the kernel specialized the
224//! OmpSchedule trait, an object of another type if the kernel didn't specialize the trait.
225#if BOOST_COMP_CLANG
226# pragma clang diagnostic pop
227#endif
228 template<typename TAcc, typename TKernelFnObj, typename TDim, typename... TArgs>
230 TKernelFnObj const& kernelFnObj,
231 Vec<TDim, Idx<TAcc>> const& blockThreadExtent,
232 Vec<TDim, Idx<TAcc>> const& threadElemExtent,
233 TArgs const&... args)
234 {
236 kernelFnObj,
237 blockThreadExtent,
238 threadElemExtent,
239 args...);
240 }
241
242#if BOOST_COMP_CLANG
243# pragma clang diagnostic push
244# pragma clang diagnostic ignored \
245 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
246#endif
247
248
249 //! Check if a type used as kernel argument is trivially copyable
250 //!
251 //! \attention In case this trait is specialized for a user type the user should be sure that the result of calling
252 //! the copy constructor is equivalent to using memcpy to duplicate the object. An existing destructor should be free
253 //! of side effects.
254 //!
255 //! It is implementation-defined whether the closure type of a lambda is trivially copyable.
256 //! Therefore the default implementation is true for trivially copyable or empty (stateless) types.
257 //!
258 //! @tparam T type to check
259 //! @{
260 template<typename T, typename = void>
262 : std::bool_constant<std::is_empty_v<T> || std::is_trivially_copyable_v<T>>
263 {
264 };
265
266 template<typename T>
268
269 //! @}
270
271 namespace detail
272 {
273 //! Check that the return of TKernelFnObj is void
274 template<typename TAcc, typename TSfinae = void>
276 {
277 template<typename TKernelFnObj, typename... TArgs>
278 void operator()(TKernelFnObj const&, TArgs const&...)
279 {
 // The parameters are unnamed: only their deduced types are needed, nothing is evaluated at run time.
 // Result is the type obtained by invoking the kernel with a const accelerator reference followed by the
 // kernel arguments as const lvalue references.
280 using Result = std::invoke_result_t<TKernelFnObj, TAcc const&, TArgs const&...>;
 // Fail compilation if the kernel returns anything other than void for this invocation.
281 static_assert(std::is_same_v<Result, void>, "The TKernelFnObj is required to return void!");
282 }
283 };
284
285 // asserts that T is trivially copyable. We put this in a separate function so we can see which T would fail
286 // the test, when called from a fold expression.
287 template<typename T>
289 {
290 static_assert(isKernelArgumentTriviallyCopyable<T>, "The kernel argument T must be trivially copyable!");
291 }
292 } // namespace detail
293
294 //! Check if the kernel type is trivially copyable
295 //!
296 //! \attention In case this trait is specialized for a user type the user should be sure that the result of calling
297 //! the copy constructor is equivalent to using memcpy to duplicate the object. An existing destructor should be free
298 //! of side effects.
299 //!
300 //! The default implementation is true for trivially copyable types (or for extended lambda expressions for CUDA).
301 //!
302 //! @tparam T type to check
303 //! @{
304 template<typename T, typename = void>
306#if BOOST_COMP_NVCC
307 : std::bool_constant<
308 std::is_trivially_copyable_v<T> || __nv_is_extended_device_lambda_closure_type(T)
309 || __nv_is_extended_host_device_lambda_closure_type(T)>
310#else
311 : std::is_trivially_copyable<T>
312#endif
313 {
314 };
315
316 template<typename T>
318
319//! @}
320
321//! Creates a kernel execution task.
322//!
323//! \tparam TAcc The accelerator type.
324//! \param workDiv The index domain work division.
325//! \param kernelFnObj The kernel function object which should be executed.
326//! \param args,... The kernel invocation arguments.
327//! \return The kernel execution task.
328#if BOOST_COMP_CLANG
329# pragma clang diagnostic pop
330#endif
331 template<typename TAcc, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
332 ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
333 {
334 // check for void return type; args is passed without std::forward here, so nothing is moved from before the
 // final forward at the end of this function (the checker only inspects the argument types)
335 detail::CheckFnReturnType<TAcc>{}(kernelFnObj, args...);
336
 // The kernel object itself must satisfy isKernelTriviallyCopyable; only the diagnostic text differs
 // between the NVCC and non-NVCC branches.
337#if BOOST_COMP_NVCC
338 static_assert(
339 isKernelTriviallyCopyable<TKernelFnObj>,
340 "Kernels must be trivially copyable or an extended CUDA lambda expression!");
341#else
342 static_assert(isKernelTriviallyCopyable<TKernelFnObj>, "Kernels must be trivially copyable!");
343#endif
 // Fold expression over the comma operator: runs the per-argument assertion once for every argument type,
 // after std::decay_t has stripped references and cv-qualifiers.
344 (detail::assertKernelArgIsTriviallyCopyable<std::decay_t<TArgs>>(), ...);
 // The accelerator and the work division must agree on dimensionality and on index type.
345 static_assert(
347 "The dimensions of TAcc and TWorkDiv have to be identical!");
348 static_assert(
349 std::is_same_v<Idx<std::decay_t<TWorkDiv>>, Idx<TAcc>>,
350 "The idx type of TAcc and the idx type of TWorkDiv have to be identical!");
351
 // Only in full-debug builds: log the function name, the work division and the kernel object type.
352#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
353 std::cout << __func__ << " workDiv: " << workDiv << ", kernelFnObj: " << core::demangled<decltype(kernelFnObj)>
354 << std::endl;
355#endif
 // The kernel arguments are perfectly forwarded, preserving their value categories.
357 workDiv,
358 kernelFnObj,
359 std::forward<TArgs>(args)...);
360 }
361
362#if BOOST_COMP_CLANG
363# pragma clang diagnostic push
364# pragma clang diagnostic ignored \
365 "-Wdocumentation" // clang does not support the syntax for variadic template arguments "args,..."
366#endif
367//! Executes the given kernel in the given queue.
368//!
369//! \tparam TAcc The accelerator type.
370//! \param queue The queue to enqueue the kernel execution task into.
371//! \param workDiv The index domain work division.
372//! \param kernelFnObj The kernel function object which should be executed.
373//! \param args,... The kernel invocation arguments.
374#if BOOST_COMP_CLANG
375# pragma clang diagnostic pop
376#endif
377 template<typename TAcc, typename TQueue, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
378 ALPAKA_FN_HOST auto exec(TQueue& queue, TWorkDiv const& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
379 -> void
380 {
 // Convenience wrapper: build the task with createTaskKernel<TAcc> (which performs the compile-time checks on
 // the kernel and its arguments) and pass the resulting temporary straight to enqueue. The kernel arguments
 // are perfectly forwarded.
381 enqueue(queue, createTaskKernel<TAcc>(workDiv, kernelFnObj, std::forward<TArgs>(args)...));
382 }
383} // namespace alpaka
A n-dimensional vector.
Definition Vec.hpp:38
#define ALPAKA_FN_HOST
Definition Common.hpp:40
#define ALPAKA_FN_HOST_ACC
Definition Common.hpp:39
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: ALPAKA_NO_HOST_ACC_...
Definition Common.hpp:82
const std::string demangled
void assertKernelArgIsTriviallyCopyable()
Definition Traits.hpp:288
constexpr std::uint32_t warpSize
This is a shortcut for the trait defined above.
Definition Traits.hpp:109
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
constexpr bool isKernelTriviallyCopyable
Definition Traits.hpp:317
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> alpaka::KernelFunctionAttributes
Definition Traits.hpp:204
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:332
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition Traits.hpp:47
ALPAKA_FN_HOST auto exec(TQueue &queue, TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> void
Executes the given kernel in the given queue.
Definition Traits.hpp:378
ALPAKA_FN_HOST auto getOmpSchedule(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc > > const &blockThreadExtent, Vec< TDim, Idx< TAcc > > const &threadElemExtent, TArgs const &... args)
Definition Traits.hpp:229
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition Traits.hpp:19
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc > > const &blockThreadExtent, Vec< TDim, Idx< TAcc > > const &threadElemExtent, TArgs const &... args) -> std::size_t
Definition Traits.hpp:181
constexpr bool isKernelArgumentTriviallyCopyable
Definition Traits.hpp:267
Check if a type used as kernel argument is trivially copyable.
Definition Traits.hpp:263
Check if the kernel type is trivially copyable.
Definition Traits.hpp:313
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
Check that the return of TKernelFnObj is void.
Definition Traits.hpp:276
void operator()(TKernelFnObj const &, TArgs const &...)
Definition Traits.hpp:278
The trait for getting the size of the block shared dynamic memory of a kernel.
Definition Traits.hpp:45
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC auto getBlockSharedMemDynSizeBytes(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc > > const &blockThreadExtent, Vec< TDim, Idx< TAcc > > const &threadElemExtent, TArgs const &... args) -> std::size_t
Definition Traits.hpp:63
The kernel execution task creation trait.
Definition Traits.hpp:35
The structure template to access to the functions attributes of a kernel function object.
Definition Traits.hpp:79
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFn, TArgs &&... args) -> alpaka::KernelFunctionAttributes
Definition Traits.hpp:85
The trait for getting the schedule to use when a kernel is run using the CpuOmp2Blocks accelerator.
Definition Traits.hpp:128
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST auto getOmpSchedule(TKernelFnObj const &kernelFnObj, Vec< TDim, Idx< TAcc > > const &blockThreadExtent, Vec< TDim, Idx< TAcc > > const &threadElemExtent, TArgs const &... args) -> TraitNotSpecialized
Definition Traits.hpp:153
The trait for getting the warp size required by a kernel.
Definition Traits.hpp:104