alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
TaskKernelCpuSerial.hpp
Go to the documentation of this file.
1/* Copyright 2022 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7// Specialized traits.
10#include "alpaka/dim/Traits.hpp"
11#include "alpaka/idx/Traits.hpp"
13
14// Implementation details.
16#include "alpaka/core/Decay.hpp"
17#include "alpaka/dev/DevCpu.hpp"
23
24#include <functional>
25#include <tuple>
26#include <type_traits>
27#include <utility>
28#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
29# include <iostream>
30#endif
31
32#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
33
34namespace alpaka
35{
36 //! The CPU serial execution task implementation.
 //!
 //! The task derives from WorkDivMembers<TDim, TIdx> (which is initialised from the work division passed to
 //! the constructor) and stores the kernel function object together with decayed copies of the kernel
 //! arguments. Calling operator() queries the extents, the dynamic shared memory size and then invokes the
 //! kernel function object from within the per-block callback below.
 //!
 //! \tparam TDim The dimensionality the task and its work division share (enforced by a static_assert).
 //! \tparam TIdx The index type, forwarded to WorkDivMembers, Vec and AccCpuSerial.
 //! \tparam TKernelFnObj The kernel function object type; stored by value.
 //! \tparam TArgs The kernel argument types; stored as std::decay_t<TArgs>.
37 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
38 class TaskKernelCpuSerial final : public WorkDivMembers<TDim, TIdx>
39 {
40 public:
 //! Builds the task from a work division, a kernel function object and the kernel arguments.
 //! \param workDiv Forwarded to the WorkDivMembers base. Its dimensionality has to equal TDim.
 //! \param kernelFnObj Taken by value and moved into the task.
 //! \param args Forwarded into a tuple of decayed argument types.
41 template<typename TWorkDiv>
42 ALPAKA_FN_HOST TaskKernelCpuSerial(TWorkDiv&& workDiv, TKernelFnObj kernelFnObj, TArgs&&... args)
43 : WorkDivMembers<TDim, TIdx>(std::forward<TWorkDiv>(workDiv))
44 , m_kernelFnObj(std::move(kernelFnObj))
45 , m_args(std::forward<TArgs>(args)...)
46 {
 // Compile-time check: the work division type must report the same dimension as the task.
47 static_assert(
48 Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
49 "The work division and the execution task have to be of the same dimensionality!");
50 }
51
52 //! Executes the kernel function object.
 //! The function is const: it only reads the stored kernel function object and arguments.
53 ALPAKA_FN_HOST auto operator()() const -> void
54 {
 // NOTE(review): listing line 55 is absent from this extract. The symbol index at the end of the page
 // mentions ALPAKA_DEBUG_MINIMAL_LOG_SCOPE, so it is presumably that macro - confirm against the header.
56
 // Read the three extents of the work division stored in the base class.
57 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*this);
58 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*this);
59 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*this);
60
61 // Get the size of the block shared dynamic memory.
 // std::apply unpacks the stored argument tuple so that the arguments can be passed individually.
62 auto const blockSharedMemDynSizeBytes = std::apply(
63 [&](std::decay_t<TArgs> const&... args)
64 {
65 return getBlockSharedMemDynSizeBytes<AccCpuSerial<TDim, TIdx>>(
66 m_kernelFnObj,
67 blockThreadExtent,
68 threadElemExtent,
69 args...);
70 },
71 m_args);
72
 // Only compiled in at the full debug level: prints the computed dynamic shared memory size.
73# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
74 std::cout << __func__ << " blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes << " B"
75 << std::endl;
76# endif
77
 // NOTE(review): listing line 78 is absent from this extract. The two lines below are the tail of a
 // call or declaration; the object `acc` used further down is presumably declared here (likely an
 // AccCpuSerial<TDim, TIdx>) - confirm against the header.
79 *static_cast<WorkDivMembers<TDim, TIdx> const*>(this),
80 blockSharedMemDynSizeBytes);
81
82 // Execute the blocks serially.
 // NOTE(review): listing line 83 is absent from this extract. The lines below are the arguments of a
 // call taking an extent and a callback; the symbol index suggests ndLoopIncIdx - confirm against the
 // header.
84 gridBlockExtent,
85 [&](Vec<TDim, TIdx> const& blockThreadIdx)
86 {
 // Despite its name, the callback parameter is stored as the grid block index of the accelerator.
87 acc.m_gridBlockIdx = blockThreadIdx;
88
 // Calls the kernel function object with `acc` (by reference, via std::tie) followed by the stored
 // arguments. std::tuple_cat builds a new tuple, so the stored arguments are copied on every call.
89 std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
90
91 // After a block has been processed, the shared memory has to be deleted.
92 freeSharedVars(acc);
93 });
94 }
95
96 private:
 // The kernel function object, moved in by the constructor.
97 TKernelFnObj m_kernelFnObj;
 // Decayed copies of the kernel arguments.
98 std::tuple<std::decay_t<TArgs>...> m_args;
99 };
100
101 namespace trait
102 {
103 //! The CPU serial execution task accelerator type trait specialization.
 //! Maps TaskKernelCpuSerial<TDim, TIdx, ...> to AccCpuSerial<TDim, TIdx>; the kernel function object and
 //! argument types do not influence the result.
104 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
105 struct AccType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
106 {
107 using type = AccCpuSerial<TDim, TIdx>;
108 };
109
110 //! The CPU serial execution task device type trait specialization.
 //! Maps every TaskKernelCpuSerial instantiation to DevCpu, independent of its template arguments.
111 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
112 struct DevType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
113 {
114 using type = DevCpu;
115 };
116
117 //! The CPU serial execution task dimension getter trait specialization.
 //! Exposes the task's own TDim template argument as the dimension type.
118 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
119 struct DimType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
120 {
121 using type = TDim;
122 };
123
124 //! The CPU serial execution task platform type trait specialization.
 //! Maps every TaskKernelCpuSerial instantiation to PlatformCpu, independent of its template arguments.
125 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
126 struct PlatformType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
127 {
128 using type = PlatformCpu;
129 };
130
131 //! The CPU serial execution task idx type trait specialization.
 //! Exposes the task's own TIdx template argument as the index type.
132 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
133 struct IdxType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
134 {
135 using type = TIdx;
136 };
137
138 //! \brief Specialisation of the class template FunctionAttributes
 //! for the accelerator type AccCpuSerial<TDim, TIdx>.
139 //! \tparam TDev The device type.
140 //! \tparam TDim The dimensionality of the accelerator device properties.
141 //! \tparam TIdx The idx type of the accelerator device properties.
142 //! \tparam TKernelFn Kernel function object type.
143 //! \tparam TArgs Kernel function object argument types as a parameter pack.
144 template<typename TDev, typename TDim, typename TIdx, typename TKernelFn, typename... TArgs>
145 struct FunctionAttributes<AccCpuSerial<TDim, TIdx>, TDev, TKernelFn, TArgs...>
146 {
147 //! \param dev The device instance
148 //! \param kernelFn The kernel function object which should be executed.
149 //! \param args The kernel invocation arguments.
150 //! \return KernelFunctionAttributes instance. The default version always returns an instance with zero
151 //! fields. For CPU, the field of max threads allowed by kernel function for the block is 1.
 //! Only `dev` is read by the body below; `kernelFn` and `args` are marked [[maybe_unused]].
 // NOTE(review): listing line 152 is absent from this extract. It presumably holds the function header
 // (the symbol index at the end of the page lists a static getFunctionAttributes with these parameters) -
 // confirm against the header.
153 TDev const& dev,
154 [[maybe_unused]] TKernelFn const& kernelFn,
155 [[maybe_unused]] TArgs&&... args) -> alpaka::KernelFunctionAttributes
156 {
157 alpaka::KernelFunctionAttributes kernelFunctionAttributes;
158
159 // set function properties for maxThreadsPerBlock to device properties, since API doesn't have function
160 // properties function.
161 auto const& props = alpaka::getAccDevProps<AccCpuSerial<TDim, TIdx>>(dev);
 // The device property value is narrowed to int to fit the attribute field.
162 kernelFunctionAttributes.maxThreadsPerBlock = static_cast<int>(props.m_blockThreadCountMax);
 // The constant is multiplied by 1024 (KiB to bytes, going by its name) before the cast to int.
163 kernelFunctionAttributes.maxDynamicSharedSizeBytes
164 = static_cast<int>(alpaka::BlockSharedDynMemberAllocKiB * 1024);
165 return kernelFunctionAttributes;
166 }
167 };
168 } // namespace trait
169} // namespace alpaka
170
171#endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
The CPU serial accelerator.
The CPU serial execution task implementation.
ALPAKA_FN_HOST TaskKernelCpuSerial(TWorkDiv &&workDiv, TKernelFnObj kernelFnObj, TArgs &&... args)
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
An n-dimensional vector.
Definition Vec.hpp:38
A basic class holding the work division as grid block extent, block thread and thread element extent.
#define ALPAKA_FN_HOST
Definition Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition NdLoop.hpp:81
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition Traits.hpp:19
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Definition Traits.hpp:54
STL namespace.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFn, TArgs &&... args) -> alpaka::KernelFunctionAttributes
Definition Traits.hpp:85