alpaka
Abstraction Library for Parallel Kernel Acceleration
TaskKernelCpuSerial.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 // Specialized traits.
8 #include "alpaka/acc/Traits.hpp"
9 #include "alpaka/dev/Traits.hpp"
10 #include "alpaka/dim/Traits.hpp"
11 #include "alpaka/idx/Traits.hpp"
13 
14 // Implementation details.
16 #include "alpaka/core/Decay.hpp"
17 #include "alpaka/dev/DevCpu.hpp"
19 #include "alpaka/kernel/Traits.hpp"
20 #include "alpaka/meta/NdLoop.hpp"
23 
24 #include <functional>
25 #include <tuple>
26 #include <type_traits>
27 #include <utility>
28 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
29 # include <iostream>
30 #endif
31 
32 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
33 
34 namespace alpaka
35 {
36  //! The CPU serial execution task implementation.
37  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
38  class TaskKernelCpuSerial final : public WorkDivMembers<TDim, TIdx>
39  {
40  public:
41  template<typename TWorkDiv>
42  ALPAKA_FN_HOST TaskKernelCpuSerial(TWorkDiv&& workDiv, TKernelFnObj kernelFnObj, TArgs&&... args)
43  : WorkDivMembers<TDim, TIdx>(std::forward<TWorkDiv>(workDiv))
44  , m_kernelFnObj(std::move(kernelFnObj))
45  , m_args(std::forward<TArgs>(args)...)
46  {
47  static_assert(
48  Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
49  "The work division and the execution task have to be of the same dimensionality!");
50  }
51 
52  //! Executes the kernel function object.
53  ALPAKA_FN_HOST auto operator()() const -> void
54  {
56 
57  auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*this);
58  auto const blockThreadExtent = getWorkDiv<Block, Threads>(*this);
59  auto const threadElemExtent = getWorkDiv<Thread, Elems>(*this);
60 
61  // Get the size of the block shared dynamic memory.
62  auto const blockSharedMemDynSizeBytes = std::apply(
63  [&](std::decay_t<TArgs> const&... args)
64  {
65  return getBlockSharedMemDynSizeBytes<AccCpuSerial<TDim, TIdx>>(
66  m_kernelFnObj,
67  blockThreadExtent,
68  threadElemExtent,
69  args...);
70  },
71  m_args);
72 
73 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
74  std::cout << __func__ << " blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes << " B"
75  << std::endl;
76 # endif
77 
79  *static_cast<WorkDivMembers<TDim, TIdx> const*>(this),
80  blockSharedMemDynSizeBytes);
81 
82  // Execute the blocks serially.
84  gridBlockExtent,
85  [&](Vec<TDim, TIdx> const& blockThreadIdx)
86  {
87  acc.m_gridBlockIdx = blockThreadIdx;
88 
89  std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
90 
91  // After a block has been processed, the shared memory has to be deleted.
92  freeSharedVars(acc);
93  });
94  }
95 
96  private:
97  TKernelFnObj m_kernelFnObj;
98  std::tuple<std::decay_t<TArgs>...> m_args;
99  };
100 
101  namespace trait
102  {
103  //! The CPU serial execution task accelerator type trait specialization.
104  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
105  struct AccType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
106  {
108  };
109 
110  //! The CPU serial execution task device type trait specialization.
111  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
112  struct DevType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
113  {
114  using type = DevCpu;
115  };
116 
117  //! The CPU serial execution task dimension getter trait specialization.
118  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
119  struct DimType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
120  {
121  using type = TDim;
122  };
123 
124  //! The CPU serial execution task platform type trait specialization.
125  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
126  struct PlatformType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
127  {
128  using type = PlatformCpu;
129  };
130 
131  //! The CPU serial execution task idx type trait specialization.
132  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
133  struct IdxType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
134  {
135  using type = TIdx;
136  };
137 
138  //! \brief Specialisation of the class template FunctionAttributes
139  //! \tparam TDev The device type.
140  //! \tparam TDim The dimensionality of the accelerator device properties.
141  //! \tparam TIdx The idx type of the accelerator device properties.
142  //! \tparam TKernelFn Kernel function object type.
143  //! \tparam TArgs Kernel function object argument types as a parameter pack.
144  template<typename TDev, typename TDim, typename TIdx, typename TKernelFn, typename... TArgs>
145  struct FunctionAttributes<AccCpuSerial<TDim, TIdx>, TDev, TKernelFn, TArgs...>
146  {
147  //! \param dev The device instance
148  //! \param kernelFn The kernel function object which should be executed.
149  //! \param args The kernel invocation arguments.
150  //! \return KernelFunctionAttributes instance. The default version always returns an instance with zero
151  //! fields. For CPU, the field of max threads allowed by kernel function for the block is 1.
153  TDev const& dev,
154  [[maybe_unused]] TKernelFn const& kernelFn,
155  [[maybe_unused]] TArgs&&... args) -> alpaka::KernelFunctionAttributes
156  {
157  alpaka::KernelFunctionAttributes kernelFunctionAttributes;
158 
159  // set function properties for maxThreadsPerBlock to device properties, since API doesn't have function
160  // properties function.
161  auto const& props = alpaka::getAccDevProps<AccCpuSerial<TDim, TIdx>>(dev);
162  kernelFunctionAttributes.maxThreadsPerBlock = static_cast<int>(props.m_blockThreadCountMax);
163  kernelFunctionAttributes.maxDynamicSharedSizeBytes
164  = static_cast<int>(alpaka::BlockSharedDynMemberAllocKiB * 1024);
165  return kernelFunctionAttributes;
166  }
167  };
168  } // namespace trait
169 } // namespace alpaka
170 
171 #endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
The CPU serial accelerator.
The CPU device handle.
Definition: DevCpu.hpp:56
The CPU serial execution task implementation.
ALPAKA_FN_HOST TaskKernelCpuSerial(TWorkDiv &&workDiv, TKernelFnObj kernelFnObj, TArgs &&... args)
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
A basic class holding the work division as grid block extent, block thread and thread element extent.
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition: NdLoop.hpp:81
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Definition: Traits.hpp:54
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
The CPU device platform.
Definition: PlatformCpu.hpp:18
The accelerator type trait.
Definition: Traits.hpp:37
The device type trait.
Definition: Traits.hpp:23
The dimension getter type trait.
Definition: Traits.hpp:14
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, [[maybe_unused]] TKernelFn const &kernelFn, [[maybe_unused]] TArgs &&... args) -> alpaka::KernelFunctionAttributes
The structure template to access the function attributes of a kernel function object.
Definition: Traits.hpp:79
The idx type trait.
Definition: Traits.hpp:25
The platform type trait.
Definition: Traits.hpp:30