alpaka
Abstraction Library for Parallel Kernel Acceleration
TaskKernelCpuSerial.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 // Specialized traits.
8 #include "alpaka/acc/Traits.hpp"
9 #include "alpaka/dev/Traits.hpp"
10 #include "alpaka/dim/Traits.hpp"
11 #include "alpaka/idx/Traits.hpp"
13 
14 // Implementation details.
16 #include "alpaka/core/Decay.hpp"
17 #include "alpaka/dev/DevCpu.hpp"
18 #include "alpaka/kernel/Traits.hpp"
19 #include "alpaka/meta/NdLoop.hpp"
21 
22 #include <functional>
23 #include <tuple>
24 #include <type_traits>
25 #include <utility>
26 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
27 # include <iostream>
28 #endif
29 
30 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
31 
32 namespace alpaka
33 {
34  //! The CPU serial execution task implementation.
35  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
36  class TaskKernelCpuSerial final : public WorkDivMembers<TDim, TIdx>
37  {
38  public:
39  template<typename TWorkDiv>
40  ALPAKA_FN_HOST TaskKernelCpuSerial(TWorkDiv&& workDiv, TKernelFnObj kernelFnObj, TArgs&&... args)
41  : WorkDivMembers<TDim, TIdx>(std::forward<TWorkDiv>(workDiv))
42  , m_kernelFnObj(std::move(kernelFnObj))
43  , m_args(std::forward<TArgs>(args)...)
44  {
45  static_assert(
46  Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
47  "The work division and the execution task have to be of the same dimensionality!");
48  }
49 
50  //! Executes the kernel function object.
51  ALPAKA_FN_HOST auto operator()() const -> void
52  {
54 
55  auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*this);
56  auto const blockThreadExtent = getWorkDiv<Block, Threads>(*this);
57  auto const threadElemExtent = getWorkDiv<Thread, Elems>(*this);
58 
59  // Get the size of the block shared dynamic memory.
60  auto const blockSharedMemDynSizeBytes = std::apply(
61  [&](std::decay_t<TArgs> const&... args)
62  {
63  return getBlockSharedMemDynSizeBytes<AccCpuSerial<TDim, TIdx>>(
64  m_kernelFnObj,
65  blockThreadExtent,
66  threadElemExtent,
67  args...);
68  },
69  m_args);
70 
71 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
72  std::cout << __func__ << " blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes << " B"
73  << std::endl;
74 # endif
75 
77  *static_cast<WorkDivMembers<TDim, TIdx> const*>(this),
78  blockSharedMemDynSizeBytes);
79 
80  if(blockThreadExtent.prod() != static_cast<TIdx>(1u))
81  {
82  throw std::runtime_error("A block for the serial accelerator can only ever have one single thread!");
83  }
84 
85  // Execute the blocks serially.
87  gridBlockExtent,
88  [&](Vec<TDim, TIdx> const& blockThreadIdx)
89  {
90  acc.m_gridBlockIdx = blockThreadIdx;
91 
92  std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
93 
94  // After a block has been processed, the shared memory has to be deleted.
95  freeSharedVars(acc);
96  });
97  }
98 
99  private:
100  TKernelFnObj m_kernelFnObj;
101  std::tuple<std::decay_t<TArgs>...> m_args;
102  };
103 
104  namespace trait
105  {
106  //! The CPU serial execution task accelerator type trait specialization.
107  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
108  struct AccType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
109  {
111  };
112 
113  //! The CPU serial execution task device type trait specialization.
114  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
115  struct DevType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
116  {
117  using type = DevCpu;
118  };
119 
120  //! The CPU serial execution task dimension getter trait specialization.
121  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
122  struct DimType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
123  {
124  using type = TDim;
125  };
126 
127  //! The CPU serial execution task platform type trait specialization.
128  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
129  struct PlatformType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
130  {
131  using type = PlatformCpu;
132  };
133 
134  //! The CPU serial execution task idx type trait specialization.
135  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
136  struct IdxType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
137  {
138  using type = TIdx;
139  };
140  } // namespace trait
141 } // namespace alpaka
142 
143 #endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
The CPU serial accelerator.
The CPU device handle.
Definition: DevCpu.hpp:56
The CPU serial execution task implementation.
ALPAKA_FN_HOST TaskKernelCpuSerial(TWorkDiv &&workDiv, TKernelFnObj kernelFnObj, TArgs &&... args)
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
A basic class holding the work division as grid block extent, block thread extent, and thread element extent.
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition: NdLoop.hpp:81
The alpaka accelerator library.
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Definition: Traits.hpp:54
The CPU device platform.
Definition: PlatformCpu.hpp:18
The accelerator type trait.
Definition: Traits.hpp:37
The device type trait.
Definition: Traits.hpp:23
The dimension getter type trait.
Definition: Traits.hpp:14
The idx type trait.
Definition: Traits.hpp:25
The platform type trait.
Definition: Traits.hpp:30