alpaka
Abstraction Library for Parallel Kernel Acceleration
AccCpuSerial.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 // Base classes.
24 
25 // Specialized traits.
26 #include "alpaka/acc/Traits.hpp"
27 #include "alpaka/dev/Traits.hpp"
28 #include "alpaka/idx/Traits.hpp"
29 #include "alpaka/kernel/Traits.hpp"
31 
32 // Implementation details.
33 #include "alpaka/acc/Tag.hpp"
35 #include "alpaka/dev/DevCpu.hpp"
36 
37 #include <memory>
38 #include <typeinfo>
39 
40 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
41 
42 namespace alpaka
43 {
//! Forward declaration of the CPU serial kernel task class template.
//! Only the declaration is needed here: the class is befriended by AccCpuSerial and returned by
//! trait::CreateTaskKernel further down; its definition is not part of this listing.
44  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
45  class TaskKernelCpuSerial;
46 
47  //! The CPU serial accelerator.
48  //!
49  //! This accelerator allows serial kernel execution on a CPU device.
50  //! The block thread extent is restricted to 1x1x1 and all blocks are executed serially so there is no parallelism at all.
    //!
    //! The class is assembled from one base class per accelerator feature (work division, indices, atomics,
    //! math, shared memory, synchronization, intrinsics, memory fence, random numbers, warp).
    //!
    //! \tparam TDim The dimensionality of the accelerator.
    //! \tparam TIdx The index type; must be at least as large as int (see the static_assert below).
51  template<typename TDim, typename TIdx>
52  class AccCpuSerial final
53  : public WorkDivMembers<TDim, TIdx>
54  , public gb::IdxGbRef<TDim, TIdx>
55  , public bt::IdxBtZero<TDim, TIdx>
56  , public AtomicHierarchy<
57  AtomicCpu, // grid atomics
58  AtomicNoOp, // block atomics
59  AtomicNoOp> // thread atomics
60  , public math::MathStdLib
61  , public BlockSharedMemDynMember<>
62  , public BlockSharedMemStMember<>
63  , public BlockSyncNoOp
64  , public IntrinsicCpu
65  , public MemFenceCpuSerial
66 # ifdef ALPAKA_DISABLE_VENDOR_RNG
67  , public rand::RandDefault
68 # else
69  , public rand::RandStdLib
70 # endif
71  , public warp::WarpSingleThread
72  , public interface::Implements<ConceptAcc, AccCpuSerial<TDim, TIdx>>
73  {
    // Index types narrower than int are rejected at compile time.
74  static_assert(
75  sizeof(TIdx) >= sizeof(int),
76  "Index type is not supported, consider using int or a larger type.");
77 
78  public:
79  // Partial specialization with the correct TDim and TIdx is not allowed.
    // A friend declaration cannot name a partial specialization, so the template is befriended with fresh
    // parameters (TDim2, TIdx2) and every TaskKernelCpuSerial instantiation becomes a friend.
    // Presumably this is what lets the task call the private constructor below - confirm against the task header.
80  template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
81  friend class ::alpaka::TaskKernelCpuSerial;
82 
    // The accelerator is neither copyable nor assignable.
    // NOTE(review): listing line 84 is absent from this extract; the member index at the end of the page also
    // lists a deleted move constructor `AccCpuSerial(AccCpuSerial &&)=delete` - confirm against the original header.
83  AccCpuSerial(AccCpuSerial const&) = delete;
85  auto operator=(AccCpuSerial const&) -> AccCpuSerial& = delete;
86  auto operator=(AccCpuSerial&&) -> AccCpuSerial& = delete;
87 
88  private:
    //! Private constructor; not callable from outside the class and its friends.
    //!
    //! \param workDiv The work division forwarded to the WorkDivMembers base.
    //! \param blockSharedMemDynSizeBytes The size forwarded to the BlockSharedMemDynMember base.
    //!
    //! gb::IdxGbRef is handed a reference to m_gridBlockIdx before that member is initialized (bases are
    //! constructed before members); this is fine as long as the base only binds the reference here and does
    //! not read through it - presumably the case, confirm in IdxGbRef.hpp.
    //! NOTE(review): listing line 94 (one initializer) is absent from this extract.
89  template<typename TWorkDiv>
90  ALPAKA_FN_HOST AccCpuSerial(TWorkDiv const& workDiv, size_t const& blockSharedMemDynSizeBytes)
91  : WorkDivMembers<TDim, TIdx>(workDiv)
92  , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
93  , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
95  , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
96  {
97  }
98 
99  private:
100  // getIdx
    // Declared mutable so it can be modified through a const accelerator object; starts out as all zeros.
    // Presumably updated by the befriended task as it walks over the blocks - confirm against the task header.
101  Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
102  };
103 
104  namespace trait
105  {
106  //! The CPU serial accelerator type trait specialization.
     //! NOTE(review): listing line 110 (the struct body) is absent from this extract - confirm against the
     //! original header before relying on this listing.
107  template<typename TDim, typename TIdx>
108  struct AccType<AccCpuSerial<TDim, TIdx>>
109  {
111  };
112 
113  //! The CPU serial single thread accelerator type trait specialization.
     //! Derives from std::true_type: AccCpuSerial is reported as a single thread accelerator.
114  template<typename TDim, typename TIdx>
115  struct IsSingleThreadAcc<AccCpuSerial<TDim, TIdx>> : std::true_type
116  {
117  };
118 
119  //! The CPU serial multi thread accelerator type trait specialization.
     //! Derives from std::false_type: AccCpuSerial is not reported as a multi thread accelerator.
120  template<typename TDim, typename TIdx>
121  struct IsMultiThreadAcc<AccCpuSerial<TDim, TIdx>> : std::false_type
122  {
123  };
124 
125  //! The CPU serial accelerator device properties get trait specialization.
     //!
     //! The initializers visible here report a multiprocessor count of 1, a maximum block thread count of 1,
     //! AccCpuSerial<TDim, TIdx>::staticAllocBytes() bytes of shared memory and getMemBytes(dev) bytes of
     //! global memory.
     //! NOTE(review): listing lines 129, 134, 136, 138, 142 and 144 (the function signature and the remaining
     //! initializers) are absent from this extract - confirm against the original header.
126  template<typename TDim, typename TIdx>
127  struct GetAccDevProps<AccCpuSerial<TDim, TIdx>>
128  {
130  {
131  return {// m_multiProcessorCount
132  static_cast<TIdx>(1),
133  // m_gridBlockExtentMax
135  // m_gridBlockCountMax
137  // m_blockThreadExtentMax
139  // m_blockThreadCountMax
140  static_cast<TIdx>(1),
141  // m_threadElemExtentMax
143  // m_threadElemCountMax
145  // m_sharedMemSizeBytes
146  static_cast<size_t>(AccCpuSerial<TDim, TIdx>::staticAllocBytes()),
147  // m_globalMemSizeBytes
148  getMemBytes(dev)};
149  }
150  };
151 
152  //! The CPU serial accelerator name trait specialization.
153  template<typename TDim, typename TIdx>
154  struct GetAccName<AccCpuSerial<TDim, TIdx>>
155  {
     //! \return The string "AccCpuSerial<D,I>" where D is std::to_string(TDim::value) and I is
     //! core::demangled<TIdx>.
156  ALPAKA_FN_HOST static auto getAccName() -> std::string
157  {
158  return "AccCpuSerial<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">";
159  }
160  };
161 
162  //! The CPU serial accelerator device type trait specialization.
     //! Maps AccCpuSerial to the DevCpu device type.
163  template<typename TDim, typename TIdx>
164  struct DevType<AccCpuSerial<TDim, TIdx>>
165  {
166  using type = DevCpu;
167  };
168 
169  //! The CPU serial accelerator dimension getter trait specialization.
     //! Exposes the accelerator's TDim template argument as the dimension type.
170  template<typename TDim, typename TIdx>
171  struct DimType<AccCpuSerial<TDim, TIdx>>
172  {
173  using type = TDim;
174  };
175 
176  //! The CPU serial accelerator execution task type trait specialization.
     //!
     //! Builds a TaskKernelCpuSerial from the work division, the kernel function object and the perfectly
     //! forwarded kernel arguments.
     //!
     //! \throws std::runtime_error if the product of workDiv.m_blockThreadExtent is not 1.
     //! NOTE(review): listing line 180 (the start of the function signature) is absent from this extract; the
     //! member index at the end of the page shows it as `static ALPAKA_FN_HOST auto createTaskKernel(...)`.
177  template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
178  struct CreateTaskKernel<AccCpuSerial<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
179  {
181  TWorkDiv const& workDiv,
182  TKernelFnObj const& kernelFnObj,
183  TArgs&&... args)
184  {
     // This accelerator accepts exactly one thread per block; any other block thread extent is rejected up front.
185  if(workDiv.m_blockThreadExtent.prod() != static_cast<TIdx>(1u))
186  {
187  throw std::runtime_error(
188  "The given work division is not valid for a single thread Acc: "
189  + getAccName<AccCpuSerial<TDim, TIdx>>() + ". Threads per block should be 1!");
190  }
191 
192  return TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>(
193  workDiv,
194  kernelFnObj,
195  std::forward<TArgs>(args)...);
196  }
197  };
198 
199  //! The CPU serial accelerator platform type trait specialization.
     //! Maps AccCpuSerial to the PlatformCpu platform type.
200  template<typename TDim, typename TIdx>
201  struct PlatformType<AccCpuSerial<TDim, TIdx>>
202  {
203  using type = PlatformCpu;
204  };
205 
206  //! The CPU serial accelerator idx type trait specialization.
     //! Exposes the accelerator's TIdx template argument as the index type.
207  template<typename TDim, typename TIdx>
208  struct IdxType<AccCpuSerial<TDim, TIdx>>
209  {
210  using type = TIdx;
211  };
212 
     //! The CPU serial accelerator to tag trait specialization.
     //! NOTE(review): listing line 216 (the struct body) is absent from this extract; presumably it aliases
     //! alpaka::TagCpuSerial, the tag used by the TagToAcc specialization that follows - confirm.
213  template<typename TDim, typename TIdx>
214  struct AccToTag<alpaka::AccCpuSerial<TDim, TIdx>>
215  {
217  };
218 
     //! The alpaka::TagCpuSerial tag to accelerator trait specialization.
     //! NOTE(review): listing line 222 (the struct body) is absent from this extract; presumably it aliases
     //! alpaka::AccCpuSerial<TDim, TIdx> - confirm against the original header.
219  template<typename TDim, typename TIdx>
220  struct TagToAcc<alpaka::TagCpuSerial, TDim, TIdx>
221  {
223  };
224  } // namespace trait
225 } // namespace alpaka
226 
227 #endif
The CPU serial accelerator.
auto operator=(AccCpuSerial &&) -> AccCpuSerial &=delete
auto operator=(AccCpuSerial const &) -> AccCpuSerial &=delete
AccCpuSerial(AccCpuSerial &&)=delete
AccCpuSerial(AccCpuSerial const &)=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
static constexpr auto staticAllocBytes() -> std::uint32_t
auto staticMemBegin() const -> uint8_t *
auto staticMemCapacity() const -> std::uint32_t
Static block shared memory provider using a pointer to externally allocated fixed-size memory,...
The no op block synchronization.
The CPU device handle.
Definition: DevCpu.hpp:56
The CPU intrinsic.
The serial CPU memory fence.
The CPU serial execution task implementation.
A n-dimensional vector.
Definition: Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition: Vec.hpp:133
A basic class holding the work division as grid block extent, block thread and thread element extent.
A zero block thread index provider.
Definition: IdxBtZero.hpp:19
An IdxGbRef grid block index.
Definition: IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition: IdxGbRef.hpp:22
The standard library math trait specializations.
Definition: MathStdLib.hpp:249
"Tiny" state Mersenne Twister implementation
Definition: RandStdLib.hpp:20
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric va...
Definition: Traits.hpp:1263
The alpaka accelerator library.
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition: Traits.hpp:95
ALPAKA_FN_HOST auto getAccName() -> std::string
Definition: Traits.hpp:100
The acceleration properties on a device.
Definition: AccDevProps.hpp:18
The CPU device platform.
Definition: PlatformCpu.hpp:18
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition: Interface.hpp:15
The accelerator type trait.
Definition: Traits.hpp:37
static ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
The kernel execution task creation trait.
Definition: Traits.hpp:35
The device type trait.
Definition: Traits.hpp:23
The dimension getter type trait.
Definition: Traits.hpp:14
static ALPAKA_FN_HOST auto getAccDevProps(DevCpu const &dev) -> AccDevProps< TDim, TIdx >
The device properties get trait.
Definition: Traits.hpp:61
static ALPAKA_FN_HOST auto getAccName() -> std::string
The accelerator name trait.
Definition: Traits.hpp:68
The idx type trait.
Definition: Traits.hpp:25
The multi thread accelerator trait.
Definition: Traits.hpp:56
The single thread accelerator trait.
Definition: Traits.hpp:46
The platform type trait.
Definition: Traits.hpp:30