alpaka
Abstraction Library for Parallel Kernel Acceleration
AccCpuThreads.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 // Base classes.
23 
24 // Specialized traits.
25 #include "alpaka/acc/Traits.hpp"
26 #include "alpaka/dev/Traits.hpp"
27 #include "alpaka/idx/Traits.hpp"
28 #include "alpaka/kernel/Traits.hpp"
30 
31 // Implementation details.
32 #include "alpaka/acc/Tag.hpp"
34 #include "alpaka/core/ClipCast.hpp"
35 #include "alpaka/core/Concepts.hpp"
36 #include "alpaka/dev/DevCpu.hpp"
37 
38 #include <memory>
39 #include <thread>
40 #include <typeinfo>
41 
42 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED
43 
44 namespace alpaka
45 {
46  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
47  class TaskKernelCpuThreads; // Forward declaration only; AccCpuThreads names this template as a friend.
48 
49  //! The CPU threads accelerator.
50  //!
51  //! This accelerator allows parallel kernel execution on a CPU device.
52  //! It uses std::thread to implement the parallelism.
    //!
    //! The class is final and composes its functionality purely by inheriting from the base classes listed below.
    //! It cannot be copied or assigned, and its only constructor is private, so within this file only the
    //! befriended TaskKernelCpuThreads template is able to create instances.
    //!
    //! \tparam TDim The dimension type forwarded to the work division and index bases.
    //! \tparam TIdx The index type; it must be at least as wide as int (enforced by a static_assert).
53  template<typename TDim, typename TIdx>
54  class AccCpuThreads final
55  : public WorkDivMembers<TDim, TIdx>
56  , public gb::IdxGbRef<TDim, TIdx>
57  , public bt::IdxBtRefThreadIdMap<TDim, TIdx>
58  , public AtomicHierarchy<
59  AtomicCpu, // grid atomics
60  AtomicCpu, // block atomics
61  AtomicCpu> // thread atomics
62  , public math::MathStdLib
63  , public BlockSharedMemDynMember<>
    // NOTE(review): listing line 64 (one more base class) is not visible in this listing.
65  , public BlockSyncBarrierThread<TIdx>
66  , public IntrinsicCpu
67  , public MemFenceCpu
    // The random number base is selected at compile time by ALPAKA_DISABLE_VENDOR_RNG.
68 # ifdef ALPAKA_DISABLE_VENDOR_RNG
69  , public rand::RandDefault
70 # else
71  , public rand::RandStdLib
72 # endif
73  , public warp::WarpSingleThread
74  , public concepts::Implements<ConceptAcc, AccCpuThreads<TDim, TIdx>>
75  {
    // Reject index types that are narrower than int at compile time.
76  static_assert(
77  sizeof(TIdx) >= sizeof(int),
78  "Index type is not supported, consider using int or a larger type.");
79 
80  public:
81  // Partial specialization with the correct TDim and TIdx is not allowed.
    // Therefore every specialization of TaskKernelCpuThreads is declared a friend.
82  template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
83  friend class ::alpaka::TaskKernelCpuThreads;
84 
    // Copy construction and both assignment operators are disabled.
    // NOTE(review): listing line 86 is not visible in this listing.
85  AccCpuThreads(AccCpuThreads const&) = delete;
87  auto operator=(AccCpuThreads const&) -> AccCpuThreads& = delete;
88  auto operator=(AccCpuThreads&&) -> AccCpuThreads& = delete;
89 
90  private:
    //! Private constructor, reachable only through the friend declaration above.
    //!
    //! \param workDiv The work division handed to the WorkDivMembers base. The product of its
    //!        block-thread extent is passed to the BlockSyncBarrierThread base.
    //! \param blockSharedMemDynSizeBytes The size in bytes handed to the BlockSharedMemDynMember base.
91  template<typename TWorkDiv>
92  ALPAKA_FN_HOST AccCpuThreads(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
93  : WorkDivMembers<TDim, TIdx>(workDiv)
    // The two index bases are handed the members m_gridBlockIdx and m_threadToIndexMap, which are
    // declared further down in this class and therefore initialized after all base classes.
94  , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
95  , bt::IdxBtRefThreadIdMap<TDim, TIdx>(m_threadToIndexMap)
96  , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
    // NOTE(review): listing lines 97-99 (the start of the initializer that receives the two callbacks
    // below) are not visible in this listing. The first callback synchronizes the block threads, the
    // second reports whether the calling thread is the one stored in m_idMasterThread.
100  [this]() { syncBlockThreads(*this); },
101  [this]() noexcept { return (m_idMasterThread == std::this_thread::get_id()); })
102  , BlockSyncBarrierThread<TIdx>(getWorkDiv<Block, Threads>(workDiv).prod())
103  , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
104  {
105  }
106 
107  private:
    // All data members are mutable, so they can be modified through a const accelerator object.
108  // getIdx
109  std::mutex mutable m_mtxMapInsert; //!< The mutex used to secure insertion into the ThreadIdToIdxMap.
110  typename bt::IdxBtRefThreadIdMap<TDim, TIdx>::
111  ThreadIdToIdxMap mutable m_threadToIndexMap; //!< The mapping of thread ids to indices.
112  Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
113 
114  // allocBlockSharedArr
115  std::thread::id mutable m_idMasterThread; //!< The id of the master thread.
116  };
117 
118  namespace trait
119  {
120  //! The CPU threads accelerator type trait specialization.
121  template<typename TDim, typename TIdx>
122  struct AccType<AccCpuThreads<TDim, TIdx>>
123  {
     // NOTE(review): listing line 124 (the body of this specialization) is not visible in this listing.
125  };
126 
127  //! The CPU threads single thread accelerator type trait specialization.
     //! Derives from std::false_type, i.e. AccCpuThreads is not reported as a single-thread accelerator.
128  template<typename TDim, typename TIdx>
129  struct IsSingleThreadAcc<AccCpuThreads<TDim, TIdx>> : std::false_type
130  {
131  };
132 
133  //! The CPU threads multi thread accelerator type trait specialization.
     //! Derives from std::true_type, i.e. AccCpuThreads is reported as a multi-thread accelerator.
134  template<typename TDim, typename TIdx>
135  struct IsMultiThreadAcc<AccCpuThreads<TDim, TIdx>> : std::true_type
136  {
137  };
138 
139  //! The CPU threads accelerator device properties get trait specialization.
     //!
     //! The returned property set uses a multiprocessor count of 1, the per-block thread limit computed
     //! below for both the block thread extent and the block thread count, and the value of
     //! getMemBytes(dev) for both the shared and the global memory size.
     //! NOTE(review): the function signature (listing line 143) and several initializer values
     //! (listing lines 159, 161, 167 and 169) are not visible in this listing.
140  template<typename TDim, typename TIdx>
141  struct GetAccDevProps<AccCpuThreads<TDim, TIdx>>
142  {
144  {
145 # ifdef ALPAKA_CI
     // When ALPAKA_CI is defined, the per-block thread limit is fixed to 8.
146  auto const blockThreadCountMax(static_cast<TIdx>(8));
147 # else
148  // \TODO: Magic number. What is the maximum? Just set a reasonable value? There is an implementation-
149  // defined maximum where the creation of a new thread crashes. std::thread::hardware_concurrency can
150  // return 0, so 1 is the default case?
     // The limit is eight times the reported hardware concurrency, converted to TIdx via clipCast,
     // and never less than 1.
151  auto const blockThreadCountMax = std::max(
152  static_cast<TIdx>(1),
153  alpaka::core::clipCast<TIdx>(std::thread::hardware_concurrency() * 8));
154 # endif
     // Queried once and used for both memory size entries below.
155  auto const memBytes = getMemBytes(dev);
156  return {// m_multiProcessorCount
157  static_cast<TIdx>(1),
158  // m_gridBlockExtentMax
160  // m_gridBlockCountMax
162  // m_blockThreadExtentMax
163  Vec<TDim, TIdx>::all(blockThreadCountMax),
164  // m_blockThreadCountMax
165  blockThreadCountMax,
166  // m_threadElemExtentMax
168  // m_threadElemCountMax
170  // m_sharedMemSizeBytes
171  memBytes,
172  // m_globalMemSizeBytes
173  memBytes};
174  }
175  };
176 
177  //! The CPU threads accelerator name trait specialization.
178  template<typename TDim, typename TIdx>
179  struct GetAccName<AccCpuThreads<TDim, TIdx>>
180  {
     //! \return The string "AccCpuThreads<D,I>", where D is std::to_string(TDim::value)
     //!         and I is core::demangled<TIdx>.
181  ALPAKA_FN_HOST static auto getAccName() -> std::string
182  {
183  return "AccCpuThreads<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">";
184  }
185  };
186 
187  //! The CPU threads accelerator device type trait specialization.
     //! Maps AccCpuThreads to the device type DevCpu.
188  template<typename TDim, typename TIdx>
189  struct DevType<AccCpuThreads<TDim, TIdx>>
190  {
191  using type = DevCpu;
192  };
193 
194  //! The CPU threads accelerator dimension getter trait specialization.
     //! Exposes the accelerator's own TDim template argument as the dimension type.
195  template<typename TDim, typename TIdx>
196  struct DimType<AccCpuThreads<TDim, TIdx>>
197  {
198  using type = TDim;
199  };
200 
201  //! The CPU threads accelerator execution task type trait specialization.
     //!
     //! Creates a TaskKernelCpuThreads<TDim, TIdx, TKernelFnObj, TArgs...> from the work division,
     //! the kernel function object and the perfectly forwarded kernel arguments.
     //! NOTE(review): listing line 205 (the start of the function signature) is not visible in this listing.
202  template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
203  struct CreateTaskKernel<AccCpuThreads<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
204  {
206  TWorkDiv const& workDiv,
207  TKernelFnObj const& kernelFnObj,
208  TArgs&&... args)
209  {
210  return TaskKernelCpuThreads<TDim, TIdx, TKernelFnObj, TArgs...>(
211  workDiv,
212  kernelFnObj,
213  std::forward<TArgs>(args)...);
214  }
215  };
216 
217  //! The CPU threads execution task platform type trait specialization.
     //! Maps AccCpuThreads to the platform type PlatformCpu.
218  template<typename TDim, typename TIdx>
219  struct PlatformType<AccCpuThreads<TDim, TIdx>>
220  {
221  using type = PlatformCpu;
222  };
223 
224  //! The CPU threads accelerator idx type trait specialization.
     //! Exposes the accelerator's own TIdx template argument as the index type.
225  template<typename TDim, typename TIdx>
226  struct IdxType<AccCpuThreads<TDim, TIdx>>
227  {
228  using type = TIdx;
229  };
230 
231  template<typename TDim, typename TIdx>
232  struct AccToTag<alpaka::AccCpuThreads<TDim, TIdx>> // AccToTag trait specialization for AccCpuThreads.
233  {
     // NOTE(review): listing line 234 (the body of this specialization) is not visible in this listing.
235  };
236 
237  template<typename TDim, typename TIdx>
238  struct TagToAcc<alpaka::TagCpuThreads, TDim, TIdx> // TagToAcc trait specialization for TagCpuThreads.
239  {
     // NOTE(review): listing line 240 (the body of this specialization) is not visible in this listing.
241  };
242  } // namespace trait
243 } // namespace alpaka
244 
245 #endif
The CPU threads accelerator.
auto operator=(AccCpuThreads const &) -> AccCpuThreads &=delete
AccCpuThreads(AccCpuThreads &&)=delete
AccCpuThreads(AccCpuThreads const &)=delete
auto operator=(AccCpuThreads &&) -> AccCpuThreads &=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
auto staticMemBegin() const -> uint8_t *
auto staticMemCapacity() const -> std::uint32_t
The thread id map barrier block synchronization.
The CPU device handle.
Definition: DevCpu.hpp:56
The CPU intrinsic.
The default CPU memory fence.
Definition: MemFenceCpu.hpp:16
The CPU threads execution task.
An n-dimensional vector.
Definition: Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition: Vec.hpp:116
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto zeros() -> Vec< TDim, TIdx >
Zero value constructor.
Definition: Vec.hpp:126
A basic class holding the work division as grid block extent, block thread and thread element extent.
The threads accelerator index provider.
ALPAKA_FN_HOST IdxBtRefThreadIdMap(ThreadIdToIdxMap const &mThreadToIndices)
An IdxGbRef grid block index.
Definition: IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition: IdxGbRef.hpp:22
The standard library math trait specializations.
Definition: MathStdLib.hpp:249
"Tiny" state mersenne twister implementation
Definition: RandStdLib.hpp:20
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric va...
Definition: Traits.hpp:1263
The alpaka accelerator library.
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition: Traits.hpp:95
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreads(TBlockSync const &blockSync) -> void
Synchronizes all threads within the current block (independently for all blocks).
Definition: Traits.hpp:36
The acceleration properties on a device.
Definition: AccDevProps.hpp:18
The CPU device platform.
Definition: PlatformCpu.hpp:18
Tag used in class inheritance hierarchies that describes that a specific concept (TConcept) is implem...
Definition: Concepts.hpp:15
The accelerator type trait.
Definition: Traits.hpp:37
static ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
The kernel execution task creation trait.
Definition: Traits.hpp:34
The device type trait.
Definition: Traits.hpp:23
The dimension getter type trait.
Definition: Traits.hpp:14
static ALPAKA_FN_HOST auto getAccDevProps(DevCpu const &dev) -> AccDevProps< TDim, TIdx >
The device properties get trait.
Definition: Traits.hpp:61
static ALPAKA_FN_HOST auto getAccName() -> std::string
The accelerator name trait.
Definition: Traits.hpp:68
The idx type trait.
Definition: Traits.hpp:25
The multi thread accelerator trait.
Definition: Traits.hpp:56
The single thread accelerator trait.
Definition: Traits.hpp:46
The platform type trait.
Definition: Traits.hpp:30