alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
AccCpuThreads.hpp
Go to the documentation of this file.
1/* Copyright 2025 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7// Base classes.
23
24// Specialized traits.
25#include "alpaka/acc/Traits.hpp"
26#include "alpaka/dev/Traits.hpp"
27#include "alpaka/idx/Traits.hpp"
30
31// Implementation details.
32#include "alpaka/acc/Tag.hpp"
36#include "alpaka/dev/DevCpu.hpp"
37
38#ifdef __cpp_lib_format
39# include <format>
40#endif
41#include <memory>
42#include <string>
43#include <thread>
44
45#ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED
46
47namespace alpaka
48{
49 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
50 class TaskKernelCpuThreads; // Forward declaration only: AccCpuThreads names this template in a friend declaration further down.
51
52 //! The CPU threads accelerator.
53 //!
54 //! This accelerator allows parallel kernel execution on a CPU device.
55 //! It uses std::thread to implement the parallelism.
   //!
   //! The class gathers its capabilities (work division, indices, atomics, math, block
   //! synchronization, intrinsics, memory fence, random numbers) by inheriting one provider class
   //! per capability. Copying is deleted, and the only non-deleted constructor visible here is
   //! private, so instances can only be created through the TaskKernelCpuThreads friend declaration.
   //!
   //! \tparam TDim The dimension type used for the work division and the index vectors.
   //! \tparam TIdx The index type; must be at least as large as int (enforced by the static_assert).
   //!
   //! NOTE(review): this listing skips original lines 66-67, 76, 89, 91 and 100-102, so some base
   //! classes, declarations and initializers of this class are not visible here.
56 template<typename TDim, typename TIdx>
57 class AccCpuThreads final
58 : public WorkDivMembers<TDim, TIdx>
59 , public gb::IdxGbRef<TDim, TIdx>
60 , public bt::IdxBtRefThreadIdMap<TDim, TIdx>
61 , public AtomicHierarchy<
62 AtomicCpu, // grid atomics
63 AtomicCpu, // block atomics
64 AtomicCpu> // thread atomics
65 , public math::MathStdLib
68 , public BlockSyncBarrierThread<TIdx>
69 , public IntrinsicCpu
70 , public MemFenceCpu
71# ifdef ALPAKA_DISABLE_VENDOR_RNG
72 , public rand::RandDefault
73# else
74 , public rand::RandStdLib
75# endif
77 , public interface::Implements<ConceptAcc, AccCpuThreads<TDim, TIdx>>
78 {
79 static_assert( // Reject index types that are narrower than int.
80 sizeof(TIdx) >= sizeof(int),
81 "Index type is not supported, consider using int or a larger type.");
82
83 public:
84 // A friend declaration cannot be a partial specialization, so every TaskKernelCpuThreads instantiation is
   // befriended instead of only the one with the matching TDim and TIdx.
85 template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
86 friend class ::alpaka::TaskKernelCpuThreads;
87
88 AccCpuThreads(AccCpuThreads const&) = delete; // Not copy-constructible.
90 auto operator=(AccCpuThreads const&) -> AccCpuThreads& = delete; // Not copy-assignable.
92
93 private:
   //! Private constructor; outside of this class only the befriended TaskKernelCpuThreads can call it.
   //!
   //! \param workDiv The work division. It is forwarded to WorkDivMembers, and the product of its
   //!        Block/Threads extents is passed to BlockSyncBarrierThread.
   //! \param blockSharedMemDynSizeBytes The size in bytes that is handed to BlockSharedMemDynMember.
94 template<typename TWorkDiv>
95 ALPAKA_FN_HOST AccCpuThreads(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
96 : WorkDivMembers<TDim, TIdx>(workDiv)
97 , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx) // Base classes are initialized before members: the member is only referenced here and gets its value at the end of this list.
98 , bt::IdxBtRefThreadIdMap<TDim, TIdx>(m_threadToIndexMap) // Same pattern: the map member is referenced before its own initialization.
99 , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
   // NOTE(review): listing lines 100-102 are missing; the two lambdas below are arguments of a base
   // initializer whose beginning is not visible in this extract.
103 [this]() { syncBlockThreads(*this); },
104 [this]() noexcept { return (m_idMasterThread == std::this_thread::get_id()); }) // True only when the calling thread's id equals m_idMasterThread.
105 , BlockSyncBarrierThread<TIdx>(getWorkDiv<Block, Threads>(workDiv).prod()) // Argument: product of the Block/Threads work division extents.
106 , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
107 {
108 }
109
110 private:
111 // getIdx
112 std::mutex mutable m_mtxMapInsert; //!< The mutex used to secure insertion into the ThreadIdToIdxMap.
113 typename bt::IdxBtRefThreadIdMap<TDim, TIdx>::
114 ThreadIdToIdxMap mutable m_threadToIndexMap; //!< The mapping of thread ids to indices.
115 Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
116
117 // allocBlockSharedArr
   // NOTE(review): m_idMasterThread is not set in the initializer list shown above; presumably the
   // befriended kernel task assigns it before the kernel runs. Confirm against TaskKernelCpuThreads.
118 std::thread::id mutable m_idMasterThread; //!< The id of the master thread.
119 };
120
121 namespace trait
122 {
123 //! The CPU threads accelerator type trait specialization: the accelerator type of AccCpuThreads is AccCpuThreads itself.
124 template<typename TDim, typename TIdx>
125 struct AccType<AccCpuThreads<TDim, TIdx>>
126 {
127 using type = AccCpuThreads<TDim, TIdx>;
128 };
129
130 //! The CPU threads single thread accelerator type trait specialization: evaluates to false for AccCpuThreads.
131 template<typename TDim, typename TIdx>
132 struct IsSingleThreadAcc<AccCpuThreads<TDim, TIdx>> : std::false_type
133 {
134 };
135
136 //! The CPU threads multi thread accelerator type trait specialization: evaluates to true for AccCpuThreads.
137 template<typename TDim, typename TIdx>
138 struct IsMultiThreadAcc<AccCpuThreads<TDim, TIdx>> : std::true_type
139 {
140 };
141
142 //! The CPU threads accelerator device properties get trait specialization.
    //!
    //! Builds the AccDevProps<TDim, TIdx> of a DevCpu for this accelerator: a single multiprocessor,
    //! grid and element limits equal to the maximum of TIdx, a block thread limit derived from
    //! std::thread::hardware_concurrency (or fixed to 8 when ALPAKA_CI is defined), and the value of
    //! getMemBytes(dev) for both the shared and the global memory size.
143 template<typename TDim, typename TIdx>
144 struct GetAccDevProps<AccCpuThreads<TDim, TIdx>>
145 {
    //! \param dev The CPU device whose memory size is queried.
    //! \return The device properties, listed in the member order given by the inline comments below.
146 ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> AccDevProps<TDim, TIdx>
147 {
148# ifdef ALPAKA_CI
149 auto const blockThreadCountMax = static_cast<TIdx>(8); // Fixed limit of 8 threads per block when ALPAKA_CI is defined.
150# else
151 // \TODO: Magic number. What is the maximum? Just set a reasonable value? There is an
152 // implementation-defined maximum beyond which the creation of a new thread crashes.
153 // std::thread::hardware_concurrency can return 0, so the result is clamped to at least 1.
154 auto const blockThreadCountMax = std::max(
155 static_cast<TIdx>(1),
156 alpaka::core::clipCast<TIdx>(std::thread::hardware_concurrency() * 8));
157# endif
158 auto const memBytes = getMemBytes(dev); // Queried once and reused for both memory size entries below.
159 return {// m_multiProcessorCount
160 static_cast<TIdx>(1),
161 // m_gridBlockExtentMax
162 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
163 // m_gridBlockCountMax
164 std::numeric_limits<TIdx>::max(),
165 // m_blockThreadExtentMax
166 Vec<TDim, TIdx>::all(blockThreadCountMax),
167 // m_blockThreadCountMax
168 blockThreadCountMax,
169 // m_threadElemExtentMax
170 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
171 // m_threadElemCountMax
172 std::numeric_limits<TIdx>::max(),
173 // m_sharedMemSizeBytes
174 memBytes,
175 // m_globalMemSizeBytes
176 memBytes};
177 }
178 };
179
180 //! The CPU threads accelerator name trait specialization.
    //!
    //! The name has the form "AccCpuThreads<dim,idx>", built from TDim::value and core::demangled<TIdx>.
181 template<typename TDim, typename TIdx>
182 struct GetAccName<AccCpuThreads<TDim, TIdx>>
183 {
    //! \return A copy of the accelerator name string.
184 ALPAKA_FN_HOST static auto getAccName() -> std::string
185 {
    // The function-local static string below has a non-trivial destructor that runs at program exit;
    // clang's -Wexit-time-destructors warning is silenced for this function body.
186# if ALPAKA_COMP_CLANG
187# pragma clang diagnostic push
188# pragma clang diagnostic ignored "-Wexit-time-destructors"
189# endif
190 using namespace std::literals; // Provides the "s" std::string literal suffix used in the fallback branch.
191 static std::string const accName = // Function-local static: the name is built once, on the first call.
192# ifdef __cpp_lib_format
193 std::format("AccCpuThreads<{},{}>", TDim::value, core::demangled<TIdx>);
194# else
195 "AccCpuThreads<"s + std::to_string(TDim::value) + ","s + std::string(core::demangled<TIdx>) + ">"s;
196# endif
197 return accName; // Returned by value, so every call hands out a copy of the cached string.
198# if ALPAKA_COMP_CLANG
199# pragma clang diagnostic pop
200# endif
201 }
202 };
203
204 //! The CPU threads accelerator device type trait specialization: AccCpuThreads runs on DevCpu.
205 template<typename TDim, typename TIdx>
206 struct DevType<AccCpuThreads<TDim, TIdx>>
207 {
208 using type = DevCpu;
209 };
210
211 //! The CPU threads accelerator dimension getter trait specialization: exposes the TDim template argument.
212 template<typename TDim, typename TIdx>
213 struct DimType<AccCpuThreads<TDim, TIdx>>
214 {
215 using type = TDim;
216 };
217
218 //! The CPU threads accelerator execution task type trait specialization.
    //!
    //! The member function returns a TaskKernelCpuThreads built from the work division, the kernel
    //! function object and the perfectly forwarded kernel arguments.
219 template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
220 struct CreateTaskKernel<AccCpuThreads<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
221 {
    // NOTE(review): listing line 222, which holds the start of the member function signature, is
    // missing from this extract; only its parameter list is visible below.
223 TWorkDiv const& workDiv,
224 TKernelFnObj const& kernelFnObj,
225 TArgs&&... args)
226 {
227 return TaskKernelCpuThreads<TDim, TIdx, TKernelFnObj, TArgs...>(
228 workDiv,
229 kernelFnObj,
230 std::forward<TArgs>(args)...); // Forwarding keeps the value category of each kernel argument.
231 }
232 };
233
234 //! The CPU threads execution task platform type trait specialization: AccCpuThreads belongs to PlatformCpu.
235 template<typename TDim, typename TIdx>
236 struct PlatformType<AccCpuThreads<TDim, TIdx>>
237 {
238 using type = PlatformCpu;
239 };
240
241 //! The CPU threads accelerator idx type trait specialization: exposes the TIdx template argument.
242 template<typename TDim, typename TIdx>
243 struct IdxType<AccCpuThreads<TDim, TIdx>>
244 {
245 using type = TIdx;
246 };
247
248 template<typename TDim, typename TIdx>
249 struct AccToTag<alpaka::AccCpuThreads<TDim, TIdx>>
250 {
251 using type = alpaka::TagCpuThreads; //!< The tag type that AccCpuThreads maps to, for every TDim and TIdx.
252 };
253
254 template<typename TDim, typename TIdx>
255 struct TagToAcc<alpaka::TagCpuThreads, TDim, TIdx>
256 {
    // NOTE(review): listing line 257 is missing from this extract; presumably it defines the member
    // `type` as the accelerator that TagCpuThreads maps back to. Confirm against the original header.
258 };
259 } // namespace trait
260} // namespace alpaka
261
262#endif
The CPU threads accelerator.
auto operator=(AccCpuThreads const &) -> AccCpuThreads &=delete
AccCpuThreads(AccCpuThreads &&)=delete
AccCpuThreads(AccCpuThreads const &)=delete
auto operator=(AccCpuThreads &&) -> AccCpuThreads &=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
auto staticMemCapacity() const -> std::uint32_t
The thread id map barrier block synchronization.
The CPU intrinsic.
The default CPU memory fence.
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition Vec.hpp:89
A basic class holding the work division as grid block extent, block thread and thread element extent.
The threads accelerator index provider.
ALPAKA_FN_HOST IdxBtRefThreadIdMap(ThreadIdToIdxMap const &mThreadToIndices)
An IdxGbRef grid block index.
Definition IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition IdxGbRef.hpp:22
The standard library math trait specializations.
"Tiny" state Mersenne Twister implementation
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition Common.hpp:40
auto clipCast(V const &val) -> T
Definition ClipCast.hpp:16
The alpaka accelerator library.
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc > >
Definition Traits.hpp:90
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:332
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreads(TBlockSync const &blockSync) -> void
Synchronizes all threads within the current block (independently for all blocks).
Definition Traits.hpp:36
typename trait::AccToTag< TAcc >::type AccToTag
maps an acc type to a tag type
Definition Tag.hpp:67
typename trait::TagToAcc< TTag, TDim, TIdx >::type TagToAcc
maps a tag type to an acc type
Definition Tag.hpp:74
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:69