alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
AccCpuThreads.hpp
Go to the documentation of this file.
1/* Copyright 2024 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7// Base classes.
23
24// Specialized traits.
25#include "alpaka/acc/Traits.hpp"
26#include "alpaka/dev/Traits.hpp"
27#include "alpaka/idx/Traits.hpp"
30
31// Implementation details.
32#include "alpaka/acc/Tag.hpp"
36#include "alpaka/dev/DevCpu.hpp"
37
38#include <memory>
39#include <thread>
40#include <typeinfo>
41
42#ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED
43
44namespace alpaka
45{
//! Forward declaration of the CPU threads kernel task class template.
//! It is befriended by AccCpuThreads and returned by the CreateTaskKernel trait specialization in this file.
46 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
47 class TaskKernelCpuThreads;
48
49 //! The CPU threads accelerator.
50 //!
51 //! This accelerator allows parallel kernel execution on a CPU device.
52 //! It uses std::thread to implement the parallelism.
//!
//! \tparam TDim The dimensionality type; forwarded to the work division and index base classes.
//! \tparam TIdx The index type; must be at least as large as int (see the static_assert below).
53 template<typename TDim, typename TIdx>
54 class AccCpuThreads final
55 : public WorkDivMembers<TDim, TIdx>
56 , public gb::IdxGbRef<TDim, TIdx>
57 , public bt::IdxBtRefThreadIdMap<TDim, TIdx>
58 , public AtomicHierarchy<
59 AtomicCpu, // grid atomics
60 AtomicCpu, // block atomics
61 AtomicCpu> // thread atomics
62 , public math::MathStdLib
65 , public BlockSyncBarrierThread<TIdx>
66 , public IntrinsicCpu
67 , public MemFenceCpu
// The random number generator base class is selected at compile time via ALPAKA_DISABLE_VENDOR_RNG.
68# ifdef ALPAKA_DISABLE_VENDOR_RNG
69 , public rand::RandDefault
70# else
71 , public rand::RandStdLib
72# endif
74 , public interface::Implements<ConceptAcc, AccCpuThreads<TDim, TIdx>>
75 {
// Reject index types that are smaller than int at compile time.
76 static_assert(
77 sizeof(TIdx) >= sizeof(int),
78 "Index type is not supported, consider using int or a larger type.");
79
80 public:
81 // A friend declaration cannot name a partial specialization, so every specialization of
// TaskKernelCpuThreads is befriended, not only the ones with matching TDim and TIdx.
82 template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
83 friend class ::alpaka::TaskKernelCpuThreads;
84
// Copy construction and copy assignment are disabled.
85 AccCpuThreads(AccCpuThreads const&) = delete;
87 auto operator=(AccCpuThreads const&) -> AccCpuThreads& = delete;
89
90 private:
//! Constructs the accelerator from a work division and the dynamic block shared memory size in bytes.
//!
//! The constructor is private, so it is accessible only to the class itself and to the befriended
//! TaskKernelCpuThreads. The index base classes are handed m_gridBlockIdx and m_threadToIndexMap,
//! the block barrier is constructed with the product of the block thread extents of workDiv,
//! and the grid block index starts at all zeros.
91 template<typename TWorkDiv>
92 ALPAKA_FN_HOST AccCpuThreads(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
93 : WorkDivMembers<TDim, TIdx>(workDiv)
94 , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
95 , bt::IdxBtRefThreadIdMap<TDim, TIdx>(m_threadToIndexMap)
96 , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
// First callback: synchronizes the threads of the block via syncBlockThreads.
100 [this]() { syncBlockThreads(*this); },
// Second callback: true only when the calling thread's id equals m_idMasterThread.
101 [this]() noexcept { return (m_idMasterThread == std::this_thread::get_id()); })
102 , BlockSyncBarrierThread<TIdx>(getWorkDiv<Block, Threads>(workDiv).prod())
103 , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
104 {
105 }
106
107 private:
108 // getIdx
109 std::mutex mutable m_mtxMapInsert; //!< The mutex used to secure insertion into the ThreadIdToIdxMap.
110 typename bt::IdxBtRefThreadIdMap<TDim, TIdx>::
111 ThreadIdToIdxMap mutable m_threadToIndexMap; //!< The mapping of thread ids to indices.
112 Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
113
114 // allocBlockSharedArr
115 std::thread::id mutable m_idMasterThread; //!< The id of the master thread; compared against the calling thread's id in the constructor's callback.
116 };
117
118 namespace trait
119 {
120 //! The accelerator type trait specialization for the CPU threads accelerator.
//! The resulting type is AccCpuThreads<TDim, TIdx> itself.
121 template<typename TDim, typename TIdx>
122 struct AccType<AccCpuThreads<TDim, TIdx>>
123 {
124 using type = AccCpuThreads<TDim, TIdx>;
125 };
126
127 //! The single thread accelerator type trait specialization for the CPU threads accelerator.
//! Derives from std::false_type.
128 template<typename TDim, typename TIdx>
129 struct IsSingleThreadAcc<AccCpuThreads<TDim, TIdx>> : std::false_type
130 {
131 };
132
133 //! The multi thread accelerator type trait specialization for the CPU threads accelerator.
//! Derives from std::true_type.
134 template<typename TDim, typename TIdx>
135 struct IsMultiThreadAcc<AccCpuThreads<TDim, TIdx>> : std::true_type
136 {
137 };
138
139 //! The CPU threads accelerator device properties get trait specialization.
140 template<typename TDim, typename TIdx>
141 struct GetAccDevProps<AccCpuThreads<TDim, TIdx>>
142 {
//! Builds the accelerator device properties for the given CPU device.
//!
//! \param dev The CPU device; it is only passed to getMemBytes here.
//! \return The properties aggregate, whose fields are labelled one by one in the return statement.
143 ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> AccDevProps<TDim, TIdx>
144 {
// When ALPAKA_CI is defined, the per-block thread limit is fixed at 8.
145# ifdef ALPAKA_CI
146 auto const blockThreadCountMax = static_cast<TIdx>(8);
147# else
148 // \TODO: Magic number. What is the maximum? Just set a reasonable value? There is an
149 // implementation-defined maximum beyond which the creation of a new thread crashes.
150 // std::thread::hardware_concurrency can return 0, so the result is clamped to at least 1.
151 auto const blockThreadCountMax = std::max(
152 static_cast<TIdx>(1),
153 alpaka::core::clipCast<TIdx>(std::thread::hardware_concurrency() * 8));
154# endif
// The value returned by getMemBytes(dev) is reported as both the shared and the global memory size.
155 auto const memBytes = getMemBytes(dev);
156 return {// m_multiProcessorCount
157 static_cast<TIdx>(1),
158 // m_gridBlockExtentMax
159 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
160 // m_gridBlockCountMax
161 std::numeric_limits<TIdx>::max(),
162 // m_blockThreadExtentMax
163 Vec<TDim, TIdx>::all(blockThreadCountMax),
164 // m_blockThreadCountMax
165 blockThreadCountMax,
166 // m_threadElemExtentMax
167 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
168 // m_threadElemCountMax
169 std::numeric_limits<TIdx>::max(),
170 // m_sharedMemSizeBytes
171 memBytes,
172 // m_globalMemSizeBytes
173 memBytes};
174 }
175 };
176
177 //! The CPU threads accelerator name trait specialization.
178 template<typename TDim, typename TIdx>
179 struct GetAccName<AccCpuThreads<TDim, TIdx>>
180 {
//! \return The string "AccCpuThreads<D,I>", where D is TDim::value converted with std::to_string
//! and I is core::demangled<TIdx> (presumably the demangled name of the index type; confirm in core).
181 ALPAKA_FN_HOST static auto getAccName() -> std::string
182 {
183 return "AccCpuThreads<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">";
184 }
185 };
186
187 //! The CPU threads accelerator device type trait specialization.
//! The device type is DevCpu.
188 template<typename TDim, typename TIdx>
189 struct DevType<AccCpuThreads<TDim, TIdx>>
190 {
191 using type = DevCpu;
192 };
193
194 //! The CPU threads accelerator dimension getter trait specialization.
//! The dimension type is the accelerator's TDim template parameter.
195 template<typename TDim, typename TIdx>
196 struct DimType<AccCpuThreads<TDim, TIdx>>
197 {
198 using type = TDim;
199 };
200
201 //! The CPU threads accelerator execution task type trait specialization.
//!
//! The visible body constructs and returns a TaskKernelCpuThreads<TDim, TIdx, TKernelFnObj, TArgs...>
//! from the work division, the kernel function object and the perfectly forwarded kernel arguments.
202 template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
203 struct CreateTaskKernel<AccCpuThreads<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
204 {
206 TWorkDiv const& workDiv,
207 TKernelFnObj const& kernelFnObj,
208 TArgs&&... args)
209 {
210 return TaskKernelCpuThreads<TDim, TIdx, TKernelFnObj, TArgs...>(
211 workDiv,
212 kernelFnObj,
213 std::forward<TArgs>(args)...);
214 }
215 };
216
217 //! The CPU threads accelerator platform type trait specialization.
//! The platform type is PlatformCpu.
218 template<typename TDim, typename TIdx>
219 struct PlatformType<AccCpuThreads<TDim, TIdx>>
220 {
221 using type = PlatformCpu;
222 };
223
224 //! The CPU threads accelerator idx type trait specialization.
//! The index type is the accelerator's TIdx template parameter.
225 template<typename TDim, typename TIdx>
226 struct IdxType<AccCpuThreads<TDim, TIdx>>
227 {
228 using type = TIdx;
229 };
230
//! The accelerator to tag trait specialization for the CPU threads accelerator.
//! Maps AccCpuThreads<TDim, TIdx> to TagCpuThreads.
231 template<typename TDim, typename TIdx>
232 struct AccToTag<alpaka::AccCpuThreads<TDim, TIdx>>
233 {
234 using type = alpaka::TagCpuThreads;
235 };
236
//! The tag to accelerator trait specialization for TagCpuThreads with the given TDim and TIdx.
237 template<typename TDim, typename TIdx>
238 struct TagToAcc<alpaka::TagCpuThreads, TDim, TIdx>
239 {
241 };
242 } // namespace trait
243} // namespace alpaka
244
245#endif
The CPU threads accelerator.
auto operator=(AccCpuThreads const &) -> AccCpuThreads &=delete
AccCpuThreads(AccCpuThreads &&)=delete
AccCpuThreads(AccCpuThreads const &)=delete
auto operator=(AccCpuThreads &&) -> AccCpuThreads &=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
auto staticMemCapacity() const -> std::uint32_t
The thread id map barrier block synchronization.
The CPU intrinsic.
The default CPU memory fence.
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition Vec.hpp:89
A basic class holding the work division as grid block extent, block thread and thread element extent.
The threads accelerator index provider.
ALPAKA_FN_HOST IdxBtRefThreadIdMap(ThreadIdToIdxMap const &mThreadToIndices)
An IdxGbRef grid block index.
Definition IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition IdxGbRef.hpp:22
The standard library math trait specializations.
"Tiny" state mersenne twister implementation
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition Common.hpp:40
auto clipCast(V const &val) -> T
Definition ClipCast.hpp:16
The alpaka accelerator library.
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc > >
Definition Traits.hpp:90
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:332
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreads(TBlockSync const &blockSync) -> void
Synchronizes all threads within the current block (independently for all blocks).
Definition Traits.hpp:36
typename trait::AccToTag< TAcc >::type AccToTag
maps an acc type to a tag type
Definition Tag.hpp:67
typename trait::TagToAcc< TTag, TDim, TIdx >::type TagToAcc
maps a tag type to an acc type
Definition Tag.hpp:74
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:69