alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
AccCpuSerial.hpp
Go to the documentation of this file.
1/* Copyright 2025 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7// Base classes.
24
25// Specialized traits.
26#include "alpaka/acc/Traits.hpp"
27#include "alpaka/dev/Traits.hpp"
28#include "alpaka/idx/Traits.hpp"
31
32// Implementation details.
33#include "alpaka/acc/Tag.hpp"
35#include "alpaka/dev/DevCpu.hpp"
36
37#ifdef __cpp_lib_format
38# include <format>
39#endif
40#include <memory>
41#include <string>
42
43#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
44
45namespace alpaka
46{
//! Forward declaration of the CPU serial kernel task; AccCpuSerial declares it a friend.
47 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
48 class TaskKernelCpuSerial;
49
50 //! The CPU serial accelerator.
51 //!
52 //! This accelerator allows serial kernel execution on a CPU device.
53 //! The number of threads per block is restricted to one (block thread extent 1x1x1) and all blocks are executed serially so there is no parallelism at all.
54 template<typename TDim, typename TIdx>
55 class AccCpuSerial final
56 : public WorkDivMembers<TDim, TIdx>
57 , public gb::IdxGbRef<TDim, TIdx>
58 , public bt::IdxBtZero<TDim, TIdx>
59 , public AtomicHierarchy<
60 AtomicCpu, // grid atomics
61 AtomicNoOp, // block atomics
62 AtomicNoOp> // thread atomics
63 , public math::MathStdLib
65 , public BlockSharedMemStMember<>
66 , public BlockSyncNoOp
67 , public IntrinsicCpu
68 , public MemFenceCpuSerial
69# ifdef ALPAKA_DISABLE_VENDOR_RNG
70 , public rand::RandDefault
71# else
72 , public rand::RandStdLib
73# endif
75 , public interface::Implements<ConceptAcc, AccCpuSerial<TDim, TIdx>>
76 {
77 static_assert(
78 sizeof(TIdx) >= sizeof(int),
79 "Index type is not supported, consider using int or a larger type.");
80
81 public:
82 // All TaskKernelCpuSerial instantiations are befriended: a friend declaration cannot be partially specialized to only the matching TDim and TIdx.
83 template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
84 friend class ::alpaka::TaskKernelCpuSerial;
85
86 AccCpuSerial(AccCpuSerial const&) = delete;
88 auto operator=(AccCpuSerial const&) -> AccCpuSerial& = delete;
89 auto operator=(AccCpuSerial&&) -> AccCpuSerial& = delete;
90
91 private:
92 template<typename TWorkDiv>
93 ALPAKA_FN_HOST AccCpuSerial(TWorkDiv const& workDiv, size_t const& blockSharedMemDynSizeBytes)
94 : WorkDivMembers<TDim, TIdx>(workDiv)
95 , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
96 , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
98 , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
99 {
100 }
101
102 private:
103 // getIdx: this member is the object handed to the gb::IdxGbRef base in the constructor.
104 Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block (zero-initialized by the constructor).
105 };
106
107 namespace trait
108 {
109 //! The CPU serial accelerator type trait specialization: the accelerator type of AccCpuSerial is AccCpuSerial itself.
110 template<typename TDim, typename TIdx>
111 struct AccType<AccCpuSerial<TDim, TIdx>>
112 {
113 using type = AccCpuSerial<TDim, TIdx>;
114 };
115
116 //! The CPU serial single thread accelerator trait specialization: AccCpuSerial is a single thread accelerator (std::true_type).
117 template<typename TDim, typename TIdx>
118 struct IsSingleThreadAcc<AccCpuSerial<TDim, TIdx>> : std::true_type
119 {
120 };
121
122 //! The CPU serial multi thread accelerator trait specialization: AccCpuSerial is not a multi thread accelerator (std::false_type).
123 template<typename TDim, typename TIdx>
124 struct IsMultiThreadAcc<AccCpuSerial<TDim, TIdx>> : std::false_type
125 {
126 };
127
128 //! The CPU serial accelerator device properties get trait specialization: reports one multiprocessor, at most one thread per block, and the global memory size obtained from getMemBytes(dev).
129 template<typename TDim, typename TIdx>
130 struct GetAccDevProps<AccCpuSerial<TDim, TIdx>>
131 {
132 ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> AccDevProps<TDim, TIdx>
133 {
134 return {// m_multiProcessorCount
135 static_cast<TIdx>(1),
136 // m_gridBlockExtentMax
137 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
138 // m_gridBlockCountMax
139 std::numeric_limits<TIdx>::max(),
140 // m_blockThreadExtentMax
142 // m_blockThreadCountMax
143 static_cast<TIdx>(1),
144 // m_threadElemExtentMax
145 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
146 // m_threadElemCountMax
147 std::numeric_limits<TIdx>::max(),
148 // m_sharedMemSizeBytes
149 static_cast<size_t>(AccCpuSerial<TDim, TIdx>::staticAllocBytes()),
150 // m_globalMemSizeBytes
151 getMemBytes(dev)};
152 }
153 };
154
155 //! The CPU serial accelerator name trait specialization. The name has the form "AccCpuSerial<TDim::value,demangled TIdx>"; it is built once into a function-local static string and returned by value.
156 template<typename TDim, typename TIdx>
157 struct GetAccName<AccCpuSerial<TDim, TIdx>>
158 {
159 ALPAKA_FN_HOST static auto getAccName() -> std::string
160 {
161# if ALPAKA_COMP_CLANG
162# pragma clang diagnostic push
163# pragma clang diagnostic ignored "-Wexit-time-destructors"
164# endif
165 using namespace std::literals;
166 static std::string const accName =
167# ifdef __cpp_lib_format
168 std::format("AccCpuSerial<{},{}>", TDim::value, core::demangled<TIdx>);
169# else
170 "AccCpuSerial<"s + std::to_string(TDim::value) + ","s + std::string(core::demangled<TIdx>) + ">"s;
171# endif
172 return accName;
173# if ALPAKA_COMP_CLANG
174# pragma clang diagnostic pop
175# endif
176 }
177 };
178
179 //! The CPU serial accelerator device type trait specialization: the device type is DevCpu.
180 template<typename TDim, typename TIdx>
181 struct DevType<AccCpuSerial<TDim, TIdx>>
182 {
183 using type = DevCpu;
184 };
185
186 //! The CPU serial accelerator dimension getter trait specialization: the dimension is the TDim template argument.
187 template<typename TDim, typename TIdx>
188 struct DimType<AccCpuSerial<TDim, TIdx>>
189 {
190 using type = TDim;
191 };
192
193 //! The CPU serial accelerator execution task type trait specialization. Returns a TaskKernelCpuSerial; throws std::runtime_error unless the work division has exactly one thread per block.
194 template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
195 struct CreateTaskKernel<AccCpuSerial<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
196 {
198 TWorkDiv const& workDiv,
199 TKernelFnObj const& kernelFnObj,
200 TArgs&&... args)
201 {
202 if(workDiv.m_blockThreadExtent.prod() != static_cast<TIdx>(1u))
203 {
204 throw std::runtime_error(
205 "The given work division is not valid for a single thread Acc: "
206 + getAccName<AccCpuSerial<TDim, TIdx>>() + ". Threads per block should be 1!");
207 }
208
209 return TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>(
210 workDiv,
211 kernelFnObj,
212 std::forward<TArgs>(args)...);
213 }
214 };
215
216 //! The CPU serial accelerator platform type trait specialization: the platform type is PlatformCpu.
217 template<typename TDim, typename TIdx>
218 struct PlatformType<AccCpuSerial<TDim, TIdx>>
219 {
220 using type = PlatformCpu;
221 };
222
223 //! The CPU serial accelerator idx type trait specialization: the index type is the TIdx template argument.
224 template<typename TDim, typename TIdx>
225 struct IdxType<AccCpuSerial<TDim, TIdx>>
226 {
227 using type = TIdx;
228 };
229
//! The CPU serial accelerator to tag trait specialization: AccCpuSerial maps to TagCpuSerial.
230 template<typename TDim, typename TIdx>
231 struct AccToTag<alpaka::AccCpuSerial<TDim, TIdx>>
232 {
233 using type = alpaka::TagCpuSerial;
234 };
235
//! The CPU serial tag to accelerator trait specialization for TagCpuSerial.
236 template<typename TDim, typename TIdx>
237 struct TagToAcc<alpaka::TagCpuSerial, TDim, TIdx>
238 {
240 };
241 } // namespace trait
242} // namespace alpaka
243
244#endif
The CPU serial accelerator.
auto operator=(AccCpuSerial &&) -> AccCpuSerial &=delete
auto operator=(AccCpuSerial const &) -> AccCpuSerial &=delete
AccCpuSerial(AccCpuSerial &&)=delete
AccCpuSerial(AccCpuSerial const &)=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
static constexpr auto staticAllocBytes() -> std::uint32_t
auto staticMemCapacity() const -> std::uint32_t
Static block shared memory provider using a pointer to externally allocated fixed-size memory,...
The no op block synchronization.
The CPU intrinsic.
The serial CPU memory fence.
An n-dimensional vector.
Definition Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition Vec.hpp:106
A basic class holding the work division as grid block extent, block thread and thread element extent.
A zero block thread index provider.
Definition IdxBtZero.hpp:19
An IdxGbRef grid block index.
Definition IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition IdxGbRef.hpp:22
The standard library math trait specializations.
"Tiny" state Mersenne Twister implementation.
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition Common.hpp:40
auto clipCast(V const &val) -> T
Definition ClipCast.hpp:16
The alpaka accelerator library.
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc > >
Definition Traits.hpp:90
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:332
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:100
typename trait::AccToTag< TAcc >::type AccToTag
Maps an accelerator type to a tag type.
Definition Tag.hpp:67
typename trait::TagToAcc< TTag, TDim, TIdx >::type TagToAcc
Maps a tag type to an accelerator type.
Definition Tag.hpp:74
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:69