alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
AccCpuOmp2Blocks.hpp
Go to the documentation of this file.
1/* Copyright 2025 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7// Base classes.
25
26// Specialized traits.
27#include "alpaka/acc/Traits.hpp"
28#include "alpaka/dev/Traits.hpp"
29#include "alpaka/idx/Traits.hpp"
32
33// Implementation details.
34#include "alpaka/acc/Tag.hpp"
37#include "alpaka/dev/DevCpu.hpp"
38
39#ifdef __cpp_lib_format
40# include <format>
41#endif
42#include <limits>
43#include <string>
44
45#ifdef ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED
46
47# if _OPENMP < 200203
48# error If ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!
49# endif
50
51namespace alpaka
52{
53 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
55
56 //! The CPU OpenMP 2.0 block accelerator.
57 //!
58 //! This accelerator allows parallel kernel execution on a CPU device.
59 //! It uses OpenMP 2.0 to implement the grid block parallelism.
60 //! The block thread extent is restricted to 1x1x1, i.e. every block runs exactly one thread.
61 template<typename TDim, typename TIdx>
62 class AccCpuOmp2Blocks final
63 : public WorkDivMembers<TDim, TIdx>
64 , public gb::IdxGbRef<TDim, TIdx>
65 , public bt::IdxBtZero<TDim, TIdx>
66 , public AtomicHierarchy<
67 AtomicCpu, // grid atomics
68 AtomicOmpBuiltIn, // block atomics
69 AtomicNoOp> // thread atomics
70 , public math::MathStdLib
72 , public BlockSharedMemStMember<>
73 , public BlockSyncNoOp
74 , public IntrinsicCpu
75 , public MemFenceOmp2Blocks
76# ifdef ALPAKA_DISABLE_VENDOR_RNG
77 , public rand::RandDefault
78# else
79 , public rand::RandStdLib
80# endif
82 , public interface::Implements<ConceptAcc, AccCpuOmp2Blocks<TDim, TIdx>>
83 {
84 static_assert(
85 sizeof(TIdx) >= sizeof(int),
86 "Index type is not supported, consider using int or a larger type.");
87
88 public:
89 // A friend declaration cannot name a partial specialization (fixing TDim and TIdx), so all specializations of the task template are befriended.
90 template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
91 friend class ::alpaka::TaskKernelCpuOmp2Blocks;
92
95 auto operator=(AccCpuOmp2Blocks const&) -> AccCpuOmp2Blocks& = delete;
97
98 private:
99 template<typename TWorkDiv>
100 ALPAKA_FN_HOST AccCpuOmp2Blocks(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
101 : WorkDivMembers<TDim, TIdx>(workDiv)
102 , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
103 , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
105 , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
106 {
107 }
108
109 private:
110 // getIdx
111 Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
112 };
113
114 namespace trait
115 {
116 //! The CPU OpenMP 2.0 block accelerator type trait specialization: the accelerator type is AccCpuOmp2Blocks itself.
117 template<typename TDim, typename TIdx>
118 struct AccType<AccCpuOmp2Blocks<TDim, TIdx>>
119 {
120 using type = AccCpuOmp2Blocks<TDim, TIdx>;
121 };
122
123 //! The CPU OpenMP 2.0 block single thread accelerator trait specialization: AccCpuOmp2Blocks is a single thread accelerator (std::true_type).
124 template<typename TDim, typename TIdx>
125 struct IsSingleThreadAcc<AccCpuOmp2Blocks<TDim, TIdx>> : std::true_type
126 {
127 };
128
129 //! The CPU OpenMP 2.0 block multi thread accelerator trait specialization: AccCpuOmp2Blocks is not a multi thread accelerator (std::false_type).
130 template<typename TDim, typename TIdx>
131 struct IsMultiThreadAcc<AccCpuOmp2Blocks<TDim, TIdx>> : std::false_type
132 {
133 };
134
135 //! The CPU OpenMP 2.0 block accelerator device properties get trait specialization.
136 template<typename TDim, typename TIdx>
137 struct GetAccDevProps<AccCpuOmp2Blocks<TDim, TIdx>>
138 {
140 {
141 return {// m_multiProcessorCount
142 alpaka::core::clipCast<TIdx>(omp_get_max_threads()),
143 // m_gridBlockExtentMax
144 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
145 // m_gridBlockCountMax
146 std::numeric_limits<TIdx>::max(),
147 // m_blockThreadExtentMax
149 // m_blockThreadCountMax
150 static_cast<TIdx>(1),
151 // m_threadElemExtentMax
152 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
153 // m_threadElemCountMax
154 std::numeric_limits<TIdx>::max(),
155 // m_sharedMemSizeBytes
157 // m_globalMemSizeBytes
158 getMemBytes(dev)};
159 }
160 };
161
162 //! The CPU OpenMP 2.0 block accelerator name trait specialization; the name has the form "AccCpuOmp2Blocks<dim,idx-type>" and is built once in a function-local static.
163 template<typename TDim, typename TIdx>
164 struct GetAccName<AccCpuOmp2Blocks<TDim, TIdx>>
165 {
166 ALPAKA_FN_HOST static auto getAccName() -> std::string
167 {
168# if ALPAKA_COMP_CLANG
169# pragma clang diagnostic push
170# pragma clang diagnostic ignored "-Wexit-time-destructors"
171# endif
172 using namespace std::literals;
173 static std::string const accName =
174# ifdef __cpp_lib_format
175 std::format("AccCpuOmp2Blocks<{},{}>", TDim::value, core::demangled<TIdx>);
176# else
177 "AccCpuOmp2Blocks<"s + std::to_string(TDim::value) + ","s + std::string(core::demangled<TIdx>)
178 + ">"s;
179# endif
180 return accName; // returned by value: the caller receives a copy of the cached string
181# if ALPAKA_COMP_CLANG
182# pragma clang diagnostic pop
183# endif
184 }
185 };
186
187 //! The CPU OpenMP 2.0 block accelerator device type trait specialization: the accelerator runs on DevCpu.
188 template<typename TDim, typename TIdx>
189 struct DevType<AccCpuOmp2Blocks<TDim, TIdx>>
190 {
191 using type = DevCpu;
192 };
193
194 //! The CPU OpenMP 2.0 block accelerator dimension getter trait specialization: the dimension is the TDim template argument.
195 template<typename TDim, typename TIdx>
196 struct DimType<AccCpuOmp2Blocks<TDim, TIdx>>
197 {
198 using type = TDim;
199 };
200
201 //! The CPU OpenMP 2.0 block accelerator kernel execution task creation trait specialization.
202 template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
203 struct CreateTaskKernel<AccCpuOmp2Blocks<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
204 {
206 TWorkDiv const& workDiv,
207 TKernelFnObj const& kernelFnObj,
208 TArgs&&... args)
209 {
210 if(workDiv.m_blockThreadExtent.prod() != static_cast<TIdx>(1u))
211 {
212 throw std::runtime_error(
213 "The given work division is not valid for a single thread Acc: "
214 + getAccName<AccCpuOmp2Blocks<TDim, TIdx>>() + ". Threads per block should be 1!");
215 }
216
217 return TaskKernelCpuOmp2Blocks<TDim, TIdx, TKernelFnObj, TArgs...>(
218 workDiv,
219 kernelFnObj,
220 std::forward<TArgs>(args)...);
221 }
222 };
223
224 //! The CPU OpenMP 2.0 block accelerator platform type trait specialization: the accelerator belongs to PlatformCpu.
225 template<typename TDim, typename TIdx>
226 struct PlatformType<AccCpuOmp2Blocks<TDim, TIdx>>
227 {
228 using type = PlatformCpu;
229 };
230
231 //! The CPU OpenMP 2.0 block accelerator idx type trait specialization: the index type is the TIdx template argument.
232 template<typename TDim, typename TIdx>
233 struct IdxType<AccCpuOmp2Blocks<TDim, TIdx>>
234 {
235 using type = TIdx;
236 };
237
//! The CPU OpenMP 2.0 block accelerator to tag trait specialization: maps AccCpuOmp2Blocks to TagCpuOmp2Blocks.
238 template<typename TDim, typename TIdx>
239 struct AccToTag<alpaka::AccCpuOmp2Blocks<TDim, TIdx>>
240 {
241 using type = alpaka::TagCpuOmp2Blocks;
242 };
243
244 template<typename TDim, typename TIdx>
245 struct TagToAcc<alpaka::TagCpuOmp2Blocks, TDim, TIdx>
246 {
248 };
249 } // namespace trait
250} // namespace alpaka
251
252#endif
The CPU OpenMP 2.0 block accelerator.
AccCpuOmp2Blocks(AccCpuOmp2Blocks const &)=delete
auto operator=(AccCpuOmp2Blocks const &) -> AccCpuOmp2Blocks &=delete
AccCpuOmp2Blocks(AccCpuOmp2Blocks &&)=delete
auto operator=(AccCpuOmp2Blocks &&) -> AccCpuOmp2Blocks &=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
static constexpr auto staticAllocBytes() -> std::uint32_t
auto staticMemCapacity() const -> std::uint32_t
Static block shared memory provider using a pointer to externally allocated fixed-size memory,...
The no op block synchronization.
The CPU device handle.
Definition DevCpu.hpp:56
The CPU intrinsic.
The CPU OpenMP 2.0 block memory fence.
The CPU OpenMP 2.0 block accelerator execution task.
An n-dimensional vector.
Definition Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition Vec.hpp:106
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition Vec.hpp:89
A basic class holding the work division as grid block extent, block thread extent and thread element extent.
A zero block thread index provider.
Definition IdxBtZero.hpp:19
An IdxGbRef grid block index.
Definition IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition IdxGbRef.hpp:22
The standard library math trait specializations.
"Tiny" state mersenne twister implementation
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition Common.hpp:40
auto clipCast(V const &val) -> T
Definition ClipCast.hpp:16
The alpaka accelerator library.
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc > >
Definition Traits.hpp:90
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:332
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:100
The acceleration properties on a device.
The CPU device platform.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
The accelerator type trait.
Definition Traits.hpp:37
The kernel execution task creation trait.
Definition Traits.hpp:35
The device type trait.
Definition Traits.hpp:23
The dimension getter type trait.
Definition Traits.hpp:14
The device properties get trait.
Definition Traits.hpp:61
The accelerator name trait.
Definition Traits.hpp:68
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:69
The idx type trait.
Definition Traits.hpp:25
The multi thread accelerator trait.
Definition Traits.hpp:56
The single thread accelerator trait.
Definition Traits.hpp:46
The platform type trait.
Definition Traits.hpp:30