alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
AccCpuOmp2Threads.hpp
Go to the documentation of this file.
1/* Copyright 2025 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7// Base classes.
24
25// Specialized traits.
26#include "alpaka/acc/Traits.hpp"
27#include "alpaka/dev/Traits.hpp"
28#include "alpaka/idx/Traits.hpp"
31
32// Implementation details.
33#include "alpaka/acc/Tag.hpp"
36#include "alpaka/dev/DevCpu.hpp"
37
38#ifdef __cpp_lib_format
39# include <format>
40#endif
41#include <limits>
42#include <string>
43
44#ifdef ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED
45
46# if _OPENMP < 200203
47# error If ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!
48# endif
49
50# include <omp.h>
51
52namespace alpaka
53{
//! Forward declaration of the kernel execution task for the CPU OpenMP 2.0 thread accelerator.
//! It is declared here so the accelerator class can befriend it and the CreateTaskKernel trait can return it.
54 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
55 class TaskKernelCpuOmp2Threads;
56
57 //! The CPU OpenMP 2.0 thread accelerator.
58 //!
59 //! This accelerator allows parallel kernel execution on a CPU device.
60 //! It uses OpenMP 2.0 to implement the block thread parallelism.
// NOTE(review): this listing is an extract with gaps. Listing lines 62 (the class head), 71-72 and 81
// (further base classes), 93-96 and 104-106 are not present here. Restore them from the original header
// before compiling; the comments below only describe what is visible.
61 template<typename TDim, typename TIdx>
63 : public WorkDivMembers<TDim, TIdx>
64 , public gb::IdxGbRef<TDim, TIdx>
65 , public bt::IdxBtOmp<TDim, TIdx>
66 , public AtomicHierarchy<
67 AtomicCpu, // grid atomics
68 AtomicOmpBuiltIn, // block atomics
69 AtomicOmpBuiltIn> // thread atomics
70 , public math::MathStdLib
73 , public BlockSyncBarrierOmp
74 , public IntrinsicCpu
75 , public MemFenceOmp2Threads
// The random number generator base is selected at compile time by ALPAKA_DISABLE_VENDOR_RNG.
76# ifdef ALPAKA_DISABLE_VENDOR_RNG
77 , public rand::RandDefault
78# else
79 , public rand::RandStdLib
80# endif
82 , public interface::Implements<ConceptAcc, AccCpuOmp2Threads<TDim, TIdx>>
83 {
// The index type must be at least as wide as int.
84 static_assert(
85 sizeof(TIdx) >= sizeof(int),
86 "Index type is not supported, consider using int or a larger type.");
87
88 public:
89 // A friend declaration cannot be a partial specialization, so every instantiation of
// TaskKernelCpuOmp2Threads is befriended instead of only the one with matching TDim and TIdx.
90 template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
91 friend class ::alpaka::TaskKernelCpuOmp2Threads;
92
// NOTE(review): listing lines 93-96 are missing here; presumably they hold the deleted copy/move
// constructors and assignment operators shown in the member summary of this page - confirm.
97
98 private:
// Private constructor: within the visible code only the befriended TaskKernelCpuOmp2Threads can
// create an accelerator instance.
//   workDiv                    - work division forwarded to the WorkDivMembers base.
//   blockSharedMemDynSizeBytes - size in bytes forwarded to the BlockSharedMemDynMember base.
99 template<typename TWorkDiv>
100 ALPAKA_FN_HOST AccCpuOmp2Threads(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
101 : WorkDivMembers<TDim, TIdx>(workDiv)
// The grid block index provider is bound to the m_gridBlockIdx member declared below.
102 , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
103 , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
// NOTE(review): listing lines 104-106 are missing; the two lambdas below are the trailing
// arguments of a base initializer whose beginning is not visible in this extract.
// First lambda: synchronizes the threads of the block through this accelerator.
107 [this]() { syncBlockThreads(*this); },
// Second lambda: true only on the OpenMP thread whose thread number is 0.
108 []() noexcept { return (::omp_get_thread_num() == 0); })
109 , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
110 {
111 }
112
113 private:
114 // getIdx
// Declared mutable, so it can be modified even through a const accelerator.
115 Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
116 };
117
118 namespace trait
119 {
120 //! Accelerator type trait specialization for the CPU OpenMP 2.0 thread accelerator.
//! The accelerator type of AccCpuOmp2Threads<TDim, TIdx> is AccCpuOmp2Threads<TDim, TIdx> itself.
121 template<typename TDim, typename TIdx>
122 struct AccType<AccCpuOmp2Threads<TDim, TIdx>>
123 {
124 using type = AccCpuOmp2Threads<TDim, TIdx>;
125 };
126
127 //! Single-thread accelerator trait specialization: the CPU OpenMP 2.0 thread accelerator is not a single-thread accelerator (std::false_type).
128 template<typename TDim, typename TIdx>
129 struct IsSingleThreadAcc<AccCpuOmp2Threads<TDim, TIdx>> : std::false_type
130 {
131 };
132
133 //! Multi-thread accelerator trait specialization: the CPU OpenMP 2.0 thread accelerator is a multi-thread accelerator (std::true_type).
134 template<typename TDim, typename TIdx>
135 struct IsMultiThreadAcc<AccCpuOmp2Threads<TDim, TIdx>> : std::true_type
136 {
137 };
138
139 //! The CPU OpenMP 2.0 thread accelerator device properties get trait specialization.
140 template<typename TDim, typename TIdx>
141 struct GetAccDevProps<AccCpuOmp2Threads<TDim, TIdx>>
142 {
143 ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> alpaka::AccDevProps<TDim, TIdx>
144 {
145# ifdef ALPAKA_CI
146 auto const blockThreadCountMax = alpaka::core::clipCast<TIdx>(std::min(4, ::omp_get_max_threads()));
147# else
148 auto const blockThreadCountMax = alpaka::core::clipCast<TIdx>(::omp_get_max_threads());
149# endif
150 auto const memBytes = getMemBytes(dev);
151 return {// m_multiProcessorCount
152 static_cast<TIdx>(1),
153 // m_gridBlockExtentMax
154 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
155 // m_gridBlockCountMax
156 std::numeric_limits<TIdx>::max(),
157 // m_blockThreadExtentMax
158 Vec<TDim, TIdx>::all(blockThreadCountMax),
159 // m_blockThreadCountMax
160 blockThreadCountMax,
161 // m_threadElemExtentMax
162 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
163 // m_threadElemCountMax
164 std::numeric_limits<TIdx>::max(),
165 // m_sharedMemSizeBytes
166 memBytes,
167 // m_globalMemSizeBytes
168 memBytes};
169 }
170 };
171
172 //! The CPU OpenMP 2.0 thread accelerator name trait specialization.
173 template<typename TDim, typename TIdx>
174 struct GetAccName<AccCpuOmp2Threads<TDim, TIdx>>
175 {
176 ALPAKA_FN_HOST static auto getAccName() -> std::string
177 {
178# if ALPAKA_COMP_CLANG
179# pragma clang diagnostic push
180# pragma clang diagnostic ignored "-Wexit-time-destructors"
181# endif
182 using namespace std::literals;
183 static std::string const accName =
184# ifdef __cpp_lib_format
185 std::format("AccCpuOmp2Threads<{},{}>", TDim::value, core::demangled<TIdx>);
186# else
187 "AccCpuOmp2Threads<"s + std::to_string(TDim::value) + ","s + std::string(core::demangled<TIdx>)
188 + ">"s;
189# endif
190 return accName;
191# if ALPAKA_COMP_CLANG
192# pragma clang diagnostic pop
193# endif
194 }
195 };
196
197 //! Device type trait specialization for the CPU OpenMP 2.0 thread accelerator: its device type is DevCpu.
198 template<typename TDim, typename TIdx>
199 struct DevType<AccCpuOmp2Threads<TDim, TIdx>>
200 {
201 using type = DevCpu;
202 };
203
203 //! Dimension type trait specialization for the CPU OpenMP 2.0 thread accelerator: yields the accelerator's TDim parameter.
204 template<typename TDim, typename TIdx>
205 struct DimType<AccCpuOmp2Threads<TDim, TIdx>>
206 {
207 using type = TDim;
208 };
210
211 //! Kernel task creation trait specialization for the CPU OpenMP 2.0 thread accelerator.
//! Builds a TaskKernelCpuOmp2Threads from the work division, the kernel function object and the
//! kernel arguments.
212 template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
213 struct CreateTaskKernel<AccCpuOmp2Threads<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
214 {
// NOTE(review): listing line 215 (the function name and opening parenthesis) is missing from this
// extract. Judging by the generic createTaskKernel summary on this page it presumably reads
// `ALPAKA_FN_HOST static auto createTaskKernel(` - confirm against the original header.
216 TWorkDiv const& workDiv,
217 TKernelFnObj const& kernelFnObj,
218 TArgs&&... args)
219 {
// The arguments are perfectly forwarded into the task; the work division and kernel function
// object are passed on by const reference.
220 return TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>(
221 workDiv,
222 kernelFnObj,
223 std::forward<TArgs>(args)...);
224 }
225 };
226
227 //! Platform type trait specialization for the CPU OpenMP 2.0 thread accelerator: its platform type is PlatformCpu.
228 template<typename TDim, typename TIdx>
229 struct PlatformType<AccCpuOmp2Threads<TDim, TIdx>>
230 {
231 using type = PlatformCpu;
232 };
233
234 //! Index type trait specialization for the CPU OpenMP 2.0 thread accelerator: yields the accelerator's TIdx parameter.
235 template<typename TDim, typename TIdx>
236 struct IdxType<AccCpuOmp2Threads<TDim, TIdx>>
237 {
238 using type = TIdx;
239 };
240
//! Accelerator-to-tag trait specialization: maps AccCpuOmp2Threads<TDim, TIdx> to the tag type
//! TagCpuOmp2Threads.
241 template<typename TDim, typename TIdx>
242 struct AccToTag<alpaka::AccCpuOmp2Threads<TDim, TIdx>>
243 {
244 using type = alpaka::TagCpuOmp2Threads;
245 };
246
//! Tag-to-accelerator trait specialization for the tag type TagCpuOmp2Threads.
247 template<typename TDim, typename TIdx>
248 struct TagToAcc<alpaka::TagCpuOmp2Threads, TDim, TIdx>
249 {
// NOTE(review): listing line 250 (the body of this struct) is missing from this extract. As the
// inverse of AccToTag it presumably reads `using type = alpaka::AccCpuOmp2Threads<TDim, TIdx>;` -
// confirm against the original header.
251 };
252 } // namespace trait
253} // namespace alpaka
254
255#endif
The CPU OpenMP 2.0 thread accelerator.
AccCpuOmp2Threads(AccCpuOmp2Threads &&)=delete
AccCpuOmp2Threads(AccCpuOmp2Threads const &)=delete
auto operator=(AccCpuOmp2Threads &&) -> AccCpuOmp2Threads &=delete
auto operator=(AccCpuOmp2Threads const &) -> AccCpuOmp2Threads &=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
auto staticMemCapacity() const -> std::uint32_t
The OpenMP barrier block synchronization.
The CPU intrinsic.
The CPU OpenMP 2.0 block memory fence.
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition Vec.hpp:89
A basic class holding the work division as grid block extent, block thread and thread element extent.
The OpenMP accelerator index provider.
Definition IdxBtOmp.hpp:26
An IdxGbRef grid block index.
Definition IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition IdxGbRef.hpp:22
The standard library math trait specializations.
"Tiny" state Mersenne Twister implementation
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition Common.hpp:40
auto clipCast(V const &val) -> T
Definition ClipCast.hpp:16
The alpaka accelerator library.
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc > >
Definition Traits.hpp:90
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:332
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreads(TBlockSync const &blockSync) -> void
Synchronizes all threads within the current block (independently for all blocks).
Definition Traits.hpp:36
typename trait::AccToTag< TAcc >::type AccToTag
maps an acc type to a tag type
Definition Tag.hpp:67
typename trait::TagToAcc< TTag, TDim, TIdx >::type TagToAcc
maps a tag type to an acc type
Definition Tag.hpp:74
The acceleration properties on a device.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:69