alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
AccCpuOmp2Threads.hpp
Go to the documentation of this file.
1/* Copyright 2024 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7// Base classes.
24
25// Specialized traits.
26#include "alpaka/acc/Traits.hpp"
27#include "alpaka/dev/Traits.hpp"
28#include "alpaka/idx/Traits.hpp"
31
32// Implementation details.
33#include "alpaka/acc/Tag.hpp"
36#include "alpaka/dev/DevCpu.hpp"
37
38#include <limits>
39#include <typeinfo>
40
41#ifdef ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED
42
43# if _OPENMP < 200203
44# error If ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!
45# endif
46
47# include <omp.h>
48
49namespace alpaka
50{
//! Forward declaration of the kernel execution task for this accelerator.
//! Declaring it here lets the accelerator class below name it in a friend declaration and lets
//! the CreateTaskKernel trait specialization construct it; its definition is not part of this file.
51 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
52 class TaskKernelCpuOmp2Threads;
53
54 //! The CPU OpenMP 2.0 thread accelerator.
55 //!
56 //! This accelerator allows parallel kernel execution on a CPU device.
57 //! It uses OpenMP 2.0 to implement the block thread parallelism.
   //!
   //! NOTE(review): this listing skips some source lines (the listing numbers jump, e.g. 58 -> 60,
   //! 67 -> 70, 77 -> 79). The class-head naming AccCpuOmp2Threads and some base classes are among
   //! the skipped lines, so the base list shown here is not complete.
58 template<typename TDim, typename TIdx>
60 : public WorkDivMembers<TDim, TIdx>
61 , public gb::IdxGbRef<TDim, TIdx>
62 , public bt::IdxBtOmp<TDim, TIdx>
63 , public AtomicHierarchy<
64 AtomicCpu, // grid atomics
65 AtomicOmpBuiltIn, // block atomics
66 AtomicOmpBuiltIn> // thread atomics
67 , public math::MathStdLib
70 , public BlockSyncBarrierOmp
71 , public IntrinsicCpu
72 , public MemFenceOmp2Threads
   // The random number base class is selected at compile time by ALPAKA_DISABLE_VENDOR_RNG.
73# ifdef ALPAKA_DISABLE_VENDOR_RNG
74 , public rand::RandDefault
75# else
76 , public rand::RandStdLib
77# endif
79 , public interface::Implements<ConceptAcc, AccCpuOmp2Threads<TDim, TIdx>>
80 {
   // Compile-time guard: index types narrower than int are rejected.
81 static_assert(
82 sizeof(TIdx) >= sizeof(int),
83 "Index type is not supported, consider using int or a larger type.");
84
85 public:
86 // A friend declaration cannot be partially specialized, so instead of befriending only the
   // task with matching TDim and TIdx, every TaskKernelCpuOmp2Threads specialization is a friend.
87 template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
88 friend class ::alpaka::TaskKernelCpuOmp2Threads;
89
   // NOTE(review): listing lines 90-93 are not shown; presumably they hold the deleted copy/move
   // constructors and assignment operators listed in this page's member summary - confirm.
94
95 private:
   //! Private constructor; within the code shown, only the befriended kernel task can call it.
   //! \param workDiv The work division, forwarded to the WorkDivMembers base.
   //! \param blockSharedMemDynSizeBytes Byte count forwarded to the BlockSharedMemDynMember base.
96 template<typename TWorkDiv>
97 ALPAKA_FN_HOST AccCpuOmp2Threads(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
98 : WorkDivMembers<TDim, TIdx>(workDiv)
   // The index base is handed m_gridBlockIdx by reference here, although the member itself is
   // only initialized at the end of this initializer list.
99 , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
100 , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
    // NOTE(review): listing lines 101-103 are not shown; the two lambdas below are arguments of a
    // base initializer that starts there. The first synchronizes the block's threads through
    // syncBlockThreads(*this); the second returns true only on the OpenMP thread with number 0.
104 [this]() { syncBlockThreads(*this); },
105 []() noexcept { return (::omp_get_thread_num() == 0); })
106 , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
107 {
108 }
109
110 private:
111 // getIdx
112 Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
113 };
114
115 namespace trait
116 {
117 //! The CPU OpenMP 2.0 thread accelerator type trait specialization.
    //! Maps the accelerator onto itself: `type` is AccCpuOmp2Threads<TDim, TIdx>.
118 template<typename TDim, typename TIdx>
119 struct AccType<AccCpuOmp2Threads<TDim, TIdx>>
120 {
121 using type = AccCpuOmp2Threads<TDim, TIdx>;
122 };
123
124 //! Single-thread accelerator trait specialization for the CPU OpenMP 2.0 thread accelerator.
    //! Derives from std::false_type: this accelerator is not marked as single-threaded.
125 template<typename TDim, typename TIdx>
126 struct IsSingleThreadAcc<AccCpuOmp2Threads<TDim, TIdx>> : std::false_type
127 {
128 };
129
130 //! Multi-thread accelerator trait specialization for the CPU OpenMP 2.0 thread accelerator.
    //! Derives from std::true_type: this accelerator is marked as multi-threaded.
131 template<typename TDim, typename TIdx>
132 struct IsMultiThreadAcc<AccCpuOmp2Threads<TDim, TIdx>> : std::true_type
133 {
134 };
135
136 //! The CPU OpenMP 2.0 thread accelerator device properties get trait specialization.
    //!
    //! Reports one multiprocessor, grid and element limits equal to the maximum value of TIdx,
    //! and a block size limited by the OpenMP maximum thread count.
137 template<typename TDim, typename TIdx>
138 struct GetAccDevProps<AccCpuOmp2Threads<TDim, TIdx>>
139 {
    //! \param dev The CPU device; its memory size is queried through getMemBytes.
    //! \return The accelerator device properties, filled in the field order labelled below.
140 ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> alpaka::AccDevProps<TDim, TIdx>
141 {
    // With ALPAKA_CI defined the threads per block are capped at 4; otherwise the value returned
    // by omp_get_max_threads() is used as is. Either value is converted to TIdx through clipCast.
142# ifdef ALPAKA_CI
143 auto const blockThreadCountMax = alpaka::core::clipCast<TIdx>(std::min(4, ::omp_get_max_threads()));
144# else
145 auto const blockThreadCountMax = alpaka::core::clipCast<TIdx>(::omp_get_max_threads());
146# endif
    // The same byte count is reported for both the shared and the global memory size below.
147 auto const memBytes = getMemBytes(dev);
148 return {// m_multiProcessorCount
149 static_cast<TIdx>(1),
150 // m_gridBlockExtentMax
151 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
152 // m_gridBlockCountMax
153 std::numeric_limits<TIdx>::max(),
154 // m_blockThreadExtentMax
155 Vec<TDim, TIdx>::all(blockThreadCountMax),
156 // m_blockThreadCountMax
157 blockThreadCountMax,
158 // m_threadElemExtentMax
159 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
160 // m_threadElemCountMax
161 std::numeric_limits<TIdx>::max(),
162 // m_sharedMemSizeBytes
163 memBytes,
164 // m_globalMemSizeBytes
165 memBytes};
166 }
167 };
168
169 //! The CPU OpenMP 2.0 thread accelerator name trait specialization.
170 template<typename TDim, typename TIdx>
171 struct GetAccName<AccCpuOmp2Threads<TDim, TIdx>>
172 {
    //! \return The string "AccCpuOmp2Threads<D,I>", where D is TDim::value converted with
    //! std::to_string and I is core::demangled<TIdx>.
173 ALPAKA_FN_HOST static auto getAccName() -> std::string
174 {
175 return "AccCpuOmp2Threads<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">";
176 }
177 };
178
179 //! The CPU OpenMP 2.0 thread accelerator device type trait specialization.
    //! The device type of this accelerator is DevCpu.
180 template<typename TDim, typename TIdx>
181 struct DevType<AccCpuOmp2Threads<TDim, TIdx>>
182 {
183 using type = DevCpu;
184 };
185
186 //! The CPU OpenMP 2.0 thread accelerator dimension getter trait specialization.
    //! Exposes the accelerator's TDim template argument as `type`.
187 template<typename TDim, typename TIdx>
188 struct DimType<AccCpuOmp2Threads<TDim, TIdx>>
189 {
190 using type = TDim;
191 };
192
193 //! The CPU OpenMP 2.0 thread accelerator execution task type trait specialization.
    //!
    //! Builds the TaskKernelCpuOmp2Threads object that runs a kernel on this accelerator.
194 template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
195 struct CreateTaskKernel<AccCpuOmp2Threads<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
196 {
    // NOTE(review): listing line 197 is not shown; it holds the head of the member function whose
    // parameter list continues below (presumably the static createTaskKernel - confirm).
    // workDiv and kernelFnObj are passed on as given; args are perfectly forwarded.
198 TWorkDiv const& workDiv,
199 TKernelFnObj const& kernelFnObj,
200 TArgs&&... args)
201 {
202 return TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>(
203 workDiv,
204 kernelFnObj,
205 std::forward<TArgs>(args)...);
206 }
207 };
208
209 //! The CPU OpenMP 2.0 thread execution task platform type trait specialization.
    //! The platform type of this accelerator is PlatformCpu.
210 template<typename TDim, typename TIdx>
211 struct PlatformType<AccCpuOmp2Threads<TDim, TIdx>>
212 {
213 using type = PlatformCpu;
214 };
215
216 //! The CPU OpenMP 2.0 thread accelerator idx type trait specialization.
    //! Exposes the accelerator's TIdx template argument as `type`.
217 template<typename TDim, typename TIdx>
218 struct IdxType<AccCpuOmp2Threads<TDim, TIdx>>
219 {
220 using type = TIdx;
221 };
222
//! The CPU OpenMP 2.0 thread accelerator tag trait specialization.
//! Maps AccCpuOmp2Threads<TDim, TIdx> to the tag type TagCpuOmp2Threads.
223 template<typename TDim, typename TIdx>
224 struct AccToTag<alpaka::AccCpuOmp2Threads<TDim, TIdx>>
225 {
226 using type = alpaka::TagCpuOmp2Threads;
227 };
228
//! The CPU OpenMP 2.0 thread tag-to-accelerator trait specialization for TagCpuOmp2Threads.
229 template<typename TDim, typename TIdx>
230 struct TagToAcc<alpaka::TagCpuOmp2Threads, TDim, TIdx>
231 {
    // NOTE(review): listing line 232 is not shown; presumably it defines `type` as
    // AccCpuOmp2Threads<TDim, TIdx>, mirroring AccToTag - confirm against the header.
233 };
234 } // namespace trait
235} // namespace alpaka
236
237#endif
The CPU OpenMP 2.0 thread accelerator.
AccCpuOmp2Threads(AccCpuOmp2Threads &&)=delete
AccCpuOmp2Threads(AccCpuOmp2Threads const &)=delete
auto operator=(AccCpuOmp2Threads &&) -> AccCpuOmp2Threads &=delete
auto operator=(AccCpuOmp2Threads const &) -> AccCpuOmp2Threads &=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
auto staticMemCapacity() const -> std::uint32_t
The OpenMP barrier block synchronization.
The CPU intrinsic.
The CPU OpenMP 2.0 block memory fence.
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition Vec.hpp:89
A basic class holding the work division as grid block extent, block thread and thread element extent.
The OpenMP accelerator index provider.
Definition IdxBtOmp.hpp:26
An IdxGbRef grid block index.
Definition IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition IdxGbRef.hpp:22
The standard library math trait specializations.
"Tiny" state mersenne twister implementation
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition Common.hpp:40
auto clipCast(V const &val) -> T
Definition ClipCast.hpp:16
The alpaka accelerator library.
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc > >
Definition Traits.hpp:90
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:332
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreads(TBlockSync const &blockSync) -> void
Synchronizes all threads within the current block (independently for all blocks).
Definition Traits.hpp:36
typename trait::AccToTag< TAcc >::type AccToTag
maps an acc type to a tag type
Definition Tag.hpp:67
typename trait::TagToAcc< TTag, TDim, TIdx >::type TagToAcc
maps a tag type to an acc type
Definition Tag.hpp:74
The acceleration properties on a device.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:69