alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
AccCpuOmp2Blocks.hpp
Go to the documentation of this file.
1/* Copyright 2024 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7// Base classes.
25
26// Specialized traits.
27#include "alpaka/acc/Traits.hpp"
28#include "alpaka/dev/Traits.hpp"
29#include "alpaka/idx/Traits.hpp"
32
33// Implementation details.
34#include "alpaka/acc/Tag.hpp"
37#include "alpaka/dev/DevCpu.hpp"
38
39#include <limits>
40#include <typeinfo>
41
42#ifdef ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED
43
44# if _OPENMP < 200203
45# error If ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!
46# endif
47
48namespace alpaka
49{
50 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
52
53 //! The CPU OpenMP 2.0 block accelerator.
54 //!
55 //! This accelerator allows parallel kernel execution on a CPU device.
56 //! It uses OpenMP 2.0 to implement the grid block parallelism.
57 //! The block thread extent is restricted to 1x1x1 (one thread per block).
58 template<typename TDim, typename TIdx>
59 class AccCpuOmp2Blocks final
60 : public WorkDivMembers<TDim, TIdx>
61 , public gb::IdxGbRef<TDim, TIdx>
62 , public bt::IdxBtZero<TDim, TIdx>
63 , public AtomicHierarchy<
64 AtomicCpu, // grid atomics
65 AtomicOmpBuiltIn, // block atomics
66 AtomicNoOp> // thread atomics
67 , public math::MathStdLib
69 , public BlockSharedMemStMember<>
70 , public BlockSyncNoOp
71 , public IntrinsicCpu
72 , public MemFenceOmp2Blocks
73# ifdef ALPAKA_DISABLE_VENDOR_RNG
74 , public rand::RandDefault
75# else
76 , public rand::RandStdLib
77# endif
79 , public interface::Implements<ConceptAcc, AccCpuOmp2Blocks<TDim, TIdx>>
80 {
81 static_assert(
82 sizeof(TIdx) >= sizeof(int),
83 "Index type is not supported, consider using int or a larger type.");
84
85 public:
86 // Partial specialization with the correct TDim and TIdx is not allowed.
87 template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
88 friend class ::alpaka::TaskKernelCpuOmp2Blocks;
89
92 auto operator=(AccCpuOmp2Blocks const&) -> AccCpuOmp2Blocks& = delete;
94
95 private:
96 template<typename TWorkDiv>
97 ALPAKA_FN_HOST AccCpuOmp2Blocks(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
98 : WorkDivMembers<TDim, TIdx>(workDiv)
99 , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
100 , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
102 , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
103 {
104 }
105
106 private:
107 // getIdx
108 Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
109 };
110
111 namespace trait
112 {
113 //! The accelerator type trait specialization for the CPU OpenMP 2.0 block accelerator.
114 template<typename TDim, typename TIdx>
115 struct AccType<AccCpuOmp2Blocks<TDim, TIdx>>
116 {
117 using type = AccCpuOmp2Blocks<TDim, TIdx>; //!< The accelerator is its own accelerator type.
118 };
119
120 //! The single thread accelerator trait specialization: true for the CPU OpenMP 2.0 block accelerator.
121 template<typename TDim, typename TIdx>
122 struct IsSingleThreadAcc<AccCpuOmp2Blocks<TDim, TIdx>> : std::true_type
123 {
124 };
125
126 //! The multi thread accelerator trait specialization: false for the CPU OpenMP 2.0 block accelerator.
127 template<typename TDim, typename TIdx>
128 struct IsMultiThreadAcc<AccCpuOmp2Blocks<TDim, TIdx>> : std::false_type
129 {
130 };
131
132 //! The CPU OpenMP 2.0 block accelerator device properties get trait specialization.
133 template<typename TDim, typename TIdx>
134 struct GetAccDevProps<AccCpuOmp2Blocks<TDim, TIdx>>
135 {
137 {
138 return {// m_multiProcessorCount
139 alpaka::core::clipCast<TIdx>(omp_get_max_threads()),
140 // m_gridBlockExtentMax
141 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
142 // m_gridBlockCountMax
143 std::numeric_limits<TIdx>::max(),
144 // m_blockThreadExtentMax
146 // m_blockThreadCountMax
147 static_cast<TIdx>(1),
148 // m_threadElemExtentMax
149 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
150 // m_threadElemCountMax
151 std::numeric_limits<TIdx>::max(),
152 // m_sharedMemSizeBytes
154 // m_globalMemSizeBytes
155 getMemBytes(dev)};
156 }
157 };
158
159 //! The CPU OpenMP 2.0 block accelerator name trait specialization. The name is "AccCpuOmp2Blocks<D,I>" with D taken from TDim::value and I from core::demangled<TIdx>.
160 template<typename TDim, typename TIdx>
161 struct GetAccName<AccCpuOmp2Blocks<TDim, TIdx>>
162 {
163 ALPAKA_FN_HOST static auto getAccName() -> std::string //!< \return The accelerator name including its dimension and index type.
164 {
165 return "AccCpuOmp2Blocks<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">";
166 }
167 };
168
169 //! The CPU OpenMP 2.0 block accelerator device type trait specialization.
170 template<typename TDim, typename TIdx>
171 struct DevType<AccCpuOmp2Blocks<TDim, TIdx>>
172 {
173 using type = DevCpu; //!< The device type used by this accelerator.
174 };
175
176 //! The CPU OpenMP 2.0 block accelerator dimension getter trait specialization.
177 template<typename TDim, typename TIdx>
178 struct DimType<AccCpuOmp2Blocks<TDim, TIdx>>
179 {
180 using type = TDim; //!< The dimension type the accelerator was instantiated with.
181 };
182
183 //! The CPU OpenMP 2.0 block accelerator execution task type trait specialization.
184 template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
185 struct CreateTaskKernel<AccCpuOmp2Blocks<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
186 {
188 TWorkDiv const& workDiv,
189 TKernelFnObj const& kernelFnObj,
190 TArgs&&... args)
191 {
192 if(workDiv.m_blockThreadExtent.prod() != static_cast<TIdx>(1u))
193 {
194 throw std::runtime_error(
195 "The given work division is not valid for a single thread Acc: "
196 + getAccName<AccCpuOmp2Blocks<TDim, TIdx>>() + ". Threads per block should be 1!");
197 }
198
199 return TaskKernelCpuOmp2Blocks<TDim, TIdx, TKernelFnObj, TArgs...>(
200 workDiv,
201 kernelFnObj,
202 std::forward<TArgs>(args)...);
203 }
204 };
205
206 //! The CPU OpenMP 2.0 block accelerator platform type trait specialization.
207 template<typename TDim, typename TIdx>
208 struct PlatformType<AccCpuOmp2Blocks<TDim, TIdx>>
209 {
210 using type = PlatformCpu; //!< The platform type used by this accelerator.
211 };
212
213 //! The CPU OpenMP 2.0 block accelerator idx type trait specialization.
214 template<typename TDim, typename TIdx>
215 struct IdxType<AccCpuOmp2Blocks<TDim, TIdx>>
216 {
217 using type = TIdx; //!< The index type the accelerator was instantiated with.
218 };
219
220 template<typename TDim, typename TIdx>
221 struct AccToTag<alpaka::AccCpuOmp2Blocks<TDim, TIdx>>
222 {
223 using type = alpaka::TagCpuOmp2Blocks; //!< The tag type associated with the CPU OpenMP 2.0 block accelerator.
224 };
225
226 template<typename TDim, typename TIdx>
227 struct TagToAcc<alpaka::TagCpuOmp2Blocks, TDim, TIdx>
228 {
230 };
231 } // namespace trait
232} // namespace alpaka
233
234#endif
The CPU OpenMP 2.0 block accelerator.
AccCpuOmp2Blocks(AccCpuOmp2Blocks const &)=delete
auto operator=(AccCpuOmp2Blocks const &) -> AccCpuOmp2Blocks &=delete
AccCpuOmp2Blocks(AccCpuOmp2Blocks &&)=delete
auto operator=(AccCpuOmp2Blocks &&) -> AccCpuOmp2Blocks &=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
static constexpr auto staticAllocBytes() -> std::uint32_t
auto staticMemCapacity() const -> std::uint32_t
Static block shared memory provider using a pointer to externally allocated fixed-size memory,...
The no op block synchronization.
The CPU device handle.
Definition DevCpu.hpp:56
The CPU intrinsic.
The CPU OpenMP 2.0 block memory fence.
The CPU OpenMP 2.0 block accelerator execution task.
An n-dimensional vector.
Definition Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition Vec.hpp:106
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition Vec.hpp:89
A basic class holding the work division as grid block extent, block thread and thread element extent.
A zero block thread index provider.
Definition IdxBtZero.hpp:19
An IdxGbRef grid block index.
Definition IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition IdxGbRef.hpp:22
The standard library math trait specializations.
"Tiny" state mersenne twister implementation
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition Common.hpp:40
auto clipCast(V const &val) -> T
Definition ClipCast.hpp:16
The alpaka accelerator library.
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc > >
Definition Traits.hpp:90
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:332
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:100
The acceleration properties on a device.
The CPU device platform.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
The accelerator type trait.
Definition Traits.hpp:37
The kernel execution task creation trait.
Definition Traits.hpp:35
The device type trait.
Definition Traits.hpp:23
The dimension getter type trait.
Definition Traits.hpp:14
The device properties get trait.
Definition Traits.hpp:61
The accelerator name trait.
Definition Traits.hpp:68
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:69
The idx type trait.
Definition Traits.hpp:25
The multi thread accelerator trait.
Definition Traits.hpp:56
The single thread accelerator trait.
Definition Traits.hpp:46
The platform type trait.
Definition Traits.hpp:30