alpaka
Abstraction Library for Parallel Kernel Acceleration
AccCpuOmp2Threads.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 // Base classes.
24 
25 // Specialized traits.
26 #include "alpaka/acc/Traits.hpp"
27 #include "alpaka/dev/Traits.hpp"
28 #include "alpaka/idx/Traits.hpp"
29 #include "alpaka/kernel/Traits.hpp"
31 
32 // Implementation details.
33 #include "alpaka/acc/Tag.hpp"
34 #include "alpaka/core/ClipCast.hpp"
36 #include "alpaka/dev/DevCpu.hpp"
37 
38 #include <limits>
39 #include <typeinfo>
40 
41 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED
42 
43 # if _OPENMP < 200203
44 # error If ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!
45 # endif
46 
47 # include <omp.h>
48 
49 namespace alpaka
50 {
51  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
52  class TaskKernelCpuOmp2Threads; // Forward declaration only; needed so the accelerator class can name this template in its friend declaration.
53 
54  //! The CPU OpenMP 2.0 thread accelerator.
55  //!
56  //! This accelerator allows parallel kernel execution on a CPU device; its capabilities are composed from the base classes listed below.
57  //! It uses OpenMP 2.0 to implement the block thread parallelism. The constructor is private, so instances can only be created by the befriended TaskKernelCpuOmp2Threads.
58  template<typename TDim, typename TIdx>
59  class AccCpuOmp2Threads final
60  : public WorkDivMembers<TDim, TIdx>
61  , public gb::IdxGbRef<TDim, TIdx>
62  , public bt::IdxBtOmp<TDim, TIdx>
63  , public AtomicHierarchy<
64  AtomicCpu, // grid atomics
65  AtomicOmpBuiltIn, // block atomics
66  AtomicOmpBuiltIn> // thread atomics
67  , public math::MathStdLib
68  , public BlockSharedMemDynMember<>
70  , public BlockSyncBarrierOmp
71  , public IntrinsicCpu
72  , public MemFenceOmp2Threads
73 # ifdef ALPAKA_DISABLE_VENDOR_RNG
74  , public rand::RandDefault
75 # else
76  , public rand::RandStdLib
77 # endif
78  , public warp::WarpSingleThread
79  , public interface::Implements<ConceptAcc, AccCpuOmp2Threads<TDim, TIdx>>
80  {
81  static_assert(
82  sizeof(TIdx) >= sizeof(int),
83  "Index type is not supported, consider using int or a larger type.");
84 
85  public:
86  // All instantiations of the task template are befriended: a friend declaration cannot be a partial specialization restricted to this TDim and TIdx.
87  template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
88  friend class ::alpaka::TaskKernelCpuOmp2Threads;
89 
92  auto operator=(AccCpuOmp2Threads const&) -> AccCpuOmp2Threads& = delete; // copy assignment is disabled
94 
95  private:
96  template<typename TWorkDiv>
97  ALPAKA_FN_HOST AccCpuOmp2Threads(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
98  : WorkDivMembers<TDim, TIdx>(workDiv)
99  , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx) // base classes are initialised before members, so m_gridBlockIdx is handed over by reference before it holds a value; NOTE(review): confirm the base only keeps the reference
100  , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
102  staticMemBegin(), // NOTE(review): this and the next two lines are arguments of a base initializer whose head (listing line 101) is missing from this extract
104  [this]() { syncBlockThreads(*this); }, // callback that synchronizes the threads of the current block
105  []() noexcept { return (::omp_get_thread_num() == 0); }) // predicate that is true only on OpenMP thread 0
106  , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
107  {
108  }
109 
110  private:
111  // getIdx: backing storage that the constructor passes to the gb::IdxGbRef base
112  Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block (mutable, so it can be modified through a const accelerator).
113  };
114 
115  namespace trait
116  {
117  //! The CPU OpenMP 2.0 thread accelerator type trait specialization.
118  template<typename TDim, typename TIdx>
119  struct AccType<AccCpuOmp2Threads<TDim, TIdx>>
120  {
122  };
123 
124  //! The CPU OpenMP 2.0 thread accelerator single-thread trait specialization: this accelerator is not a single-thread accelerator (std::false_type).
125  template<typename TDim, typename TIdx>
126  struct IsSingleThreadAcc<AccCpuOmp2Threads<TDim, TIdx>> : std::false_type
127  {
128  };
129 
131  //! The CPU OpenMP 2.0 thread accelerator multi-thread trait specialization: this accelerator is a multi-thread accelerator (std::true_type).
132  template<typename TDim, typename TIdx>
133  struct IsMultiThreadAcc<AccCpuOmp2Threads<TDim, TIdx>> : std::true_type
134  {
135  };
135 
136  //! The CPU OpenMP 2.0 thread accelerator device properties get trait specialization. NOTE(review): the function head (listing line 140) and several initializer entries are missing from this extract.
137  template<typename TDim, typename TIdx>
138  struct GetAccDevProps<AccCpuOmp2Threads<TDim, TIdx>>
139  {
141  {
142 # ifdef ALPAKA_CI
143  auto const blockThreadCountMax = alpaka::core::clipCast<TIdx>(std::min(4, ::omp_get_max_threads())); // with ALPAKA_CI defined the block size is capped at 4 threads
144 # else
145  auto const blockThreadCountMax = alpaka::core::clipCast<TIdx>(::omp_get_max_threads()); // otherwise the OpenMP maximum thread count is used
146 # endif
147  auto const memBytes = getMemBytes(dev); // reported below as both the shared and the global memory size
148  return {// m_multiProcessorCount
149  static_cast<TIdx>(1),
150  // m_gridBlockExtentMax
152  // m_gridBlockCountMax
154  // m_blockThreadExtentMax
155  Vec<TDim, TIdx>::all(blockThreadCountMax),
156  // m_blockThreadCountMax
157  blockThreadCountMax,
158  // m_threadElemExtentMax
160  // m_threadElemCountMax
162  // m_sharedMemSizeBytes
163  memBytes,
164  // m_globalMemSizeBytes
165  memBytes};
166  }
167  };
168 
169  //! The CPU OpenMP 2.0 thread accelerator name trait specialization.
170  template<typename TDim, typename TIdx>
171  struct GetAccName<AccCpuOmp2Threads<TDim, TIdx>>
172  {
173  ALPAKA_FN_HOST static auto getAccName() -> std::string
174  {
175  return "AccCpuOmp2Threads<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">"; // accelerator name followed by the dimension count and the demangled index type in angle brackets
176  }
177  };
178 
179  //! The CPU OpenMP 2.0 thread accelerator device type trait specialization: the device type is DevCpu.
180  template<typename TDim, typename TIdx>
181  struct DevType<AccCpuOmp2Threads<TDim, TIdx>>
182  {
183  using type = DevCpu;
184  };
185 
186  //! The CPU OpenMP 2.0 thread accelerator dimension getter trait specialization: the dimension is the TDim template argument.
187  template<typename TDim, typename TIdx>
188  struct DimType<AccCpuOmp2Threads<TDim, TIdx>>
189  {
190  using type = TDim;
191  };
192 
193  //! The CPU OpenMP 2.0 thread accelerator execution task type trait specialization: builds a TaskKernelCpuOmp2Threads from the work division, the kernel function object and the kernel arguments. NOTE(review): the function head (listing line 197) is missing from this extract.
194  template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
195  struct CreateTaskKernel<AccCpuOmp2Threads<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
196  {
198  TWorkDiv const& workDiv,
199  TKernelFnObj const& kernelFnObj,
200  TArgs&&... args)
201  {
202  return TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>(
203  workDiv,
204  kernelFnObj,
205  std::forward<TArgs>(args)...); // the kernel arguments are forwarded with their original value category
206  }
207  };
208 
209  //! The CPU OpenMP 2.0 thread accelerator platform type trait specialization: the platform is PlatformCpu.
210  template<typename TDim, typename TIdx>
211  struct PlatformType<AccCpuOmp2Threads<TDim, TIdx>>
212  {
213  using type = PlatformCpu;
214  };
215 
216  //! The CPU OpenMP 2.0 thread accelerator idx type trait specialization: the index type is the TIdx template argument.
217  template<typename TDim, typename TIdx>
218  struct IdxType<AccCpuOmp2Threads<TDim, TIdx>>
219  {
220  using type = TIdx;
221  };
222 
223  template<typename TDim, typename TIdx>
224  struct AccToTag<alpaka::AccCpuOmp2Threads<TDim, TIdx>> // Accelerator-to-tag trait specialization for this accelerator. NOTE(review): the body (listing line 226) is missing from this extract; presumably it names TagCpuOmp2Threads - confirm against the original header.
225  {
227  };
228 
229  template<typename TDim, typename TIdx>
230  struct TagToAcc<alpaka::TagCpuOmp2Threads, TDim, TIdx> // Tag-to-accelerator trait specialization for TagCpuOmp2Threads. NOTE(review): the body (listing line 232) is missing from this extract; presumably it names AccCpuOmp2Threads<TDim, TIdx> - confirm against the original header.
231  {
233  };
234  } // namespace trait
235 } // namespace alpaka
236 
237 #endif
The CPU OpenMP 2.0 thread accelerator.
AccCpuOmp2Threads(AccCpuOmp2Threads &&)=delete
AccCpuOmp2Threads(AccCpuOmp2Threads const &)=delete
auto operator=(AccCpuOmp2Threads &&) -> AccCpuOmp2Threads &=delete
auto operator=(AccCpuOmp2Threads const &) -> AccCpuOmp2Threads &=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
auto staticMemBegin() const -> uint8_t *
auto staticMemCapacity() const -> std::uint32_t
The OpenMP barrier block synchronization.
The CPU device handle.
Definition: DevCpu.hpp:56
The CPU intrinsic.
The CPU OpenMP 2.0 block memory fence.
The CPU OpenMP 2.0 thread accelerator execution task.
An n-dimensional vector.
Definition: Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition: Vec.hpp:116
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto zeros() -> Vec< TDim, TIdx >
Zero value constructor.
Definition: Vec.hpp:126
A basic class holding the work division as grid block extent, block thread and thread element extent.
The OpenMP accelerator index provider.
Definition: IdxBtOmp.hpp:26
An IdxGbRef grid block index.
Definition: IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition: IdxGbRef.hpp:22
The standard library math trait specializations.
Definition: MathStdLib.hpp:249
"Tiny" state mersenne twister implementation
Definition: RandStdLib.hpp:20
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric value, the numeric value is chosen).
Definition: Traits.hpp:1263
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto min(T const &min_ctx, Tx const &x, Ty const &y)
Returns the smaller of two arguments. NaNs are treated as missing data (between a NaN and a numeric value, the numeric value is chosen).
Definition: Traits.hpp:1280
The alpaka accelerator library.
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition: Traits.hpp:95
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreads(TBlockSync const &blockSync) -> void
Synchronizes all threads within the current block (independently for all blocks).
Definition: Traits.hpp:36
The acceleration properties on a device.
Definition: AccDevProps.hpp:18
The CPU device platform.
Definition: PlatformCpu.hpp:18
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition: Interface.hpp:15
The accelerator type trait.
Definition: Traits.hpp:37
static ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
The kernel execution task creation trait.
Definition: Traits.hpp:35
The device type trait.
Definition: Traits.hpp:23
The dimension getter type trait.
Definition: Traits.hpp:14
static ALPAKA_FN_HOST auto getAccDevProps(DevCpu const &dev) -> alpaka::AccDevProps< TDim, TIdx >
The device properties get trait.
Definition: Traits.hpp:61
static ALPAKA_FN_HOST auto getAccName() -> std::string
The accelerator name trait.
Definition: Traits.hpp:68
The idx type trait.
Definition: Traits.hpp:25
The multi thread accelerator trait.
Definition: Traits.hpp:56
The single thread accelerator trait.
Definition: Traits.hpp:46
The platform type trait.
Definition: Traits.hpp:30