alpaka
Abstraction Library for Parallel Kernel Acceleration
AccCpuOmp2Blocks.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 // Base classes.
25 
26 // Specialized traits.
27 #include "alpaka/acc/Traits.hpp"
28 #include "alpaka/dev/Traits.hpp"
29 #include "alpaka/idx/Traits.hpp"
30 #include "alpaka/kernel/Traits.hpp"
32 
33 // Implementation details.
34 #include "alpaka/acc/Tag.hpp"
35 #include "alpaka/core/ClipCast.hpp"
37 #include "alpaka/dev/DevCpu.hpp"
38 
39 #include <limits>
40 #include <typeinfo>
41 
42 #ifdef ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED
43 
44 # if _OPENMP < 200203
45 # error If ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!
46 # endif
47 
48 namespace alpaka
49 {
50  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
52 
53  //! The CPU OpenMP 2.0 block accelerator.
54  //!
55  //! This accelerator allows parallel kernel execution on a CPU device.
56  //! It uses OpenMP 2.0 to implement the grid block parallelism.
57  //! The block thread extent is restricted to 1x1x1, i.e. every block is executed by exactly one thread.
58  template<typename TDim, typename TIdx>
59  class AccCpuOmp2Blocks final
60  : public WorkDivMembers<TDim, TIdx>
61  , public gb::IdxGbRef<TDim, TIdx>
62  , public bt::IdxBtZero<TDim, TIdx>
63  , public AtomicHierarchy<
64  AtomicCpu, // grid atomics
65  AtomicOmpBuiltIn, // block atomics
66  AtomicNoOp> // thread atomics
67  , public math::MathStdLib
68  , public BlockSharedMemDynMember<>
69  , public BlockSharedMemStMember<>
70  , public BlockSyncNoOp
71  , public IntrinsicCpu
72  , public MemFenceOmp2Blocks
73 # ifdef ALPAKA_DISABLE_VENDOR_RNG
74  , public rand::RandDefault
75 # else
76  , public rand::RandStdLib
77 # endif
78  , public warp::WarpSingleThread
79  , public interface::Implements<ConceptAcc, AccCpuOmp2Blocks<TDim, TIdx>>
80  {
81  static_assert(
82  sizeof(TIdx) >= sizeof(int),
83  "Index type is not supported, consider using int or a larger type.");
84 
85  public:
86  // Partial specialization with the correct TDim and TIdx is not allowed.
87  template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
88  friend class ::alpaka::TaskKernelCpuOmp2Blocks;
89 
92  auto operator=(AccCpuOmp2Blocks const&) -> AccCpuOmp2Blocks& = delete;
94 
95  private:
96  template<typename TWorkDiv>
97  ALPAKA_FN_HOST AccCpuOmp2Blocks(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
98  : WorkDivMembers<TDim, TIdx>(workDiv)
99  , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
100  , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
102  , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
103  {
104  }
105 
106  private:
107  // getIdx
108  Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
109  };
110 
111  namespace trait
112  {
113  //! The CPU OpenMP 2.0 block accelerator type trait specialization.
114  template<typename TDim, typename TIdx>
115  struct AccType<AccCpuOmp2Blocks<TDim, TIdx>>
116  {
118  };
119 
120  //! The CPU OpenMP 2.0 block single thread accelerator type trait specialization: derives from std::true_type, i.e. this accelerator is reported as a single thread accelerator.
121  template<typename TDim, typename TIdx>
122  struct IsSingleThreadAcc<AccCpuOmp2Blocks<TDim, TIdx>> : std::true_type
123  {
124  };
125 
126  //! The CPU OpenMP 2.0 block multi thread accelerator type trait specialization: derives from std::false_type, i.e. this accelerator is not reported as a multi thread accelerator.
127  template<typename TDim, typename TIdx>
128  struct IsMultiThreadAcc<AccCpuOmp2Blocks<TDim, TIdx>> : std::false_type
129  {
130  };
131 
132  //! The CPU OpenMP 2.0 block accelerator device properties get trait specialization.
133  template<typename TDim, typename TIdx>
134  struct GetAccDevProps<AccCpuOmp2Blocks<TDim, TIdx>>
135  {
137  {
138  return {// m_multiProcessorCount
139  alpaka::core::clipCast<TIdx>(omp_get_max_threads()),
140  // m_gridBlockExtentMax
142  // m_gridBlockCountMax
144  // m_blockThreadExtentMax
146  // m_blockThreadCountMax
147  static_cast<TIdx>(1),
148  // m_threadElemExtentMax
150  // m_threadElemCountMax
152  // m_sharedMemSizeBytes
153  static_cast<size_t>(AccCpuOmp2Blocks<TDim, TIdx>::staticAllocBytes()),
154  // m_globalMemSizeBytes
155  getMemBytes(dev)};
156  }
157  };
158 
159  //! The CPU OpenMP 2.0 block accelerator name trait specialization: getAccName() returns the string AccCpuOmp2Blocks<D,I>, where D is std::to_string(TDim::value) and I is core::demangled<TIdx> (presumably the demangled type name of TIdx - confirm against core::demangled).
160  template<typename TDim, typename TIdx>
161  struct GetAccName<AccCpuOmp2Blocks<TDim, TIdx>>
162  {
163  ALPAKA_FN_HOST static auto getAccName() -> std::string
164  {
165  return "AccCpuOmp2Blocks<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">";
166  }
167  };
168 
169  //! The CPU OpenMP 2.0 block accelerator device type trait specialization: maps the accelerator to the DevCpu device type.
170  template<typename TDim, typename TIdx>
171  struct DevType<AccCpuOmp2Blocks<TDim, TIdx>>
172  {
173  using type = DevCpu;
174  };
175 
176  //! The CPU OpenMP 2.0 block accelerator dimension getter trait specialization: exposes the TDim template argument of the accelerator as its dimension type.
177  template<typename TDim, typename TIdx>
178  struct DimType<AccCpuOmp2Blocks<TDim, TIdx>>
179  {
180  using type = TDim;
181  };
182 
183  //! The CPU OpenMP 2.0 block accelerator execution task type trait specialization.
184  template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
185  struct CreateTaskKernel<AccCpuOmp2Blocks<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
186  {
188  TWorkDiv const& workDiv,
189  TKernelFnObj const& kernelFnObj,
190  TArgs&&... args)
191  {
192  if(workDiv.m_blockThreadExtent.prod() != static_cast<TIdx>(1u))
193  {
194  throw std::runtime_error(
195  "The given work division is not valid for a single thread Acc: "
196  + getAccName<AccCpuOmp2Blocks<TDim, TIdx>>() + ". Threads per block should be 1!");
197  }
198 
199  return TaskKernelCpuOmp2Blocks<TDim, TIdx, TKernelFnObj, TArgs...>(
200  workDiv,
201  kernelFnObj,
202  std::forward<TArgs>(args)...);
203  }
204  };
205 
206  //! The CPU OpenMP 2.0 block accelerator platform type trait specialization: maps the accelerator to the PlatformCpu platform type.
207  template<typename TDim, typename TIdx>
208  struct PlatformType<AccCpuOmp2Blocks<TDim, TIdx>>
209  {
210  using type = PlatformCpu;
211  };
212 
213  //! The CPU OpenMP 2.0 block accelerator idx type trait specialization: exposes the TIdx template argument of the accelerator as its index type.
214  template<typename TDim, typename TIdx>
215  struct IdxType<AccCpuOmp2Blocks<TDim, TIdx>>
216  {
217  using type = TIdx;
218  };
219 
220  template<typename TDim, typename TIdx>
221  struct AccToTag<alpaka::AccCpuOmp2Blocks<TDim, TIdx>>
222  {
224  };
225 
226  template<typename TDim, typename TIdx>
227  struct TagToAcc<alpaka::TagCpuOmp2Blocks, TDim, TIdx>
228  {
230  };
231  } // namespace trait
232 } // namespace alpaka
233 
234 #endif
The CPU OpenMP 2.0 block accelerator.
AccCpuOmp2Blocks(AccCpuOmp2Blocks const &)=delete
auto operator=(AccCpuOmp2Blocks const &) -> AccCpuOmp2Blocks &=delete
AccCpuOmp2Blocks(AccCpuOmp2Blocks &&)=delete
auto operator=(AccCpuOmp2Blocks &&) -> AccCpuOmp2Blocks &=delete
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
static constexpr auto staticAllocBytes() -> std::uint32_t
auto staticMemBegin() const -> uint8_t *
auto staticMemCapacity() const -> std::uint32_t
Static block shared memory provider using a pointer to externally allocated fixed-size memory,...
The no op block synchronization.
The CPU device handle.
Definition: DevCpu.hpp:56
The CPU intrinsic.
The CPU OpenMP 2.0 block memory fence.
The CPU OpenMP 2.0 block accelerator execution task.
An n-dimensional vector.
Definition: Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition: Vec.hpp:133
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition: Vec.hpp:116
A basic class holding the work division as grid block extent, block thread and thread element extent.
A zero block thread index provider.
Definition: IdxBtZero.hpp:19
An IdxGbRef grid block index.
Definition: IdxGbRef.hpp:20
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition: IdxGbRef.hpp:22
The standard library math trait specializations.
Definition: MathStdLib.hpp:249
"Tiny" state mersenne twister implementation
Definition: RandStdLib.hpp:20
The single-threaded warp to emulate it on CPUs.
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric va...
Definition: Traits.hpp:1263
The alpaka accelerator library.
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition: Traits.hpp:95
ALPAKA_FN_HOST auto getAccName() -> std::string
Definition: Traits.hpp:100
The acceleration properties on a device.
Definition: AccDevProps.hpp:18
The CPU device platform.
Definition: PlatformCpu.hpp:18
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition: Interface.hpp:15
The accelerator type trait.
Definition: Traits.hpp:37
static ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
The kernel execution task creation trait.
Definition: Traits.hpp:35
The device type trait.
Definition: Traits.hpp:23
The dimension getter type trait.
Definition: Traits.hpp:14
static ALPAKA_FN_HOST auto getAccDevProps(DevCpu const &dev) -> alpaka::AccDevProps< TDim, TIdx >
The device properties get trait.
Definition: Traits.hpp:61
static ALPAKA_FN_HOST auto getAccName() -> std::string
The accelerator name trait.
Definition: Traits.hpp:68
The idx type trait.
Definition: Traits.hpp:25
The multi thread accelerator trait.
Definition: Traits.hpp:56
The single thread accelerator trait.
Definition: Traits.hpp:46
The platform type trait.
Definition: Traits.hpp:30