alpaka
Abstraction Library for Parallel Kernel Acceleration
AccCpuTbbBlocks.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Axel Huebl, Benjamin Worpitz, Erik Zenker, René Widera, Jan Stephan, Bernhard Manfred Gruber,
2  * Andrea Bocci
3  * SPDX-License-Identifier: MPL-2.0
4  */
5 
6 #pragma once
7 
8 // Base classes.
25 
26 // Specialized traits.
27 #include "alpaka/acc/Traits.hpp"
28 #include "alpaka/dev/Traits.hpp"
29 #include "alpaka/idx/Traits.hpp"
30 #include "alpaka/kernel/Traits.hpp"
32 
33 // Implementation details.
34 #include "alpaka/acc/Tag.hpp"
35 #include "alpaka/core/ClipCast.hpp"
37 #include "alpaka/dev/DevCpu.hpp"
38 
#include <limits>
#include <memory>
#include <typeinfo>
41 
42 #ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
43 
44 # include <tbb/tbb.h>
45 
46 namespace alpaka
47 {
//! Forward declaration of the kernel execution task belonging to the CPU TBB block accelerator.
//! The accelerator class befriends this template and the CreateTaskKernel trait returns an instance of it.
template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
class TaskKernelCpuTbbBlocks;
50 
51  //! The CPU TBB block accelerator.
52  template<typename TDim, typename TIdx>
53  class AccCpuTbbBlocks final
54  : public WorkDivMembers<TDim, TIdx>
55  , public gb::IdxGbRef<TDim, TIdx>
56  , public bt::IdxBtZero<TDim, TIdx>
57  , public AtomicHierarchy<
58  AtomicCpu, // grid atomics
59  AtomicCpu, // block atomics
60  AtomicNoOp> // thread atomics
61  , public math::MathStdLib
62  , public BlockSharedMemDynMember<>
63  , public BlockSharedMemStMember<>
64  , public BlockSyncNoOp
65  , public IntrinsicCpu
66  , public MemFenceCpu
67 # ifdef ALPAKA_DISABLE_VENDOR_RNG
68  , public rand::RandDefault
69 # else
70  , public rand::RandStdLib
71 # endif
72  , public warp::WarpSingleThread
73  , public interface::Implements<ConceptAcc, AccCpuTbbBlocks<TDim, TIdx>>
74  {
75  static_assert(
76  sizeof(TIdx) >= sizeof(int),
77  "Index type is not supported, consider using int or a larger type.");
78 
79  public:
80  // Partial specialization with the correct TDim and TIdx is not allowed.
81  template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
82  friend class ::alpaka::TaskKernelCpuTbbBlocks;
83 
84  AccCpuTbbBlocks(AccCpuTbbBlocks const&) = delete;
85  AccCpuTbbBlocks(AccCpuTbbBlocks&&) = delete;
86  auto operator=(AccCpuTbbBlocks const&) -> AccCpuTbbBlocks& = delete;
87  auto operator=(AccCpuTbbBlocks&&) -> AccCpuTbbBlocks& = delete;
88 
89  private:
90  template<typename TWorkDiv>
91  ALPAKA_FN_HOST AccCpuTbbBlocks(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
92  : WorkDivMembers<TDim, TIdx>(workDiv)
93  , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
94  , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
95  , BlockSharedMemStMember<>(staticMemBegin(), staticMemCapacity())
96  , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
97  {
98  }
99 
100  private:
101  // getIdx
102  Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
103  };
104 
105  namespace trait
106  {
107  //! The CPU TBB block accelerator type trait specialization.
108  template<typename TDim, typename TIdx>
109  struct AccType<AccCpuTbbBlocks<TDim, TIdx>>
110  {
111  using type = AccCpuTbbBlocks<TDim, TIdx>;
112  };
113 
114  //! The CPU TBB block single thread accelerator type trait specialization.
115  template<typename TDim, typename TIdx>
116  struct IsSingleThreadAcc<AccCpuTbbBlocks<TDim, TIdx>> : std::true_type
117  {
118  };
119 
120  //! The CPU TBB block multi thread accelerator type trait specialization.
121  template<typename TDim, typename TIdx>
122  struct IsMultiThreadAcc<AccCpuTbbBlocks<TDim, TIdx>> : std::false_type
123  {
124  };
125 
126  //! The CPU TBB block accelerator device properties get trait specialization.
127  template<typename TDim, typename TIdx>
128  struct GetAccDevProps<AccCpuTbbBlocks<TDim, TIdx>>
129  {
130  ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> AccDevProps<TDim, TIdx>
131  {
132  return {// m_multiProcessorCount
133  alpaka::core::clipCast<TIdx>(tbb::this_task_arena::max_concurrency()),
134  // m_gridBlockExtentMax
136  // m_gridBlockCountMax
138  // m_blockThreadExtentMax
140  // m_blockThreadCountMax
141  static_cast<TIdx>(1),
142  // m_threadElemExtentMax
144  // m_threadElemCountMax
146  // m_sharedMemSizeBytes
147  static_cast<size_t>(AccCpuTbbBlocks<TDim, TIdx>::staticAllocBytes()),
148  // m_globalMemSizeBytes
149  getMemBytes(dev)};
150  }
151  };
152 
153  //! The CPU TBB block accelerator name trait specialization.
154  template<typename TDim, typename TIdx>
155  struct GetAccName<AccCpuTbbBlocks<TDim, TIdx>>
156  {
157  ALPAKA_FN_HOST static auto getAccName() -> std::string
158  {
159  return "AccCpuTbbBlocks<" + std::to_string(TDim::value) + "," + core::demangled<TIdx> + ">";
160  }
161  };
162 
163  //! The CPU TBB block accelerator device type trait specialization.
164  template<typename TDim, typename TIdx>
165  struct DevType<AccCpuTbbBlocks<TDim, TIdx>>
166  {
167  using type = DevCpu;
168  };
169 
170  //! The CPU TBB block accelerator dimension getter trait specialization.
171  template<typename TDim, typename TIdx>
172  struct DimType<AccCpuTbbBlocks<TDim, TIdx>>
173  {
174  using type = TDim;
175  };
176 
177  //! The CPU TBB block accelerator execution task type trait specialization.
178  template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
179  struct CreateTaskKernel<AccCpuTbbBlocks<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
180  {
181  ALPAKA_FN_HOST static auto createTaskKernel(
182  TWorkDiv const& workDiv,
183  TKernelFnObj const& kernelFnObj,
184  TArgs&&... args)
185  {
186  if(workDiv.m_blockThreadExtent.prod() != static_cast<TIdx>(1u))
187  {
188  throw std::runtime_error(
189  "The given work division is not valid for a single thread Acc: "
190  + getAccName<AccCpuTbbBlocks<TDim, TIdx>>() + ". Threads per block should be 1!");
191  }
192 
193  return TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>(
194  workDiv,
195  kernelFnObj,
196  std::forward<TArgs>(args)...);
197  }
198  };
199 
200  //! The CPU TBB block execution task platform type trait specialization.
201  template<typename TDim, typename TIdx>
202  struct PlatformType<AccCpuTbbBlocks<TDim, TIdx>>
203  {
204  using type = PlatformCpu;
205  };
206 
207  //! The CPU TBB block accelerator idx type trait specialization.
208  template<typename TDim, typename TIdx>
209  struct IdxType<AccCpuTbbBlocks<TDim, TIdx>>
210  {
211  using type = TIdx;
212  };
213 
214  template<typename TDim, typename TIdx>
215  struct AccToTag<alpaka::AccCpuTbbBlocks<TDim, TIdx>>
216  {
217  using type = alpaka::TagCpuTbbBlocks;
218  };
219 
220  template<typename TDim, typename TIdx>
221  struct TagToAcc<alpaka::TagCpuTbbBlocks, TDim, TIdx>
222  {
223  using type = alpaka::AccCpuTbbBlocks<TDim, TIdx>;
224  };
225  } // namespace trait
226 } // namespace alpaka
227 
228 #endif
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition: Vec.hpp:133
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition: Vec.hpp:116
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric va...
Definition: Traits.hpp:1263
TinyMersenneTwister RandStdLib
Definition: RandStdLib.hpp:23
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC WorkDivMembers(alpaka::Vec< TDim, TIdx > const &gridBlockExtent, alpaka::Vec< TDim, TIdx > const &blockThreadExtent, alpaka::Vec< TDim, TIdx > const &elemExtent) -> WorkDivMembers< TDim, TIdx >
Deduction guide for the constructor which can be called without explicit template type parameters.
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc >>
Definition: Traits.hpp:90
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition: Traits.hpp:332
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition: Traits.hpp:95
ALPAKA_FN_HOST_ACC Vec(TFirstIndex &&, TRestIndices &&...) -> Vec< DimInt< 1+sizeof...(TRestIndices)>, std::decay_t< TFirstIndex >>
ALPAKA_FN_HOST auto getAccName() -> std::string
Definition: Traits.hpp:100
alpaka::meta::InheritFromList< alpaka::meta::Unique< std::tuple< TGridAtomic, TBlockAtomic, TThreadAtomic, interface::Implements< ConceptAtomicGrids, TGridAtomic >, interface::Implements< ConceptAtomicBlocks, TBlockAtomic >, interface::Implements< ConceptAtomicThreads, TThreadAtomic > >> > AtomicHierarchy
build a single class to inherit from different atomic implementations
typename trait::AccToTag< TAcc >::type AccToTag
maps an acc type to a tag type
Definition: Tag.hpp:67
typename trait::TagToAcc< TTag, TDim, TIdx >::type TagToAcc
maps a tag type to an acc type
Definition: Tag.hpp:74
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition: Traits.hpp:69