alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
AccCpuTbbBlocks.hpp
Go to the documentation of this file.
1/* Copyright 2025 Axel Huebl, Benjamin Worpitz, Erik Zenker, René Widera, Jan Stephan, Bernhard Manfred Gruber,
2 * Andrea Bocci
3 * SPDX-License-Identifier: MPL-2.0
4 */
5
6#pragma once
7
8// Base classes.
25
26// Specialized traits.
27#include "alpaka/acc/Traits.hpp"
28#include "alpaka/dev/Traits.hpp"
29#include "alpaka/idx/Traits.hpp"
32
33// Implementation details.
34#include "alpaka/acc/Tag.hpp"
37#include "alpaka/dev/DevCpu.hpp"
38
39#ifdef __cpp_lib_format
40# include <format>
41#endif
42#include <string>
43#include <typeinfo>
44
45#ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
46
47# include <tbb/tbb.h>
48
49namespace alpaka
50{
//! Forward declaration of the kernel execution task of the CPU TBB block accelerator.
//! The accelerator class below declares it as a friend and the CreateTaskKernel trait returns an instance of it.
template<class TDim, class TIdx, class TKernelFnObj, class... TArgs>
class TaskKernelCpuTbbBlocks;
53
54 //! The CPU TBB block accelerator.
55 template<typename TDim, typename TIdx>
56 class AccCpuTbbBlocks final
57 : public WorkDivMembers<TDim, TIdx>
58 , public gb::IdxGbRef<TDim, TIdx>
59 , public bt::IdxBtZero<TDim, TIdx>
60 , public AtomicHierarchy<
61 AtomicCpu, // grid atomics
62 AtomicCpu, // block atomics
63 AtomicNoOp> // thread atomics
64 , public math::MathStdLib
65 , public BlockSharedMemDynMember<>
66 , public BlockSharedMemStMember<>
67 , public BlockSyncNoOp
68 , public IntrinsicCpu
69 , public MemFenceCpu
70# ifdef ALPAKA_DISABLE_VENDOR_RNG
71 , public rand::RandDefault
72# else
73 , public rand::RandStdLib
74# endif
75 , public warp::WarpSingleThread
76 , public interface::Implements<ConceptAcc, AccCpuTbbBlocks<TDim, TIdx>>
77 {
78 static_assert(
79 sizeof(TIdx) >= sizeof(int),
80 "Index type is not supported, consider using int or a larger type.");
81
82 public:
83 // Partial specialization with the correct TDim and TIdx is not allowed.
84 template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
85 friend class ::alpaka::TaskKernelCpuTbbBlocks;
86
87 AccCpuTbbBlocks(AccCpuTbbBlocks const&) = delete;
88 AccCpuTbbBlocks(AccCpuTbbBlocks&&) = delete;
89 auto operator=(AccCpuTbbBlocks const&) -> AccCpuTbbBlocks& = delete;
90 auto operator=(AccCpuTbbBlocks&&) -> AccCpuTbbBlocks& = delete;
91
92 private:
93 template<typename TWorkDiv>
94 ALPAKA_FN_HOST AccCpuTbbBlocks(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
95 : WorkDivMembers<TDim, TIdx>(workDiv)
96 , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
97 , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
98 , BlockSharedMemStMember<>(staticMemBegin(), staticMemCapacity())
99 , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
100 {
101 }
102
103 private:
104 // getIdx
105 Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
106 };
107
108 namespace trait
109 {
110 //! The CPU TBB block accelerator type trait specialization.
111 template<typename TDim, typename TIdx>
112 struct AccType<AccCpuTbbBlocks<TDim, TIdx>>
113 {
114 using type = AccCpuTbbBlocks<TDim, TIdx>;
115 };
116
117 //! The CPU TBB block single thread accelerator type trait specialization.
118 template<typename TDim, typename TIdx>
119 struct IsSingleThreadAcc<AccCpuTbbBlocks<TDim, TIdx>> : std::true_type
120 {
121 };
122
123 //! The CPU TBB block multi thread accelerator type trait specialization.
124 template<typename TDim, typename TIdx>
125 struct IsMultiThreadAcc<AccCpuTbbBlocks<TDim, TIdx>> : std::false_type
126 {
127 };
128
129 //! The CPU TBB block accelerator device properties get trait specialization.
130 template<typename TDim, typename TIdx>
131 struct GetAccDevProps<AccCpuTbbBlocks<TDim, TIdx>>
132 {
133 ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> AccDevProps<TDim, TIdx>
134 {
135 return {// m_multiProcessorCount
136 alpaka::core::clipCast<TIdx>(tbb::this_task_arena::max_concurrency()),
137 // m_gridBlockExtentMax
138 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
139 // m_gridBlockCountMax
140 std::numeric_limits<TIdx>::max(),
141 // m_blockThreadExtentMax
143 // m_blockThreadCountMax
144 static_cast<TIdx>(1),
145 // m_threadElemExtentMax
146 Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
147 // m_threadElemCountMax
148 std::numeric_limits<TIdx>::max(),
149 // m_sharedMemSizeBytes
150 static_cast<size_t>(AccCpuTbbBlocks<TDim, TIdx>::staticAllocBytes()),
151 // m_globalMemSizeBytes
152 getMemBytes(dev)};
153 }
154 };
155
156 //! The CPU TBB block accelerator name trait specialization.
157 template<typename TDim, typename TIdx>
158 struct GetAccName<AccCpuTbbBlocks<TDim, TIdx>>
159 {
160 ALPAKA_FN_HOST static auto getAccName() -> std::string
161 {
162# if ALPAKA_COMP_CLANG
163# pragma clang diagnostic push
164# pragma clang diagnostic ignored "-Wexit-time-destructors"
165# endif
166 using namespace std::literals;
167 static std::string const accName =
168# ifdef __cpp_lib_format
169 std::format("AccCpuTbbBlocks<{},{}>", TDim::value, core::demangled<TIdx>);
170# else
171 "AccCpuTbbBlocks<"s + std::to_string(TDim::value) + ","s + std::string(core::demangled<TIdx>)
172 + ">"s;
173# endif
174 return accName;
175# if ALPAKA_COMP_CLANG
176# pragma clang diagnostic pop
177# endif
178 }
179 };
180
181 //! The CPU TBB block accelerator device type trait specialization.
182 template<typename TDim, typename TIdx>
183 struct DevType<AccCpuTbbBlocks<TDim, TIdx>>
184 {
185 using type = DevCpu;
186 };
187
188 //! The CPU TBB block accelerator dimension getter trait specialization.
189 template<typename TDim, typename TIdx>
190 struct DimType<AccCpuTbbBlocks<TDim, TIdx>>
191 {
192 using type = TDim;
193 };
194
195 //! The CPU TBB block accelerator execution task type trait specialization.
196 template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
197 struct CreateTaskKernel<AccCpuTbbBlocks<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
198 {
200 TWorkDiv const& workDiv,
201 TKernelFnObj const& kernelFnObj,
202 TArgs&&... args)
203 {
204 if(workDiv.m_blockThreadExtent.prod() != static_cast<TIdx>(1u))
205 {
206 throw std::runtime_error(
207 "The given work division is not valid for a single thread Acc: "
208 + getAccName<AccCpuTbbBlocks<TDim, TIdx>>() + ". Threads per block should be 1!");
209 }
210
211 return TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>(
212 workDiv,
213 kernelFnObj,
214 std::forward<TArgs>(args)...);
215 }
216 };
217
218 //! The CPU TBB block execution task platform type trait specialization.
219 template<typename TDim, typename TIdx>
220 struct PlatformType<AccCpuTbbBlocks<TDim, TIdx>>
221 {
222 using type = PlatformCpu;
223 };
224
225 //! The CPU TBB block accelerator idx type trait specialization.
226 template<typename TDim, typename TIdx>
227 struct IdxType<AccCpuTbbBlocks<TDim, TIdx>>
228 {
229 using type = TIdx;
230 };
231
232 template<typename TDim, typename TIdx>
233 struct AccToTag<alpaka::AccCpuTbbBlocks<TDim, TIdx>>
234 {
235 using type = alpaka::TagCpuTbbBlocks;
236 };
237
238 template<typename TDim, typename TIdx>
239 struct TagToAcc<alpaka::TagCpuTbbBlocks, TDim, TIdx>
240 {
241 using type = alpaka::AccCpuTbbBlocks<TDim, TIdx>;
242 };
243 } // namespace trait
244} // namespace alpaka
245
246#endif
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition Vec.hpp:106
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition Vec.hpp:89
#define ALPAKA_FN_HOST
Definition Common.hpp:40
auto clipCast(V const &val) -> T
Definition ClipCast.hpp:16
TinyMersenneTwister RandStdLib
The alpaka accelerator library.
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc > >
Definition Traits.hpp:90
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC WorkDivMembers(alpaka::Vec< TDim, TIdx > const &gridBlockExtent, alpaka::Vec< TDim, TIdx > const &blockThreadExtent, alpaka::Vec< TDim, TIdx > const &elemExtent) -> WorkDivMembers< TDim, TIdx >
Deduction guide for the constructor which can be called without explicit template type parameters.
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:332
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:100
alpaka::meta::InheritFromList< alpaka::meta::Unique< std::tuple< TGridAtomic, TBlockAtomic, TThreadAtomic, interface::Implements< ConceptAtomicGrids, TGridAtomic >, interface::Implements< ConceptAtomicBlocks, TBlockAtomic >, interface::Implements< ConceptAtomicThreads, TThreadAtomic > > > > AtomicHierarchy
build a single class to inherit from different atomic implementations
ALPAKA_FN_HOST_ACC Vec(TFirstIndex &&, TRestIndices &&...) -> Vec< DimInt< 1+sizeof...(TRestIndices)>, std::decay_t< TFirstIndex > >
typename trait::AccToTag< TAcc >::type AccToTag
maps an acc type to a tag type
Definition Tag.hpp:67
typename trait::TagToAcc< TTag, TDim, TIdx >::type TagToAcc
maps a tag type to an acc type
Definition Tag.hpp:74
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:69