alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
TaskKernelCpuTbbBlocks.hpp
Go to the documentation of this file.
1/* Copyright 2022 Benjamin Worpitz, Erik Zenker, René Widera, Felice Pantaleo, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7// Specialized traits.
10#include "alpaka/dim/Traits.hpp"
11#include "alpaka/idx/Traits.hpp"
13
14// Implementation details.
16#include "alpaka/core/Decay.hpp"
17#include "alpaka/dev/DevCpu.hpp"
18#include "alpaka/idx/MapIdx.hpp"
24
25#include <functional>
26#include <stdexcept>
27#include <tuple>
28#include <type_traits>
29#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
30# include <iostream>
31#endif
32
33#ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
34
35# include <tbb/blocked_range.h>
36# include <tbb/parallel_for.h>
37# include <tbb/task_group.h>
38
39namespace alpaka
40{
41 //! The CPU TBB block accelerator execution task.
42 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
43 class TaskKernelCpuTbbBlocks final : public WorkDivMembers<TDim, TIdx>
44 {
45 public:
46 template<typename TWorkDiv>
47 ALPAKA_FN_HOST TaskKernelCpuTbbBlocks(TWorkDiv&& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
48 : WorkDivMembers<TDim, TIdx>(std::forward<TWorkDiv>(workDiv))
49 , m_kernelFnObj(kernelFnObj)
50 , m_args(std::forward<TArgs>(args)...)
51 {
52 static_assert(
53 Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
54 "The work division and the execution task have to be of the same dimensionality!");
55 }
56
57 //! Executes the kernel function object.
58 ALPAKA_FN_HOST auto operator()() const -> void
59 {
61
62 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*this);
63 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*this);
64 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*this);
65
66 // Get the size of the block shared dynamic memory.
67 auto const blockSharedMemDynSizeBytes = std::apply(
68 [&](std::decay_t<TArgs> const&... args)
69 {
70 return getBlockSharedMemDynSizeBytes<AccCpuTbbBlocks<TDim, TIdx>>(
71 m_kernelFnObj,
72 blockThreadExtent,
73 threadElemExtent,
74 args...);
75 },
76 m_args);
77
78# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
79 std::cout << __func__ << " blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes << " B"
80 << std::endl;
81# endif
82
83 // The number of blocks in the grid.
84 TIdx const numBlocksInGrid = gridBlockExtent.prod();
85
86 tbb::this_task_arena::isolate(
87 [&]
88 {
89 tbb::parallel_for(
90 static_cast<TIdx>(0),
91 static_cast<TIdx>(numBlocksInGrid),
92 [&](TIdx i)
93 {
94 AccCpuTbbBlocks<TDim, TIdx> acc(
95 *static_cast<WorkDivMembers<TDim, TIdx> const*>(this),
96 blockSharedMemDynSizeBytes);
97
98 acc.m_gridBlockIdx
99 = mapIdx<TDim::value>(Vec<DimInt<1u>, TIdx>(static_cast<TIdx>(i)), gridBlockExtent);
100
101 std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
102
103 freeSharedVars(acc);
104 });
105 });
106 }
107
108 private:
109 TKernelFnObj m_kernelFnObj;
110 std::tuple<std::decay_t<TArgs>...> m_args;
111 };
112
113 namespace trait
114 {
115 //! The CPU TBB block execution task accelerator type trait specialization.
116 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
117 struct AccType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
118 {
119 using type = AccCpuTbbBlocks<TDim, TIdx>;
120 };
121
122 //! The CPU TBB block execution task device type trait specialization.
123 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
124 struct DevType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
125 {
126 using type = DevCpu;
127 };
128
129 //! The CPU TBB block execution task dimension getter trait specialization.
130 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
131 struct DimType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
132 {
133 using type = TDim;
134 };
135
136 //! The CPU TBB block execution task platform type trait specialization.
137 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
138 struct PlatformType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
139 {
140 using type = PlatformCpu;
141 };
142
143 //! The CPU TBB block execution task idx type trait specialization.
144 template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
145 struct IdxType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
146 {
147 using type = TIdx;
148 };
149
150 //! \brief Specialisation of the class template FunctionAttributes
151 //! \tparam TDev The device type.
152 //! \tparam TDim The dimensionality of the accelerator device properties.
153 //! \tparam TIdx The idx type of the accelerator device properties.
154 //! \tparam TKernelFn Kernel function object type.
155 //! \tparam TArgs Kernel function object argument types as a parameter pack.
156 template<typename TDev, typename TDim, typename TIdx, typename TKernelFn, typename... TArgs>
157 struct FunctionAttributes<AccCpuTbbBlocks<TDim, TIdx>, TDev, TKernelFn, TArgs...>
158 {
159 //! \param dev The device instance
160 //! \param kernelFn The kernel function object which should be executed.
161 //! \param args The kernel invocation arguments.
162 //! \return KernelFunctionAttributes instance. The default version always returns an instance with zero
163 //! fields. For CPU, the field of max threads allowed by kernel function for the block is 1.
165 TDev const& dev,
166 [[maybe_unused]] TKernelFn const& kernelFn,
167 [[maybe_unused]] TArgs&&... args) -> alpaka::KernelFunctionAttributes
168 {
169 alpaka::KernelFunctionAttributes kernelFunctionAttributes;
170
171 // set function properties for maxThreadsPerBlock to device properties, since API doesn't have function
172 // properties function.
173 auto const& props = alpaka::getAccDevProps<AccCpuTbbBlocks<TDim, TIdx>>(dev);
174 kernelFunctionAttributes.maxThreadsPerBlock = static_cast<int>(props.m_blockThreadCountMax);
175 kernelFunctionAttributes.maxDynamicSharedSizeBytes
176 = static_cast<int>(alpaka::BlockSharedDynMemberAllocKiB * 1024);
177 return kernelFunctionAttributes;
178 }
179 };
180 } // namespace trait
181} // namespace alpaka
182
183#endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
#define ALPAKA_FN_HOST
Definition Common.hpp:40
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC WorkDivMembers(alpaka::Vec< TDim, TIdx > const &gridBlockExtent, alpaka::Vec< TDim, TIdx > const &blockThreadExtent, alpaka::Vec< TDim, TIdx > const &elemExtent) -> WorkDivMembers< TDim, TIdx >
Deduction guide for the constructor which can be called without explicit template type parameters.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Definition Traits.hpp:54
STL namespace.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFn, TArgs &&... args) -> alpaka::KernelFunctionAttributes
Definition Traits.hpp:85