alpaka
Abstraction Library for Parallel Kernel Acceleration
TaskKernelCpuTbbBlocks.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Benjamin Worpitz, Erik Zenker, René Widera, Felice Pantaleo, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 // Specialized traits.
8 #include "alpaka/acc/Traits.hpp"
9 #include "alpaka/dev/Traits.hpp"
10 #include "alpaka/dim/Traits.hpp"
11 #include "alpaka/idx/Traits.hpp"
13 
14 // Implementation details.
16 #include "alpaka/core/Decay.hpp"
17 #include "alpaka/dev/DevCpu.hpp"
18 #include "alpaka/idx/MapIdx.hpp"
20 #include "alpaka/kernel/Traits.hpp"
21 #include "alpaka/meta/NdLoop.hpp"
24 
25 #include <functional>
26 #include <stdexcept>
27 #include <tuple>
28 #include <type_traits>
29 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
30 # include <iostream>
31 #endif
32 
33 #ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
34 
35 # include <tbb/blocked_range.h>
36 # include <tbb/parallel_for.h>
37 # include <tbb/task_group.h>
38 
39 namespace alpaka
40 {
41  //! The CPU TBB block accelerator execution task.
42  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
43  class TaskKernelCpuTbbBlocks final : public WorkDivMembers<TDim, TIdx>
44  {
45  public:
46  template<typename TWorkDiv>
47  ALPAKA_FN_HOST TaskKernelCpuTbbBlocks(TWorkDiv&& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
48  : WorkDivMembers<TDim, TIdx>(std::forward<TWorkDiv>(workDiv))
49  , m_kernelFnObj(kernelFnObj)
50  , m_args(std::forward<TArgs>(args)...)
51  {
52  static_assert(
53  Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
54  "The work division and the execution task have to be of the same dimensionality!");
55  }
56 
57  //! Executes the kernel function object.
58  ALPAKA_FN_HOST auto operator()() const -> void
59  {
61 
62  auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*this);
63  auto const blockThreadExtent = getWorkDiv<Block, Threads>(*this);
64  auto const threadElemExtent = getWorkDiv<Thread, Elems>(*this);
65 
66  // Get the size of the block shared dynamic memory.
67  auto const blockSharedMemDynSizeBytes = std::apply(
68  [&](std::decay_t<TArgs> const&... args)
69  {
70  return getBlockSharedMemDynSizeBytes<AccCpuTbbBlocks<TDim, TIdx>>(
71  m_kernelFnObj,
72  blockThreadExtent,
73  threadElemExtent,
74  args...);
75  },
76  m_args);
77 
78 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
79  std::cout << __func__ << " blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes << " B"
80  << std::endl;
81 # endif
82 
83  // The number of blocks in the grid.
84  TIdx const numBlocksInGrid = gridBlockExtent.prod();
85 
86  tbb::this_task_arena::isolate(
87  [&]
88  {
89  tbb::parallel_for(
90  static_cast<TIdx>(0),
91  static_cast<TIdx>(numBlocksInGrid),
92  [&](TIdx i)
93  {
94  AccCpuTbbBlocks<TDim, TIdx> acc(
95  *static_cast<WorkDivMembers<TDim, TIdx> const*>(this),
96  blockSharedMemDynSizeBytes);
97 
98  acc.m_gridBlockIdx
99  = mapIdx<TDim::value>(Vec<DimInt<1u>, TIdx>(static_cast<TIdx>(i)), gridBlockExtent);
100 
101  std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
102 
103  freeSharedVars(acc);
104  });
105  });
106  }
107 
108  private:
109  TKernelFnObj m_kernelFnObj;
110  std::tuple<std::decay_t<TArgs>...> m_args;
111  };
112 
113  namespace trait
114  {
115  //! The CPU TBB block execution task accelerator type trait specialization.
116  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
117  struct AccType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
118  {
119  using type = AccCpuTbbBlocks<TDim, TIdx>;
120  };
121 
122  //! The CPU TBB block execution task device type trait specialization.
123  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
124  struct DevType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
125  {
126  using type = DevCpu;
127  };
128 
129  //! The CPU TBB block execution task dimension getter trait specialization.
130  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
131  struct DimType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
132  {
133  using type = TDim;
134  };
135 
136  //! The CPU TBB block execution task platform type trait specialization.
137  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
138  struct PlatformType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
139  {
140  using type = PlatformCpu;
141  };
142 
143  //! The CPU TBB block execution task idx type trait specialization.
144  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
145  struct IdxType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
146  {
147  using type = TIdx;
148  };
149 
150  //! \brief Specialisation of the class template FunctionAttributes
151  //! \tparam TDev The device type.
152  //! \tparam TDim The dimensionality of the accelerator device properties.
153  //! \tparam TIdx The idx type of the accelerator device properties.
154  //! \tparam TKernelFn Kernel function object type.
155  //! \tparam TArgs Kernel function object argument types as a parameter pack.
156  template<typename TDev, typename TDim, typename TIdx, typename TKernelFn, typename... TArgs>
157  struct FunctionAttributes<AccCpuTbbBlocks<TDim, TIdx>, TDev, TKernelFn, TArgs...>
158  {
159  //! \param dev The device instance
160  //! \param kernelFn The kernel function object which should be executed.
161  //! \param args The kernel invocation arguments.
162  //! \return KernelFunctionAttributes instance. The default version always returns an instance with zero
163  //! fields. For CPU, the field of max threads allowed by kernel function for the block is 1.
165  TDev const& dev,
166  [[maybe_unused]] TKernelFn const& kernelFn,
167  [[maybe_unused]] TArgs&&... args) -> alpaka::KernelFunctionAttributes
168  {
169  alpaka::KernelFunctionAttributes kernelFunctionAttributes;
170 
171  // set function properties for maxThreadsPerBlock to device properties, since API doesn't have function
172  // properties function.
173  auto const& props = alpaka::getAccDevProps<AccCpuTbbBlocks<TDim, TIdx>>(dev);
174  kernelFunctionAttributes.maxThreadsPerBlock = static_cast<int>(props.m_blockThreadCountMax);
175  kernelFunctionAttributes.maxDynamicSharedSizeBytes
176  = static_cast<int>(alpaka::BlockSharedDynMemberAllocKiB * 1024);
177  return kernelFunctionAttributes;
178  }
179  };
180  } // namespace trait
181 } // namespace alpaka
182 
183 #endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC WorkDivMembers(alpaka::Vec< TDim, TIdx > const &gridBlockExtent, alpaka::Vec< TDim, TIdx > const &blockThreadExtent, alpaka::Vec< TDim, TIdx > const &elemExtent) -> WorkDivMembers< TDim, TIdx >
Deduction guide for the constructor which can be called without explicit template type parameters.
ALPAKA_FN_HOST_ACC Vec(TFirstIndex &&, TRestIndices &&...) -> Vec< DimInt< 1+sizeof...(TRestIndices)>, std::decay_t< TFirstIndex >>
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Definition: Traits.hpp:54
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
static ALPAKA_FN_HOST auto getFunctionAttributes([[maybe_unused]] TDev const &dev, [[maybe_unused]] TKernelFnObj const &kernelFn, [[maybe_unused]] TArgs &&... args) -> alpaka::KernelFunctionAttributes
Definition: Traits.hpp:85