alpaka
Abstraction Library for Parallel Kernel Acceleration
TaskKernelCpuTbbBlocks.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Benjamin Worpitz, Erik Zenker, René Widera, Felice Pantaleo, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 // Specialized traits.
8 #include "alpaka/acc/Traits.hpp"
9 #include "alpaka/dev/Traits.hpp"
10 #include "alpaka/dim/Traits.hpp"
11 #include "alpaka/idx/Traits.hpp"
13 
14 // Implementation details.
16 #include "alpaka/core/Decay.hpp"
17 #include "alpaka/dev/DevCpu.hpp"
18 #include "alpaka/idx/MapIdx.hpp"
19 #include "alpaka/kernel/Traits.hpp"
20 #include "alpaka/meta/NdLoop.hpp"
22 
23 #include <functional>
24 #include <stdexcept>
25 #include <tuple>
26 #include <type_traits>
27 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
28 # include <iostream>
29 #endif
30 
31 #ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
32 
33 # include <tbb/blocked_range.h>
34 # include <tbb/parallel_for.h>
35 # include <tbb/task_group.h>
36 
37 namespace alpaka
38 {
39  //! The CPU TBB block accelerator execution task.
40  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
41  class TaskKernelCpuTbbBlocks final : public WorkDivMembers<TDim, TIdx>
42  {
43  public:
44  template<typename TWorkDiv>
45  ALPAKA_FN_HOST TaskKernelCpuTbbBlocks(TWorkDiv&& workDiv, TKernelFnObj const& kernelFnObj, TArgs&&... args)
46  : WorkDivMembers<TDim, TIdx>(std::forward<TWorkDiv>(workDiv))
47  , m_kernelFnObj(kernelFnObj)
48  , m_args(std::forward<TArgs>(args)...)
49  {
50  static_assert(
51  Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
52  "The work division and the execution task have to be of the same dimensionality!");
53  }
54 
55  //! Executes the kernel function object.
56  ALPAKA_FN_HOST auto operator()() const -> void
57  {
59 
60  auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*this);
61  auto const blockThreadExtent = getWorkDiv<Block, Threads>(*this);
62  auto const threadElemExtent = getWorkDiv<Thread, Elems>(*this);
63 
64  // Get the size of the block shared dynamic memory.
65  auto const blockSharedMemDynSizeBytes = std::apply(
66  [&](std::decay_t<TArgs> const&... args)
67  {
68  return getBlockSharedMemDynSizeBytes<AccCpuTbbBlocks<TDim, TIdx>>(
69  m_kernelFnObj,
70  blockThreadExtent,
71  threadElemExtent,
72  args...);
73  },
74  m_args);
75 
76 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
77  std::cout << __func__ << " blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes << " B"
78  << std::endl;
79 # endif
80 
81  // The number of blocks in the grid.
82  TIdx const numBlocksInGrid = gridBlockExtent.prod();
83 
84  if(blockThreadExtent.prod() != static_cast<TIdx>(1u))
85  {
86  throw std::runtime_error("A block for the TBB accelerator can only ever have one single thread!");
87  }
88 
89  tbb::this_task_arena::isolate(
90  [&]
91  {
92  tbb::parallel_for(
93  static_cast<TIdx>(0),
94  static_cast<TIdx>(numBlocksInGrid),
95  [&](TIdx i)
96  {
97  AccCpuTbbBlocks<TDim, TIdx> acc(
98  *static_cast<WorkDivMembers<TDim, TIdx> const*>(this),
99  blockSharedMemDynSizeBytes);
100 
101  acc.m_gridBlockIdx
102  = mapIdx<TDim::value>(Vec<DimInt<1u>, TIdx>(static_cast<TIdx>(i)), gridBlockExtent);
103 
104  std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
105 
106  freeSharedVars(acc);
107  });
108  });
109  }
110 
111  private:
112  TKernelFnObj m_kernelFnObj;
113  std::tuple<std::decay_t<TArgs>...> m_args;
114  };
115 
116  namespace trait
117  {
118  //! The CPU TBB block execution task accelerator type trait specialization.
119  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
120  struct AccType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
121  {
122  using type = AccCpuTbbBlocks<TDim, TIdx>;
123  };
124 
125  //! The CPU TBB block execution task device type trait specialization.
126  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
127  struct DevType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
128  {
129  using type = DevCpu;
130  };
131 
132  //! The CPU TBB block execution task dimension getter trait specialization.
133  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
134  struct DimType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
135  {
136  using type = TDim;
137  };
138 
139  //! The CPU TBB block execution task platform type trait specialization.
140  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
141  struct PlatformType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
142  {
143  using type = PlatformCpu;
144  };
145 
146  //! The CPU TBB block execution task idx type trait specialization.
147  template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>
148  struct IdxType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
149  {
150  using type = TIdx;
151  };
152  } // namespace trait
153 } // namespace alpaka
154 
155 #endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC WorkDivMembers(alpaka::Vec< TDim, TIdx > const &gridBlockExtent, alpaka::Vec< TDim, TIdx > const &blockThreadExtent, alpaka::Vec< TDim, TIdx > const &elemExtent) -> WorkDivMembers< TDim, TIdx >
Deduction guide for the constructor which can be called without explicit template type parameters.
Vec(TFirstIndex &&, TRestIndices &&...) -> Vec< DimInt< 1+sizeof...(TRestIndices)>, std::decay_t< TFirstIndex >>
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Definition: Traits.hpp:54