26 #include <type_traits>
27 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
31 #ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
33 # include <tbb/blocked_range.h>
34 # include <tbb/parallel_for.h>
35 # include <tbb/task_group.h>
// Execution task that bundles a work division (inherited from WorkDivMembers),
// a kernel function object and the kernel arguments.
//
// Template parameters:
//   TDim         - dimensionality of the work division.
//   TIdx         - index type used for extents and block indices.
//   TKernelFnObj - type of the kernel function object; stored by value.
//   TArgs        - kernel argument types; stored decayed in a std::tuple.
//
// NOTE(review): this listing has gaps (braces, the static_assert opener, the
// signature of the executing member function and the parallel loop call are
// not visible). The comments below describe only the visible statements.
40 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
41 class TaskKernelCpuTbbBlocks final :
public WorkDivMembers<TDim, TIdx>
// Constructor: copies the kernel function object and forwards the kernel
// arguments into the stored tuple.
// NOTE(review): the initializer that consumes `workDiv` is not visible here;
// presumably it initializes the WorkDivMembers base - confirm.
44 template<
typename TWorkDiv>
45 ALPAKA_FN_HOST TaskKernelCpuTbbBlocks(TWorkDiv&& workDiv, TKernelFnObj
const& kernelFnObj, TArgs&&... args)
47 , m_kernelFnObj(kernelFnObj)
48 , m_args(std::forward<TArgs>(args)...)
// Compile-time condition and message requiring the work division to have the
// same dimensionality as the task (presumably the arguments of a static_assert
// whose opening line is missing from this listing).
51 Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
52 "The work division and the execution task have to be of the same dimensionality!");
// Read the three extents of the stored work division: blocks per grid,
// threads per block and elements per thread.
60 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*
this);
61 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*
this);
62 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*
this);
// Query the dynamic block shared memory size in bytes through a lambda that
// receives the kernel arguments as const references.
// NOTE(review): the remaining arguments of both calls are not visible;
// presumably the lambda is applied to m_args - confirm.
65 auto const blockSharedMemDynSizeBytes = std::apply(
66 [&](std::decay_t<TArgs>
const&... args)
68 return getBlockSharedMemDynSizeBytes<AccCpuTbbBlocks<TDim, TIdx>>(
// Only at the full debug level: print the computed shared memory size.
76 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
77 std::cout << __func__ <<
" blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes <<
" B"
// Total number of blocks is the product of the grid extent over all dimensions.
82 TIdx
const numBlocksInGrid = gridBlockExtent.prod();
// Reject any work division whose block contains more than one thread.
84 if(blockThreadExtent.prod() !=
static_cast<TIdx
>(1u))
86 throw std::runtime_error(
"A block for the TBB accelerator can only ever have one single thread!");
// The block loop is issued inside tbb::this_task_arena::isolate.
// NOTE(review): the loop call itself is not visible; numBlocksInGrid is
// presumably the upper bound of the iterated block range - confirm.
89 tbb::this_task_arena::isolate(
94 static_cast<TIdx
>(numBlocksInGrid),
// Construct the accelerator object from the WorkDivMembers base sub-object of
// this task and the dynamic shared memory size.
97 AccCpuTbbBlocks<TDim, TIdx> acc(
98 *
static_cast<WorkDivMembers<TDim, TIdx> const*
>(
this),
99 blockSharedMemDynSizeBytes);
// Map the one-dimensional index `i` to a TDim-dimensional index within the grid
// block extent (the assignment target is not visible in this listing).
102 = mapIdx<TDim::value>(
Vec<DimInt<1u>, TIdx>(
static_cast<TIdx
>(i)), gridBlockExtent);
// Invoke the kernel function object with the accelerator as first argument,
// followed by the stored kernel arguments.
104 std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
// Copy of the kernel function object passed to the constructor.
112 TKernelFnObj m_kernelFnObj;
// Kernel arguments, stored by value with decayed types.
113 std::tuple<std::decay_t<TArgs>...> m_args;
// AccType trait specialization for TaskKernelCpuTbbBlocks: the accelerator type
// of the task is AccCpuTbbBlocks with the same dimension and index type.
119 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
120 struct AccType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
122 using type = AccCpuTbbBlocks<TDim, TIdx>;
// DevType trait specialization for TaskKernelCpuTbbBlocks.
// NOTE(review): the member alias is not visible in this listing; verify the
// device type against the full header.
126 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
127 struct DevType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
// DimType trait specialization for TaskKernelCpuTbbBlocks.
// NOTE(review): the member alias is not visible in this listing; presumably it
// is TDim - confirm against the full header.
133 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
134 struct DimType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
// PlatformType trait specialization for TaskKernelCpuTbbBlocks: the task
// belongs to the PlatformCpu platform.
140 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
141 struct PlatformType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
143 using type = PlatformCpu;
// IdxType trait specialization for TaskKernelCpuTbbBlocks.
// NOTE(review): the member alias is not visible in this listing; presumably it
// is TIdx - confirm against the full header.
147 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
148 struct IdxType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC WorkDivMembers(alpaka::Vec< TDim, TIdx > const &gridBlockExtent, alpaka::Vec< TDim, TIdx > const &blockThreadExtent, alpaka::Vec< TDim, TIdx > const &elemExtent) -> WorkDivMembers< TDim, TIdx >
Deduction guide for the constructor which can be called without explicit template type parameters.
Vec(TFirstIndex &&, TRestIndices &&...) -> Vec< DimInt< 1+sizeof...(TRestIndices)>, std::decay_t< TFirstIndex >>
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.