28 #include <type_traits>
29 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
33 #ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
35 # include <tbb/blocked_range.h>
36 # include <tbb/parallel_for.h>
37 # include <tbb/task_group.h>
42 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
43 class TaskKernelCpuTbbBlocks final :
public WorkDivMembers<TDim, TIdx>
46 template<
typename TWorkDiv>
47 ALPAKA_FN_HOST TaskKernelCpuTbbBlocks(TWorkDiv&& workDiv, TKernelFnObj
const& kernelFnObj, TArgs&&... args)
49 , m_kernelFnObj(kernelFnObj)
50 , m_args(std::forward<TArgs>(args)...)
53 Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
54 "The work division and the execution task have to be of the same dimensionality!");
62 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*
this);
63 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*
this);
64 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*
this);
67 auto const blockSharedMemDynSizeBytes = std::apply(
68 [&](std::decay_t<TArgs>
const&... args)
70 return getBlockSharedMemDynSizeBytes<AccCpuTbbBlocks<TDim, TIdx>>(
78 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
79 std::cout << __func__ <<
" blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes <<
" B"
84 TIdx
const numBlocksInGrid = gridBlockExtent.prod();
86 tbb::this_task_arena::isolate(
91 static_cast<TIdx
>(numBlocksInGrid),
94 AccCpuTbbBlocks<TDim, TIdx> acc(
95 *
static_cast<WorkDivMembers<TDim, TIdx> const*
>(
this),
96 blockSharedMemDynSizeBytes);
99 = mapIdx<TDim::value>(
Vec<DimInt<1u>, TIdx>(
static_cast<TIdx
>(i)), gridBlockExtent);
101 std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
109 TKernelFnObj m_kernelFnObj;
110 std::tuple<std::decay_t<TArgs>...> m_args;
116 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
117 struct AccType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
119 using type = AccCpuTbbBlocks<TDim, TIdx>;
123 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
124 struct DevType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
130 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
131 struct DimType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
137 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
138 struct PlatformType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
140 using type = PlatformCpu;
144 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
145 struct IdxType<TaskKernelCpuTbbBlocks<TDim, TIdx, TKernelFnObj, TArgs...>>
156 template<
typename TDev,
typename TDim,
typename TIdx,
typename TKernelFn,
typename... TArgs>
157 struct FunctionAttributes<AccCpuTbbBlocks<TDim, TIdx>, TDev, TKernelFn, TArgs...>
166 [[maybe_unused]] TKernelFn
const& kernelFn,
173 auto const& props = alpaka::getAccDevProps<AccCpuTbbBlocks<TDim, TIdx>>(dev);
174 kernelFunctionAttributes.
maxThreadsPerBlock =
static_cast<int>(props.m_blockThreadCountMax);
177 return kernelFunctionAttributes;
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC WorkDivMembers(alpaka::Vec< TDim, TIdx > const &gridBlockExtent, alpaka::Vec< TDim, TIdx > const &blockThreadExtent, alpaka::Vec< TDim, TIdx > const &elemExtent) -> WorkDivMembers< TDim, TIdx >
Deduction guide for the constructor which can be called without explicit template type parameters.
ALPAKA_FN_HOST_ACC Vec(TFirstIndex &&, TRestIndices &&...) -> Vec< DimInt< 1+sizeof...(TRestIndices)>, std::decay_t< TFirstIndex >>
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
int maxDynamicSharedSizeBytes
static ALPAKA_FN_HOST auto getFunctionAttributes([[maybe_unused]] TDev const &dev, [[maybe_unused]] TKernelFnObj const &kernelFn, [[maybe_unused]] TArgs &&... args) -> alpaka::KernelFunctionAttributes