29 #include <type_traits>
31 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
35 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED
40 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
50 template<
typename TWorkDiv>
53 , m_kernelFnObj(kernelFnObj)
54 , m_args(std::forward<TArgs>(
58 Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
59 "The work division and the execution task have to be of the same dimensionality!");
67 std::apply([&](
auto const&... args) { runWithArgs(args...); }, m_args);
71 ALPAKA_FN_HOST auto runWithArgs(std::decay_t<TArgs>
const&... args)
const ->
void
73 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*
this);
74 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*
this);
75 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*
this);
78 auto const smBytes = getBlockSharedMemDynSizeBytes<AccCpuThreads<TDim, TIdx>>(
83 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
84 std::cout << __func__ <<
" smBytes: " << smBytes <<
" B" << std::endl;
88 auto const threadsPerBlock = blockThreadExtent.prod();
89 ThreadPool threadPool(
static_cast<std::size_t
>(threadsPerBlock));
95 { runBlock(acc, gridBlockIdx, blockThreadExtent, threadPool, m_kernelFnObj, args...); });
100 AccCpuThreads<TDim, TIdx>& acc,
101 Vec<TDim, TIdx>
const& gridBlockIdx,
102 Vec<TDim, TIdx>
const& blockThreadExtent,
103 ThreadPool& threadPool,
104 TKernelFnObj
const& kernelFnObj,
105 std::decay_t<TArgs>
const&... args) ->
void
107 std::vector<std::future<void>> futuresInBlock;
108 acc.m_gridBlockIdx = gridBlockIdx;
113 [&](Vec<TDim, TIdx>
const& blockThreadIdx)
116 futuresInBlock.emplace_back(threadPool.enqueueTask(
117 [&, blockThreadIdx] { runThread(acc, blockThreadIdx, kernelFnObj, args...); }));
121 for(
auto& t : futuresInBlock)
125 futuresInBlock.clear();
126 acc.m_threadToIndexMap.clear();
132 AccCpuThreads<TDim, TIdx>& acc,
133 Vec<TDim, TIdx>
const& blockThreadIdx,
134 TKernelFnObj
const& kernelFnObj,
135 std::decay_t<TArgs>
const&... args) ->
void
139 auto const threadId = std::this_thread::get_id();
141 if(blockThreadIdx.sum() == 0)
143 acc.m_idMasterThread = threadId;
148 std::lock_guard<std::mutex> lock(acc.m_mtxMapInsert);
149 acc.m_threadToIndexMap.emplace(threadId, blockThreadIdx);
156 kernelFnObj(std::as_const(acc), args...);
163 TKernelFnObj m_kernelFnObj;
164 std::tuple<std::decay_t<TArgs>...> m_args;
170 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
177 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
184 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
191 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
198 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
The CPU threads accelerator.
The CPU threads execution task.
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
ALPAKA_FN_HOST TaskKernelCpuThreads(TWorkDiv &&workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
A basic class holding the work division as grid block extent, block thread and thread element extent.
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreads(TBlockSync const &blockSync) -> void
Synchronizes all threads within the current block (independently for all blocks).
typename trait::DimType< T >::type Dim
Alias template for the dimension type trait, removing the need to spell out ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
A thread pool whose worker threads yield when there is not enough work to be done.
The accelerator type trait.
The dimension getter type trait.