31 #include <type_traits>
33 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
37 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED
42 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
52 template<
typename TWorkDiv>
55 , m_kernelFnObj(kernelFnObj)
56 , m_args(std::forward<TArgs>(
60 Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
61 "The work division and the execution task have to be of the same dimensionality!");
69 std::apply([&](
auto const&... args) { runWithArgs(args...); }, m_args);
73 ALPAKA_FN_HOST auto runWithArgs(std::decay_t<TArgs>
const&... args)
const ->
void
75 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*
this);
76 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*
this);
77 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*
this);
80 auto const smBytes = getBlockSharedMemDynSizeBytes<AccCpuThreads<TDim, TIdx>>(
85 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
86 std::cout << __func__ <<
" smBytes: " << smBytes <<
" B" << std::endl;
90 auto const threadsPerBlock = blockThreadExtent.prod();
91 ThreadPool threadPool(
static_cast<std::size_t
>(threadsPerBlock));
97 { runBlock(acc, gridBlockIdx, blockThreadExtent, threadPool, m_kernelFnObj, args...); });
102 AccCpuThreads<TDim, TIdx>& acc,
103 Vec<TDim, TIdx>
const& gridBlockIdx,
104 Vec<TDim, TIdx>
const& blockThreadExtent,
105 ThreadPool& threadPool,
106 TKernelFnObj
const& kernelFnObj,
107 std::decay_t<TArgs>
const&... args) ->
void
109 std::vector<std::future<void>> futuresInBlock;
110 acc.m_gridBlockIdx = gridBlockIdx;
115 [&](Vec<TDim, TIdx>
const& blockThreadIdx)
118 futuresInBlock.emplace_back(threadPool.enqueueTask(
119 [&, blockThreadIdx] { runThread(acc, blockThreadIdx, kernelFnObj, args...); }));
123 for(
auto& t : futuresInBlock)
127 futuresInBlock.clear();
128 acc.m_threadToIndexMap.clear();
134 AccCpuThreads<TDim, TIdx>& acc,
135 Vec<TDim, TIdx>
const& blockThreadIdx,
136 TKernelFnObj
const& kernelFnObj,
137 std::decay_t<TArgs>
const&... args) ->
void
141 auto const threadId = std::this_thread::get_id();
143 if(blockThreadIdx.sum() == 0)
145 acc.m_idMasterThread = threadId;
150 std::lock_guard<std::mutex> lock(acc.m_mtxMapInsert);
151 acc.m_threadToIndexMap.emplace(threadId, blockThreadIdx);
158 kernelFnObj(std::as_const(acc), args...);
165 TKernelFnObj m_kernelFnObj;
166 std::tuple<std::decay_t<TArgs>...> m_args;
172 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
179 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
186 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
193 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
200 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
212 template<
typename TDev,
typename TDim,
typename TIdx,
typename TKernelFn,
typename... TArgs>
222 [[maybe_unused]] TKernelFn
const& kernelFn,
229 auto const& props = alpaka::getAccDevProps<AccCpuThreads<TDim, TIdx>>(dev);
230 kernelFunctionAttributes.
maxThreadsPerBlock =
static_cast<int>(props.m_blockThreadCountMax);
233 return kernelFunctionAttributes;
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
The CPU threads accelerator.
The CPU threads execution task.
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
ALPAKA_FN_HOST TaskKernelCpuThreads(TWorkDiv &&workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
A basic class holding the work division as grid block extent, block thread extent and thread element extent.
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreads(TBlockSync const &blockSync) -> void
Synchronizes all threads within the current block (independently for all blocks).
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
int maxDynamicSharedSizeBytes
A thread pool yielding when there is not enough work to be done.
The accelerator type trait.
The dimension getter type trait.
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, [[maybe_unused]] TKernelFn const &kernelFn, [[maybe_unused]] TArgs &&... args) -> alpaka::KernelFunctionAttributes
The structure template used to access the function attributes of a kernel function object.