26 #include <type_traits>
28 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
32 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
37 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
41 template<
typename TWorkDiv>
44 , m_kernelFnObj(std::move(kernelFnObj))
45 , m_args(std::forward<TArgs>(args)...)
48 Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
49 "The work division and the execution task have to be of the same dimensionality!");
57 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*
this);
58 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*
this);
59 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*
this);
62 auto const blockSharedMemDynSizeBytes = std::apply(
63 [&](std::decay_t<TArgs>
const&... args)
65 return getBlockSharedMemDynSizeBytes<AccCpuSerial<TDim, TIdx>>(
73 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
74 std::cout << __func__ <<
" blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes <<
" B"
80 blockSharedMemDynSizeBytes);
87 acc.m_gridBlockIdx = blockThreadIdx;
89 std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
97 TKernelFnObj m_kernelFnObj;
98 std::tuple<std::decay_t<TArgs>...> m_args;
104 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
111 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
118 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
125 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
132 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
144 template<
typename TDev,
typename TDim,
typename TIdx,
typename TKernelFn,
typename... TArgs>
154 [[maybe_unused]] TKernelFn
const& kernelFn,
161 auto const& props = alpaka::getAccDevProps<AccCpuSerial<TDim, TIdx>>(dev);
162 kernelFunctionAttributes.
maxThreadsPerBlock =
static_cast<int>(props.m_blockThreadCountMax);
165 return kernelFunctionAttributes;
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
The CPU serial accelerator.
The CPU serial execution task implementation.
ALPAKA_FN_HOST TaskKernelCpuSerial(TWorkDiv &&workDiv, TKernelFnObj kernelFnObj, TArgs &&... args)
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
A basic class holding the work division as grid block extent, block thread extent and thread element extent.
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
int maxDynamicSharedSizeBytes
The accelerator type trait.
The dimension getter type trait.
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, [[maybe_unused]] TKernelFn const &kernelFn, [[maybe_unused]] TArgs &&... args) -> alpaka::KernelFunctionAttributes
The structure template used to access the function attributes of a kernel function object.