28#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
32#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
37 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
41 template<
typename TWorkDiv>
44 , m_kernelFnObj(
std::move(kernelFnObj))
45 , m_args(
std::forward<TArgs>(args)...)
49 "The work division and the execution task have to be of the same dimensionality!");
57 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*
this);
58 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*
this);
59 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*
this);
62 auto const blockSharedMemDynSizeBytes = std::apply(
63 [&](std::decay_t<TArgs>
const&... args)
65 return getBlockSharedMemDynSizeBytes<AccCpuSerial<TDim, TIdx>>(
73# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
74 std::cout << __func__ <<
" blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes <<
" B"
80 blockSharedMemDynSizeBytes);
87 acc.m_gridBlockIdx = blockThreadIdx;
89 std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
97 TKernelFnObj m_kernelFnObj;
98 std::tuple<std::decay_t<TArgs>...> m_args;
104 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
105 struct AccType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
107 using type = AccCpuSerial<TDim, TIdx>;
111 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
112 struct DevType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
118 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
119 struct DimType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
125 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
126 struct PlatformType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
128 using type = PlatformCpu;
132 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
133 struct IdxType<TaskKernelCpuSerial<TDim, TIdx, TKernelFnObj, TArgs...>>
144 template<
typename TDev,
typename TDim,
typename TIdx,
typename TKernelFn,
typename... TArgs>
145 struct FunctionAttributes<AccCpuSerial<TDim, TIdx>, TDev, TKernelFn, TArgs...>
154 [[maybe_unused]] TKernelFn
const& kernelFn,
161 auto const& props = alpaka::getAccDevProps<AccCpuSerial<TDim, TIdx>>(dev);
162 kernelFunctionAttributes.
maxThreadsPerBlock =
static_cast<int>(props.m_blockThreadCountMax);
165 return kernelFunctionAttributes;
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
The CPU serial accelerator.
The CPU serial execution task implementation.
ALPAKA_FN_HOST TaskKernelCpuSerial(TWorkDiv &&workDiv, TKernelFnObj kernelFnObj, TArgs &&... args)
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
A basic class holding the work division as grid block extent, block thread and thread element extent.
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
int maxDynamicSharedSizeBytes
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFn, TArgs &&... args) -> alpaka::KernelFunctionAttributes