28#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
32#ifdef ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED
35# error If ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!
43 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
47 template<
typename TWorkDiv>
50 , m_kernelFnObj(kernelFnObj)
51 , m_args(
std::forward<TArgs>(args)...)
55 "The work division and the execution task have to be of the same dimensionality!");
63 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*
this);
64 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*
this);
65 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*
this);
68 auto const blockSharedMemDynSizeBytes = std::apply(
69 [&](std::decay_t<TArgs>
const&... args)
71 return getBlockSharedMemDynSizeBytes<AccCpuOmp2Threads<TDim, TIdx>>(
79# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
80 std::cout << __func__ <<
" blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes <<
" B"
86 blockSharedMemDynSizeBytes);
89 TIdx
const blockThreadCount(blockThreadExtent.prod());
90 [[maybe_unused]]
int const iBlockThreadCount(
static_cast<int>(blockThreadCount));
92 if(::omp_in_parallel() != 0)
94 throw std::runtime_error(
95 "The OpenMP 2.0 thread backend can not be used within an existing parallel region!");
99 int const ompIsDynamic(::omp_get_dynamic());
100 ::omp_set_dynamic(0);
107 acc.m_gridBlockIdx = gridBlockIdx;
115# pragma omp parallel num_threads(iBlockThreadCount)
117# pragma omp single nowait
122 if((iBlockThreadCount > 1) && (::omp_in_parallel() == 0))
124 throw std::runtime_error(
"The OpenMP 2.0 runtime did not create a parallel region!");
127 int const numThreads = ::omp_get_num_threads();
128 if(numThreads != iBlockThreadCount)
130 throw std::runtime_error(
"The OpenMP 2.0 runtime did not use the number of threads "
131 "that had been required!");
135 std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
147 ::omp_set_dynamic(ompIsDynamic);
151 TKernelFnObj m_kernelFnObj;
152 std::tuple<std::decay_t<TArgs>...> m_args;
158 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
159 struct AccType<TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>>
161 using type = AccCpuOmp2Threads<TDim, TIdx>;
165 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
166 struct DevType<TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>>
172 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
173 struct DimType<TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>>
179 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
180 struct PlatformType<TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>>
182 using type = PlatformCpu;
186 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
187 struct IdxType<TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>>
198 template<
typename TDev,
typename TDim,
typename TIdx,
typename TKernelFn,
typename... TArgs>
199 struct FunctionAttributes<AccCpuOmp2Threads<TDim, TIdx>, TDev, TKernelFn, TArgs...>
208 [[maybe_unused]] TKernelFn
const& kernelFn,
215 auto const& props = alpaka::getAccDevProps<AccCpuOmp2Threads<TDim, TIdx>>(dev);
216 kernelFunctionAttributes.
maxThreadsPerBlock =
static_cast<int>(props.m_blockThreadCountMax);
219 return kernelFunctionAttributes;
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
The CPU OpenMP 2.0 thread accelerator.
The CPU OpenMP 2.0 thread accelerator execution task.
ALPAKA_FN_HOST TaskKernelCpuOmp2Threads(TWorkDiv &&workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
A basic class holding the work division as grid block extent, block thread extent and thread element extent.
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
int maxDynamicSharedSizeBytes
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFn, TArgs &&... args) -> alpaka::KernelFunctionAttributes