28#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
32#ifdef ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED
35# error If ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!
43 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
47 template<
typename TWorkDiv>
50 , m_kernelFnObj(kernelFnObj)
51 , m_args(
std::forward<TArgs>(args)...)
55 "The work division and the execution task have to be of the same dimensionality!");
63 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*
this);
64 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*
this);
65 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*
this);
68 auto const blockSharedMemDynSizeBytes = std::apply(
69 [&](std::decay_t<TArgs>
const&... args)
71 return getBlockSharedMemDynSizeBytes<AccCpuOmp2Threads<TDim, TIdx>>(
79# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
80 std::cout << __func__ <<
" blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes <<
" B"
86 blockSharedMemDynSizeBytes);
89 TIdx
const blockThreadCount(blockThreadExtent.prod());
90 [[maybe_unused]]
int const iBlockThreadCount(
static_cast<int>(blockThreadCount));
92 if(::omp_in_parallel() != 0)
94 throw std::runtime_error(
95 "The OpenMP 2.0 thread backend can not be used within an existing parallel region!");
99 int const ompIsDynamic(::omp_get_dynamic());
100 ::omp_set_dynamic(0);
107 acc.m_gridBlockIdx = gridBlockIdx;
115# pragma omp parallel num_threads(iBlockThreadCount)
118 if constexpr((!BOOST_COMP_GNUC) || (BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(8, 1, 0)))
120# pragma omp single nowait
125 if((iBlockThreadCount > 1) && (::omp_in_parallel() == 0))
127 throw std::runtime_error(
128 "The OpenMP 2.0 runtime did not create a parallel region!");
131 int const numThreads = ::omp_get_num_threads();
132 if(numThreads != iBlockThreadCount)
134 throw std::runtime_error(
135 "The OpenMP 2.0 runtime did not use the number of threads "
136 "that had been required!");
141 std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
153 ::omp_set_dynamic(ompIsDynamic);
157 TKernelFnObj m_kernelFnObj;
158 std::tuple<std::decay_t<TArgs>...> m_args;
164 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
165 struct AccType<TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>>
167 using type = AccCpuOmp2Threads<TDim, TIdx>;
171 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
172 struct DevType<TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>>
178 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
179 struct DimType<TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>>
185 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
186 struct PlatformType<TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>>
188 using type = PlatformCpu;
192 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
193 struct IdxType<TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>>
204 template<
typename TDev,
typename TDim,
typename TIdx,
typename TKernelFn,
typename... TArgs>
205 struct FunctionAttributes<AccCpuOmp2Threads<TDim, TIdx>, TDev, TKernelFn, TArgs...>
214 [[maybe_unused]] TKernelFn
const& kernelFn,
221 auto const& props = alpaka::getAccDevProps<AccCpuOmp2Threads<TDim, TIdx>>(dev);
222 kernelFunctionAttributes.
maxThreadsPerBlock =
static_cast<int>(props.m_blockThreadCountMax);
225 return kernelFunctionAttributes;
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
The CPU OpenMP 2.0 thread accelerator.
The CPU OpenMP 2.0 thread accelerator execution task.
ALPAKA_FN_HOST TaskKernelCpuOmp2Threads(TWorkDiv &&workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
A basic class holding the work division as grid block extent, block thread and thread element extent.
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
int maxDynamicSharedSizeBytes
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, TKernelFnObj const &kernelFn, TArgs &&... args) -> alpaka::KernelFunctionAttributes