25 #include <type_traits>
26 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
30 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED
33 # error If ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!
41 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
45 template<
typename TWorkDiv>
48 , m_kernelFnObj(kernelFnObj)
49 , m_args(std::forward<TArgs>(args)...)
52 Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
53 "The work division and the execution task have to be of the same dimensionality!");
61 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*
this);
62 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*
this);
63 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*
this);
66 auto const blockSharedMemDynSizeBytes = std::apply(
67 [&](std::decay_t<TArgs>
const&... args)
69 return getBlockSharedMemDynSizeBytes<AccCpuOmp2Threads<TDim, TIdx>>(
77 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
78 std::cout << __func__ <<
" blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes <<
" B"
84 blockSharedMemDynSizeBytes);
87 TIdx
const blockThreadCount(blockThreadExtent.prod());
88 [[maybe_unused]]
int const iBlockThreadCount(
static_cast<int>(blockThreadCount));
90 if(::omp_in_parallel() != 0)
92 throw std::runtime_error(
93 "The OpenMP 2.0 thread backend can not be used within an existing parallel region!");
97 int const ompIsDynamic(::omp_get_dynamic());
105 acc.m_gridBlockIdx = gridBlockIdx;
113 # pragma omp parallel num_threads(iBlockThreadCount)
116 if constexpr((!BOOST_COMP_GNUC) || (BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(8, 1, 0)))
118 # pragma omp single nowait
123 if((iBlockThreadCount > 1) && (::omp_in_parallel() == 0))
125 throw std::runtime_error(
126 "The OpenMP 2.0 runtime did not create a parallel region!");
129 int const numThreads = ::omp_get_num_threads();
130 if(numThreads != iBlockThreadCount)
132 throw std::runtime_error(
133 "The OpenMP 2.0 runtime did not use the number of threads "
134 "that had been required!");
139 std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
151 ::omp_set_dynamic(ompIsDynamic);
155 TKernelFnObj m_kernelFnObj;
156 std::tuple<std::decay_t<TArgs>...> m_args;
162 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
169 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
176 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
183 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
190 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
The CPU OpenMP 2.0 thread accelerator.
The CPU OpenMP 2.0 thread accelerator execution task.
ALPAKA_FN_HOST TaskKernelCpuOmp2Threads(TWorkDiv &&workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
A basic class holding the work division as grid block extent, block thread extent and thread element extent.
The alpaka accelerator library.
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
The accelerator type trait.
The dimension getter type trait.