27 #include <type_traits>
28 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
32 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED
35 # error If ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!
43 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
47 template<
typename TWorkDiv>
50 , m_kernelFnObj(kernelFnObj)
51 , m_args(std::forward<TArgs>(args)...)
54 Dim<std::decay_t<TWorkDiv>>::value == TDim::value,
55 "The work division and the execution task have to be of the same dimensionality!");
63 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(*
this);
64 auto const blockThreadExtent = getWorkDiv<Block, Threads>(*
this);
65 auto const threadElemExtent = getWorkDiv<Thread, Elems>(*
this);
68 auto const blockSharedMemDynSizeBytes = std::apply(
69 [&](std::decay_t<TArgs>
const&... args)
71 return getBlockSharedMemDynSizeBytes<AccCpuOmp2Threads<TDim, TIdx>>(
79 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
80 std::cout << __func__ <<
" blockSharedMemDynSizeBytes: " << blockSharedMemDynSizeBytes <<
" B"
86 blockSharedMemDynSizeBytes);
89 TIdx
const blockThreadCount(blockThreadExtent.prod());
90 [[maybe_unused]]
int const iBlockThreadCount(
static_cast<int>(blockThreadCount));
92 if(::omp_in_parallel() != 0)
94 throw std::runtime_error(
95 "The OpenMP 2.0 thread backend can not be used within an existing parallel region!");
99 int const ompIsDynamic(::omp_get_dynamic());
100 ::omp_set_dynamic(0);
107 acc.m_gridBlockIdx = gridBlockIdx;
115 # pragma omp parallel num_threads(iBlockThreadCount)
118 if constexpr((!BOOST_COMP_GNUC) || (BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(8, 1, 0)))
120 # pragma omp single nowait
125 if((iBlockThreadCount > 1) && (::omp_in_parallel() == 0))
127 throw std::runtime_error(
128 "The OpenMP 2.0 runtime did not create a parallel region!");
131 int const numThreads = ::omp_get_num_threads();
132 if(numThreads != iBlockThreadCount)
134 throw std::runtime_error(
135 "The OpenMP 2.0 runtime did not use the number of threads "
136 "that had been required!");
141 std::apply(m_kernelFnObj, std::tuple_cat(std::tie(acc), m_args));
153 ::omp_set_dynamic(ompIsDynamic);
157 TKernelFnObj m_kernelFnObj;
158 std::tuple<std::decay_t<TArgs>...> m_args;
164 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
171 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
178 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
185 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
192 template<
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
204 template<
typename TDev,
typename TDim,
typename TIdx,
typename TKernelFn,
typename... TArgs>
214 [[maybe_unused]] TKernelFn
const& kernelFn,
221 auto const& props = alpaka::getAccDevProps<AccCpuOmp2Threads<TDim, TIdx>>(dev);
222 kernelFunctionAttributes.
maxThreadsPerBlock =
static_cast<int>(props.m_blockThreadCountMax);
225 return kernelFunctionAttributes;
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
The CPU OpenMP 2.0 thread accelerator.
The CPU OpenMP 2.0 thread accelerator execution task.
ALPAKA_FN_HOST TaskKernelCpuOmp2Threads(TWorkDiv &&workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
ALPAKA_FN_HOST auto operator()() const -> void
Executes the kernel function object.
A basic class holding the work division as grid block extent, block thread extent and thread element extent.
The alpaka accelerator library.
constexpr std::uint32_t BlockSharedDynMemberAllocKiB
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto freeSharedVars(TBlockSharedMemSt &blockSharedMemSt) -> void
Frees all memory used by block shared variables.
Kernel function attributes struct. Attributes are filled by calling the API of the accelerator using ...
int maxDynamicSharedSizeBytes
The accelerator type trait.
The dimension getter type trait.
static ALPAKA_FN_HOST auto getFunctionAttributes(TDev const &dev, [[maybe_unused]] TKernelFn const &kernelFn, [[maybe_unused]] TArgs &&... args) -> alpaka::KernelFunctionAttributes
The structure template used to access the function attributes of a kernel function object.