alpaka/AccCpuOmp2Threads_8hpp_source.html

/* Copyright 2025 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci

 * SPDX-License-Identifier: MPL-2.0

 */


#pragma once


// Base classes.

#include "alpaka/atomic/AtomicCpu.hpp"

#include "alpaka/atomic/AtomicHierarchy.hpp"

#include "alpaka/atomic/AtomicOmpBuiltIn.hpp"

#include "alpaka/block/shared/dyn/BlockSharedMemDynMember.hpp"

#include "alpaka/block/shared/st/BlockSharedMemStMemberMasterSync.hpp"

#include "alpaka/block/sync/BlockSyncBarrierOmp.hpp"

#include "alpaka/core/DemangleTypeNames.hpp"

#include "alpaka/idx/bt/IdxBtOmp.hpp"

#include "alpaka/idx/gb/IdxGbRef.hpp"

#include "alpaka/intrinsic/IntrinsicCpu.hpp"

#include "alpaka/math/MathStdLib.hpp"

#include "alpaka/mem/fence/MemFenceOmp2Threads.hpp"

#include "alpaka/rand/RandDefault.hpp"

#include "alpaka/rand/RandStdLib.hpp"

#include "alpaka/warp/WarpSingleThread.hpp"

#include "alpaka/workdiv/WorkDivMembers.hpp"


// Specialized traits.

#include "alpaka/acc/Traits.hpp"

#include "alpaka/dev/Traits.hpp"

#include "alpaka/idx/Traits.hpp"

#include "alpaka/kernel/Traits.hpp"

#include "alpaka/platform/Traits.hpp"


// Implementation details.

#include "alpaka/acc/Tag.hpp"

#include "alpaka/core/ClipCast.hpp"

#include "alpaka/core/Interface.hpp"

#include "alpaka/dev/DevCpu.hpp"


#ifdef __cpp_lib_format
#    include <format>
#endif
#include <algorithm>
#include <limits>
#include <string>


#ifdef ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED


#    if _OPENMP < 200203

#        error If ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!

#    endif


#    include <omp.h>


namespace alpaka

{

    // Forward declaration of the kernel task type; AccCpuOmp2Threads declares every specialization of it a friend.
    template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>

    class TaskKernelCpuOmp2Threads;


    //! The CPU OpenMP 2.0 thread accelerator.
    //!
    //! Executes kernels in parallel on a CPU device; the threads of a block are realized with OpenMP 2.0.
    //! The class is assembled from the base classes listed below (work division, indices, atomics, math,
    //! shared memory, synchronization, intrinsics, memory fence, random numbers and warp).
    //!
    //! Objects are neither copyable nor movable and can only be constructed by TaskKernelCpuOmp2Threads.
    //!
    //! \tparam TDim The dimensionality of the accelerator.
    //! \tparam TIdx The index type; must be at least as large as int.
    template<typename TDim, typename TIdx>
    class AccCpuOmp2Threads final :
        public WorkDivMembers<TDim, TIdx>,
        public gb::IdxGbRef<TDim, TIdx>,
        public bt::IdxBtOmp<TDim, TIdx>,
        public AtomicHierarchy<
            AtomicCpu, // grid atomics
            AtomicOmpBuiltIn, // block atomics
            AtomicOmpBuiltIn>, // thread atomics
        public math::MathStdLib,
        public BlockSharedMemDynMember<>,
        public BlockSharedMemStMemberMasterSync<>,
        public BlockSyncBarrierOmp,
        public IntrinsicCpu,
        public MemFenceOmp2Threads,
#    ifdef ALPAKA_DISABLE_VENDOR_RNG
        public rand::RandDefault,
#    else
        public rand::RandStdLib,
#    endif
        public warp::WarpSingleThread,
        public interface::Implements<InterfaceAcc, AccCpuOmp2Threads<TDim, TIdx>>
    {
        static_assert(
            sizeof(TIdx) >= sizeof(int),
            "Index type is not supported, consider using int or a larger type.");

    public:
        // A friend declaration cannot be a partial specialization, so instead of befriending only the task with
        // the matching TDim and TIdx, every specialization of the task template is befriended.
        template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>
        friend class ::alpaka::TaskKernelCpuOmp2Threads;

        // Copying and moving are both disabled.
        AccCpuOmp2Threads(AccCpuOmp2Threads const&) = delete;
        AccCpuOmp2Threads& operator=(AccCpuOmp2Threads const&) = delete;
        AccCpuOmp2Threads(AccCpuOmp2Threads&&) = delete;
        AccCpuOmp2Threads& operator=(AccCpuOmp2Threads&&) = delete;

    private:
        //! Constructs the accelerator; only reachable by the befriended kernel task.
        //!
        //! \param workDiv The work division copied into the WorkDivMembers base.
        //! \param blockSharedMemDynSizeBytes The size passed on to the dynamic block shared memory base.
        template<typename TWorkDiv>
        ALPAKA_FN_HOST AccCpuOmp2Threads(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
            : WorkDivMembers<TDim, TIdx>(workDiv)
            // Base classes are initialized before members, so m_gridBlockIdx is not yet constructed here.
            // NOTE(review): presumably IdxGbRef only binds the reference at this point - confirm.
            , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
            , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
            // The static shared memory is handed the storage range reported by the dynamic shared memory base,
            // a callable that synchronizes the block threads and a callable that identifies OpenMP thread 0.
            , BlockSharedMemStMemberMasterSync<>(
                  staticMemBegin(),
                  staticMemCapacity(),
                  [this] { syncBlockThreads(*this); },
                  []() noexcept -> bool { return 0 == ::omp_get_thread_num(); })
            , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
        {
        }

        // getIdx
        Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.
    };


    namespace trait

    {

        //! The CPU OpenMP 2.0 thread accelerator type trait specialization.
        //!
        //! The accelerator type of AccCpuOmp2Threads is AccCpuOmp2Threads itself.
        template<typename TDim, typename TIdx>
        struct AccType<AccCpuOmp2Threads<TDim, TIdx>>
        {
            using type = AccCpuOmp2Threads<TDim, TIdx>;
        };


        //! Single-thread accelerator trait specialization for the CPU OpenMP 2.0 thread accelerator.
        //!
        //! Derives from std::false_type: this accelerator is not reported as a single-thread accelerator.
        template<typename TDim, typename TIdx>
        struct IsSingleThreadAcc<AccCpuOmp2Threads<TDim, TIdx>> : std::false_type
        {
        };


        //! Multi-thread accelerator trait specialization for the CPU OpenMP 2.0 thread accelerator.
        //!
        //! Derives from std::true_type: this accelerator is reported as a multi-thread accelerator.
        template<typename TDim, typename TIdx>
        struct IsMultiThreadAcc<AccCpuOmp2Threads<TDim, TIdx>> : std::true_type
        {
        };


        //! The CPU OpenMP 2.0 thread accelerator device properties get trait specialization.
        template<typename TDim, typename TIdx>
        struct GetAccDevProps<AccCpuOmp2Threads<TDim, TIdx>>
        {
            //! Assembles the accelerator properties for a CPU device.
            //!
            //! The maximum number of threads per block is taken from ::omp_get_max_threads(); when ALPAKA_CI is
            //! defined it is additionally capped at 4. The value is clip-cast to TIdx.
            //!
            //! \param dev The CPU device; it is only used to query the memory size via getMemBytes().
            //! \return The properties: one multiprocessor, grid and element extents limited only by TIdx, the
            //!         block thread limits described above and the device memory size for both the shared and
            //!         the global memory size.
            ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> alpaka::AccDevProps<TDim, TIdx>
            {
                // Only the source of the raw thread count differs between the two configurations; the
                // conversion to TIdx is done once below. std::min requires <algorithm>.
#    ifdef ALPAKA_CI
                auto const ompThreadCountMax = std::min(4, ::omp_get_max_threads());
#    else
                auto const ompThreadCountMax = ::omp_get_max_threads();
#    endif
                auto const blockThreadCountMax = alpaka::core::clipCast<TIdx>(ompThreadCountMax);
                auto const memBytes = getMemBytes(dev);

                return {// m_multiProcessorCount
                        static_cast<TIdx>(1),
                        // m_gridBlockExtentMax
                        Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
                        // m_gridBlockCountMax
                        std::numeric_limits<TIdx>::max(),
                        // m_blockThreadExtentMax
                        Vec<TDim, TIdx>::all(blockThreadCountMax),
                        // m_blockThreadCountMax
                        blockThreadCountMax,
                        // m_threadElemExtentMax
                        Vec<TDim, TIdx>::all(std::numeric_limits<TIdx>::max()),
                        // m_threadElemCountMax
                        std::numeric_limits<TIdx>::max(),
                        // m_sharedMemSizeBytes
                        memBytes,
                        // m_globalMemSizeBytes
                        memBytes};
            }
        };


        //! The CPU OpenMP 2.0 thread accelerator name trait specialization.
        template<typename TDim, typename TIdx>
        struct GetAccName<AccCpuOmp2Threads<TDim, TIdx>>
        {
            //! Returns the accelerator name in the form "AccCpuOmp2Threads<DIM,IDX>", where DIM is TDim::value
            //! and IDX is core::demangled<TIdx>.
            //!
            //! The name is built once, stored in a function-local static and returned by copy on each call.
            ALPAKA_FN_HOST static auto getAccName() -> std::string
            {
#    if ALPAKA_COMP_CLANG
#        pragma clang diagnostic push
#        pragma clang diagnostic ignored "-Wexit-time-destructors"
#    endif
                // The immediately-invoked lambda keeps the two alternative ways of building the string
                // (std::format if available, string concatenation otherwise) inside one initializer.
                static std::string const accName = []
                {
#    ifdef __cpp_lib_format
                    return std::format("AccCpuOmp2Threads<{},{}>", TDim::value, core::demangled<TIdx>);
#    else
                    using namespace std::literals;
                    return "AccCpuOmp2Threads<"s + std::to_string(TDim::value) + ","s
                           + std::string(core::demangled<TIdx>) + ">"s;
#    endif
                }();
                return accName;
#    if ALPAKA_COMP_CLANG
#        pragma clang diagnostic pop
#    endif
            }
        };


        //! Device type trait specialization for the CPU OpenMP 2.0 thread accelerator.
        //!
        //! The accelerator runs on devices of type DevCpu.
        template<typename TDim, typename TIdx>
        struct DevType<AccCpuOmp2Threads<TDim, TIdx>>
        {
            using type = DevCpu;
        };


        //! Dimension trait specialization for the CPU OpenMP 2.0 thread accelerator.
        //!
        //! Exposes the accelerator's TDim template argument.
        template<typename TDim, typename TIdx>
        struct DimType<AccCpuOmp2Threads<TDim, TIdx>>
        {
            using type = TDim;
        };


        //! Kernel task creation trait specialization for the CPU OpenMP 2.0 thread accelerator.
        template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
        struct CreateTaskKernel<AccCpuOmp2Threads<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
        {
            //! Creates the kernel execution task for this accelerator.
            //!
            //! \param workDiv The work division handed to the task.
            //! \param kernelFnObj The kernel function object handed to the task.
            //! \param args The kernel arguments, passed on to the task with std::forward.
            //! \return A TaskKernelCpuOmp2Threads constructed from the three inputs.
            ALPAKA_FN_HOST static auto createTaskKernel(
                TWorkDiv const& workDiv,
                TKernelFnObj const& kernelFnObj,
                TArgs&&... args)
            {
                using Task = TaskKernelCpuOmp2Threads<TDim, TIdx, TKernelFnObj, TArgs...>;
                return Task(workDiv, kernelFnObj, std::forward<TArgs>(args)...);
            }
        };


        //! Platform type trait specialization for the CPU OpenMP 2.0 thread accelerator.
        //!
        //! The accelerator belongs to the platform PlatformCpu.
        template<typename TDim, typename TIdx>
        struct PlatformType<AccCpuOmp2Threads<TDim, TIdx>>
        {
            using type = PlatformCpu;
        };


        //! Index type trait specialization for the CPU OpenMP 2.0 thread accelerator.
        //!
        //! Exposes the accelerator's TIdx template argument.
        template<typename TDim, typename TIdx>
        struct IdxType<AccCpuOmp2Threads<TDim, TIdx>>
        {
            using type = TIdx;
        };


        //! Accelerator-to-tag trait specialization: AccCpuOmp2Threads maps to TagCpuOmp2Threads.
        template<typename TDim, typename TIdx>
        struct AccToTag<alpaka::AccCpuOmp2Threads<TDim, TIdx>>
        {
            using type = alpaka::TagCpuOmp2Threads;
        };


        //! Tag-to-accelerator trait specialization: TagCpuOmp2Threads together with a dimension and an index
        //! type maps to AccCpuOmp2Threads<TDim, TIdx>.
        template<typename TDim, typename TIdx>
        struct TagToAcc<alpaka::TagCpuOmp2Threads, TDim, TIdx>
        {
            using type = alpaka::AccCpuOmp2Threads<TDim, TIdx>;
        };

    } // namespace trait

} // namespace alpaka


#endif

AtomicCpu.hpp

AtomicHierarchy.hpp

AtomicOmpBuiltIn.hpp

BlockSharedMemDynMember.hpp

BlockSharedMemStMemberMasterSync.hpp

BlockSyncBarrierOmp.hpp

ClipCast.hpp

DemangleTypeNames.hpp

DevCpu.hpp

IdxBtOmp.hpp

IdxGbRef.hpp

Interface.hpp

IntrinsicCpu.hpp

MathStdLib.hpp

MemFenceOmp2Threads.hpp

RandDefault.hpp

RandStdLib.hpp

Tag.hpp

WarpSingleThread.hpp

WorkDivMembers.hpp

Traits.hpp

alpaka::AccCpuOmp2Threads
The CPU OpenMP 2.0 thread accelerator.
Definition AccCpuOmp2Threads.hpp:83

alpaka::AccCpuOmp2Threads::AccCpuOmp2Threads
AccCpuOmp2Threads(AccCpuOmp2Threads &&)=delete

alpaka::AccCpuOmp2Threads::AccCpuOmp2Threads
AccCpuOmp2Threads(AccCpuOmp2Threads const &)=delete

alpaka::AccCpuOmp2Threads::operator=
auto operator=(AccCpuOmp2Threads &&) -> AccCpuOmp2Threads &=delete

alpaka::AccCpuOmp2Threads::operator=
auto operator=(AccCpuOmp2Threads const &) -> AccCpuOmp2Threads &=delete

alpaka::BlockSharedMemDynMember
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
Definition BlockSharedMemDynMember.hpp:41

alpaka::BlockSharedMemDynMember::staticMemBegin
auto staticMemBegin() const -> uint8_t *
Definition BlockSharedMemDynMember.hpp:55

alpaka::BlockSharedMemDynMember::staticMemCapacity
auto staticMemCapacity() const -> std::uint32_t
Definition BlockSharedMemDynMember.hpp:63

alpaka::BlockSharedMemStMemberMasterSync
Definition BlockSharedMemStMemberMasterSync.hpp:24

alpaka::BlockSyncBarrierOmp
The OpenMP barrier block synchronization.
Definition BlockSyncBarrierOmp.hpp:18

alpaka::IntrinsicCpu
The CPU intrinsic.
Definition IntrinsicCpu.hpp:29

alpaka::MemFenceOmp2Threads
The CPU OpenMP 2.0 block memory fence.
Definition MemFenceOmp2Threads.hpp:20

alpaka::Vec::all
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition Vec.hpp:89

alpaka::WorkDivMembers
A basic class holding the work division as grid block extent, block thread and thread element extent.
Definition WorkDivMembers.hpp:20

alpaka::bt::IdxBtOmp
The OpenMP accelerator index provider.
Definition IdxBtOmp.hpp:26

alpaka::gb::IdxGbRef
An IdxGbRef grid block index.
Definition IdxGbRef.hpp:20

alpaka::gb::IdxGbRef::IdxGbRef
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition IdxGbRef.hpp:22

alpaka::math::MathStdLib
The standard library math trait specializations.
Definition MathStdLib.hpp:253

alpaka::meta::InheritFromList
Definition InheritFromList.hpp:10

alpaka::rand::RandDefault
Definition RandDefault.hpp:19

alpaka::rand::TinyMersenneTwister
"Tiny" state mersenne twister implementation
Definition RandStdLib.hpp:20

alpaka::warp::WarpSingleThread
The single-threaded warp to emulate it on CPUs.
Definition WarpSingleThread.hpp:15

ALPAKA_FN_HOST
#define ALPAKA_FN_HOST
Definition Common.hpp:40

Traits.hpp

Traits.hpp

Traits.hpp

alpaka::core::clipCast
auto clipCast(V const &val) -> T
Definition ClipCast.hpp:16

alpaka
The alpaka accelerator library.
Definition AccCpuOmp2Blocks.hpp:52

alpaka::getAccDevProps
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc > >
Definition Traits.hpp:95

alpaka::createTaskKernel
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:334

alpaka::getMemBytes
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95

alpaka::syncBlockThreads
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreads(TBlockSync const &blockSync) -> void
Synchronizes all threads within the current block (independently for all blocks).
Definition Traits.hpp:36

alpaka::AccToTag
typename trait::AccToTag< TAcc >::type AccToTag
maps an acc type to a tag type
Definition Tag.hpp:67

alpaka::TagToAcc
typename trait::TagToAcc< TTag, TDim, TIdx >::type TagToAcc
maps a tag type to an acc type
Definition Tag.hpp:74

Traits.hpp

alpaka::AccDevProps
The acceleration properties on a device.
Definition AccDevProps.hpp:18

alpaka::TagCpuOmp2Threads
Definition Tag.hpp:32

alpaka::interface::Implements
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15

alpaka::trait::GetAccName::getAccName
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:74