alpaka/AccCpuOmp2Blocks_8hpp_source.html

/* Copyright 2024 Axel Huebl, Benjamin Worpitz, René Widera, Jan Stephan, Bernhard Manfred Gruber, Andrea Bocci

 * SPDX-License-Identifier: MPL-2.0

 */


#pragma once


// Base classes.

#include "alpaka/atomic/AtomicCpu.hpp"

#include "alpaka/atomic/AtomicHierarchy.hpp"

#include "alpaka/atomic/AtomicNoOp.hpp"

#include "alpaka/atomic/AtomicOmpBuiltIn.hpp"

#include "alpaka/block/shared/dyn/BlockSharedMemDynMember.hpp"

#include "alpaka/block/shared/st/BlockSharedMemStMember.hpp"

#include "alpaka/block/sync/BlockSyncNoOp.hpp"

#include "alpaka/core/DemangleTypeNames.hpp"

#include "alpaka/idx/bt/IdxBtZero.hpp"

#include "alpaka/idx/gb/IdxGbRef.hpp"

#include "alpaka/intrinsic/IntrinsicCpu.hpp"

#include "alpaka/math/MathStdLib.hpp"

#include "alpaka/mem/fence/MemFenceOmp2Blocks.hpp"

#include "alpaka/rand/RandDefault.hpp"

#include "alpaka/rand/RandStdLib.hpp"

#include "alpaka/warp/WarpSingleThread.hpp"

#include "alpaka/workdiv/WorkDivMembers.hpp"


// Specialized traits.

#include "alpaka/acc/Traits.hpp"

#include "alpaka/dev/Traits.hpp"

#include "alpaka/idx/Traits.hpp"

#include "alpaka/kernel/Traits.hpp"

#include "alpaka/platform/Traits.hpp"


// Implementation details.

#include "alpaka/acc/Tag.hpp"

#include "alpaka/core/ClipCast.hpp"

#include "alpaka/core/Interface.hpp"

#include "alpaka/dev/DevCpu.hpp"


#include <limits>

#include <typeinfo>


#ifdef ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED


#    if _OPENMP < 200203

#        error If ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED is set, the compiler has to support OpenMP 2.0 or higher!

#    endif


namespace alpaka

{

    //! Forward declaration of the kernel execution task for this accelerator.
    //! It is named in the friend declaration of AccCpuOmp2Blocks and returned by
    //! trait::CreateTaskKernel::createTaskKernel further down in this header.
    template<typename TDim, typename TIdx, typename TKernelFnObj, typename... TArgs>

    class TaskKernelCpuOmp2Blocks;


    //! The CPU OpenMP 2.0 block accelerator.

    //!

    //! This accelerator allows parallel kernel execution on a CPU device.

    //! It uses OpenMP 2.0 to implement the grid block parallelism.

    //! The block thread extent is restricted to 1x1x1, i.e. every block runs exactly one thread.

    //!
    //! Instances are neither copyable nor movable. The only constructor is private and is reachable
    //! through the TaskKernelCpuOmp2Blocks friend declaration.
    //!
    //! \tparam TDim The dimensionality type of the work division and indices.
    //! \tparam TIdx The index type; must be at least as large as int (see the static_assert).

    template<typename TDim, typename TIdx>


    class AccCpuOmp2Blocks final

        : public WorkDivMembers<TDim, TIdx>

        , public gb::IdxGbRef<TDim, TIdx>

        , public bt::IdxBtZero<TDim, TIdx>

        , public AtomicHierarchy<

              AtomicCpu, // grid atomics

              AtomicOmpBuiltIn, // block atomics

              AtomicNoOp> // thread atomics

        , public math::MathStdLib

        // NOTE: BlockSharedMemDynMember has to precede BlockSharedMemStMember in this list, because the
        // constructor below initializes the latter from members of the former.
        , public BlockSharedMemDynMember<>

        , public BlockSharedMemStMember<>

        , public BlockSyncNoOp

        , public IntrinsicCpu

        , public MemFenceOmp2Blocks

        // The random number provider is selected at compile time via ALPAKA_DISABLE_VENDOR_RNG.
#    ifdef ALPAKA_DISABLE_VENDOR_RNG

        , public rand::RandDefault

#    else

        , public rand::RandStdLib

#    endif

        , public warp::WarpSingleThread

        , public interface::Implements<ConceptAcc, AccCpuOmp2Blocks<TDim, TIdx>>

    {

        // Index types narrower than int are rejected at compile time.
        static_assert(

            sizeof(TIdx) >= sizeof(int),

            "Index type is not supported, consider using int or a larger type.");


    public:

        // Every TaskKernelCpuOmp2Blocks instantiation is befriended: a friend declaration cannot name a
        // partial specialization that is restricted to this class's own TDim and TIdx.

        template<typename TDim2, typename TIdx2, typename TKernelFnObj, typename... TArgs>

        friend class ::alpaka::TaskKernelCpuOmp2Blocks;


        // The accelerator can neither be copied nor moved.
        AccCpuOmp2Blocks(AccCpuOmp2Blocks const&) = delete;

        AccCpuOmp2Blocks(AccCpuOmp2Blocks&&) = delete;

        auto operator=(AccCpuOmp2Blocks const&) -> AccCpuOmp2Blocks& = delete;

        auto operator=(AccCpuOmp2Blocks&&) -> AccCpuOmp2Blocks& = delete;


    private:

        //! Private constructor; only this class and its friends can create an accelerator.
        //! \param workDiv The work division used to initialize the WorkDivMembers base.
        //! \param blockSharedMemDynSizeBytes The size in bytes forwarded to the BlockSharedMemDynMember base.
        template<typename TWorkDiv>

        ALPAKA_FN_HOST AccCpuOmp2Blocks(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)

            : WorkDivMembers<TDim, TIdx>(workDiv)

            // m_gridBlockIdx is handed over before it is initialized (bases are constructed before members).
            // NOTE(review): IdxGbRef takes it by const reference and presumably only stores the reference -
            // confirm in IdxGbRef.hpp.
            , gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)

            , BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)

            // staticMemBegin() and staticMemCapacity() are members of the BlockSharedMemDynMember base
            // constructed on the previous line.
            , BlockSharedMemStMember<>(staticMemBegin(), staticMemCapacity())

            , m_gridBlockIdx(Vec<TDim, TIdx>::zeros())

        {

        }


    private:

        // getIdx

        // NOTE(review): mutable, presumably so that the kernel task can update the block index through a
        // const accelerator - confirm in TaskKernelCpuOmp2Blocks.hpp.
        Vec<TDim, TIdx> mutable m_gridBlockIdx; //!< The index of the currently executed block.

    };


    namespace trait

    {

        //! The CPU OpenMP 2.0 block accelerator accelerator type trait specialization.

        template<typename TDim, typename TIdx>

        struct AccType<AccCpuOmp2Blocks<TDim, TIdx>>

        {

            using type = AccCpuOmp2Blocks<TDim, TIdx>;

        };


        //! The CPU OpenMP 2.0 block single thread accelerator type trait specialization.

        template<typename TDim, typename TIdx>

        struct IsSingleThreadAcc<AccCpuOmp2Blocks<TDim, TIdx>> : std::true_type

        {

        };


        //! The CPU OpenMP 2.0 block multi thread accelerator type trait specialization.

        template<typename TDim, typename TIdx>

        struct IsMultiThreadAcc<AccCpuOmp2Blocks<TDim, TIdx>> : std::false_type

        {

        };


        //! The CPU OpenMP 2.0 block accelerator device properties get trait specialization.
        template<typename TDim, typename TIdx>
        struct GetAccDevProps<AccCpuOmp2Blocks<TDim, TIdx>>
        {
            //! Collects the limits this accelerator reports for the given CPU device.
            //! \param dev The CPU device; only used to query the global memory size via getMemBytes().
            //! \return The aggregate of accelerator limits, listed in the order of the inline comments below.
            ALPAKA_FN_HOST static auto getAccDevProps(DevCpu const& dev) -> alpaka::AccDevProps<TDim, TIdx>
            {
                // Grid block and thread element limits are only bounded by what TIdx can represent.
                auto const idxMax = std::numeric_limits<TIdx>::max();
                auto const unboundedExtent = Vec<TDim, TIdx>::all(idxMax);

                return {// m_multiProcessorCount: the OpenMP maximum thread count, clipped into TIdx
                        alpaka::core::clipCast<TIdx>(omp_get_max_threads()),
                        // m_gridBlockExtentMax
                        unboundedExtent,
                        // m_gridBlockCountMax
                        idxMax,
                        // m_blockThreadExtentMax: a block holds a single thread
                        Vec<TDim, TIdx>::ones(),
                        // m_blockThreadCountMax
                        static_cast<TIdx>(1),
                        // m_threadElemExtentMax
                        unboundedExtent,
                        // m_threadElemCountMax
                        idxMax,
                        // m_sharedMemSizeBytes
                        static_cast<std::size_t>(AccCpuOmp2Blocks<TDim, TIdx>::staticAllocBytes()),
                        // m_globalMemSizeBytes
                        getMemBytes(dev)};
            }
        };


        //! Accelerator name trait specialization for the CPU OpenMP 2.0 block accelerator.
        template<typename TDim, typename TIdx>
        struct GetAccName<AccCpuOmp2Blocks<TDim, TIdx>>
        {
            //! \return The text "AccCpuOmp2Blocks<D,I>", where D is TDim::value and I is the demangled
            //! name of TIdx.
            ALPAKA_FN_HOST static auto getAccName() -> std::string
            {
                std::string accName{"AccCpuOmp2Blocks<"};
                accName += std::to_string(TDim::value);
                accName += ",";
                accName += core::demangled<TIdx>;
                accName += ">";
                return accName;
            }
        };


        //! The CPU OpenMP 2.0 block accelerator device type trait specialization.

        template<typename TDim, typename TIdx>

        struct DevType<AccCpuOmp2Blocks<TDim, TIdx>>

        {

            using type = DevCpu;

        };


        //! The CPU OpenMP 2.0 block accelerator dimension getter trait specialization.

        template<typename TDim, typename TIdx>

        struct DimType<AccCpuOmp2Blocks<TDim, TIdx>>

        {

            using type = TDim;

        };


        //! Kernel execution task creation trait specialization for the CPU OpenMP 2.0 block accelerator.
        template<typename TDim, typename TIdx, typename TWorkDiv, typename TKernelFnObj, typename... TArgs>
        struct CreateTaskKernel<AccCpuOmp2Blocks<TDim, TIdx>, TWorkDiv, TKernelFnObj, TArgs...>
        {
            //! Creates the task that executes the kernel function object with the given arguments.
            //! \param workDiv The work division; the product of its block thread extent has to be 1.
            //! \param kernelFnObj The kernel function object handed to the task.
            //! \param args The kernel arguments, forwarded to the task.
            //! \return A TaskKernelCpuOmp2Blocks built from the three parameters.
            //! \throws std::runtime_error if the work division asks for more or less than one thread per block.
            ALPAKA_FN_HOST static auto createTaskKernel(
                TWorkDiv const& workDiv,
                TKernelFnObj const& kernelFnObj,
                TArgs&&... args)
            {
                using Acc = AccCpuOmp2Blocks<TDim, TIdx>;
                using Task = TaskKernelCpuOmp2Blocks<TDim, TIdx, TKernelFnObj, TArgs...>;

                // This accelerator runs exactly one thread per block; reject everything else up front.
                auto const threadsPerBlock = workDiv.m_blockThreadExtent.prod();
                bool const isSingleThreaded = (threadsPerBlock == static_cast<TIdx>(1u));
                if(!isSingleThreaded)
                {
                    throw std::runtime_error(
                        "The given work division is not valid for a single thread Acc: " + getAccName<Acc>()
                        + ". Threads per block should be 1!");
                }

                return Task(workDiv, kernelFnObj, std::forward<TArgs>(args)...);
            }
        };


        //! The CPU OpenMP 2.0 block execution task platform type trait specialization.

        template<typename TDim, typename TIdx>

        struct PlatformType<AccCpuOmp2Blocks<TDim, TIdx>>

        {

            using type = PlatformCpu;

        };


        //! The CPU OpenMP 2.0 block accelerator idx type trait specialization.

        template<typename TDim, typename TIdx>

        struct IdxType<AccCpuOmp2Blocks<TDim, TIdx>>

        {

            using type = TIdx;

        };


        template<typename TDim, typename TIdx>

        struct AccToTag<alpaka::AccCpuOmp2Blocks<TDim, TIdx>>

        {

            using type = alpaka::TagCpuOmp2Blocks;

        };


        template<typename TDim, typename TIdx>

        struct TagToAcc<alpaka::TagCpuOmp2Blocks, TDim, TIdx>

        {

            using type = alpaka::AccCpuOmp2Blocks<TDim, TIdx>;

        };

    } // namespace trait


} // namespace alpaka


#endif

AtomicCpu.hpp

AtomicHierarchy.hpp

AtomicNoOp.hpp

AtomicOmpBuiltIn.hpp

BlockSharedMemDynMember.hpp

BlockSharedMemStMember.hpp

BlockSyncNoOp.hpp

ClipCast.hpp

DemangleTypeNames.hpp

DevCpu.hpp

IdxBtZero.hpp

IdxGbRef.hpp

Interface.hpp

IntrinsicCpu.hpp

MathStdLib.hpp

MemFenceOmp2Blocks.hpp

RandDefault.hpp

RandStdLib.hpp

Tag.hpp

WarpSingleThread.hpp

WorkDivMembers.hpp

Traits.hpp

alpaka::AccCpuOmp2Blocks
The CPU OpenMP 2.0 block accelerator.
Definition AccCpuOmp2Blocks.hpp:80

alpaka::AccCpuOmp2Blocks::AccCpuOmp2Blocks
AccCpuOmp2Blocks(AccCpuOmp2Blocks const &)=delete

alpaka::AccCpuOmp2Blocks::operator=
auto operator=(AccCpuOmp2Blocks const &) -> AccCpuOmp2Blocks &=delete

alpaka::AccCpuOmp2Blocks::AccCpuOmp2Blocks
AccCpuOmp2Blocks(AccCpuOmp2Blocks &&)=delete

alpaka::AccCpuOmp2Blocks::operator=
auto operator=(AccCpuOmp2Blocks &&) -> AccCpuOmp2Blocks &=delete

alpaka::BlockSharedMemDynMember
Dynamic block shared memory provider using fixed-size member array to allocate memory on the stack or...
Definition BlockSharedMemDynMember.hpp:41

alpaka::BlockSharedMemDynMember::staticAllocBytes
static constexpr auto staticAllocBytes() -> std::uint32_t
Definition BlockSharedMemDynMember.hpp:71

alpaka::BlockSharedMemDynMember::staticMemBegin
auto staticMemBegin() const -> uint8_t *
Definition BlockSharedMemDynMember.hpp:55

alpaka::BlockSharedMemDynMember::staticMemCapacity
auto staticMemCapacity() const -> std::uint32_t
Definition BlockSharedMemDynMember.hpp:63

alpaka::BlockSharedMemStMember
Static block shared memory provider using a pointer to externally allocated fixed-size memory,...
Definition BlockSharedMemStMember.hpp:26

alpaka::BlockSyncNoOp
The no op block synchronization.
Definition BlockSyncNoOp.hpp:14

alpaka::DevCpu
The CPU device handle.
Definition DevCpu.hpp:56

alpaka::IntrinsicCpu
The CPU intrinsic.
Definition IntrinsicCpu.hpp:29

alpaka::MemFenceOmp2Blocks
The CPU OpenMP 2.0 block memory fence.
Definition MemFenceOmp2Blocks.hpp:20

alpaka::TaskKernelCpuOmp2Blocks
The CPU OpenMP 2.0 block accelerator execution task.
Definition TaskKernelCpuOmp2Blocks.hpp:793

alpaka::Vec
A n-dimensional vector.
Definition Vec.hpp:38

alpaka::Vec::ones
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition Vec.hpp:106

alpaka::Vec::all
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition Vec.hpp:89

alpaka::WorkDivMembers
A basic class holding the work division as grid block extent, block thread and thread element extent.
Definition WorkDivMembers.hpp:20

alpaka::bt::IdxBtZero
A zero block thread index provider.
Definition IdxBtZero.hpp:19

alpaka::gb::IdxGbRef
An IdxGbRef grid block index.
Definition IdxGbRef.hpp:20

alpaka::gb::IdxGbRef::IdxGbRef
IdxGbRef(Vec< TDim, TIdx > const &gridBlockIdx)
Definition IdxGbRef.hpp:22

alpaka::math::MathStdLib
The standard library math trait specializations.
Definition MathStdLib.hpp:249

alpaka::meta::InheritFromList
Definition InheritFromList.hpp:10

alpaka::rand::RandDefault
Definition RandDefault.hpp:19

alpaka::rand::TinyMersenneTwister
"Tiny" state mersenne twister implementation
Definition RandStdLib.hpp:20

alpaka::warp::WarpSingleThread
The single-threaded warp to emulate it on CPUs.
Definition WarpSingleThread.hpp:15

ALPAKA_FN_HOST
#define ALPAKA_FN_HOST
Definition Common.hpp:40

Traits.hpp

Traits.hpp

Traits.hpp

alpaka::core::clipCast
auto clipCast(V const &val) -> T
Definition ClipCast.hpp:16

alpaka
The alpaka accelerator library.
Definition AccCpuOmp2Blocks.hpp:49

alpaka::getAccDevProps
ALPAKA_FN_HOST auto getAccDevProps(TDev const &dev) -> AccDevProps< Dim< TAcc >, Idx< TAcc > >
Definition Traits.hpp:90

alpaka::createTaskKernel
ALPAKA_FN_HOST auto createTaskKernel(TWorkDiv const &workDiv, TKernelFnObj const &kernelFnObj, TArgs &&... args)
Creates a kernel execution task.
Definition Traits.hpp:332

alpaka::getMemBytes
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95

alpaka::getAccName
ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:100

Traits.hpp

alpaka::AccDevProps
The acceleration properties on a device.
Definition AccDevProps.hpp:18

alpaka::PlatformCpu
The CPU device platform.
Definition PlatformCpu.hpp:18

alpaka::TagCpuOmp2Blocks
Definition Tag.hpp:31

alpaka::interface::Implements
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15

alpaka::trait::AccToTag
Definition Tag.hpp:58

alpaka::trait::AccType
The accelerator type trait.
Definition Traits.hpp:37

alpaka::trait::CreateTaskKernel
The kernel execution task creation trait.
Definition Traits.hpp:35

alpaka::trait::DevType
The device type trait.
Definition Traits.hpp:23

alpaka::trait::DimType
The dimension getter type trait.
Definition Traits.hpp:14

alpaka::trait::GetAccDevProps
The device properties get trait.
Definition Traits.hpp:61

alpaka::trait::GetAccName
The accelerator name trait.
Definition Traits.hpp:68

alpaka::trait::GetAccName::getAccName
static ALPAKA_FN_HOST auto getAccName() -> std::string
Definition Traits.hpp:69

alpaka::trait::IdxType
The idx type trait.
Definition Traits.hpp:25

alpaka::trait::IsMultiThreadAcc
The multi thread accelerator trait.
Definition Traits.hpp:56

alpaka::trait::IsSingleThreadAcc
The single thread accelerator trait.
Definition Traits.hpp:46

alpaka::trait::PlatformType
The platform type trait.
Definition Traits.hpp:30

alpaka::trait::TagToAcc
Definition Tag.hpp:61