alpaka/DevUniformCudaHipRt_8hpp_source.html

/* Copyright 2024 Benjamin Worpitz, Jakob Krude, René Widera, Andrea Bocci, Bernhard Manfred Gruber,

 *                Antonio Di Pilato, Jan Stephan, Andrea Bocci

 * SPDX-License-Identifier: MPL-2.0

 */


#pragma once


#include "alpaka/core/ApiCudaRt.hpp"

#include "alpaka/core/Cuda.hpp"

#include "alpaka/core/Hip.hpp"

#include "alpaka/core/Interface.hpp"

#include "alpaka/dev/Traits.hpp"

#include "alpaka/dev/common/DevGenericImpl.hpp"

#include "alpaka/dev/common/DeviceProperties.hpp"

#include "alpaka/mem/buf/Traits.hpp"

#include "alpaka/platform/Traits.hpp"

#include "alpaka/queue/Properties.hpp"

#include "alpaka/queue/Traits.hpp"

#include "alpaka/queue/cuda-hip/QueueUniformCudaHipRt.hpp"

#include "alpaka/traits/Traits.hpp"

#include "alpaka/wait/Traits.hpp"


#include <cstddef>

#include <mutex>

#include <string>

#include <vector>


#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)


namespace alpaka

{


    //! Forward declaration of the CUDA/HIP RT device handle.
    //! The class itself is defined further down in this file; the early declaration lets the
    //! declarations in between refer to it by name.
    template<typename TApi>

    class DevUniformCudaHipRt;


    namespace trait

    {

        //! Forward declaration of the device get trait.
        //! DevUniformCudaHipRt names the specialization for its platform in a friend declaration.
        template<typename TPlatform, typename TSfinae>

        struct GetDevByIdx;

    } // namespace trait


    namespace uniform_cuda_hip::detail

    {

        //! Forward declaration of the CUDA/HIP RT queue.
        //! TBlocking selects between the blocking (true) and non-blocking (false) flavour, as used by the
        //! QueueUniformCudaHipRtBlocking / QueueUniformCudaHipRtNonBlocking aliases below.
        template<typename TApi, bool TBlocking>

        class QueueUniformCudaHipRt;

    } // namespace uniform_cuda_hip::detail


    //! The blocking CUDA/HIP RT queue: QueueUniformCudaHipRt instantiated with TBlocking = true.
    template<typename TApi>

    using QueueUniformCudaHipRtBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<TApi, true>;


    //! The non-blocking CUDA/HIP RT queue: QueueUniformCudaHipRt instantiated with TBlocking = false.
    template<typename TApi>

    using QueueUniformCudaHipRtNonBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<TApi, false>;


    //! Forward declaration of the CUDA/HIP RT platform.
    //! Only the name is needed in this file: it appears in the device's friend declaration and in the
    //! PlatformType trait specialization.
    template<typename TApi>

    struct PlatformUniformCudaHipRt;


    //! The CUDA/HIP RT device handle.
    //!
    //! Holds the native device index together with a shared DevGenericImpl object that is used for queue
    //! registration and device-property lookup. Copies of a handle share that DevGenericImpl instance, and two
    //! handles compare equal when their native device indices are equal.
    //!
    //! \tparam TApi The runtime API wrapper (CUDA or HIP) whose static functions are called.
    template<typename TApi>
    class DevUniformCudaHipRt
        : public interface::Implements<ConceptCurrentThreadWaitFor, DevUniformCudaHipRt<TApi>>
        , public interface::Implements<ConceptDev, DevUniformCudaHipRt<TApi>>
    {
        // The device get trait of the matching platform is a friend; presumably so that it can use the
        // private index constructor below.
        friend struct trait::GetDevByIdx<PlatformUniformCudaHipRt<TApi>>;

        using IDeviceQueue = uniform_cuda_hip::detail::QueueUniformCudaHipRtImpl<TApi>;

    protected:
        //! Creates a handle that is not bound to a device yet.
        //! The device index keeps its default member initializer (-1).
        DevUniformCudaHipRt() : m_DevGenericImpl{std::make_shared<alpaka::detail::DevGenericImpl<IDeviceQueue>>()}
        {
        }

    public:
        //! \return true if both handles carry the same native device index.
        ALPAKA_FN_HOST auto operator==(DevUniformCudaHipRt const& rhs) const -> bool
        {
            return m_iDevice == rhs.m_iDevice;
        }

        //! \return The negation of operator==.
        ALPAKA_FN_HOST auto operator!=(DevUniformCudaHipRt const& rhs) const -> bool
        {
            return !((*this) == rhs);
        }

        //! \return The native device index stored in this handle.
        [[nodiscard]] auto getNativeHandle() const noexcept -> int
        {
            return m_iDevice;
        }

        //! \return The queues reported by DevGenericImpl::getAllExistingQueues() for this device.
        [[nodiscard]] ALPAKA_FN_HOST auto getAllQueues() const -> std::vector<std::shared_ptr<IDeviceQueue>>
        {
            return m_DevGenericImpl->getAllExistingQueues();
        }

        //! Registers the given queue on this device.
        //! NOTE: Every queue has to be registered for correct functionality of device wait operations!
        //!
        //! \param spQueue The queue implementation to hand to the shared DevGenericImpl.
        ALPAKA_FN_HOST auto registerQueue(std::shared_ptr<IDeviceQueue> spQueue) const -> void
        {
            m_DevGenericImpl->registerQueue(spQueue);
        }

        //! Fills \p devProperties with the name, total global memory and warp size of \p device.
        //!
        //! As a side effect, \p device becomes the current device of the calling thread.
        //!
        //! \param device The device to query.
        //! \param devProperties The property record to fill; name, totalGlobalMem, warpSizes and
        //!        preferredWarpSize are written.
        static void setDeviceProperties(
            DevUniformCudaHipRt<TApi> const& device,
            alpaka::DeviceProperties& devProperties)
        {
            // There is cuda/hip-DeviceGetAttribute as faster alternative to
            // cuda/hip-GetDeviceProperties to get a single device property but it has no option to get
            // the name
            auto devHandle = device.getNativeHandle();
            typename TApi::DeviceProp_t devProp;
            ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::getDeviceProperties(&devProp, devHandle));
            devProperties.name = std::string(devProp.name);

            // Unlike the two queries that take devHandle explicitly, memGetInfo has no device argument and
            // reports on the current device. Select the target device first (as GetFreeMemBytes does),
            // otherwise the total memory of some other device could be recorded.
            ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(devHandle));

            // The call always fills both values; only the total is stored.
            std::size_t freeInternal(0u);
            std::size_t totalInternal(0u);
            ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal));
            devProperties.totalGlobalMem = totalInternal;

            int warpSize = 0;
            ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
                TApi::deviceGetAttribute(&warpSize, TApi::deviceAttributeWarpSize, devHandle));
            // A single warp size is reported, which is therefore also the preferred one.
            devProperties.warpSizes = std::vector<std::size_t>{static_cast<std::size_t>(warpSize)};
            devProperties.preferredWarpSize = static_cast<std::size_t>(warpSize);
        }

        // These trait specializations read m_DevGenericImpl directly.
        friend struct trait::GetName<DevUniformCudaHipRt<TApi>>;
        friend struct trait::GetMemBytes<DevUniformCudaHipRt<TApi>>;
        friend struct trait::GetFreeMemBytes<DevUniformCudaHipRt<TApi>>;
        friend struct trait::GetWarpSizes<DevUniformCudaHipRt<TApi>>;
        friend struct trait::GetPreferredWarpSize<DevUniformCudaHipRt<TApi>>;

    private:
        //! Creates a handle for the device with the given native index.
        //! \param iDevice The native device index to store.
        DevUniformCudaHipRt(int iDevice)
            : m_iDevice(iDevice)
            , m_DevGenericImpl(std::make_shared<alpaka::detail::DevGenericImpl<IDeviceQueue>>())
        {
        }

        // Native device index. Defaults to -1 so that a default-constructed handle has a defined value
        // (reading it in operator== / getNativeHandle was undefined behaviour before) that does not
        // coincide with a non-negative device index.
        int m_iDevice{-1};

        // Shared between all copies of this handle.
        std::shared_ptr<alpaka::detail::DevGenericImpl<IDeviceQueue>> m_DevGenericImpl;
    };


    namespace trait

    {


        //! Device name trait specialization for CUDA/HIP RT devices.
        template<typename TApi>
        struct GetName<DevUniformCudaHipRt<TApi>>
        {
            //! \param dev The device whose name is requested.
            //! \return The `name` field of the properties obtained through the device's generic implementation.
            ALPAKA_FN_HOST static auto getName(DevUniformCudaHipRt<TApi> const& dev) -> std::string
            {
                auto const& properties = dev.m_DevGenericImpl->deviceProperties(dev);
                return properties->name;
            }
        };


        //! Device memory size trait specialization for CUDA/HIP RT devices.
        template<typename TApi>
        struct GetMemBytes<DevUniformCudaHipRt<TApi>>
        {
            //! \param dev The device whose memory size is requested.
            //! \return The `totalGlobalMem` field of the properties obtained through the device's generic
            //!         implementation, i.e. the total (not the currently free) amount.
            ALPAKA_FN_HOST static auto getMemBytes(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
            {
                auto const& properties = dev.m_DevGenericImpl->deviceProperties(dev);
                return properties->totalGlobalMem;
            }
        };


        //! Free device memory trait specialization for CUDA/HIP RT devices.
        template<typename TApi>
        struct GetFreeMemBytes<DevUniformCudaHipRt<TApi>>
        {
            //! Makes \p dev the current device and asks the runtime for its memory info.
            //!
            //! \param dev The device whose free memory is requested.
            //! \return The free-memory value written by TApi::memGetInfo.
            ALPAKA_FN_HOST static auto getFreeMemBytes(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
            {
                // memGetInfo always fills both values; only the free amount is returned.
                std::size_t freeInternal{0u};
                std::size_t totalInternal{0u};

                // memGetInfo is called without a device argument, so the device is selected beforehand.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal));

                return freeInternal;
            }
        };


        //! Warp sizes trait specialization for CUDA/HIP RT devices.
        template<typename TApi>
        struct GetWarpSizes<DevUniformCudaHipRt<TApi>>
        {
            //! \param dev The device whose warp sizes are requested.
            //! \return A copy of the `warpSizes` field of the properties obtained through the device's
            //!         generic implementation.
            ALPAKA_FN_HOST static auto getWarpSizes(DevUniformCudaHipRt<TApi> const& dev) -> std::vector<std::size_t>
            {
                auto const& properties = dev.m_DevGenericImpl->deviceProperties(dev);
                return properties->warpSizes;
            }
        };


        //! Preferred warp size trait specialization for CUDA/HIP RT devices.
        template<typename TApi>
        struct GetPreferredWarpSize<DevUniformCudaHipRt<TApi>>
        {
            //! \param dev The device whose preferred warp size is requested.
            //! \return The `preferredWarpSize` field of the properties obtained through the device's
            //!         generic implementation.
            ALPAKA_FN_HOST static auto getPreferredWarpSize(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
            {
                auto const& properties = dev.m_DevGenericImpl->deviceProperties(dev);
                return properties->preferredWarpSize;
            }
        };


#    ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
        //! Preferred warp size trait specialization for CUDA RT devices.
        //!
        //! Replaces the generic CUDA/HIP version with a compile-time constant when the CUDA back-end is enabled.
        template<>
        struct GetPreferredWarpSize<DevUniformCudaHipRt<ApiCudaRt>>
        {
            //! \return Always 32; the device argument is not inspected.
            ALPAKA_FN_HOST static constexpr auto getPreferredWarpSize(DevUniformCudaHipRt<ApiCudaRt> const& /* dev */)
                -> std::size_t
            {
                // All CUDA GPUs to date have a warp size of 32 threads.
                constexpr std::size_t cudaWarpSize = 32u;
                return cudaWarpSize;
            }
        };
#    endif // ALPAKA_ACC_GPU_CUDA_ENABLED


        //! The CUDA/HIP RT device reset trait specialization.
        //!
        //! Makes the given device current and then calls TApi::deviceReset().
        //! As a side effect the device stays the current device of the calling thread.

        template<typename TApi>

        struct Reset<DevUniformCudaHipRt<TApi>>

        {

            //! \param dev The device to reset.
            ALPAKA_FN_HOST static auto reset(DevUniformCudaHipRt<TApi> const& dev) -> void

            {

                ALPAKA_DEBUG_FULL_LOG_SCOPE;


                // Make the target device current: deviceReset() is called without a device argument.

                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));

                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::deviceReset());

            }

        };


        //! Native handle trait specialization for CUDA/HIP RT devices.
        template<typename TApi>
        struct NativeHandle<DevUniformCudaHipRt<TApi>>
        {
            //! \param dev The device whose native handle is requested.
            //! \return The value of dev.getNativeHandle(), i.e. the native device index.
            [[nodiscard]] static auto getNativeHandle(DevUniformCudaHipRt<TApi> const& dev)
            {
                auto const handle = dev.getNativeHandle();
                return handle;
            }
        };


        //! The CUDA/HIP RT device platform type trait specialization.
        //!
        //! Maps a DevUniformCudaHipRt<TApi> to the platform instantiated with the same TApi.

        template<typename TApi>

        struct PlatformType<DevUniformCudaHipRt<TApi>>

        {

            //! The platform type belonging to this device type.
            using type = PlatformUniformCudaHipRt<TApi>;

        };


        //! The thread CUDA/HIP device wait specialization.

        //!

        //! Blocks until the device has completed all preceding requested tasks.

        //! Tasks that are enqueued or queues that are created after this call is made are not waited for.
        //!
        //! Implemented by making the given device current and calling TApi::deviceSynchronize();
        //! as a side effect the device stays the current device of the calling thread.

        template<typename TApi>

        struct CurrentThreadWaitFor<DevUniformCudaHipRt<TApi>>

        {

            //! \param dev The device to wait for.
            ALPAKA_FN_HOST static auto currentThreadWaitFor(DevUniformCudaHipRt<TApi> const& dev) -> void

            {

                ALPAKA_DEBUG_FULL_LOG_SCOPE;


                // Set the current device to wait for: deviceSynchronize() is called without a device argument.

                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));

                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::deviceSynchronize());

            }

        };


        //! The blocking queue type trait specialization for CUDA/HIP RT devices.
        template<typename TApi>

        struct QueueType<DevUniformCudaHipRt<TApi>, Blocking>

        {

            //! The queue type selected for the Blocking property.
            using type = QueueUniformCudaHipRtBlocking<TApi>;

        };


        //! The non-blocking queue type trait specialization for CUDA/HIP RT devices.
        template<typename TApi>

        struct QueueType<DevUniformCudaHipRt<TApi>, NonBlocking>

        {

            //! The queue type selected for the NonBlocking property.
            using type = QueueUniformCudaHipRtNonBlocking<TApi>;

        };

    } // namespace trait

} // namespace alpaka


#endif

ApiCudaRt.hpp

Cuda.hpp

ALPAKA_DEBUG_FULL_LOG_SCOPE
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
Definition Debug.hpp:62

DevGenericImpl.hpp

DeviceProperties.hpp

Hip.hpp

Interface.hpp

Properties.hpp

QueueUniformCudaHipRt.hpp

ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
Definition UniformCudaHip.hpp:105

alpaka::DevUniformCudaHipRt
The CUDA/HIP RT device handle.
Definition DevUniformCudaHipRt.hpp:62

alpaka::DevUniformCudaHipRt::getNativeHandle
auto getNativeHandle() const noexcept -> int
Definition DevUniformCudaHipRt.hpp:83

alpaka::DevUniformCudaHipRt::operator!=
ALPAKA_FN_HOST auto operator!=(DevUniformCudaHipRt const &rhs) const -> bool
Definition DevUniformCudaHipRt.hpp:78

alpaka::DevUniformCudaHipRt::DevUniformCudaHipRt
DevUniformCudaHipRt()
Definition DevUniformCudaHipRt.hpp:68

alpaka::DevUniformCudaHipRt::setDeviceProperties
static void setDeviceProperties(DevUniformCudaHipRt< TApi > const &device, alpaka::DeviceProperties &devProperties)
Definition DevUniformCudaHipRt.hpp:100

alpaka::DevUniformCudaHipRt::registerQueue
ALPAKA_FN_HOST auto registerQueue(std::shared_ptr< IDeviceQueue > spQueue) const -> void
Registers the given queue on this device. NOTE: Every queue has to be registered for correct function...
Definition DevUniformCudaHipRt.hpp:95

alpaka::DevUniformCudaHipRt::getAllQueues
ALPAKA_FN_HOST auto getAllQueues() const -> std::vector< std::shared_ptr< IDeviceQueue > >
Definition DevUniformCudaHipRt.hpp:88

alpaka::DevUniformCudaHipRt::operator==
ALPAKA_FN_HOST auto operator==(DevUniformCudaHipRt const &rhs) const -> bool
Definition DevUniformCudaHipRt.hpp:73

alpaka::uniform_cuda_hip::detail::QueueUniformCudaHipRtImpl
The CUDA/HIP RT queue implementation.
Definition QueueUniformCudaHipRt.hpp:41

alpaka::uniform_cuda_hip::detail::QueueUniformCudaHipRt
The CUDA/HIP RT queue.
Definition QueueUniformCudaHipRt.hpp:97

ALPAKA_FN_HOST
#define ALPAKA_FN_HOST
Definition Common.hpp:40

Traits.hpp

Traits.hpp

alpaka
The alpaka accelerator library.
Definition AccCpuOmp2Blocks.hpp:52

alpaka::getPreferredWarpSize
ALPAKA_FN_HOST constexpr auto getPreferredWarpSize(TDev const &dev) -> std::size_t
Definition Traits.hpp:118

alpaka::getName
ALPAKA_FN_HOST auto getName(TDev const &dev) -> std::string
Definition Traits.hpp:87

alpaka::getWarpSizes
ALPAKA_FN_HOST auto getWarpSizes(TDev const &dev) -> std::vector< std::size_t >
Definition Traits.hpp:111

alpaka::reset
ALPAKA_FN_HOST auto reset(TDev const &dev) -> void
Resets the device. What this method does is dependent on the accelerator.
Definition Traits.hpp:126

alpaka::getFreeMemBytes
ALPAKA_FN_HOST auto getFreeMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:104

alpaka::getMemBytes
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95

alpaka::NativeHandle
decltype(getNativeHandle(std::declval< TImpl >())) NativeHandle
Alias to the type of the native handle.
Definition Traits.hpp:36

std
STL namespace.

Traits.hpp

Traits.hpp

alpaka::DeviceProperties
Definition DeviceProperties.hpp:13

alpaka::DeviceProperties::warpSizes
std::vector< std::size_t > warpSizes
Definition DeviceProperties.hpp:16

alpaka::DeviceProperties::name
std::string name
Definition DeviceProperties.hpp:14

alpaka::DeviceProperties::totalGlobalMem
std::size_t totalGlobalMem
Definition DeviceProperties.hpp:15

alpaka::DeviceProperties::preferredWarpSize
std::size_t preferredWarpSize
Definition DeviceProperties.hpp:17

alpaka::PlatformUniformCudaHipRt
The CUDA/HIP RT platform.
Definition PlatformUniformCudaHipRt.hpp:30

alpaka::interface::Implements
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15

alpaka::trait::GetDevByIdx
The device get trait.
Definition DevCpu.hpp:41

alpaka::trait::GetFreeMemBytes
The device free memory size get trait.
Definition Traits.hpp:39

alpaka::trait::GetMemBytes
The device memory size get trait.
Definition Traits.hpp:35

alpaka::trait::GetName
The device name get trait.
Definition Traits.hpp:31

alpaka::trait::GetPreferredWarpSize
The device preferred warp size get trait.
Definition Traits.hpp:47

alpaka::trait::GetWarpSizes
The device warp size get trait.
Definition Traits.hpp:43

alpaka::trait::NativeHandle::getNativeHandle
static auto getNativeHandle(TImpl const &)
Definition Traits.hpp:18

Traits.hpp

Traits.hpp