alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
WarpUniformCudaHipBuiltIn.hpp
Go to the documentation of this file.
1/* Copyright 2026 Sergei Bastrakov, David M. Rogers, Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber, Aurora Perego,
2 * Simone Balducci
3 * SPDX-License-Identifier: MPL-2.0
4 */
5
6#pragma once
7
11
12#include <cstdint>
13
14#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
15
16namespace alpaka::warp
17{
    //! The GPU CUDA/HIP warp.
    //!
    //! Implements the warp concept for the CUDA and HIP back-ends. The only
    //! member is the mask type used by warp vote/ballot operations; its width
    //! matches the back-end's maximum warp/wavefront size.
    struct WarpUniformCudaHipBuiltIn : public interface::Implements<ConceptWarp, WarpUniformCudaHipBuiltIn>
    {
#    if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
        // CUDA warps are 32 lanes, so a 32-bit mask suffices.
        using mask_type = std::uint32_t;
#    else
        // HIP wavefronts may be up to 64 lanes, so a 64-bit mask is needed.
        using mask_type = std::uint64_t;
#    endif
    };
27
28# if !defined(ALPAKA_HOST_ONLY)
29
30# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !ALPAKA_LANG_CUDA
31# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
32# endif
33
34# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !ALPAKA_LANG_HIP
35# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
36# endif
37
38 namespace trait
39 {
40
41 template<>
43 {
            //! Returns the warp size of the executing device at run time via the
            //! built-in warpSize variable (32 on CUDA; 32 or 64 on HIP, depending
            //! on the target device).
            static __device__ auto getSize(warp::WarpUniformCudaHipBuiltIn const& /*warp*/) -> std::int32_t
            {
                return warpSize;
            }
48 };
49
50 template<>
52 {
53 static constexpr __device__ auto getSizeCompileTime() -> std::int32_t
54 {
55# if defined(__CUDA_ARCH__)
56 // CUDA always has a warp size of 32
57 return 32;
58# elif defined(__HIP_DEVICE_COMPILE__)
59 // HIP/ROCm may have a wavefront of 32 or 64 depending on the target device
60# if defined(__GFX9__)
61 // GCN 5.0 and CDNA GPUs have a wavefront size of 64
62 return 64;
63# elif defined(__GFX10__) or defined(__GFX11__) or defined(__GFX12__)
64 // RDNA GPUs have a wavefront size of 32
65 return 32;
66# else
67 // Unknown AMD GPU architecture
68# ifdef ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
69 return ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
70# else
71# error The current AMD GPU architucture is not supported by this version of alpaka. You can define a default wavefront size setting the preprocessor macro ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
72 return 0;
73# endif
74# endif
75# endif
76 // Host compilation
77 return 0;
78 }
79 };
80
81 template<>
83 {
84 static constexpr __device__ auto getSizeUpperLimit() -> std::int32_t
85 {
86# if defined(__CUDA_ARCH__)
87 // CUDA always has a warp size of 32
88 return 32;
89# elif defined(__HIP_DEVICE_COMPILE__)
90 // HIP/ROCm may have a wavefront of 32 or 64 depending on the target device
91# if defined(__GFX9__)
92 // GCN 5.0 and CDNA GPUs have a wavefront size of 64
93 return 64;
94# elif defined(__GFX10__) or defined(__GFX11__) or defined(__GFX12__)
95 // RDNA GPUs have a wavefront size of 32
96 return 32;
97# else
98 // Unknown AMD GPU architecture
99# ifdef ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
100 return ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
101# else
102# error The current AMD GPU architucture is not supported by this version of alpaka. You can define a default wavefront size setting the preprocessor macro ALPAKA_DEFAULT_AMD_WAVEFRONT_SIZE
103 return 64;
104# endif
105# endif
106# endif
107 // Host compilation
108 return 64;
109 }
110 };
111
112 template<>
114 {
115 static __device__ auto activemask(warp::WarpUniformCudaHipBuiltIn const& /*warp*/)
117 {
118# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) \
119 || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
120 return __activemask();
121# else
122 // No HIP intrinsic for it, emulate via ballot
123 return __ballot(1);
124# endif
125 }
126 };
127
128 template<>
130 {
            //! Warp vote: returns non-zero iff predicate is non-zero for every
            //! participating lane of the warp.
            static __device__ auto all(
                [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
                std::int32_t predicate) -> std::int32_t
            {
#    if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)                                                                          \
        || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
                // NOTE(review): the participant set is the currently-active mask, so all
                // lanes meant to take part must be converged at this call site.
                return __all_sync(activemask(warp), predicate);
#    else
                // Legacy HIP (< ROCm 6.2) only provides the mask-less vote intrinsic.
                return __all(predicate);
#    endif
            }
142 };
143
144 template<>
146 {
            //! Warp vote: returns non-zero iff predicate is non-zero for at least
            //! one participating lane of the warp.
            static __device__ auto any(
                [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
                std::int32_t predicate) -> std::int32_t
            {
#    if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)                                                                          \
        || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
                // NOTE(review): the participant set is the currently-active mask, so all
                // lanes meant to take part must be converged at this call site.
                return __any_sync(activemask(warp), predicate);
#    else
                // Legacy HIP (< ROCm 6.2) only provides the mask-less vote intrinsic.
                return __any(predicate);
#    endif
            }
158 };
159
160 template<>
162 {
            //! Warp vote: returns a mask whose Nth bit is set iff predicate is
            //! non-zero for participating lane N (32- or 64-bit depending on the
            //! back-end's mask_type).
            static __device__ auto ballot(
                [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
                std::int32_t predicate) -> WarpUniformCudaHipBuiltIn::mask_type
            {
#    if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)                                                                          \
        || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
                // NOTE(review): the participant set is the currently-active mask, so all
                // lanes meant to take part must be converged at this call site.
                return __ballot_sync(activemask(warp), predicate);
#    else
                // Legacy HIP (< ROCm 6.2) only provides the mask-less vote intrinsic.
                return __ballot(predicate);
#    endif
            }
174 };
175
176 template<>
178 {
            //! Warp shuffle: each participating lane receives val from lane
            //! srcLane, within sub-groups of width lanes.
            template<typename T>
            static __device__ auto shfl(
                [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
                T val,
                int srcLane,
                std::int32_t width) -> T
            {
#    if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)                                                                          \
        || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
                // NOTE(review): the participant set is the currently-active mask, so all
                // lanes meant to take part must be converged at this call site.
                return __shfl_sync(activemask(warp), val, srcLane, width);
#    else
                // Legacy HIP (< ROCm 6.2) only provides the mask-less shuffle intrinsic.
                return __shfl(val, srcLane, width);
#    endif
            }
193 };
194
195 template<>
197 {
            //! Warp shuffle up: each participating lane receives val from the lane
            //! with an index lower by offset, within sub-groups of width lanes.
            template<typename T>
            static __device__ auto shfl_up(
                [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
                T val,
                std::uint32_t offset,
                std::int32_t width) -> T
            {
#    if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)                                                                          \
        || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
                // NOTE(review): the participant set is the currently-active mask, so all
                // lanes meant to take part must be converged at this call site.
                return __shfl_up_sync(activemask(warp), val, offset, width);
#    else
                // Legacy HIP (< ROCm 6.2) only provides the mask-less shuffle intrinsic.
                return __shfl_up(val, offset, width);
#    endif
            }
212 };
213
214 template<>
216 {
            //! Warp shuffle down: each participating lane receives val from the
            //! lane with an index higher by offset, within sub-groups of width lanes.
            template<typename T>
            static __device__ auto shfl_down(
                [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
                T val,
                std::uint32_t offset,
                std::int32_t width) -> T
            {
#    if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)                                                                          \
        || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
                // NOTE(review): the participant set is the currently-active mask, so all
                // lanes meant to take part must be converged at this call site.
                return __shfl_down_sync(activemask(warp), val, offset, width);
#    else
                // Legacy HIP (< ROCm 6.2) only provides the mask-less shuffle intrinsic.
                return __shfl_down(val, offset, width);
#    endif
            }
231 };
232
233 template<>
235 {
            //! Warp shuffle xor: each participating lane receives val from the lane
            //! whose index is its own XOR-ed with mask, within sub-groups of width lanes.
            template<typename T>
            static __device__ auto shfl_xor(
                [[maybe_unused]] warp::WarpUniformCudaHipBuiltIn const& warp,
                T val,
                std::int32_t mask,
                std::int32_t width) -> T
            {
#    if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)                                                                          \
        || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0))
                // NOTE(review): the participant set is the currently-active mask, so all
                // lanes meant to take part must be converged at this call site.
                return __shfl_xor_sync(activemask(warp), val, mask, width);
#    else
                // Legacy HIP (< ROCm 6.2) only provides the mask-less shuffle intrinsic.
                return __shfl_xor(val, mask, width);
#    endif
            }
250 };
251
252 } // namespace trait
253# endif
254} // namespace alpaka::warp
255
256#endif
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto activemask(TWarp const &warp) -> typename TWarp::mask_type
Returns a 32- or 64-bit unsigned integer (depending on the accelerator) whose Nth bit is set if and o...
Definition Traits.hpp:121
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
static __device__ auto activemask(warp::WarpUniformCudaHipBuiltIn const &) -> WarpUniformCudaHipBuiltIn::mask_type
The active mask trait.
Definition Traits.hpp:64
static __device__ auto all(warp::WarpUniformCudaHipBuiltIn const &warp, std::int32_t predicate) -> std::int32_t
The all warp vote trait.
Definition Traits.hpp:36
static __device__ auto any(warp::WarpUniformCudaHipBuiltIn const &warp, std::int32_t predicate) -> std::int32_t
The any warp vote trait.
Definition Traits.hpp:40
static __device__ auto ballot(warp::WarpUniformCudaHipBuiltIn const &warp, std::int32_t predicate) -> WarpUniformCudaHipBuiltIn::mask_type
The ballot warp vote trait.
Definition Traits.hpp:44
static constexpr __device__ auto getSizeCompileTime() -> std::int32_t
The compile-time warp size trait.
Definition Traits.hpp:28
static constexpr __device__ auto getSizeUpperLimit() -> std::int32_t
The warp size upper-limit trait.
Definition Traits.hpp:32
static __device__ auto getSize(warp::WarpUniformCudaHipBuiltIn const &) -> std::int32_t
The warp size trait.
Definition Traits.hpp:24
static __device__ auto shfl_down(warp::WarpUniformCudaHipBuiltIn const &warp, T val, std::uint32_t offset, std::int32_t width) -> T
The shfl down warp swizzling trait.
Definition Traits.hpp:56
static __device__ auto shfl_up(warp::WarpUniformCudaHipBuiltIn const &warp, T val, std::uint32_t offset, std::int32_t width) -> T
The shfl up warp swizzling trait.
Definition Traits.hpp:52
static __device__ auto shfl_xor(warp::WarpUniformCudaHipBuiltIn const &warp, T val, std::int32_t mask, std::int32_t width) -> T
The shfl xor warp swizzling trait.
Definition Traits.hpp:60
static __device__ auto shfl(warp::WarpUniformCudaHipBuiltIn const &warp, T val, int srcLane, std::int32_t width) -> T
The shfl warp swizzling trait.
Definition Traits.hpp:48