alpaka/cpu_2Fill_8hpp_source.html

/* Copyright 2025 Maria Michailidi, Anna Polova, Abdulrahman Al Marzouqi

 * SPDX-License-Identifier: MPL-2.0

 */


#pragma once


#include "alpaka/core/Assert.hpp"
#include "alpaka/dim/DimIntegralConst.hpp"
#include "alpaka/extent/Traits.hpp"
#include "alpaka/mem/view/Traits.hpp"
#include "alpaka/meta/Integral.hpp"
#include "alpaka/meta/NdLoop.hpp"

#include <cstddef>
#include <cstdint>
#include <type_traits>
#include <utility>


namespace alpaka

{

    class DevCpu;


    namespace detail

    {

        //! The CPU device N-dimensional memory fill task.
        //!
        //! The constructor captures a copy of the fill value, the extent to fill, and the pitches and native
        //! pointer of the destination view. The view itself is not stored. Invoking the task assigns the value to
        //! every element addressed by an index inside the extent.
        //!
        //! \tparam TDim The dimensionality of the fill; must be greater than zero.
        //! \tparam TView The destination view type.
        //! \tparam TExtent The extent type.
        template<typename TDim, typename TView, typename TExtent>
        struct TaskFillCpu
        {
            static_assert(TDim::value > 0);

            using ExtentSize = Idx<TExtent>;
            using DstSize = Idx<TView>;
            using Elem = alpaka::Elem<TView>;

            static_assert(std::is_trivially_copyable_v<Elem>, "Only trivially copyable types supported for fill");

            //! Captures everything required to run the fill later and, when assertions are enabled, validates
            //! the destination layout.
            //!
            //! \param view The destination view; its pitches and native pointer are read here.
            //! \param value The value assigned to every element; a copy is stored in the task.
            //! \param extent The extent of the region to fill.
            template<typename TViewFwd>
            TaskFillCpu(TViewFwd&& view, Elem const& value, TExtent const& extent)
                : m_value(value)
                , m_extent(getExtents(extent))
// This guard must stay identical to the one around the declaration of m_dstExtent.
#if(!defined(NDEBUG))
                , m_dstExtent(getExtents(view))
#endif
                , m_dstPitchBytes(getPitchesInBytes(view))
                , m_dstMemNative(getPtrNative(view))
            {
                // The region to fill must fit into the destination view.
                ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());

                // TDim::value > 0 is guaranteed by the static_assert above, so the innermost pitch always exists.
                // It must cover at least one element and keep consecutive elements aligned.
                ALPAKA_ASSERT(static_cast<std::size_t>(m_dstPitchBytes[TDim::value - 1]) >= sizeof(Elem));
                ALPAKA_ASSERT(static_cast<std::size_t>(m_dstPitchBytes[TDim::value - 1]) % alignof(Elem) == 0);

                if constexpr(TDim::value > 1)
                {
                    // Every outer pitch must span at least the whole next-inner dimension and be a multiple of
                    // the element alignment.
                    for(int dim = static_cast<int>(TDim::value) - 2; dim >= 0; --dim)
                    {
                        ALPAKA_ASSERT(
                            static_cast<std::size_t>(m_dstPitchBytes[dim])
                            >= static_cast<std::size_t>(m_dstPitchBytes[dim + 1] * m_dstExtent[dim + 1]));
                        ALPAKA_ASSERT(static_cast<std::size_t>(m_dstPitchBytes[dim]) % alignof(Elem) == 0);
                    }
                }

                // The base pointer itself has to be suitably aligned for Elem.
                ALPAKA_ASSERT(reinterpret_cast<std::uintptr_t>(m_dstMemNative) % alignof(Elem) == 0);
            }

            //! Assigns the stored value to every element inside the stored extent.
            //! Does nothing when the extent contains no elements.
            ALPAKA_FN_HOST auto operator()() const -> void
            {
                if(static_cast<std::size_t>(m_extent.prod()) != 0u)
                {
                    meta::ndLoopIncIdx(
                        m_extent,
                        [&](Vec<TDim, ExtentSize> const& idx)
                        {
                            // The index uses the extent's index type while the pitches use the view's index type;
                            // cast the index so both operands of the product have the same type.
                            auto const offsetBytes
                                = static_cast<std::uintptr_t>((castVec<DstSize>(idx) * m_dstPitchBytes).sum());
                            auto* const dstBytes = reinterpret_cast<std::uint8_t*>(m_dstMemNative) + offsetBytes;
// The constructor asserts that the base pointer and all pitches are multiples of alignof(Elem), so every
// element address is aligned. The alignment hint is a GCC/Clang builtin; other compilers use a plain cast.
#if defined(__GNUC__) || defined(__clang__)
                            auto* const elem
                                = static_cast<Elem*>(__builtin_assume_aligned(dstBytes, alignof(Elem)));
#else
                            auto* const elem = reinterpret_cast<Elem*>(dstBytes);
#endif
                            *elem = m_value;
                        });
                }
            }

        private:
            //! Copy of the value written to every element.
            Elem const m_value;
            //! Extent of the region to fill.
            Vec<TDim, ExtentSize> const m_extent;
// Only the assertions in the constructor read this member, so it exists exactly when assertions are compiled in.
#if(!defined(NDEBUG))
            //! Extent of the destination view.
            Vec<TDim, DstSize> const m_dstExtent;
#endif
            //! Pitches of the destination view in bytes.
            Vec<TDim, DstSize> const m_dstPitchBytes;
            //! Native pointer to the destination memory.
            Elem* const m_dstMemNative;
        };


        //! The CPU device 0-dimensional memory fill task specialization.
        //!
        //! Stores a copy of the fill value and the native pointer of the destination view. Invoking the task
        //! performs a single assignment through that pointer.
        //!
        //! \tparam TView The destination view type.
        //! \tparam TExtent The extent type.
        template<typename TView, typename TExtent>
        struct TaskFillCpu<DimInt<0u>, TView, TExtent>
        {
            using Elem = alpaka::Elem<TView>;

            // Same requirement as the N-dimensional task: the value is written by plain assignment.
            static_assert(std::is_trivially_copyable_v<Elem>, "Only trivially copyable types supported for fill");

            //! Captures the fill value and the destination pointer.
            //!
            //! \param view The destination view; its native pointer is read here.
            //! \param value The value to assign; a copy is stored in the task.
            //! \param extent The extent to fill; only inspected by the assertions.
            template<typename TViewFwd>
            TaskFillCpu(TViewFwd&& view, Elem const& value, [[maybe_unused]] TExtent const& extent)
                : m_value(value)
                , m_dstMemNative(getPtrNative(view))
            {
                // Both the requested extent and the view are expected to describe exactly one element.
                ALPAKA_ASSERT(getExtents(extent).prod() == 1u);
                ALPAKA_ASSERT(getExtents(view).prod() == 1u);
                // The destination pointer has to be suitably aligned for Elem.
                ALPAKA_ASSERT(reinterpret_cast<std::uintptr_t>(m_dstMemNative) % alignof(Elem) == 0);
            }

            //! Assigns the stored value to the single destination element.
            ALPAKA_FN_HOST auto operator()() const noexcept -> void
            {
                *m_dstMemNative = m_value;
            }

        private:
            //! Copy of the value written to the element.
            Elem const m_value;
            //! Native pointer to the destination element.
            Elem* const m_dstMemNative;
        };


    } // namespace detail


    namespace trait

    {

        //! The memory fill task trait specialization for CPU devices.
        template<typename TDim>
        struct CreateTaskFill<TDim, DevCpu>
        {
            //! Creates the CPU fill task for the given view, value and extent.
            //!
            //! \param view The destination view, forwarded to the task constructor.
            //! \param value The value every element inside the extent is set to.
            //! \param extent The extent of the region to fill.
            //! \return A detail::TaskFillCpu instantiated for TDim, the view type and TExtent.
            template<typename TExtent, typename TViewFwd>
            ALPAKA_FN_HOST static auto createTaskFill(
                TViewFwd&& view,
                alpaka::Elem<std::remove_reference_t<TViewFwd>> const& value,
                TExtent const& extent)
            {
                using View = std::remove_reference_t<TViewFwd>;

                // Reject unsupported element types here, before the task type is instantiated.
                static_assert(
                    std::is_trivially_copyable_v<alpaka::Elem<View>>,
                    "Only trivially copyable types are supported for fill");

                using Task = alpaka::detail::TaskFillCpu<TDim, View, TExtent>;
                return Task{std::forward<TViewFwd>(view), value, extent};
            }
        };

    } // namespace trait


} // namespace alpaka

Assert.hpp

ALPAKA_ASSERT
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
Definition Assert.hpp:13

DimIntegralConst.hpp

Integral.hpp

NdLoop.hpp

alpaka::Vec
An n-dimensional vector.
Definition Vec.hpp:38

ALPAKA_FN_HOST
#define ALPAKA_FN_HOST
Definition Common.hpp:40

Traits.hpp

Traits.hpp

alpaka::meta::ndLoopIncIdx
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto ndLoopIncIdx(TExtentVec const &extent, TFnObj const &f) -> void
Loops over an n-dimensional iteration index variable calling f(idx, args...) for each iteration....
Definition NdLoop.hpp:81

alpaka
The alpaka accelerator library.
Definition AccCpuOmp2Blocks.hpp:52

alpaka::Idx
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29

alpaka::getPitchesInBytes
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
Definition Traits.hpp:199

alpaka::createTaskFill
ALPAKA_FN_HOST auto createTaskFill(TViewFwd &&view, TValue const &value, TExtent const &extent)
Definition Traits.hpp:228

alpaka::getPtrNative
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition Traits.hpp:139

alpaka::Elem
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition Traits.hpp:21

alpaka::getExtents
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T > >
Definition Traits.hpp:59

alpaka::DimInt
std::integral_constant< std::size_t, N > DimInt
Definition DimIntegralConst.hpp:15

alpaka::detail::TaskFillCpu< DimInt< 0u >, TView, TExtent >::operator()
ALPAKA_FN_HOST auto operator()() const noexcept -> void
Definition Fill.hpp:105

alpaka::detail::TaskFillCpu< DimInt< 0u >, TView, TExtent >::Elem
alpaka::Elem< TView > Elem
Definition Fill.hpp:93

alpaka::detail::TaskFillCpu< DimInt< 0u >, TView, TExtent >::TaskFillCpu
TaskFillCpu(TViewFwd &&view, Elem const &value, TExtent const &extent)
Definition Fill.hpp:96

alpaka::detail::TaskFillCpu
The CPU device N-dimensional memory fill task.
Definition Fill.hpp:23

alpaka::detail::TaskFillCpu::DstSize
Idx< TView > DstSize
Definition Fill.hpp:27

alpaka::detail::TaskFillCpu::operator()
ALPAKA_FN_HOST auto operator()() const -> void
Definition Fill.hpp:61

alpaka::detail::TaskFillCpu::ExtentSize
Idx< TExtent > ExtentSize
Definition Fill.hpp:26

alpaka::detail::TaskFillCpu::TaskFillCpu
TaskFillCpu(TViewFwd &&view, Elem const &value, TExtent const &extent)
Definition Fill.hpp:33

alpaka::detail::TaskFillCpu::Elem
alpaka::Elem< TView > Elem
Definition Fill.hpp:28