LLAMA — Low-Level Abstraction of Memory Access
Copy.hpp — view-to-view copy algorithms (field-wise copy, per-blob memcpy, AoSoA common-block copy).
1 // Copyright 2021 Bernhard Manfred Gruber
2 // SPDX-License-Identifier: MPL-2.0
3 
4 #pragma once
5 
6 #include "View.hpp"
7 #include "mapping/AoSoA.hpp"
8 #include "mapping/SoA.hpp"
9 
10 #include <cstring>
11 #include <numeric>
12 
13 namespace llama
14 {
15  namespace internal
16  {
17  template<typename RecordDim>
19  {
20  forEachLeafCoord<RecordDim>(
21  [](auto rc)
22  {
23  static_assert(
24  std::is_trivially_copyable_v<GetType<RecordDim, decltype(rc)>>,
25  "All types in the record dimension must be trivially copyable");
26  });
27  }
28 
29  // need a custom memcpy symbol in LLAMA, because with clang+CUDA, there are multiple std::memcpy symbols, so
30  // the address is ambiguous.
31  inline constexpr auto memcpy
32  = [](void* dst, const void* src, std::size_t size) { std::memcpy(dst, src, size); };
33 
34  template<typename MemcpyFunc = decltype(memcpy)>
36  std::byte* dst,
37  const std::byte* src,
38  std::size_t size,
39  std::size_t threadId = 0,
40  std::size_t threadCount = 1,
41  MemcpyFunc singleThreadMemcpy = memcpy)
42  {
43  const auto sizePerThread = size / threadCount;
44  const auto sizeLastThread = sizePerThread + size % threadCount;
45  const auto sizeThisThread = threadId == threadCount - 1 ? sizeLastThread : sizePerThread;
46  singleThreadMemcpy(dst + threadId * sizePerThread, src + threadId * sizePerThread, sizeThisThread);
47  }
48  } // namespace internal
49 
56  template<typename Mapping, typename SrcBlob, typename DstBlob, typename MemcpyFunc = decltype(internal::memcpy)>
58  const View<Mapping, SrcBlob>& srcView,
59  View<Mapping, DstBlob>& dstView,
60  std::size_t threadId = 0,
61  std::size_t threadCount = 1,
62  MemcpyFunc singleThreadMemcpy = internal::memcpy)
63  {
64  internal::assertTrivialCopyable<typename Mapping::RecordDim>();
65 
66  // TODO(bgruber): we do not verify if the mappings have other runtime state than the array dimensions
67  if(srcView.extents() != dstView.extents())
68  throw std::runtime_error{"Array dimensions sizes are different"};
69 
70  // TODO(bgruber): this is maybe not the best parallel copying strategy
71  for(std::size_t i = 0; i < Mapping::blobCount; i++)
73  &dstView.blobs()[i][0],
74  &srcView.blobs()[i][0],
75  dstView.mapping().blobSize(i),
76  threadId,
77  threadCount,
78  singleThreadMemcpy);
79  }
80 
81  namespace internal
82  {
83  inline constexpr auto copyBlobWithMemcpy = [](const auto& src, auto& dst, std::size_t size)
84  {
85  static_assert(std::is_trivially_copyable_v<std::remove_reference_t<decltype(*&src[0])>>);
86  static_assert(std::is_trivially_copyable_v<std::remove_reference_t<decltype(*&dst[0])>>);
87  std::memcpy(&dst[0], &src[0], size);
88  };
89  } // namespace internal
90 
96  template<
97  typename Mapping,
98  typename SrcBlob,
99  typename DstBlob,
100  typename BlobCopyFunc = decltype(internal::copyBlobWithMemcpy)>
101  void copyBlobs(
102  const View<Mapping, SrcBlob>& srcView,
103  View<Mapping, DstBlob>& dstView,
104  BlobCopyFunc copyBlob = internal::copyBlobWithMemcpy)
105  {
106  // TODO(bgruber): we do not verify if the mappings have other runtime state than the array dimensions
107  if(srcView.extents() != dstView.extents())
108  throw std::runtime_error{"Array dimensions sizes are different"};
109  for(std::size_t i = 0; i < Mapping::blobCount; i++)
110  copyBlob(srcView.blobs()[i], dstView.blobs()[i], dstView.mapping().blobSize(i));
111  }
112 
117  template<typename SrcMapping, typename SrcBlob, typename DstMapping, typename DstBlob>
119  const View<SrcMapping, SrcBlob>& srcView,
120  View<DstMapping, DstBlob>& dstView,
121  std::size_t threadId = 0,
122  std::size_t threadCount = 1)
123  {
124  // TODO(bgruber): think if we can remove this restriction
125  static_assert(
126  std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>,
127  "The source and destination record dimensions must be the same");
128 
129  if(srcView.extents() != dstView.extents())
130  throw std::runtime_error{"Array dimensions sizes are different"};
131 
132  auto copyOne = [&](auto ai) LLAMA_LAMBDA_INLINE
133  {
134  forEachLeafCoord<typename DstMapping::RecordDim>([&](auto rc) LLAMA_LAMBDA_INLINE
135  { dstView(ai)(rc) = srcView(ai)(rc); });
136  };
137 
138  constexpr auto dims = SrcMapping::ArrayExtents::rank;
139  const auto extents = srcView.extents().toArray();
140  const auto workPerThread = (extents[0] + threadCount - 1) / threadCount;
141  const auto start = threadId * workPerThread;
142  const auto end = std::min((threadId + 1) * workPerThread, static_cast<std::size_t>(extents[0]));
143  for(auto i = start; i < end; i++)
144  {
145  using SrcSizeType = typename SrcMapping::ArrayExtents::value_type;
146  if constexpr(dims > 1)
147  forEachArrayIndex(extents, copyOne, static_cast<SrcSizeType>(i));
148  else
149  copyOne(ArrayIndex<SrcSizeType, dims>{static_cast<std::size_t>(i)});
150  }
151  }
152 
153  namespace internal
154  {
155  template<typename Mapping>
156  inline constexpr std::size_t aosoaLanes = 1;
157 
158  template<
159  typename ArrayExtents,
160  typename RecordDim,
163  typename LinearizeArrayIndexFunctor,
164  template<typename>
165  typename PermuteSBFields>
166  inline constexpr std::size_t aosoaLanes<
167  mapping::
168  SoA<ArrayExtents, RecordDim, Blobs, SubArrayAlignment, LinearizeArrayIndexFunctor, PermuteSBFields>>
169  = std::numeric_limits<std::size_t>::max();
170 
171  template<
172  typename ArrayExtents,
173  typename RecordDim,
174  typename ArrayExtents::value_type Lanes,
176  typename LinearizeArrayIndexFunctor,
177  template<typename>
178  typename PermuteFields>
179  inline constexpr std::size_t
180  aosoaLanes<mapping::AoSoA<ArrayExtents, RecordDim, Lanes, FA, LinearizeArrayIndexFunctor, PermuteFields>>
181  = Lanes;
182  } // namespace internal
183 
189  template<typename SrcMapping, typename SrcBlob, typename DstMapping, typename DstBlob>
191  const View<SrcMapping, SrcBlob>& srcView,
192  View<DstMapping, DstBlob>& dstView,
193  std::size_t threadId = 0,
194  std::size_t threadCount = 1)
195  {
196  static_assert(
197  mapping::isAoSoA<SrcMapping> || mapping::isSoA<SrcMapping>,
198  "Only AoSoA and SoA mappings allowed as source");
199  static_assert(
200  mapping::isAoSoA<DstMapping> || mapping::isSoA<DstMapping>,
201  "Only AoSoA and SoA mappings allowed as destination");
202 
203  // TODO(bgruber): think if we can remove this restriction
204  static_assert(
205  std::is_same_v<typename SrcMapping::RecordDim, typename DstMapping::RecordDim>,
206  "The source and destination record dimensions must be the same");
207  static_assert(
208  std::is_same_v<
209  typename SrcMapping::LinearizeArrayIndexFunctor,
210  typename DstMapping::LinearizeArrayIndexFunctor>,
211  "Source and destination mapping need to use the same array dimensions linearizer");
212  using RecordDim = typename SrcMapping::RecordDim;
213  internal::assertTrivialCopyable<RecordDim>();
214 
215  static constexpr auto lanesSrc = internal::aosoaLanes<SrcMapping>;
216  static constexpr auto lanesDst = internal::aosoaLanes<DstMapping>;
217 
218  if(srcView.extents() != dstView.extents())
219  throw std::runtime_error{"Array dimensions sizes are different"};
220 
221  static constexpr auto srcIsAoSoA = lanesSrc != std::numeric_limits<std::size_t>::max();
222  static constexpr auto dstIsAoSoA = lanesDst != std::numeric_limits<std::size_t>::max();
223 
224  static_assert(srcIsAoSoA || dstIsAoSoA, "At least one of the mappings must be an AoSoA mapping");
225  static_assert(!srcIsAoSoA || SrcMapping::blobCount == 1, "Implementation assumes AoSoA with single blob");
226  static_assert(!dstIsAoSoA || DstMapping::blobCount == 1, "Implementation assumes AoSoA with single blob");
227 
228  const auto flatSize = product(dstView.extents());
229 
230  // TODO(bgruber): implement the following by adding additional copy loops for the remaining elements
231  if(!srcIsAoSoA && flatSize % lanesDst != 0)
232  throw std::runtime_error{"Source SoA mapping's total array elements must be evenly divisible by the "
233  "destination AoSoA Lane count."};
234  if(!dstIsAoSoA && flatSize % lanesSrc != 0)
235  throw std::runtime_error{"Destination SoA mapping's total array elements must be evenly divisible by the "
236  "source AoSoA Lane count."};
237 
238  auto mapSrc = [&](std::size_t flatArrayIndex, auto rc) LLAMA_LAMBDA_INLINE
239  {
240  const auto [blob, off] = srcView.mapping().blobNrAndOffset(flatArrayIndex, rc);
241  return &srcView.blobs()[blob][off];
242  };
243  auto mapDst = [&](std::size_t flatArrayIndex, auto rc) LLAMA_LAMBDA_INLINE
244  {
245  const auto [blob, off] = dstView.mapping().blobNrAndOffset(flatArrayIndex, rc);
246  return &dstView.blobs()[blob][off];
247  };
248 
249  static constexpr auto l = []
250  {
251  if constexpr(srcIsAoSoA && dstIsAoSoA)
252  return std::gcd(lanesSrc, lanesDst);
253  return std::min(lanesSrc, lanesDst);
254  }();
255  if constexpr(lanesSrc < lanesDst)
256  {
257  static_assert(srcIsAoSoA);
258 
259  // optimized for linear reading
260  const auto elementsPerThread = flatSize / lanesSrc / threadCount * lanesSrc;
261  const auto start = threadId * elementsPerThread;
262  const auto stop = threadId == threadCount - 1 ? flatSize : (threadId + 1) * elementsPerThread;
263 
264  static constexpr auto packed = SrcMapping::fieldAlignment == mapping::FieldAlignment::Pack;
265  decltype(mapSrc(start, RecordCoord<>{})) src;
266  if constexpr(packed)
267  src = mapSrc(start, RecordCoord<>{});
268  for(std::size_t i = start; i < stop; i += lanesSrc)
269  forEachLeafCoord<RecordDim>(
270  [&](auto rc) LLAMA_LAMBDA_INLINE
271  {
272  if constexpr(!packed)
273  src = mapSrc(i, rc);
274  for(std::size_t j = 0; j < lanesSrc; j += l)
275  {
276  assert(src == mapSrc(i + j, rc));
277  static constexpr auto bytes = l * sizeof(GetType<RecordDim, decltype(rc)>);
278  std::memcpy(mapDst(i + j, rc), src, bytes);
279  src += bytes;
280  }
281  });
282  }
283  else
284  {
285  static_assert(dstIsAoSoA);
286 
287  // optimized for linear writing
288  const auto elementsPerThread = flatSize / lanesDst / threadCount * lanesDst;
289  const auto start = threadId * elementsPerThread;
290  const auto stop = threadId == threadCount - 1 ? flatSize : (threadId + 1) * elementsPerThread;
291 
292  static constexpr auto packed = DstMapping::fieldAlignment == mapping::FieldAlignment::Pack;
293  decltype(mapDst(start, RecordCoord<>{})) dst;
294  if constexpr(packed)
295  dst = mapDst(start, RecordCoord<>{});
296  for(std::size_t i = start; i < stop; i += lanesDst)
297  forEachLeafCoord<RecordDim>(
298  [&](auto rc) LLAMA_LAMBDA_INLINE
299  {
300  if constexpr(!packed)
301  dst = mapDst(i, rc);
302  for(std::size_t j = 0; j < lanesDst; j += l)
303  {
304  assert(dst == mapDst(i + j, rc));
305  constexpr auto bytes = l * sizeof(GetType<RecordDim, decltype(rc)>);
306  std::memcpy(dst, mapSrc(i + j, rc), bytes);
307  dst += bytes;
308  }
309  });
310  }
311  }
312 
318  template<typename SrcMapping, typename DstMapping, typename SFINAE = void>
319  struct Copy
320  {
321  template<typename SrcView, typename DstView>
322  void operator()(const SrcView& srcView, DstView& dstView, std::size_t threadId, std::size_t threadCount) const
323  {
324  fieldWiseCopy(srcView, dstView, threadId, threadCount);
325  }
326  };
327 
329  template<typename Mapping>
330  struct Copy<Mapping, Mapping>
331  {
332  template<typename SrcView, typename DstView>
333  void operator()(const SrcView& srcView, DstView& dstView, std::size_t threadId, std::size_t threadCount) const
334  {
335  // FIXME(bgruber): need to fallback to fieldWiseCopy when elements are not trivially copyable
336  memcpyBlobs(srcView, dstView, threadId, threadCount);
337  }
338  };
339 
341  template<
342  typename ArrayExtents,
343  typename RecordDim,
344  typename LinearizeArrayIndex,
345  typename ArrayExtents::value_type LanesSrc,
346  typename ArrayExtents::value_type LanesDst,
347  mapping::FieldAlignment AlignSrc,
348  mapping::FieldAlignment AlignDst,
349  template<typename>
350  typename PermuteFields>
351  struct Copy<
352  mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, AlignSrc, LinearizeArrayIndex, PermuteFields>,
353  mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, AlignDst, LinearizeArrayIndex, PermuteFields>,
354  std::enable_if_t<LanesSrc != LanesDst>>
355  {
356  template<typename SrcBlob, typename DstBlob>
358  const View<
360  SrcBlob>& srcView,
361  View<
363  DstBlob>& dstView,
364  std::size_t threadId,
365  std::size_t threadCount)
366  {
367  aosoaCommonBlockCopy(srcView, dstView, threadId, threadCount);
368  }
369  };
370 
372  template<
373  typename ArrayExtents,
374  typename RecordDim,
375  typename LinearizeArrayIndex,
376  template<typename>
377  typename PermuteFields,
378  typename ArrayExtents::value_type LanesSrc,
379  mapping::FieldAlignment AlignSrc,
380  mapping::Blobs DstBlobs,
381  mapping::SubArrayAlignment DstSubArrayAlignment>
382  struct Copy<
383  mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, AlignSrc, LinearizeArrayIndex, PermuteFields>,
384  mapping::SoA<ArrayExtents, RecordDim, DstBlobs, DstSubArrayAlignment, LinearizeArrayIndex, PermuteFields>>
385  {
386  template<typename SrcBlob, typename DstBlob>
388  const View<
390  SrcBlob>& srcView,
391  View<
392  mapping::
393  SoA<ArrayExtents, RecordDim, DstBlobs, DstSubArrayAlignment, LinearizeArrayIndex, PermuteFields>,
394  DstBlob>& dstView,
395  std::size_t threadId,
396  std::size_t threadCount)
397  {
398  aosoaCommonBlockCopy(srcView, dstView, threadId, threadCount);
399  }
400  };
401 
403  template<
404  typename ArrayExtents,
405  typename RecordDim,
406  typename LinearizeArrayIndex,
407  template<typename>
408  typename PermuteFields,
409  typename ArrayExtents::value_type LanesDst,
410  mapping::FieldAlignment AlignDst,
411  mapping::Blobs SrcBlobs,
412  mapping::SubArrayAlignment SrcSubArrayAlignment>
413  struct Copy<
414  mapping::SoA<ArrayExtents, RecordDim, SrcBlobs, SrcSubArrayAlignment, LinearizeArrayIndex, PermuteFields>,
415  mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, AlignDst, LinearizeArrayIndex, PermuteFields>>
416  {
417  template<typename SrcBlob, typename DstBlob>
419  const View<
420  mapping::
421  SoA<ArrayExtents, RecordDim, SrcBlobs, SrcSubArrayAlignment, LinearizeArrayIndex, PermuteFields>,
422  SrcBlob>& srcView,
423  View<
425  DstBlob>& dstView,
426  std::size_t threadId,
427  std::size_t threadCount)
428  {
429  aosoaCommonBlockCopy(srcView, dstView, threadId, threadCount);
430  }
431  };
432 
434  template<
435  typename ArrayExtents,
436  typename RecordDim,
437  mapping::Blobs SrcBlobs,
438  mapping::Blobs DstBlobs,
439  mapping::SubArrayAlignment SrcSubArrayAlignment,
440  mapping::SubArrayAlignment DstSubArrayAlignment,
441  typename LinearizeArrayIndex,
442  template<typename>
443  typename PermuteFields>
444  struct Copy<
445  mapping::SoA<ArrayExtents, RecordDim, SrcBlobs, SrcSubArrayAlignment, LinearizeArrayIndex, PermuteFields>,
446  mapping::SoA<ArrayExtents, RecordDim, DstBlobs, DstSubArrayAlignment, LinearizeArrayIndex, PermuteFields>,
447  std::enable_if_t<SrcBlobs != DstBlobs || SrcSubArrayAlignment != DstSubArrayAlignment>>
448  {
449  template<typename SrcBlob, typename DstBlob>
451  const View<
452  mapping::
453  SoA<ArrayExtents, RecordDim, SrcBlobs, SrcSubArrayAlignment, LinearizeArrayIndex, PermuteFields>,
454  SrcBlob>& srcView,
455  View<
456  mapping::
457  SoA<ArrayExtents, RecordDim, DstBlobs, DstSubArrayAlignment, LinearizeArrayIndex, PermuteFields>,
458  DstBlob>& dstView,
459  std::size_t threadId,
460  std::size_t threadCount)
461  {
462  if(srcView.extents() != dstView.extents())
463  throw std::runtime_error{"Array dimensions sizes are different"};
464 
465  const auto subArrayLength = product(srcView.extents());
466  forEachLeafCoord<RecordDim>(
467  [&](auto rc) LLAMA_LAMBDA_INLINE
468  {
469  auto subArrayStart = [&](auto& view, auto rc) LLAMA_LAMBDA_INLINE
470  {
471  const auto [blob, off] = view.mapping().blobNrAndOffset(0, rc);
472  return &view.blobs()[blob][off];
473  };
475  subArrayStart(dstView, rc),
476  subArrayStart(srcView, rc),
477  subArrayLength * sizeof(GetType<RecordDim, decltype(rc)>),
478  threadId,
479  threadCount);
480  });
481  }
482  };
483 
490  template<typename SrcMapping, typename SrcBlob, typename DstMapping, typename DstBlob>
491  void copy(
492  const View<SrcMapping, SrcBlob>& srcView,
493  View<DstMapping, DstBlob>& dstView,
494  std::size_t threadId = 0,
495  std::size_t threadCount = 1)
496  {
497  Copy<SrcMapping, DstMapping>{}(srcView, dstView, threadId, threadCount);
498  }
499 } // namespace llama
#define LLAMA_EXPORT
Definition: macros.hpp:192
#define LLAMA_LAMBDA_INLINE
Gives strong indication to the compiler to inline the attributed lambda.
Definition: macros.hpp:113
constexpr auto copyBlobWithMemcpy
Definition: Copy.hpp:83
void parallelMemcpy(std::byte *dst, const std::byte *src, std::size_t size, std::size_t threadId=0, std::size_t threadCount=1, MemcpyFunc singleThreadMemcpy=memcpy)
Definition: Copy.hpp:35
constexpr std::size_t aosoaLanes
Definition: Copy.hpp:156
void assertTrivialCopyable()
Definition: Copy.hpp:18
constexpr auto memcpy
Definition: Copy.hpp:32
SoA(TArrayExtents, TRecordDim) -> SoA< TArrayExtents, TRecordDim >
SubArrayAlignment
Definition: SoA.hpp:21
void forEachArrayIndex([[maybe_unused]] const ArrayIndex< SizeType, Dim > &extents, Func &&func, OuterIndices... outerIndices)
void copyBlobs(const View< Mapping, SrcBlob > &srcView, View< Mapping, DstBlob > &dstView, BlobCopyFunc copyBlob=internal::copyBlobWithMemcpy)
Definition: Copy.hpp:101
ArrayExtents(Args...) -> ArrayExtents< typename internal::IndexTypeFromArgs< std::size_t, Args... >::type,(Args{}, dyn)... >
void memcpyBlobs(const View< Mapping, SrcBlob > &srcView, View< Mapping, DstBlob > &dstView, std::size_t threadId=0, std::size_t threadCount=1, MemcpyFunc singleThreadMemcpy=internal::memcpy)
Definition: Copy.hpp:57
void aosoaCommonBlockCopy(const View< SrcMapping, SrcBlob > &srcView, View< DstMapping, DstBlob > &dstView, std::size_t threadId=0, std::size_t threadCount=1)
Definition: Copy.hpp:190
void copy(const View< SrcMapping, SrcBlob > &srcView, View< DstMapping, DstBlob > &dstView, std::size_t threadId=0, std::size_t threadCount=1)
Definition: Copy.hpp:491
constexpr auto product(Array< T, N > a) -> T
Definition: Array.hpp:315
typename internal::GetTypeImpl< RecordDim, RecordCoordOrTags... >::type GetType
Definition: Core.hpp:388
void fieldWiseCopy(const View< SrcMapping, SrcBlob > &srcView, View< DstMapping, DstBlob > &dstView, std::size_t threadId=0, std::size_t threadCount=1)
Definition: Copy.hpp:118
void operator()(const SrcView &srcView, DstView &dstView, std::size_t threadId, std::size_t threadCount) const
Definition: Copy.hpp:333
void operator()(const View< mapping::AoSoA< ArrayExtents, RecordDim, LanesSrc, AlignSrc, LinearizeArrayIndex, PermuteFields >, SrcBlob > &srcView, View< mapping::AoSoA< ArrayExtents, RecordDim, LanesDst, AlignDst, LinearizeArrayIndex, PermuteFields >, DstBlob > &dstView, std::size_t threadId, std::size_t threadCount)
Definition: Copy.hpp:357
void operator()(const View< mapping::AoSoA< ArrayExtents, RecordDim, LanesSrc, AlignSrc, LinearizeArrayIndex, PermuteFields >, SrcBlob > &srcView, View< mapping::SoA< ArrayExtents, RecordDim, DstBlobs, DstSubArrayAlignment, LinearizeArrayIndex, PermuteFields >, DstBlob > &dstView, std::size_t threadId, std::size_t threadCount)
Definition: Copy.hpp:387
void operator()(const View< mapping::SoA< ArrayExtents, RecordDim, SrcBlobs, SrcSubArrayAlignment, LinearizeArrayIndex, PermuteFields >, SrcBlob > &srcView, View< mapping::AoSoA< ArrayExtents, RecordDim, LanesDst, AlignDst, LinearizeArrayIndex, PermuteFields >, DstBlob > &dstView, std::size_t threadId, std::size_t threadCount)
Definition: Copy.hpp:418
void operator()(const View< mapping::SoA< ArrayExtents, RecordDim, SrcBlobs, SrcSubArrayAlignment, LinearizeArrayIndex, PermuteFields >, SrcBlob > &srcView, View< mapping::SoA< ArrayExtents, RecordDim, DstBlobs, DstSubArrayAlignment, LinearizeArrayIndex, PermuteFields >, DstBlob > &dstView, std::size_t threadId, std::size_t threadCount)
Definition: Copy.hpp:450
Generic implementation of copy defaulting to fieldWiseCopy. LLAMA provides several specializations of...
Definition: Copy.hpp:320
void operator()(const SrcView &srcView, DstView &dstView, std::size_t threadId, std::size_t threadCount) const
Definition: Copy.hpp:322
auto mapping() -> Mapping &
Definition: View.hpp:436
auto blobs() -> Array< BlobType, Mapping::blobCount > &
Definition: View.hpp:565
auto extents() const -> ArrayExtents
Definition: View.hpp:456