Low-Level Abstraction of Memory Access
SoA.hpp
Go to the documentation of this file.
1 // Copyright 2022 Alexander Matthes, Bernhard Manfred Gruber
2 // SPDX-License-Identifier: MPL-2.0
3 
4 #pragma once
5 
6 #include "Common.hpp"
7 
8 #include <limits>
9 
10 namespace llama::mapping
11 {
/// Selects how many blobs the SoA mapping uses.
enum class Blobs
{
    /// All fields share one blob (SoA::blobCount == 1).
    Single,
    /// Every leaf field of the record dimension gets its own blob
    /// (SoA::blobCount == number of leaf fields). This is SoA's default.
    OnePerField
};
18 
/// Selects how the per-field sub arrays are placed inside a single blob.
/// Only consulted by SoA when blobs == Blobs::Single.
enum class SubArrayAlignment
{
    /// Sub arrays follow each other directly, without rounding their start offsets.
    Pack,
    /// The start offset of each sub array is rounded up to a multiple of alignof(field type).
    Align
};
25 
38  template<
39  typename TArrayExtents,
40  typename TRecordDim,
41  Blobs TBlobs = Blobs::OnePerField,
42  SubArrayAlignment TSubArrayAlignment
44  typename TLinearizeArrayIndexFunctor = LinearizeArrayIndexRight,
45  template<typename> typename PermuteFieldsSingleBlob = PermuteFieldsInOrder>
46  struct SoA : MappingBase<TArrayExtents, TRecordDim>
47  {
48  private:
50  using size_type = typename TArrayExtents::value_type;
51 
52  public:
53  inline static constexpr Blobs blobs = TBlobs;
54  inline static constexpr SubArrayAlignment subArrayAlignment = TSubArrayAlignment;
55  using LinearizeArrayIndexFunctor = TLinearizeArrayIndexFunctor;
56  using Permuter = PermuteFieldsSingleBlob<FlatRecordDim<TRecordDim>>;
57  inline static constexpr std::size_t blobCount
58  = blobs == Blobs::OnePerField ? mp_size<FlatRecordDim<TRecordDim>>::value : 1;
59 
60 #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ >= 12
61  using Base::Base;
62 #else
63  constexpr SoA() = default;
64 
65  LLAMA_FN_HOST_ACC_INLINE constexpr explicit SoA(TArrayExtents extents, TRecordDim = {}) : Base(extents)
66  {
67  }
68 #endif
69 
71  constexpr auto blobSize([[maybe_unused]] size_type blobIndex) const -> size_type
72  {
73  const auto flatSize = LinearizeArrayIndexFunctor{}.size(Base::extents());
74  if constexpr(blobs == Blobs::OnePerField)
75  {
76  constexpr auto typeSizes = []() constexpr
77  {
79  forEachLeafCoord<TRecordDim>([&r, i = 0](auto rc) mutable constexpr
80  { r[i++] = sizeof(GetType<TRecordDim, decltype(rc)>); });
81  return r;
82  }();
83  return flatSize * typeSizes[blobIndex];
84  }
85  else if constexpr(subArrayAlignment == SubArrayAlignment::Align)
86  {
87  size_type size = 0;
88  using FRD = typename Permuter::FlatRecordDim;
89  mp_for_each_inline<mp_transform<mp_identity, FRD>>(
90  [&](auto ti) LLAMA_LAMBDA_INLINE
91  {
92  using FieldType = typename decltype(ti)::type;
93  size = roundUpToMultiple(size, static_cast<size_type>(alignof(FieldType)));
94  size += static_cast<size_type>(sizeof(FieldType)) * flatSize;
95  });
96  return size;
97  }
98  else
99  {
100  return flatSize * static_cast<size_type>(sizeOf<TRecordDim>);
101  }
102  }
103 
104  private:
105  static LLAMA_CONSTEVAL auto computeSubArrayOffsets()
106  {
107  using FRD = typename Permuter::FlatRecordDim;
108  constexpr auto staticFlatSize = LinearizeArrayIndexFunctor{}.size(TArrayExtents{});
109  constexpr auto subArrays = mp_size<FRD>::value;
110  Array<size_type, subArrays> r{};
111  // r[0] == 0, only compute the following offsets
112  mp_for_each_inline<mp_iota_c<subArrays - 1>>(
113  [&](auto ic)
114  {
115  constexpr auto i = decltype(ic)::value;
116  r[i + 1] = r[i];
117  using ThisFieldType = mp_at_c<FRD, i>;
118  r[i + 1] += static_cast<size_type>(sizeof(ThisFieldType)) * staticFlatSize;
119  using NextFieldType = mp_at_c<FRD, i + 1>;
120  r[i + 1] = roundUpToMultiple(r[i + 1], static_cast<size_type>(alignof(NextFieldType)));
121  });
122  return r;
123  }
124 
125  public:
126  template<std::size_t... RecordCoords>
128  typename Base::ArrayIndex ai,
130  {
132  }
133 
134  // Exposed for aosoaCommonBlockCopy. Should be private ...
135  template<std::size_t... RecordCoords>
137  size_type flatArrayIndex,
139  {
140  const size_type elementOffset
141  = flatArrayIndex * static_cast<size_type>(sizeof(GetType<TRecordDim, RecordCoord<RecordCoords...>>));
142  if constexpr(blobs == Blobs::OnePerField)
143  {
144  constexpr auto blob = flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>;
145  return {blob, elementOffset};
146  }
147  else
148  {
149  constexpr std::size_t flatFieldIndex =
150 #if defined(__NVCC__) && __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ <= 6
151  *& // mess with nvcc compiler state to workaround bug
152 #endif
153  Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>;
154  const size_type flatSize = LinearizeArrayIndexFunctor{}.size(Base::extents());
155  using FRD = typename Permuter::FlatRecordDim;
157  {
158  if constexpr(TArrayExtents::rankStatic == TArrayExtents::rank)
159  {
160  // full array extents are known statically, we can precompute the sub array offsets
161  constexpr auto subArrayOffsets = computeSubArrayOffsets();
162  return {0, subArrayOffsets[flatFieldIndex] + elementOffset};
163  }
164  else
165  {
166  // TODO(bgruber): we can take a shortcut here if we know that flatSize is a multiple of all
167  // type's alignment. We can also precompute a table of sub array starts (and maybe store it),
168  // or rely on the compiler it out of loops.
169  size_type offset = 0;
170  mp_for_each_inline<mp_iota_c<flatFieldIndex>>(
171  [&](auto ic) LLAMA_LAMBDA_INLINE
172  {
173  constexpr auto i = decltype(ic)::value;
174  using ThisFieldType = mp_at_c<FRD, i>;
175  offset += static_cast<size_type>(sizeof(ThisFieldType)) * flatSize;
176  using NextFieldType = mp_at_c<FRD, i + 1>;
177  offset = roundUpToMultiple(offset, static_cast<size_type>(alignof(NextFieldType)));
178  });
179  offset += elementOffset;
180  return {0, offset};
181  }
182  }
183  else
184  {
185  const auto offset
186  = elementOffset + static_cast<size_type>(flatOffsetOf<FRD, flatFieldIndex, false>) * flatSize;
187  return {0, offset};
188  }
189  }
190  }
191  };
192 
193  // we can drop this when inherited ctors also inherit deduction guides
195  template<typename TArrayExtents, typename TRecordDim>
196  SoA(TArrayExtents, TRecordDim) -> SoA<TArrayExtents, TRecordDim>;
197 
// NOTE(review): the three template headers below are dangling. In this copy of the file the alias declaration
// that followed each of them (listing lines 202-203, 209-210 and 216-217) is missing, so the alias names and the
// SoA arguments they bind cannot be read from here.
// Presumably these are the convenience aliases over SoA for the aligned single-blob, packed single-blob and
// one-blob-per-field configurations (upstream names likely AlignedSingleBlobSoA, PackedSingleBlobSoA and
// MultiBlobSoA) -- TODO restore the `using ... = SoA<...>;` lines from the upstream header and confirm.
201  template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayIndexFunctor = LinearizeArrayIndexRight>
204 
// NOTE(review): alias declaration missing here as well (see note above).
208  template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayIndexFunctor = LinearizeArrayIndexRight>
211 
// NOTE(review): alias declaration missing here as well (see note above).
215  template<typename ArrayExtents, typename RecordDim, typename LinearizeArrayIndexFunctor = LinearizeArrayIndexRight>
218 
// BindSoA: class template with a nested template over (ArrayExtents, RecordDim).
// NOTE(review): this copy of the file is missing listing lines 223-224 (the template parameters preceding
// LinearizeArrayIndexFunctor) and line 229 (the member declared by the nested template), so the block is
// incomplete as shown. Presumably the missing parameters are a Blobs and a SubArrayAlignment value, and the
// member is an alias yielding SoA<ArrayExtents, RecordDim, ..., LinearizeArrayIndexFunctor>, i.e. BindSoA fixes
// every SoA option except array extents and record dimension -- TODO restore from the upstream header and
// confirm names and defaults.
222  template<
225  typename LinearizeArrayIndexFunctor = LinearizeArrayIndexRight>
226  struct BindSoA
227  {
// Nested template taking the two parameters that BindSoA leaves open; its declaration (line 229) is missing.
228  template<typename ArrayExtents, typename RecordDim>
230  };
231 
/// Trait telling whether Mapping is an instantiation of SoA.
/// This primary template is the fallback and yields false for every type.
template<typename Mapping>
inline constexpr bool isSoA = false;
235 
237  template<
238  typename ArrayExtents,
239  typename RecordDim,
240  Blobs Blobs,
242  typename LinearizeArrayIndexFunctor>
243  inline constexpr bool isSoA<SoA<ArrayExtents, RecordDim, Blobs, SubArrayAlignment, LinearizeArrayIndexFunctor>>
244  = true;
245 } // namespace llama::mapping
#define LLAMA_EXPORT
Definition: macros.hpp:192
#define LLAMA_LAMBDA_INLINE
Gives strong indication to the compiler to inline the attributed lambda.
Definition: macros.hpp:113
#define LLAMA_CONSTEVAL
Expands to consteval if the compiler supports the keyword, otherwise to constexpr.
Definition: macros.hpp:186
#define LLAMA_FN_HOST_ACC_INLINE
Definition: macros.hpp:96
constexpr bool isSoA
Definition: SoA.hpp:234
SoA(TArrayExtents, TRecordDim) -> SoA< TArrayExtents, TRecordDim >
SubArrayAlignment
Definition: SoA.hpp:21
typename internal::FlattenRecordDimImpl< RecordDim >::type FlatRecordDim
Returns a flat type list containing all leaf field types of the given record dimension.
Definition: Core.hpp:481
ArrayExtents(Args...) -> ArrayExtents< typename internal::IndexTypeFromArgs< std::size_t, Args... >::type,(Args{}, dyn)... >
constexpr std::size_t flatRecordCoord
Definition: Core.hpp:517
constexpr void mp_for_each_inline(F &&f)
Like boost::mp11::mp_for_each, but marked with LLAMA_FN_HOST_ACC_INLINE.
Definition: Meta.hpp:59
constexpr auto roundUpToMultiple(Integral n, Integral mult) -> Integral
Returns the integral n rounded up to be a multiple of mult.
Definition: Core.hpp:578
typename internal::GetTypeImpl< RecordDim, RecordCoordOrTags... >::type GetType
Definition: Core.hpp:388
typename ArrayExtents::value_type size_type
Definition: Common.hpp:25
typename ArrayExtents::Index ArrayIndex
Definition: Common.hpp:24
constexpr auto extents() const -> ArrayExtents
Definition: Common.hpp:35
constexpr SoA(TArrayExtents extents, TRecordDim={})
Definition: SoA.hpp:65
TLinearizeArrayIndexFunctor LinearizeArrayIndexFunctor
Definition: SoA.hpp:55
static constexpr SubArrayAlignment subArrayAlignment
Definition: SoA.hpp:54
static constexpr std::size_t blobCount
Definition: SoA.hpp:58
constexpr SoA()=default
constexpr auto blobSize([[maybe_unused]] size_type blobIndex) const -> size_type
Definition: SoA.hpp:71
constexpr auto blobNrAndOffset(typename Base::ArrayIndex ai, RecordCoord< RecordCoords... > rc={}) const -> NrAndOffset< size_type >
Definition: SoA.hpp:127
static constexpr Blobs blobs
Definition: SoA.hpp:53
constexpr auto blobNrAndOffset(size_type flatArrayIndex, RecordCoord< RecordCoords... >={}) const -> NrAndOffset< size_type >
Definition: SoA.hpp:136
PermuteFieldsSingleBlob< FlatRecordDim< TRecordDim > > Permuter
Definition: SoA.hpp:56