Low-Level Abstraction of Memory Access
Common.hpp
// Copyright 2022 Alexander Matthes, Bernhard Manfred Gruber
// SPDX-License-Identifier: MPL-2.0

#pragma once

#include "../Core.hpp"

#include <atomic>
#include <climits>
#ifndef __cpp_lib_atomic_ref
#    include <boost/atomic/atomic_ref.hpp>
#endif

namespace llama::mapping
{
    LLAMA_EXPORT
    template<typename TArrayExtents, typename TRecordDim>
    struct MappingBase : protected TArrayExtents
    {
        using ArrayExtents = TArrayExtents;
        using RecordDim = TRecordDim;

    protected:
        using ArrayIndex = typename ArrayExtents::Index;
        using size_type = typename ArrayExtents::value_type;

    public:
        constexpr MappingBase() = default;

        LLAMA_FN_HOST_ACC_INLINE
        constexpr MappingBase(ArrayExtents extents, RecordDim = {}) : ArrayExtents(extents)
        {
        }

        LLAMA_FN_HOST_ACC_INLINE constexpr auto extents() const -> ArrayExtents
        {
            return static_cast<const ArrayExtents&>(*this);
        }
    };
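
    // Usage sketch (illustrative addition, not part of the upstream header): MappingBase stores the array
    // extents in its protected base class and hands them back via extents(); the record dimension is only
    // carried as a type. Assumes llama::ArrayExtents is available via the Core.hpp include.
    static_assert(MappingBase<ArrayExtents<int, 4, 5>, double>{ArrayExtents<int, 4, 5>{}}.extents()[1] == 5);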

    /// Functor that maps an ArrayIndex into linear numbers the way C++ arrays work (row-major order),
    /// i.e. the rightmost index moves fastest.
    LLAMA_EXPORT
    struct LinearizeArrayIndexRight
    {
        template<typename ArrayExtents>
        LLAMA_FN_HOST_ACC_INLINE constexpr auto size(const ArrayExtents& extents) -> typename ArrayExtents::value_type
        {
            return product(extents);
        }

        template<typename ArrayExtents>
        LLAMA_FN_HOST_ACC_INLINE constexpr auto operator()(
            const typename ArrayExtents::Index& ai,
            const ArrayExtents& extents) const -> typename ArrayExtents::value_type
        {
            if constexpr(ArrayExtents::rank == 0)
                return 0;
            else
            {
                auto address = ai[0];
                for(int i = 1; i < static_cast<int>(ArrayExtents::rank); i++)
                {
                    address *= extents[i];
                    address += ai[i];
                }
                return address;
            }
        }
    };
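
    // Usage sketch (illustrative addition): the rightmost index moves fastest, so index {2, 3} inside
    // extents {4, 5} linearizes to 2 * 5 + 3 = 13 (row-major order, as for C++ arrays). Assumes
    // llama::ArrayIndex and llama::ArrayExtents from Core.hpp.
    static_assert(LinearizeArrayIndexRight{}(ArrayIndex<int, 2>{2, 3}, ArrayExtents<int, 4, 5>{}) == 13);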

    /// Functor that maps an ArrayIndex into linear numbers the way Fortran arrays work (column-major
    /// order), i.e. the leftmost index moves fastest.
    LLAMA_EXPORT
    struct LinearizeArrayIndexLeft
    {
        template<typename ArrayExtents>
        LLAMA_FN_HOST_ACC_INLINE constexpr auto size(const ArrayExtents& extents) -> typename ArrayExtents::value_type
        {
            return product(extents);
        }

        template<typename ArrayExtents>
        LLAMA_FN_HOST_ACC_INLINE constexpr auto operator()(
            const typename ArrayExtents::Index& ai,
            const ArrayExtents& extents) const -> typename ArrayExtents::value_type
        {
            if constexpr(ArrayExtents::rank == 0)
                return 0;
            else
            {
                auto address = ai[ArrayExtents::rank - 1];
                for(int i = static_cast<int>(ArrayExtents::rank) - 2; i >= 0; i--)
                {
                    address *= extents[i];
                    address += ai[i];
                }
                return address;
            }
        }
    };
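
    // Usage sketch (illustrative addition): the leftmost index moves fastest, so index {2, 3} inside
    // extents {4, 5} linearizes to 3 * 4 + 2 = 14 (column-major order, as for Fortran arrays).
    static_assert(LinearizeArrayIndexLeft{}(ArrayIndex<int, 2>{2, 3}, ArrayExtents<int, 4, 5>{}) == 14);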

    /// Functor that maps an ArrayIndex into linear numbers using the Z-order space filling curve (Morton
    /// codes).
    LLAMA_EXPORT
    struct LinearizeArrayIndexMorton
    {
        template<typename ArrayExtents>
        LLAMA_FN_HOST_ACC_INLINE constexpr auto size(const ArrayExtents& extents) const ->
            typename ArrayExtents::value_type
        {
            if constexpr(ArrayExtents::rank == 0)
                return 0;
            else
            {
                auto longest = extents[0];
                for(int i = 1; i < static_cast<int>(ArrayExtents::rank); i++)
                    longest = std::max(longest, extents[i]);
                const auto longestPO2 = bitCeil(longest);
                return intPow(longestPO2, static_cast<typename ArrayExtents::value_type>(ArrayExtents::rank));
            }
        }

        template<typename ArrayExtents>
        LLAMA_FN_HOST_ACC_INLINE constexpr auto operator()(
            const typename ArrayExtents::Index& ai,
            [[maybe_unused]] const ArrayExtents& extents) const -> typename ArrayExtents::value_type
        {
            using size_type = typename ArrayExtents::value_type;
            constexpr auto rank = static_cast<size_type>(ArrayExtents::rank);
            size_type r = 0;
            for(size_type bit = 0; bit < (static_cast<size_type>(sizeof(size_type)) * CHAR_BIT) / rank; bit++)
                for(size_type i = 0; i < rank; i++)
                    r |= (ai[i] & (size_type{1} << bit)) << ((bit + 1) * (rank - 1) - i);
            return r;
        }

    private:
        template<typename T>
        LLAMA_FN_HOST_ACC_INLINE static constexpr auto bitCeil(T n) -> T
        {
            T r = 1u;
            while(r < n)
                r <<= 1u;
            return r;
        }

        template<typename T>
        LLAMA_FN_HOST_ACC_INLINE static constexpr auto intPow(T b, T e) -> T
        {
            e--;
            auto r = b;
            while(e != 0u)
            {
                r *= b;
                e--;
            }
            return r;
        }
    };
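
    // Usage sketch (illustrative addition): Morton order interleaves the bits of all indices. For index
    // {2, 3}, i.e. binary {10, 11}, the interleaved bits give 1101b = 13. The extents only matter for
    // size(), which covers the longest extent rounded up to a power of two in every dimension.
    static_assert(LinearizeArrayIndexMorton{}(ArrayIndex<int, 2>{2, 3}, ArrayExtents<int, 4, 4>{}) == 13);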

    /// Retains the order of the record dimension's fields.
    LLAMA_EXPORT
    template<typename TFlatRecordDim>
    struct PermuteFieldsInOrder
    {
        using FlatRecordDim = TFlatRecordDim;

        template<std::size_t FlatRecordCoord>
        static constexpr std::size_t permute = FlatRecordCoord;
    };
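
    // Usage sketch (illustrative addition): the identity permutation keeps every flat field index in
    // place.
    static_assert(PermuteFieldsInOrder<mp_list<double, char, float>>::permute<1> == 1);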

    /// Sorts the record dimension's fields by the given binary predicate Less on the field types.
    LLAMA_EXPORT
    template<typename FlatOrigRecordDim, template<typename, typename> typename Less>
    struct PermuteFieldsSorted
    {
    private:
        using FlatSortedRecordDim = mp_sort<FlatOrigRecordDim, Less>;

        template<typename A, typename B>
        using LessWithIndices = Less<mp_at<FlatOrigRecordDim, A>, mp_at<FlatOrigRecordDim, B>>;

        // A permutation from new FlatSortedRecordDim index to old FlatOrigRecordDim index
        using PermutedIndices = mp_sort<mp_iota<mp_size<FlatOrigRecordDim>>, LessWithIndices>;

        template<typename A, typename B>
        using LessInvertPermutation
            = std::bool_constant<(mp_at<PermutedIndices, A>::value < mp_at<PermutedIndices, B>::value)>;

        // A permutation from old FlatOrigRecordDim index to new FlatSortedRecordDim index
        using InversePermutedIndices = mp_sort<mp_iota<mp_size<FlatOrigRecordDim>>, LessInvertPermutation>;

    public:
        using FlatRecordDim = FlatSortedRecordDim;

        template<std::size_t FlatRecordCoord>
        static constexpr std::size_t permute = mp_at_c<InversePermutedIndices, FlatRecordCoord>::value;
    };

    namespace internal
    {
        template<typename A, typename B>
        using LessAlignment = std::bool_constant<alignof(A) < alignof(B)>;

        template<typename A, typename B>
        using MoreAlignment = std::bool_constant<(alignof(A) > alignof(B))>;
    } // namespace internal

    LLAMA_EXPORT
    template<typename FlatRecordDim>
    using PermuteFieldsIncreasingAlignment = PermuteFieldsSorted<FlatRecordDim, internal::LessAlignment>;

    LLAMA_EXPORT
    template<typename FlatRecordDim>
    using PermuteFieldsDecreasingAlignment = PermuteFieldsSorted<FlatRecordDim, internal::MoreAlignment>;

    LLAMA_EXPORT
    template<typename FlatRecordDim>
    using PermuteFieldsMinimizePadding = PermuteFieldsIncreasingAlignment<FlatRecordDim>;
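
    // Usage sketch (illustrative addition): sorting the flattened fields (double, char, float) by
    // increasing alignment yields (char, float, double). permute<I> maps an original flat field index to
    // its position after sorting, so the double at original index 0 ends up at index 2.
    static_assert(std::is_same_v<
                  PermuteFieldsIncreasingAlignment<mp_list<double, char, float>>::FlatRecordDim,
                  mp_list<char, float, double>>);
    static_assert(PermuteFieldsIncreasingAlignment<mp_list<double, char, float>>::permute<0> == 2);
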
    namespace internal
    {
        template<auto I>
        struct S;

        template<typename CountType>
        LLAMA_FN_HOST_ACC_INLINE void atomicInc(CountType& i)
        {
#ifdef __CUDA_ARCH__
            // if you get an error here that there is no overload of atomicAdd, your CMAKE_CUDA_ARCHITECTURES might be
            // too low or you need to use a smaller CountType for the FieldAccessCount or Heatmap mapping.
            if constexpr(mp_contains<mp_list<int, unsigned int, unsigned long long int>, CountType>::value)
                atomicAdd(&i, CountType{1});
            else if constexpr(sizeof(CountType) == sizeof(unsigned int))
                atomicAdd(reinterpret_cast<unsigned int*>(&i), 1u);
            else if constexpr(sizeof(CountType) == sizeof(unsigned long long int))
                atomicAdd(reinterpret_cast<unsigned long long int*>(&i), 1ull);
            else
                static_assert(sizeof(CountType) == 0, "There is no CUDA atomicAdd for your CountType");
#elif defined(__cpp_lib_atomic_ref)
            ++std::atomic_ref<CountType>{i};
#else
            ++boost::atomic_ref<CountType>{i};
#endif
        }
    } // namespace internal
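
    // Usage sketch (illustrative, hypothetical helper; not part of the upstream header): atomicInc
    // increments a counter that may be hit by many threads at once. On the host it goes through
    // std::atomic_ref (or boost::atomic_ref before C++20), on CUDA devices through atomicAdd.
    template<typename CountType>
    LLAMA_FN_HOST_ACC_INLINE void countFieldAccessExample(CountType& counter)
    {
        internal::atomicInc(counter); // one thread-safe increment of the shared counter
    }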

    /// Selects whether fields in a mapping's memory layout are tightly packed or aligned to their type's
    /// alignment requirement.
    LLAMA_EXPORT
    enum class FieldAlignment
    {
        Pack,
        Align
    };
} // namespace llama::mapping