Low-Level Abstraction of Memory Access
FieldAccessCount.hpp
Go to the documentation of this file.
1 // Copyright 2022 Bernhard Manfred Gruber
2 // SPDX-License-Identifier: MPL-2.0
3 
4 #pragma once
5 
6 #include "../StructName.hpp"
7 #include "Common.hpp"
8 
9 #include <cstdio>
10 #include <iomanip>
11 #include <iostream>
12 
13 namespace llama::mapping
14 {
/// Pair of access counters kept for a single record field.
///
/// `memLocsComputed` and `reads` share one storage slot through the anonymous union, so the struct holds exactly
/// two counters. FieldAccessCount increments `memLocsComputed` when it is configured without proxy references,
/// and sums `reads` and `writes` when it is configured with proxy references.
/// \tparam CountType Integral type used for the counters.
template<typename CountType>
struct AccessCounts
{
    union
    {
        CountType memLocsComputed; ///< Number of computed memory locations (no proxy references).
        CountType reads; ///< Number of reads (proxy references); aliases memLocsComputed.
    };
    CountType writes; ///< Number of writes (proxy references).
};
26 
27  namespace internal
28  {
// Proxy reference returned by FieldAccessCount::compute() when MyCodeHandlesProxyReferences is true.
// It stores the inner mapping's reference `r` and a pointer `hits` to the AccessCounts entry of the
// accessed field. Compound assignment and increment/decrement come from ProxyRefOpMixin.
// NOTE(review): this listing has gaps (listing lines 35, 42, 43, 45, 47, 51, 57, 59, 65 and 67 are absent);
// the notes below mark each gap. Confirm the missing lines against the original header.
29  template<typename Value, typename Ref, typename Count>
30  struct FieldAccessCountReference : ProxyRefOpMixin<FieldAccessCountReference<Value, Ref, Count>, Value>
31  {
32  using value_type = Value;
33 
34  template<typename RefFwd>
// NOTE(review): the constructor signature (listing line 35) is missing here. The member index of this page
// lists it as `constexpr FieldAccessCountReference(RefFwd &&r, AccessCounts< Count > *hits)`.
36  : r(std::forward<RefFwd>(r))
37  , hits(hits)
38  {
// RefFwd exists only to allow forwarding; apart from reference-ness it must be the same type as Ref.
39  static_assert(std::is_same_v<std::remove_reference_t<Ref>, std::remove_reference_t<RefFwd>>);
40  }
41 
// NOTE(review): listing lines 42-43 are missing; the member index lists a defaulted copy constructor and a
// defaulted noexcept move constructor.
44  auto operator=(FieldAccessCountReference&& ref) noexcept -> FieldAccessCountReference& = default;
46 
// NOTE(review): the signature of this operator (listing line 47) is missing; from the body it is presumably
// the copy assignment taking `const FieldAccessCountReference& ref`. It assigns the value read from `ref`
// to the wrapped reference unless `ref` is this object itself.
48  {
49  if(&ref != this)
50  {
// NOTE(review): listing line 51 is missing; presumably it records the write in `hits` - TODO confirm.
52  r = static_cast<value_type>(ref);
53  }
54  return *this;
55  }
56 
// NOTE(review): signature (listing line 57) is missing; the member index lists
// `auto operator=(value_type value) -> FieldAccessCountReference &`. Stores `value` through `r`.
58  {
// NOTE(review): listing line 59 is missing; presumably it records the write in `hits` - TODO confirm.
60  r = value;
61  return *this;
62  }
63 
64  // NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions)
// NOTE(review): signature (listing line 65) is missing; from the NOLINT and the body this is presumably the
// implicit conversion `operator value_type() const`, which reads the value through `r`.
66  {
// NOTE(review): listing line 67 is missing; presumably it records the read in `hits` - TODO confirm.
68  return static_cast<value_type>(r);
69  }
70 
71  private:
72  Ref r; // reference (or proxy reference) produced by the inner mapping
73  AccessCounts<Count>* hits; // counters of the accessed field; not owned
74  };
75  } // namespace internal
76 
84  template<typename Mapping, typename TCountType = std::size_t, bool MyCodeHandlesProxyReferences = true>
85  struct FieldAccessCount : Mapping
86  {
87  private:
88  using size_type = typename Mapping::ArrayExtents::value_type;
89 
90  public:
91  using RecordDim = typename Mapping::RecordDim;
92  using CountType = TCountType;
93  inline static constexpr bool myCodeHandlesProxyReferences = MyCodeHandlesProxyReferences;
94 
// Array with one AccessCounts entry per leaf field of RecordDim (flatFieldCount<RecordDim> entries),
// plus helpers that aggregate the counters.
// NOTE(review): this listing has gaps (listing lines 96-99 partly, 115-116 and 119-121 are absent);
// the notes below mark each gap. Confirm the missing lines against the original header.
95  struct FieldHitsArray : Array<AccessCounts<CountType>, flatFieldCount<RecordDim>>
96  {
// NOTE(review): the signature of this function is missing; the member index of this page lists it as
// `auto total() const -> AccessCounts< CountType >`. It sums the counters of all fields.
98  {
// NOTE(review): listing line 99 is missing; presumably it declares the zero-initialized accumulator `total`.
100  for(const auto& ac : *this)
101  {
// With proxy references reads and writes are summed separately; otherwise only memLocsComputed is summed.
102  if constexpr(MyCodeHandlesProxyReferences)
103  {
104  total.reads += ac.reads;
105  total.writes += ac.writes;
106  }
107  else
108  total.memLocsComputed += ac.memLocsComputed;
109  }
110  return total;
111  }
112 
// Result type of totalBytes() in the proxy-reference configuration.
// NOTE(review): the members (listing lines 115-116) are missing; the printing code reads `totalRead` and
// `totalWritten`, and the struct is built as TotalBytes{r, w} below.
113  struct TotalBytes
114  {
117  };
118 
// NOTE(review): the signature (listing lines 119-121) is missing; callers invoke it as `hits.totalBytes()`
// on a const FieldHitsArray. It weights each field's counters by sizeof of the field type.
122  {
123  CountType r = 0;
124  CountType w = 0; // NOLINT(misc-const-correctness)
125  forEachLeafCoord<RecordDim>(
126  [&](auto rc) LLAMA_LAMBDA_INLINE
127  {
// i is the flat index of the leaf field rc; it selects the field's entry in this array.
128  const size_type i = flatRecordCoord<RecordDim, decltype(rc)>;
129  const auto fieldSize = sizeof(GetType<RecordDim, decltype(rc)>);
130  if constexpr(MyCodeHandlesProxyReferences)
131  {
132  r += (*this)[i].reads * fieldSize;
133  w += (*this)[i].writes * fieldSize;
134  }
135  else
136  r += (*this)[i].memLocsComputed * fieldSize;
137  });
// The return type depends on the configuration: a TotalBytes pair or a single CountType.
138  if constexpr(MyCodeHandlesProxyReferences)
139  return TotalBytes{r, w};
140  else
141  return r;
142  }
143  };
144 
145  inline static constexpr auto blobCount = Mapping::blobCount + 1;
146 
147  constexpr FieldAccessCount() = default;
148 
150  explicit FieldAccessCount(Mapping mapping) : Mapping(std::move(mapping))
151  {
152  }
153 
154  template<typename... Args>
155  LLAMA_FN_HOST_ACC_INLINE explicit FieldAccessCount(Args&&... innerArgs)
156  : Mapping(std::forward<Args>(innerArgs)...)
157  {
158  }
159 
161  constexpr auto blobSize(size_type blobIndex) const -> size_type
162  {
163  if(blobIndex < size_type{Mapping::blobCount})
164  return inner().blobSize(blobIndex);
165  return sizeof(FieldHitsArray);
166  }
167 
168  template<std::size_t... RecordCoords>
170  {
171  return true;
172  }
173 
174  template<std::size_t... RecordCoords, typename Blobs>
176  typename Mapping::ArrayExtents::Index ai,
178  Blobs& blobs) const -> decltype(auto)
179  {
180  static_assert(
181  !std::is_const_v<Blobs>,
182  "Cannot access (even just reading) data through FieldAccessCount from const blobs/view, since we need "
183  "to write "
184  "the access counts");
185 
186  auto& hits = fieldHits(blobs)[+flatRecordCoord<RecordDim, RecordCoord<RecordCoords...>>];
187  decltype(auto) ref = mapToMemory(inner(), ai, rc, blobs); // T& or proxy reference (value)
188  if constexpr(MyCodeHandlesProxyReferences)
189  {
190  using Value = GetType<RecordDim, decltype(rc)>;
191  using Ref = decltype(ref);
192  return internal::FieldAccessCountReference<Value, Ref, CountType>{std::forward<Ref>(ref), &hits};
193  }
194  else
195  {
196  internal::atomicInc(hits.memLocsComputed);
197  return ref;
198  }
199  }
200 
202  template<typename Blobs>
203  LLAMA_FN_HOST_ACC_INLINE auto fieldHits(const Blobs& blobs) const -> const FieldHitsArray&
204  {
205  return reinterpret_cast<const FieldHitsArray&>(*&blobs[blobCount - 1][0]);
206  }
207 
209  template<typename Blobs>
211  {
212  return reinterpret_cast<FieldHitsArray&>(*&blobs[blobCount - 1][0]);
213  }
214 
215  template<typename Blobs>
217  {
218  printFieldHits(fieldHits(blobs));
219  }
220 
222  {
223 #ifdef __CUDA_ARCH__
224  printFieldHitsDevice(hits);
225 #else
226  printFieldHitsHost(hits);
227 #endif
228  }
229 
230  private:
231  static constexpr auto columnWidth = 10;
232  static constexpr auto sizeColumnWidth = 5;
233 
234  void printFieldHitsHost(const FieldHitsArray& hits) const
235  {
236  if constexpr(MyCodeHandlesProxyReferences)
237  std::cout << std::left << std::setw(columnWidth) << "Field" << ' ' << std::right
238  << std::setw(sizeColumnWidth) << "Size" << std::right << std::setw(columnWidth) << "Reads"
239  << ' ' << std::right << std::setw(columnWidth) << "Writes" << '\n';
240  else
241  std::cout << std::left << std::setw(columnWidth) << "Field" << ' ' << std::right
242  << std::setw(sizeColumnWidth) << "Size" << std::right << std::setw(columnWidth)
243  << "Mlocs cmp" << '\n';
244  forEachLeafCoord<RecordDim>(
245  [&](auto rc)
246  {
247  const size_type i = flatRecordCoord<RecordDim, decltype(rc)>;
248  const auto fieldSize = sizeof(GetType<RecordDim, decltype(rc)>);
249  if constexpr(MyCodeHandlesProxyReferences)
250  std::cout << std::left << std::setw(columnWidth) << prettyRecordCoord<RecordDim>(rc) << ' '
251  << std::right << std::setw(sizeColumnWidth) << fieldSize << std::right
252  << std::setw(columnWidth) << hits[i].reads << ' ' << std::right
253  << std::setw(columnWidth) << hits[i].writes << '\n';
254  else
255  std::cout << std::left << std::setw(columnWidth) << prettyRecordCoord<RecordDim>(rc) << ' '
256  << std::right << std::setw(sizeColumnWidth) << fieldSize << std::right
257  << std::setw(columnWidth) << hits[i].memLocsComputed << '\n';
258  });
259  const auto total = hits.totalBytes();
260  if constexpr(MyCodeHandlesProxyReferences)
261  {
262  const auto [rsize, runit] = prettySize(total.totalRead);
263  const auto [wsize, wunit] = prettySize(total.totalWritten);
264  std::cout << std::left << std::setw(columnWidth) << "Total" << ' ' << std::right
265  << std::setw(sizeColumnWidth) << ' ' << std::right << std::setw(columnWidth) << rsize
266  << runit << ' ' << std::right << std::setw(columnWidth - 2) << wsize << wunit << '\n';
267  }
268  else
269  {
270  const auto [size, unit] = prettySize(total);
271  std::cout << std::left << std::setw(columnWidth) << "Total" << ' ' << std::right
272  << std::setw(sizeColumnWidth) << ' ' << std::right << std::setw(columnWidth) << size << unit
273  << '\n';
274  }
275  std::cout << std::internal;
276  }
277 
278  LLAMA_ACC void printFieldHitsDevice(const FieldHitsArray& hits) const
279  {
280  if constexpr(MyCodeHandlesProxyReferences)
281  {
282  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
283  printf(
284  "%*s %*s %*s %*s\n",
285  columnWidth,
286  "Field",
287  sizeColumnWidth,
288  "Size",
289  columnWidth,
290  "Reads",
291  columnWidth,
292  "Writes");
293  }
294  else
295  {
296  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
297  printf("%*s %*s %*s\n", columnWidth, "Field", sizeColumnWidth, "Size", columnWidth, "Mlocs cmp");
298  }
299  forEachLeafCoord<RecordDim>(
300  [&](auto rc)
301  {
302  const size_type i = flatRecordCoord<RecordDim, decltype(rc)>;
303  const auto fieldSize = sizeof(GetType<RecordDim, decltype(rc)>);
304  constexpr auto fieldName = prettyRecordCoord<RecordDim>(rc);
305  char fieldNameZT[fieldName.size() + 1]{}; // nvcc does not handle the %*.*s parameter correctly
306  llama::internal::constexprCopy(fieldName.begin(), fieldName.end(), fieldNameZT);
307  if constexpr(MyCodeHandlesProxyReferences)
308  {
309  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
310  printf(
311  "%*.s %*lu %*lu %*lu\n",
312  columnWidth,
313  fieldNameZT,
314  sizeColumnWidth,
315  fieldSize,
316  columnWidth,
317  static_cast<unsigned long>(hits[i].reads),
318  columnWidth,
319  static_cast<unsigned long>(hits[i].writes));
320  }
321  else
322  {
323  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
324  printf(
325  "%*.s %*lu %*lu\n",
326  columnWidth,
327  fieldNameZT,
328  sizeColumnWidth,
329  fieldSize,
330  columnWidth,
331  static_cast<unsigned long>(hits[i].memLocsComputed));
332  }
333  });
334 
335  const auto total = hits.totalBytes();
336  if constexpr(MyCodeHandlesProxyReferences)
337  {
338  const auto [rsize, runit] = prettySize(total.totalRead);
339  const auto [wsize, wunit] = prettySize(total.totalWritten);
340  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
341  printf(
342  "%*s %*s %*f%s %*f%s\n",
343  columnWidth,
344  "Total",
345  sizeColumnWidth,
346  "",
347  columnWidth,
348  rsize,
349  runit,
350  columnWidth - 2,
351  wsize,
352  wunit);
353  }
354  else
355  {
356  const auto [size, unit] = prettySize(total);
357  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg)
358  printf("%*s %*s %*f%s\n", columnWidth, "Total", sizeColumnWidth, "", columnWidth, size, unit);
359  }
360  }
361 
362  LLAMA_FN_HOST_ACC_INLINE auto inner() const -> const Mapping&
363  {
364  return static_cast<const Mapping&>(*this);
365  }
366  };
367 
369  template<typename Mapping>
370  inline constexpr bool isFieldAccessCount = false;
371 
373  template<typename Mapping, typename CountType, bool MyCodeHandlesProxyReferences>
374  inline constexpr bool isFieldAccessCount<FieldAccessCount<Mapping, CountType, MyCodeHandlesProxyReferences>>
375  = true;
376 } // namespace llama::mapping
#define LLAMA_EXPORT
Definition: macros.hpp:192
#define LLAMA_LAMBDA_INLINE
Gives strong indication to the compiler to inline the attributed lambda.
Definition: macros.hpp:113
#define LLAMA_SUPPRESS_HOST_DEVICE_WARNING
Definition: macros.hpp:122
#define LLAMA_ACC
Definition: macros.hpp:77
#define LLAMA_FN_HOST_ACC_INLINE
Definition: macros.hpp:96
constexpr auto constexprCopy(In f, In l, Out d) -> Out
Definition: StructName.hpp:17
void atomicInc(CountType &i)
Definition: Common.hpp:249
constexpr bool isFieldAccessCount
constexpr std::size_t flatRecordCoord
Definition: Core.hpp:517
typename internal::GetTypeImpl< RecordDim, RecordCoordOrTags... >::type GetType
Definition: Core.hpp:388
auto mapToMemory(Mapping &mapping, typename Mapping::ArrayExtents::Index ai, RecordCoord rc, Blobs &blobs) -> decltype(auto)
Definition: View.hpp:359
auto prettySize(double size) -> PrettySize
Definition: Core.hpp:901
CRTP mixin for proxy reference types to support all compound assignment and increment/decrement opera...
auto total() const -> AccessCounts< CountType >
auto fieldHits(Blobs &blobs) const -> FieldHitsArray &
typename Mapping::RecordDim RecordDim
auto compute(typename Mapping::ArrayExtents::Index ai, RecordCoord< RecordCoords... > rc, Blobs &blobs) const -> decltype(auto)
void printFieldHits(const Blobs &blobs) const
constexpr FieldAccessCount()=default
static constexpr bool myCodeHandlesProxyReferences
static constexpr auto isComputed(RecordCoord< RecordCoords... >)
auto fieldHits(const Blobs &blobs) const -> const FieldHitsArray &
FieldAccessCount(Args &&... innerArgs)
constexpr auto blobSize(size_type blobIndex) const -> size_type
void printFieldHits(const FieldHitsArray &hits) const
auto operator=(value_type value) -> FieldAccessCountReference &
constexpr FieldAccessCountReference(RefFwd &&r, AccessCounts< Count > *hits)
FieldAccessCountReference(const FieldAccessCountReference &)=default
FieldAccessCountReference(FieldAccessCountReference &&) noexcept=default