Low-Level Abstraction of Memory Access
BitPackedFloat.hpp
Go to the documentation of this file.
1 // Copyright 2023 Bernhard Manfred Gruber
2 // SPDX-License-Identifier: MPL-2.0
3 
4 #pragma once
5 
6 #include "../ProxyRefOpMixin.hpp"
7 #include "BitPackedInt.hpp"
8 #include "Common.hpp"
9 
10 #include <algorithm>
11 #include <climits>
12 #include <cstdint>
13 #include <cstring>
14 #include <limits>
15 #include <type_traits>
16 
17 namespace llama::mapping
18 {
19  namespace internal
20  {
21  template<typename T>
23 
24  template<>
25  struct FloatBitTraits<float>
26  {
27  inline static constexpr unsigned mantissa = 23;
28  inline static constexpr unsigned exponent = 8;
29  };
30 
31  template<>
32  struct FloatBitTraits<double>
33  {
34  inline static constexpr unsigned mantissa = 52;
35  inline static constexpr unsigned exponent = 11;
36  };
37 
// Re-encodes the bit pattern of a floating-point value from one layout into another.
//
// Both layouts are, from least to most significant bit: mantissa, exponent, one sign bit.
//
// inFloat          bit pattern in the input layout (bits above the sign bit must be zero)
// inMantissaBits   mantissa width of the input layout
// inExponentBits   exponent width of the input layout
// outMantissaBits  mantissa width of the output layout
// outExponentBits  exponent width of the output layout (must be at least 1)
// returns          bit pattern in the output layout
//
// Behavior:
//  * An all-ones input exponent (inf/NaN) becomes an all-ones output exponent.
//  * A zero input exponent becomes a zero output exponent.
//  * Any other exponent is re-biased and clamped to the output range. If it hits either end of
//    that range the mantissa is cleared, so the result is a zero or an infinity.
//  * The mantissa is truncated (not rounded) when narrowing and zero-extended on the right when
//    widening.
//  * A NaN stays a NaN: if truncation would drop every set payload bit, the most significant
//    output mantissa bit is set instead. With outMantissaBits == 0 no NaN can be encoded and the
//    result is an infinity.
template<typename Integral>
LLAMA_FN_HOST_ACC_INLINE auto repackFloat(
    Integral inFloat,
    unsigned inMantissaBits,
    unsigned inExponentBits,
    unsigned outMantissaBits,
    unsigned outExponentBits) -> Integral
{
    const Integral inMantissaMask = (Integral{1} << inMantissaBits) - 1u;
    const Integral inExponentMask = (Integral{1} << inExponentBits) - 1u;

    Integral inMantissa = inFloat & inMantissaMask;
    const Integral inExponent = (inFloat >> inMantissaBits) & inExponentMask;
    const Integral inSign = inFloat >> inExponentBits >> inMantissaBits;

    // remember NaN-ness before any mantissa bits are dropped
    const bool inIsNan = inExponent == inExponentMask && inMantissa != 0;

    const Integral outExponentMask = (Integral{1} << outExponentBits) - 1u;
    Integral outExponent;
    if(inExponent == inExponentMask) [[LLAMA_UNLIKELY]]
        outExponent = outExponentMask; // propagate +/- inf/nan
    else if(inExponent == 0) [[LLAMA_UNLIKELY]]
        outExponent = 0; // propagate -/+ zero
    else
    {
        const int outExponentMax = 1 << (outExponentBits - 1); // NOLINT(hicpp-signed-bitwise)
        const int outExponentMin = -outExponentMax + 1;
        const int outExponentBias = outExponentMax - 1;
        const int inExponentBias = (1 << (inExponentBits - 1)) - 1; // NOLINT(hicpp-signed-bitwise)

        const int exponent = static_cast<int>(inExponent) - inExponentBias;
        const auto clampedExponent = std::clamp(exponent, outExponentMin, outExponentMax);
        if(clampedExponent == outExponentMin || clampedExponent == outExponentMax)
            inMantissa = 0; // when exponent changed, let value become inf and not nan
        outExponent = clampedExponent + outExponentBias;
    }
    assert(outExponent < (1u << outExponentBits));

    Integral packedMantissa = inMantissaBits > outMantissaBits
        ? inMantissa >> (inMantissaBits - outMantissaBits)
        : inMantissa << (outMantissaBits - inMantissaBits);
    // A NaN whose payload lived only in the truncated bits would now read as inf (mantissa == 0).
    // Set the top mantissa bit so it is still a NaN in the output layout.
    if(inIsNan && packedMantissa == 0 && outMantissaBits > 0) [[LLAMA_UNLIKELY]]
        packedMantissa = Integral{1} << (outMantissaBits - 1);
    const Integral packedExponent = outExponent << outMantissaBits;
    const Integral packedSign = inSign << outExponentBits << outMantissaBits;

    const auto outFloat = static_cast<Integral>(packedMantissa | packedExponent | packedSign);
    return outFloat;
}
83 
84  // TODO(bgruber): Boost.Hana generalizes these sorts of computations on mixed constants and values
85  template<typename E, typename M>
87  {
88  return llama::internal::BoxedValue{e.value() + m.value() + 1};
89  }
90 
91  template<auto E, auto M>
95  {
97  }
98 
104  template<typename Float, typename StoredIntegralCV, typename VHExp, typename VHMan, typename SizeType>
105  // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions,hicpp-special-member-functions)
107  : private VHExp
108  , private VHMan
109  , ProxyRefOpMixin<BitPackedFloatRef<Float, StoredIntegralCV, VHExp, VHMan, SizeType>, Float>
110  {
111  private:
112  static_assert(
113  std::is_same_v<Float, float> || std::is_same_v<Float, double>,
114  "Types other than float or double are not implemented yet");
115  static_assert(
116  std::numeric_limits<Float>::is_iec559,
117  "Only IEEE754/IEC559 floating point formats are implemented");
118 
119  using FloatBits = std::conditional_t<std::is_same_v<Float, float>, std::uint32_t, std::uint64_t>;
120 
122  FloatBits,
123  StoredIntegralCV,
124  decltype(integBits(std::declval<VHExp>(), std::declval<VHMan>())),
125  SizeType,
127  intref;
128 
129  public:
130  using value_type = Float;
131 
133  StoredIntegralCV* p,
134  SizeType bitOffset,
135  VHExp vhExp,
136  VHMan vhMan)
137  : VHExp{vhExp}
138  , VHMan{vhMan}
139  , intref{
140  p,
141  bitOffset,
142  integBits(vhExp, vhMan),
143  }
144  {
145  }
146 
148 
149  // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp)
151  {
152  *this = static_cast<value_type>(other);
153  return *this;
154  }
155 
156  // NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions)
157  LLAMA_FN_HOST_ACC_INLINE constexpr operator Float() const
158  {
159  using Bits = FloatBitTraits<Float>;
160  const FloatBits packedFloat = intref;
161  const FloatBits unpackedFloat
162  = repackFloat(packedFloat, VHMan::value(), VHExp::value(), Bits::mantissa, Bits::exponent);
163  Float f;
164  std::memcpy(&f, &unpackedFloat, sizeof(Float));
165  return f;
166  }
167 
169  {
170  using Bits = FloatBitTraits<Float>;
171  FloatBits unpackedFloat = 0;
172  std::memcpy(&unpackedFloat, &f, sizeof(Float));
173  const FloatBits packedFloat
174  = repackFloat(unpackedFloat, Bits::mantissa, Bits::exponent, VHMan::value(), VHExp::value());
175  intref = packedFloat;
176  return *this;
177  }
178  };
179 
180  template<typename RecordDim>
182  = std::conditional_t<mp_contains<FlatRecordDim<RecordDim>, double>::value, std::uint64_t, std::uint32_t>;
183  } // namespace internal
184 
185  // TODO(bgruber): I would like to allow zero mantissa bits, which would then no longer support INF. Likewise,
186  // support to skip the sign bit would also be great.
200  template<
201  typename TArrayExtents,
202  typename TRecordDim,
203  typename ExponentBits = typename TArrayExtents::value_type,
204  typename MantissaBits = ExponentBits,
205  typename TLinearizeArrayIndexFunctor = LinearizeArrayIndexRight,
206  typename TStoredIntegral = internal::StoredIntegralFor<TRecordDim>>
208  : MappingBase<TArrayExtents, TRecordDim>
209  , llama::internal::BoxedValue<ExponentBits, 0>
210  , llama::internal::BoxedValue<MantissaBits, 1>
211  {
212  private:
216  using size_type = typename TArrayExtents::value_type;
217 
218  public:
219  using LinearizeArrayIndexFunctor = TLinearizeArrayIndexFunctor;
220  using StoredIntegral = TStoredIntegral;
221  static constexpr std::size_t blobCount = mp_size<FlatRecordDim<TRecordDim>>::value;
222 
224  constexpr auto exponentBits() const -> size_type
225  {
226  return static_cast<size_type>(VHExp::value());
227  }
228 
230  constexpr auto mantissaBits() const -> size_type
231  {
232  return static_cast<size_type>(VHMan::value());
233  }
234 
236  constexpr explicit BitPackedFloatSoA(
237  TArrayExtents extents = {},
238  ExponentBits exponentBits = {},
239  MantissaBits mantissaBits = {},
240  TRecordDim = {})
241  : Base(extents)
242  , VHExp{exponentBits}
243  , VHMan{mantissaBits}
244  {
245  assert(this->exponentBits() > 0);
246  }
247 
249  constexpr auto blobSize(size_type /*blobIndex*/) const -> size_type
250  {
251  constexpr auto bitsPerStoredIntegral = static_cast<size_type>(sizeof(StoredIntegral) * CHAR_BIT);
252  const auto bitsNeeded
253  = LinearizeArrayIndexFunctor{}.size(Base::extents()) * (exponentBits() + mantissaBits() + 1);
254  return roundUpToMultiple(bitsNeeded, bitsPerStoredIntegral) / CHAR_BIT;
255  }
256 
257  template<std::size_t... RecordCoords>
259  {
260  return true;
261  }
262 
263  template<std::size_t... RecordCoords, typename Blobs>
265  typename Base::ArrayIndex ai,
267  Blobs& blobs) const
268  {
269  constexpr auto blob = llama::flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>;
270  const auto bitOffset
271  = LinearizeArrayIndexFunctor{}(ai, Base::extents()) * (exponentBits() + mantissaBits() + 1);
272 
273  using QualifiedStoredIntegral = CopyConst<Blobs, StoredIntegral>;
274  using DstType = GetType<TRecordDim, RecordCoord<RecordCoords...>>;
277  reinterpret_cast<QualifiedStoredIntegral*>(&blobs[blob][0]),
278  bitOffset,
279  static_cast<const VHExp&>(*this),
280  static_cast<const VHMan&>(*this)};
282  }
283  };
284 
288  template<
289  typename ExponentBits = unsigned,
290  typename MantissaBits = ExponentBits,
291  typename LinearizeArrayIndexFunctor = LinearizeArrayIndexRight,
292  typename StoredIntegral = void>
294  {
295  template<typename ArrayExtents, typename RecordDim>
297  ArrayExtents,
298  RecordDim,
299  ExponentBits,
300  MantissaBits,
301  LinearizeArrayIndexFunctor,
302  std::conditional_t<
303  !std::is_void_v<StoredIntegral>,
304  StoredIntegral,
306  };
307 
// Trait: false for every type unless a specialization says otherwise.
template<typename Mapping>
inline constexpr bool isBitPackedFloatSoA{false};
311 
313  template<typename... Ts>
314  inline constexpr bool isBitPackedFloatSoA<BitPackedFloatSoA<Ts...>> = true;
315 
317  template<
318  typename TArrayExtents,
319  typename TRecordDim,
320  typename ExponentBits = typename TArrayExtents::value_type,
321  typename MantissaBits = ExponentBits,
322  typename TLinearizeArrayIndexFunctor = LinearizeArrayIndexRight,
323  template<typename> typename PermuteFields = PermuteFieldsInOrder,
324  typename TStoredIntegral = internal::StoredIntegralFor<TRecordDim>>
326  : MappingBase<TArrayExtents, TRecordDim>
327  , llama::internal::BoxedValue<ExponentBits, 0>
328  , llama::internal::BoxedValue<MantissaBits, 1>
329  {
330  private:
334  using size_type = typename TArrayExtents::value_type;
335 
336  public:
337  using LinearizeArrayIndexFunctor = TLinearizeArrayIndexFunctor;
338  using StoredIntegral = TStoredIntegral;
339 
340  using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;
341  static constexpr std::size_t blobCount = 1;
342 
344  constexpr auto exponentBits() const -> size_type
345  {
346  return static_cast<size_type>(VHExp::value());
347  }
348 
350  constexpr auto mantissaBits() const -> size_type
351  {
352  return static_cast<size_type>(VHMan::value());
353  }
354 
356  constexpr explicit BitPackedFloatAoS(
357  TArrayExtents extents = {},
358  ExponentBits exponentBits = {},
359  MantissaBits mantissaBits = {},
360  TRecordDim = {})
361  : Base(extents)
362  , VHExp{exponentBits}
363  , VHMan{mantissaBits}
364  {
365  assert(this->exponentBits() > 0);
366  }
367 
369  constexpr auto blobSize(size_type /*blobIndex*/) const -> size_type
370  {
371  constexpr auto bitsPerStoredIntegral = static_cast<size_type>(sizeof(StoredIntegral) * CHAR_BIT);
372  const auto bitsNeeded = TLinearizeArrayIndexFunctor{}.size(Base::extents())
373  * static_cast<size_type>(exponentBits() + mantissaBits() + 1)
374  * static_cast<size_type>(flatFieldCount<TRecordDim>);
375  return roundUpToMultiple(bitsNeeded, bitsPerStoredIntegral) / CHAR_BIT;
376  }
377 
378  template<std::size_t... RecordCoords>
380  {
381  return true;
382  }
383 
384  template<std::size_t... RecordCoords, typename Blobs>
386  typename Base::ArrayIndex ai,
388  Blobs& blobs) const
389  {
390  constexpr auto flatFieldIndex = static_cast<size_type>(
391  Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>);
392  const auto bitOffset = ((TLinearizeArrayIndexFunctor{}(ai, Base::extents())
393  * static_cast<size_type>(flatFieldCount<TRecordDim>))
394  + flatFieldIndex)
395  * static_cast<size_type>(exponentBits() + mantissaBits() + 1);
396 
397  using QualifiedStoredIntegral = CopyConst<Blobs, StoredIntegral>;
398  using DstType = GetType<TRecordDim, RecordCoord<RecordCoords...>>;
401  reinterpret_cast<QualifiedStoredIntegral*>(&blobs[0][0]),
402  bitOffset,
403  static_cast<const VHExp&>(*this),
404  static_cast<const VHMan&>(*this)};
406  }
407  };
408 
410  template<
411  typename ExponentBits = unsigned,
412  typename MantissaBits = ExponentBits,
413  typename LinearizeArrayIndexFunctor = LinearizeArrayIndexRight,
414  template<typename> typename PermuteFields = PermuteFieldsInOrder,
415  typename StoredIntegral = void>
417  {
418  template<typename ArrayExtents, typename RecordDim>
420  ArrayExtents,
421  RecordDim,
422  ExponentBits,
423  MantissaBits,
424  LinearizeArrayIndexFunctor,
425  PermuteFields,
426  std::conditional_t<
427  !std::is_void_v<StoredIntegral>,
428  StoredIntegral,
430  };
431 
// Trait: false for every type unless a specialization says otherwise.
template<typename Mapping>
inline constexpr bool isBitPackedFloatAoS{false};
435 
437  template<
438  typename ArrayExtents,
439  typename RecordDim,
440  typename ExponentBits,
441  typename MantissaBits,
442  typename LinearizeArrayIndexFunctor,
443  template<typename>
444  typename PermuteFields,
445  typename StoredIntegral>
446  inline constexpr bool isBitPackedFloatAoS<BitPackedFloatAoS<
447  ArrayExtents,
448  RecordDim,
449  ExponentBits,
450  MantissaBits,
451  LinearizeArrayIndexFunctor,
452  PermuteFields,
453  StoredIntegral>>
454  = true;
455 } // namespace llama::mapping
#define LLAMA_UNLIKELY
Expands to unlikely if [[unlikely]] supported by the compiler. Use as [[LLAMA_UNLIKELY]].
Definition: macros.hpp:178
#define LLAMA_EXPORT
Definition: macros.hpp:192
#define LLAMA_DECLSPEC_EMPTY_BASES
Definition: macros.hpp:164
#define LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING
Definition: macros.hpp:141
#define LLAMA_FN_HOST_ACC_INLINE
Definition: macros.hpp:96
#define LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
Definition: macros.hpp:153
constexpr auto memcpy
Definition: Copy.hpp:32
std::conditional_t< mp_contains< FlatRecordDim< RecordDim >, double >::value, std::uint64_t, std::uint32_t > StoredIntegralFor
auto repackFloat(Integral inFloat, unsigned inMantissaBits, unsigned inExponentBits, unsigned outMantissaBits, unsigned outExponentBits) -> Integral
constexpr bool isBitPackedFloatAoS
constexpr bool isBitPackedFloatSoA
std::integral_constant< decltype(V), V > Constant
Used as template argument to specify a constant/compile-time value.
Definition: Core.hpp:829
ArrayExtents(Args...) -> ArrayExtents< typename internal::IndexTypeFromArgs< std::size_t, Args... >::type,(Args{}, dyn)... >
constexpr std::size_t flatRecordCoord
Definition: Core.hpp:517
std::conditional_t< std::is_const_v< FromT >, const ToT, ToT > CopyConst
Alias for ToT, adding const if FromT is const qualified.
Definition: Core.hpp:824
constexpr auto roundUpToMultiple(Integral n, Integral mult) -> Integral
Returns the integral n rounded up to be a multiple of mult.
Definition: Core.hpp:578
typename internal::GetTypeImpl< RecordDim, RecordCoordOrTags... >::type GetType
Definition: Core.hpp:388
CRTP mixin for proxy reference types to support all compound assignment and increment/decrement operators.
constexpr auto value() const
Definition: Core.hpp:864
PermuteFields< FlatRecordDim< TRecordDim > > Permuter
constexpr BitPackedFloatAoS(TArrayExtents extents={}, ExponentBits exponentBits={}, MantissaBits mantissaBits={}, TRecordDim={})
constexpr auto mantissaBits() const -> size_type
static constexpr auto isComputed(RecordCoord< RecordCoords... >)
TLinearizeArrayIndexFunctor LinearizeArrayIndexFunctor
constexpr auto compute(typename Base::ArrayIndex ai, RecordCoord< RecordCoords... >, Blobs &blobs) const
constexpr auto exponentBits() const -> size_type
constexpr auto blobSize(size_type) const -> size_type
static constexpr auto isComputed(RecordCoord< RecordCoords... >)
constexpr auto compute(typename Base::ArrayIndex ai, RecordCoord< RecordCoords... >, Blobs &blobs) const
constexpr auto exponentBits() const -> size_type
TLinearizeArrayIndexFunctor LinearizeArrayIndexFunctor
constexpr BitPackedFloatSoA(TArrayExtents extents={}, ExponentBits exponentBits={}, MantissaBits mantissaBits={}, TRecordDim={})
constexpr auto mantissaBits() const -> size_type
constexpr auto blobSize(size_type) const -> size_type
typename ArrayExtents::Index ArrayIndex
Definition: Common.hpp:24
Retains the order of the record dimension's fields.
Definition: Common.hpp:182
constexpr auto operator=(Float f) -> BitPackedFloatRef &
constexpr BitPackedFloatRef(StoredIntegralCV *p, SizeType bitOffset, VHExp vhExp, VHMan vhMan)
constexpr auto operator=(const BitPackedFloatRef &other) -> BitPackedFloatRef &
BitPackedFloatRef(const BitPackedFloatRef &)=default