alpaka
Abstraction Library for Parallel Kernel Acceleration
BlockSharedMemStMemberImpl.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Jeffrey Kelling, Rene Widera, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
8 #include "alpaka/core/Assert.hpp"
10 
11 #include <algorithm>
12 #include <cstdint>
13 #include <functional>
14 #include <limits>
15 #include <type_traits>
16 
17 namespace alpaka::detail
18 {
19  //! Implementation of static block shared memory provider.
20  //!
21  //! externally allocated fixed-size memory, likely provided by BlockSharedMemDynMember.
22  template<std::size_t TMinDataAlignBytes = core::vectorization::defaultAlignment>
24  {
25  struct MetaData
26  {
27  //! Unique id if the next data chunk.
28  std::uint32_t id = std::numeric_limits<std::uint32_t>::max();
29  //! Offset to the next meta data header, relative to m_mem.
30  //! To access the meta data header the offset must by aligned first.
31  std::uint32_t offset = 0;
32  };
33 
34  static constexpr std::uint32_t metaDataSize = sizeof(MetaData);
35 
36  public:
37 #ifndef NDEBUG
38  BlockSharedMemStMemberImpl(std::uint8_t* mem, std::size_t capacity)
39  : m_mem(mem)
40  , m_capacity(static_cast<std::uint32_t>(capacity))
41  {
42  ALPAKA_ASSERT_ACC((m_mem == nullptr) == (m_capacity == 0u));
43  }
44 #else
45  BlockSharedMemStMemberImpl(std::uint8_t* mem, std::size_t) : m_mem(mem)
46  {
47  }
48 #endif
49 
50  template<typename T>
51  void alloc(std::uint32_t id) const
52  {
53  // Add meta data chunk in front of the user data
54  m_allocdBytes = varChunkEnd<MetaData>(m_allocdBytes);
55  ALPAKA_ASSERT_ACC(m_allocdBytes <= m_capacity);
56  auto* meta = getLatestVarPtr<MetaData>();
57 
58  // Allocate variable
59  m_allocdBytes = varChunkEnd<T>(m_allocdBytes);
60  ALPAKA_ASSERT_ACC(m_allocdBytes <= m_capacity);
61 
62  // Update meta data with id and offset for the allocated variable.
63  meta->id = id;
64  meta->offset = m_allocdBytes;
65  }
66 
67 #if BOOST_COMP_GNUC
68 # pragma GCC diagnostic push
69 # pragma GCC diagnostic ignored \
70  "-Wcast-align" // "cast from 'unsigned char*' to 'unsigned int*' increases required alignment of target type"
71 #endif
72 
73  //! Give the pointer to an exiting variable
74  //!
75  //! @tparam T type of the variable
76  //! @param id unique id of the variable
77  //! @return nullptr if variable with id not exists
78  template<typename T>
79  auto getVarPtr(std::uint32_t id) const -> T*
80  {
81  // Offset in bytes to the next unaligned meta data header behind the variable.
82  std::uint32_t off = 0;
83 
84  // Iterate over allocated data only
85  while(off < m_allocdBytes)
86  {
87  // Adjust offset to be aligned
88  std::uint32_t const alignedMetaDataOffset
89  = varChunkEnd<MetaData>(off) - static_cast<std::uint32_t>(sizeof(MetaData));
91  (alignedMetaDataOffset + static_cast<std::uint32_t>(sizeof(MetaData))) <= m_allocdBytes);
92  auto* metaDataPtr = reinterpret_cast<MetaData*>(m_mem + alignedMetaDataOffset);
93  off = metaDataPtr->offset;
94 
95  if(metaDataPtr->id == id)
96  return reinterpret_cast<T*>(&m_mem[off - sizeof(T)]);
97  }
98 
99  // Variable not found.
100  return nullptr;
101  }
102 
103  //! Get last allocated variable.
104  template<typename T>
105  auto getLatestVarPtr() const -> T*
106  {
107  return reinterpret_cast<T*>(&m_mem[m_allocdBytes - sizeof(T)]);
108  }
109 
110  private:
111 #if BOOST_COMP_GNUC
112 # pragma GCC diagnostic pop
113 #endif
114 
115  //! Byte offset to the end of the memory chunk
116  //!
117  //! Calculate bytes required to store a type with a aligned starting address in m_mem.
118  //! Start offset to the origin of the user data chunk can be calculated with `result - sizeof(T)`.
119  //! The padding is always before the origin of the user data chunk and can be zero byte.
120  //!
121  //! \tparam T type should fit into the chunk
122  //! \param byteOffset Current byte offset.
123  //! \result Byte offset to the end of the data chunk, relative to m_mem..
124  template<typename T>
125  auto varChunkEnd(std::uint32_t byteOffset) const -> std::uint32_t
126  {
127  auto const ptr = reinterpret_cast<std::size_t>(m_mem + byteOffset);
128  constexpr size_t align = std::max(TMinDataAlignBytes, alignof(T));
129  std::size_t const newPtrAdress = ((ptr + align - 1u) / align) * align + sizeof(T);
130  return static_cast<uint32_t>(newPtrAdress - reinterpret_cast<std::size_t>(m_mem));
131  }
132 
133  //! Offset in bytes relative to m_mem to next free data area.
134  //! The last aligned before the free area is always a meta data header.
135  mutable std::uint32_t m_allocdBytes = 0u;
136 
137  //! Memory layout
138  //! |Header|Padding|Variable|Padding|Header|....uninitialized Data ....
139  //! Size of padding can be zero if data after padding is already aligned.
140  std::uint8_t* const m_mem;
141 #ifndef NDEBUG
142  const std::uint32_t m_capacity;
143 #endif
144  };
145 } // namespace alpaka::detail
#define ALPAKA_ASSERT_ACC(...)
ALPAKA_ASSERT_ACC is an assert-like macro.
Definition: Assert.hpp:52
Implementation of static block shared memory provider.
auto getVarPtr(std::uint32_t id) const -> T *
Give the pointer to an existing variable.
BlockSharedMemStMemberImpl(std::uint8_t *mem, std::size_t capacity)
auto getLatestVarPtr() const -> T *
Get last allocated variable.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric va...
Definition: Traits.hpp:1263
constexpr auto offset
Definition: Extent.hpp:34