alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
BlockSharedMemStMemberImpl.hpp
Go to the documentation of this file.
1/* Copyright 2022 Jeffrey Kelling, Rene Widera, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10
11#include <algorithm>
12#include <cstdint>
13#include <functional>
14#include <limits>
15#include <type_traits>
16
17namespace alpaka::detail
18{
19 //! Implementation of static block shared memory provider.
20 //!
21 //! Operates on externally allocated, fixed-size memory, likely provided by BlockSharedMemDynMember.
22 template<std::size_t TMinDataAlignBytes = core::vectorization::defaultAlignment>
24 {
//! Bookkeeping header that is stored in front of every allocated variable.
struct MetaData
{
    //! Unique id of the data chunk that follows this header.
    //! Defaults to the largest 32 bit value.
    std::uint32_t id{std::numeric_limits<std::uint32_t>::max()};
    //! Offset to the next meta data header, relative to m_mem.
    //! The offset must be aligned first before it is used to access that header.
    std::uint32_t offset{0u};
};

//! Size of a single meta data header in bytes.
static constexpr std::uint32_t metaDataSize = sizeof(MetaData);
35
36 public:
37#ifndef NDEBUG
38 BlockSharedMemStMemberImpl(std::uint8_t* mem, std::size_t capacity)
39 : m_mem(mem)
40 , m_capacity(static_cast<std::uint32_t>(capacity))
41 {
42 ALPAKA_ASSERT_ACC((m_mem == nullptr) == (m_capacity == 0u));
43 }
44#else
45 BlockSharedMemStMemberImpl(std::uint8_t* mem, std::size_t) : m_mem(mem)
46 {
47 }
48#endif
49
50 template<typename T>
51 void alloc(std::uint32_t id) const
52 {
53 // Add meta data chunk in front of the user data
54 m_allocdBytes = varChunkEnd<MetaData>(m_allocdBytes);
55 ALPAKA_ASSERT_ACC(m_allocdBytes <= m_capacity);
56 auto* meta = getLatestVarPtr<MetaData>();
57
58 // Allocate variable
59 m_allocdBytes = varChunkEnd<T>(m_allocdBytes);
60 ALPAKA_ASSERT_ACC(m_allocdBytes <= m_capacity);
61
62 // Update meta data with id and offset for the allocated variable.
63 meta->id = id;
64 meta->offset = m_allocdBytes;
65 }
66
67 //! Give the pointer to an exiting variable
68 //!
69 //! @tparam T type of the variable
70 //! @param id unique id of the variable
71 //! @return nullptr if variable with id not exists
72 template<typename T>
73 auto getVarPtr(std::uint32_t id) const -> T*
74 {
75 // Offset in bytes to the next unaligned meta data header behind the variable.
76 std::uint32_t off = 0;
77
78 // Iterate over allocated data only
79 while(off < m_allocdBytes)
80 {
81 // Adjust offset to be aligned
82 std::uint32_t const alignedMetaDataOffset
83 = varChunkEnd<MetaData>(off) - static_cast<std::uint32_t>(sizeof(MetaData));
85 (alignedMetaDataOffset + static_cast<std::uint32_t>(sizeof(MetaData))) <= m_allocdBytes);
86 auto* metaDataPtr = reinterpret_cast<MetaData*>(
87 __builtin_assume_aligned(m_mem + alignedMetaDataOffset, alignof(MetaData)));
88 off = metaDataPtr->offset;
89
90 if(metaDataPtr->id == id)
91 return reinterpret_cast<T*>(__builtin_assume_aligned(&m_mem[off - sizeof(T)], alignof(T)));
92 }
93
94 // Variable not found.
95 return nullptr;
96 }
97
98 //! Get last allocated variable.
99 template<typename T>
100 auto getLatestVarPtr() const -> T*
101 {
102 return reinterpret_cast<T*>(__builtin_assume_aligned(&m_mem[m_allocdBytes - sizeof(T)], alignof(T)));
103 }
104
105 private:
106 //! Byte offset to the end of the memory chunk
107 //!
108 //! Calculate bytes required to store a type with a aligned starting address in m_mem.
109 //! Start offset to the origin of the user data chunk can be calculated with `result - sizeof(T)`.
110 //! The padding is always before the origin of the user data chunk and can be zero byte.
111 //!
112 //! \tparam T type should fit into the chunk
113 //! \param byteOffset Current byte offset.
114 //! \result Byte offset to the end of the data chunk, relative to m_mem..
115 template<typename T>
116 auto varChunkEnd(std::uint32_t byteOffset) const -> std::uint32_t
117 {
118 auto const ptr = reinterpret_cast<std::size_t>(m_mem + byteOffset);
119 constexpr size_t align = std::max(TMinDataAlignBytes, alignof(T));
120 std::size_t const newPtrAdress = ((ptr + align - 1u) / align) * align + sizeof(T);
121 return static_cast<uint32_t>(newPtrAdress - reinterpret_cast<std::size_t>(m_mem));
122 }
123
124 //! Offset in bytes relative to m_mem to next free data area.
125 //! The last aligned before the free area is always a meta data header.
126 mutable std::uint32_t m_allocdBytes = 0u;
127
128 //! Memory layout
129 //! |Header|Padding|Variable|Padding|Header|....uninitialized Data ....
130 //! Size of padding can be zero if data after padding is already aligned.
131 std::uint8_t* const m_mem;
132#ifndef NDEBUG
133 const std::uint32_t m_capacity;
134#endif
135 };
136} // namespace alpaka::detail
#define ALPAKA_ASSERT_ACC(...)
ALPAKA_ASSERT_ACC is an assert-like macro.
Definition Assert.hpp:52
Implementation of static block shared memory provider.
auto getVarPtr(std::uint32_t id) const -> T *
Give the pointer to an exiting variable.
BlockSharedMemStMemberImpl(std::uint8_t *mem, std::size_t capacity)
auto getLatestVarPtr() const -> T *
Get last allocated variable.
STL namespace.