Low-Level Abstraction of Memory Access
BlobAllocators.hpp
Go to the documentation of this file.
1 // Copyright 2022 Alexander Matthes, Bernhard Manfred Gruber
2 // SPDX-License-Identifier: MPL-2.0
3 
4 #pragma once
5 
6 #include "Array.hpp"
7 #include "Concepts.hpp"
8 #include "macros.hpp"
9 
10 #include <cstddef>
11 #include <memory>
12 #include <vector>
13 #if __has_include(<cuda_runtime.h>)
14 # include <cuda_runtime.h>
15 #endif
16 #if __has_include(<sycl/sycl.hpp>)
17 # include <sycl/sycl.hpp>
18 #endif
19 
namespace alpaka
{
    // Declaration only: lets the AlpakaBuf allocator below name alpaka::allocBuf.
    // NOTE(review): presumably the definition is supplied by the alpaka headers the user includes - confirm.
    template<typename TElem, typename TIdx, typename TExtent, typename TDev>
    auto allocBuf(const TDev& dev, const TExtent& extent); // NOLINT(readability-redundant-declaration)
} // namespace alpaka
25 
27 {
31  template<std::size_t BytesToReserve>
32  struct Array
33  {
34  template<std::size_t Alignment>
35  struct alignas(Alignment) AlignedArray : llama::Array<std::byte, BytesToReserve>
36  {
37  };
38 
39  template<std::size_t Alignment>
41  std::integral_constant<std::size_t, Alignment>,
42  [[maybe_unused]] std::size_t count) const -> AlignedArray<Alignment>
43  {
44  assert(count == BytesToReserve);
45  return {};
46  }
47  };
48 #ifdef __cpp_lib_concepts
49  static_assert(BlobAllocator<Array<64>>);
50 #endif
51 
/// Blob allocator returning heap storage owned by a std::unique_ptr.
struct UniquePtr
{
    /// Allocates \p count bytes with the aligned form of operator new[].
    /// \tparam Alignment Alignment passed to the allocation and to the matching deallocation.
    /// \param count Number of bytes to allocate.
    /// \return A std::unique_ptr<std::byte[], Deleter> whose deleter calls the aligned operator delete[].
    template<std::size_t Alignment>
    auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count) const
    {
        // The deleter must pass the same alignment that was used for the allocation.
        auto release = [](std::byte* storage) { ::operator delete[](storage, std::align_val_t{Alignment}); };
        using Blob = std::unique_ptr<std::byte[], decltype(release)>;

        void* const raw = ::operator new[](count * sizeof(std::byte), std::align_val_t{Alignment});
        return Blob{static_cast<std::byte*>(raw), release};
    }
};
66 #ifdef __cpp_lib_concepts
67  static_assert(BlobAllocator<UniquePtr>);
68 #endif
69 
/// Blob allocator returning heap storage owned by a std::shared_ptr, so copies of the blob refer to the
/// same memory.
struct SharedPtr
{
    /// Allocates \p count bytes with the aligned form of operator new[].
    /// \tparam Alignment Alignment passed to the allocation and to the matching deallocation.
    /// \param count Number of bytes to allocate.
    /// \return A std::shared_ptr<std::byte[]> whose deleter calls the aligned operator delete[].
    template<std::size_t Alignment>
    auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count) const
        -> std::shared_ptr<std::byte[]>
    {
        // The deleter must pass the same alignment that was used for the allocation.
        auto release = [](std::byte* storage) { ::operator delete[](storage, std::align_val_t{Alignment}); };

        void* const raw = ::operator new[](count * sizeof(std::byte), std::align_val_t{Alignment});
        return std::shared_ptr<std::byte[]>{static_cast<std::byte*>(raw), release};
    }
};
85 #ifdef __cpp_lib_concepts
86  static_assert(BlobAllocator<SharedPtr>);
87 #endif
88 
/// An STL compatible allocator that allows specifying the alignment of its allocations.
/// \tparam T Element type to allocate.
/// \tparam Alignment Alignment in bytes passed to the aligned operator new[] / operator delete[].
template<typename T, std::size_t Alignment>
struct AlignedAllocator
{
    using value_type = T;

    inline AlignedAllocator() noexcept = default;

    /// Converting constructor from an allocator for another element type with the same alignment.
    /// The allocator holds no state, so nothing is copied.
    template<typename T2>
    inline explicit AlignedAllocator(const AlignedAllocator<T2, Alignment>&) noexcept
    {
    }

    /// Allocates uninitialized storage for \p n objects of type T, aligned to Alignment.
    inline auto allocate(std::size_t n) -> T*
    {
        return static_cast<T*>(::operator new[](n * sizeof(T), std::align_val_t{Alignment}));
    }

    /// Releases storage obtained from allocate(). The element count is not needed.
    inline void deallocate(T* p, std::size_t)
    {
        ::operator delete[](p, std::align_val_t{Alignment});
    }

    /// Names the allocator for another element type. It has to be spelled out because the non-type
    /// Alignment parameter keeps std::allocator_traits from rebinding this template automatically.
    template<typename T2>
    struct rebind // NOLINT(readability-identifier-naming)
    {
        using other = AlignedAllocator<T2, Alignment>;
    };

    auto operator!=(const AlignedAllocator<T, Alignment>& other) const -> bool
    {
        return !(*this == other);
    }

    /// All instances are interchangeable, so they always compare equal.
    auto operator==(const AlignedAllocator<T, Alignment>&) const -> bool
    {
        return true;
    }
};
129 
133  struct Vector
134  {
135  template<std::size_t Alignment>
136  inline auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count) const
137  {
138  return std::vector<std::byte, AlignedAllocator<std::byte, Alignment>>(count);
139  }
140  };
141 #ifdef __cpp_lib_concepts
142  static_assert(BlobAllocator<Vector>);
143 #endif
144 
145 #if __has_include(<cuda_runtime.h>)
150  struct CudaMalloc
151  {
152  inline static const auto deleter = [](void* p)
153  {
154  if(const auto code = cudaFree(p); code != cudaSuccess)
155  throw std::runtime_error(std::string{"cudaFree failed with code "} + cudaGetErrorString(code));
156  };
157 
158  template<std::size_t FieldAlignment>
159  inline auto operator()(std::integral_constant<std::size_t, FieldAlignment>, std::size_t count) const
160  {
161  std::byte* p = nullptr;
162  if(const auto code = cudaMalloc(&p, count); code != cudaSuccess)
163  throw std::runtime_error(std::string{"cudaMalloc failed with code "} + cudaGetErrorString(code));
164  if(reinterpret_cast<std::uintptr_t>(p) & (FieldAlignment - 1 != 0u))
165  throw std::runtime_error{"cudaMalloc does not align sufficiently"};
166  return std::unique_ptr<std::byte[], decltype(deleter)>(p, deleter);
167  }
168  };
169 #endif
170 
173  template<typename Size, typename Dev>
174  struct AlpakaBuf
175  {
176  Dev& dev;
177 
178  template<std::size_t Alignment>
179  inline auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count) const
180  {
181  return alpaka::allocBuf<std::byte, Size>(dev, static_cast<Size>(count));
182  }
183  };
184 
185 #if __has_include(<sycl/sycl.hpp>)
190  struct SyclMallocShared
191  {
192  sycl::queue queue;
193 
194  static auto makeDeleter(sycl::queue q)
195  {
196  // create lambda in function independent of FieldAlignment template paramter to avoid different blob types
197  return [q](void* p) { sycl::free(p, q); };
198  }
199 
200  template<std::size_t FieldAlignment>
201  inline auto operator()(std::integral_constant<std::size_t, FieldAlignment>, std::size_t count) const
202  {
203  std::byte* p = sycl::aligned_alloc_shared<std::byte>(FieldAlignment, count, queue);
204  if(reinterpret_cast<std::uintptr_t>(p) & (FieldAlignment - 1 != 0u))
205  throw std::runtime_error{"sycl::malloc_shared does not align sufficiently"};
206  return std::unique_ptr<std::byte[], decltype(makeDeleter(queue))>(p, makeDeleter(queue));
207  }
208  };
209 #endif
210 } // namespace llama::bloballoc
#define LLAMA_EXPORT
Definition: macros.hpp:192
#define LLAMA_FN_HOST_ACC_INLINE
Definition: macros.hpp:96
auto allocBuf(const TDev &dev, const TExtent &extent)
An STL-compatible allocator that allows the alignment to be specified.
void deallocate(T *p, std::size_t)
auto operator!=(const AlignedAllocator< T, Alignment > &other) const -> bool
auto allocate(std::size_t n) -> T *
auto operator==(const AlignedAllocator< T, Alignment > &) const -> bool
AlignedAllocator() noexcept=default
Allocates alpaka buffers as blobs.
auto operator()(std::integral_constant< std::size_t, Alignment >, std::size_t count) const
auto operator()(std::integral_constant< std::size_t, Alignment >, [[maybe_unused]] std::size_t count) const -> AlignedArray< Alignment >
auto operator()(std::integral_constant< std::size_t, Alignment >, std::size_t count) const -> std::shared_ptr< std::byte[]>
auto operator()(std::integral_constant< std::size_t, Alignment >, std::size_t count) const
auto operator()(std::integral_constant< std::size_t, Alignment >, std::size_t count) const