13 #if __has_include(<cuda_runtime.h>)
14 # include <cuda_runtime.h>
16 #if __has_include(<sycl/sycl.hpp>)
17 # include <sycl/sycl.hpp>
/// Forward declaration of alpaka's buffer allocation function, so that it can be named in this header
/// (it is called as alpaka::allocBuf<std::byte, Size>(dev, extent) further down in this file).
/// \tparam TElem Element type of the buffer.
/// \tparam TIdx Index type of the buffer.
/// \param dev The device passed through to the allocation.
/// \param extent The extent passed through to the allocation.
template<typename TElem, typename TIdx, typename TExtent, typename TDev>
auto allocBuf(const TDev& dev, const TExtent& extent);
31 template<std::
size_t BytesToReserve>
34 template<std::
size_t Alignment>
39 template<std::
size_t Alignment>
41 std::integral_constant<std::size_t, Alignment>,
44 assert(count == BytesToReserve);
48 #ifdef __cpp_lib_concepts
57 template<std::
size_t Alignment>
58 auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count)
const
61 =
static_cast<std::byte*
>(::operator
new[](count *
sizeof(std::byte), std::align_val_t{Alignment}));
62 auto deleter = [](std::byte* ptr) { ::operator
delete[](ptr, std::align_val_t{Alignment}); };
63 return std::unique_ptr<std::byte[], decltype(deleter)>{ptr, deleter};
66 #ifdef __cpp_lib_concepts
67 static_assert(BlobAllocator<UniquePtr>);
75 template<std::
size_t Alignment>
76 auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count)
const
77 -> std::shared_ptr<std::byte[]>
80 =
static_cast<std::byte*
>(::operator
new[](count *
sizeof(std::byte), std::align_val_t{Alignment}));
81 auto deleter = [](std::byte* ptr) { ::operator
delete[](ptr, std::align_val_t{Alignment}); };
82 return {ptr, deleter};
85 #ifdef __cpp_lib_concepts
86 static_assert(BlobAllocator<SharedPtr>);
91 template<
typename T, std::
size_t Alignment>
105 return static_cast<T*
>(::operator
new[](n *
sizeof(T), std::align_val_t{Alignment}));
110 ::operator
delete[](p, std::align_val_t{Alignment});
113 template<
typename T2>
121 return !(*
this == other);
135 template<std::
size_t Alignment>
136 inline auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count)
const
138 return std::vector<std::byte, AlignedAllocator<std::byte, Alignment>>(count);
141 #ifdef __cpp_lib_concepts
142 static_assert(BlobAllocator<Vector>);
145 #if __has_include(<cuda_runtime.h>)
152 inline static const auto deleter = [](
void* p)
154 if(
const auto code = cudaFree(p); code != cudaSuccess)
155 throw std::runtime_error(std::string{
"cudaFree failed with code "} + cudaGetErrorString(code));
158 template<std::
size_t FieldAlignment>
159 inline auto operator()(std::integral_constant<std::size_t, FieldAlignment>, std::size_t count)
const
161 std::byte* p =
nullptr;
162 if(
const auto code = cudaMalloc(&p, count); code != cudaSuccess)
163 throw std::runtime_error(std::string{
"cudaMalloc failed with code "} + cudaGetErrorString(code));
164 if(
reinterpret_cast<std::uintptr_t
>(p) & (
FieldAlignment - 1 != 0u))
165 throw std::runtime_error{
"cudaMalloc does not align sufficiently"};
166 return std::unique_ptr<std::byte[], decltype(deleter)>(p, deleter);
173 template<
typename Size,
typename Dev>
178 template<std::
size_t Alignment>
179 inline auto operator()(std::integral_constant<std::size_t, Alignment>, std::size_t count)
const
181 return alpaka::allocBuf<std::byte, Size>(dev,
static_cast<Size
>(count));
185 #if __has_include(<sycl/sycl.hpp>)
190 struct SyclMallocShared
194 static auto makeDeleter(sycl::queue q)
197 return [q](
void* p) { sycl::free(p, q); };
200 template<std::
size_t FieldAlignment>
201 inline auto operator()(std::integral_constant<std::size_t, FieldAlignment>, std::size_t count)
const
203 std::byte* p = sycl::aligned_alloc_shared<std::byte>(
FieldAlignment, count, queue);
204 if(
reinterpret_cast<std::uintptr_t
>(p) & (
FieldAlignment - 1 != 0u))
205 throw std::runtime_error{
"sycl::malloc_shared does not align sufficiently"};
206 return std::unique_ptr<std::byte[], decltype(makeDeleter(queue))>(p, makeDeleter(queue));
#define LLAMA_FN_HOST_ACC_INLINE
auto allocBuf(const TDev &dev, const TExtent &extent)
An STL-compatible allocator that allows the alignment to be specified.
void deallocate(T *p, std::size_t)
auto operator!=(const AlignedAllocator< T, Alignment > &other) const -> bool
auto allocate(std::size_t n) -> T *
auto operator==(const AlignedAllocator< T, Alignment > &) const -> bool
AlignedAllocator() noexcept=default
Allocates alpaka buffers as blobs.
auto operator()(std::integral_constant< std::size_t, Alignment >, std::size_t count) const
auto operator()(std::integral_constant< std::size_t, Alignment >, [[maybe_unused]] std::size_t count) const -> AlignedArray< Alignment >
auto operator()(std::integral_constant< std::size_t, Alignment >, std::size_t count) const -> std::shared_ptr< std::byte[]>
auto operator()(std::integral_constant< std::size_t, Alignment >, std::size_t count) const
auto operator()(std::integral_constant< std::size_t, Alignment >, std::size_t count) const