alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
BufCpu.hpp
Go to the documentation of this file.
1/* Copyright 2022 Alexander Matthes, Axel Huebl, Benjamin Worpitz, Andrea Bocci, Jan Stephan, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10#include "alpaka/core/Hip.hpp"
12#include "alpaka/dev/DevCpu.hpp"
13#include "alpaka/dev/Traits.hpp"
19#include "alpaka/vec/Vec.hpp"
20
21#include <functional>
22#include <memory>
23#include <type_traits>
24#include <utility>
25
26namespace alpaka
27{
28 namespace detail
29 {
30 //! The CPU memory buffer.
31 template<typename TElem, typename TDim, typename TIdx>
32 class BufCpuImpl final
33 {
34 static_assert(
35 !std::is_const_v<TElem>,
36 "The elem type of the buffer can not be const because the C++ Standard forbids containers of const "
37 "elements!");
38 static_assert(!std::is_const_v<TIdx>, "The idx type of the buffer can not be const!");
39
40 public:
41 template<typename TExtent>
43 DevCpu dev,
44 TElem* pMem,
45 std::function<void(TElem*)> deleter,
46 TExtent const& extent) noexcept
47 : m_dev(std::move(dev))
48 , m_extentElements(getExtentVecEnd<TDim>(extent))
49 , m_pMem(pMem)
50 , m_deleter(std::move(deleter))
51 {
53
54 static_assert(
55 TDim::value == Dim<TExtent>::value,
56 "The dimensionality of TExtent and the dimensionality of the TDim template parameter have to be "
57 "identical!");
58 static_assert(
59 std::is_same_v<TIdx, Idx<TExtent>>,
60 "The idx type of TExtent and the TIdx template parameter have to be identical!");
61
62#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
63 std::cout << __func__ << " e: " << m_extentElements << " ptr: " << static_cast<void*>(m_pMem)
64 << std::endl;
65#endif
66 }
67
69 auto operator=(BufCpuImpl&&) -> BufCpuImpl& = delete;
70
72 {
74
75 // NOTE: m_pMem is allowed to be a nullptr here.
77 }
78
79 public:
82 TElem* const m_pMem;
83 std::function<void(TElem*)> m_deleter;
84 };
85 } // namespace detail
86
87 //! The CPU memory buffer.
88 template<typename TElem, typename TDim, typename TIdx>
89 class BufCpu : public internal::ViewAccessOps<BufCpu<TElem, TDim, TIdx>>
90 {
91 public:
92 template<typename TExtent, typename Deleter>
93 ALPAKA_FN_HOST BufCpu(DevCpu const& dev, TElem* pMem, Deleter deleter, TExtent const& extent)
95 std::make_shared<detail::BufCpuImpl<TElem, TDim, TIdx>>(dev, pMem, std::move(deleter), extent)}
96 {
97 }
98
99 public:
100 std::shared_ptr<detail::BufCpuImpl<TElem, TDim, TIdx>> m_spBufCpuImpl;
101 };
102
103 namespace trait
104 {
105 //! The BufCpu device type trait specialization: every BufCpu<TElem, TDim, TIdx> reports DevCpu as its device type.
106 template<typename TElem, typename TDim, typename TIdx>
107 struct DevType<BufCpu<TElem, TDim, TIdx>>
108 {
109 using type = DevCpu;
110 };
111
112 //! The BufCpu device get trait specialization.
113 template<typename TElem, typename TDim, typename TIdx>
114 struct GetDev<BufCpu<TElem, TDim, TIdx>>
115 {
117 {
118 return buf.m_spBufCpuImpl->m_dev;
119 }
120 };
121
122 //! The BufCpu dimension getter trait specialization: the dimensionality is the buffer's TDim template argument.
123 template<typename TElem, typename TDim, typename TIdx>
124 struct DimType<BufCpu<TElem, TDim, TIdx>>
125 {
126 using type = TDim;
127 };
128
129 //! The BufCpu memory element type get trait specialization: the element type is the buffer's TElem template argument.
130 template<typename TElem, typename TDim, typename TIdx>
131 struct ElemType<BufCpu<TElem, TDim, TIdx>>
132 {
133 using type = TElem;
134 };
135
136 //! The BufCpu extents get trait specialization.
137 template<typename TElem, typename TDim, typename TIdx>
138 struct GetExtents<BufCpu<TElem, TDim, TIdx>>
139 {
141 {
142 return buf.m_spBufCpuImpl->m_extentElements;
143 }
144 };
145
146 //! The BufCpu native pointer get trait specialization.
147 template<typename TElem, typename TDim, typename TIdx>
148 struct GetPtrNative<BufCpu<TElem, TDim, TIdx>>
149 {
150 ALPAKA_FN_HOST static auto getPtrNative(BufCpu<TElem, TDim, TIdx> const& buf) -> TElem const*
151 {
152 return buf.m_spBufCpuImpl->m_pMem;
153 }
154
156 {
157 return buf.m_spBufCpuImpl->m_pMem;
158 }
159 };
160
161 //! The BufCpu pointer on device get trait specialization. Both overloads return the buffer's stored pointer (m_pMem) when `dev` compares equal to getDev(buf), and throw std::runtime_error otherwise.
162 template<typename TElem, typename TDim, typename TIdx>
163 struct GetPtrDev<BufCpu<TElem, TDim, TIdx>, DevCpu>
164 {
165 ALPAKA_FN_HOST static auto getPtrDev(BufCpu<TElem, TDim, TIdx> const& buf, DevCpu const& dev)
166 -> TElem const* // const buffer overload: pointer to const elements
167 {
168 if(dev == getDev(buf)) // only the device the buffer was created for may obtain the pointer
169 {
170 return buf.m_spBufCpuImpl->m_pMem;
171 }
172 else
173 {
174 throw std::runtime_error("The buffer is not accessible from the given device!");
175 }
176 }
177
178 ALPAKA_FN_HOST static auto getPtrDev(BufCpu<TElem, TDim, TIdx>& buf, DevCpu const& dev) -> TElem* // non-const buffer overload: pointer to mutable elements
179 {
180 if(dev == getDev(buf)) // same device check as the const overload
181 {
182 return buf.m_spBufCpuImpl->m_pMem;
183 }
184 else
185 {
186 throw std::runtime_error("The buffer is not accessible from the given device!");
187 }
188 }
189 };
190
191 //! The BufCpu memory allocation trait specialization.
192 template<typename TElem, typename TDim, typename TIdx>
193 struct BufAlloc<TElem, TDim, TIdx, DevCpu>
194 {
195 template<typename TExtent>
196 ALPAKA_FN_HOST static auto allocBuf(DevCpu const& dev, TExtent const& extent) -> BufCpu<TElem, TDim, TIdx>
197 {
199
200 // If ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT is defined, positive, and a power of 2, use it as the
201 // default alignment for host memory allocations. Otherwise, the alignment is chosen to enable optimal
202 // performance dependent on the target architecture.
203#if defined(ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT)
204 static_assert(
205 ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT > 0
206 && ((ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT & (ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT - 1)) == 0),
207 "If defined, ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT must be a power of 2.");
208 constexpr std::size_t alignment = static_cast<std::size_t>(ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT);
209#else
210 constexpr std::size_t alignment = core::vectorization::defaultAlignment;
211#endif
212 // alpaka::AllocCpuAligned is stateless
214 static_assert(std::is_empty_v<Allocator>, "AllocCpuAligned is expected to be stateless");
215 auto* memPtr = alpaka::malloc<TElem>(Allocator{}, static_cast<std::size_t>(getExtentProduct(extent)));
216 auto deleter = [](TElem* ptr) { alpaka::free(Allocator{}, ptr); };
217
218 return BufCpu<TElem, TDim, TIdx>(dev, memPtr, std::move(deleter), extent);
219 }
220 };
221
222 //! The BufCpu stream-ordered memory allocation trait specialization.
223 template<typename TElem, typename TDim, typename TIdx>
224 struct AsyncBufAlloc<TElem, TDim, TIdx, DevCpu>
225 {
226 template<typename TQueue, typename TExtent>
227 ALPAKA_FN_HOST static auto allocAsyncBuf(TQueue queue, TExtent const& extent) -> BufCpu<TElem, TDim, TIdx>
228 {
230
231 static_assert(
232 std::is_same_v<Dev<TQueue>, DevCpu>,
233 "The BufCpu buffer can only be used with a queue on a DevCpu device!");
234 DevCpu const& dev = getDev(queue);
235
236 // If ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT is defined, positive, and a power of 2, use it as the
237 // default alignment for host memory allocations. Otherwise, the alignment is chosen to enable optimal
238 // performance dependent on the target architecture.
239#if defined(ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT)
240 static_assert(
241 ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT > 0
242 && ((ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT & (ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT - 1)) == 0),
243 "If defined, ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT must be a power of 2.");
244 constexpr std::size_t alignment = static_cast<std::size_t>(ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT);
245#else
246 constexpr std::size_t alignment = core::vectorization::defaultAlignment;
247#endif
248 // alpaka::AllocCpuAligned is stateless
250 static_assert(std::is_empty_v<Allocator>, "AllocCpuAligned is expected to be stateless");
251 auto* memPtr = alpaka::malloc<TElem>(Allocator{}, static_cast<std::size_t>(getExtentProduct(extent)));
252 auto deleter = [l_queue = std::move(queue)](TElem* ptr) mutable
253 {
255 l_queue,
256 [ptr]()
257 {
258 // free the memory
259 alpaka::free(Allocator{}, ptr);
260 });
261 };
262
263 return BufCpu<TElem, TDim, TIdx>(dev, memPtr, std::move(deleter), extent);
264 }
265 };
266
267 //! The BufCpu stream-ordered memory allocation capability trait specialization: true for DevCpu with any dimensionality TDim.
268 template<typename TDim>
269 struct HasAsyncBufSupport<TDim, DevCpu> : public std::true_type
270 {
271 };
272
273 //! The pinned/mapped memory allocation trait specialization.
274 template<typename TElem, typename TDim, typename TIdx>
275 struct BufAllocMapped<PlatformCpu, TElem, TDim, TIdx>
276 {
277 template<typename TExtent>
279 DevCpu const& host,
280 PlatformCpu const& /*platform*/,
281 TExtent const& extent) -> BufCpu<TElem, TDim, TIdx>
282 {
283 // Allocate standard host memory.
284 return allocBuf<TElem, TIdx>(host, extent);
285 }
286 };
287
288 //! The pinned/mapped memory allocation capability trait specialization: true for PlatformCpu.
289 template<>
290 struct HasMappedBufSupport<PlatformCpu> : public std::true_type
291 {
292 };
293
294 //! The BufCpu offset get trait specialization.
295 template<typename TElem, typename TDim, typename TIdx>
296 struct GetOffsets<BufCpu<TElem, TDim, TIdx>>
297 {
302 };
303
304 //! The BufCpu idx type trait specialization: the index type is the buffer's TIdx template argument.
305 template<typename TElem, typename TDim, typename TIdx>
306 struct IdxType<BufCpu<TElem, TDim, TIdx>>
307 {
308 using type = TIdx;
309 };
310 } // namespace trait
311} // namespace alpaka
312
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
The CPU boost aligned allocator.
The CPU memory buffer.
Definition BufCpu.hpp:90
ALPAKA_FN_HOST BufCpu(DevCpu const &dev, TElem *pMem, Deleter deleter, TExtent const &extent)
Definition BufCpu.hpp:93
std::shared_ptr< detail::BufCpuImpl< TElem, TDim, TIdx > > m_spBufCpuImpl
Definition BufCpu.hpp:100
The CPU device handle.
Definition DevCpu.hpp:56
An n-dimensional vector.
Definition Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto zeros() -> Vec< TDim, TVal >
Zero value constructor.
Definition Vec.hpp:99
The CPU memory buffer.
Definition BufCpu.hpp:33
std::function< void(TElem *)> m_deleter
Definition BufCpu.hpp:83
ALPAKA_FN_HOST ~BufCpuImpl()
Definition BufCpu.hpp:71
ALPAKA_FN_HOST BufCpuImpl(DevCpu dev, TElem *pMem, std::function< void(TElem *)> deleter, TExtent const &extent) noexcept
Definition BufCpu.hpp:42
Vec< TDim, TIdx > const m_extentElements
Definition BufCpu.hpp:81
BufCpuImpl(BufCpuImpl &&)=delete
auto operator=(BufCpuImpl &&) -> BufCpuImpl &=delete
#define ALPAKA_FN_HOST
Definition Common.hpp:40
constexpr std::size_t defaultAlignment
Definition Vectorize.hpp:34
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtentProduct(T const &object) -> Idx< T >
Definition Traits.hpp:134
ALPAKA_FN_HOST auto free(TAlloc const &alloc, T const *const ptr) -> void
Frees the memory identified by the given pointer.
Definition Traits.hpp:41
ALPAKA_FN_HOST auto getDev(T const &t)
Definition Traits.hpp:68
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition Traits.hpp:47
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition Traits.hpp:19
STL namespace.
The CPU device platform.
static ALPAKA_FN_HOST auto allocAsyncBuf(TQueue queue, TExtent const &extent) -> BufCpu< TElem, TDim, TIdx >
Definition BufCpu.hpp:227
The stream-ordered memory allocator trait.
Definition Traits.hpp:31
static ALPAKA_FN_HOST auto allocMappedBuf(DevCpu const &host, PlatformCpu const &, TExtent const &extent) -> BufCpu< TElem, TDim, TIdx >
Definition BufCpu.hpp:278
The pinned/mapped memory allocator trait.
Definition Traits.hpp:41
static ALPAKA_FN_HOST auto allocBuf(DevCpu const &dev, TExtent const &extent) -> BufCpu< TElem, TDim, TIdx >
Definition BufCpu.hpp:196
The memory allocator trait.
Definition Traits.hpp:27
The device type trait.
Definition Traits.hpp:23
The dimension getter type trait.
Definition Traits.hpp:14
The element type trait.
Definition Traits.hpp:16
static ALPAKA_FN_HOST auto getDev(BufCpu< TElem, TDim, TIdx > const &buf) -> DevCpu
Definition BufCpu.hpp:116
The device get trait.
Definition Traits.hpp:27
ALPAKA_FN_HOST auto operator()(BufCpu< TElem, TDim, TIdx > const &buf)
Definition BufCpu.hpp:140
The GetExtents trait for getting the extents of an object as an alpaka::Vec.
Definition Traits.hpp:37
ALPAKA_FN_HOST auto operator()(BufCpu< TElem, TDim, TIdx > const &) const -> Vec< TDim, TIdx >
Definition BufCpu.hpp:298
The GetOffsets trait for getting the offsets of an object as an alpaka::Vec.
Definition Traits.hpp:33
static ALPAKA_FN_HOST auto getPtrDev(BufCpu< TElem, TDim, TIdx > &buf, DevCpu const &dev) -> TElem *
Definition BufCpu.hpp:178
static ALPAKA_FN_HOST auto getPtrDev(BufCpu< TElem, TDim, TIdx > const &buf, DevCpu const &dev) -> TElem const *
Definition BufCpu.hpp:165
The pointer on device get trait.
Definition Traits.hpp:58
static ALPAKA_FN_HOST auto getPtrNative(BufCpu< TElem, TDim, TIdx > &buf) -> TElem *
Definition BufCpu.hpp:155
static ALPAKA_FN_HOST auto getPtrNative(BufCpu< TElem, TDim, TIdx > const &buf) -> TElem const *
Definition BufCpu.hpp:150
The native pointer get trait.
Definition Traits.hpp:54
The stream-ordered memory allocation capability trait.
Definition Traits.hpp:36
The pinned/mapped memory allocation capability trait.
Definition Traits.hpp:46
The idx type trait.
Definition Traits.hpp:25