alpaka
Abstraction Library for Parallel Kernel Acceleration
BufCpu.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Alexander Matthes, Axel Huebl, Benjamin Worpitz, Andrea Bocci, Jan Stephan, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
9 #include "alpaka/core/Cuda.hpp"
10 #include "alpaka/core/Hip.hpp"
12 #include "alpaka/dev/DevCpu.hpp"
13 #include "alpaka/dev/Traits.hpp"
19 #include "alpaka/vec/Vec.hpp"
20 
21 #include <functional>
22 #include <memory>
23 #include <type_traits>
24 #include <utility>
25 
26 namespace alpaka
27 {
28  namespace detail
29  {
30  //! The CPU memory buffer.
    //! Implementation object that bundles the device, the extent, the raw element pointer and the deleter of one
    //! allocation. Move construction and move assignment are explicitly deleted.
31  template<typename TElem, typename TDim, typename TIdx>
32  class BufCpuImpl final
33  {
34  static_assert(
35  !std::is_const_v<TElem>,
36  "The elem type of the buffer can not be const because the C++ Standard forbids containers of const "
37  "elements!");
38  static_assert(!std::is_const_v<TIdx>, "The idx type of the buffer can not be const!");
39 
40  public:
    //! Stores the device, the extent (converted through getExtentVecEnd<TDim>), the memory pointer and the
    //! deleter. The device and the deleter are moved into their members.
41  template<typename TExtent>
43  DevCpu dev,
44  TElem* pMem,
45  std::function<void(TElem*)> deleter,
46  TExtent const& extent) noexcept
47  : m_dev(std::move(dev))
48  , m_extentElements(getExtentVecEnd<TDim>(extent))
49  , m_pMem(pMem)
50  , m_deleter(std::move(deleter))
51  {
53 
    // Compile-time checks that TExtent agrees with the TDim and TIdx template parameters of the class.
54  static_assert(
55  TDim::value == Dim<TExtent>::value,
56  "The dimensionality of TExtent and the dimensionality of the TDim template parameter have to be "
57  "identical!");
58  static_assert(
59  std::is_same_v<TIdx, Idx<TExtent>>,
60  "The idx type of TExtent and the TIdx template parameter have to be identical!");
61 
    // Only in full debug builds: print the stored extent and the pointer value.
62 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
63  std::cout << __func__ << " e: " << m_extentElements << " ptr: " << static_cast<void*>(m_pMem)
64  << std::endl;
65 #endif
66  }
67 
    // Moving is disabled for the implementation object.
68  BufCpuImpl(BufCpuImpl&&) = delete;
69  auto operator=(BufCpuImpl&&) -> BufCpuImpl& = delete;
70 
    // NOTE(review): the statement that releases the memory is not visible in this block - presumably the stored
    // deleter is invoked on m_pMem here; confirm against the original header.
72  {
74 
75  // NOTE: m_pMem is allowed to be a nullptr here.
77  }
78 
79  public:
    // Device passed to the constructor.
80  DevCpu const m_dev;
    // Pointer to the first element; may be a nullptr (see the note in the block above).
82  TElem* const m_pMem;
    // Type-erased release function supplied by whoever created the buffer.
83  std::function<void(TElem*)> m_deleter;
84  };
85  } // namespace detail
86 
87  //! The CPU memory buffer.
    //! Handle type whose only data member is a std::shared_ptr to a detail::BufCpuImpl, so copies of a BufCpu
    //! refer to the same implementation object.
88  template<typename TElem, typename TDim, typename TIdx>
89  class BufCpu : public internal::ViewAccessOps<BufCpu<TElem, TDim, TIdx>>
90  {
91  public:
    //! Creates the shared implementation object from the device, the memory pointer, the deleter and the extent.
    //! \param deleter Callable that is moved into the implementation object.
92  template<typename TExtent, typename Deleter>
93  ALPAKA_FN_HOST BufCpu(DevCpu const& dev, TElem* pMem, Deleter deleter, TExtent const& extent)
95  std::make_shared<detail::BufCpuImpl<TElem, TDim, TIdx>>(dev, pMem, std::move(deleter), extent)}
96  {
97  }
98 
99  public:
    // Shared implementation object; the trait specializations for BufCpu read their data through it.
100  std::shared_ptr<detail::BufCpuImpl<TElem, TDim, TIdx>> m_spBufCpuImpl;
101  };
102 
103  namespace trait
104  {
105  //! The BufCpu device type trait specialization: the device type of a BufCpu is DevCpu.
106  template<typename TElem, typename TDim, typename TIdx>
107  struct DevType<BufCpu<TElem, TDim, TIdx>>
108  {
109  using type = DevCpu;
110  };
111 
112  //! The BufCpu device get trait specialization.
     //! Returns the device stored in the buffer's shared implementation object.
113  template<typename TElem, typename TDim, typename TIdx>
114  struct GetDev<BufCpu<TElem, TDim, TIdx>>
115  {
117  {
118  return buf.m_spBufCpuImpl->m_dev;
119  }
120  };
121 
122  //! The BufCpu dimension getter trait: the dimensionality is the TDim template argument of the buffer.
123  template<typename TElem, typename TDim, typename TIdx>
124  struct DimType<BufCpu<TElem, TDim, TIdx>>
125  {
126  using type = TDim;
127  };
128 
129  //! The BufCpu memory element type get trait specialization: the element type is the TElem template argument.
130  template<typename TElem, typename TDim, typename TIdx>
131  struct ElemType<BufCpu<TElem, TDim, TIdx>>
132  {
133  using type = TElem;
134  };
135 
136  //! The BufCpu extents get trait specialization.
     //! Returns the extent vector stored in the buffer's shared implementation object.
137  template<typename TElem, typename TDim, typename TIdx>
138  struct GetExtents<BufCpu<TElem, TDim, TIdx>>
139  {
141  {
142  return buf.m_spBufCpuImpl->m_extentElements;
143  }
144  };
145 
146  //! The BufCpu native pointer get trait specialization.
     //! Both overloads return the pointer stored in the shared implementation object.
147  template<typename TElem, typename TDim, typename TIdx>
148  struct GetPtrNative<BufCpu<TElem, TDim, TIdx>>
149  {
     //! Const access: the pointer is returned as pointer-to-const.
150  ALPAKA_FN_HOST static auto getPtrNative(BufCpu<TElem, TDim, TIdx> const& buf) -> TElem const*
151  {
152  return buf.m_spBufCpuImpl->m_pMem;
153  }
154 
     // Second overload returning the same stored pointer.
156  {
157  return buf.m_spBufCpuImpl->m_pMem;
158  }
159  };
160 
161  //! The BufCpu pointer on device get trait specialization.
     //! Returns the buffer's pointer only if the requested device compares equal to the buffer's device.
162  template<typename TElem, typename TDim, typename TIdx>
163  struct GetPtrDev<BufCpu<TElem, TDim, TIdx>, DevCpu>
164  {
     //! Const access.
     //! \throws std::runtime_error if dev is not the device of the buffer.
165  ALPAKA_FN_HOST static auto getPtrDev(BufCpu<TElem, TDim, TIdx> const& buf, DevCpu const& dev)
166  -> TElem const*
167  {
168  if(dev == getDev(buf))
169  {
170  return buf.m_spBufCpuImpl->m_pMem;
171  }
172  else
173  {
174  throw std::runtime_error("The buffer is not accessible from the given device!");
175  }
176  }
177 
     //! Mutable access.
     //! \throws std::runtime_error if dev is not the device of the buffer.
178  ALPAKA_FN_HOST static auto getPtrDev(BufCpu<TElem, TDim, TIdx>& buf, DevCpu const& dev) -> TElem*
179  {
180  if(dev == getDev(buf))
181  {
182  return buf.m_spBufCpuImpl->m_pMem;
183  }
184  else
185  {
186  throw std::runtime_error("The buffer is not accessible from the given device!");
187  }
188  }
189  };
190 
191  //! The BufCpu memory allocation trait specialization.
     //! Allocates getExtentProduct(extent) elements through alpaka::malloc and wraps them in a BufCpu whose
     //! deleter releases them through alpaka::free.
192  template<typename TElem, typename TDim, typename TIdx>
193  struct BufAlloc<TElem, TDim, TIdx, DevCpu>
194  {
195  template<typename TExtent>
196  ALPAKA_FN_HOST static auto allocBuf(DevCpu const& dev, TExtent const& extent) -> BufCpu<TElem, TDim, TIdx>
197  {
199 
200  // If ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT is defined, positive, and a power of 2, use it as the
201  // default alignment for host memory allocations. Otherwise, the alignment is chosen to enable optimal
202  // performance dependent on the target architecture.
203 #if defined(ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT)
204  static_assert(
205  ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT > 0
206  && ((ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT & (ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT - 1)) == 0),
207  "If defined, ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT must be a power of 2.");
208  constexpr std::size_t alignment = static_cast<std::size_t>(ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT);
209 #else
210  constexpr std::size_t alignment = core::vectorization::defaultAlignment;
211 #endif
212  // alpaka::AllocCpuAligned is stateless
     // NOTE(review): the definition of Allocator is not visible in this block - presumably an alias of
     // AllocCpuAligned parameterized with `alignment`; confirm against the original header.
214  static_assert(std::is_empty_v<Allocator>, "AllocCpuAligned is expected to be stateless");
215  auto* memPtr = alpaka::malloc<TElem>(Allocator{}, static_cast<std::size_t>(getExtentProduct(extent)));
     // The deleter captures nothing: a fresh Allocator object is created for the free call.
216  auto deleter = [](TElem* ptr) { alpaka::free(Allocator{}, ptr); };
217 
218  return BufCpu<TElem, TDim, TIdx>(dev, memPtr, std::move(deleter), extent);
219  }
220  };
221 
222  //! The BufCpu stream-ordered memory allocation trait specialization.
     //! The memory itself is obtained immediately through alpaka::malloc inside allocAsyncBuf; only the release is
     //! tied to the queue, via the deleter stored in the returned buffer.
223  template<typename TElem, typename TDim, typename TIdx>
224  struct AsyncBufAlloc<TElem, TDim, TIdx, DevCpu>
225  {
     //! \param queue Taken by value and moved into the deleter of the returned buffer.
     //! \param extent Extent of the buffer; getExtentProduct(extent) elements are allocated.
226  template<typename TQueue, typename TExtent>
227  ALPAKA_FN_HOST static auto allocAsyncBuf(TQueue queue, TExtent const& extent) -> BufCpu<TElem, TDim, TIdx>
228  {
230 
231  static_assert(
232  std::is_same_v<Dev<TQueue>, DevCpu>,
233  "The BufCpu buffer can only be used with a queue on a DevCpu device!");
234  DevCpu const& dev = getDev(queue);
235 
236  // If ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT is defined, positive, and a power of 2, use it as the
237  // default alignment for host memory allocations. Otherwise, the alignment is chosen to enable optimal
238  // performance dependent on the target architecture.
239 #if defined(ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT)
240  static_assert(
241  ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT > 0
242  && ((ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT & (ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT - 1)) == 0),
243  "If defined, ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT must be a power of 2.");
244  constexpr std::size_t alignment = static_cast<std::size_t>(ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT);
245 #else
246  constexpr std::size_t alignment = core::vectorization::defaultAlignment;
247 #endif
248  // alpaka::AllocCpuAligned is stateless
     // NOTE(review): the definition of Allocator is not visible in this block - presumably an alias of
     // AllocCpuAligned parameterized with `alignment`; confirm against the original header.
250  static_assert(std::is_empty_v<Allocator>, "AllocCpuAligned is expected to be stateless");
251  auto* memPtr = alpaka::malloc<TElem>(Allocator{}, static_cast<std::size_t>(getExtentProduct(extent)));
     // The deleter takes ownership of the queue object; the inner lambda captures only the pointer to free.
252  auto deleter = [l_queue = std::move(queue)](TElem* ptr) mutable
253  {
     // NOTE(review): the name of the call that receives l_queue and the lambda is not visible in this block -
     // presumably alpaka::enqueue; confirm against the original header.
255  l_queue,
256  [ptr]()
257  {
258  // free the memory
259  alpaka::free(Allocator{}, ptr);
260  });
261  };
262 
263  return BufCpu<TElem, TDim, TIdx>(dev, memPtr, std::move(deleter), extent);
264  }
265  };
266 
267  //! The BufCpu stream-ordered memory allocation capability trait specialization.
     //! Derives from std::true_type for DevCpu and every TDim.
268  template<typename TDim>
269  struct HasAsyncBufSupport<TDim, DevCpu> : public std::true_type
270  {
271  };
272 
273  //! The pinned/mapped memory allocation trait specialization.
     //! For PlatformCpu this forwards to allocBuf on the host device; the platform argument is not used.
274  template<typename TElem, typename TDim, typename TIdx>
275  struct BufAllocMapped<PlatformCpu, TElem, TDim, TIdx>
276  {
277  template<typename TExtent>
279  DevCpu const& host,
280  PlatformCpu const& /*platform*/,
281  TExtent const& extent) -> BufCpu<TElem, TDim, TIdx>
282  {
283  // Allocate standard host memory.
284  return allocBuf<TElem, TIdx>(host, extent);
285  }
286  };
287 
287  //! The pinned/mapped memory allocation capability trait specialization.
     //! Derives from std::true_type for PlatformCpu.
288  template<>
289  struct HasMappedBufSupport<PlatformCpu> : public std::true_type
290  {
291  };
293 
293  //! The BufCpu offset get trait specialization.
     //! Always returns an all-zero offset vector; nothing is read from the buffer.
294  template<typename TElem, typename TDim, typename TIdx>
295  struct GetOffsets<BufCpu<TElem, TDim, TIdx>>
296  {
298  {
299  return Vec<TDim, TIdx>::zeros();
300  }
301  };
303 
303  //! The BufCpu idx type trait specialization: the index type is the TIdx template argument of the buffer.
304  template<typename TElem, typename TDim, typename TIdx>
305  struct IdxType<BufCpu<TElem, TDim, TIdx>>
306  {
307  using type = TIdx;
308  };
310  } // namespace trait
311 } // namespace alpaka
312 
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
The CPU boost aligned allocator.
The CPU memory buffer.
Definition: BufCpu.hpp:90
ALPAKA_FN_HOST BufCpu(DevCpu const &dev, TElem *pMem, Deleter deleter, TExtent const &extent)
Definition: BufCpu.hpp:93
std::shared_ptr< detail::BufCpuImpl< TElem, TDim, TIdx > > m_spBufCpuImpl
Definition: BufCpu.hpp:100
The CPU device handle.
Definition: DevCpu.hpp:56
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto zeros() -> Vec< TDim, TVal >
Zero value constructor.
Definition: Vec.hpp:126
The CPU memory buffer.
Definition: BufCpu.hpp:33
std::function< void(TElem *)> m_deleter
Definition: BufCpu.hpp:83
ALPAKA_FN_HOST ~BufCpuImpl()
Definition: BufCpu.hpp:71
ALPAKA_FN_HOST BufCpuImpl(DevCpu dev, TElem *pMem, std::function< void(TElem *)> deleter, TExtent const &extent) noexcept
Definition: BufCpu.hpp:42
Vec< TDim, TIdx > const m_extentElements
Definition: BufCpu.hpp:81
BufCpuImpl(BufCpuImpl &&)=delete
auto operator=(BufCpuImpl &&) -> BufCpuImpl &=delete
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
constexpr std::size_t defaultAlignment
Definition: Vectorize.hpp:34
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition: Traits.hpp:29
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtentProduct(T const &object) -> Idx< T >
Definition: Traits.hpp:134
ALPAKA_FN_HOST auto free(TAlloc const &alloc, T const *const ptr) -> void
Frees the memory identified by the given pointer.
Definition: Traits.hpp:41
typename trait::DevType< T >::type Dev
The device type trait alias template to remove the ::type.
Definition: Traits.hpp:56
ALPAKA_FN_HOST auto getDev(T const &t)
Definition: Traits.hpp:68
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition: Traits.hpp:47
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19
The CPU device platform.
Definition: PlatformCpu.hpp:18
static ALPAKA_FN_HOST auto allocAsyncBuf(TQueue queue, TExtent const &extent) -> BufCpu< TElem, TDim, TIdx >
Definition: BufCpu.hpp:227
The stream-ordered memory allocator trait.
Definition: Traits.hpp:31
static ALPAKA_FN_HOST auto allocMappedBuf(DevCpu const &host, PlatformCpu const &, TExtent const &extent) -> BufCpu< TElem, TDim, TIdx >
Definition: BufCpu.hpp:278
The pinned/mapped memory allocator trait.
Definition: Traits.hpp:41
static ALPAKA_FN_HOST auto allocBuf(DevCpu const &dev, TExtent const &extent) -> BufCpu< TElem, TDim, TIdx >
Definition: BufCpu.hpp:196
The memory allocator trait.
Definition: Traits.hpp:27
The device type trait.
Definition: Traits.hpp:23
The dimension getter type trait.
Definition: Traits.hpp:14
The element type trait.
Definition: Traits.hpp:16
static ALPAKA_FN_HOST auto getDev(BufCpu< TElem, TDim, TIdx > const &buf) -> DevCpu
Definition: BufCpu.hpp:116
The device get trait.
Definition: Traits.hpp:27
ALPAKA_FN_HOST auto operator()(BufCpu< TElem, TDim, TIdx > const &buf)
Definition: BufCpu.hpp:140
The GetExtents trait for getting the extents of an object as an alpaka::Vec.
Definition: Traits.hpp:37
ALPAKA_FN_HOST auto operator()(BufCpu< TElem, TDim, TIdx > const &) const -> Vec< TDim, TIdx >
Definition: BufCpu.hpp:298
The GetOffsets trait for getting the offsets of an object as an alpaka::Vec.
Definition: Traits.hpp:33
static ALPAKA_FN_HOST auto getPtrDev(BufCpu< TElem, TDim, TIdx > &buf, DevCpu const &dev) -> TElem *
Definition: BufCpu.hpp:178
static ALPAKA_FN_HOST auto getPtrDev(BufCpu< TElem, TDim, TIdx > const &buf, DevCpu const &dev) -> TElem const *
Definition: BufCpu.hpp:165
The pointer on device get trait.
Definition: Traits.hpp:58
static ALPAKA_FN_HOST auto getPtrNative(BufCpu< TElem, TDim, TIdx > &buf) -> TElem *
Definition: BufCpu.hpp:155
static ALPAKA_FN_HOST auto getPtrNative(BufCpu< TElem, TDim, TIdx > const &buf) -> TElem const *
Definition: BufCpu.hpp:150
The native pointer get trait.
Definition: Traits.hpp:54
The stream-ordered memory allocation capability trait.
Definition: Traits.hpp:36
The pinned/mapped memory allocation capability trait.
Definition: Traits.hpp:46
The idx type trait.
Definition: Traits.hpp:25