alpaka
Abstraction Library for Parallel Kernel Acceleration
AllocCpuAligned.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Axel Huebl, Benjamin Worpitz, Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
8 #include "alpaka/core/Common.hpp"
12 
13 #include <algorithm>
14 
15 namespace alpaka
16 {
17  //! The CPU boost aligned allocator.
18  //!
19  //! \tparam TAlignment An integral constant containing the alignment.
20  template<typename TAlignment>
21  class AllocCpuAligned : public concepts::Implements<ConceptMemAlloc, AllocCpuAligned<TAlignment>>
22  {
23  };
24 
25  namespace trait
26  {
27  //! The CPU boost aligned allocator memory allocation trait specialization.
28  template<typename T, typename TAlignment>
29  struct Malloc<T, AllocCpuAligned<TAlignment>>
30  {
31  ALPAKA_FN_HOST static auto malloc(
32  AllocCpuAligned<TAlignment> const& /* alloc */,
33  std::size_t const& sizeElems) -> T*
34  {
35 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
36  // For CUDA/HIP host memory must be aligned to 4 kib to pin it with `cudaHostRegister`,
37  // this was described in older programming guides but was removed later.
38  // From testing with PIConGPU and cuda-memcheck we found out that the alignment is still required.
39  //
40  // For HIP the required alignment is the size of a cache line.
41  // https://rocm-developer-tools.github.io/HIP/group__Memory.html#gab8258f051e1a1f7385f794a15300e674
42  // On most x86 systems the page size is 4KiB and on OpenPower 64KiB.
43  // Page size can be tested on the terminal with: `getconf PAGE_SIZE`
44  size_t minAlignement = std::max<size_t>(TAlignment::value, cpu::detail::getPageSize());
45 #else
46  constexpr size_t minAlignement = TAlignment::value;
47 #endif
48  return reinterpret_cast<T*>(core::alignedAlloc(minAlignement, sizeElems * sizeof(T)));
49  }
50  };
51 
52  //! The CPU boost aligned allocator memory free trait specialization.
53  template<typename T, typename TAlignment>
54  struct Free<T, AllocCpuAligned<TAlignment>>
55  {
56  ALPAKA_FN_HOST static auto free(AllocCpuAligned<TAlignment> const& /* alloc */, T const* const ptr) -> void
57  {
58 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
59  size_t minAlignement = std::max<size_t>(TAlignment::value, cpu::detail::getPageSize());
60 #else
61  constexpr size_t minAlignement = TAlignment::value;
62 #endif
63  core::alignedFree(minAlignement, const_cast<void*>(reinterpret_cast<void const*>(ptr)));
64  }
65  };
66  } // namespace trait
67 } // namespace alpaka
The CPU boost aligned allocator.
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
ALPAKA_FN_INLINE ALPAKA_FN_HOST void alignedFree(size_t alignment, void *ptr)
ALPAKA_FN_INLINE ALPAKA_FN_HOST auto alignedAlloc(size_t alignment, size_t size) -> void *
size_t getPageSize()
Definition: SysInfo.hpp:124
The alpaka accelerator library.
Tag used in class inheritance hierarchies that describes that a specific concept (TConcept) is implem...
Definition: Concepts.hpp:15
static ALPAKA_FN_HOST auto free(AllocCpuAligned< TAlignment > const &, T const *const ptr) -> void
The memory free trait.
Definition: Traits.hpp:28
static ALPAKA_FN_HOST auto malloc(AllocCpuAligned< TAlignment > const &, std::size_t const &sizeElems) -> T *
The memory allocation trait.
Definition: Traits.hpp:24