alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
AllocCpuAligned.hpp
Go to the documentation of this file.
1/* Copyright 2022 Axel Huebl, Benjamin Worpitz, Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
12
13#include <algorithm>
14
15namespace alpaka
16{
17 //! The CPU boost aligned allocator.
18 //!
19 //! \tparam TAlignment An integral constant containing the alignment.
20 template<typename TAlignment>
21 class AllocCpuAligned : public interface::Implements<ConceptMemAlloc, AllocCpuAligned<TAlignment>>
22 {
23 };
24
25 namespace trait
26 {
27 //! The CPU boost aligned allocator memory allocation trait specialization.
28 template<typename T, typename TAlignment>
29 struct Malloc<T, AllocCpuAligned<TAlignment>>
30 {
31 ALPAKA_FN_HOST static auto malloc(
32 AllocCpuAligned<TAlignment> const& /* alloc */,
33 std::size_t const& sizeElems) -> T*
34 {
35#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
36 // For CUDA/HIP host memory must be aligned to 4 kib to pin it with `cudaHostRegister`,
37 // this was described in older programming guides but was removed later.
38 // From testing with PIConGPU and cuda-memcheck we found out that the alignment is still required.
39 //
40 // For HIP the required alignment is the size of a cache line.
41 // https://rocm-developer-tools.github.io/HIP/group__Memory.html#gab8258f051e1a1f7385f794a15300e674
42 // On most x86 systems the page size is 4KiB and on OpenPower 64KiB.
43 // Page size can be tested on the terminal with: `getconf PAGE_SIZE`
44 size_t minAlignement = std::max<size_t>(TAlignment::value, cpu::detail::getPageSize());
45#else
46 constexpr size_t minAlignement = TAlignment::value;
47#endif
48 return reinterpret_cast<T*>(core::alignedAlloc(minAlignement, sizeElems * sizeof(T)));
49 }
50 };
51
52 //! The CPU boost aligned allocator memory free trait specialization.
53 template<typename T, typename TAlignment>
54 struct Free<T, AllocCpuAligned<TAlignment>>
55 {
56 ALPAKA_FN_HOST static auto free(AllocCpuAligned<TAlignment> const& /* alloc */, T const* const ptr) -> void
57 {
58#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
59 size_t minAlignement = std::max<size_t>(TAlignment::value, cpu::detail::getPageSize());
60#else
61 constexpr size_t minAlignement = TAlignment::value;
62#endif
63 core::alignedFree(minAlignement, const_cast<void*>(reinterpret_cast<void const*>(ptr)));
64 }
65 };
66 } // namespace trait
67} // namespace alpaka
The CPU boost aligned allocator.
#define ALPAKA_FN_HOST
Definition Common.hpp:40
ALPAKA_FN_INLINE ALPAKA_FN_HOST void alignedFree(size_t alignment, void *ptr)
ALPAKA_FN_INLINE ALPAKA_FN_HOST auto alignedAlloc(size_t alignment, size_t size) -> void *
The alpaka accelerator library.
ALPAKA_FN_HOST auto free(TAlloc const &alloc, T const *const ptr) -> void
Frees the memory identified by the given pointer.
Definition Traits.hpp:41
ALPAKA_FN_HOST auto malloc(TAlloc const &alloc, std::size_t const &sizeElems) -> T *
Definition Traits.hpp:33
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15