alpaka
Abstraction Library for Parallel Kernel Acceleration
BlockSharedMemDynUniformCudaHipBuiltIn.hpp
/* Copyright 2022 Benjamin Worpitz, René Widera, Andrea Bocci, Bernhard Manfred Gruber
 * SPDX-License-Identifier: MPL-2.0
 */

#pragma once

#include "alpaka/block/shared/dyn/Traits.hpp"
#include "alpaka/core/BoostPredef.hpp"
#include "alpaka/core/Interface.hpp"

#include <cstddef>
#include <type_traits>

#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)

namespace alpaka
{
    //! The GPU CUDA/HIP block shared memory allocator.
    class BlockSharedMemDynUniformCudaHipBuiltIn
        : public interface::Implements<ConceptBlockSharedDyn, BlockSharedMemDynUniformCudaHipBuiltIn>
    {
    };

#    if !defined(ALPAKA_HOST_ONLY)

#        if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
#            error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
#        endif

#        if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
#            error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
#        endif

    namespace trait
    {
        template<typename T>
        struct GetDynSharedMem<T, BlockSharedMemDynUniformCudaHipBuiltIn>
        {
            __device__ static auto getMem(BlockSharedMemDynUniformCudaHipBuiltIn const&) -> T*
            {
                // Because unaligned access to variables is not allowed in device code,
                // we use the widest possible alignment supported by CUDA types to have
                // all types aligned correctly.
                // See:
                // - http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared
                // - http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#vector-types
                extern __shared__ std::byte shMem alignas(std::max_align_t)[];
                return reinterpret_cast<T*>(shMem);
            }
        };
    } // namespace trait

#    endif

} // namespace alpaka

#endif
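
To illustrate the alignment comment in getMem(), here is a minimal stand-alone CUDA sketch that uses the same pattern: a single extern __shared__ byte array declared with the widest alignment and reinterpret_cast to the element type the kernel actually needs. The kernel name, sizes, and launch configuration are invented for illustration; in plain CUDA the dynamic shared memory size is the third launch parameter, whereas alpaka derives it from the BlockSharedMemDynSizeBytes trait.

// Hypothetical example, not part of alpaka: the same dynamic shared memory
// pattern as getMem() above, written as a plain CUDA program.
#include <cstddef>

__global__ void scaleKernel(float const* in, float* out, int n, float factor)
{
    // One extern __shared__ byte array aligned to std::max_align_t, exactly as
    // in getMem(); a reinterpret_cast to a fundamental or CUDA vector type
    // therefore yields a correctly aligned pointer.
    extern __shared__ std::byte shMem alignas(std::max_align_t)[];
    auto* tile = reinterpret_cast<float*>(shMem);

    int const i = blockIdx.x * blockDim.x + threadIdx.x;
    if(i < n)
        tile[threadIdx.x] = in[i] * factor;
    __syncthreads();
    if(i < n)
        out[i] = tile[threadIdx.x];
}

int main()
{
    int const n = 256;
    float* dIn = nullptr;
    float* dOut = nullptr;
    cudaMalloc(&dIn, n * sizeof(float));
    cudaMalloc(&dOut, n * sizeof(float));
    // Third launch parameter: dynamic shared memory size in bytes. alpaka
    // computes this value from the BlockSharedMemDynSizeBytes trait instead.
    scaleKernel<<<1, n, n * sizeof(float)>>>(dIn, dOut, n, 2.0f);
    cudaDeviceSynchronize();
    cudaFree(dIn);
    cudaFree(dOut);
    return 0;
}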
Cross-references:
- namespace alpaka: The alpaka accelerator library.
- interface::Implements (Interface.hpp, line 15): Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is implemented.
- trait::GetDynSharedMem (Traits.hpp, line 23): The block shared dynamic memory get trait; this file provides its static __device__ auto getMem(BlockSharedMemDynUniformCudaHipBuiltIn const&) -> T* specialization for the CUDA/HIP back-ends.
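
Kernel code does not use this trait directly: it calls alpaka's getDynSharedMem() free function, which dispatches to the GetDynSharedMem specialization of the active back-end, and declares its per-block dynamic shared memory requirement through the BlockSharedMemDynSizeBytes trait. The following sketch shows that wiring under assumed names (SharedMemKernel and its arguments are invented for illustration); it is a sketch of typical usage, not code from this file.

#include <alpaka/alpaka.hpp>

#include <cstddef>

// Hypothetical kernel functor copying data through dynamic block shared memory.
struct SharedMemKernel
{
    template<typename TAcc>
    ALPAKA_FN_ACC void operator()(TAcc const& acc, float const* in, float* out, std::size_t n) const
    {
        // On the CUDA/HIP back-ends this call ends up in
        // trait::GetDynSharedMem<float, BlockSharedMemDynUniformCudaHipBuiltIn>::getMem().
        float* const tile = alpaka::getDynSharedMem<float>(acc);

        auto const i = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
        auto const t = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0];
        if(i < n)
            tile[t] = in[i];
        alpaka::syncBlockThreads(acc);
        if(i < n)
            out[i] = 2.0f * tile[t];
    }
};

namespace alpaka::trait
{
    // Tells alpaka how many bytes of dynamic shared memory SharedMemKernel needs per block;
    // the CUDA/HIP back-ends forward this value as the dynamic shared memory launch parameter.
    template<typename TAcc>
    struct BlockSharedMemDynSizeBytes<SharedMemKernel, TAcc>
    {
        template<typename TVec, typename... TArgs>
        ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(
            SharedMemKernel const& /*kernel*/,
            TVec const& blockThreadExtent,
            TVec const& threadElemExtent,
            TArgs&&... /*args*/) -> std::size_t
        {
            // One float per thread element in the block.
            return static_cast<std::size_t>(blockThreadExtent.prod() * threadElemExtent.prod()) * sizeof(float);
        }
    };
} // namespace alpaka::trait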