alpaka
Abstraction Library for Parallel Kernel Acceleration
BlockSharedMemDynUniformCudaHipBuiltIn.hpp
/* Copyright 2022 Benjamin Worpitz, René Widera, Andrea Bocci, Bernhard Manfred Gruber
 * SPDX-License-Identifier: MPL-2.0
 */

#pragma once

#include <alpaka/block/shared/dyn/Traits.hpp>
#include <alpaka/core/BoostPredef.hpp>
#include <alpaka/core/Concepts.hpp>

#include <type_traits>

#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)

namespace alpaka
{
    //! The GPU CUDA/HIP block shared memory allocator.
    class BlockSharedMemDynUniformCudaHipBuiltIn
        : public concepts::Implements<ConceptBlockSharedDyn, BlockSharedMemDynUniformCudaHipBuiltIn>
    {
    };

#    if !defined(ALPAKA_HOST_ONLY)

#        if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
#            error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
#        endif

#        if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
#            error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
#        endif

    namespace trait
    {
        template<typename T>
        struct GetDynSharedMem<T, BlockSharedMemDynUniformCudaHipBuiltIn>
        {
            __device__ static auto getMem(BlockSharedMemDynUniformCudaHipBuiltIn const&) -> T*
            {
                // Because unaligned access to variables is not allowed in device code,
                // we have to use the widest possible type to have all types aligned correctly.
                // See: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared
                //      http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#vector-types
                extern __shared__ float4 shMem[];
                return reinterpret_cast<T*>(shMem);
            }
        };
    } // namespace trait

#    endif

} // namespace alpaka

#endif
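
For context, user code does not touch this allocator directly: a kernel requests the buffer through alpaka::getDynSharedMem<T>(acc), which dispatches to the getMem() specialization above, and the per-block byte count is supplied by specializing alpaka::trait::BlockSharedMemDynSizeBytes for the kernel type. The following is a minimal sketch of that pattern; the kernel name StageKernel, the float element type, and the one-float-per-thread sizing are illustrative assumptions, not part of this header.

#include <alpaka/alpaka.hpp>

#include <cstddef>

//! A kernel that stages data through dynamic block shared memory (illustrative).
struct StageKernel
{
    template<typename TAcc>
    ALPAKA_FN_ACC void operator()(TAcc const& acc, float const* in, float* out, std::size_t n) const
    {
        // Hands back the extern __shared__ buffer from getMem() above, cast to float*.
        float* shared = alpaka::getDynSharedMem<float>(acc);

        auto const i = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc)[0];
        auto const t = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0];

        if(i < n)
            shared[t] = in[i];
        alpaka::syncBlockThreads(acc);
        if(i < n)
            out[i] = shared[t];
    }
};

namespace alpaka::trait
{
    //! Tells alpaka how many bytes of dynamic shared memory StageKernel needs per block.
    template<typename TAcc>
    struct BlockSharedMemDynSizeBytes<StageKernel, TAcc>
    {
        template<typename TVec>
        ALPAKA_FN_HOST_ACC static auto getBlockSharedMemDynSizeBytes(
            StageKernel const& /*kernel*/,
            TVec const& blockThreadExtent,
            TVec const& /*threadElemExtent*/,
            float const* /*in*/,
            float* /*out*/,
            std::size_t /*n*/) -> std::size_t
        {
            // One float per thread in the block (illustrative sizing).
            return static_cast<std::size_t>(blockThreadExtent.prod()) * sizeof(float);
        }
    };
} // namespace alpaka::trait

Because getMem() returns the extern __shared__ float4 buffer reinterpret_cast to the requested type, the pointer handed to the kernel is aligned to alignof(float4), which is sufficient for any element type the kernel asks for.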