alpaka
Abstraction Library for Parallel Kernel Acceleration
WorkDivUniformCudaHipBuiltIn.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Axel Huebl, Benjamin Worpitz, Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
9 #include "alpaka/core/Cuda.hpp"
10 #include "alpaka/core/Hip.hpp"
11 #include "alpaka/idx/Traits.hpp"
12 #include "alpaka/vec/Vec.hpp"
14 
15 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
16 
17 namespace alpaka
18 {
19  //! The GPU CUDA/HIP accelerator work division. Implements ConceptWorkDiv and keeps the thread element extent in m_threadElemExtent.
    //! NOTE(review): the embedded numbering skips lines 21, 25 and 33, so the class header, the constructor
    //! signature and the m_threadElemExtent member declaration are absent from this listing - restore them
    //! from the original header before compiling.
20  template<typename TDim, typename TIdx>
22  : public concepts::Implements<ConceptWorkDiv, WorkDivUniformCudaHipBuiltIn<TDim, TIdx>>
23  {
24  public:
26  : m_threadElemExtent(threadElemExtent)
27  {
28  }
29 
30  // \TODO: Optimize! Add a WorkDivUniformCudaHipBuiltInNoElems variant that has no m_threadElemExtent member, as well as
31  // an AccGpuUniformCudaHipRtNoElems. Use them instead of AccGpuUniformCudaHipRt when the thread element extent is one,
32  // to reduce the register usage.
34  };
35 
36 # if !defined(ALPAKA_HOST_ONLY)
37 
38 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA // CUDA back-end requested, but BOOST_LANG_CUDA is zero
39 # error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
40 # endif // CUDA compiler support check
41 
42 # if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP // HIP back-end requested, but BOOST_LANG_HIP is zero
43 # error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
44 # endif // HIP compiler support check
45 
46  namespace trait
47  {
48  //! The GPU CUDA/HIP accelerator work division dimension get trait specialization: exposes TDim as the nested `type`.
    //! NOTE(review): the embedded numbering skips line 50, so the struct declaration that names the specialized
    //! trait is absent from this listing - restore it from the original header.
49  template<typename TDim, typename TIdx>
51  {
52  using type = TDim;
53  };
54 
55  //! The GPU CUDA/HIP accelerator work division idx type trait specialization: exposes TIdx as the nested `type`.
    //! NOTE(review): the embedded numbering skips line 57, so the struct declaration that names the specialized
    //! trait is absent from this listing - restore it from the original header.
56  template<typename TDim, typename TIdx>
58  {
59  using type = TIdx;
60  };
61 
62  //! The GPU CUDA/HIP accelerator work division grid block extent trait specialization.
63  template<typename TDim, typename TIdx>
64  struct GetWorkDiv<WorkDivUniformCudaHipBuiltIn<TDim, TIdx>, origin::Grid, unit::Blocks>
65  {
66  //! \return The number of blocks in each dimension of the grid, read from the built-in grid dimensions (gridDim for CUDA, hipGridDim_* for HIP). The work division argument is not used.
67  __device__ static auto getWorkDiv(WorkDivUniformCudaHipBuiltIn<TDim, TIdx> const& /* workDiv */)
68  -> Vec<TDim, TIdx>
69  {
70 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
71  return castVec<TIdx>(getExtentVecEnd<TDim>(gridDim));
72 # else
73  return getExtentVecEnd<TDim>(Vec<std::integral_constant<typename TDim::value_type, 3>, TIdx>( // three-component vector filled in z, y, x order
74  static_cast<TIdx>(hipGridDim_z),
75  static_cast<TIdx>(hipGridDim_y),
76  static_cast<TIdx>(hipGridDim_x)));
77 # endif
78  }
79  };
80 
81  //! The GPU CUDA/HIP accelerator work division block thread extent trait specialization.
82  template<typename TDim, typename TIdx>
83  struct GetWorkDiv<WorkDivUniformCudaHipBuiltIn<TDim, TIdx>, origin::Block, unit::Threads>
84  {
85  //! \return The number of threads in each dimension of a block, read from the built-in block dimensions (blockDim for CUDA, hipBlockDim_* for HIP). The work division argument is not used.
86  __device__ static auto getWorkDiv(WorkDivUniformCudaHipBuiltIn<TDim, TIdx> const& /* workDiv */)
87  -> Vec<TDim, TIdx>
88  {
89 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
90  return castVec<TIdx>(getExtentVecEnd<TDim>(blockDim));
91 # else
92  return getExtentVecEnd<TDim>(Vec<std::integral_constant<typename TDim::value_type, 3>, TIdx>( // three-component vector filled in z, y, x order
93  static_cast<TIdx>(hipBlockDim_z),
94  static_cast<TIdx>(hipBlockDim_y),
95  static_cast<TIdx>(hipBlockDim_x)));
96 # endif
97  }
98  };
99 
100  //! The GPU CUDA/HIP accelerator work division thread element extent trait specialization.
101  template<typename TDim, typename TIdx>
102  struct GetWorkDiv<WorkDivUniformCudaHipBuiltIn<TDim, TIdx>, origin::Thread, unit::Elems>
103  {
104  //! \return The number of elements in each dimension of a thread, as stored in workDiv.m_threadElemExtent.
105  __device__ static auto getWorkDiv(WorkDivUniformCudaHipBuiltIn<TDim, TIdx> const& workDiv)
106  -> Vec<TDim, TIdx>
107  {
108  return workDiv.m_threadElemExtent;
109  }
110  };
111  } // namespace trait
112 
113 # endif
114 
115 } // namespace alpaka
116 
117 #endif
The GPU CUDA/HIP accelerator work division.
ALPAKA_FN_HOST_ACC WorkDivUniformCudaHipBuiltIn(Vec< TDim, TIdx > const &threadElemExtent)
#define ALPAKA_FN_HOST_ACC
Definition: Common.hpp:39
The alpaka accelerator library.
Tag used in class inheritance hierarchies that describes that a specific concept (TConcept) is implem...
Definition: Concepts.hpp:15
The dimension getter type trait.
Definition: Traits.hpp:14
static __device__ auto getWorkDiv(WorkDivUniformCudaHipBuiltIn< TDim, TIdx > const &) -> Vec< TDim, TIdx >
static __device__ auto getWorkDiv(WorkDivUniformCudaHipBuiltIn< TDim, TIdx > const &) -> Vec< TDim, TIdx >
static __device__ auto getWorkDiv(WorkDivUniformCudaHipBuiltIn< TDim, TIdx > const &workDiv) -> Vec< TDim, TIdx >
The work div trait.
Definition: Traits.hpp:27
The idx type trait.
Definition: Traits.hpp:25