alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
WorkDivUniformCudaHipBuiltIn.hpp
Go to the documentation of this file.
1/* Copyright 2022 Axel Huebl, Benjamin Worpitz, Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
9#include "alpaka/core/Hip.hpp"
11#include "alpaka/idx/Traits.hpp"
12#include "alpaka/vec/Vec.hpp"
14
15#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
16
17namespace alpaka
18{
19 //! The GPU CUDA/HIP accelerator work division.
20 template<typename TDim, typename TIdx>
22 : public interface::Implements<ConceptWorkDiv, WorkDivUniformCudaHipBuiltIn<TDim, TIdx>>
23 {
24 public:
26 : m_threadElemExtent(threadElemExtent)
27 {
28 }
29
30 // \TODO: Optimize! Add WorkDivUniformCudaHipBuiltInNoElems that has no member m_threadElemExtent as well as
31 // AccGpuUniformCudaHipRtNoElems. Use it instead of AccGpuUniformCudaHipRt if the thread element extent is one
32 // to reduce the register usage.
34 };
35
36# if !defined(ALPAKA_HOST_ONLY)
37
38# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
39# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
40# endif
41
42# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
43# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
44# endif
45
46 namespace trait
47 {
48 //! The GPU CUDA/HIP accelerator work division dimension get trait specialization.
49 template<typename TDim, typename TIdx>
51 {
52 using type = TDim;
53 };
54
55 //! The GPU CUDA/HIP accelerator work division idx type trait specialization.
56 template<typename TDim, typename TIdx>
58 {
59 using type = TIdx;
60 };
61
62 //! The GPU CUDA/HIP accelerator work division grid block extent trait specialization.
63 template<typename TDim, typename TIdx>
64 struct GetWorkDiv<WorkDivUniformCudaHipBuiltIn<TDim, TIdx>, origin::Grid, unit::Blocks>
65 {
66 //! \return The number of blocks in each dimension of the grid.
67 __device__ static auto getWorkDiv(WorkDivUniformCudaHipBuiltIn<TDim, TIdx> const& /* workDiv */)
69 {
70# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
71 return castVec<TIdx>(getExtentVecEnd<TDim>(gridDim));
72# else
73 return getExtentVecEnd<TDim>(Vec<std::integral_constant<typename TDim::value_type, 3>, TIdx>(
74 static_cast<TIdx>(hipGridDim_z),
75 static_cast<TIdx>(hipGridDim_y),
76 static_cast<TIdx>(hipGridDim_x)));
77# endif
78 }
79 };
80
81 //! The GPU CUDA/HIP accelerator work division block thread extent trait specialization.
82 template<typename TDim, typename TIdx>
83 struct GetWorkDiv<WorkDivUniformCudaHipBuiltIn<TDim, TIdx>, origin::Block, unit::Threads>
84 {
85 //! \return The number of threads in each dimension of a block.
86 __device__ static auto getWorkDiv(WorkDivUniformCudaHipBuiltIn<TDim, TIdx> const& /* workDiv */)
88 {
89# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
90 return castVec<TIdx>(getExtentVecEnd<TDim>(blockDim));
91# else
92 return getExtentVecEnd<TDim>(Vec<std::integral_constant<typename TDim::value_type, 3>, TIdx>(
93 static_cast<TIdx>(hipBlockDim_z),
94 static_cast<TIdx>(hipBlockDim_y),
95 static_cast<TIdx>(hipBlockDim_x)));
96# endif
97 }
98 };
99
100 //! The GPU CUDA/HIP accelerator work division thread element extent trait specialization.
101 template<typename TDim, typename TIdx>
102 struct GetWorkDiv<WorkDivUniformCudaHipBuiltIn<TDim, TIdx>, origin::Thread, unit::Elems>
103 {
104 //! \return The number of elements in each dimension of a thread.
105 __device__ static auto getWorkDiv(WorkDivUniformCudaHipBuiltIn<TDim, TIdx> const& workDiv)
107 {
108 return workDiv.m_threadElemExtent;
109 }
110 };
111 } // namespace trait
112
113# endif
114
115} // namespace alpaka
116
117#endif
An n-dimensional vector.
Definition Vec.hpp:38
The GPU CUDA/HIP accelerator work division.
ALPAKA_FN_HOST_ACC WorkDivUniformCudaHipBuiltIn(Vec< TDim, TIdx > const &threadElemExtent)
#define ALPAKA_FN_HOST_ACC
Definition Common.hpp:39
The alpaka accelerator library.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
The dimension getter type trait.
Definition Traits.hpp:14
static __device__ auto getWorkDiv(WorkDivUniformCudaHipBuiltIn< TDim, TIdx > const &) -> Vec< TDim, TIdx >
static __device__ auto getWorkDiv(WorkDivUniformCudaHipBuiltIn< TDim, TIdx > const &) -> Vec< TDim, TIdx >
static __device__ auto getWorkDiv(WorkDivUniformCudaHipBuiltIn< TDim, TIdx > const &workDiv) -> Vec< TDim, TIdx >
The work div trait.
Definition Traits.hpp:27
The idx type trait.
Definition Traits.hpp:25