alpaka
Abstraction Library for Parallel Kernel Acceleration
DevUniformCudaHipRt.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Benjamin Worpitz, Jakob Krude, René Widera, Andrea Bocci, Bernhard Manfred Gruber,
2  * Antonio Di Pilato, Jan Stephan, Andrea Bocci
3  * SPDX-License-Identifier: MPL-2.0
4  */
5 
6 #pragma once
7 
10 #include "alpaka/core/Cuda.hpp"
11 #include "alpaka/core/Hip.hpp"
12 #include "alpaka/dev/Traits.hpp"
17 #include "alpaka/queue/Traits.hpp"
19 #include "alpaka/traits/Traits.hpp"
20 #include "alpaka/wait/Traits.hpp"
21 
22 #include <cstddef>
23 #include <string>
24 #include <vector>
25 
26 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
27 
28 namespace alpaka
29 {
30  namespace trait
31  {
32  template<typename TPlatform, typename TSfinae>
33  struct GetDevByIdx;
34  } // namespace trait
35 
36  namespace uniform_cuda_hip::detail
37  {
38  template<typename TApi, bool TBlocking>
39  class QueueUniformCudaHipRt;
40  } // namespace uniform_cuda_hip::detail
41 
42  template<typename TApi>
44 
45  template<typename TApi>
47 
48  template<typename TApi>
50 
51  template<typename TApi, typename TElem, typename TDim, typename TIdx>
52  struct BufUniformCudaHipRt;
53 
54  //! The CUDA/HIP RT device handle.
55  template<typename TApi>
57  : public concepts::Implements<ConceptCurrentThreadWaitFor, DevUniformCudaHipRt<TApi>>
58  , public concepts::Implements<ConceptDev, DevUniformCudaHipRt<TApi>>
59  {
60  friend struct trait::GetDevByIdx<PlatformUniformCudaHipRt<TApi>>;
61 
63 
64  protected:
//! Default constructor; creates a new, empty-constructed queue registry for this handle.
//! NOTE(review): m_iDevice is not set by this constructor and its declaration in this listing has no
//! in-class initializer, so it is indeterminate until assigned - confirm every user sets it before it is read.
65  DevUniformCudaHipRt() : m_QueueRegistry{std::make_shared<alpaka::detail::QueueRegistry<IDeviceQueue>>()}
66  {
67  }
68 
69  public:
//! Equality comparison of two device handles.
//! \param rhs The handle to compare against.
//! \return true if both handles store the same device id (m_iDevice); the queue registry is not compared.
70  ALPAKA_FN_HOST auto operator==(DevUniformCudaHipRt const& rhs) const -> bool
71  {
72  return m_iDevice == rhs.m_iDevice;
73  }
74 
//! Inequality comparison of two device handles.
//! \param rhs The handle to compare against.
//! \return The negation of operator==, i.e. true if the stored device ids differ.
75  ALPAKA_FN_HOST auto operator!=(DevUniformCudaHipRt const& rhs) const -> bool
76  {
77  return !((*this) == rhs);
78  }
79 
//! Returns the integer device id stored in this handle (m_iDevice).
//! \return The device id as int; the result must not be discarded.
80  [[nodiscard]] auto getNativeHandle() const noexcept -> int
81  {
82  return m_iDevice;
83  }
84 
//! Returns the queues currently known to this device's queue registry.
//! \return The vector of shared queue pointers produced by QueueRegistry::getAllExistingQueues().
85  [[nodiscard]] ALPAKA_FN_HOST auto getAllQueues() const -> std::vector<std::shared_ptr<IDeviceQueue>>
86  {
87  return m_QueueRegistry->getAllExistingQueues();
88  }
89 
90  //! Registers the given queue on this device.
91  //! NOTE: Every queue has to be registered for correct functionality of device wait operations!
//! \param spQueue Shared pointer to the queue; it is forwarded to the queue registry.
//! The method is const-qualified because it only calls through the shared_ptr member m_QueueRegistry.
92  ALPAKA_FN_HOST auto registerQueue(std::shared_ptr<IDeviceQueue> spQueue) const -> void
93  {
94  m_QueueRegistry->registerQueue(spQueue);
95  }
96 
97  private:
//! Constructs a handle for the given device id and creates a new queue registry for it.
//! Declared in the private section; trait::GetDevByIdx<PlatformUniformCudaHipRt<TApi>> is a friend of this class.
//! \param iDevice The device id to store in m_iDevice.
98  DevUniformCudaHipRt(int iDevice)
99  : m_iDevice(iDevice)
100  , m_QueueRegistry(std::make_shared<alpaka::detail::QueueRegistry<IDeviceQueue>>())
101  {
102  }
103 
104  int m_iDevice;
105 
106  std::shared_ptr<alpaka::detail::QueueRegistry<IDeviceQueue>> m_QueueRegistry;
107  };
108 
109  namespace trait
110  {
111  //! The CUDA/HIP RT device name get trait specialization.
112  template<typename TApi>
114  {
//! Returns the name of the given device.
//! Fetches the full device property struct via TApi::getDeviceProperties and copies its name field.
//! \param dev The device handle whose native handle is passed to the API call.
//! \return The device name as std::string.
//! The API call is wrapped in ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK (runtime error checking with log and exception).
115  ALPAKA_FN_HOST static auto getName(DevUniformCudaHipRt<TApi> const& dev) -> std::string
116  {
117  // There is cuda/hip-DeviceGetAttribute as faster alternative to cuda/hip-GetDeviceProperties to get a
118  // single device property but it has no option to get the name
119  typename TApi::DeviceProp_t devProp;
120  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::getDeviceProperties(&devProp, dev.getNativeHandle()));
121 
122  return std::string(devProp.name);
123  }
124  };
125 
126  //! The CUDA/HIP RT device total memory size get trait specialization.
127  template<typename TApi>
129  {
//! Returns the total memory size reported by TApi::memGetInfo for the given device.
//! \param dev The device handle; it is made the current device before the query.
//! \return The "total" value written by memGetInfo, in the unit reported by the API (bytes, per the function name).
130  ALPAKA_FN_HOST static auto getMemBytes(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
131  {
132  // Set the current device whose memory is queried (memGetInfo takes no device argument).
133  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
134 
135  std::size_t freeInternal(0u);
136  std::size_t totalInternal(0u);
137 
// memGetInfo writes both values; freeInternal is only needed as an output argument here.
138  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal));
139 
140  return totalInternal;
141  }
142  };
143 
144  //! The CUDA/HIP RT device free memory get trait specialization.
145  template<typename TApi>
147  {
//! Returns the free memory size reported by TApi::memGetInfo for the given device.
//! \param dev The device handle; it is made the current device before the query.
//! \return The "free" value written by memGetInfo, in the unit reported by the API (bytes, per the function name).
148  ALPAKA_FN_HOST static auto getFreeMemBytes(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
149  {
150  // Set the current device whose memory is queried (memGetInfo takes no device argument).
151  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
152 
153  std::size_t freeInternal(0u);
154  std::size_t totalInternal(0u);
155 
// memGetInfo writes both values; totalInternal is only needed as an output argument here.
156  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal));
157 
158  return freeInternal;
159  }
160  };
161 
162  //! The CUDA/HIP RT device warp size get trait specialization.
163  template<typename TApi>
165  {
166  ALPAKA_FN_HOST static auto getWarpSizes(DevUniformCudaHipRt<TApi> const& dev) -> std::vector<std::size_t>
167  {
169  }
170  };
171 
172  //! The CUDA/HIP RT preferred device warp size get trait specialization.
173  template<typename TApi>
175  {
176  ALPAKA_FN_HOST static auto getPreferredWarpSize(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
177  {
178  int warpSize = 0;
179 
181  TApi::deviceGetAttribute(&warpSize, TApi::deviceAttributeWarpSize, dev.getNativeHandle()));
182  return static_cast<std::size_t>(warpSize);
183  }
184  };
185 
186 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
187  //! The CUDA RT preferred device warp size get trait specialization.
188  template<>
190  {
192  -> std::size_t
193  {
194  // All CUDA GPUs to date have a warp size of 32 threads.
195  return 32u;
196  }
197  };
198 # endif // ALPAKA_ACC_GPU_CUDA_ENABLED
199 
200  //! The CUDA/HIP RT device reset trait specialization.
201  template<typename TApi>
203  {
//! Resets the given device by making it current and then calling TApi::deviceReset().
//! \param dev The device handle whose native handle is passed to TApi::setDevice.
//! Both API calls are wrapped in ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK (runtime error checking with log and exception).
204  ALPAKA_FN_HOST static auto reset(DevUniformCudaHipRt<TApi> const& dev) -> void
205  {
207 
208  // Set the current device that is going to be reset (deviceReset takes no device argument).
209  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
210  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::deviceReset());
211  }
212  };
213 
214  //! The CUDA/HIP RT device native handle trait specialization.
215  template<typename TApi>
217  {
//! Trait entry point that forwards to the member function DevUniformCudaHipRt::getNativeHandle().
//! \param dev The device handle to query.
//! \return Whatever dev.getNativeHandle() returns (the return type is deduced).
218  [[nodiscard]] static auto getNativeHandle(DevUniformCudaHipRt<TApi> const& dev)
219  {
220  return dev.getNativeHandle();
221  }
222  };
223 
224  //! The CUDA/HIP RT device memory buffer type trait specialization.
225  template<typename TApi, typename TElem, typename TDim, typename TIdx>
226  struct BufType<DevUniformCudaHipRt<TApi>, TElem, TDim, TIdx>
227  {
229  };
230 
231  //! The CUDA/HIP RT device platform type trait specialization.
232  template<typename TApi>
234  {
236  };
237 
238  //! The thread CUDA/HIP device wait specialization.
239  //!
240  //! Blocks until the device has completed all preceding requested tasks.
241  //! Tasks that are enqueued or queues that are created after this call is made are not waited for.
242  template<typename TApi>
244  {
246  {
248 
249  // Set the current device to wait for.
250  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
251  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::deviceSynchronize());
252  }
253  };
254 
255  template<typename TApi>
256  struct QueueType<DevUniformCudaHipRt<TApi>, Blocking>
257  {
259  };
260 
261  template<typename TApi>
262  struct QueueType<DevUniformCudaHipRt<TApi>, NonBlocking>
263  {
265  };
266  } // namespace trait
267 } // namespace alpaka
268 
269 #endif
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
Definition: Debug.hpp:62
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The CUDA/HIP RT device handle.
auto getNativeHandle() const noexcept -> int
ALPAKA_FN_HOST auto operator!=(DevUniformCudaHipRt const &rhs) const -> bool
ALPAKA_FN_HOST auto registerQueue(std::shared_ptr< IDeviceQueue > spQueue) const -> void
Registers the given queue on this device. NOTE: Every queue has to be registered for correct functionality of device wait operations!
ALPAKA_FN_HOST auto operator==(DevUniformCudaHipRt const &rhs) const -> bool
ALPAKA_FN_HOST auto getAllQueues() const -> std::vector< std::shared_ptr< IDeviceQueue >>
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
constexpr std::uint32_t warpSize
This is a shortcut for the trait defined above.
Definition: Traits.hpp:85
The alpaka accelerator library.
constexpr ALPAKA_FN_HOST auto getPreferredWarpSize(TDev const &dev) -> std::size_t
Definition: Traits.hpp:118
The CUDA/HIP memory buffer.
Tag used in class inheritance hierarchies that describes that a specific concept (TConcept) is implem...
Definition: Concepts.hpp:15
The memory buffer type trait.
Definition: Traits.hpp:23
static ALPAKA_FN_HOST auto currentThreadWaitFor(DevUniformCudaHipRt< TApi > const &dev) -> void
The thread wait trait.
Definition: Traits.hpp:21
The device get trait.
Definition: DevCpu.hpp:41
static ALPAKA_FN_HOST auto getFreeMemBytes(DevUniformCudaHipRt< TApi > const &dev) -> std::size_t
The device free memory size get trait.
Definition: Traits.hpp:39
static ALPAKA_FN_HOST auto getMemBytes(DevUniformCudaHipRt< TApi > const &dev) -> std::size_t
The device memory size get trait.
Definition: Traits.hpp:35
static ALPAKA_FN_HOST auto getName(DevUniformCudaHipRt< TApi > const &dev) -> std::string
The device name get trait.
Definition: Traits.hpp:31
static constexpr ALPAKA_FN_HOST auto getPreferredWarpSize(DevUniformCudaHipRt< ApiCudaRt > const &) -> std::size_t
static ALPAKA_FN_HOST auto getPreferredWarpSize(DevUniformCudaHipRt< TApi > const &dev) -> std::size_t
The device preferred warp size get trait.
Definition: Traits.hpp:47
static ALPAKA_FN_HOST auto getWarpSizes(DevUniformCudaHipRt< TApi > const &dev) -> std::vector< std::size_t >
The device warp size get trait.
Definition: Traits.hpp:43
static auto getNativeHandle(DevUniformCudaHipRt< TApi > const &dev)
The native handle trait.
Definition: Traits.hpp:17
The platform type trait.
Definition: Traits.hpp:30
Queue for an accelerator.
Definition: Traits.hpp:35
static ALPAKA_FN_HOST auto reset(DevUniformCudaHipRt< TApi > const &dev) -> void
The device reset trait.
Definition: Traits.hpp:51