alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
DevUniformCudaHipRt.hpp
Go to the documentation of this file.
1/* Copyright 2024 Benjamin Worpitz, Jakob Krude, René Widera, Andrea Bocci, Bernhard Manfred Gruber,
2 * Antonio Di Pilato, Jan Stephan, Andrea Bocci
3 * SPDX-License-Identifier: MPL-2.0
4 */
5
6#pragma once
7
10#include "alpaka/core/Hip.hpp"
12#include "alpaka/dev/Traits.hpp"
22
23#include <cstddef>
24#include <mutex>
25#include <string>
26#include <vector>
27
28#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
29
30namespace alpaka
31{
32
33 template<typename TApi>
35
36 namespace trait
37 {
//! Forward declaration of the GetDevByIdx trait template (declared only, not defined here).
38 template<typename TPlatform, typename TSfinae>
39 struct GetDevByIdx;
40 } // namespace trait
41
42 namespace uniform_cuda_hip::detail
43 {
//! Forward declaration of the queue class template, parameterized on the API type and a
//! compile-time blocking flag (declared only, not defined here).
44 template<typename TApi, bool TBlocking>
45 class QueueUniformCudaHipRt;
46 } // namespace uniform_cuda_hip::detail
47
48 template<typename TApi>
50
51 template<typename TApi>
53
54 template<typename TApi>
56
57 //! The CUDA/HIP RT device handle.
58 template<typename TApi>
60 : public interface::Implements<ConceptCurrentThreadWaitFor, DevUniformCudaHipRt<TApi>>
61 , public interface::Implements<ConceptDev, DevUniformCudaHipRt<TApi>>
62 {
64
66
67 protected:
//! Default constructor: only creates a fresh DevGenericImpl for this handle.
//! NOTE(review): m_iDevice is not set in this initializer list and its declaration shows no
//! default member initializer, so its value appears to be indeterminate after this constructor
//! runs - confirm that every user of this constructor assigns it before it is read.
68 DevUniformCudaHipRt() : m_DevGenericImpl{std::make_shared<alpaka::detail::DevGenericImpl<IDeviceQueue>>()}
69 {
70 }
71
72 public:
//! Equality comparison: two handles compare equal when their stored device indices match.
//! Only m_iDevice is compared; the DevGenericImpl pointer does not take part in the comparison.
73 ALPAKA_FN_HOST auto operator==(DevUniformCudaHipRt const& rhs) const -> bool
74 {
75 return m_iDevice == rhs.m_iDevice;
76 }
77
//! Inequality comparison, defined as the negation of operator==.
78 ALPAKA_FN_HOST auto operator!=(DevUniformCudaHipRt const& rhs) const -> bool
79 {
80 return !((*this) == rhs);
81 }
82
//! Returns the integer device index stored in this handle (m_iDevice).
83 [[nodiscard]] auto getNativeHandle() const noexcept -> int
84 {
85 return m_iDevice;
86 }
87
//! Returns the queues reported by this device's DevGenericImpl via getAllExistingQueues().
88 [[nodiscard]] ALPAKA_FN_HOST auto getAllQueues() const -> std::vector<std::shared_ptr<IDeviceQueue>>
89 {
90 return m_DevGenericImpl->getAllExistingQueues();
91 }
92
93 //! Registers the given queue on this device by forwarding it to the device's DevGenericImpl.
94 //! NOTE: Every queue has to be registered for correct functionality of device wait operations!
95 ALPAKA_FN_HOST auto registerQueue(std::shared_ptr<IDeviceQueue> spQueue) const -> void
96 {
97 m_DevGenericImpl->registerQueue(spQueue);
98 }
99
101 DevUniformCudaHipRt<TApi> const& device,
102 alpaka::DeviceProperties& devProperties)
103 {
104 // There is cuda/hip-DeviceGetAttribute as faster alternative to
105 // cuda/hip-GetDeviceProperties to get a single device property but it has no option to get
106 // the name
107 auto devHandle = device.getNativeHandle();
108 typename TApi::DeviceProp_t devProp;
109 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::getDeviceProperties(&devProp, devHandle));
110 devProperties.name = std::string(devProp.name);
111
// memGetInfo fills in both values; only the total is stored, the free value is discarded.
// NOTE(review): no TApi::setDevice(devHandle) call is visible before memGetInfo here, whereas
// GetFreeMemBytes selects the device first - confirm the total is reported for this device
// and not for whichever device happens to be current.
112 std::size_t freeInternal(0u);
113 std::size_t totalInternal(0u);
114 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal));
115 devProperties.totalGlobalMem = totalInternal;
116
// The single queried warp size becomes both the only entry of warpSizes and the preferred warp size.
117 int warpSize = 0;
119 TApi::deviceGetAttribute(&warpSize, TApi::deviceAttributeWarpSize, devHandle));
120 devProperties.warpSizes = std::vector<std::size_t>{static_cast<std::size_t>(warpSize)};
121 devProperties.preferredWarpSize = static_cast<std::size_t>(warpSize);
122 }
123
124 friend struct trait::GetName<DevUniformCudaHipRt<TApi>>;
125 friend struct trait::GetMemBytes<DevUniformCudaHipRt<TApi>>;
126 friend struct trait::GetFreeMemBytes<DevUniformCudaHipRt<TApi>>;
127 friend struct trait::GetWarpSizes<DevUniformCudaHipRt<TApi>>;
129
130 private:
//! Private constructor: stores the given device index and creates a fresh DevGenericImpl.
131 DevUniformCudaHipRt(int iDevice)
132 : m_iDevice(iDevice)
133 , m_DevGenericImpl(std::make_shared<alpaka::detail::DevGenericImpl<IDeviceQueue>>())
134 {
135 }
136
137 int m_iDevice;
138
139 std::shared_ptr<alpaka::detail::DevGenericImpl<IDeviceQueue>> m_DevGenericImpl;
140 };
141
142 namespace trait
143 {
144
145 //! The CUDA/HIP RT device name get trait specialization.
//! Returns the name field of the device properties obtained from the device's DevGenericImpl.
146 template<typename TApi>
147 struct GetName<DevUniformCudaHipRt<TApi>>
148 {
149 ALPAKA_FN_HOST static auto getName(DevUniformCudaHipRt<TApi> const& dev) -> std::string
150 {
151 return dev.m_DevGenericImpl->deviceProperties(dev)->name;
152 }
153 };
154
155 //! The CUDA/HIP RT device total global memory size get trait specialization.
//! Returns the totalGlobalMem field of the device properties (the total, not the currently free amount).
156 template<typename TApi>
157 struct GetMemBytes<DevUniformCudaHipRt<TApi>>
158 {
159 ALPAKA_FN_HOST static auto getMemBytes(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
160 {
161 return dev.m_DevGenericImpl->deviceProperties(dev)->totalGlobalMem;
162 }
163 };
164
165 //! The CUDA/HIP RT device free memory get trait specialization.
//! Unlike the other property traits in this file, this one queries the runtime on every call
//! instead of reading the device properties held by DevGenericImpl.
166 template<typename TApi>
167 struct GetFreeMemBytes<DevUniformCudaHipRt<TApi>>
168 {
169 ALPAKA_FN_HOST static auto getFreeMemBytes(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
170 {
// Select the device first, then ask the runtime for its memory info.
171 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
172 std::size_t freeInternal(0u);
173 std::size_t totalInternal(0u);
174 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal));
175
// Only the free amount is returned; the total is discarded.
176 return freeInternal;
177 }
178 };
179
180 //! The CUDA/HIP RT device warp size get trait specialization.
//! Returns a copy of the warpSizes vector from the device properties held by DevGenericImpl.
181 template<typename TApi>
182 struct GetWarpSizes<DevUniformCudaHipRt<TApi>>
183 {
184 ALPAKA_FN_HOST static auto getWarpSizes(DevUniformCudaHipRt<TApi> const& dev) -> std::vector<std::size_t>
185 {
186 return dev.m_DevGenericImpl->deviceProperties(dev)->warpSizes;
187 }
188 };
189
190 //! The CUDA/HIP RT preferred device warp size get trait specialization.
//! Returns the preferredWarpSize field of the device properties held by DevGenericImpl.
191 template<typename TApi>
192 struct GetPreferredWarpSize<DevUniformCudaHipRt<TApi>>
193 {
194 ALPAKA_FN_HOST static auto getPreferredWarpSize(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
195 {
196 return dev.m_DevGenericImpl->deviceProperties(dev)->preferredWarpSize;
197 }
198 };
199
200# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
201 //! The CUDA RT preferred device warp size get trait specialization.
//! Full specialization for ApiCudaRt: returns a compile-time constant and does not use the device argument.
202 template<>
203 struct GetPreferredWarpSize<DevUniformCudaHipRt<ApiCudaRt>>
204 {
205 ALPAKA_FN_HOST static constexpr auto getPreferredWarpSize(DevUniformCudaHipRt<ApiCudaRt> const& /* dev */)
206 -> std::size_t
207 {
208 // All CUDA GPUs to date have a warp size of 32 threads.
209 return 32u;
210 }
211 };
212# endif // ALPAKA_ACC_GPU_CUDA_ENABLED
213
214 //! The CUDA/HIP RT device reset trait specialization.
//! Makes the given device the current one and then calls TApi::deviceReset().
215 template<typename TApi>
216 struct Reset<DevUniformCudaHipRt<TApi>>
217 {
218 ALPAKA_FN_HOST static auto reset(DevUniformCudaHipRt<TApi> const& dev) -> void
219 {
221
222 // Select the device that is to be reset.
223 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
224 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::deviceReset());
225 }
226 };
227
228 //! The CUDA/HIP RT device native handle trait specialization.
//! Delegates to DevUniformCudaHipRt::getNativeHandle(); the return type is deduced from that call.
229 template<typename TApi>
230 struct NativeHandle<DevUniformCudaHipRt<TApi>>
231 {
232 [[nodiscard]] static auto getNativeHandle(DevUniformCudaHipRt<TApi> const& dev)
233 {
234 return dev.getNativeHandle();
235 }
236 };
237
238 //! The CUDA/HIP RT device platform type trait specialization.
//! Maps DevUniformCudaHipRt<TApi> to PlatformUniformCudaHipRt<TApi>.
239 template<typename TApi>
240 struct PlatformType<DevUniformCudaHipRt<TApi>>
241 {
242 using type = PlatformUniformCudaHipRt<TApi>;
243 };
244
245 //! The thread CUDA/HIP device wait specialization.
246 //!
247 //! Blocks until the device has completed all preceding requested tasks.
248 //! Tasks that are enqueued or queues that are created after this call is made are not waited for.
//! Implemented by making the given device current and calling TApi::deviceSynchronize().
249 template<typename TApi>
250 struct CurrentThreadWaitFor<DevUniformCudaHipRt<TApi>>
251 {
252 ALPAKA_FN_HOST static auto currentThreadWaitFor(DevUniformCudaHipRt<TApi> const& dev) -> void
253 {
255
256 // Select the device to wait for, then synchronize with it.
257 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
258 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::deviceSynchronize());
259 }
260 };
261
//! The CUDA/HIP RT blocking queue type trait specialization.
//! Maps (DevUniformCudaHipRt<TApi>, Blocking) to QueueUniformCudaHipRtBlocking<TApi>.
262 template<typename TApi>
263 struct QueueType<DevUniformCudaHipRt<TApi>, Blocking>
264 {
265 using type = QueueUniformCudaHipRtBlocking<TApi>;
266 };
267
//! The CUDA/HIP RT non-blocking queue type trait specialization.
//! Maps (DevUniformCudaHipRt<TApi>, NonBlocking) to QueueUniformCudaHipRtNonBlocking<TApi>.
268 template<typename TApi>
269 struct QueueType<DevUniformCudaHipRt<TApi>, NonBlocking>
270 {
271 using type = QueueUniformCudaHipRtNonBlocking<TApi>;
272 };
273 } // namespace trait
274} // namespace alpaka
275
276#endif
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
Definition Debug.hpp:62
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The CUDA/HIP RT device handle.
auto getNativeHandle() const noexcept -> int
ALPAKA_FN_HOST auto operator!=(DevUniformCudaHipRt const &rhs) const -> bool
static void setDeviceProperties(DevUniformCudaHipRt< TApi > const &device, alpaka::DeviceProperties &devProperties)
ALPAKA_FN_HOST auto registerQueue(std::shared_ptr< IDeviceQueue > spQueue) const -> void
Registers the given queue on this device. NOTE: Every queue has to be registered for correct function...
ALPAKA_FN_HOST auto getAllQueues() const -> std::vector< std::shared_ptr< IDeviceQueue > >
ALPAKA_FN_HOST auto operator==(DevUniformCudaHipRt const &rhs) const -> bool
#define ALPAKA_FN_HOST
Definition Common.hpp:40
The alpaka accelerator library.
ALPAKA_FN_HOST constexpr auto getPreferredWarpSize(TDev const &dev) -> std::size_t
Definition Traits.hpp:118
ALPAKA_FN_HOST auto getName(TDev const &dev) -> std::string
Definition Traits.hpp:87
ALPAKA_FN_HOST auto getWarpSizes(TDev const &dev) -> std::vector< std::size_t >
Definition Traits.hpp:111
ALPAKA_FN_HOST auto reset(TDev const &dev) -> void
Resets the device. What this method does is dependent on the accelerator.
Definition Traits.hpp:126
ALPAKA_FN_HOST auto getFreeMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:104
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
decltype(getNativeHandle(std::declval< TImpl >())) NativeHandle
Alias to the type of the native handle.
Definition Traits.hpp:36
STL namespace.
std::vector< std::size_t > warpSizes
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
The device get trait.
Definition DevCpu.hpp:41
The device free memory size get trait.
Definition Traits.hpp:39
The device memory size get trait.
Definition Traits.hpp:35
The device name get trait.
Definition Traits.hpp:31
The device preferred warp size get trait.
Definition Traits.hpp:47
The device warp size get trait.
Definition Traits.hpp:43
static auto getNativeHandle(TImpl const &)
Definition Traits.hpp:18