alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
DevUniformCudaHipRt.hpp
Go to the documentation of this file.
1/* Copyright 2024 Benjamin Worpitz, Jakob Krude, René Widera, Andrea Bocci, Bernhard Manfred Gruber,
2 * Antonio Di Pilato, Jan Stephan, Andrea Bocci
3 * SPDX-License-Identifier: MPL-2.0
4 */
5
6#pragma once
7
10#include "alpaka/core/Hip.hpp"
12#include "alpaka/dev/Traits.hpp"
21
22#include <cstddef>
23#include <string>
24#include <vector>
25
26#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
27
28namespace alpaka
29{
30 namespace trait
31 {
32 template<typename TPlatform, typename TSfinae>
33 struct GetDevByIdx;
34 } // namespace trait
35
36 namespace uniform_cuda_hip::detail
37 {
38 template<typename TApi, bool TBlocking>
39 class QueueUniformCudaHipRt;
40 } // namespace uniform_cuda_hip::detail
41
42 template<typename TApi>
44
45 template<typename TApi>
47
48 template<typename TApi>
50
51 template<typename TApi, typename TElem, typename TDim, typename TIdx>
53
54 //! The CUDA/HIP RT device handle.
55 template<typename TApi>
57 : public interface::Implements<ConceptCurrentThreadWaitFor, DevUniformCudaHipRt<TApi>>
58 , public interface::Implements<ConceptDev, DevUniformCudaHipRt<TApi>>
59 {
61
63
64 protected:
65 DevUniformCudaHipRt() : m_QueueRegistry{std::make_shared<alpaka::detail::QueueRegistry<IDeviceQueue>>()}
66 {
67 }
68
69 public:
70 ALPAKA_FN_HOST auto operator==(DevUniformCudaHipRt const& rhs) const -> bool
71 {
72 return m_iDevice == rhs.m_iDevice;
73 }
74
75 ALPAKA_FN_HOST auto operator!=(DevUniformCudaHipRt const& rhs) const -> bool
76 {
77 return !((*this) == rhs);
78 }
79
80 [[nodiscard]] auto getNativeHandle() const noexcept -> int
81 {
82 return m_iDevice;
83 }
84
85 [[nodiscard]] ALPAKA_FN_HOST auto getAllQueues() const -> std::vector<std::shared_ptr<IDeviceQueue>>
86 {
87 return m_QueueRegistry->getAllExistingQueues();
88 }
89
90 //! Registers the given queue on this device.
91 //! NOTE: Every queue has to be registered for correct functionality of device wait operations!
92 ALPAKA_FN_HOST auto registerQueue(std::shared_ptr<IDeviceQueue> spQueue) const -> void
93 {
94 m_QueueRegistry->registerQueue(spQueue);
95 }
96
97 private:
98 DevUniformCudaHipRt(int iDevice)
99 : m_iDevice(iDevice)
100 , m_QueueRegistry(std::make_shared<alpaka::detail::QueueRegistry<IDeviceQueue>>())
101 {
102 }
103
104 int m_iDevice;
105
106 std::shared_ptr<alpaka::detail::QueueRegistry<IDeviceQueue>> m_QueueRegistry;
107 };
108
109 namespace trait
110 {
111 //! The CUDA/HIP RT device name get trait specialization.
112 template<typename TApi>
113 struct GetName<DevUniformCudaHipRt<TApi>>
114 {
115 ALPAKA_FN_HOST static auto getName(DevUniformCudaHipRt<TApi> const& dev) -> std::string
116 {
117 // There is cuda/hip-DeviceGetAttribute as faster alternative to cuda/hip-GetDeviceProperties to get a
118 // single device property but it has no option to get the name
119 typename TApi::DeviceProp_t devProp;
120 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::getDeviceProperties(&devProp, dev.getNativeHandle()));
121
122 return std::string(devProp.name);
123 }
124 };
125
126 //! The CUDA/HIP RT device available memory get trait specialization.
127 template<typename TApi>
128 struct GetMemBytes<DevUniformCudaHipRt<TApi>>
129 {
130 ALPAKA_FN_HOST static auto getMemBytes(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
131 {
132 // Set the current device to wait for.
133 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
134
135 std::size_t freeInternal(0u);
136 std::size_t totalInternal(0u);
137
138 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal));
139
140 return totalInternal;
141 }
142 };
143
144 //! The CUDA/HIP RT device free memory get trait specialization.
145 template<typename TApi>
146 struct GetFreeMemBytes<DevUniformCudaHipRt<TApi>>
147 {
148 ALPAKA_FN_HOST static auto getFreeMemBytes(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
149 {
150 // Set the current device to wait for.
151 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
152
153 std::size_t freeInternal(0u);
154 std::size_t totalInternal(0u);
155
156 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memGetInfo(&freeInternal, &totalInternal));
157
158 return freeInternal;
159 }
160 };
161
162 //! The CUDA/HIP RT device warp size get trait specialization.
163 template<typename TApi>
164 struct GetWarpSizes<DevUniformCudaHipRt<TApi>>
165 {
166 ALPAKA_FN_HOST static auto getWarpSizes(DevUniformCudaHipRt<TApi> const& dev) -> std::vector<std::size_t>
167 {
168 return {GetPreferredWarpSize<DevUniformCudaHipRt<TApi>>::getPreferredWarpSize(dev)};
169 }
170 };
171
172 //! The CUDA/HIP RT preferred device warp size get trait specialization.
173 template<typename TApi>
174 struct GetPreferredWarpSize<DevUniformCudaHipRt<TApi>>
175 {
176 ALPAKA_FN_HOST static auto getPreferredWarpSize(DevUniformCudaHipRt<TApi> const& dev) -> std::size_t
177 {
178 int warpSize = 0;
179
181 TApi::deviceGetAttribute(&warpSize, TApi::deviceAttributeWarpSize, dev.getNativeHandle()));
182 return static_cast<std::size_t>(warpSize);
183 }
184 };
185
186# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
187 //! The CUDA RT preferred device warp size get trait specialization.
188 template<>
189 struct GetPreferredWarpSize<DevUniformCudaHipRt<ApiCudaRt>>
190 {
191 ALPAKA_FN_HOST static constexpr auto getPreferredWarpSize(DevUniformCudaHipRt<ApiCudaRt> const& /* dev */)
192 -> std::size_t
193 {
194 // All CUDA GPUs to date have a warp size of 32 threads.
195 return 32u;
196 }
197 };
198# endif // ALPAKA_ACC_GPU_CUDA_ENABLED
199
200 //! The CUDA/HIP RT device reset trait specialization.
201 template<typename TApi>
202 struct Reset<DevUniformCudaHipRt<TApi>>
203 {
204 ALPAKA_FN_HOST static auto reset(DevUniformCudaHipRt<TApi> const& dev) -> void
205 {
207
208 // Set the current device to wait for.
209 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
210 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::deviceReset());
211 }
212 };
213
214 //! The CUDA/HIP RT device native handle trait specialization.
215 template<typename TApi>
216 struct NativeHandle<DevUniformCudaHipRt<TApi>>
217 {
218 [[nodiscard]] static auto getNativeHandle(DevUniformCudaHipRt<TApi> const& dev)
219 {
220 return dev.getNativeHandle();
221 }
222 };
223
224 //! The CUDA/HIP RT device memory buffer type trait specialization.
225 template<typename TApi, typename TElem, typename TDim, typename TIdx>
226 struct BufType<DevUniformCudaHipRt<TApi>, TElem, TDim, TIdx>
227 {
228 using type = BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>;
229 };
230
231 //! The CUDA/HIP RT device platform type trait specialization.
232 template<typename TApi>
233 struct PlatformType<DevUniformCudaHipRt<TApi>>
234 {
235 using type = PlatformUniformCudaHipRt<TApi>;
236 };
237
238 //! The thread CUDA/HIP device wait specialization.
239 //!
240 //! Blocks until the device has completed all preceding requested tasks.
241 //! Tasks that are enqueued or queues that are created after this call is made are not waited for.
242 template<typename TApi>
243 struct CurrentThreadWaitFor<DevUniformCudaHipRt<TApi>>
244 {
245 ALPAKA_FN_HOST static auto currentThreadWaitFor(DevUniformCudaHipRt<TApi> const& dev) -> void
246 {
248
249 // Set the current device to wait for.
250 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
251 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::deviceSynchronize());
252 }
253 };
254
255 template<typename TApi>
256 struct QueueType<DevUniformCudaHipRt<TApi>, Blocking>
257 {
258 using type = QueueUniformCudaHipRtBlocking<TApi>;
259 };
260
261 template<typename TApi>
262 struct QueueType<DevUniformCudaHipRt<TApi>, NonBlocking>
263 {
264 using type = QueueUniformCudaHipRtNonBlocking<TApi>;
265 };
266 } // namespace trait
267} // namespace alpaka
268
269#endif
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
Definition Debug.hpp:62
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The CUDA/HIP RT device handle.
auto getNativeHandle() const noexcept -> int
ALPAKA_FN_HOST auto operator!=(DevUniformCudaHipRt const &rhs) const -> bool
ALPAKA_FN_HOST auto registerQueue(std::shared_ptr< IDeviceQueue > spQueue) const -> void
Registers the given queue on this device. NOTE: Every queue has to be registered for correct functionality of device wait operations!
ALPAKA_FN_HOST auto getAllQueues() const -> std::vector< std::shared_ptr< IDeviceQueue > >
ALPAKA_FN_HOST auto operator==(DevUniformCudaHipRt const &rhs) const -> bool
#define ALPAKA_FN_HOST
Definition Common.hpp:40
constexpr std::uint32_t warpSize
This is a shortcut for the trait defined above.
Definition Traits.hpp:109
The alpaka accelerator library.
ALPAKA_FN_HOST constexpr auto getPreferredWarpSize(TDev const &dev) -> std::size_t
Definition Traits.hpp:118
ALPAKA_FN_HOST auto getName(TDev const &dev) -> std::string
Definition Traits.hpp:87
ALPAKA_FN_HOST auto getWarpSizes(TDev const &dev) -> std::vector< std::size_t >
Definition Traits.hpp:111
ALPAKA_FN_HOST auto reset(TDev const &dev) -> void
Resets the device. What this method does is dependent on the accelerator.
Definition Traits.hpp:126
ALPAKA_FN_HOST auto getFreeMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:104
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
Definition Traits.hpp:95
decltype(getNativeHandle(std::declval< TImpl >())) NativeHandle
Alias to the type of the native handle.
Definition Traits.hpp:36
STL namespace.
The CUDA/HIP memory buffer.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
The device get trait.
Definition DevCpu.hpp:41
static auto getNativeHandle(TImpl const &)
Definition Traits.hpp:18