alpaka
Abstraction Library for Parallel Kernel Acceleration
EventUniformCudaHipRt.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Benjamin Worpitz, Andrea Bocci, Bernhard Manfred Gruber, Antonio Di Pilato
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include "alpaka/core/Cuda.hpp"
8 #include "alpaka/core/Hip.hpp"
11 #include "alpaka/dev/Traits.hpp"
12 #include "alpaka/event/Traits.hpp"
15 #include "alpaka/wait/Traits.hpp"
16 
17 #include <functional>
18 #include <memory>
19 #include <stdexcept>
20 
21 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
22 
23 namespace alpaka
24 {
25  namespace uniform_cuda_hip::detail
26  {
27  //! The CUDA/HIP RT device event implementation.
28  template<typename TApi>
30  {
31  public:
33  : m_dev(dev)
34  , m_UniformCudaHipEvent()
35  {
37 
38  // Set the current device.
39  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(m_dev.getNativeHandle()));
40 
41  // Create the event on the current device with the specified flags. Valid flags include:
42  // - cuda/hip-EventDefault: Default event creation flag.
43  // - cuda/hip-EventBlockingSync : Specifies that event should use blocking synchronization.
44  // A host thread that uses cuda/hip-EventSynchronize() to wait on an event created with this flag
45  // will block until the event actually completes.
46  // - cuda/hip-EventDisableTiming : Specifies that the created event does not need to record timing
47  // data.
48  // Events created with this flag specified and the cuda/hip-EventBlockingSync flag not specified
49  // will provide the best performance when used with cudaStreamWaitEvent() and cudaEventQuery().
50  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::eventCreateWithFlags(
51  &m_UniformCudaHipEvent,
52  (bBusyWait ? TApi::eventDefault : TApi::eventBlockingSync) | TApi::eventDisableTiming));
53  }
54 
57 
59  {
61 
62  // In case event has been recorded but has not yet been completed when cuda/hip-EventDestroy() is
63  // called, the function will return immediately and the resources associated with event will be
64  // released automatically once the device has completed event.
65  // -> No need to synchronize here.
66  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::eventDestroy(m_UniformCudaHipEvent));
67  }
68 
69  [[nodiscard]] auto getNativeHandle() const noexcept
70  {
71  return m_UniformCudaHipEvent;
72  }
73 
74  public:
75  DevUniformCudaHipRt<TApi> const m_dev; //!< The device this event is bound to.
76 
77  private:
78  typename TApi::Event_t m_UniformCudaHipEvent;
79  };
80  } // namespace uniform_cuda_hip::detail
81 
82  //! The CUDA/HIP RT device event.
83  template<typename TApi>
85  : public interface::Implements<ConceptCurrentThreadWaitFor, EventUniformCudaHipRt<TApi>>
86  , public interface::Implements<ConceptGetDev, EventUniformCudaHipRt<TApi>>
87  {
88  public:
90  : m_spEventImpl(std::make_shared<uniform_cuda_hip::detail::EventUniformCudaHipImpl<TApi>>(dev, bBusyWait))
91  {
93  }
94 
95  ALPAKA_FN_HOST auto operator==(EventUniformCudaHipRt<TApi> const& rhs) const -> bool
96  {
97  return (m_spEventImpl == rhs.m_spEventImpl);
98  }
99 
101  {
102  return !((*this) == rhs);
103  }
104 
105  [[nodiscard]] auto getNativeHandle() const noexcept
106  {
107  return m_spEventImpl->getNativeHandle();
108  }
109 
110  public:
111  std::shared_ptr<uniform_cuda_hip::detail::EventUniformCudaHipImpl<TApi>> m_spEventImpl;
112  };
113 
114  namespace trait
115  {
116  //! The CUDA/HIP RT device event device type trait specialization.
117  template<typename TApi>
119  {
121  };
122 
123  //! The CUDA/HIP RT device event device get trait specialization.
124  template<typename TApi>
126  {
128  {
129  return event.m_spEventImpl->m_dev;
130  }
131  };
132 
133  //! The CUDA/HIP RT device event test trait specialization.
134  template<typename TApi>
136  {
137  ALPAKA_FN_HOST static auto isComplete(EventUniformCudaHipRt<TApi> const& event) -> bool
138  {
140 
141  // Query is allowed even for events on non current device.
142  typename TApi::Error_t ret = TApi::success;
144  ret = TApi::eventQuery(event.getNativeHandle()),
145  TApi::errorNotReady);
146  return (ret == TApi::success);
147  }
148  };
149 
150  //! The CUDA/HIP RT queue enqueue trait specialization.
151  template<typename TApi>
153  {
154  ALPAKA_FN_HOST static auto enqueue(
156  EventUniformCudaHipRt<TApi>& event) -> void
157  {
159 
160  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::eventRecord(event.getNativeHandle(), queue.getNativeHandle()));
161  }
162  };
163 
164  //! The CUDA/HIP RT queue enqueue trait specialization.
165  template<typename TApi>
167  {
168  ALPAKA_FN_HOST static auto enqueue(
170  EventUniformCudaHipRt<TApi>& event) -> void
171  {
173 
174  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::eventRecord(event.getNativeHandle(), queue.getNativeHandle()));
175  }
176  };
177 
178  //! The CUDA/HIP RT device event thread wait trait specialization.
179  //!
180  //! Waits until the event itself and therefore all tasks preceding it in the queue it is enqueued to have been
181  //! completed. If the event is not enqueued to a queue the method returns immediately.
182  template<typename TApi>
184  {
186  {
188 
189  // Sync is allowed even for events on non current device.
190  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::eventSynchronize(event.getNativeHandle()));
191  }
192  };
193 
194  //! The CUDA/HIP RT queue event wait trait specialization.
195  template<typename TApi>
197  {
200  EventUniformCudaHipRt<TApi> const& event) -> void
201  {
203 
205  TApi::streamWaitEvent(queue.getNativeHandle(), event.getNativeHandle(), 0));
206  }
207  };
208 
209  //! The CUDA/HIP RT queue event wait trait specialization.
210  template<typename TApi>
212  {
215  EventUniformCudaHipRt<TApi> const& event) -> void
216  {
218 
220  TApi::streamWaitEvent(queue.getNativeHandle(), event.getNativeHandle(), 0));
221  }
222  };
223 
224  //! The CUDA/HIP RT device event wait trait specialization.
225  //!
226  //! Any future work submitted in any queue of this device will wait for event to complete before beginning
227  //! execution.
228  template<typename TApi>
230  {
233  EventUniformCudaHipRt<TApi> const& event) -> void
234  {
236 
237  // Set the current device.
238  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));
239 
240  // Get all the queues on the device at the time of invocation.
241  // All queues added afterwards are ignored.
242  auto vQueues = dev.getAllQueues();
243  for(auto&& spQueue : vQueues)
244  {
246  TApi::streamWaitEvent(spQueue->getNativeHandle(), event.getNativeHandle(), 0));
247  }
248  }
249  };
250 
251  //! The CUDA/HIP RT event native handle trait specialization.
252  template<typename TApi>
254  {
255  [[nodiscard]] static auto getNativeHandle(EventUniformCudaHipRt<TApi> const& event)
256  {
257  return event.getNativeHandle();
258  }
259  };
260  } // namespace trait
261 } // namespace alpaka
262 
263 #endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(cmd)
CUDA/HIP runtime error checking with log.
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_IGNORE(cmd,...)
CUDA/HIP runtime error checking with log and exception, ignoring specific error values.
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The CUDA/HIP RT device handle.
The CUDA/HIP RT device event.
std::shared_ptr< uniform_cuda_hip::detail::EventUniformCudaHipImpl< TApi > > m_spEventImpl
ALPAKA_FN_HOST auto operator==(EventUniformCudaHipRt< TApi > const &rhs) const -> bool
ALPAKA_FN_HOST EventUniformCudaHipRt(DevUniformCudaHipRt< TApi > const &dev, bool bBusyWait=true)
ALPAKA_FN_HOST auto operator!=(EventUniformCudaHipRt< TApi > const &rhs) const -> bool
EventUniformCudaHipImpl(EventUniformCudaHipImpl const &)=delete
DevUniformCudaHipRt< TApi > const m_dev
The device this event is bound to.
auto operator=(EventUniformCudaHipImpl const &) -> EventUniformCudaHipImpl &=delete
ALPAKA_FN_HOST EventUniformCudaHipImpl(DevUniformCudaHipRt< TApi > const &dev, bool bBusyWait)
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
The alpaka accelerator library.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition: Interface.hpp:15
static ALPAKA_FN_HOST auto currentThreadWaitFor(EventUniformCudaHipRt< TApi > const &event) -> void
The thread wait trait.
Definition: Traits.hpp:21
The device type trait.
Definition: Traits.hpp:23
static ALPAKA_FN_HOST auto enqueue(QueueUniformCudaHipRtBlocking< TApi > &queue, EventUniformCudaHipRt< TApi > &event) -> void
static ALPAKA_FN_HOST auto enqueue(QueueUniformCudaHipRtNonBlocking< TApi > &queue, EventUniformCudaHipRt< TApi > &event) -> void
The queue enqueue trait.
Definition: Traits.hpp:27
static ALPAKA_FN_HOST auto getDev(EventUniformCudaHipRt< TApi > const &event) -> DevUniformCudaHipRt< TApi >
The device get trait.
Definition: Traits.hpp:27
static ALPAKA_FN_HOST auto isComplete(EventUniformCudaHipRt< TApi > const &event) -> bool
The event tester trait.
Definition: Traits.hpp:21
static auto getNativeHandle(EventUniformCudaHipRt< TApi > const &event)
The native handle trait.
Definition: Traits.hpp:17
static ALPAKA_FN_HOST auto waiterWaitFor(DevUniformCudaHipRt< TApi > &dev, EventUniformCudaHipRt< TApi > const &event) -> void
static ALPAKA_FN_HOST auto waiterWaitFor(QueueUniformCudaHipRtBlocking< TApi > &queue, EventUniformCudaHipRt< TApi > const &event) -> void
static ALPAKA_FN_HOST auto waiterWaitFor(QueueUniformCudaHipRtNonBlocking< TApi > &queue, EventUniformCudaHipRt< TApi > const &event) -> void
The waiter wait trait.
Definition: Traits.hpp:25