alpaka
Abstraction Library for Parallel Kernel Acceleration
EventHostManualTrigger.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Benjamin Worpitz, Matthias Werner, Jan Stephan, Jeffrey Kelling, Andrea Bocci,
2  * Bernhard Manfred Gruber, Aurora Perego
3  * SPDX-License-Identifier: MPL-2.0
4  */
5 
6 #pragma once
7 
8 #include "alpaka/alpaka.hpp"
9 
10 #include <condition_variable>
11 #include <mutex>
12 #include <utility>
13 
14 namespace alpaka::test
15 {
16  namespace trait
17  {
18  template<typename TDev>
20 
21  template<typename TDev>
23  } // namespace trait
24 
25  //! The event host manual trigger type trait alias template to remove the ::type.
26  template<typename TDev>
28 
29  template<typename TDev>
30  ALPAKA_FN_HOST auto isEventHostManualTriggerSupported(TDev const& dev) -> bool
31  {
33  }
34 
35  namespace cpu::detail
36  {
37  //! Event that can be enqueued into a queue and can be triggered by the Host.
38  template<class TDev = DevCpu>
40  {
41  public:
42  //! Constructor.
44  : m_dev(std::move(dev))
45  , m_mutex()
46  , m_enqueueCount(0u)
47  , m_bIsReady(true)
48  {
49  }
50 
53 
54  void trigger()
55  {
56  {
57  std::unique_lock<std::mutex> lock(m_mutex);
58  m_bIsReady = true;
59  }
60  m_conditionVariable.notify_one();
61  // Give alpaka time to update into the new state, process all events and tasks.
62  std::this_thread::sleep_for(std::chrono::milliseconds(200u));
63  }
64 
65  public:
66  TDev const m_dev; //!< The device this event is bound to.
67 
68  mutable std::mutex m_mutex; //!< The mutex used to synchronize access to the event.
69 
70  mutable std::condition_variable m_conditionVariable; //!< The condition signaling the event completion.
71  std::size_t m_enqueueCount; //!< The number of times this event has been enqueued.
72 
73  bool m_bIsReady; //!< If the event is not waiting within a queue (not enqueued or already
74  //!< completed).
75  };
76  } // namespace cpu::detail
77 
78  //! Event that can be enqueued into a queue and can be triggered by the Host.
79  template<class TDev = DevCpu>
81  {
82  public:
83  //! Constructor.
85  : m_spEventImpl(std::make_shared<cpu::detail::EventHostManualTriggerCpuImpl<TDev>>(dev))
86  {
87  }
88 
89  //! Equality comparison operator.
90  ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCpu const& rhs) const -> bool
91  {
92  return (m_spEventImpl == rhs.m_spEventImpl);
93  }
94 
95  //! Inequality comparison operator.
96  ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCpu const& rhs) const -> bool
97  {
98  return !((*this) == rhs);
99  }
100 
101  void trigger()
102  {
103  m_spEventImpl->trigger();
104  // Give alpaka time to update into the new state, process all events and tasks.
105  std::this_thread::sleep_for(std::chrono::milliseconds(200u));
106  }
107 
108  public:
109  std::shared_ptr<cpu::detail::EventHostManualTriggerCpuImpl<TDev>> m_spEventImpl;
110  };
111 
112  namespace trait
113  {
114  template<>
116  {
118  };
119 
120  //! The CPU event host manual trigger support get trait specialization.
121  template<>
123  {
124  ALPAKA_FN_HOST static auto isSupported(DevCpu const&) -> bool
125  {
126  return true;
127  }
128  };
129  } // namespace trait
130 } // namespace alpaka::test
131 
132 namespace alpaka::trait
133 {
134  //! The CPU device event device get trait specialization.
135  template<typename TDev>
136  struct GetDev<test::EventHostManualTriggerCpu<TDev>>
137  {
138  //
140  {
141  return event.m_spEventImpl->m_dev;
142  }
143  };
144 
145  //! The CPU device event test trait specialization.
146  template<typename TDev>
147  struct IsComplete<test::EventHostManualTriggerCpu<TDev>>
148  {
149  //! \return If the event is not waiting within a queue (not enqueued or already handled).
151  {
152  std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
153 
154  return event.m_spEventImpl->m_bIsReady;
155  }
156  };
157 
158  template<typename TDev>
159  struct Enqueue<QueueGenericThreadsNonBlocking<TDev>, test::EventHostManualTriggerCpu<TDev>>
160  {
161  //
162  ALPAKA_FN_HOST static auto enqueue(
165  {
167 
168  // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
169  auto spEventImpl = event.m_spEventImpl;
170 
171  // Setting the event state and enqueuing it has to be atomic.
172  std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
173 
174  // The event should not yet be enqueued.
175  ALPAKA_ASSERT(spEventImpl->m_bIsReady);
176 
177  // Set its state to enqueued.
178  spEventImpl->m_bIsReady = false;
179 
180  // Increment the enqueue counter. This is used to skip waits for events that had already been finished
181  // and re-enqueued which would lead to deadlocks.
182  ++spEventImpl->m_enqueueCount;
183 
184  auto const enqueueCount = spEventImpl->m_enqueueCount;
185 
186  // Enqueue a task that only resets the events flag if it is completed.
187  queue.m_spQueueImpl->m_workerThread.submit(
188  [spEventImpl, enqueueCount]() mutable
189  {
190  std::unique_lock<std::mutex> lk2(spEventImpl->m_mutex);
191  spEventImpl->m_conditionVariable.wait(
192  lk2,
193  [spEventImpl, enqueueCount]
194  { return (enqueueCount != spEventImpl->m_enqueueCount) || spEventImpl->m_bIsReady; });
195  });
196  }
197  };
198 
199  template<typename TDev>
200  struct Enqueue<QueueGenericThreadsBlocking<TDev>, test::EventHostManualTriggerCpu<TDev>>
201  {
202  //
203  ALPAKA_FN_HOST static auto enqueue(
206  {
208 
209  // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
210  auto spEventImpl = event.m_spEventImpl;
211 
212  // Setting the event state and enqueuing it has to be atomic.
213  std::unique_lock<std::mutex> lk(spEventImpl->m_mutex);
214 
215  // The event should not yet be enqueued.
216  ALPAKA_ASSERT(spEventImpl->m_bIsReady);
217 
218  // Set its state to enqueued.
219  spEventImpl->m_bIsReady = false;
220 
221  // Increment the enqueue counter. This is used to skip waits for events that had already been finished
222  // and re-enqueued which would lead to deadlocks.
223  ++spEventImpl->m_enqueueCount;
224 
225  auto const enqueueCount = spEventImpl->m_enqueueCount;
226 
227  spEventImpl->m_conditionVariable.wait(
228  lk,
229  [spEventImpl, enqueueCount]
230  { return (enqueueCount != spEventImpl->m_enqueueCount) || spEventImpl->m_bIsReady; });
231  }
232  };
233 } // namespace alpaka::trait
234 
235 #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
236 
237 # include "alpaka/core/BoostPredef.hpp"
238 
239 # include <cuda.h>
240 
241 # if !BOOST_LANG_CUDA && !defined(ALPAKA_HOST_ONLY)
242 # error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
243 # endif
244 
245 # include "alpaka/core/Cuda.hpp"
246 
247 namespace alpaka::test
248 {
249  namespace uniform_cuda_hip::detail
250  {
252  {
253  using TApi = alpaka::ApiCudaRt;
254 
255  public:
257  : m_dev(dev)
258  , m_mutex()
259  , m_bIsReady(true)
260  {
262 
263  // Set the current device.
265  // Allocate the buffer on this device.
266  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaMalloc(&m_devMem, static_cast<size_t>(sizeof(int32_t))));
267  // Initiate the memory set.
269  cudaMemset(m_devMem, static_cast<int>(0u), static_cast<size_t>(sizeof(int32_t))));
270  }
271 
274 
276  {
278 
279  // Free the buffer.
281  }
282 
283  void trigger()
284  {
285  std::unique_lock<std::mutex> lock(m_mutex);
286  m_bIsReady = true;
287 
288  // Set the current device.
290  // Initiate the memory set.
292  cudaMemset(m_devMem, static_cast<int>(1u), static_cast<size_t>(sizeof(int32_t))));
293  // Give alpaka time to update into the new state, process all events and tasks.
294  std::this_thread::sleep_for(std::chrono::milliseconds(200u));
295  }
296 
297  public:
298  DevCudaRt const m_dev; //!< The device this event is bound to.
299 
300  mutable std::mutex m_mutex; //!< The mutex used to synchronize access to the event.
301  void* m_devMem;
302 
303  bool m_bIsReady; //!< If the event is not waiting within a queue (not enqueued or already
304  //!< completed).
305  };
306  } // namespace uniform_cuda_hip::detail
307 
309  {
310  public:
312  : m_spEventImpl(std::make_shared<uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl>(dev))
313  {
315  }
316 
317  ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCuda const& rhs) const -> bool
318  {
319  return (m_spEventImpl == rhs.m_spEventImpl);
320  }
321 
322  ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCuda const& rhs) const -> bool
323  {
324  return !((*this) == rhs);
325  }
326 
327  void trigger()
328  {
329  m_spEventImpl->trigger();
330  // Give alpaka time to update into the new state, process all events and tasks.
331  std::this_thread::sleep_for(std::chrono::milliseconds(200u));
332  }
333 
334  public:
335  std::shared_ptr<uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl> m_spEventImpl;
336  };
337 
338  namespace trait
339  {
340  template<>
342  {
344  };
345 
346  //! The CPU event host manual trigger support get trait specialization.
347  template<>
349  {
350  ALPAKA_FN_HOST static auto isSupported([[maybe_unused]] DevCudaRt const& dev) -> bool
351  {
352 # if CUDA_VERSION < 11070
353  int result = 0;
354  cuDeviceGetAttribute(&result, CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS, dev.getNativeHandle());
355  return result != 0;
356 # else
357  return true; // Always enabled as of CUDA 11.7
358 # endif
359  }
360  };
361  } // namespace trait
362 } // namespace alpaka::test
363 
364 namespace alpaka::trait
365 {
366  namespace detail
367  {
368  // TODO: Replace with cuStreamWaitValue32 once support for CUDA < 12 is dropped.
369  inline auto streamWaitValue(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags)
370  -> CUresult
371  {
372  // NVIDIA introduced a new stream memory ops API with CUDA 11.7 (called v2). The corresponding CUDA
373  // functions were suffixed with `_v2`. With CUDA 12.0 v1 of the API was removed and the `_v2` removed
374  // from the new functions. So CUDA <= 11.6 and CUDA >= 12.0 share the same function signature but
375  // internally do different things.
376 # if(CUDA_VERSION < 11070) || (CUDA_VERSION >= 12000)
377  return cuStreamWaitValue32(stream, addr, value, flags);
378 # else
379  return cuStreamWaitValue32_v2(stream, addr, value, flags);
380 # endif
381  }
382  } // namespace detail
383 
384  //! The CPU device event device get trait specialization.
385  template<>
386  struct GetDev<test::EventHostManualTriggerCuda>
387  {
389  {
390  return event.m_spEventImpl->m_dev;
391  }
392  };
393 
394  //! The CPU device event test trait specialization.
395  template<>
396  struct IsComplete<test::EventHostManualTriggerCuda>
397  {
398  //! \return If the event is not waiting within a queue (not enqueued or already handled).
400  {
401  std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
402 
403  return event.m_spEventImpl->m_bIsReady;
404  }
405  };
406 
407  template<>
408  struct Enqueue<QueueCudaRtNonBlocking, test::EventHostManualTriggerCuda>
409  {
411  -> void
412  {
414 
415  // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
416  auto spEventImpl(event.m_spEventImpl);
417 
418  // Setting the event state and enqueuing it has to be atomic.
419  std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
420 
421  // The event should not yet be enqueued.
422  ALPAKA_ASSERT(spEventImpl->m_bIsReady);
423 
424  // Set its state to enqueued.
425  spEventImpl->m_bIsReady = false;
426 
427  // PGI Profiler`s User Guide:
428  // The following are known issues related to Events and Metrics:
429  // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
430  // on host updates may hang. This includes synchronization between the host and
431  // the device build upon value-based CUDA queue synchronization APIs such as
432  // cuStreamWaitValue32() and cuStreamWriteValue32().
434  static_cast<CUstream>(queue.getNativeHandle()),
435  reinterpret_cast<CUdeviceptr>(event.m_spEventImpl->m_devMem),
436  0x0101'0101u,
437  CU_STREAM_WAIT_VALUE_GEQ));
438  }
439  };
440 
441  template<>
442  struct Enqueue<QueueCudaRtBlocking, test::EventHostManualTriggerCuda>
443  {
445  {
447 
448  // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
449  auto spEventImpl(event.m_spEventImpl);
450 
451  // Setting the event state and enqueuing it has to be atomic.
452  std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
453 
454  // The event should not yet be enqueued.
455  ALPAKA_ASSERT(spEventImpl->m_bIsReady);
456 
457  // Set its state to enqueued.
458  spEventImpl->m_bIsReady = false;
459 
460  // PGI Profiler`s User Guide:
461  // The following are known issues related to Events and Metrics:
462  // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
463  // on host updates may hang. This includes synchronization between the host and
464  // the device build upon value-based CUDA queue synchronization APIs such as
465  // cuStreamWaitValue32() and cuStreamWriteValue32().
467  static_cast<CUstream>(queue.getNativeHandle()),
468  reinterpret_cast<CUdeviceptr>(event.m_spEventImpl->m_devMem),
469  0x0101'0101u,
470  CU_STREAM_WAIT_VALUE_GEQ));
471  }
472  };
473 } // namespace alpaka::trait
474 #endif
475 
476 
477 #ifdef ALPAKA_ACC_GPU_HIP_ENABLED
478 
479 # include <hip/hip_runtime.h>
480 
481 # if !BOOST_LANG_HIP && !defined(ALPAKA_HOST_ONLY)
482 # error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
483 # endif
484 
485 # include "alpaka/core/Hip.hpp"
486 
487 namespace alpaka::test
488 {
489  namespace hip::detail
490  {
491  class EventHostManualTriggerHipImpl final
492  {
493  using TApi = alpaka::ApiHipRt;
494 
495  public:
496  ALPAKA_FN_HOST EventHostManualTriggerHipImpl(DevHipRt const& dev) : m_dev(dev), m_mutex(), m_bIsReady(true)
497  {
499 
500  // Set the current device.
501  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipSetDevice(m_dev.getNativeHandle()));
502  // Allocate the buffer on this device.
503  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipMalloc(&m_devMem, static_cast<size_t>(sizeof(int32_t))));
504  // Initiate the memory set.
506  hipMemset(m_devMem, static_cast<int>(0u), static_cast<size_t>(sizeof(int32_t))));
507  }
508 
509  EventHostManualTriggerHipImpl(EventHostManualTriggerHipImpl const&) = delete;
510  auto operator=(EventHostManualTriggerHipImpl const&) -> EventHostManualTriggerHipImpl& = delete;
511 
512  ALPAKA_FN_HOST ~EventHostManualTriggerHipImpl()
513  {
515 
516  // Free the buffer.
518  }
519 
520  void trigger()
521  {
522  std::unique_lock<std::mutex> lock(m_mutex);
523  m_bIsReady = true;
524 
525  // Set the current device.
526  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipSetDevice(m_dev.getNativeHandle()));
527  // Initiate the memory set.
529  hipMemset(m_devMem, static_cast<int>(1u), static_cast<size_t>(sizeof(int32_t))));
530  // Give alpaka time to update into the new state, process all events and tasks.
531  std::this_thread::sleep_for(std::chrono::milliseconds(200u));
532  }
533 
534  public:
535  DevHipRt const m_dev; //!< The device this event is bound to.
536 
537  mutable std::mutex m_mutex; //!< The mutex used to synchronize access to the event.
538  void* m_devMem;
539 
540  bool m_bIsReady; //!< If the event is not waiting within a queue (not enqueued or already
541  //!< completed).
542  };
543  } // namespace hip::detail
544 
545  class EventHostManualTriggerHip final
546  {
547  public:
548  ALPAKA_FN_HOST EventHostManualTriggerHip(DevHipRt const& dev)
549  : m_spEventImpl(std::make_shared<hip::detail::EventHostManualTriggerHipImpl>(dev))
550  {
552  }
553 
554  ALPAKA_FN_HOST auto operator==(EventHostManualTriggerHip const& rhs) const -> bool
555  {
556  return (m_spEventImpl == rhs.m_spEventImpl);
557  }
558 
559  ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerHip const& rhs) const -> bool
560  {
561  return !((*this) == rhs);
562  }
563 
564  void trigger()
565  {
566  m_spEventImpl->trigger();
567  // Give alpaka time to update into the new state, process all events and tasks.
568  std::this_thread::sleep_for(std::chrono::milliseconds(200u));
569  }
570 
571  public:
572  std::shared_ptr<hip::detail::EventHostManualTriggerHipImpl> m_spEventImpl;
573  };
574 
575  namespace trait
576  {
577  template<>
578  struct EventHostManualTriggerType<DevHipRt>
579  {
580  using type = test::EventHostManualTriggerHip;
581  };
582 
583  //! The HIP event host manual trigger support get trait specialization.
584  template<>
585  struct IsEventHostManualTriggerSupported<DevHipRt>
586  {
587  // TODO: there is no CUDA_VERSION in the HIP compiler path.
588  // TODO: there is a hipDeviceGetAttribute, but there is no pendant for
589  // CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS.
590  ALPAKA_FN_HOST static auto isSupported(DevHipRt const&) -> bool
591  {
592  return false;
593  }
594  };
595  } // namespace trait
596 } // namespace alpaka::test
597 
598 namespace alpaka::trait
599 {
600  //! The CPU device event device get trait specialization.
601  template<>
602  struct GetDev<test::EventHostManualTriggerHip>
603  {
604  ALPAKA_FN_HOST static auto getDev(test::EventHostManualTriggerHip const& event) -> DevHipRt
605  {
606  return event.m_spEventImpl->m_dev;
607  }
608  };
609 
610  //! The CPU device event test trait specialization.
611  template<>
612  struct IsComplete<test::EventHostManualTriggerHip>
613  {
614  //! \return If the event is not waiting within a queue (not enqueued or already handled).
615  ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerHip const& event) -> bool
616  {
617  std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
618 
619  return event.m_spEventImpl->m_bIsReady;
620  }
621  };
622 
623  template<>
624  struct Enqueue<QueueHipRtNonBlocking, test::EventHostManualTriggerHip>
625  {
626  using TApi = alpaka::ApiHipRt;
627 
628  ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, test::EventHostManualTriggerHip& event)
629  -> void
630  {
632 
633  // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
634  auto spEventImpl(event.m_spEventImpl);
635 
636  // Setting the event state and enqueuing it has to be atomic.
637  std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
638 
639  // The event should not yet be enqueued.
640  ALPAKA_ASSERT(spEventImpl->m_bIsReady);
641 
642  // Set its state to enqueued.
643  spEventImpl->m_bIsReady = false;
644 
645  // PGI Profiler`s User Guide:
646  // The following are known issues related to Events and Metrics:
647  // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
648  // on host updates may hang. This includes synchronization between the host and
649  // the device build upon value-based CUDA queue synchronization APIs such as
650  // cuStreamWaitValue32() and cuStreamWriteValue32().
651  int32_t hostMem = 0;
652 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
653  std::cerr << "[Workaround] polling of device-located value in stream, as hipStreamWaitValue32 is not "
654  "available.\n";
655 # endif
656  while(hostMem < 0x0101'0101)
657  {
658  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipMemcpyDtoHAsync(
659  &hostMem,
660  reinterpret_cast<hipDeviceptr_t>(event.m_spEventImpl->m_devMem),
661  sizeof(int32_t),
662  queue.getNativeHandle()));
663  ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipStreamSynchronize(queue.getNativeHandle()));
664  }
665  }
666  };
667 
668  template<>
669  struct Enqueue<QueueHipRtBlocking, test::EventHostManualTriggerHip>
670  {
671  using TApi = alpaka::ApiHipRt;
672 
673  ALPAKA_FN_HOST static auto enqueue(QueueHipRtBlocking& /* queue */, test::EventHostManualTriggerHip& event)
674  -> void
675  {
677 
678  // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
679  auto spEventImpl(event.m_spEventImpl);
680 
681  // Setting the event state and enqueuing it has to be atomic.
682  std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
683 
684  // The event should not yet be enqueued.
685  ALPAKA_ASSERT(spEventImpl->m_bIsReady);
686 
687  // Set its state to enqueued.
688  spEventImpl->m_bIsReady = false;
689 
690  // PGI Profiler`s User Guide:
691  // The following are known issues related to Events and Metrics:
692  // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
693  // on host updates may hang. This includes synchronization between the host and
694  // the device build upon value-based HIP queue synchronization APIs such as
695  // cuStreamWaitValue32() and cuStreamWriteValue32().
696 
697  // workaround for missing cuStreamWaitValue32 in HIP
698  std::uint32_t hmem = 0;
699  do
700  {
701  std::this_thread::sleep_for(std::chrono::milliseconds(10u));
703  hipMemcpy(&hmem, event.m_spEventImpl->m_devMem, sizeof(std::uint32_t), hipMemcpyDefault));
704  } while(hmem < 0x0101'0101u);
705  }
706  };
707 } // namespace alpaka::trait
708 #endif
709 
710 #ifdef ALPAKA_ACC_SYCL_ENABLED
711 namespace alpaka
712 {
713  namespace test
714  {
715  template<concepts::Tag TTag>
716  class EventHostManualTriggerSycl
717  {
718  public:
719  EventHostManualTriggerSycl(DevGenericSycl<TTag> const&)
720  {
721  }
722 
723  auto trigger()
724  {
725  }
726  };
727 
728  namespace trait
729  {
730  template<concepts::Tag TTag>
731  struct EventHostManualTriggerType<DevGenericSycl<TTag>>
732  {
733  using type = alpaka::test::EventHostManualTriggerSycl<TTag>;
734  };
735 
736  template<concepts::Tag TTag>
737  struct IsEventHostManualTriggerSupported<DevGenericSycl<TTag>>
738  {
739  ALPAKA_FN_HOST static auto isSupported(DevGenericSycl<TTag> const&) -> bool
740  {
741  return false;
742  }
743  };
744  } // namespace trait
745  } // namespace test
746 
747  namespace trait
748  {
749  template<concepts::Tag TTag>
750  struct Enqueue<QueueGenericSyclBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
751  {
752  ALPAKA_FN_HOST static auto enqueue(
753  QueueGenericSyclBlocking<TTag>& /* queue */,
754  test::EventHostManualTriggerSycl<TTag>& /* event */) -> void
755  {
756  }
757  };
758 
759  template<concepts::Tag TTag>
760  struct Enqueue<QueueGenericSyclNonBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
761  {
762  ALPAKA_FN_HOST static auto enqueue(
763  QueueGenericSyclNonBlocking<TTag>& /* queue */,
764  test::EventHostManualTriggerSycl<TTag>& /* event */) -> void
765  {
766  }
767  };
768 
769  template<concepts::Tag TTag>
770  struct IsComplete<test::EventHostManualTriggerSycl<TTag>>
771  {
772  ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerSycl<TTag> const& /* event */) -> bool
773  {
774  return true;
775  }
776  };
777  } // namespace trait
778 } // namespace alpaka
779 #endif
#define ALPAKA_ASSERT(...)
The assert can be explicit disabled by defining NDEBUG.
Definition: Assert.hpp:13
#define ALPAKA_CUDA_DRV_CHECK(cmd)
CUDA driver error checking with log and exception.
Definition: Cuda.hpp:54
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition: Debug.hpp:55
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(cmd)
CUDA/HIP runtime error checking with log.
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The CPU device handle.
Definition: DevCpu.hpp:56
The CUDA/HIP RT device handle.
auto getNativeHandle() const noexcept -> int
Event that can be enqueued into a queue and can be triggered by the Host.
ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCpu const &rhs) const -> bool
Inequality comparison operator.
ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCpu const &rhs) const -> bool
Equality comparison operator.
std::shared_ptr< cpu::detail::EventHostManualTriggerCpuImpl< TDev > > m_spEventImpl
ALPAKA_FN_HOST EventHostManualTriggerCpu(TDev const &dev)
Constructor.
ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCuda const &rhs) const -> bool
ALPAKA_FN_HOST EventHostManualTriggerCuda(DevCudaRt const &dev)
ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCuda const &rhs) const -> bool
std::shared_ptr< uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl > m_spEventImpl
Event that can be enqueued into a queue and can be triggered by the Host.
auto operator=(EventHostManualTriggerCpuImpl const &) -> EventHostManualTriggerCpuImpl &=delete
TDev const m_dev
The device this event is bound to.
EventHostManualTriggerCpuImpl(EventHostManualTriggerCpuImpl const &other)=delete
bool m_bIsReady
If the event is not waiting within a queue (not enqueued or already completed).
std::size_t m_enqueueCount
The number of times this event has been enqueued.
std::mutex m_mutex
The mutex used to synchronize access to the event.
ALPAKA_FN_HOST EventHostManualTriggerCpuImpl(TDev dev) noexcept
Constructor.
std::condition_variable m_conditionVariable
The condition signaling the event completion.
bool m_bIsReady
If the event is not waiting within a queue (not enqueued or already completed).
std::mutex m_mutex
The mutex used to synchronize access to the event.
auto operator=(EventHostManualTriggerCudaImpl const &) -> EventHostManualTriggerCudaImpl &=delete
EventHostManualTriggerCudaImpl(EventHostManualTriggerCudaImpl const &)=delete
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
constexpr ALPAKA_FN_HOST_ACC bool operator==(Complex< T > const &lhs, Complex< T > const &rhs)
Equality of two complex numbers.
Definition: Complex.hpp:292
constexpr ALPAKA_FN_HOST_ACC bool operator!=(Complex< T > const &lhs, Complex< T > const &rhs)
Inequality of two complex numbers.
Definition: Complex.hpp:318
The test specifics.
Definition: TestAccs.hpp:27
ALPAKA_FN_HOST auto isEventHostManualTriggerSupported(TDev const &dev) -> bool
typename trait::EventHostManualTriggerType< TDev >::type EventHostManualTrigger
The event host manual trigger type trait alias template to remove the ::type.
auto streamWaitValue(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) -> CUresult
The accelerator traits.
The alpaka accelerator library.
ALPAKA_FN_HOST auto isComplete(TEvent const &event) -> bool
Tests if the given event has already been completed.
Definition: Traits.hpp:34
ALPAKA_FN_HOST auto getDev(T const &t)
Definition: Traits.hpp:68
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition: Traits.hpp:47
static ALPAKA_FN_HOST auto isSupported(DevCpu const &) -> bool
static ALPAKA_FN_HOST auto isSupported([[maybe_unused]] DevCudaRt const &dev) -> bool
static ALPAKA_FN_HOST auto enqueue(QueueCudaRtBlocking &queue, test::EventHostManualTriggerCuda &event) -> void
static ALPAKA_FN_HOST auto enqueue(QueueCudaRtNonBlocking &queue, test::EventHostManualTriggerCuda &event) -> void
static ALPAKA_FN_HOST auto enqueue(QueueGenericThreadsBlocking< TDev > &, test::EventHostManualTriggerCpu< TDev > &event) -> void
static ALPAKA_FN_HOST auto enqueue(QueueGenericThreadsNonBlocking< TDev > &queue, test::EventHostManualTriggerCpu< TDev > &event) -> void
The queue enqueue trait.
Definition: Traits.hpp:27
static ALPAKA_FN_HOST auto getDev(test::EventHostManualTriggerCpu< TDev > const &event) -> TDev
static ALPAKA_FN_HOST auto getDev(test::EventHostManualTriggerCuda const &event) -> DevCudaRt
The device get trait.
Definition: Traits.hpp:27
static ALPAKA_FN_HOST auto isComplete(test::EventHostManualTriggerCpu< TDev > const &event) -> bool
static ALPAKA_FN_HOST auto isComplete(test::EventHostManualTriggerCuda const &event) -> bool
The event tester trait.
Definition: Traits.hpp:21