alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
EventHostManualTrigger.hpp
Go to the documentation of this file.
1/* Copyright 2024 Benjamin Worpitz, Matthias Werner, Jan Stephan, Jeffrey Kelling, Andrea Bocci,
2 * Bernhard Manfred Gruber, Aurora Perego
3 * SPDX-License-Identifier: MPL-2.0
4 */
5
6#pragma once
7
#include "alpaka/alpaka.hpp"

#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>
#include <utility>
13
14namespace alpaka::test
15{
16 namespace trait
17 {
18 template<typename TDev>
20
21 template<typename TDev>
23 } // namespace trait
24
25 //! The event host manual trigger type trait alias template to remove the ::type.
26 template<typename TDev>
28
29 template<typename TDev>
34
35 namespace cpu::detail
36 {
37 //! Event that can be enqueued into a queue and can be triggered by the Host.
38 template<class TDev = DevCpu>
40 {
41 public:
42 //! Constructor.
44 : m_dev(std::move(dev))
45 , m_mutex()
46 , m_enqueueCount(0u)
47 , m_bIsReady(true)
48 {
49 }
50
53
54 void trigger()
55 {
56 {
57 std::unique_lock<std::mutex> lock(m_mutex);
58 m_bIsReady = true;
59 }
60 m_conditionVariable.notify_one();
61 // Give alpaka time to update into the new state, process all events and tasks.
62 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
63 }
64
65 public:
66 TDev const m_dev; //!< The device this event is bound to.
67
68 mutable std::mutex m_mutex; //!< The mutex used to synchronize access to the event.
69
70 mutable std::condition_variable m_conditionVariable; //!< The condition signaling the event completion.
71 std::size_t m_enqueueCount; //!< The number of times this event has been enqueued.
72
73 bool m_bIsReady; //!< If the event is not waiting within a queue (not enqueued or already
74 //!< completed).
75 };
76 } // namespace cpu::detail
77
78 //! Event that can be enqueued into a queue and can be triggered by the Host.
79 template<class TDev = DevCpu>
81 {
82 public:
83 //! Constructor.
85 : m_spEventImpl(std::make_shared<cpu::detail::EventHostManualTriggerCpuImpl<TDev>>(dev))
86 {
87 }
88
89 //! Equality comparison operator.
91 {
92 return (m_spEventImpl == rhs.m_spEventImpl);
93 }
94
95 //! Inequality comparison operator.
97 {
98 return !((*this) == rhs);
99 }
100
101 void trigger()
102 {
103 m_spEventImpl->trigger();
104 // Give alpaka time to update into the new state, process all events and tasks.
105 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
106 }
107
108 public:
109 std::shared_ptr<cpu::detail::EventHostManualTriggerCpuImpl<TDev>> m_spEventImpl;
110 };
111
112 namespace trait
113 {
114 template<>
119
120 //! The CPU event host manual trigger support get trait specialization.
121 template<>
123 {
124 ALPAKA_FN_HOST static auto isSupported(DevCpu const&) -> bool
125 {
126 return true;
127 }
128 };
129 } // namespace trait
130} // namespace alpaka::test
131
132namespace alpaka::trait
133{
134 //! The CPU device event device get trait specialization.
135 template<typename TDev>
136 struct GetDev<test::EventHostManualTriggerCpu<TDev>>
137 {
138 //
140 {
141 return event.m_spEventImpl->m_dev;
142 }
143 };
144
145 //! The CPU device event test trait specialization.
146 template<typename TDev>
147 struct IsComplete<test::EventHostManualTriggerCpu<TDev>>
148 {
149 //! \return If the event is not waiting within a queue (not enqueued or already handled).
151 {
152 std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
153
154 return event.m_spEventImpl->m_bIsReady;
155 }
156 };
157
158 template<typename TDev>
159 struct Enqueue<QueueGenericThreadsNonBlocking<TDev>, test::EventHostManualTriggerCpu<TDev>>
160 {
161 //
165 {
167
168 // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
169 auto spEventImpl = event.m_spEventImpl;
170
171 // Setting the event state and enqueuing it has to be atomic.
172 std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
173
174 // The event should not yet be enqueued.
175 ALPAKA_ASSERT(spEventImpl->m_bIsReady);
176
177 // Set its state to enqueued.
178 spEventImpl->m_bIsReady = false;
179
180 // Increment the enqueue counter. This is used to skip waits for events that had already been finished
181 // and re-enqueued which would lead to deadlocks.
182 ++spEventImpl->m_enqueueCount;
183
184 auto const enqueueCount = spEventImpl->m_enqueueCount;
185
186 // Enqueue a task that only resets the events flag if it is completed.
187 queue.m_spQueueImpl->m_workerThread.submit(
188 [spEventImpl, enqueueCount]() mutable
189 {
190 std::unique_lock<std::mutex> lk2(spEventImpl->m_mutex);
191 spEventImpl->m_conditionVariable.wait(
192 lk2,
193 [spEventImpl, enqueueCount]
194 { return (enqueueCount != spEventImpl->m_enqueueCount) || spEventImpl->m_bIsReady; });
195 });
196 }
197 };
198
199 template<typename TDev>
200 struct Enqueue<QueueGenericThreadsBlocking<TDev>, test::EventHostManualTriggerCpu<TDev>>
201 {
202 //
206 {
208
209 // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
210 auto spEventImpl = event.m_spEventImpl;
211
212 // Setting the event state and enqueuing it has to be atomic.
213 std::unique_lock<std::mutex> lk(spEventImpl->m_mutex);
214
215 // The event should not yet be enqueued.
216 ALPAKA_ASSERT(spEventImpl->m_bIsReady);
217
218 // Set its state to enqueued.
219 spEventImpl->m_bIsReady = false;
220
221 // Increment the enqueue counter. This is used to skip waits for events that had already been finished
222 // and re-enqueued which would lead to deadlocks.
223 ++spEventImpl->m_enqueueCount;
224
225 auto const enqueueCount = spEventImpl->m_enqueueCount;
226
227 spEventImpl->m_conditionVariable.wait(
228 lk,
229 [spEventImpl, enqueueCount]
230 { return (enqueueCount != spEventImpl->m_enqueueCount) || spEventImpl->m_bIsReady; });
231 }
232 };
233} // namespace alpaka::trait
234
235#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
236
238
239# include <cuda.h>
240
241# if !BOOST_LANG_CUDA && !defined(ALPAKA_HOST_ONLY)
242# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
243# endif
244
245# include "alpaka/core/Cuda.hpp"
246
247namespace alpaka::test
248{
249 namespace uniform_cuda_hip::detail
250 {
252 {
253 using TApi = alpaka::ApiCudaRt;
254
255 public:
257 : m_dev(dev)
258 , m_mutex()
259 , m_bIsReady(true)
260 {
262
263 // Set the current device.
265 // Allocate the buffer on this device.
266 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaMalloc(&m_devMem, static_cast<size_t>(sizeof(int32_t))));
267 // Initiate the memory set.
269 cudaMemset(m_devMem, static_cast<int>(0u), static_cast<size_t>(sizeof(int32_t))));
270 }
271
274
282
283 void trigger()
284 {
285 std::unique_lock<std::mutex> lock(m_mutex);
286 m_bIsReady = true;
287
288 // Set the current device.
290 // Initiate the memory set.
292 cudaMemset(m_devMem, static_cast<int>(1u), static_cast<size_t>(sizeof(int32_t))));
293 // Give alpaka time to update into the new state, process all events and tasks.
294 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
295 }
296
297 public:
298 DevCudaRt const m_dev; //!< The device this event is bound to.
299
300 mutable std::mutex m_mutex; //!< The mutex used to synchronize access to the event.
301 void* m_devMem;
302
303 bool m_bIsReady; //!< If the event is not waiting within a queue (not enqueued or already
304 //!< completed).
305 };
306 } // namespace uniform_cuda_hip::detail
307
309 {
310 public:
312 : m_spEventImpl(std::make_shared<uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl>(dev))
313 {
315 }
316
318 {
319 return (m_spEventImpl == rhs.m_spEventImpl);
320 }
321
323 {
324 return !((*this) == rhs);
325 }
326
327 void trigger()
328 {
329 m_spEventImpl->trigger();
330 // Give alpaka time to update into the new state, process all events and tasks.
331 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
332 }
333
334 public:
335 std::shared_ptr<uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl> m_spEventImpl;
336 };
337
338 namespace trait
339 {
340 template<>
345
346 //! The CPU event host manual trigger support get trait specialization.
347 template<>
349 {
350 ALPAKA_FN_HOST static auto isSupported([[maybe_unused]] DevCudaRt const& dev) -> bool
351 {
352# if CUDA_VERSION < 11070
353 int result = 0;
354 cuDeviceGetAttribute(&result, CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS, dev.getNativeHandle());
355 return result != 0;
356# else
357 return true; // Always enabled as of CUDA 11.7
358# endif
359 }
360 };
361 } // namespace trait
362} // namespace alpaka::test
363
364namespace alpaka::trait
365{
366 namespace detail
367 {
368 // TODO: Replace with cuStreamWaitValue32 once support for CUDA < 12 is dropped.
369 inline auto streamWaitValue(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags)
370 -> CUresult
371 {
372 // NVIDIA introduced a new stream memory ops API with CUDA 11.7 (called v2). The corresponding CUDA
373 // functions were suffixed with `_v2`. With CUDA 12.0 v1 of the API was removed and the `_v2` removed
374 // from the new functions. So CUDA <= 11.6 and CUDA >= 12.0 share the same function signature but
375 // internally do different things.
376# if(CUDA_VERSION < 11070) || (CUDA_VERSION >= 12000)
377 return cuStreamWaitValue32(stream, addr, value, flags);
378# else
379 return cuStreamWaitValue32_v2(stream, addr, value, flags);
380# endif
381 }
382 } // namespace detail
383
384 //! The CPU device event device get trait specialization.
385 template<>
386 struct GetDev<test::EventHostManualTriggerCuda>
387 {
389 {
390 return event.m_spEventImpl->m_dev;
391 }
392 };
393
394 //! The CPU device event test trait specialization.
395 template<>
396 struct IsComplete<test::EventHostManualTriggerCuda>
397 {
398 //! \return If the event is not waiting within a queue (not enqueued or already handled).
400 {
401 std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
402
403 return event.m_spEventImpl->m_bIsReady;
404 }
405 };
406
407 template<>
408 struct Enqueue<QueueCudaRtNonBlocking, test::EventHostManualTriggerCuda>
409 {
411 -> void
412 {
414
415 // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
416 auto spEventImpl(event.m_spEventImpl);
417
418 // Setting the event state and enqueuing it has to be atomic.
419 std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
420
421 // The event should not yet be enqueued.
422 ALPAKA_ASSERT(spEventImpl->m_bIsReady);
423
424 // Set its state to enqueued.
425 spEventImpl->m_bIsReady = false;
426
427 // PGI Profiler`s User Guide:
428 // The following are known issues related to Events and Metrics:
429 // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
430 // on host updates may hang. This includes synchronization between the host and
431 // the device build upon value-based CUDA queue synchronization APIs such as
432 // cuStreamWaitValue32() and cuStreamWriteValue32().
434 static_cast<CUstream>(queue.getNativeHandle()),
435 reinterpret_cast<CUdeviceptr>(event.m_spEventImpl->m_devMem),
436 0x0101'0101u,
437 CU_STREAM_WAIT_VALUE_GEQ));
438 }
439 };
440
441 template<>
442 struct Enqueue<QueueCudaRtBlocking, test::EventHostManualTriggerCuda>
443 {
445 {
447
448 // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
449 auto spEventImpl(event.m_spEventImpl);
450
451 // Setting the event state and enqueuing it has to be atomic.
452 std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
453
454 // The event should not yet be enqueued.
455 ALPAKA_ASSERT(spEventImpl->m_bIsReady);
456
457 // Set its state to enqueued.
458 spEventImpl->m_bIsReady = false;
459
460 // PGI Profiler`s User Guide:
461 // The following are known issues related to Events and Metrics:
462 // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
463 // on host updates may hang. This includes synchronization between the host and
464 // the device build upon value-based CUDA queue synchronization APIs such as
465 // cuStreamWaitValue32() and cuStreamWriteValue32().
467 static_cast<CUstream>(queue.getNativeHandle()),
468 reinterpret_cast<CUdeviceptr>(event.m_spEventImpl->m_devMem),
469 0x0101'0101u,
470 CU_STREAM_WAIT_VALUE_GEQ));
471 }
472 };
473} // namespace alpaka::trait
474#endif
475
476
477#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
478
479# include <hip/hip_runtime.h>
480
481# if !BOOST_LANG_HIP && !defined(ALPAKA_HOST_ONLY)
482# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
483# endif
484
485# include "alpaka/core/Hip.hpp"
486
487namespace alpaka::test
488{
489 namespace hip::detail
490 {
491 class EventHostManualTriggerHipImpl final
492 {
493 using TApi = alpaka::ApiHipRt;
494
495 public:
496 ALPAKA_FN_HOST EventHostManualTriggerHipImpl(DevHipRt const& dev) : m_dev(dev), m_mutex(), m_bIsReady(true)
497 {
499
500 // Set the current device.
501 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipSetDevice(m_dev.getNativeHandle()));
502 // Allocate the buffer on this device.
503 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipMalloc(&m_devMem, static_cast<size_t>(sizeof(int32_t))));
504 // Initiate the memory set.
506 hipMemset(m_devMem, static_cast<int>(0u), static_cast<size_t>(sizeof(int32_t))));
507 }
508
509 EventHostManualTriggerHipImpl(EventHostManualTriggerHipImpl const&) = delete;
510 auto operator=(EventHostManualTriggerHipImpl const&) -> EventHostManualTriggerHipImpl& = delete;
511
512 ALPAKA_FN_HOST ~EventHostManualTriggerHipImpl()
513 {
515
516 // Free the buffer.
518 }
519
520 void trigger()
521 {
522 std::unique_lock<std::mutex> lock(m_mutex);
523 m_bIsReady = true;
524
525 // Set the current device.
526 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipSetDevice(m_dev.getNativeHandle()));
527 // Initiate the memory set.
529 hipMemset(m_devMem, static_cast<int>(1u), static_cast<size_t>(sizeof(int32_t))));
530 // Give alpaka time to update into the new state, process all events and tasks.
531 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
532 }
533
534 public:
535 DevHipRt const m_dev; //!< The device this event is bound to.
536
537 mutable std::mutex m_mutex; //!< The mutex used to synchronize access to the event.
538 void* m_devMem;
539
540 bool m_bIsReady; //!< If the event is not waiting within a queue (not enqueued or already
541 //!< completed).
542 };
543 } // namespace hip::detail
544
545 class EventHostManualTriggerHip final
546 {
547 public:
548 ALPAKA_FN_HOST EventHostManualTriggerHip(DevHipRt const& dev)
549 : m_spEventImpl(std::make_shared<hip::detail::EventHostManualTriggerHipImpl>(dev))
550 {
552 }
553
554 ALPAKA_FN_HOST auto operator==(EventHostManualTriggerHip const& rhs) const -> bool
555 {
556 return (m_spEventImpl == rhs.m_spEventImpl);
557 }
558
559 ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerHip const& rhs) const -> bool
560 {
561 return !((*this) == rhs);
562 }
563
564 void trigger()
565 {
566 m_spEventImpl->trigger();
567 // Give alpaka time to update into the new state, process all events and tasks.
568 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
569 }
570
571 public:
572 std::shared_ptr<hip::detail::EventHostManualTriggerHipImpl> m_spEventImpl;
573 };
574
575 namespace trait
576 {
577 template<>
578 struct EventHostManualTriggerType<DevHipRt>
579 {
580 using type = test::EventHostManualTriggerHip;
581 };
582
583 //! The HIP event host manual trigger support get trait specialization.
584 template<>
585 struct IsEventHostManualTriggerSupported<DevHipRt>
586 {
587 // TODO: there is no CUDA_VERSION in the HIP compiler path.
588 // TODO: there is a hipDeviceGetAttribute, but there is no pendant for
589 // CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS.
590 ALPAKA_FN_HOST static auto isSupported(DevHipRt const&) -> bool
591 {
592 return false;
593 }
594 };
595 } // namespace trait
596} // namespace alpaka::test
597
598namespace alpaka::trait
599{
600 //! The CPU device event device get trait specialization.
601 template<>
602 struct GetDev<test::EventHostManualTriggerHip>
603 {
604 ALPAKA_FN_HOST static auto getDev(test::EventHostManualTriggerHip const& event) -> DevHipRt
605 {
606 return event.m_spEventImpl->m_dev;
607 }
608 };
609
610 //! The CPU device event test trait specialization.
611 template<>
612 struct IsComplete<test::EventHostManualTriggerHip>
613 {
614 //! \return If the event is not waiting within a queue (not enqueued or already handled).
615 ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerHip const& event) -> bool
616 {
617 std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
618
619 return event.m_spEventImpl->m_bIsReady;
620 }
621 };
622
623 template<>
624 struct Enqueue<QueueHipRtNonBlocking, test::EventHostManualTriggerHip>
625 {
626 using TApi = alpaka::ApiHipRt;
627
628 ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, test::EventHostManualTriggerHip& event)
629 -> void
630 {
632
633 // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
634 auto spEventImpl(event.m_spEventImpl);
635
636 // Setting the event state and enqueuing it has to be atomic.
637 std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
638
639 // The event should not yet be enqueued.
640 ALPAKA_ASSERT(spEventImpl->m_bIsReady);
641
642 // Set its state to enqueued.
643 spEventImpl->m_bIsReady = false;
644
645 // PGI Profiler`s User Guide:
646 // The following are known issues related to Events and Metrics:
647 // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
648 // on host updates may hang. This includes synchronization between the host and
649 // the device build upon value-based CUDA queue synchronization APIs such as
650 // cuStreamWaitValue32() and cuStreamWriteValue32().
651 int32_t hostMem = 0;
652# if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
653 std::cerr << "[Workaround] polling of device-located value in stream, as hipStreamWaitValue32 is not "
654 "available.\n";
655# endif
656 while(hostMem < 0x0101'0101)
657 {
658 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipMemcpyDtoHAsync(
659 &hostMem,
660 reinterpret_cast<hipDeviceptr_t>(event.m_spEventImpl->m_devMem),
661 sizeof(int32_t),
662 queue.getNativeHandle()));
663 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipStreamSynchronize(queue.getNativeHandle()));
664 }
665 }
666 };
667
668 template<>
669 struct Enqueue<QueueHipRtBlocking, test::EventHostManualTriggerHip>
670 {
671 using TApi = alpaka::ApiHipRt;
672
673 ALPAKA_FN_HOST static auto enqueue(QueueHipRtBlocking& /* queue */, test::EventHostManualTriggerHip& event)
674 -> void
675 {
677
678 // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
679 auto spEventImpl(event.m_spEventImpl);
680
681 // Setting the event state and enqueuing it has to be atomic.
682 std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
683
684 // The event should not yet be enqueued.
685 ALPAKA_ASSERT(spEventImpl->m_bIsReady);
686
687 // Set its state to enqueued.
688 spEventImpl->m_bIsReady = false;
689
690 // PGI Profiler`s User Guide:
691 // The following are known issues related to Events and Metrics:
692 // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
693 // on host updates may hang. This includes synchronization between the host and
694 // the device build upon value-based HIP queue synchronization APIs such as
695 // cuStreamWaitValue32() and cuStreamWriteValue32().
696
697 // workaround for missing cuStreamWaitValue32 in HIP
698 std::uint32_t hmem = 0;
699 do
700 {
701 std::this_thread::sleep_for(std::chrono::milliseconds(10u));
703 hipMemcpy(&hmem, event.m_spEventImpl->m_devMem, sizeof(std::uint32_t), hipMemcpyDefault));
704 } while(hmem < 0x0101'0101u);
705 }
706 };
707} // namespace alpaka::trait
708#endif
709
710#ifdef ALPAKA_ACC_SYCL_ENABLED
711namespace alpaka
712{
713 namespace test
714 {
715 template<concepts::Tag TTag>
716 class EventHostManualTriggerSycl
717 {
718 public:
719 EventHostManualTriggerSycl(DevGenericSycl<TTag> const&)
720 {
721 }
722
723 auto trigger()
724 {
725 }
726 };
727
728 namespace trait
729 {
730 template<concepts::Tag TTag>
731 struct EventHostManualTriggerType<DevGenericSycl<TTag>>
732 {
733 using type = alpaka::test::EventHostManualTriggerSycl<TTag>;
734 };
735
736 template<concepts::Tag TTag>
737 struct IsEventHostManualTriggerSupported<DevGenericSycl<TTag>>
738 {
739 ALPAKA_FN_HOST static auto isSupported(DevGenericSycl<TTag> const&) -> bool
740 {
741 return false;
742 }
743 };
744 } // namespace trait
745 } // namespace test
746
747 namespace trait
748 {
749 template<concepts::Tag TTag>
750 struct Enqueue<QueueGenericSyclBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
751 {
752 ALPAKA_FN_HOST static auto enqueue(
753 QueueGenericSyclBlocking<TTag>& /* queue */,
754 test::EventHostManualTriggerSycl<TTag>& /* event */) -> void
755 {
756 }
757 };
758
759 template<concepts::Tag TTag>
760 struct Enqueue<QueueGenericSyclNonBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
761 {
762 ALPAKA_FN_HOST static auto enqueue(
763 QueueGenericSyclNonBlocking<TTag>& /* queue */,
764 test::EventHostManualTriggerSycl<TTag>& /* event */) -> void
765 {
766 }
767 };
768
769 template<concepts::Tag TTag>
770 struct IsComplete<test::EventHostManualTriggerSycl<TTag>>
771 {
772 ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerSycl<TTag> const& /* event */) -> bool
773 {
774 return true;
775 }
776 };
777 } // namespace trait
778} // namespace alpaka
779#endif
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
Definition Assert.hpp:13
#define ALPAKA_CUDA_DRV_CHECK(cmd)
CUDA driver error checking with log and exception.
Definition Cuda.hpp:54
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(cmd)
CUDA/HIP runtime error checking with log.
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The CPU device handle.
Definition DevCpu.hpp:56
The CUDA/HIP RT device handle.
auto getNativeHandle() const noexcept -> int
Event that can be enqueued into a queue and can be triggered by the Host.
ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCpu const &rhs) const -> bool
Inequality comparison operator.
ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCpu const &rhs) const -> bool
Equality comparison operator.
std::shared_ptr< cpu::detail::EventHostManualTriggerCpuImpl< TDev > > m_spEventImpl
ALPAKA_FN_HOST EventHostManualTriggerCpu(TDev const &dev)
Constructor.
ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCuda const &rhs) const -> bool
ALPAKA_FN_HOST EventHostManualTriggerCuda(DevCudaRt const &dev)
ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCuda const &rhs) const -> bool
std::shared_ptr< uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl > m_spEventImpl
Event that can be enqueued into a queue and can be triggered by the Host.
auto operator=(EventHostManualTriggerCpuImpl const &) -> EventHostManualTriggerCpuImpl &=delete
EventHostManualTriggerCpuImpl(EventHostManualTriggerCpuImpl const &other)=delete
bool m_bIsReady
If the event is not waiting within a queue (not enqueued or already completed).
std::size_t m_enqueueCount
The number of times this event has been enqueued.
std::mutex m_mutex
The mutex used to synchronize access to the event.
ALPAKA_FN_HOST EventHostManualTriggerCpuImpl(TDev dev) noexcept
Constructor.
std::condition_variable m_conditionVariable
The condition signaling the event completion.
bool m_bIsReady
If the event is not waiting within a queue (not enqueued or already completed).
std::mutex m_mutex
The mutex used to synchronize access to the event.
auto operator=(EventHostManualTriggerCudaImpl const &) -> EventHostManualTriggerCudaImpl &=delete
EventHostManualTriggerCudaImpl(EventHostManualTriggerCudaImpl const &)=delete
#define ALPAKA_FN_HOST
Definition Common.hpp:40
constexpr ALPAKA_FN_HOST_ACC bool operator==(Complex< T > const &lhs, Complex< T > const &rhs)
Equality of two complex numbers.
Definition Complex.hpp:294
constexpr ALPAKA_FN_HOST_ACC bool operator!=(Complex< T > const &lhs, Complex< T > const &rhs)
Inequality of two complex numbers.
Definition Complex.hpp:320
The test specifics.
Definition TestAccs.hpp:27
ALPAKA_FN_HOST auto isEventHostManualTriggerSupported(TDev const &dev) -> bool
typename trait::EventHostManualTriggerType< TDev >::type EventHostManualTrigger
The event host manual trigger type trait alias template to remove the ::type.
auto streamWaitValue(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) -> CUresult
The accelerator traits.
The alpaka accelerator library.
ALPAKA_FN_HOST auto isComplete(TEvent const &event) -> bool
Tests if the given event has already been completed.
Definition Traits.hpp:34
ALPAKA_FN_HOST auto getDev(T const &t)
Definition Traits.hpp:68
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition Traits.hpp:47
STL namespace.
static ALPAKA_FN_HOST auto isSupported(DevCpu const &) -> bool
static ALPAKA_FN_HOST auto isSupported(DevCudaRt const &dev) -> bool
static ALPAKA_FN_HOST auto enqueue(QueueCudaRtBlocking &queue, test::EventHostManualTriggerCuda &event) -> void
static ALPAKA_FN_HOST auto enqueue(QueueCudaRtNonBlocking &queue, test::EventHostManualTriggerCuda &event) -> void
static ALPAKA_FN_HOST auto enqueue(QueueGenericThreadsBlocking< TDev > &, test::EventHostManualTriggerCpu< TDev > &event) -> void
static ALPAKA_FN_HOST auto enqueue(QueueGenericThreadsNonBlocking< TDev > &queue, test::EventHostManualTriggerCpu< TDev > &event) -> void
The queue enqueue trait.
Definition Traits.hpp:27
static ALPAKA_FN_HOST auto getDev(test::EventHostManualTriggerCpu< TDev > const &event) -> TDev
static ALPAKA_FN_HOST auto getDev(test::EventHostManualTriggerCuda const &event) -> DevCudaRt
The device get trait.
Definition Traits.hpp:27
static ALPAKA_FN_HOST auto isComplete(test::EventHostManualTriggerCpu< TDev > const &event) -> bool
static ALPAKA_FN_HOST auto isComplete(test::EventHostManualTriggerCuda const &event) -> bool
The event tester trait.
Definition Traits.hpp:21