alpaka
Abstraction Library for Parallel Kernel Acceleration
EventHostManualTrigger.hpp
/* Copyright 2024 Benjamin Worpitz, Matthias Werner, Jan Stephan, Jeffrey Kelling, Andrea Bocci,
 * Bernhard Manfred Gruber, Aurora Perego
 * SPDX-License-Identifier: MPL-2.0
 */

#pragma once

#include "alpaka/alpaka.hpp"

#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <thread>
#include <utility>

namespace alpaka::test
{
    namespace trait
    {
        template<typename TDev>
        struct EventHostManualTriggerType;

        template<typename TDev>
        struct IsEventHostManualTriggerSupported;
    } // namespace trait

    //! The event host manual trigger type trait alias template to remove the ::type.
    template<typename TDev>
    using EventHostManualTrigger = typename trait::EventHostManualTriggerType<TDev>::type;

    template<typename TDev>
    ALPAKA_FN_HOST auto isEventHostManualTriggerSupported(TDev const& dev) -> bool
    {
        return trait::IsEventHostManualTriggerSupported<TDev>::isSupported(dev);
    }
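
    // Usage sketch (illustrative, not part of this header's API): a test would pick the
    // device-specific trigger event via the alias above and guard on device support. The
    // accelerator type `Acc` and the chosen device index are assumptions of the example.
    //
    //     auto const platform = alpaka::Platform<Acc>{};
    //     auto const dev = alpaka::getDevByIdx(platform, 0);
    //     if(alpaka::test::isEventHostManualTriggerSupported(dev))
    //     {
    //         alpaka::test::EventHostManualTrigger<alpaka::Dev<Acc>> event(dev);
    //         // ... enqueue `event` into a queue, then release it from the host via event.trigger().
    //     }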

    namespace cpu::detail
    {
        //! Event that can be enqueued into a queue and can be triggered by the Host.
        template<class TDev = DevCpu>
        class EventHostManualTriggerCpuImpl
        {
        public:
            //! Constructor.
            ALPAKA_FN_HOST EventHostManualTriggerCpuImpl(TDev dev) noexcept
                : m_dev(std::move(dev))
                , m_mutex()
                , m_enqueueCount(0u)
                , m_bIsReady(true)
            {
            }

            EventHostManualTriggerCpuImpl(EventHostManualTriggerCpuImpl const& other) = delete;
            auto operator=(EventHostManualTriggerCpuImpl const&) -> EventHostManualTriggerCpuImpl& = delete;

            void trigger()
            {
                {
                    std::unique_lock<std::mutex> lock(m_mutex);
                    m_bIsReady = true;
                }
                m_conditionVariable.notify_one();
                // Give alpaka time to process all enqueued events and tasks in the new state.
                std::this_thread::sleep_for(std::chrono::milliseconds(200u));
            }

        public:
            TDev const m_dev; //!< The device this event is bound to.

            mutable std::mutex m_mutex; //!< The mutex used to synchronize access to the event.

            mutable std::condition_variable m_conditionVariable; //!< The condition signaling the event completion.
            std::size_t m_enqueueCount; //!< The number of times this event has been enqueued.

            bool m_bIsReady; //!< If the event is not waiting within a queue (not enqueued or already
                             //!< completed).
        };
    } // namespace cpu::detail

    //! Event that can be enqueued into a queue and can be triggered by the Host.
    template<class TDev = DevCpu>
    class EventHostManualTriggerCpu final
    {
    public:
        //! Constructor.
        ALPAKA_FN_HOST EventHostManualTriggerCpu(TDev const& dev)
            : m_spEventImpl(std::make_shared<cpu::detail::EventHostManualTriggerCpuImpl<TDev>>(dev))
        {
        }

        //! Equality comparison operator.
        ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCpu const& rhs) const -> bool
        {
            return (m_spEventImpl == rhs.m_spEventImpl);
        }

        //! Inequality comparison operator.
        ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCpu const& rhs) const -> bool
        {
            return !((*this) == rhs);
        }

        void trigger()
        {
            m_spEventImpl->trigger();
            // Give alpaka time to process all enqueued events and tasks in the new state.
            std::this_thread::sleep_for(std::chrono::milliseconds(200u));
        }

    public:
        std::shared_ptr<cpu::detail::EventHostManualTriggerCpuImpl<TDev>> m_spEventImpl;
    };
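
    // Usage sketch (illustrative): a manual-trigger event stalls everything enqueued after it
    // until the host releases it. The `dev`/`queue`/`event` names are assumptions of the example.
    //
    //     auto const platform = alpaka::PlatformCpu{};
    //     auto const dev = alpaka::getDevByIdx(platform, 0);
    //     auto queue = alpaka::QueueCpuNonBlocking{dev};
    //     auto event = alpaka::test::EventHostManualTriggerCpu<alpaka::DevCpu>{dev};
    //
    //     alpaka::enqueue(queue, event); // the queue's worker thread now blocks on the event
    //     // ... work submitted here stays pending behind the event ...
    //     event.trigger();               // host side: wakes the waiting worker thread
    //     alpaka::wait(queue);           // drain the remaining work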

    namespace trait
    {
        template<>
        struct EventHostManualTriggerType<DevCpu>
        {
            using type = test::EventHostManualTriggerCpu<DevCpu>;
        };

        //! The CPU event host manual trigger support get trait specialization.
        template<>
        struct IsEventHostManualTriggerSupported<DevCpu>
        {
            ALPAKA_FN_HOST static auto isSupported(DevCpu const&) -> bool
            {
                return true;
            }
        };
    } // namespace trait
} // namespace alpaka::test

namespace alpaka::trait
{
    //! The CPU device event device get trait specialization.
    template<typename TDev>
    struct GetDev<test::EventHostManualTriggerCpu<TDev>>
    {
        //! \return The device this event is bound to.
        ALPAKA_FN_HOST static auto getDev(test::EventHostManualTriggerCpu<TDev> const& event) -> TDev
        {
            return event.m_spEventImpl->m_dev;
        }
    };

    //! The CPU device event test trait specialization.
    template<typename TDev>
    struct IsComplete<test::EventHostManualTriggerCpu<TDev>>
    {
        //! \return If the event is not waiting within a queue (not enqueued or already handled).
        ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerCpu<TDev> const& event) -> bool
        {
            std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);

            return event.m_spEventImpl->m_bIsReady;
        }
    };

    template<typename TDev>
    struct Enqueue<QueueGenericThreadsNonBlocking<TDev>, test::EventHostManualTriggerCpu<TDev>>
    {
        ALPAKA_FN_HOST static auto enqueue(
            QueueGenericThreadsNonBlocking<TDev>& queue,
            test::EventHostManualTriggerCpu<TDev>& event) -> void
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

            // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
            auto spEventImpl = event.m_spEventImpl;

            // Setting the event state and enqueuing it has to be atomic.
            std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);

            // The event should not yet be enqueued.
            ALPAKA_ASSERT(spEventImpl->m_bIsReady);

            // Set its state to enqueued.
            spEventImpl->m_bIsReady = false;

            // Increment the enqueue counter. It is used to skip waits for events that have already finished
            // and been re-enqueued, which would otherwise lead to deadlocks.
            ++spEventImpl->m_enqueueCount;

            auto const enqueueCount = spEventImpl->m_enqueueCount;

            // Enqueue a task that only resets the event's flag if it is completed.
            queue.m_spQueueImpl->m_workerThread.submit(
                [spEventImpl, enqueueCount]() mutable
                {
                    std::unique_lock<std::mutex> lk2(spEventImpl->m_mutex);
                    spEventImpl->m_conditionVariable.wait(
                        lk2,
                        [spEventImpl, enqueueCount]
                        { return (enqueueCount != spEventImpl->m_enqueueCount) || spEventImpl->m_bIsReady; });
                });
        }
    };

    template<typename TDev>
    struct Enqueue<QueueGenericThreadsBlocking<TDev>, test::EventHostManualTriggerCpu<TDev>>
    {
        ALPAKA_FN_HOST static auto enqueue(
            QueueGenericThreadsBlocking<TDev>&,
            test::EventHostManualTriggerCpu<TDev>& event) -> void
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

            // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
            auto spEventImpl = event.m_spEventImpl;

            // Setting the event state and enqueuing it has to be atomic.
            std::unique_lock<std::mutex> lk(spEventImpl->m_mutex);

            // The event should not yet be enqueued.
            ALPAKA_ASSERT(spEventImpl->m_bIsReady);

            // Set its state to enqueued.
            spEventImpl->m_bIsReady = false;

            // Increment the enqueue counter. It is used to skip waits for events that have already finished
            // and been re-enqueued, which would otherwise lead to deadlocks.
            ++spEventImpl->m_enqueueCount;

            auto const enqueueCount = spEventImpl->m_enqueueCount;

            // Block the calling thread until the event is triggered or re-enqueued.
            spEventImpl->m_conditionVariable.wait(
                lk,
                [spEventImpl, enqueueCount]
                { return (enqueueCount != spEventImpl->m_enqueueCount) || spEventImpl->m_bIsReady; });
        }
    };
} // namespace alpaka::trait
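
// Why the enqueue counter above matters (illustrative sketch; names are assumptions): trigger()
// can race with a re-enqueue before a waiter has woken up.
//
//     alpaka::enqueue(queue, event); // waiter A captures enqueueCount == 1
//     event.trigger();               // m_bIsReady = true, but A may not have run yet
//     alpaka::enqueue(queue, event); // m_bIsReady = false again, m_enqueueCount == 2
//
// Without the counter, the late waiter A would block on m_bIsReady == false forever. Since A's
// captured count (1) no longer matches m_enqueueCount (2), its wait predicate succeeds instead.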

#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED

#    include "alpaka/core/Config.hpp"

#    include <cuda.h>

#    if !ALPAKA_LANG_CUDA && !defined(ALPAKA_HOST_ONLY)
#        error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
#    endif

#    include "alpaka/core/Cuda.hpp"

namespace alpaka::test
{
    namespace uniform_cuda_hip::detail
    {
        class EventHostManualTriggerCudaImpl final
        {
            using TApi = alpaka::ApiCudaRt;

        public:
            ALPAKA_FN_HOST EventHostManualTriggerCudaImpl(DevCudaRt const& dev)
                : m_dev(dev)
                , m_mutex()
                , m_bIsReady(true)
            {
                ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

                // Set the current device.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaSetDevice(m_dev.getNativeHandle()));
                // Allocate the buffer on this device.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaMalloc(&m_devMem, static_cast<size_t>(sizeof(int32_t))));
                // Initiate the memory set.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
                    cudaMemset(m_devMem, static_cast<int>(0u), static_cast<size_t>(sizeof(int32_t))));
            }

            EventHostManualTriggerCudaImpl(EventHostManualTriggerCudaImpl const&) = delete;
            auto operator=(EventHostManualTriggerCudaImpl const&) -> EventHostManualTriggerCudaImpl& = delete;

            ALPAKA_FN_HOST ~EventHostManualTriggerCudaImpl()
            {
                ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

                // Free the buffer.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(cudaFree(m_devMem));
            }

            void trigger()
            {
                std::unique_lock<std::mutex> lock(m_mutex);
                m_bIsReady = true;

                // Set the current device.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaSetDevice(m_dev.getNativeHandle()));
                // Initiate the memory set.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
                    cudaMemset(m_devMem, static_cast<int>(1u), static_cast<size_t>(sizeof(int32_t))));
                // Give alpaka time to process all enqueued events and tasks in the new state.
                std::this_thread::sleep_for(std::chrono::milliseconds(200u));
            }

        public:
            DevCudaRt const m_dev; //!< The device this event is bound to.

            mutable std::mutex m_mutex; //!< The mutex used to synchronize access to the event.
            void* m_devMem;

            bool m_bIsReady; //!< If the event is not waiting within a queue (not enqueued or already
                             //!< completed).
        };
    } // namespace uniform_cuda_hip::detail

    class EventHostManualTriggerCuda final
    {
    public:
        ALPAKA_FN_HOST EventHostManualTriggerCuda(DevCudaRt const& dev)
            : m_spEventImpl(std::make_shared<uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl>(dev))
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
        }

        ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCuda const& rhs) const -> bool
        {
            return (m_spEventImpl == rhs.m_spEventImpl);
        }

        ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCuda const& rhs) const -> bool
        {
            return !((*this) == rhs);
        }

        void trigger()
        {
            m_spEventImpl->trigger();
            // Give alpaka time to process all enqueued events and tasks in the new state.
            std::this_thread::sleep_for(std::chrono::milliseconds(200u));
        }

    public:
        std::shared_ptr<uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl> m_spEventImpl;
    };
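
    // How the CUDA variant signals (summary plus an illustrative sketch; names are assumptions):
    // the event owns a 32-bit flag in device memory that is memset to 0 on construction. The
    // Enqueue specializations below make the stream wait until the word is >= 0x01010101, and
    // trigger() memsets every byte to 1 (i.e. 0x01010101), which releases that wait.
    //
    //     auto const dev = alpaka::getDevByIdx(alpaka::PlatformCudaRt{}, 0);
    //     auto queue = alpaka::QueueCudaRtNonBlocking{dev};
    //     auto event = alpaka::test::EventHostManualTriggerCuda{dev};
    //
    //     alpaka::enqueue(queue, event); // stream blocks in cuStreamWaitValue32
    //     event.trigger();               // cudaMemset to 1 releases the stream-side wait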

    namespace trait
    {
        template<>
        struct EventHostManualTriggerType<DevCudaRt>
        {
            using type = test::EventHostManualTriggerCuda;
        };

        //! The CUDA event host manual trigger support get trait specialization.
        template<>
        struct IsEventHostManualTriggerSupported<DevCudaRt>
        {
            ALPAKA_FN_HOST static auto isSupported([[maybe_unused]] DevCudaRt const& dev) -> bool
            {
#    if CUDA_VERSION < 11070
                int result = 0;
                cuDeviceGetAttribute(&result, CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS, dev.getNativeHandle());
                return result != 0;
#    else
                return true; // Always enabled as of CUDA 11.7
#    endif
            }
        };
    } // namespace trait
} // namespace alpaka::test

namespace alpaka::trait
{
    namespace detail
    {
        // TODO: Replace with cuStreamWaitValue32 once support for CUDA < 12 is dropped.
        inline auto streamWaitValue(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags)
            -> CUresult
        {
            // NVIDIA introduced a new stream memory ops API with CUDA 11.7 (called v2). The corresponding CUDA
            // functions were suffixed with `_v2`. With CUDA 12.0, v1 of the API was removed and the `_v2` suffix
            // was dropped from the new functions. So CUDA <= 11.6 and CUDA >= 12.0 share the same function
            // signature but internally do different things.
#    if(CUDA_VERSION < 11070) || (CUDA_VERSION >= 12000)
            return cuStreamWaitValue32(stream, addr, value, flags);
#    else
            return cuStreamWaitValue32_v2(stream, addr, value, flags);
#    endif
        }
    } // namespace detail

    //! The CUDA device event device get trait specialization.
    template<>
    struct GetDev<test::EventHostManualTriggerCuda>
    {
        ALPAKA_FN_HOST static auto getDev(test::EventHostManualTriggerCuda const& event) -> DevCudaRt
        {
            return event.m_spEventImpl->m_dev;
        }
    };

    //! The CUDA device event test trait specialization.
    template<>
    struct IsComplete<test::EventHostManualTriggerCuda>
    {
        //! \return If the event is not waiting within a queue (not enqueued or already handled).
        ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerCuda const& event) -> bool
        {
            std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);

            return event.m_spEventImpl->m_bIsReady;
        }
    };

    template<>
    struct Enqueue<QueueCudaRtNonBlocking, test::EventHostManualTriggerCuda>
    {
        ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue, test::EventHostManualTriggerCuda& event)
            -> void
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

            // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
            auto spEventImpl(event.m_spEventImpl);

            // Setting the event state and enqueuing it has to be atomic.
            std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);

            // The event should not yet be enqueued.
            ALPAKA_ASSERT(spEventImpl->m_bIsReady);

            // Set its state to enqueued.
            spEventImpl->m_bIsReady = false;

            // PGI Profiler's User Guide:
            // The following are known issues related to Events and Metrics:
            // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
            //   on host updates may hang. This includes synchronization between the host and
            //   the device built upon value-based CUDA queue synchronization APIs such as
            //   cuStreamWaitValue32() and cuStreamWriteValue32().
            ALPAKA_CUDA_DRV_CHECK(detail::streamWaitValue(
                static_cast<CUstream>(queue.getNativeHandle()),
                reinterpret_cast<CUdeviceptr>(event.m_spEventImpl->m_devMem),
                0x0101'0101u,
                CU_STREAM_WAIT_VALUE_GEQ));
        }
    };

    template<>
    struct Enqueue<QueueCudaRtBlocking, test::EventHostManualTriggerCuda>
    {
        ALPAKA_FN_HOST static auto enqueue(QueueCudaRtBlocking& queue, test::EventHostManualTriggerCuda& event) -> void
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

            // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
            auto spEventImpl(event.m_spEventImpl);

            // Setting the event state and enqueuing it has to be atomic.
            std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);

            // The event should not yet be enqueued.
            ALPAKA_ASSERT(spEventImpl->m_bIsReady);

            // Set its state to enqueued.
            spEventImpl->m_bIsReady = false;

            // PGI Profiler's User Guide:
            // The following are known issues related to Events and Metrics:
            // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
            //   on host updates may hang. This includes synchronization between the host and
            //   the device built upon value-based CUDA queue synchronization APIs such as
            //   cuStreamWaitValue32() and cuStreamWriteValue32().
            ALPAKA_CUDA_DRV_CHECK(detail::streamWaitValue(
                static_cast<CUstream>(queue.getNativeHandle()),
                reinterpret_cast<CUdeviceptr>(event.m_spEventImpl->m_devMem),
                0x0101'0101u,
                CU_STREAM_WAIT_VALUE_GEQ));
        }
    };
} // namespace alpaka::trait
#endif

#ifdef ALPAKA_ACC_GPU_HIP_ENABLED

#    if ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0) && ALPAKA_COMP_HIP < ALPAKA_VERSION_NUMBER(7, 0, 0)
#        define HIP_ENABLE_WARP_SYNC_BUILTINS
#    endif
#    include <hip/hip_runtime.h>

#    if !ALPAKA_LANG_HIP && !defined(ALPAKA_HOST_ONLY)
#        error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
#    endif

#    include "alpaka/core/Hip.hpp"

namespace alpaka::test
{
    namespace hip::detail
    {
        class EventHostManualTriggerHipImpl final
        {
            using TApi = alpaka::ApiHipRt;

        public:
            ALPAKA_FN_HOST EventHostManualTriggerHipImpl(DevHipRt const& dev) : m_dev(dev), m_mutex(), m_bIsReady(true)
            {
                ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

                // Set the current device.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipSetDevice(m_dev.getNativeHandle()));
                // Allocate the buffer on this device.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipMalloc(&m_devMem, static_cast<size_t>(sizeof(int32_t))));
                // Initiate the memory set.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
                    hipMemset(m_devMem, static_cast<int>(0u), static_cast<size_t>(sizeof(int32_t))));
            }

            EventHostManualTriggerHipImpl(EventHostManualTriggerHipImpl const&) = delete;
            auto operator=(EventHostManualTriggerHipImpl const&) -> EventHostManualTriggerHipImpl& = delete;

            ALPAKA_FN_HOST ~EventHostManualTriggerHipImpl()
            {
                ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

                // Free the buffer.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(hipFree(m_devMem));
            }

            void trigger()
            {
                std::unique_lock<std::mutex> lock(m_mutex);
                m_bIsReady = true;

                // Set the current device.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipSetDevice(m_dev.getNativeHandle()));
                // Initiate the memory set.
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
                    hipMemset(m_devMem, static_cast<int>(1u), static_cast<size_t>(sizeof(int32_t))));
                // Give alpaka time to process all enqueued events and tasks in the new state.
                std::this_thread::sleep_for(std::chrono::milliseconds(200u));
            }

        public:
            DevHipRt const m_dev; //!< The device this event is bound to.

            mutable std::mutex m_mutex; //!< The mutex used to synchronize access to the event.
            void* m_devMem;

            bool m_bIsReady; //!< If the event is not waiting within a queue (not enqueued or already
                             //!< completed).
        };
    } // namespace hip::detail

    class EventHostManualTriggerHip final
    {
    public:
        ALPAKA_FN_HOST EventHostManualTriggerHip(DevHipRt const& dev)
            : m_spEventImpl(std::make_shared<hip::detail::EventHostManualTriggerHipImpl>(dev))
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
        }

        ALPAKA_FN_HOST auto operator==(EventHostManualTriggerHip const& rhs) const -> bool
        {
            return (m_spEventImpl == rhs.m_spEventImpl);
        }

        ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerHip const& rhs) const -> bool
        {
            return !((*this) == rhs);
        }

        void trigger()
        {
            m_spEventImpl->trigger();
            // Give alpaka time to process all enqueued events and tasks in the new state.
            std::this_thread::sleep_for(std::chrono::milliseconds(200u));
        }

    public:
        std::shared_ptr<hip::detail::EventHostManualTriggerHipImpl> m_spEventImpl;
    };

    namespace trait
    {
        template<>
        struct EventHostManualTriggerType<DevHipRt>
        {
            using type = test::EventHostManualTriggerHip;
        };

        //! The HIP event host manual trigger support get trait specialization.
        template<>
        struct IsEventHostManualTriggerSupported<DevHipRt>
        {
            // TODO: there is no CUDA_VERSION in the HIP compiler path.
            // TODO: there is a hipDeviceGetAttribute, but there is no counterpart for
            //       CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS.
            ALPAKA_FN_HOST static auto isSupported(DevHipRt const&) -> bool
            {
                return false;
            }
        };
    } // namespace trait
} // namespace alpaka::test

namespace alpaka::trait
{
    //! The HIP device event device get trait specialization.
    template<>
    struct GetDev<test::EventHostManualTriggerHip>
    {
        ALPAKA_FN_HOST static auto getDev(test::EventHostManualTriggerHip const& event) -> DevHipRt
        {
            return event.m_spEventImpl->m_dev;
        }
    };

    //! The HIP device event test trait specialization.
    template<>
    struct IsComplete<test::EventHostManualTriggerHip>
    {
        //! \return If the event is not waiting within a queue (not enqueued or already handled).
        ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerHip const& event) -> bool
        {
            std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);

            return event.m_spEventImpl->m_bIsReady;
        }
    };

    template<>
    struct Enqueue<QueueHipRtNonBlocking, test::EventHostManualTriggerHip>
    {
        using TApi = alpaka::ApiHipRt;

        ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, test::EventHostManualTriggerHip& event)
            -> void
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

            // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
            auto spEventImpl(event.m_spEventImpl);

            // Setting the event state and enqueuing it has to be atomic.
            std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);

            // The event should not yet be enqueued.
            ALPAKA_ASSERT(spEventImpl->m_bIsReady);

            // Set its state to enqueued.
            spEventImpl->m_bIsReady = false;

            // PGI Profiler's User Guide:
            // The following are known issues related to Events and Metrics:
            // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
            //   on host updates may hang. This includes synchronization between the host and
            //   the device built upon value-based CUDA queue synchronization APIs such as
            //   cuStreamWaitValue32() and cuStreamWriteValue32().
            int32_t hostMem = 0;
#    if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
            std::cerr << "[Workaround] polling of device-located value in stream, as hipStreamWaitValue32 is not "
                         "available.\n";
#    endif
            while(hostMem < 0x0101'0101)
            {
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipMemcpyDtoHAsync(
                    &hostMem,
                    reinterpret_cast<hipDeviceptr_t>(event.m_spEventImpl->m_devMem),
                    sizeof(int32_t),
                    queue.getNativeHandle()));
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipStreamSynchronize(queue.getNativeHandle()));
            }
        }
    };

    template<>
    struct Enqueue<QueueHipRtBlocking, test::EventHostManualTriggerHip>
    {
        using TApi = alpaka::ApiHipRt;

        ALPAKA_FN_HOST static auto enqueue(QueueHipRtBlocking& /* queue */, test::EventHostManualTriggerHip& event)
            -> void
        {
            ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

            // Copy the shared pointer to ensure that the event implementation is alive as long as it is enqueued.
            auto spEventImpl(event.m_spEventImpl);

            // Setting the event state and enqueuing it has to be atomic.
            std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);

            // The event should not yet be enqueued.
            ALPAKA_ASSERT(spEventImpl->m_bIsReady);

            // Set its state to enqueued.
            spEventImpl->m_bIsReady = false;

            // PGI Profiler's User Guide:
            // The following are known issues related to Events and Metrics:
            // * In event or metric profiling, kernel launches are blocking. Thus kernels waiting
            //   on host updates may hang. This includes synchronization between the host and
            //   the device built upon value-based HIP queue synchronization APIs such as
            //   cuStreamWaitValue32() and cuStreamWriteValue32().

            // Workaround for the missing cuStreamWaitValue32 counterpart in HIP: poll the device flag from the host.
            std::uint32_t hmem = 0;
            do
            {
                std::this_thread::sleep_for(std::chrono::milliseconds(10u));
                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
                    hipMemcpy(&hmem, event.m_spEventImpl->m_devMem, sizeof(std::uint32_t), hipMemcpyDefault));
            } while(hmem < 0x0101'0101u);
        }
    };
} // namespace alpaka::trait
#endif

#ifdef ALPAKA_ACC_SYCL_ENABLED
namespace alpaka
{
    namespace test
    {
        template<concepts::Tag TTag>
        class EventHostManualTriggerSycl
        {
        public:
            EventHostManualTriggerSycl(DevGenericSycl<TTag> const&)
            {
            }

            auto trigger()
            {
            }
        };

        namespace trait
        {
            template<concepts::Tag TTag>
            struct EventHostManualTriggerType<DevGenericSycl<TTag>>
            {
                using type = alpaka::test::EventHostManualTriggerSycl<TTag>;
            };

            template<concepts::Tag TTag>
            struct IsEventHostManualTriggerSupported<DevGenericSycl<TTag>>
            {
                ALPAKA_FN_HOST static auto isSupported(DevGenericSycl<TTag> const&) -> bool
                {
                    return false;
                }
            };
        } // namespace trait
    } // namespace test

    namespace trait
    {
        template<concepts::Tag TTag>
        struct Enqueue<QueueGenericSyclBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
        {
            ALPAKA_FN_HOST static auto enqueue(
                QueueGenericSyclBlocking<TTag>& /* queue */,
                test::EventHostManualTriggerSycl<TTag>& /* event */) -> void
            {
            }
        };

        template<concepts::Tag TTag>
        struct Enqueue<QueueGenericSyclNonBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
        {
            ALPAKA_FN_HOST static auto enqueue(
                QueueGenericSyclNonBlocking<TTag>& /* queue */,
                test::EventHostManualTriggerSycl<TTag>& /* event */) -> void
            {
            }
        };

        template<concepts::Tag TTag>
        struct IsComplete<test::EventHostManualTriggerSycl<TTag>>
        {
            ALPAKA_FN_HOST static auto isComplete(test::EventHostManualTriggerSycl<TTag> const& /* event */) -> bool
            {
                return true;
            }
        };
    } // namespace trait
} // namespace alpaka
#endif