10#include <condition_variable>
18 template<
typename TDev>
21 template<
typename TDev>
26 template<
typename TDev>
29 template<
typename TDev>
38 template<
class TDev = DevCpu>
44 :
m_dev(std::move(dev))
57 std::unique_lock<std::mutex> lock(
m_mutex);
62 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
79 template<
class TDev = DevCpu>
85 :
m_spEventImpl(
std::make_shared<cpu::detail::EventHostManualTriggerCpuImpl<TDev>>(dev))
98 return !((*this) == rhs);
105 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
109 std::shared_ptr<cpu::detail::EventHostManualTriggerCpuImpl<TDev>>
m_spEventImpl;
135 template<
typename TDev>
136 struct GetDev<test::EventHostManualTriggerCpu<TDev>>
141 return event.m_spEventImpl->m_dev;
146 template<
typename TDev>
152 std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
154 return event.m_spEventImpl->m_bIsReady;
158 template<
typename TDev>
169 auto spEventImpl =
event.m_spEventImpl;
172 std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
178 spEventImpl->m_bIsReady =
false;
182 ++spEventImpl->m_enqueueCount;
184 auto const enqueueCount = spEventImpl->m_enqueueCount;
187 queue.m_spQueueImpl->m_workerThread.submit(
188 [spEventImpl, enqueueCount]()
mutable
190 std::unique_lock<std::mutex> lk2(spEventImpl->m_mutex);
191 spEventImpl->m_conditionVariable.wait(
193 [spEventImpl, enqueueCount]
194 {
return (enqueueCount != spEventImpl->m_enqueueCount) || spEventImpl->m_bIsReady; });
199 template<
typename TDev>
210 auto spEventImpl =
event.m_spEventImpl;
213 std::unique_lock<std::mutex> lk(spEventImpl->m_mutex);
219 spEventImpl->m_bIsReady =
false;
223 ++spEventImpl->m_enqueueCount;
225 auto const enqueueCount = spEventImpl->m_enqueueCount;
227 spEventImpl->m_conditionVariable.wait(
229 [spEventImpl, enqueueCount]
230 {
return (enqueueCount != spEventImpl->m_enqueueCount) || spEventImpl->m_bIsReady; });
235#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
241# if !ALPAKA_LANG_CUDA && !defined(ALPAKA_HOST_ONLY)
242# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
249 namespace uniform_cuda_hip::detail
269 cudaMemset(
m_devMem,
static_cast<int>(0u),
static_cast<size_t>(
sizeof(int32_t))));
285 std::unique_lock<std::mutex> lock(
m_mutex);
292 cudaMemset(
m_devMem,
static_cast<int>(1u),
static_cast<size_t>(
sizeof(int32_t))));
294 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
312 :
m_spEventImpl(
std::make_shared<uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl>(dev))
324 return !((*this) == rhs);
331 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
335 std::shared_ptr<uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl>
m_spEventImpl;
352# if CUDA_VERSION < 11070
354 cuDeviceGetAttribute(&result, CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS, dev.getNativeHandle());
369 inline auto streamWaitValue(CUstream stream, CUdeviceptr addr, cuuint32_t value,
unsigned int flags)
376# if(CUDA_VERSION < 11070) || (CUDA_VERSION >= 12000)
377 return cuStreamWaitValue32(stream, addr, value, flags);
379 return cuStreamWaitValue32_v2(stream, addr, value, flags);
386 struct GetDev<test::EventHostManualTriggerCuda>
390 return event.m_spEventImpl->m_dev;
401 std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
403 return event.m_spEventImpl->m_bIsReady;
416 auto spEventImpl(event.m_spEventImpl);
419 std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
425 spEventImpl->m_bIsReady =
false;
434 static_cast<CUstream
>(queue.getNativeHandle()),
435 reinterpret_cast<CUdeviceptr
>(event.m_spEventImpl->m_devMem),
437 CU_STREAM_WAIT_VALUE_GEQ));
449 auto spEventImpl(event.m_spEventImpl);
452 std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
458 spEventImpl->m_bIsReady =
false;
467 static_cast<CUstream
>(queue.getNativeHandle()),
468 reinterpret_cast<CUdeviceptr
>(event.m_spEventImpl->m_devMem),
470 CU_STREAM_WAIT_VALUE_GEQ));
477#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
479# if ALPAKA_COMP_HIP >= ALPAKA_VERSION_NUMBER(6, 2, 0) && ALPAKA_COMP_HIP < ALPAKA_VERSION_NUMBER(7, 0, 0)
480# define HIP_ENABLE_WARP_SYNC_BUILTINS
482# include <hip/hip_runtime.h>
484# if !ALPAKA_LANG_HIP && !defined(ALPAKA_HOST_ONLY)
485# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
492 namespace hip::detail
494 class EventHostManualTriggerHipImpl final
496 using TApi = alpaka::ApiHipRt;
499 ALPAKA_FN_HOST EventHostManualTriggerHipImpl(DevHipRt
const& dev) : m_dev(dev), m_mutex(), m_bIsReady(true)
509 hipMemset(m_devMem,
static_cast<int>(0u),
static_cast<size_t>(
sizeof(int32_t))));
512 EventHostManualTriggerHipImpl(EventHostManualTriggerHipImpl
const&) =
delete;
513 auto operator=(EventHostManualTriggerHipImpl
const&) -> EventHostManualTriggerHipImpl& =
delete;
525 std::unique_lock<std::mutex> lock(m_mutex);
532 hipMemset(m_devMem,
static_cast<int>(1u),
static_cast<size_t>(
sizeof(int32_t))));
534 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
538 DevHipRt
const m_dev;
540 mutable std::mutex m_mutex;
548 class EventHostManualTriggerHip final
552 : m_spEventImpl(
std::make_shared<hip::detail::EventHostManualTriggerHipImpl>(dev))
559 return (m_spEventImpl == rhs.m_spEventImpl);
564 return !((*this) == rhs);
569 m_spEventImpl->trigger();
571 std::this_thread::sleep_for(std::chrono::milliseconds(200u));
575 std::shared_ptr<hip::detail::EventHostManualTriggerHipImpl> m_spEventImpl;
581 struct EventHostManualTriggerType<DevHipRt>
583 using type = test::EventHostManualTriggerHip;
588 struct IsEventHostManualTriggerSupported<DevHipRt>
605 struct GetDev<test::EventHostManualTriggerHip>
609 return event.m_spEventImpl->m_dev;
615 struct IsComplete<test::EventHostManualTriggerHip>
620 std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
622 return event.m_spEventImpl->m_bIsReady;
627 struct Enqueue<QueueHipRtNonBlocking, test::EventHostManualTriggerHip>
629 using TApi = alpaka::ApiHipRt;
631 ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, test::EventHostManualTriggerHip& event)
637 auto spEventImpl(event.m_spEventImpl);
640 std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
646 spEventImpl->m_bIsReady =
false;
655# if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
656 std::cerr <<
"[Workaround] polling of device-located value in stream, as hipStreamWaitValue32 is not "
659 while(hostMem < 0x0101'0101)
663 reinterpret_cast<hipDeviceptr_t
>(event.m_spEventImpl->m_devMem),
665 queue.getNativeHandle()));
672 struct Enqueue<QueueHipRtBlocking, test::EventHostManualTriggerHip>
674 using TApi = alpaka::ApiHipRt;
682 auto spEventImpl(event.m_spEventImpl);
685 std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
691 spEventImpl->m_bIsReady =
false;
701 std::uint32_t hmem = 0;
704 std::this_thread::sleep_for(std::chrono::milliseconds(10u));
706 hipMemcpy(&hmem, event.m_spEventImpl->m_devMem,
sizeof(std::uint32_t), hipMemcpyDefault));
707 }
while(hmem < 0x0101'0101u);
713#ifdef ALPAKA_ACC_SYCL_ENABLED
718 template<concepts::Tag TTag>
719 class EventHostManualTriggerSycl
722 EventHostManualTriggerSycl(DevGenericSycl<TTag>
const&)
733 template<concepts::Tag TTag>
734 struct EventHostManualTriggerType<DevGenericSycl<TTag>>
736 using type = alpaka::test::EventHostManualTriggerSycl<TTag>;
739 template<concepts::Tag TTag>
740 struct IsEventHostManualTriggerSupported<DevGenericSycl<TTag>>
742 ALPAKA_FN_HOST static auto isSupported(DevGenericSycl<TTag>
const&) ->
bool
752 template<concepts::Tag TTag>
753 struct Enqueue<QueueGenericSyclBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
756 QueueGenericSyclBlocking<TTag>& ,
757 test::EventHostManualTriggerSycl<TTag>& ) ->
void
762 template<concepts::Tag TTag>
763 struct Enqueue<QueueGenericSyclNonBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
766 QueueGenericSyclNonBlocking<TTag>& ,
767 test::EventHostManualTriggerSycl<TTag>& ) ->
void
772 template<concepts::Tag TTag>
773 struct IsComplete<test::EventHostManualTriggerSycl<TTag>>
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
#define ALPAKA_CUDA_DRV_CHECK(cmd)
CUDA driver error checking with log and exception.
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Event that can be enqueued into a queue and can be triggered by the Host.
ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCpu const &rhs) const -> bool
Inequality comparison operator.
ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCpu const &rhs) const -> bool
Equality comparison operator.
std::shared_ptr< cpu::detail::EventHostManualTriggerCpuImpl< TDev > > m_spEventImpl
ALPAKA_FN_HOST EventHostManualTriggerCpu(TDev const &dev)
Constructor.
ALPAKA_FN_HOST auto operator==(EventHostManualTriggerCuda const &rhs) const -> bool
ALPAKA_FN_HOST EventHostManualTriggerCuda(DevCudaRt const &dev)
ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCuda const &rhs) const -> bool
std::shared_ptr< uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl > m_spEventImpl
Event that can be enqueued into a queue and can be triggered by the Host.
auto operator=(EventHostManualTriggerCpuImpl const &) -> EventHostManualTriggerCpuImpl &=delete
TDev const m_dev
The device this event is bound to.
EventHostManualTriggerCpuImpl(EventHostManualTriggerCpuImpl const &other)=delete
bool m_bIsReady
True if the event is not waiting within a queue (i.e. not enqueued, or already completed).
std::size_t m_enqueueCount
The number of times this event has been enqueued.
std::mutex m_mutex
The mutex used to synchronize access to the event.
ALPAKA_FN_HOST EventHostManualTriggerCpuImpl(TDev dev) noexcept
Constructor.
std::condition_variable m_conditionVariable
The condition signaling the event completion.
constexpr ALPAKA_FN_HOST_ACC bool operator==(Complex< T > const &lhs, Complex< T > const &rhs)
Equality of two complex numbers.
constexpr ALPAKA_FN_HOST_ACC bool operator!=(Complex< T > const &lhs, Complex< T > const &rhs)
Inequality of two complex numbers.
ALPAKA_FN_HOST auto isEventHostManualTriggerSupported(TDev const &dev) -> bool
typename trait::EventHostManualTriggerType< TDev >::type EventHostManualTrigger
The event host manual trigger type trait alias template that removes the need to write ::type.
auto streamWaitValue(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) -> CUresult
The alpaka accelerator library.
ALPAKA_FN_HOST auto isComplete(TEvent const &event) -> bool
Tests if the given event has already been completed.
ALPAKA_FN_HOST auto getDev(T const &t)
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
static ALPAKA_FN_HOST auto isSupported(DevCpu const &) -> bool
static ALPAKA_FN_HOST auto isSupported(DevCudaRt const &dev) -> bool
static ALPAKA_FN_HOST auto enqueue(QueueCudaRtBlocking &queue, test::EventHostManualTriggerCuda &event) -> void
static ALPAKA_FN_HOST auto enqueue(QueueCudaRtNonBlocking &queue, test::EventHostManualTriggerCuda &event) -> void
static ALPAKA_FN_HOST auto enqueue(QueueGenericThreadsBlocking< TDev > &, test::EventHostManualTriggerCpu< TDev > &event) -> void
static ALPAKA_FN_HOST auto enqueue(QueueGenericThreadsNonBlocking< TDev > &queue, test::EventHostManualTriggerCpu< TDev > &event) -> void
static ALPAKA_FN_HOST auto getDev(test::EventHostManualTriggerCpu< TDev > const &event) -> TDev
static ALPAKA_FN_HOST auto getDev(test::EventHostManualTriggerCuda const &event) -> DevCudaRt
static ALPAKA_FN_HOST auto isComplete(test::EventHostManualTriggerCpu< TDev > const &event) -> bool
static ALPAKA_FN_HOST auto isComplete(test::EventHostManualTriggerCuda const &event) -> bool