#include <condition_variable>

// ...

//! The event host manual trigger type trait.
template<typename TDev>
struct EventHostManualTriggerType;

//! The event host manual trigger support trait.
template<typename TDev>
struct IsEventHostManualTriggerSupported;

//! The event host manual trigger type trait alias template to remove the ::type.
template<typename TDev>
using EventHostManualTrigger = typename trait::EventHostManualTriggerType<TDev>::type;

template<typename TDev>
ALPAKA_FN_HOST auto isEventHostManualTriggerSupported(TDev const& dev) -> bool;

//! Event that can be enqueued into a queue and can be triggered by the Host.
template<class TDev = DevCpu>
class EventHostManualTriggerCpuImpl final
{
    //! Constructor.
    ALPAKA_FN_HOST EventHostManualTriggerCpuImpl(TDev dev) noexcept
        : m_dev(std::move(dev))
    // ...
    void trigger()
    {
        {
            std::unique_lock<std::mutex> lock(m_mutex);
            m_bIsReady = true;
        }
        m_conditionVariable.notify_one();
        // Give alpaka time to update into the new state.
        std::this_thread::sleep_for(std::chrono::milliseconds(200u));
    }
    // ...
};

//! Event that can be enqueued into a queue and can be triggered by the Host.
template<class TDev = DevCpu>
class EventHostManualTriggerCpu final
{
    //! Constructor.
    ALPAKA_FN_HOST EventHostManualTriggerCpu(TDev const& dev)
        : m_spEventImpl(std::make_shared<cpu::detail::EventHostManualTriggerCpuImpl<TDev>>(dev))
    {
    }
    // ...
    //! Inequality comparison operator.
    ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCpu const& rhs) const -> bool
    {
        return !((*this) == rhs);
    }

    void trigger()
    {
        m_spEventImpl->trigger();
        std::this_thread::sleep_for(std::chrono::milliseconds(200u));
    }

    std::shared_ptr<cpu::detail::EventHostManualTriggerCpuImpl<TDev>> m_spEventImpl;
};

//! The CPU event device get trait specialization.
template<typename TDev>
struct GetDev<test::EventHostManualTriggerCpu<TDev>>
{
    // ...
    return event.m_spEventImpl->m_dev;
    // ...
};

//! The CPU event completion test trait specialization.
template<typename TDev>
struct IsComplete<test::EventHostManualTriggerCpu<TDev>>
{
    // ...
    std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
    return event.m_spEventImpl->m_bIsReady;
    // ...
};

//! Enqueue into a non-blocking CPU queue: the queue's worker thread waits on the condition variable.
template<typename TDev>
struct Enqueue<QueueGenericThreadsNonBlocking<TDev>, test::EventHostManualTriggerCpu<TDev>>
{
    // ...
    // Copy the shared pointer so the implementation stays alive while it is enqueued.
    auto spEventImpl = event.m_spEventImpl;
    // Setting the event state and enqueuing it has to be atomic.
    std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
    // Mark the event as enqueued.
    spEventImpl->m_bIsReady = false;
    // Count the enqueue so that a re-enqueued event does not dead-lock an older wait.
    ++spEventImpl->m_enqueueCount;
    auto const enqueueCount = spEventImpl->m_enqueueCount;
    queue.m_spQueueImpl->m_workerThread.submit(
        [spEventImpl, enqueueCount]() mutable
        {
            std::unique_lock<std::mutex> lk2(spEventImpl->m_mutex);
            spEventImpl->m_conditionVariable.wait(
                lk2,
                [spEventImpl, enqueueCount]
                { return (enqueueCount != spEventImpl->m_enqueueCount) || spEventImpl->m_bIsReady; });
        });
    // ...
};

//! Enqueue into a blocking CPU queue: the calling thread itself waits on the condition variable.
template<typename TDev>
struct Enqueue<QueueGenericThreadsBlocking<TDev>, test::EventHostManualTriggerCpu<TDev>>
{
    // ...
    auto spEventImpl = event.m_spEventImpl;
    std::unique_lock<std::mutex> lk(spEventImpl->m_mutex);
    spEventImpl->m_bIsReady = false;
    ++spEventImpl->m_enqueueCount;
    auto const enqueueCount = spEventImpl->m_enqueueCount;
    spEventImpl->m_conditionVariable.wait(
        lk,
        [spEventImpl, enqueueCount]
        { return (enqueueCount != spEventImpl->m_enqueueCount) || spEventImpl->m_bIsReady; });
    // ...
};
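// --------------------------------------------------------------------------------------------
// Usage sketch (illustration only, not part of this header). It assumes a Catch2-style CHECK
// macro and an already constructed DevCpu `dev`; every name not defined above is an assumption.
//
//     alpaka::QueueCpuNonBlocking queue(dev);
//     alpaka::test::EventHostManualTriggerCpu<alpaka::DevCpu> event(dev);
//
//     alpaka::enqueue(queue, event);       // the queue's worker thread now blocks on the event
//     alpaka::enqueue(queue, laterTask);   // hypothetical task that must not run before the trigger
//     CHECK(!alpaka::isComplete(event));
//
//     event.trigger();                     // the host releases the worker thread
//     alpaka::wait(queue);                 // laterTask has run once this returns
// --------------------------------------------------------------------------------------------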
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED

#    if !BOOST_LANG_CUDA && !defined(ALPAKA_HOST_ONLY)
#        error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
#    endif

namespace uniform_cuda_hip::detail
{
    // EventHostManualTriggerCudaImpl: the constructor allocates a device-side int32 flag and clears it
    // (the cudaMemset calls are wrapped in alpaka's runtime error-check macro in the full header).
    // ...
    cudaMemset(m_devMem, static_cast<int>(0u), static_cast<size_t>(sizeof(int32_t)));
    // ...
    void trigger()
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_bIsReady = true;
        // ...
        // Set every byte of the flag to 1, i.e. the device value becomes 0x0101'0101.
        cudaMemset(m_devMem, static_cast<int>(1u), static_cast<size_t>(sizeof(int32_t)));
        // Give alpaka time to update into the new state.
        std::this_thread::sleep_for(std::chrono::milliseconds(200u));
    }
    // ...
} // namespace uniform_cuda_hip::detail

//! Event that can be enqueued into a queue and can be triggered by the Host.
class EventHostManualTriggerCuda final
{
    //! Constructor.
    ALPAKA_FN_HOST EventHostManualTriggerCuda(DevCudaRt const& dev)
        : m_spEventImpl(std::make_shared<uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl>(dev))
    {
    }
    // ...
    //! Inequality comparison operator.
    ALPAKA_FN_HOST auto operator!=(EventHostManualTriggerCuda const& rhs) const -> bool
    {
        return !((*this) == rhs);
    }

    void trigger()
    {
        m_spEventImpl->trigger();
        std::this_thread::sleep_for(std::chrono::milliseconds(200u));
    }

    std::shared_ptr<uniform_cuda_hip::detail::EventHostManualTriggerCudaImpl> m_spEventImpl;
};

// IsEventHostManualTriggerSupported: before CUDA 11.7 the device attribute has to be queried.
#    if CUDA_VERSION < 11070
    cuDeviceGetAttribute(&result, CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS, dev.getNativeHandle());
#    endif
// ...

// cuStreamWaitValue32 is exposed as cuStreamWaitValue32_v2 in CUDA [11.7, 12.0).
inline auto streamWaitValue(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) -> CUresult
{
#    if(CUDA_VERSION < 11070) || (CUDA_VERSION >= 12000)
    return cuStreamWaitValue32(stream, addr, value, flags);
#    else
    return cuStreamWaitValue32_v2(stream, addr, value, flags);
#    endif
}

template<>
struct GetDev<test::EventHostManualTriggerCuda>
{
    // ...
    return event.m_spEventImpl->m_dev;
    // ...
};

// IsComplete<test::EventHostManualTriggerCuda>
// ...
    std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
    return event.m_spEventImpl->m_bIsReady;
// ...

// Enqueue into a non-blocking CUDA queue: the stream waits until the device flag reaches 0x0101'0101.
// ...
    auto spEventImpl(event.m_spEventImpl);
    std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
    spEventImpl->m_bIsReady = false;
    ALPAKA_CUDA_DRV_CHECK(streamWaitValue(
        static_cast<CUstream>(queue.getNativeHandle()),
        reinterpret_cast<CUdeviceptr>(event.m_spEventImpl->m_devMem),
        0x0101'0101u,
        CU_STREAM_WAIT_VALUE_GEQ));
// ...

// Enqueue into a blocking CUDA queue: the same stream-side wait is used.
// ...
    auto spEventImpl(event.m_spEventImpl);
    std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
    spEventImpl->m_bIsReady = false;
    ALPAKA_CUDA_DRV_CHECK(streamWaitValue(
        static_cast<CUstream>(queue.getNativeHandle()),
        reinterpret_cast<CUdeviceptr>(event.m_spEventImpl->m_devMem),
        0x0101'0101u,
        CU_STREAM_WAIT_VALUE_GEQ));
// ...

#endif
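// --------------------------------------------------------------------------------------------
// Standalone sketch of the mechanism the CUDA specializations above rely on (illustration only;
// error handling omitted, `stream` is an assumed, already created stream of the current context;
// the header's streamWaitValue wrapper selects the _v2 variant on the affected CUDA versions):
//
//     int32_t* devMem = nullptr;
//     cudaMalloc(&devMem, sizeof(int32_t));
//     cudaMemset(devMem, 0, sizeof(int32_t));                        // flag starts at 0
//
//     cuStreamWaitValue32(                                            // the stream stalls here ...
//         stream, reinterpret_cast<CUdeviceptr>(devMem),
//         0x0101'0101u, CU_STREAM_WAIT_VALUE_GEQ);
//     // ... work enqueued behind the wait does not start yet ...
//
//     cudaMemset(devMem, 1, sizeof(int32_t));                         // host trigger: every byte becomes 1,
//                                                                     // the value reaches 0x0101'0101 and
//                                                                     // the wait resolves
// --------------------------------------------------------------------------------------------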
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED

#    include <hip/hip_runtime.h>

#    if !BOOST_LANG_HIP && !defined(ALPAKA_HOST_ONLY)
#        error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
#    endif

namespace hip::detail
{
    //! Event that can be enqueued into a queue and can be triggered by the Host.
    class EventHostManualTriggerHipImpl final
    {
        using TApi = alpaka::ApiHipRt;

        //! Constructor: allocate a device-side int32 flag and clear it
        //! (the hipMemset/hipMemcpy calls are wrapped in alpaka's error-check macro in the full header).
        ALPAKA_FN_HOST EventHostManualTriggerHipImpl(DevHipRt const& dev) : m_dev(dev), m_mutex(), m_bIsReady(true)
        // ...
        hipMemset(m_devMem, static_cast<int>(0u), static_cast<size_t>(sizeof(int32_t)));
        // ...

        EventHostManualTriggerHipImpl(EventHostManualTriggerHipImpl const&) = delete;
        auto operator=(EventHostManualTriggerHipImpl const&) -> EventHostManualTriggerHipImpl& = delete;

        void trigger()
        {
            std::unique_lock<std::mutex> lock(m_mutex);
            m_bIsReady = true;
            // ...
            // Set every byte of the flag to 1, i.e. the device value becomes 0x0101'0101.
            hipMemset(m_devMem, static_cast<int>(1u), static_cast<size_t>(sizeof(int32_t)));
            // Give alpaka time to update into the new state.
            std::this_thread::sleep_for(std::chrono::milliseconds(200u));
        }

        DevHipRt const m_dev; //!< The device this event is bound to.
        mutable std::mutex m_mutex; //!< The mutex used to synchronize access to the event.
        // ...
    };
} // namespace hip::detail

//! Event that can be enqueued into a queue and can be triggered by the Host.
class EventHostManualTriggerHip final
{
    //! Constructor.
    // ...
        : m_spEventImpl(std::make_shared<hip::detail::EventHostManualTriggerHipImpl>(dev))
    // ...
    //! Equality comparison operator.
    // ...
        return (m_spEventImpl == rhs.m_spEventImpl);
    // ...
    //! Inequality comparison operator.
    // ...
        return !((*this) == rhs);
    // ...
    void trigger()
    {
        m_spEventImpl->trigger();
        std::this_thread::sleep_for(std::chrono::milliseconds(200u));
    }

    std::shared_ptr<hip::detail::EventHostManualTriggerHipImpl> m_spEventImpl;
};

template<>
struct EventHostManualTriggerType<DevHipRt>
{
    using type = test::EventHostManualTriggerHip;
};

template<>
struct IsEventHostManualTriggerSupported<DevHipRt>
// ...

template<>
struct GetDev<test::EventHostManualTriggerHip>
{
    // ...
    return event.m_spEventImpl->m_dev;
    // ...
};

template<>
struct IsComplete<test::EventHostManualTriggerHip>
{
    // ...
    std::lock_guard<std::mutex> lk(event.m_spEventImpl->m_mutex);
    return event.m_spEventImpl->m_bIsReady;
    // ...
};

//! Enqueue into a non-blocking HIP queue: hipStreamWaitValue32 is not used, the device flag is polled instead.
template<>
struct Enqueue<QueueHipRtNonBlocking, test::EventHostManualTriggerHip>
{
    using TApi = alpaka::ApiHipRt;

    ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, test::EventHostManualTriggerHip& event)
    // ...
        auto spEventImpl(event.m_spEventImpl);
        std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
        spEventImpl->m_bIsReady = false;
        // ...
#    if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
        std::cerr << "[Workaround] polling of device-located value in stream, as hipStreamWaitValue32 is not "
        // ...
#    endif
        while(hostMem < 0x0101'0101)
        {
            // elided: read the device flag into hostMem through the queue, i.e. a call taking
            //     reinterpret_cast<hipDeviceptr_t>(event.m_spEventImpl->m_devMem)
            // and
            //     queue.getNativeHandle()
        }
    // ...
};

//! Enqueue into a blocking HIP queue: poll the device flag with hipMemcpy until it reaches 0x0101'0101.
template<>
struct Enqueue<QueueHipRtBlocking, test::EventHostManualTriggerHip>
{
    using TApi = alpaka::ApiHipRt;
    // ...
        auto spEventImpl(event.m_spEventImpl);
        std::lock_guard<std::mutex> lk(spEventImpl->m_mutex);
        spEventImpl->m_bIsReady = false;
        // ...
        std::uint32_t hmem = 0;
        do
        {
            std::this_thread::sleep_for(std::chrono::milliseconds(10u));
            hipMemcpy(&hmem, event.m_spEventImpl->m_devMem, sizeof(std::uint32_t), hipMemcpyDefault);
        } while(hmem < 0x0101'0101u);
    // ...
};

#endif
#ifdef ALPAKA_ACC_SYCL_ENABLED

//! Event that can be enqueued into a queue and can be triggered by the Host.
template<concepts::Tag TTag>
class EventHostManualTriggerSycl
{
    // ...
    EventHostManualTriggerSycl(DevGenericSycl<TTag> const&)
    // ...
};

template<concepts::Tag TTag>
struct EventHostManualTriggerType<DevGenericSycl<TTag>>
{
    using type = alpaka::test::EventHostManualTriggerSycl<TTag>;
};

template<concepts::Tag TTag>
struct IsEventHostManualTriggerSupported<DevGenericSycl<TTag>>
{
    ALPAKA_FN_HOST static auto isSupported(DevGenericSycl<TTag> const&) -> bool
    // ...
};

// The SYCL enqueue specializations take unnamed parameters, i.e. they are no-op stubs.
template<concepts::Tag TTag>
struct Enqueue<QueueGenericSyclBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
{
    // ...
    ALPAKA_FN_HOST static auto enqueue(QueueGenericSyclBlocking<TTag>&, test::EventHostManualTriggerSycl<TTag>&) -> void
    // ...
};

template<concepts::Tag TTag>
struct Enqueue<QueueGenericSyclNonBlocking<TTag>, test::EventHostManualTriggerSycl<TTag>>
{
    // ...
    ALPAKA_FN_HOST static auto enqueue(QueueGenericSyclNonBlocking<TTag>&, test::EventHostManualTriggerSycl<TTag>&) -> void
    // ...
};

template<concepts::Tag TTag>
struct IsComplete<test::EventHostManualTriggerSycl<TTag>>
// ...

#endif
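// --------------------------------------------------------------------------------------------
// Backend-agnostic usage sketch (illustration only). It uses the EventHostManualTrigger alias and
// the support query declared at the top of this header; `Dev`, `dev`, `queue` and the Catch2-style
// CHECK macro are assumptions of the sketch, not part of the header.
//
//     if(!alpaka::test::isEventHostManualTriggerSupported(dev))
//         return; // e.g. skip the test on back-ends without a usable wait mechanism
//
//     alpaka::test::EventHostManualTrigger<Dev> event(dev);
//     alpaka::enqueue(queue, event);      // the queue now stalls on the event
//     CHECK(!alpaka::isComplete(event));
//     event.trigger();                    // host-side release, independent of the back-end
// --------------------------------------------------------------------------------------------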