15#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
24# if !defined(ALPAKA_HOST_ONLY)
25# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !ALPAKA_LANG_CUDA
26# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
29# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !ALPAKA_LANG_HIP
30# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
40 template<alpaka::MemoryOrder TMemOrder>
41 [[maybe_unused]]
static constexpr __device__
void cuda_ptx_fence_device([[maybe_unused]] TMemOrder order)
43# if ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(9, 0, 0)
45 if constexpr(std::is_same_v<TMemOrder, mem_order::Relaxed>)
48 else if constexpr(std::is_same_v<TMemOrder, mem_order::Acquire>)
50 asm volatile(
"fence.acquire.gpu;" ::);
52 else if constexpr(std::is_same_v<TMemOrder, mem_order::Release>)
54 asm volatile(
"fence.release.gpu;" ::);
56 else if constexpr(std::is_same_v<TMemOrder, mem_order::AcqRel>)
58 asm volatile(
"fence.acq_rel.gpu;" ::);
62 asm volatile(
"fence.sc.gpu;" ::);
64# elif ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(7, 0, 0)
66 if constexpr(std::is_same_v<TMemOrder, mem_order::Relaxed>)
69 else if constexpr(std::is_same_v<TMemOrder, mem_order::Acquire>)
71 asm volatile(
"fence.acq_rel.gpu;" ::);
73 else if constexpr(std::is_same_v<TMemOrder, mem_order::Release>)
75 asm volatile(
"fence.acq_rel.gpu;" ::);
77 else if constexpr(std::is_same_v<TMemOrder, mem_order::AcqRel>)
79 asm volatile(
"fence.acq_rel.gpu;" ::);
84 asm volatile(
"fence.sc.gpu;" ::);
89 template<alpaka::MemoryOrder TMemOrder>
90 [[maybe_unused]]
static constexpr __device__
void cuda_ptx_fence_block([[maybe_unused]] TMemOrder order)
92# if ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(9, 0, 0)
94 if constexpr(std::is_same_v<TMemOrder, mem_order::Relaxed>)
97 else if constexpr(std::is_same_v<TMemOrder, mem_order::Acquire>)
99 asm volatile(
"fence.acquire.cta;" ::);
101 else if constexpr(std::is_same_v<TMemOrder, mem_order::Release>)
103 asm volatile(
"fence.release.cta;" ::);
105 else if constexpr(std::is_same_v<TMemOrder, mem_order::AcqRel>)
107 asm volatile(
"fence.acq_rel.cta;" ::);
111 asm volatile(
"fence.sc.cta;" ::);
113# elif ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(7, 0, 0)
115 if constexpr(std::is_same_v<TMemOrder, mem_order::Relaxed>)
118 else if constexpr(std::is_same_v<TMemOrder, mem_order::Acquire>)
120 asm volatile(
"fence.acq_rel.cta;" ::);
122 else if constexpr(std::is_same_v<TMemOrder, mem_order::Release>)
124 asm volatile(
"fence.acq_rel.cta;" ::);
126 else if constexpr(std::is_same_v<TMemOrder, mem_order::AcqRel>)
128 asm volatile(
"fence.acq_rel.cta;" ::);
132 asm volatile(
"fence.sc.cta;" ::);
137 template<alpaka::MemoryOrder TMemOrder>
138 [[maybe_unused]]
static constexpr __device__
void cuda_mem_fence_block([[maybe_unused]] TMemOrder order)
140 if constexpr(std::is_same_v<TMemOrder, mem_order::Relaxed>)
144# ifdef ALPAKA_CUDA_ATOMIC
145 ::cuda::atomic_thread_fence(
MemOrderCuda::get(order), ::cuda::thread_scope_block);
148# if ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(7, 0, 0)
149 cuda_ptx_fence_block(order);
151 __threadfence_block();
157 template<alpaka::MemoryOrder TMemOrder>
158 [[maybe_unused]]
static constexpr __device__
void cuda_mem_fence_device([[maybe_unused]] TMemOrder order)
160 if constexpr(std::is_same_v<TMemOrder, mem_order::Relaxed>)
164# ifdef ALPAKA_CUDA_ATOMIC
165 ::cuda::atomic_thread_fence(
MemOrderCuda::get(order), ::cuda::thread_scope_device);
168# if ALPAKA_ARCH_PTX >= ALPAKA_VERSION_NUMBER(7, 0, 0)
169 cuda_ptx_fence_device(order);
181 struct MemFenceDefaultOrder<MemFenceUniformCudaHipBuiltIn>
183 using type = mem_order::SeqCst;
187 template<MemoryOrder TMemOrder>
188 struct MemFence<MemFenceUniformCudaHipBuiltIn, TMemOrder, memory_scope::Block>
191 MemFenceUniformCudaHipBuiltIn
const&,
193 memory_scope::Block
const&)
195# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
196 alpaka::detail::cuda_mem_fence_block(order);
198 __builtin_amdgcn_fence(MemOrderHip::get(order),
"workgroup");
203 template<MemoryOrder TMemOrder,
typename TMemScope>
204 struct MemFence<MemFenceUniformCudaHipBuiltIn, TMemOrder, TMemScope>
206 static __device__
auto mem_fence(MemFenceUniformCudaHipBuiltIn
const&, TMemOrder order, TMemScope
const&)
210# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
211 alpaka::detail::cuda_mem_fence_device(order);
213 __builtin_amdgcn_fence(MemOrderHip::get(order),
"agent");
static constexpr SeqCst seq_cst
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto mem_fence(TMemFence const &fence, TMemOrder order, TMemScope const &scope) -> void
Issues memory fence instructions.
static constexpr auto get(TMemOrder)
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...