alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Traits.hpp
Go to the documentation of this file.
1/* Copyright 2022 Jan Stephan, Andrea Bocci, Tapish Narwal
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10
11namespace alpaka
12{
14 {
15 };
16
17 namespace memory_scope
18 {
20 {
21 };
22
23 //! Memory fences are observed by all threads in the same block.
25 {
26 };
27
28 //! Memory fences are observed by all threads in the same grid.
30 {
31 };
32
33 //! Memory fences are observed by all threads on the device.
35 {
36 };
37 } // namespace memory_scope
38
39 template<typename T>
40 concept MemoryScope = std::derived_from<T, memory_scope::MemoryScopeTag>;
41
42 //! The memory fence trait.
43 namespace trait
44 {
45 //! The mem_fence trait.
46 template<typename TMemFence, MemoryOrder TMemOrder, MemoryScope TMemScope, typename TSfinae = void>
47 struct MemFence;
48
49 template<typename TAcc>
51
52 template<typename TAcc>
54
55 template<typename TAcc>
57
58 } // namespace trait
59
60 //! Issues memory fence instructions.
61 //
62 // Issues a memory fence instruction for a given memory scope (\a memory_scope::Block or \a memory_scope::Device).
63 // This guarantees the following:
64 // * All \a LOAD instructions preceding the fence will always occur before the LOAD instructions following the
65 // fence (\a LoadLoad coherence)
66 // * All \a STORE instructions preceding the fence will always occur before the STORE instructions following the
67 // fence (\a StoreStore coherence). The pre-fence STORE results will be propagated to the other threads in the
68 // scope at an unknown point in time.
69 //
70 // Note that there are no further guarantees, especially with regard to \a LoadStore ordering. Users should not
71 // mistake this as a synchronization function between threads (please use syncBlockThreads() instead).
72 //
73 //! \tparam TMemFence The memory fence implementation type.
74 //! \tparam TMemScope The memory scope type.
75 //! \tparam TMemOrder The memory order type.
76 //! \param fence The memory fence implementation.
77 //! \param scope The memory scope.
79 template<typename TMemFence, MemoryOrder TMemOrder, MemoryScope TMemScope>
80 ALPAKA_FN_ACC auto mem_fence(TMemFence const& fence, TMemOrder order, TMemScope const& scope) -> void
81 {
83 if constexpr(std::is_same_v<TMemOrder, mem_order::Relaxed>)
84 {
85 // Relaxed ordering requires no fence.
86 // Relaxed memory fences make no sense at all anyway. It is an oxymoron. This should not be used.
87 // STL says it is a noop. https://en.cppreference.com/w/cpp/atomic/atomic_thread_fence.html
88 // OpenMP does not provide a relaxed flush at all. https://www.openmp.org/spec-html/5.0/openmpsu96.html
89 // Sycl says it is a noop. https://github.khronos.org/SYCL_Reference/iface/barriers-and-fences.html
90 // When using relaxed with mem fences, nvcc generates PTX for a sequentially consistent fence.
91 // This may be a problem also with HIP, so we explicitly skip it for all backends
92 }
93 else
94 {
96 }
97 }
98
100 template<typename TMemFence, MemoryScope TMemScope>
101 ALPAKA_FN_ACC auto mem_fence(TMemFence const& fence, TMemScope const& scope) -> void
102 {
104 mem_fence(fence, trait::MemFenceDefaultOrder_v<ImplementationBase>, scope);
105 }
106
107} // namespace alpaka
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition Common.hpp:41
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: ALPAKA_NO_HOST_ACC_...
Definition Common.hpp:85
typename detail::ImplementationBaseType< TInterface, TDerived >::type ImplementationBase
Returns the type that implements the given interface in the inheritance hierarchy.
Definition Interface.hpp:66
typename MemFenceDefaultOrder< TAcc >::type MemFenceDefaultOrder_t
Definition Traits.hpp:53
constexpr auto MemFenceDefaultOrder_v
Definition Traits.hpp:56
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto mem_fence(TMemFence const &fence, TMemOrder order, TMemScope const &scope) -> void
Issues memory fence instructions.
Definition Traits.hpp:80
Memory fences are observed by all threads in the same block.
Definition Traits.hpp:25
Memory fences are observed by all threads on the device.
Definition Traits.hpp:35
Memory fences are observed by all threads in the same grid.
Definition Traits.hpp:30
The mem_fence trait.
Definition Traits.hpp:47