alpaka
Abstraction Library for Parallel Kernel Acceleration
MemFenceUniformCudaHipBuiltIn.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
10 
11 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
12 
13 namespace alpaka
14 {
15  //! The GPU CUDA/HIP memory fence.
    //!
    //! Empty tag class: it declares no members of its own and only derives from
    //! concepts::Implements<ConceptMemFence, MemFenceUniformCudaHipBuiltIn>, which marks it as an
    //! implementation of the ConceptMemFence concept. The fences themselves are provided by the
    //! trait::MemFence specializations for this type further down in this file.
16  class MemFenceUniformCudaHipBuiltIn : public concepts::Implements<ConceptMemFence, MemFenceUniformCudaHipBuiltIn>
17  {
18  };
19 
20 # if !defined(ALPAKA_HOST_ONLY)
21 
// Compile-time sanity checks, only reached when ALPAKA_HOST_ONLY is not defined.
// Each check stops compilation with #error when a GPU back-end macro is defined but the
// matching BOOST_LANG_* macro evaluates to false.
// NOTE(review): BOOST_LANG_CUDA / BOOST_LANG_HIP are presumably language-detection macros
// provided by an include that is not visible in this listing - confirm.

// CUDA back-end requested, but the translation unit is not being compiled as CUDA.
22 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
23 # error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
24 # endif
25 
// HIP back-end requested, but the translation unit is not being compiled as HIP.
26 # if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
27 # error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
28 # endif
29 
30  namespace trait
31  {
//! The mem_fence trait specialization for MemFenceUniformCudaHipBuiltIn with memory_scope::Block
//! (fences observed by all threads in the same block).
32  template<>
33  struct MemFence<MemFenceUniformCudaHipBuiltIn, memory_scope::Block>
34  {
    //! Issues the CUDA/HIP built-in __threadfence_block() memory fence.
    //!
    //! Device-only function (__device__). Both parameters are unnamed and are never read in the
    //! body. There is no return statement, so the deduced return type is void.
35  __device__ static auto mem_fence(MemFenceUniformCudaHipBuiltIn const&, memory_scope::Block const&)
36  {
37  __threadfence_block();
38  }
39  };
40 
//! The mem_fence trait specialization for MemFenceUniformCudaHipBuiltIn with memory_scope::Grid
//! (fences observed by all threads in the same grid).
41  template<>
42  struct MemFence<MemFenceUniformCudaHipBuiltIn, memory_scope::Grid>
43  {
    //! Issues the CUDA/HIP built-in __threadfence() memory fence, i.e. the same call that the
    //! memory_scope::Device specialization in this file uses.
    //!
    //! Device-only function (__device__). Both parameters are unnamed and are never read in the
    //! body. There is no return statement, so the deduced return type is void.
44  __device__ static auto mem_fence(MemFenceUniformCudaHipBuiltIn const&, memory_scope::Grid const&)
45  {
46  // CUDA and HIP do not have a per-grid memory fence, so a device-level fence is used
47  __threadfence();
48  }
49  };
50 
//! The mem_fence trait specialization for MemFenceUniformCudaHipBuiltIn with memory_scope::Device
//! (fences observed by all threads on the device).
51  template<>
52  struct MemFence<MemFenceUniformCudaHipBuiltIn, memory_scope::Device>
53  {
    //! Issues the CUDA/HIP built-in __threadfence() memory fence.
    //!
    //! Device-only function (__device__). Both parameters are unnamed and are never read in the
    //! body. There is no return statement, so the deduced return type is void.
54  __device__ static auto mem_fence(MemFenceUniformCudaHipBuiltIn const&, memory_scope::Device const&)
55  {
56  __threadfence();
57  }
58  };
59  } // namespace trait
60 
61 # endif
62 
63 } // namespace alpaka
64 
65 #endif
The alpaka accelerator library.
Tag used in class inheritance hierarchies that describes that a specific concept (TConcept) is implemented by the given base class (TBase).
Definition: Concepts.hpp:15
Memory fences are observed by all threads in the same block.
Definition: Traits.hpp:20
Memory fences are observed by all threads on the device.
Definition: Traits.hpp:30
Memory fences are observed by all threads in the same grid.
Definition: Traits.hpp:25
static __device__ auto mem_fence(MemFenceUniformCudaHipBuiltIn const &, memory_scope::Block const &)
static __device__ auto mem_fence(MemFenceUniformCudaHipBuiltIn const &, memory_scope::Device const &)
static __device__ auto mem_fence(MemFenceUniformCudaHipBuiltIn const &, memory_scope::Grid const &)
The mem_fence trait.
Definition: Traits.hpp:39