alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
BlockSyncUniformCudaHipBuiltIn.hpp
Go to the documentation of this file.
1/* Copyright 2022 Benjamin Worpitz, Matthias Werner, Andrea Bocci, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10
11#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
12
13namespace alpaka
14{
15 //! The GPU CUDA/HIP block synchronization.
17 : public interface::Implements<ConceptBlockSync, BlockSyncUniformCudaHipBuiltIn>
18 {
19 };
20
21# if !defined(ALPAKA_HOST_ONLY)
22
23# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
24# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
25# endif
26
27# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
28# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
29# endif
30
31 namespace trait
32 {
33 template<>
34 struct SyncBlockThreads<BlockSyncUniformCudaHipBuiltIn>
35 {
36 __device__ static auto syncBlockThreads(BlockSyncUniformCudaHipBuiltIn const& /*blockSync*/) -> void
37 {
38 __syncthreads();
39 }
40 };
41
42 template<>
43 struct SyncBlockThreadsPredicate<BlockCount, BlockSyncUniformCudaHipBuiltIn>
44 {
45 __device__ static auto syncBlockThreadsPredicate(
46 BlockSyncUniformCudaHipBuiltIn const& /*blockSync*/,
47 int predicate) -> int
48 {
49# if defined(__HIP_ARCH_HAS_SYNC_THREAD_EXT__) && __HIP_ARCH_HAS_SYNC_THREAD_EXT__ == 0 && BOOST_COMP_HIP
50 // workaround for unsupported syncthreads_* operation on AMD hardware without sync extension
51 __shared__ int tmp;
52 __syncthreads();
53 if(threadIdx.x == 0)
54 tmp = 0;
55 __syncthreads();
56 if(predicate)
57 ::atomicAdd(&tmp, 1);
58 __syncthreads();
59
60 return tmp;
61# else
62 return __syncthreads_count(predicate);
63# endif
64 }
65 };
66
67 template<>
68 struct SyncBlockThreadsPredicate<BlockAnd, BlockSyncUniformCudaHipBuiltIn>
69 {
70 __device__ static auto syncBlockThreadsPredicate(
71 BlockSyncUniformCudaHipBuiltIn const& /*blockSync*/,
72 int predicate) -> int
73 {
74# if defined(__HIP_ARCH_HAS_SYNC_THREAD_EXT__) && __HIP_ARCH_HAS_SYNC_THREAD_EXT__ == 0 && BOOST_COMP_HIP
75 // workaround for unsupported syncthreads_* operation on AMD hardware without sync extension
76 __shared__ int tmp;
77 __syncthreads();
78 if(threadIdx.x == 0)
79 tmp = 1;
80 __syncthreads();
81 if(!predicate)
82 ::atomicAnd(&tmp, 0);
83 __syncthreads();
84
85 return tmp;
86# else
87 return __syncthreads_and(predicate);
88# endif
89 }
90 };
91
92 template<>
93 struct SyncBlockThreadsPredicate<BlockOr, BlockSyncUniformCudaHipBuiltIn>
94 {
95 __device__ static auto syncBlockThreadsPredicate(
96 BlockSyncUniformCudaHipBuiltIn const& /*blockSync*/,
97 int predicate) -> int
98 {
99# if defined(__HIP_ARCH_HAS_SYNC_THREAD_EXT__) && __HIP_ARCH_HAS_SYNC_THREAD_EXT__ == 0 && BOOST_COMP_HIP
100 // workaround for unsupported syncthreads_* operation on AMD hardware without sync extension
101 __shared__ int tmp;
102 __syncthreads();
103 if(threadIdx.x == 0)
104 tmp = 0;
105 __syncthreads();
106 if(predicate)
107 ::atomicOr(&tmp, 1);
108 __syncthreads();
109
110 return tmp;
111# else
112 return __syncthreads_or(predicate);
113# endif
114 }
115 };
116 } // namespace trait
117
118# endif
119
120} // namespace alpaka
121
122#endif
The GPU CUDA/HIP block synchronization.
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreads(TBlockSync const &blockSync) -> void
Synchronizes all threads within the current block (independently for all blocks).
Definition Traits.hpp:36
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicAdd(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic add operation.
Definition Traits.hpp:114
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto syncBlockThreadsPredicate(TBlockSync const &blockSync, int predicate) -> int
Synchronizes all threads within the current block (independently for all blocks), evaluates the predi...
Definition Traits.hpp:100
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicOr(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic or operation.
Definition Traits.hpp:258
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicAnd(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &hier=THierarchy()) -> T
Executes an atomic and operation.
Definition Traits.hpp:240
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15