alpaka
Abstraction Library for Parallel Kernel Acceleration
BlockSyncBarrierOmp.hpp
Go to the documentation of this file.
1 /* Copyright 2023 Axel Hübl, Benjamin Worpitz, Jan Stephan, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
8 #include "alpaka/core/Common.hpp"
9 
10 #include <cstdint>
11 
12 #ifdef _OPENMP
13 
14 namespace alpaka
15 {
16  //! The OpenMP barrier block synchronization.
17  class BlockSyncBarrierOmp : public concepts::Implements<ConceptBlockSync, BlockSyncBarrierOmp>
18  {
19  public:
20  std::uint8_t mutable m_generation = 0u;
21  int mutable m_result[2];
22  };
23 
24  namespace trait
25  {
26  template<>
28  {
29  ALPAKA_FN_HOST static auto syncBlockThreads(BlockSyncBarrierOmp const& /* blockSync */) -> void
30  {
31 // NOTE: This waits for all threads in all blocks.
32 // If multiple blocks are executed in parallel this is not optimal.
33 # pragma omp barrier
34  }
35  };
36 
37  namespace detail
38  {
39  template<typename TOp>
40  struct AtomicOp;
41 
42  template<>
44  {
45  void operator()(int& result, bool value)
46  {
47 # pragma omp atomic
48  result += static_cast<int>(value);
49  }
50  };
51 
52  template<>
54  {
55  void operator()(int& result, bool value)
56  {
57 # pragma omp atomic
58  result &= static_cast<int>(value);
59  }
60  };
61 
62  template<>
63  struct AtomicOp<BlockOr>
64  {
65  void operator()(int& result, bool value)
66  {
67 # pragma omp atomic
68  result |= static_cast<int>(value);
69  }
70  };
71  } // namespace detail
72 
73  template<typename TOp>
75  {
77 
78  ALPAKA_FN_ACC static auto syncBlockThreadsPredicate(BlockSyncBarrierOmp const& blockSync, int predicate)
79  -> int
80  {
81 // The first thread initializes the value.
82 // There is an implicit barrier at the end of omp single.
83 // NOTE: This code is executed only once for all OpenMP threads.
84 // If multiple blocks with multiple threads are executed in parallel
85 // this reduction is executed only for one block!
86 # pragma omp single
87  {
88  ++blockSync.m_generation;
89  blockSync.m_result[blockSync.m_generation % 2u] = TOp::InitialValue;
90  }
91 
92  auto const generationMod2(blockSync.m_generation % 2u);
93  int& result(blockSync.m_result[generationMod2]);
94  bool const predicateBool(predicate != 0);
95 
96  detail::AtomicOp<TOp>()(result, predicateBool);
97 
98 // Wait for all threads to write their predicate into the vector.
99 // NOTE: This waits for all threads in all blocks.
100 // If multiple blocks are executed in parallel this is not optimal.
101 # pragma omp barrier
102 
103  return blockSync.m_result[generationMod2];
104  }
105  };
106  } // namespace trait
107 } // namespace alpaka
108 
109 #endif
The OpenMP barrier block synchronization.
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition: Common.hpp:38
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: ALPAKA_NO_HOST_ACC_...
Definition: Common.hpp:82
The alpaka accelerator library.
The logical and function object.
Definition: Traits.hpp:60
The counting function object.
Definition: Traits.hpp:44
The logical or function object.
Definition: Traits.hpp:76
Tag used in class inheritance hierarchies that describes that a specific concept (TConcept) is implem...
Definition: Concepts.hpp:15
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_ACC auto syncBlockThreadsPredicate(BlockSyncBarrierOmp const &blockSync, int predicate) -> int
The block synchronization and predicate operation trait.
Definition: Traits.hpp:27
static ALPAKA_FN_HOST auto syncBlockThreads(BlockSyncBarrierOmp const &) -> void
The block synchronization operation trait.
Definition: Traits.hpp:23