alpaka
Abstraction Library for Parallel Kernel Acceleration
BarrierThread.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Benjamin Worpitz, Matthias Werner, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 // Uncomment this to disable the standard spinlock behaviour of the threads
8 // #define ALPAKA_THREAD_BARRIER_DISABLE_SPINLOCK
9 
#include "alpaka/block/sync/Traits.hpp"
#include "alpaka/core/Common.hpp"

#include <atomic>
#include <condition_variable>
#include <mutex>
#ifndef ALPAKA_THREAD_BARRIER_DISABLE_SPINLOCK
#    include <thread>
#endif
19 
20 namespace alpaka::core
21 {
22  namespace threads
23  {
//! A reusable thread barrier that re-arms itself after every completed rendezvous.
//!
//! The barrier is constructed for a fixed number of participating threads. Every call to wait()
//! counts one arrival. The last arrival restores the arrival counter and advances a generation
//! number; the change of the generation number is what releases the threads that arrived earlier.
//!
//! By default waiting threads spin on the generation number and yield their time slice in between.
//! Defining ALPAKA_THREAD_BARRIER_DISABLE_SPINLOCK replaces the spinning with a mutex and a
//! condition variable.
//!
//! \tparam TIdx The type of the thread count and of the generation counter.
template<typename TIdx>
class BarrierThread final
{
public:
    //! \param threadCount The number of wait() calls that complete one rendezvous.
    explicit BarrierThread(TIdx const& threadCount)
        : m_threadCount(threadCount)
        , m_curThreadCount(threadCount)
        , m_generation(0)
    {
    }

    //! Blocks the calling thread until all participating threads have called wait().
    auto wait() -> void
    {
#ifdef ALPAKA_THREAD_BARRIER_DISABLE_SPINLOCK
        // The generation is sampled first; every further access happens while holding the mutex.
        TIdx const arrivalGeneration = m_generation;
        std::unique_lock<std::mutex> guard(m_mtxBarrier);

        --m_curThreadCount;
        if(m_curThreadCount != 0)
        {
            // Not the last arrival: sleep until the last arrival has advanced the generation.
            m_cvAllThreadsReachedBarrier.wait(
                guard,
                [this, arrivalGeneration] { return m_generation != arrivalGeneration; });
            return;
        }

        // Last arrival: re-arm the counter, open the next generation and wake everybody up.
        m_curThreadCount = m_threadCount;
        ++m_generation;
        m_cvAllThreadsReachedBarrier.notify_all();
#else
        TIdx const arrivalGeneration = m_generation.load();
        TIdx const stillMissing = --m_curThreadCount;

        if(stillMissing != 0)
        {
            // Not the last arrival: spin (politely) until the generation moves on.
            while(m_generation.load() == arrivalGeneration)
            {
                std::this_thread::yield();
            }
            return;
        }

        // Last arrival: re-arm the counter first, then release the spinning threads.
        m_curThreadCount.store(m_threadCount);
        ++m_generation;
#endif
    }

private:
#ifdef ALPAKA_THREAD_BARRIER_DISABLE_SPINLOCK
    std::mutex m_mtxBarrier; //!< Guards the two counters below.
    std::condition_variable m_cvAllThreadsReachedBarrier; //!< Signalled when a generation completes.
    TIdx const m_threadCount; //!< Number of arrivals per generation.
    TIdx m_curThreadCount; //!< Arrivals still missing in the current generation.
    TIdx m_generation; //!< Incremented once per completed rendezvous.
#else
    TIdx const m_threadCount; //!< Number of arrivals per generation.
    std::atomic<TIdx> m_curThreadCount; //!< Arrivals still missing in the current generation.
    std::atomic<TIdx> m_generation; //!< Incremented once per completed rendezvous.
#endif
};
80 
81  namespace detail
82  {
83  template<typename TOp>
84  struct AtomicOp;
85 
86  template<>
88  {
89  void operator()(std::atomic<int>& result, bool value)
90  {
91  result += static_cast<int>(value);
92  }
93  };
94 
95  template<>
97  {
98  void operator()(std::atomic<int>& result, bool value)
99  {
100  result &= static_cast<int>(value);
101  }
102  };
103 
104  template<>
106  {
107  void operator()(std::atomic<int>& result, bool value)
108  {
109  result |= static_cast<int>(value);
110  }
111  };
112  } // namespace detail
113 
114  //! A self-resetting barrier with barrier.
115  template<typename TIdx>
117  {
118  public:
119  explicit BarrierThreadWithPredicate(TIdx const& threadCount)
120  : m_threadCount(threadCount)
121  , m_curThreadCount(threadCount)
122  , m_generation(0)
123  {
124  }
125 
126  //! Waits for all the other threads to reach the barrier.
127  template<typename TOp>
128  ALPAKA_FN_HOST auto wait(int predicate) -> int
129  {
130  TIdx const generationWhenEnteredTheWait = m_generation;
131  std::unique_lock<std::mutex> lock(m_mtxBarrier);
132 
133  auto const generationMod2 = m_generation % static_cast<TIdx>(2u);
134  if(m_curThreadCount == m_threadCount)
135  {
136  m_result[generationMod2] = TOp::InitialValue;
137  }
138 
139  std::atomic<int>& result(m_result[generationMod2]);
140  bool const predicateBool(predicate != 0);
141 
142  detail::AtomicOp<TOp>()(result, predicateBool);
143 
144  if(--m_curThreadCount == 0)
145  {
146  m_curThreadCount = m_threadCount;
147  ++m_generation;
148  m_cvAllThreadsReachedBarrier.notify_all();
149  }
150  else
151  {
152  m_cvAllThreadsReachedBarrier.wait(
153  lock,
154  [this, generationWhenEnteredTheWait] { return generationWhenEnteredTheWait != m_generation; });
155  }
156  return m_result[generationMod2];
157  }
158 
159  private:
160  std::mutex m_mtxBarrier;
161  std::condition_variable m_cvAllThreadsReachedBarrier;
162  const TIdx m_threadCount;
163  TIdx m_curThreadCount;
164  TIdx m_generation;
165  std::atomic<int> m_result[2];
166  };
167  } // namespace threads
168 } // namespace alpaka::core
A self-resetting barrier with predicate.
ALPAKA_FN_HOST auto wait(int predicate) -> int
Waits for all the other threads to reach the barrier.
BarrierThread(TIdx const &threadCount)
auto wait() -> void
Waits for all the other threads to reach the barrier.
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
The logical and function object.
Definition: Traits.hpp:60
The counting function object.
Definition: Traits.hpp:44
The logical or function object.
Definition: Traits.hpp:76
void operator()(std::atomic< int > &result, bool value)
void operator()(std::atomic< int > &result, bool value)
void operator()(std::atomic< int > &result, bool value)