alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
BlockSyncBarrierOmp.hpp
Go to the documentation of this file.
1/* Copyright 2023 Axel Hübl, Benjamin Worpitz, Jan Stephan, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
9
10#include <cstdint>
11
12#ifdef _OPENMP
13
14namespace alpaka
15{
16 //! The OpenMP barrier block synchronization.
 //!
 //! Carries the state used by syncBlockThreadsPredicate in this file: a generation counter and two
 //! result slots selected by the parity of that counter. Both members are `mutable` because the
 //! synchronization functions in this file take the object by const reference and still write to them.
17 class BlockSyncBarrierOmp : public interface::Implements<ConceptBlockSync, BlockSyncBarrierOmp>
18 {
19 public:
20 std::uint8_t mutable m_generation = 0u; //!< Incremented inside `omp single` on each predicate sync; only `% 2u` of it is used.
21 int mutable m_result[2]; //!< Result slots; not initialized here, the selected slot is set to TOp::InitialValue before use.
22 };
23
24 namespace trait
25 {
26 template<>
 // NOTE(review): listing line 27 (the specialization header this `template<>` introduces) is absent
 // from this listing; restore it from the original header before compiling.
28 {
 //! Blocks the calling thread at an OpenMP barrier. The BlockSyncBarrierOmp argument is not used.
29 ALPAKA_FN_HOST static auto syncBlockThreads(BlockSyncBarrierOmp const& /* blockSync */) -> void
30 {
31// NOTE: This waits for all threads in all blocks, not only for the threads of the calling block.
32// If multiple blocks are executed in parallel this is not optimal.
33# pragma omp barrier
34 }
35 };
36
37 namespace detail
38 {
39 template<typename TOp>
 //! Function object that folds one thread's predicate into a shared int result.
 //! Only declared here (no generic definition); it is invoked further down in this file as
 //! `detail::AtomicOp<TOp>()(result, predicateBool)`.
40 struct AtomicOp;
41
42 template<>
 // NOTE(review): listing line 43 (the specialization header) is absent from this listing;
 // presumably the AtomicOp specialization for the counting operation - confirm against the original header.
44 {
 //! Atomically adds the predicate (converted to 0 or 1) to result, so result accumulates the
 //! number of calls made with value == true.
45 void operator()(int& result, bool value)
46 {
47# pragma omp atomic
48 result += static_cast<int>(value);
49 }
50 };
51
52 template<>
 // NOTE(review): listing line 53 (the specialization header) is absent from this listing;
 // presumably the AtomicOp specialization for the logical-and operation - confirm against the original header.
54 {
 //! Atomically bitwise-ANDs the predicate (converted to 0 or 1) into result; a single false
 //! predicate therefore forces result to 0.
55 void operator()(int& result, bool value)
56 {
57# pragma omp atomic
58 result &= static_cast<int>(value);
59 }
60 };
61
62 template<>
 // NOTE(review): listing line 63 (the specialization header) is absent from this listing;
 // presumably the AtomicOp specialization for the logical-or operation - confirm against the original header.
64 {
 //! Atomically bitwise-ORs the predicate (converted to 0 or 1) into result; a single true
 //! predicate therefore sets the lowest bit of result.
65 void operator()(int& result, bool value)
66 {
67# pragma omp atomic
68 result |= static_cast<int>(value);
69 }
70 };
71 } // namespace detail
72
73 template<typename TOp>
 // NOTE(review): listing lines 74 (the specialization header) and 76 are absent from this listing;
 // restore them from the original header before compiling.
75 {
77
 //! Synchronizes the calling threads and combines their predicates with the operation TOp.
 //!
 //! \param blockSync Provides the generation counter and the two result slots (both mutable).
 //! \param predicate Per-thread value; only whether it is non-zero is used.
 //! \return The content of the current generation's result slot, read after the barrier.
78 ALPAKA_FN_ACC static auto syncBlockThreadsPredicate(BlockSyncBarrierOmp const& blockSync, int predicate)
79 -> int
80 {
81// Exactly one thread advances the generation and initializes the result slot selected by its parity.
82// There is an implicit barrier at the end of omp single.
83// NOTE: This code is executed only once for all OpenMP threads.
84// If multiple blocks with multiple threads are executed in parallel
85// this reduction is executed only for one block!
86# pragma omp single
87 {
88 ++blockSync.m_generation;
89 blockSync.m_result[blockSync.m_generation % 2u] = TOp::InitialValue;
90 }
91
 // The slot index is captured in a local before the barrier below, so the final read uses this
 // call's slot even if m_generation changes afterwards (presumably the reason for having two slots).
92 auto const generationMod2(blockSync.m_generation % 2u);
93 int& result(blockSync.m_result[generationMod2]);
94 bool const predicateBool(predicate != 0);
95
 // Fold this thread's predicate into the shared slot with an atomic update.
96 detail::AtomicOp<TOp>()(result, predicateBool);
97
98// Wait for all threads to write their predicate into the result slot.
99// NOTE: This waits for all threads in all blocks.
100// If multiple blocks are executed in parallel this is not optimal.
101# pragma omp barrier
102
103 return blockSync.m_result[generationMod2];
104 }
105 };
106 } // namespace trait
107} // namespace alpaka
108
109#endif
The OpenMP barrier block synchronization.
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
Definition Common.hpp:38
#define ALPAKA_FN_HOST
Definition Common.hpp:40
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: ALPAKA_NO_HOST_ACC_WARNING placed in front of the function declaration.
Definition Common.hpp:82
The alpaka accelerator library.
The logical and function object.
Definition Traits.hpp:60
The counting function object.
Definition Traits.hpp:44
The logical or function object.
Definition Traits.hpp:76
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is implemented by the given class.
Definition Interface.hpp:15
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_ACC auto syncBlockThreadsPredicate(BlockSyncBarrierOmp const &blockSync, int predicate) -> int
The block synchronization and predicate operation trait.
Definition Traits.hpp:27
static ALPAKA_FN_HOST auto syncBlockThreads(BlockSyncBarrierOmp const &) -> void
The block synchronization operation trait.
Definition Traits.hpp:23