alpaka
Abstraction Library for Parallel Kernel Acceleration
AtomicStdLibLock.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Benjamin Worpitz, Matthias Werner, René Widera, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
9 
10 #include <array>
11 #include <mutex>
12 
13 #ifdef ALPAKA_DISABLE_ATOMIC_ATOMICREF
14 
15 namespace alpaka
16 {
17  //! The CPU threads accelerator atomic ops.
18  //
19  // Atomics can be used in the grids, blocks and threads hierarchy levels.
20  // Atomics are not guaranteed to be safe between devices.
21  //
22  // \tparam THashTableSize size of the hash table to allow concurrency between
23  // atomics to different addresses
24  template<size_t THashTableSize>
25  class AtomicStdLibLock
26  {
27  public:
28  template<typename TAtomic, typename TOp, typename T, typename THierarchy, typename TSfinae>
29  friend struct trait::AtomicOp;
30 
31  static constexpr auto nextPowerOf2(size_t const value, size_t const bit = 0u) -> size_t
32  {
33  return value <= (static_cast<size_t>(1u) << bit) ? (static_cast<size_t>(1u) << bit)
34  : nextPowerOf2(value, bit + 1u);
35  }
36 
37  //! get a hash value of the pointer
38  //
39  // This is no perfect hash, there will be collisions if the size of pointer type
40  // is not a power of two.
41  template<typename TPtr>
42  static auto hash(TPtr const* const ptr) -> size_t
43  {
44  auto const ptrAddr = reinterpret_cast<size_t>(ptr);
45  // using power of two for the next division will increase the performance
46  constexpr size_t typeSizePowerOf2 = nextPowerOf2(sizeof(TPtr));
47  // division removes the stride between indices
48  return (ptrAddr / typeSizePowerOf2);
49  }
50 
51  template<typename TPtr>
52  auto getMutex(TPtr const* const ptr) const -> std::mutex&
53  {
54  //! get the size of the hash table
55  //
56  // The size is at least 1 or THashTableSize rounded up to the next power of 2
57  constexpr size_t hashTableSize = THashTableSize == 0u ? 1u : nextPowerOf2(THashTableSize);
58 
59  size_t const hashedAddr = hash(ptr) & (hashTableSize - 1u);
60 # if BOOST_COMP_CLANG
61 # pragma clang diagnostic push
62 # pragma clang diagnostic ignored "-Wexit-time-destructors"
63 # endif
64  static std::array<
65  std::mutex,
66  hashTableSize>
67  m_mtxAtomic; //!< The mutex protecting access for an atomic operation.
68 # if BOOST_COMP_CLANG
69 # pragma clang diagnostic pop
70 # endif
71  return m_mtxAtomic[hashedAddr];
72  }
73  };
74 
75  namespace trait
76  {
77  //! The CPU threads accelerator atomic operation.
78  template<typename TOp, typename T, typename THierarchy, size_t THashTableSize>
79  struct AtomicOp<TOp, AtomicStdLibLock<THashTableSize>, T, THierarchy>
80  {
81  ALPAKA_FN_HOST static auto atomicOp(
82  AtomicStdLibLock<THashTableSize> const& atomic,
83  T* const addr,
84  T const& value) -> T
85  {
86  std::lock_guard<std::mutex> lock(atomic.getMutex(addr));
87  return TOp()(addr, value);
88  }
89 
90  ALPAKA_FN_HOST static auto atomicOp(
91  AtomicStdLibLock<THashTableSize> const& atomic,
92  T* const addr,
93  T const& compare,
94  T const& value) -> T
95  {
96  std::lock_guard<std::mutex> lock(atomic.getMutex(addr));
97  return TOp()(addr, compare, value);
98  }
99  };
100  } // namespace trait
101 } // namespace alpaka
102 
103 #endif
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicOp(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &=THierarchy()) -> T
Executes the given operation atomically.
Definition: Traits.hpp:73