alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
AtomicStdLibLock.hpp
Go to the documentation of this file.
1/* Copyright 2022 Benjamin Worpitz, Matthias Werner, René Widera, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
9
#include <array>
#include <cstdint>
#include <mutex>
12
13#ifdef ALPAKA_DISABLE_ATOMIC_ATOMICREF
14
15namespace alpaka
16{
17 //! The CPU threads accelerator atomic ops.
18 //
19 // Atomics can be used in the grids, blocks and threads hierarchy levels.
20 // Atomics are not guaranteed to be save between devices.
21 //
22 // \tparam THashTableSize size of the hash table to allow concurrency between
23 // atomics to different addresses
24 template<size_t THashTableSize>
25 class AtomicStdLibLock
26 {
27 public:
28 template<typename TAtomic, typename TOp, typename T, typename THierarchy, typename TSfinae>
29 friend struct trait::AtomicOp;
30
31 static constexpr auto nextPowerOf2(size_t const value, size_t const bit = 0u) -> size_t
32 {
33 return value <= (static_cast<size_t>(1u) << bit) ? (static_cast<size_t>(1u) << bit)
34 : nextPowerOf2(value, bit + 1u);
35 }
36
37 //! get a hash value of the pointer
38 //
39 // This is no perfect hash, there will be collisions if the size of pointer type
40 // is not a power of two.
41 template<typename TPtr>
42 static auto hash(TPtr const* const ptr) -> size_t
43 {
44 auto const ptrAddr = reinterpret_cast<size_t>(ptr);
45 // using power of two for the next division will increase the performance
46 constexpr size_t typeSizePowerOf2 = nextPowerOf2(sizeof(TPtr));
47 // division removes the stride between indices
48 return (ptrAddr / typeSizePowerOf2);
49 }
50
51 template<typename TPtr>
52 auto getMutex(TPtr const* const ptr) const -> std::mutex&
53 {
54 //! get the size of the hash table
55 //
56 // The size is at least 1 or THashTableSize rounded up to the next power of 2
57 constexpr size_t hashTableSize = THashTableSize == 0u ? 1u : nextPowerOf2(THashTableSize);
58
59 size_t const hashedAddr = hash(ptr) & (hashTableSize - 1u);
60# if BOOST_COMP_CLANG
61# pragma clang diagnostic push
62# pragma clang diagnostic ignored "-Wexit-time-destructors"
63# endif
64 static std::array<
65 std::mutex,
66 hashTableSize>
67 m_mtxAtomic; //!< The mutex protecting access for an atomic operation.
68# if BOOST_COMP_CLANG
69# pragma clang diagnostic pop
70# endif
71 return m_mtxAtomic[hashedAddr];
72 }
73 };
74
75 namespace trait
76 {
77 //! The CPU threads accelerator atomic operation.
78 template<typename TOp, typename T, typename THierarchy, size_t THashTableSize>
79 struct AtomicOp<TOp, AtomicStdLibLock<THashTableSize>, T, THierarchy>
80 {
81 ALPAKA_FN_HOST static auto atomicOp(
82 AtomicStdLibLock<THashTableSize> const& atomic,
83 T* const addr,
84 T const& value) -> T
85 {
86 std::lock_guard<std::mutex> lock(atomic.getMutex(addr));
87 return TOp()(addr, value);
88 }
89
90 ALPAKA_FN_HOST static auto atomicOp(
91 AtomicStdLibLock<THashTableSize> const& atomic,
92 T* const addr,
93 T const& compare,
94 T const& value) -> T
95 {
96 std::lock_guard<std::mutex> lock(atomic.getMutex(addr));
97 return TOp()(addr, compare, value);
98 }
99 };
100 } // namespace trait
101} // namespace alpaka
102
103#endif
#define ALPAKA_FN_HOST
Definition Common.hpp:40
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto atomicOp(TAtomic const &atomic, T *const addr, T const &value, THierarchy const &=THierarchy()) -> T
Executes the given operation atomically.
Definition Traits.hpp:73