alpaka
Abstraction Library for Parallel Kernel Acceleration
PhiloxSingle.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Jiri Vyskocil, Rene Widera, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
9 
10 #include <utility>
11 
12 namespace alpaka::rand::engine
13 {
14  /** Philox state for single value engine
15  *
16  * @tparam TCounter Type of the Counter array
17  * @tparam TKey Type of the Key array
18  */
19  template<typename TCounter, typename TKey>
21  {
22  using Counter = TCounter;
23  using Key = TKey;
24 
25  /// Counter array
27  /// Key array
29  /// Intermediate result array
31  /// Position of the next number within the intermediate result (an integer count, not a C++ pointer)
32  std::uint32_t position;
33  // TODO: Box-Muller states
34  };
35 
36  /** Philox engine generating a single number
37  *
38  * This engine's operator() will return a single number. Since the result is the same size as the counter,
39  * it contains more than one number and therefore has to be stored between individual invocations of
40  * operator(). Additionally, a position has to be stored indicating which part of the result array is to be
41  * returned next.
42  *
43  * @tparam TParams Basic parameters for the Philox algorithm
44  */
45  template<typename TParams>
46  class PhiloxSingle : public PhiloxBaseCommon<TParams, PhiloxSingle<TParams>>
47  {
48  public:
50 
51  /// Counter type
52  using Counter = typename Base::Counter;
53  /// Key type
54  using Key = typename Base::Key;
55  /// State type
57 
58  /// Internal engine state
60 
61  protected:
62  /** Advance internal counter to the next value
63  *
64  * Advances the full internal counter array, resets the position pointer and stores the intermediate
65  * result to be recalled when the user requests a number.
66  */
68  {
69  this->advanceCounter(state.counter);
70  state.result = this->nRounds(state.counter, state.key);
71  state.position = 0;
72  }
73 
74  /** Get the next random number and advance internal state
75  *
76  * The intermediate result stores N = TParams::counterSize numbers. Check if we've already given out
77  * all of them. If so, generate a new intermediate result (this also resets the pointer to the position
78  * of the actual number). Finally, we return the actual number.
79  *
80  * @return The next random number
81  */
83  {
84  // Element zero will always contain the next valid random number.
85  auto result = state.result[0];
86  state.position++;
87  if(state.position == TParams::counterSize)
88  {
89  advanceState();
90  }
91  else
92  {
93  // Shift state results to allow hard coded access to element zero.
94  // This will avoid high register usage on NVIDIA devices.
95  // \todo Check if this shifting of the result vector is decreasing CPU performance.
96  // If so this optimization for GPUs (mostly NVIDIA) should be moved into
97  // PhiloxBaseCudaArray.
98  state.result[0] = state.result[1];
99  state.result[1] = state.result[2];
100  state.result[2] = state.result[3];
101  }
102 
103  return result;
104  }
105 
106  /// Skips the next \a offset numbers
108  {
109  static_assert(TParams::counterSize == 4, "Only counterSize is supported.");
110  state.position = static_cast<decltype(state.position)>(state.position + (offset & 3));
111  offset += state.position < 4 ? 0 : 4;
112  state.position -= state.position < 4 ? 0 : 4u;
113  for(auto numShifts = state.position; numShifts > 0; --numShifts)
114  {
115  // Shift state results to allow hard coded access to element zero.
116  // This will avoid high register usage on NVIDIA devices.
117  state.result[0] = state.result[1];
118  state.result[1] = state.result[2];
119  state.result[2] = state.result[3];
120  }
121  this->skip4(offset / 4);
122  }
123 
124  public:
125  /** Construct a new Philox engine with single-value output
126  *
127  * @param seed Set the Philox generator key
128  * @param subsequence Select a subsequence of size 2^64
129  * @param offset Skip \a offset numbers form the start of the subsequence
130  */
131  ALPAKA_FN_HOST_ACC PhiloxSingle(uint64_t seed = 0, uint64_t subsequence = 0, uint64_t offset = 0)
132  : state{{0, 0, 0, 0}, {low32Bits(seed), high32Bits(seed)}, {0, 0, 0, 0}, 0}
133  {
134  this->skipSubsequence(subsequence);
135  skip(offset);
136  advanceState();
137  }
138 
139  /** Get the next random number
140  *
141  * @return The next random number
142  */
144  {
145  return nextNumber();
146  }
147  };
148 } // namespace alpaka::rand::engine
ALPAKA_FN_HOST_ACC void advanceState()
ALPAKA_FN_HOST_ACC auto nextNumber()
typename Base::Counter Counter
Counter type.
ALPAKA_FN_HOST_ACC auto operator()()
ALPAKA_FN_HOST_ACC PhiloxSingle(uint64_t seed=0, uint64_t subsequence=0, uint64_t offset=0)
State state
Internal engine state.
ALPAKA_FN_HOST_ACC void skip(uint64_t offset)
Skips the next offset numbers.
typename Base::Key Key
Key type.
static ALPAKA_FN_HOST_ACC auto nRounds(Counter const &counter_in, Key const &key_in) -> Counter
#define ALPAKA_FN_HOST_ACC
Definition: Common.hpp:39
The random number generator engine specifics.
constexpr ALPAKA_FN_HOST_ACC auto high32Bits(std::uint64_t const x) -> std::uint32_t
Get high 32 bits of a 64-bit number.
constexpr ALPAKA_FN_HOST_ACC auto low32Bits(std::uint64_t const x) -> std::uint32_t
Get low 32 bits of a 64-bit number.
constexpr auto offset
Definition: Extent.hpp:34
Counter result
Intermediate result array.
std::uint32_t position
Pointer to the active intermediate result element.