alpaka
Abstraction Library for Parallel Kernel Acceleration
RandPhilox.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Jiří Vyskočil, Jan Stephan, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include "alpaka/core/Common.hpp"
11 #include "alpaka/rand/Traits.hpp"
12 
13 #include <cstdint>
14 #include <limits>
15 #include <random>
16 #include <type_traits>
17 
18 namespace alpaka::rand
19 {
20  /** Most common Philox engine variant, outputs single number
21  *
22  * This is a variant of the Philox engine generator which outputs a single 32-bit number. The counter size is \f$4
23  * \times 32 = 128\f$ bits. Since the engine returns a single number, the generated result, which has the same
24  * size as the counter, has to be stored between invocations. Additionally a 32 bit pointer is stored. The
25  * total size of the state is 352 bits = 44 bytes.
26  *
27  * Ref.: J. K. Salmon, M. A. Moraes, R. O. Dror and D. E. Shaw, "Parallel random numbers: As easy as 1, 2, 3,"
28  * SC '11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and
29  * Analysis, 2011, pp. 1-12, doi: 10.1145/2063384.2063405.
30  */
31  class Philox4x32x10 : public interface::Implements<ConceptRand, Philox4x32x10>
32  {
33  public:
34  /// Philox algorithm: 10 rounds, 4 numbers of size 32.
36  /// Engine outputs a single number
38 
39  /** Initialize a new Philox engine
40  *
41  * @param seed Set the Philox generator key
42  * @param subsequence Select a subsequence of size 2^64
43  * @param offset Skip \a offset numbers from the start of the subsequence
44  */
46  std::uint64_t const seed = 0,
47  std::uint64_t const subsequence = 0,
48  std::uint64_t const offset = 0)
49  : engineVariant(seed, subsequence, offset)
50  {
51  }
52 
53  // STL UniformRandomBitGenerator concept
54  // https://en.cppreference.com/w/cpp/named_req/UniformRandomBitGenerator
55  using result_type = std::uint32_t;
56 
57  ALPAKA_FN_HOST_ACC constexpr auto min() -> result_type
58  {
    // Lower bound of the output range reported by this engine: always 0
    // (result_type is std::uint32_t).
59  return 0;
60  }
61 
62  ALPAKA_FN_HOST_ACC constexpr auto max() -> result_type
63  {
65  }
66 
68  {
69  return engineVariant();
70  }
71 
72  private:
73  EngineVariant engineVariant;
74  };
75 
76  /** Most common Philox engine variant, outputs a 4-vector of floats
77  *
78  * This is a variant of the Philox engine generator which outputs a vector containing 4 floats. The counter
79  * size is \f$4 \times 32 = 128\f$ bits. Since the engine returns the whole generated vector, it is up to the
80  * user to extract individual floats as they need. The benefit is smaller state size since the state does not
81  * contain the intermediate results. The total size of the state is 192 bits = 24 bytes.
82  *
83  * Ref.: J. K. Salmon, M. A. Moraes, R. O. Dror and D. E. Shaw, "Parallel random numbers: As easy as 1, 2, 3,"
84  * SC '11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and
85  * Analysis, 2011, pp. 1-12, doi: 10.1145/2063384.2063405.
86  */
87  class Philox4x32x10Vector : public interface::Implements<ConceptRand, Philox4x32x10Vector>
88  {
89  public:
92 
93  /** Initialize a new Philox engine
94  *
95  * @param seed Set the Philox generator key
96  * @param subsequence Select a subsequence of size 2^64
97  * @param offset Number of numbers to skip from the start of the subsequence.
98  */
100  std::uint32_t const seed = 0,
101  std::uint32_t const subsequence = 0,
102  std::uint32_t const offset = 0)
103  : engineVariant(seed, subsequence, offset)
104  {
105  }
106 
107  template<typename TScalar>
108  using ResultContainer = typename EngineVariant::template ResultContainer<TScalar>;
109 
110  using ResultInt = std::uint32_t;
111  using ResultVec = decltype(std::declval<EngineVariant>()());
112 
113  ALPAKA_FN_HOST_ACC constexpr auto min() -> ResultInt
114  {
    // Lower bound reported for each generated integer: always 0
    // (ResultInt is std::uint32_t).
115  return 0;
116  }
117 
118  ALPAKA_FN_HOST_ACC constexpr auto max() -> ResultInt
119  {
121  }
122 
124  {
125  return engineVariant();
126  }
127 
128  private:
129  EngineVariant engineVariant;
130  };
131 
132  // The following exists because you "cannot call __device__ function from a __host__ __device__ function"
133  // directly, but wrapping that call in a struct is just fine.
134  template<typename TEngine>
136  {
137  ALPAKA_FN_HOST_ACC auto operator()(TEngine& engine) -> decltype(engine())
138  {
    // Invoke the engine's own call operator and hand its result back unchanged;
    // the return type is whatever engine() yields.
139  return engine();
140  }
141  };
142 
143  /// TEMP: Distributions to be decided on later. The generator should be compatible with STL as of now.
144  template<typename TResult, typename TSfinae = void>
145  class UniformReal : public interface::Implements<ConceptRand, UniformReal<TResult>>
146  {
147  template<typename TRes, typename TEnable = void>
    // Maps the distribution's result type to the scalar type it is built from.
    // Primary template: TRes is not recognised as an array/vector, so it is
    // used as the scalar type directly.
148  struct ResultType
149  {
150  using type = TRes;
151  };
152 
153  template<typename TRes>
    // Partial specialisation selected when meta::IsArrayOrVector<TRes>::value is
    // true: the scalar type is the container's element type (TRes::value_type).
154  struct ResultType<TRes, std::enable_if_t<meta::IsArrayOrVector<TRes>::value>>
155  {
156  using type = typename TRes::value_type;
157  };
158 
159  using T = typename ResultType<TResult>::type;
160  static_assert(std::is_floating_point_v<T>, "Only floating-point types are supported");
161 
162  public:
164  {
165  }
166 
167  ALPAKA_FN_HOST_ACC UniformReal(T min, T max) : _min(min), _max(max), _range(_max - _min)
168  {
    // Stores the bounds and caches their difference. _range is computed from the
    // members _min and _max, which is valid because they are declared (and thus
    // initialised) before _range in this class.
169  }
170 
171  template<typename TEngine>
172  ALPAKA_FN_HOST_ACC auto operator()(TEngine& engine) -> TResult
173  {
175  {
176  auto result = engine();
177  T scale = static_cast<T>(1) / static_cast<T>(engine.max()) * _range;
178  TResult ret{
179  static_cast<T>(result[0]) * scale + _min,
180  static_cast<T>(result[1]) * scale + _min,
181  static_cast<T>(result[2]) * scale + _min,
182  static_cast<T>(result[3]) * scale + _min};
183  return ret;
184  }
185  else
186  {
187  // Since it's possible to get a host-only engine here, the call has to go through proxy
188  return static_cast<T>(EngineCallHostAccProxy<TEngine>{}(engine)) / static_cast<T>(engine.max())
189  * _range
190  + _min;
191  }
192 
193  ALPAKA_UNREACHABLE(TResult{});
194  }
195 
196  private:
197  T const _min;
198  T const _max;
199  T const _range;
200  };
201 } // namespace alpaka::rand
#define ALPAKA_UNREACHABLE(...)
Before CUDA 11.5 nvcc is unable to correctly identify return statements in 'if constexpr' branches....
Definition: Unreachable.hpp:24
constexpr ALPAKA_FN_HOST_ACC auto min() -> ResultInt
Definition: RandPhilox.hpp:113
typename EngineVariant::template ResultContainer< TScalar > ResultContainer
Definition: RandPhilox.hpp:108
ALPAKA_FN_HOST_ACC Philox4x32x10Vector(std::uint32_t const seed=0, std::uint32_t const subsequence=0, std::uint32_t const offset=0)
Definition: RandPhilox.hpp:99
constexpr ALPAKA_FN_HOST_ACC auto max() -> ResultInt
Definition: RandPhilox.hpp:118
decltype(std::declval< EngineVariant >()()) ResultVec
Definition: RandPhilox.hpp:111
engine::PhiloxVector< EngineParams > EngineVariant
Definition: RandPhilox.hpp:91
ALPAKA_FN_HOST_ACC auto operator()() -> ResultVec
Definition: RandPhilox.hpp:123
constexpr ALPAKA_FN_HOST_ACC auto max() -> result_type
Definition: RandPhilox.hpp:62
ALPAKA_FN_HOST_ACC Philox4x32x10(std::uint64_t const seed=0, std::uint64_t const subsequence=0, std::uint64_t const offset=0)
Definition: RandPhilox.hpp:45
ALPAKA_FN_HOST_ACC auto operator()() -> result_type
Definition: RandPhilox.hpp:67
constexpr ALPAKA_FN_HOST_ACC auto min() -> result_type
Definition: RandPhilox.hpp:57
engine::PhiloxSingle< EngineParams > EngineVariant
Engine outputs a single number.
Definition: RandPhilox.hpp:37
TEMP: Distributions to be decided on later. The generator should be compatible with STL as of now.
Definition: RandPhilox.hpp:146
ALPAKA_FN_HOST_ACC UniformReal()
Definition: RandPhilox.hpp:163
ALPAKA_FN_HOST_ACC UniformReal(T min, T max)
Definition: RandPhilox.hpp:167
ALPAKA_FN_HOST_ACC auto operator()(TEngine &engine) -> TResult
Definition: RandPhilox.hpp:172
#define ALPAKA_FN_HOST_ACC
Definition: Common.hpp:39
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric va...
Definition: Traits.hpp:1263
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto min(T const &min_ctx, Tx const &x, Ty const &y)
Returns the smaller of two arguments. NaNs are treated as missing data (between a NaN and a numeric v...
Definition: Traits.hpp:1280
constexpr auto offset
Definition: Extent.hpp:34
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition: Interface.hpp:15
ALPAKA_FN_HOST_ACC auto operator()(TEngine &engine) -> decltype(engine())
Definition: RandPhilox.hpp:137