alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
RandPhilox.hpp
Go to the documentation of this file.
1/* Copyright 2022 Jiří Vyskočil, Jan Stephan, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
12
13#include <cstdint>
14#include <limits>
15#include <random>
16#include <type_traits>
17
18namespace alpaka::rand
19{
20 /** Most common Philox engine variant, outputs single number
21 *
22 * This is a variant of the Philox engine generator which outputs a single 32-bit unsigned integer. The counter size is \f$4
23 * \times 32 = 128\f$ bits. Since the engine returns a single number, the generated result, which has the same
24 * size as the counter, has to be stored between invocations. Additionally a 32 bit pointer is stored. The
25 * total size of the state is 352 bits = 44 bytes.
26 *
27 * Ref.: J. K. Salmon, M. A. Moraes, R. O. Dror and D. E. Shaw, "Parallel random numbers: As easy as 1, 2, 3,"
28 * SC '11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and
29 * Analysis, 2011, pp. 1-12, doi: 10.1145/2063384.2063405.
30 */
31 class Philox4x32x10 : public interface::Implements<ConceptRand, Philox4x32x10>
32 {
// Thin wrapper around a single `EngineVariant` member: the constructor forwards its three arguments to that
// member and operator() returns the result of one call to it.
// NOTE(review): this listing skips numbered lines 35, 37, 45, 57, 62 and 67, so the two alias definitions and
// the signature lines of the constructor, min(), max() and operator() are not visible in this block.
33 public:
34 /// Philox algorithm: 10 rounds, 4 numbers of size 32.
36 /// Engine outputs a single number
38
39 /** Initialize a new Philox engine
40 *
41 * @param seed Set the Philox generator key
42 * @param subsequence Select a subsequence of size 2^64
43 * @param offset Skip \a offset numbers from the start of the subsequence
44 */
// All three parameters are 64-bit, default to 0, and are passed unchanged to the wrapped engine.
46 std::uint64_t const seed = 0,
47 std::uint64_t const subsequence = 0,
48 std::uint64_t const offset = 0)
49 : engineVariant(seed, subsequence, offset)
50 {
51 }
52
53 // STL UniformRandomBitGenerator concept
54 // https://en.cppreference.com/w/cpp/named_req/UniformRandomBitGenerator
// NOTE(review): that named requirement is written in terms of G::min() / G::max(); confirm whether the two
// bound functions below need to be static for the intended standard-library interop.
55 using result_type = std::uint32_t;
56
// Lower bound of the generated values: always 0. (Signature line missing here; presumably min().)
58 {
59 return 0;
60 }
61
// Upper bound of the generated values: the largest value representable in result_type.
// (Signature line missing here; presumably max().)
63 {
64 return std::numeric_limits<result_type>::max();
65 }
66
// Produces the next number by invoking the wrapped engine once. (Signature line missing here; presumably
// operator()().)
68 {
69 return engineVariant();
70 }
71
72 private:
// The wrapped engine; all generator state lives in this member.
73 EngineVariant engineVariant;
74 };
75
76 /** Most common Philox engine variant, outputs a 4-vector of floats
77 *
78 * This is a variant of the Philox engine generator which outputs a vector containing 4 floats. The counter
79 * size is \f$4 \times 32 = 128\f$ bits. Since the engine returns the whole generated vector, it is up to the
80 * user to extract individual floats as they need. The benefit is smaller state size since the state does not
81 * contain the intermediate results. The total size of the state is 192 bits = 24 bytes.
82 *
83 * Ref.: J. K. Salmon, M. A. Moraes, R. O. Dror and D. E. Shaw, "Parallel random numbers: As easy as 1, 2, 3,"
84 * SC '11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and
85 * Analysis, 2011, pp. 1-12, doi: 10.1145/2063384.2063405.
86 */
87 class Philox4x32x10Vector : public interface::Implements<ConceptRand, Philox4x32x10Vector>
88 {
// Thin wrapper around a single `EngineVariant` member: the constructor forwards its three arguments to that
// member and operator() returns the result of one call to it.
// NOTE(review): this listing skips numbered lines 90, 91, 99 and 123, so the alias definitions and the
// signature lines of the constructor and operator() are not visible in this block.
89 public:
92
93 /** Initialize a new Philox engine
94 *
95 * @param seed Set the Philox generator key
96 * @param subsequence Select a subsequence of size 2^64
97 * @param offset Count of numbers to skip from the start of the subsequence.
98 */
// NOTE(review): these parameters are 32-bit, whereas the Philox4x32x10 constructor in this file takes 64-bit
// values and the text above speaks of subsequences of size 2^64 -- confirm the narrower types are intended.
100 std::uint32_t const seed = 0,
101 std::uint32_t const subsequence = 0,
102 std::uint32_t const offset = 0)
103 : engineVariant(seed, subsequence, offset)
104 {
105 }
106
// Re-exports the wrapped engine's container template under the same name.
107 template<typename TScalar>
108 using ResultContainer = typename EngineVariant::template ResultContainer<TScalar>;
109
// ResultInt is the type reported by min()/max(); ResultVec is whatever one call to the wrapped engine returns.
110 using ResultInt = std::uint32_t;
111 using ResultVec = decltype(std::declval<EngineVariant>()());
112
// Lower bound reported for the generated values: always 0.
113 ALPAKA_FN_HOST_ACC constexpr auto min() -> ResultInt
114 {
115 return 0;
116 }
117
// Upper bound reported for the generated values: the largest value representable in ResultInt.
118 ALPAKA_FN_HOST_ACC constexpr auto max() -> ResultInt
119 {
120 return std::numeric_limits<ResultInt>::max();
121 }
122
// Produces the next result by invoking the wrapped engine once. (Signature line missing here; presumably
// operator()() returning ResultVec.)
124 {
125 return engineVariant();
126 }
127
128 private:
// The wrapped engine; all generator state lives in this member.
129 EngineVariant engineVariant;
130 };
131
132 // The following exists because you "cannot call __device__ function from a __host__ __device__ function"
133 // directly, but wrapping that call in a struct is just fine.
// Stateless functor: operator() takes an engine by reference, calls it once and returns the call's result
// with exactly the type `engine()` yields.
// NOTE(review): listing line 135 (the struct head) is not present in this extract; the name
// EngineCallHostAccProxy is taken from the place in this file where the functor is instantiated.
134 template<typename TEngine>
136 {
137 ALPAKA_FN_HOST_ACC auto operator()(TEngine& engine) -> decltype(engine())
138 {
139 return engine();
140 }
141 };
142
143 /// TEMP: Distributions to be decided on later. The generator should be compatible with STL as of now.
// Distribution functor: turns raw engine output r into `r / engine.max() * (max - min) + min`, either for a
// single value or element-wise for a 4-element result, depending on TResult.
// NOTE(review): this listing skips numbered lines 163-165 and 174, so the default constructor and the
// condition that selects between the two branches of operator() are not visible in this block.
144 template<typename TResult, typename TSfinae = void>
145 class UniformReal : public interface::Implements<ConceptRand, UniformReal<TResult>>
146 {
// ResultType maps TResult to the scalar type used for the arithmetic: TRes itself in the general case ...
147 template<typename TRes, typename TEnable = void>
148 struct ResultType
149 {
150 using type = TRes;
151 };
152
// ... and TRes::value_type when meta::IsArrayOrVector<TRes>::value is true.
153 template<typename TRes>
154 struct ResultType<TRes, std::enable_if_t<meta::IsArrayOrVector<TRes>::value>>
155 {
156 using type = typename TRes::value_type;
157 };
158
// T is the scalar type of the bounds and of every produced element; it must be a floating-point type.
159 using T = typename ResultType<TResult>::type;
160 static_assert(std::is_floating_point_v<T>, "Only floating-point types are supported");
161
162 public:
166
// Stores the bounds and precomputes their difference. `_range` is computed from the members `_max` and
// `_min`, which is valid because both are declared, and therefore initialized, before `_range`.
167 ALPAKA_FN_HOST_ACC UniformReal(T min, T max) : _min(min), _max(max), _range(_max - _min)
168 {
169 }
170
// Draws from `engine` and rescales the output as described at the top of the class.
171 template<typename TEngine>
172 ALPAKA_FN_HOST_ACC auto operator()(TEngine& engine) -> TResult
173 {
// NOTE(review): the branch condition (listing line 174) is missing from this extract; given the else branch
// and the ResultType specialization above it presumably tests whether TResult is an array/vector -- confirm.
175 {
// One engine call yields an indexable result; its elements 0..3 are each scaled by `_range / engine.max()`
// and shifted by `_min`. The engine is invoked directly here, not through the proxy used in the else branch.
176 auto result = engine();
177 T scale = static_cast<T>(1) / static_cast<T>(engine.max()) * _range;
178 TResult ret{
179 static_cast<T>(result[0]) * scale + _min,
180 static_cast<T>(result[1]) * scale + _min,
181 static_cast<T>(result[2]) * scale + _min,
182 static_cast<T>(result[3]) * scale + _min};
183 return ret;
184 }
185 else
186 {
187 // Since it's possible to get a host-only engine here, the call has to go through proxy
188 return static_cast<T>(EngineCallHostAccProxy<TEngine>{}(engine)) / static_cast<T>(engine.max())
189 * _range
190 + _min;
191 }
192
// Both branches above return; per the macro's own description this is needed because nvcc before CUDA 11.5
// cannot correctly identify return statements in 'if constexpr' branches.
193 ALPAKA_UNREACHABLE(TResult{});
194 }
195
196 private:
// Declaration order matters: the constructor's initializer for `_range` reads `_min` and `_max`.
197 T const _min;
198 T const _max;
199 T const _range;
200 };
201} // namespace alpaka::rand
#define ALPAKA_UNREACHABLE(...)
Before CUDA 11.5 nvcc is unable to correctly identify return statements in 'if constexpr' branches....
ALPAKA_FN_HOST_ACC constexpr auto max() -> ResultInt
typename EngineVariant::template ResultContainer< TScalar > ResultContainer
ALPAKA_FN_HOST_ACC constexpr auto min() -> ResultInt
ALPAKA_FN_HOST_ACC Philox4x32x10Vector(std::uint32_t const seed=0, std::uint32_t const subsequence=0, std::uint32_t const offset=0)
decltype(std::declval< EngineVariant >()()) ResultVec
engine::PhiloxVector< EngineParams > EngineVariant
ALPAKA_FN_HOST_ACC auto operator()() -> ResultVec
ALPAKA_FN_HOST_ACC constexpr auto max() -> result_type
ALPAKA_FN_HOST_ACC Philox4x32x10(std::uint64_t const seed=0, std::uint64_t const subsequence=0, std::uint64_t const offset=0)
ALPAKA_FN_HOST_ACC auto operator()() -> result_type
ALPAKA_FN_HOST_ACC constexpr auto min() -> result_type
engine::PhiloxSingle< EngineParams > EngineVariant
Engine outputs a single number.
TEMP: Distributions to be decided on later. The generator should be compatible with STL as of now.
ALPAKA_FN_HOST_ACC UniformReal()
ALPAKA_FN_HOST_ACC UniformReal(T min, T max)
ALPAKA_FN_HOST_ACC auto operator()(TEngine &engine) -> TResult
#define ALPAKA_FN_HOST_ACC
Definition Common.hpp:39
STL namespace.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
ALPAKA_FN_HOST_ACC auto operator()(TEngine &engine) -> decltype(engine())