alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
RandUniformCudaHipRand.hpp
Go to the documentation of this file.
1/* Copyright 2022 Benjamin Worpitz, René Widera, Andrea Bocci, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
9#include "alpaka/core/Hip.hpp"
13
14#include <type_traits>
15
16#if(defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)) && !defined(ALPAKA_DISABLE_VENDOR_RNG)
17
18# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
19# include <curand_kernel.h>
20# elif defined(ALPAKA_ACC_GPU_HIP_ENABLED)
21# if BOOST_COMP_CLANG
22# pragma clang diagnostic push
23# pragma clang diagnostic ignored "-Wduplicate-decl-specifier"
24# endif
25
26# if HIP_VERSION >= 50'200'000
27# include <hiprand/hiprand_kernel.h>
28# else
29# include <hiprand_kernel.h>
30# endif
31
32# if BOOST_COMP_CLANG
33# pragma clang diagnostic pop
34# endif
35# endif
36
37namespace alpaka::rand
38{
39 //! The CUDA/HIP rand implementation.
40 template<typename TApi>
41 class RandUniformCudaHipRand : public interface::Implements<ConceptRand, RandUniformCudaHipRand<TApi>>
42 {
43 };
44
45# if !defined(ALPAKA_HOST_ONLY)
46
47# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
48# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
49# endif
50
51# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
52# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
53# endif
54
namespace distribution::uniform_cuda_hip
{
    // Forward declarations only. The primary templates are never defined in this file;
    // each distribution is provided through explicit specializations further down.

    //! The CUDA/HIP normal distribution for floating point values.
    template<typename T>
    class NormalReal;

    //! The CUDA/HIP uniform distribution for floating point values.
    template<typename T>
    class UniformReal;

    //! The CUDA/HIP uniform distribution for integer values.
    template<typename T>
    class UniformUint;
} // namespace distribution::uniform_cuda_hip
69
70 namespace engine::uniform_cuda_hip
71 {
72 //! The CUDA/HIP Xor random number generator engine.
73 class Xor
74 {
75 public:
76 // After calling this constructor the instance is not valid initialized and
77 // need to be overwritten with a valid object
78 Xor() = default;
79
80 __device__ Xor(
81 std::uint32_t const& seed,
82 std::uint32_t const& subsequence = 0,
83 std::uint32_t const& offset = 0)
84 {
85# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
86 curand_init(seed, subsequence, offset, &state);
87# else
88 hiprand_init(seed, subsequence, offset, &state);
89# endif
90 }
91
92 private:
93 template<typename T>
94 friend class distribution::uniform_cuda_hip::NormalReal;
95 template<typename T>
96 friend class distribution::uniform_cuda_hip::UniformReal;
97 template<typename T>
98 friend class distribution::uniform_cuda_hip::UniformUint;
99
100# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
101 curandStateXORWOW_t state = curandStateXORWOW_t{};
102# else
103 hiprandStateXORWOW_t state = hiprandStateXORWOW_t{};
104# endif
105
106 public:
107 // STL UniformRandomBitGenerator concept. This is not strictly necessary as the distributions
108 // contained in this file are aware of the API specifics of the CUDA/HIP XORWOW engine and STL
109 // distributions might not work on the device, but it servers a compatibility bridge to other
110 // potentially compatible alpaka distributions.
111# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
112 using result_type = decltype(curand(&state));
113# else
114 using result_type = decltype(hiprand(&state));
115# endif
116 ALPAKA_FN_HOST_ACC static constexpr result_type min()
117 {
118 return std::numeric_limits<result_type>::min();
119 }
120
121 ALPAKA_FN_HOST_ACC static constexpr result_type max()
122 {
123 return std::numeric_limits<result_type>::max();
124 }
125
126 __device__ result_type operator()()
127 {
128# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
129 return curand(&state);
130# else
131 return hiprand(&state);
132# endif
133 }
134 };
135 } // namespace engine::uniform_cuda_hip
136
137 namespace distribution::uniform_cuda_hip
138 {
139 //! The CUDA/HIP random number float normal distribution.
140 template<>
141 class NormalReal<float>
142 {
143 public:
144 template<typename TEngine>
145 __device__ auto operator()(TEngine& engine) -> float
146 {
147# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
148 return curand_normal(&engine.state);
149# else
150 return hiprand_normal(&engine.state);
151# endif
152 }
153 };
154
155 //! The CUDA/HIP random number float normal distribution.
156 template<>
157 class NormalReal<double>
158 {
159 public:
160 template<typename TEngine>
161 __device__ auto operator()(TEngine& engine) -> double
162 {
163# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
164 return curand_normal_double(&engine.state);
165# else
166 return hiprand_normal_double(&engine.state);
167# endif
168 }
169 };
170
171 //! The CUDA/HIP random number float uniform distribution.
172 template<>
173 class UniformReal<float>
174 {
175 public:
176 template<typename TEngine>
177 __device__ auto operator()(TEngine& engine) -> float
178 {
179 // (0.f, 1.0f]
180# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
181 float const fUniformRand(curand_uniform(&engine.state));
182# else
183 float const fUniformRand(hiprand_uniform(&engine.state));
184# endif
185 // NOTE: (1.0f - curand_uniform) does not work, because curand_uniform seems to return
186 // denormalized floats around 0.f. [0.f, 1.0f)
187 return fUniformRand * static_cast<float>(fUniformRand != 1.0f);
188 }
189 };
190
191 //! The CUDA/HIP random number float uniform distribution.
192 template<>
193 class UniformReal<double>
194 {
195 public:
196 template<typename TEngine>
197 __device__ auto operator()(TEngine& engine) -> double
198 {
199 // (0.f, 1.0f]
200# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
201 double const fUniformRand(curand_uniform_double(&engine.state));
202# else
203 double const fUniformRand(hiprand_uniform_double(&engine.state));
204# endif
205 // NOTE: (1.0f - curand_uniform_double) does not work, because curand_uniform_double seems to
206 // return denormalized floats around 0.f. [0.f, 1.0f)
207 return fUniformRand * static_cast<double>(fUniformRand != 1.0);
208 }
209 };
210
211 //! The CUDA/HIP random number unsigned integer uniform distribution.
212 template<>
213 class UniformUint<unsigned int>
214 {
215 public:
216 template<typename TEngine>
217 __device__ auto operator()(TEngine& engine) -> unsigned int
218 {
219# ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
220 return curand(&engine.state);
221# else
222 return hiprand(&engine.state);
223# endif
224 }
225 };
226 } // namespace distribution::uniform_cuda_hip
227
228 namespace distribution::trait
229 {
230 //! The CUDA/HIP random number float normal distribution get trait specialization.
231 template<typename TApi, typename T>
232 struct CreateNormalReal<RandUniformCudaHipRand<TApi>, T, std::enable_if_t<std::is_floating_point_v<T>>>
233 {
234 static __device__ auto createNormalReal(RandUniformCudaHipRand<TApi> const& /*rand*/)
235 -> uniform_cuda_hip::NormalReal<T>
236 {
237 return {};
238 }
239 };
240
241 //! The CUDA/HIP random number float uniform distribution get trait specialization.
242 template<typename TApi, typename T>
243 struct CreateUniformReal<RandUniformCudaHipRand<TApi>, T, std::enable_if_t<std::is_floating_point_v<T>>>
244 {
245 static __device__ auto createUniformReal(RandUniformCudaHipRand<TApi> const& /*rand*/)
246 -> uniform_cuda_hip::UniformReal<T>
247 {
248 return {};
249 }
250 };
251
252 //! The CUDA/HIP random number integer uniform distribution get trait specialization.
253 template<typename TApi, typename T>
254 struct CreateUniformUint<RandUniformCudaHipRand<TApi>, T, std::enable_if_t<std::is_integral_v<T>>>
255 {
256 static __device__ auto createUniformUint(RandUniformCudaHipRand<TApi> const& /*rand*/)
257 -> uniform_cuda_hip::UniformUint<T>
258 {
259 return {};
260 }
261 };
262 } // namespace distribution::trait
263
264 namespace engine::trait
265 {
266 //! The CUDA/HIP random number default generator get trait specialization.
267 template<typename TApi>
269 {
270 static __device__ auto createDefault(
271 RandUniformCudaHipRand<TApi> const& /*rand*/,
272 std::uint32_t const& seed = 0,
273 std::uint32_t const& subsequence = 0,
274 std::uint32_t const& offset = 0) -> uniform_cuda_hip::Xor
275 {
276 return {seed, subsequence, offset};
277 }
278 };
279 } // namespace engine::trait
280# endif
281} // namespace alpaka::rand
282
283#endif
The CUDA/HIP rand implementation.
#define ALPAKA_FN_HOST_ACC
Definition Common.hpp:39
STL namespace.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15
static __device__ auto createNormalReal(RandUniformCudaHipRand< TApi > const &) -> uniform_cuda_hip::NormalReal< T >
The random number float normal distribution get trait.
Definition Traits.hpp:27
static __device__ auto createUniformReal(RandUniformCudaHipRand< TApi > const &) -> uniform_cuda_hip::UniformReal< T >
The random number float uniform distribution get trait.
Definition Traits.hpp:31
static __device__ auto createUniformUint(RandUniformCudaHipRand< TApi > const &) -> uniform_cuda_hip::UniformUint< T >
The random number integer uniform distribution get trait.
Definition Traits.hpp:35
static __device__ auto createDefault(RandUniformCudaHipRand< TApi > const &, std::uint32_t const &seed=0, std::uint32_t const &subsequence=0, std::uint32_t const &offset=0) -> uniform_cuda_hip::Xor
The random number default generator engine get trait.
Definition Traits.hpp:82