alpaka
Abstraction Library for Parallel Kernel Acceleration
RandUniformCudaHipRand.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Benjamin Worpitz, René Widera, Andrea Bocci, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
9 #include "alpaka/core/Cuda.hpp"
10 #include "alpaka/core/Hip.hpp"
12 #include "alpaka/rand/Traits.hpp"
13 
#include <limits>
14 #include <type_traits>
15 
16 #if(defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)) && !defined(ALPAKA_DISABLE_VENDOR_RNG)
17 
18 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
19 # include <curand_kernel.h>
20 # elif defined(ALPAKA_ACC_GPU_HIP_ENABLED)
21 # if BOOST_COMP_CLANG
22 # pragma clang diagnostic push
23 # pragma clang diagnostic ignored "-Wduplicate-decl-specifier"
24 # endif
25 
26 # if HIP_VERSION >= 50'200'000
27 # include <hiprand/hiprand_kernel.h>
28 # else
29 # include <hiprand_kernel.h>
30 # endif
31 
32 # if BOOST_COMP_CLANG
33 # pragma clang diagnostic pop
34 # endif
35 # endif
36 
37 namespace alpaka::rand
38 {
39  //! The CUDA/HIP rand implementation.
 //!
 //! Empty tag class: it declares no members and only derives from
 //! concepts::Implements<ConceptRand, ...>, naming itself as the implementing type.
 //! The distribution and engine trait functions in this file take this type as their first argument.
 //! \tparam TApi Not used inside the class body; presumably the CUDA or HIP API tag (confirm at the
 //! instantiation site).
40  template<typename TApi>
41  class RandUniformCudaHipRand : public concepts::Implements<ConceptRand, RandUniformCudaHipRand<TApi>>
42  {
43  };
44 
45 # if !defined(ALPAKA_HOST_ONLY)
46 
47 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
48 # error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
49 # endif
50 
51 # if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
52 # error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
53 # endif
54 
namespace distribution::uniform_cuda_hip
{
    // Declarations only; the explicit specializations are defined further down in this file.

    //! Normal distribution over floating point values for the CUDA/HIP back-ends.
    template<class T>
    class NormalReal;

    //! Uniform distribution over floating point values for the CUDA/HIP back-ends.
    template<class T>
    class UniformReal;

    //! Uniform distribution over unsigned integer values for the CUDA/HIP back-ends.
    template<class T>
    class UniformUint;
} // namespace distribution::uniform_cuda_hip
69 
70  namespace engine::uniform_cuda_hip
71  {
72  //! The CUDA/HIP Xor random number generator engine.
73  class Xor
74  {
75  public:
76  // After calling this constructor the instance is not valid initialized and
77  // need to be overwritten with a valid object
78  Xor() = default;
79 
80  __device__ Xor(
81  std::uint32_t const& seed,
82  std::uint32_t const& subsequence = 0,
83  std::uint32_t const& offset = 0)
84  {
85 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
86  curand_init(seed, subsequence, offset, &state);
87 # else
88  hiprand_init(seed, subsequence, offset, &state);
89 # endif
90  }
91 
92  private:
93  template<typename T>
95  template<typename T>
97  template<typename T>
99 
100 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
101  curandStateXORWOW_t state = curandStateXORWOW_t{};
102 # else
103  hiprandStateXORWOW_t state = hiprandStateXORWOW_t{};
104 # endif
105 
106  public:
107  // STL UniformRandomBitGenerator concept. This is not strictly necessary as the distributions
108  // contained in this file are aware of the API specifics of the CUDA/HIP XORWOW engine and STL
109  // distributions might not work on the device, but it servers a compatibility bridge to other
110  // potentially compatible alpaka distributions.
111 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
112  using result_type = decltype(curand(&state));
113 # else
114  using result_type = decltype(hiprand(&state));
115 # endif
116  ALPAKA_FN_HOST_ACC static constexpr result_type min()
117  {
119  }
120 
121  ALPAKA_FN_HOST_ACC static constexpr result_type max()
122  {
124  }
125 
126  __device__ result_type operator()()
127  {
128 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
129  return curand(&state);
130 # else
131  return hiprand(&state);
132 # endif
133  }
134  };
135  } // namespace engine::uniform_cuda_hip
136 
137  namespace distribution::uniform_cuda_hip
138  {
139  //! The CUDA/HIP random number float normal distribution.
140  template<>
141  class NormalReal<float>
142  {
143  public:
144  template<typename TEngine>
145  __device__ auto operator()(TEngine& engine) -> float
146  {
147 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
148  return curand_normal(&engine.state);
149 # else
150  return hiprand_normal(&engine.state);
151 # endif
152  }
153  };
154 
155  //! The CUDA/HIP random number float normal distribution.
156  template<>
157  class NormalReal<double>
158  {
159  public:
160  template<typename TEngine>
161  __device__ auto operator()(TEngine& engine) -> double
162  {
163 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
164  return curand_normal_double(&engine.state);
165 # else
166  return hiprand_normal_double(&engine.state);
167 # endif
168  }
169  };
170 
171  //! The CUDA/HIP random number float uniform distribution.
172  template<>
173  class UniformReal<float>
174  {
175  public:
176  template<typename TEngine>
177  __device__ auto operator()(TEngine& engine) -> float
178  {
179  // (0.f, 1.0f]
180 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
181  float const fUniformRand(curand_uniform(&engine.state));
182 # else
183  float const fUniformRand(hiprand_uniform(&engine.state));
184 # endif
185  // NOTE: (1.0f - curand_uniform) does not work, because curand_uniform seems to return
186  // denormalized floats around 0.f. [0.f, 1.0f)
187  return fUniformRand * static_cast<float>(fUniformRand != 1.0f);
188  }
189  };
190 
191  //! The CUDA/HIP random number float uniform distribution.
192  template<>
193  class UniformReal<double>
194  {
195  public:
196  template<typename TEngine>
197  __device__ auto operator()(TEngine& engine) -> double
198  {
199  // (0.f, 1.0f]
200 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
201  double const fUniformRand(curand_uniform_double(&engine.state));
202 # else
203  double const fUniformRand(hiprand_uniform_double(&engine.state));
204 # endif
205  // NOTE: (1.0f - curand_uniform_double) does not work, because curand_uniform_double seems to
206  // return denormalized floats around 0.f. [0.f, 1.0f)
207  return fUniformRand * static_cast<double>(fUniformRand != 1.0);
208  }
209  };
210 
211  //! The CUDA/HIP random number unsigned integer uniform distribution.
212  template<>
213  class UniformUint<unsigned int>
214  {
215  public:
216  template<typename TEngine>
217  __device__ auto operator()(TEngine& engine) -> unsigned int
218  {
219 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
220  return curand(&engine.state);
221 # else
222  return hiprand(&engine.state);
223 # endif
224  }
225  };
226  } // namespace distribution::uniform_cuda_hip
227 
228  namespace distribution::trait
229  {
230  //! The CUDA/HIP random number float normal distribution get trait specialization.
231  template<typename TApi, typename T>
232  struct CreateNormalReal<RandUniformCudaHipRand<TApi>, T, std::enable_if_t<std::is_floating_point_v<T>>>
233  {
234  static __device__ auto createNormalReal(RandUniformCudaHipRand<TApi> const& /*rand*/)
236  {
237  return {};
238  }
239  };
240 
241  //! The CUDA/HIP random number float uniform distribution get trait specialization.
242  template<typename TApi, typename T>
243  struct CreateUniformReal<RandUniformCudaHipRand<TApi>, T, std::enable_if_t<std::is_floating_point_v<T>>>
244  {
245  static __device__ auto createUniformReal(RandUniformCudaHipRand<TApi> const& /*rand*/)
247  {
248  return {};
249  }
250  };
251 
252  //! The CUDA/HIP random number integer uniform distribution get trait specialization.
253  template<typename TApi, typename T>
254  struct CreateUniformUint<RandUniformCudaHipRand<TApi>, T, std::enable_if_t<std::is_integral_v<T>>>
255  {
256  static __device__ auto createUniformUint(RandUniformCudaHipRand<TApi> const& /*rand*/)
258  {
259  return {};
260  }
261  };
262  } // namespace distribution::trait
263 
264  namespace engine::trait
265  {
266  //! The CUDA/HIP random number default generator get trait specialization.
267  template<typename TApi>
269  {
270  static __device__ auto createDefault(
271  RandUniformCudaHipRand<TApi> const& /*rand*/,
272  std::uint32_t const& seed = 0,
273  std::uint32_t const& subsequence = 0,
274  std::uint32_t const& offset = 0) -> uniform_cuda_hip::Xor
275  {
276  return {seed, subsequence, offset};
277  }
278  };
279  } // namespace engine::trait
280 # endif
281 } // namespace alpaka::rand
282 
283 #endif
The CUDA/HIP rand implementation.
The CUDA/HIP random number floating point normal distribution.
The CUDA/HIP random number floating point uniform distribution.
The CUDA/HIP random number integer uniform distribution.
The CUDA/HIP Xor random number generator engine.
static constexpr ALPAKA_FN_HOST_ACC result_type max()
static constexpr ALPAKA_FN_HOST_ACC result_type min()
__device__ Xor(std::uint32_t const &seed, std::uint32_t const &subsequence=0, std::uint32_t const &offset=0)
#define ALPAKA_FN_HOST_ACC
Definition: Common.hpp:39
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric value, the numeric value is chosen).
Definition: Traits.hpp:1263
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto min(T const &min_ctx, Tx const &x, Ty const &y)
Returns the smaller of two arguments. NaNs are treated as missing data (between a NaN and a numeric value, the numeric value is chosen).
Definition: Traits.hpp:1280
constexpr auto offset
Definition: Extent.hpp:34
Tag used in class inheritance hierarchies that describes that a specific concept (TConcept) is implemented by the given base class (TBase).
Definition: Concepts.hpp:15
static __device__ auto createNormalReal(RandUniformCudaHipRand< TApi > const &) -> uniform_cuda_hip::NormalReal< T >
The random number float normal distribution get trait.
Definition: Traits.hpp:27
static __device__ auto createUniformReal(RandUniformCudaHipRand< TApi > const &) -> uniform_cuda_hip::UniformReal< T >
The random number float uniform distribution get trait.
Definition: Traits.hpp:31
static __device__ auto createUniformUint(RandUniformCudaHipRand< TApi > const &) -> uniform_cuda_hip::UniformUint< T >
The random number integer uniform distribution get trait.
Definition: Traits.hpp:35
static __device__ auto createDefault(RandUniformCudaHipRand< TApi > const &, std::uint32_t const &seed=0, std::uint32_t const &subsequence=0, std::uint32_t const &offset=0) -> uniform_cuda_hip::Xor
The random number default generator engine get trait.
Definition: Traits.hpp:82