alpaka
Abstraction Library for Parallel Kernel Acceleration
MathUniformCudaHipBuiltIn.hpp
/* Copyright 2023 Axel Huebl, Benjamin Worpitz, Matthias Werner, Bert Wesarg, Valentin Gehrke, René Widera,
 * Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber, Jeffrey Kelling, Sergei Bastrakov
 * SPDX-License-Identifier: MPL-2.0
 */

#pragma once

#include "alpaka/core/BoostPredef.hpp"
#include "alpaka/core/Decay.hpp"
#include "alpaka/core/Interface.hpp"
#include "alpaka/core/Unreachable.hpp"
#include "alpaka/math/Complex.hpp"
#include "alpaka/math/Traits.hpp"

#include <type_traits>
#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)

namespace alpaka::math
{
    //! The CUDA built in abs.
    class AbsUniformCudaHipBuiltIn : public interface::Implements<ConceptMathAbs, AbsUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in acos.
    class AcosUniformCudaHipBuiltIn : public interface::Implements<ConceptMathAcos, AcosUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in acosh.
    class AcoshUniformCudaHipBuiltIn : public interface::Implements<ConceptMathAcosh, AcoshUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in arg.
    class ArgUniformCudaHipBuiltIn : public interface::Implements<ConceptMathArg, ArgUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in asin.
    class AsinUniformCudaHipBuiltIn : public interface::Implements<ConceptMathAsin, AsinUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in asinh.
    class AsinhUniformCudaHipBuiltIn : public interface::Implements<ConceptMathAsinh, AsinhUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in atan.
    class AtanUniformCudaHipBuiltIn : public interface::Implements<ConceptMathAtan, AtanUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in atanh.
    class AtanhUniformCudaHipBuiltIn : public interface::Implements<ConceptMathAtanh, AtanhUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in atan2.
    class Atan2UniformCudaHipBuiltIn : public interface::Implements<ConceptMathAtan2, Atan2UniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in cbrt.
    class CbrtUniformCudaHipBuiltIn : public interface::Implements<ConceptMathCbrt, CbrtUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in ceil.
    class CeilUniformCudaHipBuiltIn : public interface::Implements<ConceptMathCeil, CeilUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in conj.
    class ConjUniformCudaHipBuiltIn : public interface::Implements<ConceptMathConj, ConjUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in copysign.
    class CopysignUniformCudaHipBuiltIn
        : public interface::Implements<ConceptMathCopysign, CopysignUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in cos.
    class CosUniformCudaHipBuiltIn : public interface::Implements<ConceptMathCos, CosUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in cosh.
    class CoshUniformCudaHipBuiltIn : public interface::Implements<ConceptMathCosh, CoshUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in erf.
    class ErfUniformCudaHipBuiltIn : public interface::Implements<ConceptMathErf, ErfUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in exp.
    class ExpUniformCudaHipBuiltIn : public interface::Implements<ConceptMathExp, ExpUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in floor.
    class FloorUniformCudaHipBuiltIn : public interface::Implements<ConceptMathFloor, FloorUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in fma.
    class FmaUniformCudaHipBuiltIn : public interface::Implements<ConceptMathFma, FmaUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in fmod.
    class FmodUniformCudaHipBuiltIn : public interface::Implements<ConceptMathFmod, FmodUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in isfinite.
    class IsfiniteUniformCudaHipBuiltIn
        : public interface::Implements<ConceptMathIsfinite, IsfiniteUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in isinf.
    class IsinfUniformCudaHipBuiltIn : public interface::Implements<ConceptMathIsinf, IsinfUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in isnan.
    class IsnanUniformCudaHipBuiltIn : public interface::Implements<ConceptMathIsnan, IsnanUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in log.
    class LogUniformCudaHipBuiltIn : public interface::Implements<ConceptMathLog, LogUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in log2.
    class Log2UniformCudaHipBuiltIn : public interface::Implements<ConceptMathLog2, Log2UniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in log10.
    class Log10UniformCudaHipBuiltIn : public interface::Implements<ConceptMathLog10, Log10UniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in max.
    class MaxUniformCudaHipBuiltIn : public interface::Implements<ConceptMathMax, MaxUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in min.
    class MinUniformCudaHipBuiltIn : public interface::Implements<ConceptMathMin, MinUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in pow.
    class PowUniformCudaHipBuiltIn : public interface::Implements<ConceptMathPow, PowUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA built in remainder.
    class RemainderUniformCudaHipBuiltIn
        : public interface::Implements<ConceptMathRemainder, RemainderUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA round.
    class RoundUniformCudaHipBuiltIn : public interface::Implements<ConceptMathRound, RoundUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA rsqrt.
    class RsqrtUniformCudaHipBuiltIn : public interface::Implements<ConceptMathRsqrt, RsqrtUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA sin.
    class SinUniformCudaHipBuiltIn : public interface::Implements<ConceptMathSin, SinUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA sinh.
    class SinhUniformCudaHipBuiltIn : public interface::Implements<ConceptMathSinh, SinhUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA sincos.
    class SinCosUniformCudaHipBuiltIn : public interface::Implements<ConceptMathSinCos, SinCosUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA sqrt.
    class SqrtUniformCudaHipBuiltIn : public interface::Implements<ConceptMathSqrt, SqrtUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA tan.
    class TanUniformCudaHipBuiltIn : public interface::Implements<ConceptMathTan, TanUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA tanh.
    class TanhUniformCudaHipBuiltIn : public interface::Implements<ConceptMathTanh, TanhUniformCudaHipBuiltIn>
    {
    };

    //! The CUDA trunc.
    class TruncUniformCudaHipBuiltIn : public interface::Implements<ConceptMathTrunc, TruncUniformCudaHipBuiltIn>
    {
    };

    //! The standard library math trait specializations.
    class MathUniformCudaHipBuiltIn
        : public AbsUniformCudaHipBuiltIn
        , public AcosUniformCudaHipBuiltIn
        , public AcoshUniformCudaHipBuiltIn
        , public ArgUniformCudaHipBuiltIn
        , public AsinUniformCudaHipBuiltIn
        , public AsinhUniformCudaHipBuiltIn
        , public AtanUniformCudaHipBuiltIn
        , public AtanhUniformCudaHipBuiltIn
        , public Atan2UniformCudaHipBuiltIn
        , public CbrtUniformCudaHipBuiltIn
        , public CeilUniformCudaHipBuiltIn
        , public ConjUniformCudaHipBuiltIn
        , public CopysignUniformCudaHipBuiltIn
        , public CosUniformCudaHipBuiltIn
        , public CoshUniformCudaHipBuiltIn
        , public ErfUniformCudaHipBuiltIn
        , public ExpUniformCudaHipBuiltIn
        , public FloorUniformCudaHipBuiltIn
        , public FmaUniformCudaHipBuiltIn
        , public FmodUniformCudaHipBuiltIn
        , public LogUniformCudaHipBuiltIn
        , public Log2UniformCudaHipBuiltIn
        , public Log10UniformCudaHipBuiltIn
        , public MaxUniformCudaHipBuiltIn
        , public MinUniformCudaHipBuiltIn
        , public PowUniformCudaHipBuiltIn
        , public RemainderUniformCudaHipBuiltIn
        , public RoundUniformCudaHipBuiltIn
        , public RsqrtUniformCudaHipBuiltIn
        , public SinUniformCudaHipBuiltIn
        , public SinhUniformCudaHipBuiltIn
        , public SinCosUniformCudaHipBuiltIn
        , public SqrtUniformCudaHipBuiltIn
        , public TanUniformCudaHipBuiltIn
        , public TanhUniformCudaHipBuiltIn
        , public TruncUniformCudaHipBuiltIn
        , public IsnanUniformCudaHipBuiltIn
        , public IsinfUniformCudaHipBuiltIn
        , public IsfiniteUniformCudaHipBuiltIn
    {
    };
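
    // Illustrative usage sketch (not part of the upstream header): kernels do not use
    // these tag classes directly. They call the free functions declared in
    // alpaka/math/Traits.hpp, which select the implementation from the accelerator via
    // the interface::Implements tags above. Assuming an accelerator `acc` whose math
    // implementation is MathUniformCudaHipBuiltIn, a kernel body might read:
    //
    //     template<typename TAcc>
    //     ALPAKA_FN_ACC auto kernelBody(TAcc const& acc, float x) -> float
    //     {
    //         return alpaka::math::sin(acc, x) + alpaka::math::sqrt(acc, x); // ::sinf, ::sqrtf
    //     }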

#    if !defined(ALPAKA_HOST_ONLY)

#        if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
#            error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
#        endif

#        if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
#            error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
#        endif

#        if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && defined(__CUDA_ARCH__)
#            include <cuda_runtime.h>
#        endif

#        if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && defined(__HIP_DEVICE_COMPILE__)
#            include <hip/math_functions.h>
#        endif

    namespace trait
    {
        //! The CUDA abs trait specialization for real types.
        template<typename TArg>
        struct Abs<AbsUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_signed_v<TArg>>>
        {
            __host__ __device__ auto operator()(AbsUniformCudaHipBuiltIn const& /* abs_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::fabsf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::fabs(arg);
                else if constexpr(is_decayed_v<TArg, int>)
                    return ::abs(arg);
                else if constexpr(is_decayed_v<TArg, long int>)
                    return ::labs(arg);
                else if constexpr(is_decayed_v<TArg, long long int>)
                    return ::llabs(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };
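
        // Note on the pattern above (explanatory, not from the upstream header): every
        // instantiated 'if constexpr' branch returns, so ALPAKA_UNREACHABLE(TArg{}) is
        // never executed. Per Unreachable.hpp, it only hands nvcc versions before CUDA
        // 11.5 a value of the deduced return type, because they could not identify the
        // return statements inside the 'if constexpr' branches. For example (with `ctx`
        // an AbsUniformCudaHipBuiltIn instance):
        //
        //     trait::Abs<AbsUniformCudaHipBuiltIn, float>{}(ctx, -1.5f); // ::fabsf -> 1.5f
        //     trait::Abs<AbsUniformCudaHipBuiltIn, int>{}(ctx, -3);      // ::abs   -> 3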

        //! The CUDA abs trait specialization for complex types.
        template<typename T>
        struct Abs<AbsUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                return sqrt(ctx, arg.real() * arg.real() + arg.imag() * arg.imag());
            }
        };
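
        // Worked example (illustrative): for z = 3 + 4i the specialization above
        // evaluates sqrt(ctx, 3*3 + 4*4) = sqrt(ctx, 25), i.e. |z| = 5:
        //
        //     auto const m = abs(acc, Complex<float>{3.0f, 4.0f}); // m == 5.0f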

        //! The CUDA acos trait specialization for real types.
        template<typename TArg>
        struct Acos<AcosUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(AcosUniformCudaHipBuiltIn const& /* acos_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::acosf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::acos(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA acos trait specialization for complex types.
        template<typename T>
        struct Acos<AcosUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // This holds everywhere, including the branch cuts: acos(z) = -i * ln(z + i * sqrt(1 - z^2))
                return Complex<T>{0.0, -1.0} * log(ctx, arg + Complex<T>{0.0, 1.0} * sqrt(ctx, T(1.0) - arg * arg));
            }
        };

        //! The CUDA acosh trait specialization for real types.
        template<typename TArg>
        struct Acosh<AcoshUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(AcoshUniformCudaHipBuiltIn const& /* acosh_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::acoshf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::acosh(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA acosh trait specialization for complex types.
        template<typename T>
        struct Acosh<AcoshUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // acosh(z) = ln(z + sqrt(z-1) * sqrt(z+1))
                return log(ctx, arg + sqrt(ctx, arg - static_cast<T>(1.0)) * sqrt(ctx, arg + static_cast<T>(1.0)));
            }
        };

        //! The CUDA arg trait specialization for real types.
        template<typename TArgument>
        struct Arg<ArgUniformCudaHipBuiltIn, TArgument, std::enable_if_t<std::is_floating_point_v<TArgument>>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, TArgument const& argument)
            {
                // Fall back to atan2 so that boundary cases are resolved consistently
                return atan2(ctx, TArgument{0.0}, argument);
            }
        };

        //! The CUDA arg trait specialization for complex types.
        template<typename T>
        struct Arg<ArgUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& argument)
            {
                return atan2(ctx, argument.imag(), argument.real());
            }
        };

        //! The CUDA asin trait specialization for real types.
        template<typename TArg>
        struct Asin<AsinUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(AsinUniformCudaHipBuiltIn const& /* asin_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::asinf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::asin(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA asin trait specialization for complex types.
        template<typename T>
        struct Asin<AsinUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // This holds everywhere, including the branch cuts: asin(z) = i * ln(sqrt(1 - z^2) - i * z)
                return Complex<T>{0.0, 1.0} * log(ctx, sqrt(ctx, T(1.0) - arg * arg) - Complex<T>{0.0, 1.0} * arg);
            }
        };

        //! The CUDA asinh trait specialization for real types.
        template<typename TArg>
        struct Asinh<AsinhUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(AsinhUniformCudaHipBuiltIn const& /* asinh_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::asinhf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::asinh(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA asinh trait specialization for complex types.
        template<typename T>
        struct Asinh<AsinhUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // asinh(z) = ln(z + sqrt(z^2 + 1))
                return log(ctx, arg + sqrt(ctx, arg * arg + static_cast<T>(1.0)));
            }
        };

        //! The CUDA atan trait specialization for real types.
        template<typename TArg>
        struct Atan<AtanUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(AtanUniformCudaHipBuiltIn const& /* atan_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::atanf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::atan(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA atan trait specialization for complex types.
        template<typename T>
        struct Atan<AtanUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // This holds everywhere, including the branch cuts: atan(z) = -i/2 * ln((i - z) / (i + z))
                return Complex<T>{0.0, -0.5} * log(ctx, (Complex<T>{0.0, 1.0} - arg) / (Complex<T>{0.0, 1.0} + arg));
            }
        };

        //! The CUDA atanh trait specialization for real types.
        template<typename TArg>
        struct Atanh<AtanhUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(AtanhUniformCudaHipBuiltIn const& /* atanh_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::atanhf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::atanh(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA atanh trait specialization for complex types.
        template<typename T>
        struct Atanh<AtanhUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // atanh(z) = 0.5 * (ln(1 + z) - ln(1 - z))
                return static_cast<T>(0.5)
                       * (log(ctx, static_cast<T>(1.0) + arg) - log(ctx, static_cast<T>(1.0) - arg));
            }
        };

        //! The CUDA atan2 trait specialization.
        template<typename Ty, typename Tx>
        struct Atan2<
            Atan2UniformCudaHipBuiltIn,
            Ty,
            Tx,
            std::enable_if_t<std::is_floating_point_v<Ty> && std::is_floating_point_v<Tx>>>
        {
            __host__ __device__ auto operator()(
                Atan2UniformCudaHipBuiltIn const& /* atan2_ctx */,
                Ty const& y,
                Tx const& x)
            {
                if constexpr(is_decayed_v<Ty, float> && is_decayed_v<Tx, float>)
                    return ::atan2f(y, x);
                else if constexpr(is_decayed_v<Ty, double> || is_decayed_v<Tx, double>)
                    return ::atan2(y, x);
                else
                    static_assert(!sizeof(Ty), "Unsupported data type");

                ALPAKA_UNREACHABLE(Ty{});
            }
        };
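
        // Promotion sketch (illustrative, assuming an accelerator context `acc`): the
        // branches above fall back to the double overload as soon as either argument
        // is a double:
        //
        //     atan2(acc, 1.0f, 1.0f); // ::atan2f, float result
        //     atan2(acc, 1.0f, 1.0);  // ::atan2, double result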

        //! The CUDA cbrt trait specialization.
        template<typename TArg>
        struct Cbrt<CbrtUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_arithmetic_v<TArg>>>
        {
            __host__ __device__ auto operator()(CbrtUniformCudaHipBuiltIn const& /* cbrt_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::cbrtf(arg);
                else if constexpr(is_decayed_v<TArg, double> || std::is_integral_v<TArg>)
                    return ::cbrt(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA ceil trait specialization.
        template<typename TArg>
        struct Ceil<CeilUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(CeilUniformCudaHipBuiltIn const& /* ceil_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::ceilf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::ceil(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA conj trait specialization for real types.
        template<typename TArg>
        struct Conj<ConjUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(ConjUniformCudaHipBuiltIn const& /* conj_ctx */, TArg const& arg)
            {
                return Complex<TArg>{arg, TArg{0.0}};
            }
        };

        //! The CUDA conj trait specialization for complex types.
        template<typename T>
        struct Conj<ConjUniformCudaHipBuiltIn, Complex<T>>
        {
            __host__ __device__ auto operator()(ConjUniformCudaHipBuiltIn const& /* conj_ctx */, Complex<T> const& arg)
            {
                return Complex<T>{arg.real(), -arg.imag()};
            }
        };

        //! The CUDA copysign trait specialization for real types.
        template<typename TMag, typename TSgn>
        struct Copysign<
            CopysignUniformCudaHipBuiltIn,
            TMag,
            TSgn,
            std::enable_if_t<std::is_floating_point_v<TMag> && std::is_floating_point_v<TSgn>>>
        {
            __host__ __device__ auto operator()(
                CopysignUniformCudaHipBuiltIn const& /* copysign_ctx */,
                TMag const& mag,
                TSgn const& sgn)
            {
                if constexpr(is_decayed_v<TMag, float> && is_decayed_v<TSgn, float>)
                    return ::copysignf(mag, sgn);
                else if constexpr(is_decayed_v<TMag, double> || is_decayed_v<TSgn, double>)
                    return ::copysign(mag, sgn);
                else
                    static_assert(!sizeof(TMag), "Unsupported data type");

                ALPAKA_UNREACHABLE(TMag{});
            }
        };

        //! The CUDA cos trait specialization for real types.
        template<typename TArg>
        struct Cos<CosUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(CosUniformCudaHipBuiltIn const& /* cos_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::cosf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::cos(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA cos trait specialization for complex types.
        template<typename T>
        struct Cos<CosUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // cos(z) = 0.5 * (exp(i * z) + exp(-i * z))
                return T(0.5) * (exp(ctx, Complex<T>{0.0, 1.0} * arg) + exp(ctx, Complex<T>{0.0, -1.0} * arg));
            }
        };

        //! The CUDA cosh trait specialization for real types.
        template<typename TArg>
        struct Cosh<CoshUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(CoshUniformCudaHipBuiltIn const& /* cosh_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::coshf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::cosh(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA cosh trait specialization for complex types.
        template<typename T>
        struct Cosh<CoshUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // cosh(z) = 0.5 * (exp(z) + exp(-z))
                return T(0.5) * (exp(ctx, arg) + exp(ctx, static_cast<T>(-1.0) * arg));
            }
        };

        //! The CUDA erf trait specialization.
        template<typename TArg>
        struct Erf<ErfUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(ErfUniformCudaHipBuiltIn const& /* erf_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::erff(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::erf(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA exp trait specialization for real types.
        template<typename TArg>
        struct Exp<ExpUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(ExpUniformCudaHipBuiltIn const& /* exp_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::expf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::exp(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA exp trait specialization for complex types.
        template<typename T>
        struct Exp<ExpUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // exp(z) = exp(x + iy) = exp(x) * (cos(y) + i * sin(y))
                auto re = T{}, im = T{};
                sincos(ctx, arg.imag(), im, re);
                return exp(ctx, arg.real()) * Complex<T>{re, im};
            }
        };

        //! The CUDA floor trait specialization.
        template<typename TArg>
        struct Floor<FloorUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(FloorUniformCudaHipBuiltIn const& /* floor_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::floorf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::floor(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA fma trait specialization.
        template<typename Tx, typename Ty, typename Tz>
        struct Fma<
            FmaUniformCudaHipBuiltIn,
            Tx,
            Ty,
            Tz,
            std::enable_if_t<
                std::is_floating_point_v<Tx> && std::is_floating_point_v<Ty> && std::is_floating_point_v<Tz>>>
        {
            __host__ __device__ auto operator()(
                FmaUniformCudaHipBuiltIn const& /* fma_ctx */,
                Tx const& x,
                Ty const& y,
                Tz const& z)
            {
                if constexpr(is_decayed_v<Tx, float> && is_decayed_v<Ty, float> && is_decayed_v<Tz, float>)
                    return ::fmaf(x, y, z);
                else if constexpr(is_decayed_v<Tx, double> || is_decayed_v<Ty, double> || is_decayed_v<Tz, double>)
                    return ::fma(x, y, z);
                else
                    static_assert(!sizeof(Tx), "Unsupported data type");

                using Ret [[maybe_unused]] = std::conditional_t<
                    is_decayed_v<Tx, float> && is_decayed_v<Ty, float> && is_decayed_v<Tz, float>,
                    float,
                    double>;
                ALPAKA_UNREACHABLE(Ret{});
            }
        };
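
        // Note on Ret (explanatory, not from the upstream header): with three inputs the
        // return type is no longer simply one of the argument types, so the unreachable
        // dummy value is built from the same float/double promotion rule as the branches;
        // e.g. fma(acc, 1.0f, 2.0f, 3.0) takes the ::fma branch and deduces Ret = double.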

        //! The CUDA fmod trait specialization.
        template<typename Tx, typename Ty>
        struct Fmod<
            FmodUniformCudaHipBuiltIn,
            Tx,
            Ty,
            std::enable_if_t<std::is_floating_point_v<Tx> && std::is_floating_point_v<Ty>>>
        {
            __host__ __device__ auto operator()(
                FmodUniformCudaHipBuiltIn const& /* fmod_ctx */,
                Tx const& x,
                Ty const& y)
            {
                if constexpr(is_decayed_v<Tx, float> && is_decayed_v<Ty, float>)
                    return ::fmodf(x, y);
                else if constexpr(is_decayed_v<Tx, double> || is_decayed_v<Ty, double>)
                    return ::fmod(x, y);
                else
                    static_assert(!sizeof(Tx), "Unsupported data type");

                using Ret [[maybe_unused]]
                    = std::conditional_t<is_decayed_v<Tx, float> && is_decayed_v<Ty, float>, float, double>;
                ALPAKA_UNREACHABLE(Ret{});
            }
        };

        //! The CUDA isfinite trait specialization.
        template<typename TArg>
        struct Isfinite<IsfiniteUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(IsfiniteUniformCudaHipBuiltIn const& /* ctx */, TArg const& arg)
            {
                return ::isfinite(arg);
            }
        };

        //! The CUDA isinf trait specialization.
        template<typename TArg>
        struct Isinf<IsinfUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(IsinfUniformCudaHipBuiltIn const& /* ctx */, TArg const& arg)
            {
                return ::isinf(arg);
            }
        };

        //! The CUDA isnan trait specialization.
        template<typename TArg>
        struct Isnan<IsnanUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(IsnanUniformCudaHipBuiltIn const& /* ctx */, TArg const& arg)
            {
                return ::isnan(arg);
            }
        };

        //! The CUDA log trait specialization for real types.
        template<typename TArg>
        struct Log<LogUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(LogUniformCudaHipBuiltIn const& /* log_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::logf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::log(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA log trait specialization for complex types.
        template<typename T>
        struct Log<LogUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& argument)
            {
                // Branch cut along the negative real axis (same as for std::complex),
                // principal value of ln(z) = ln(|z|) + i * arg(z)
                return log(ctx, abs(ctx, argument)) + Complex<T>{0.0, 1.0} * arg(ctx, argument);
            }
        };

        //! The CUDA log2 trait specialization for real types.
        template<typename TArg>
        struct Log2<Log2UniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(Log2UniformCudaHipBuiltIn const& /* log2_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::log2f(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::log2(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA log10 trait specialization for real types.
        template<typename TArg>
        struct Log10<Log10UniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(Log10UniformCudaHipBuiltIn const& /* log10_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::log10f(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::log10(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA log10 trait specialization for complex types.
        template<typename T>
        struct Log10<Log10UniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& argument)
            {
                // log10(z) = ln(z) / ln(10)
                return log(ctx, argument) / log(ctx, static_cast<T>(10));
            }
        };

        //! The CUDA max trait specialization.
        template<typename Tx, typename Ty>
        struct Max<
            MaxUniformCudaHipBuiltIn,
            Tx,
            Ty,
            std::enable_if_t<std::is_arithmetic_v<Tx> && std::is_arithmetic_v<Ty>>>
        {
            __host__ __device__ auto operator()(
                MaxUniformCudaHipBuiltIn const& /* max_ctx */,
                Tx const& x,
                Ty const& y)
            {
                if constexpr(std::is_integral_v<Tx> && std::is_integral_v<Ty>)
                    return ::max(x, y);
                else if constexpr(is_decayed_v<Tx, float> && is_decayed_v<Ty, float>)
                    return ::fmaxf(x, y);
                else if constexpr(
                    is_decayed_v<Tx, double> || is_decayed_v<Ty, double>
                    || (is_decayed_v<Tx, float> && std::is_integral_v<Ty>)
                    || (std::is_integral_v<Tx> && is_decayed_v<Ty, float>))
                    return ::fmax(x, y);
                else
                    static_assert(!sizeof(Tx), "Unsupported data type");

                using Ret [[maybe_unused]] = std::conditional_t<
                    std::is_integral_v<Tx> && std::is_integral_v<Ty>,
                    decltype(::max(x, y)),
                    std::conditional_t<is_decayed_v<Tx, float> && is_decayed_v<Ty, float>, float, double>>;
                ALPAKA_UNREACHABLE(Ret{});
            }
        };
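
        // Dispatch sketch (illustrative, assuming an accelerator context `acc`):
        //
        //     max(acc, 2, 3);       // integral/integral -> ::max,   integral result
        //     max(acc, 2.0f, 3.0f); // float/float       -> ::fmaxf, float result
        //     max(acc, 2, 3.0f);    // mixed             -> ::fmax,  double result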

        //! The CUDA min trait specialization.
        template<typename Tx, typename Ty>
        struct Min<
            MinUniformCudaHipBuiltIn,
            Tx,
            Ty,
            std::enable_if_t<std::is_arithmetic_v<Tx> && std::is_arithmetic_v<Ty>>>
        {
            __host__ __device__ auto operator()(
                MinUniformCudaHipBuiltIn const& /* min_ctx */,
                Tx const& x,
                Ty const& y)
            {
                if constexpr(std::is_integral_v<Tx> && std::is_integral_v<Ty>)
                    return ::min(x, y);
                else if constexpr(is_decayed_v<Tx, float> && is_decayed_v<Ty, float>)
                    return ::fminf(x, y);
                else if constexpr(
                    is_decayed_v<Tx, double> || is_decayed_v<Ty, double>
                    || (is_decayed_v<Tx, float> && std::is_integral_v<Ty>)
                    || (std::is_integral_v<Tx> && is_decayed_v<Ty, float>))
                    return ::fmin(x, y);
                else
                    static_assert(!sizeof(Tx), "Unsupported data type");

                using Ret [[maybe_unused]] = std::conditional_t<
                    std::is_integral_v<Tx> && std::is_integral_v<Ty>,
                    decltype(::min(x, y)),
                    std::conditional_t<is_decayed_v<Tx, float> && is_decayed_v<Ty, float>, float, double>>;
                ALPAKA_UNREACHABLE(Ret{});
            }
        };

        //! The CUDA pow trait specialization for real types.
        template<typename TBase, typename TExp>
        struct Pow<
            PowUniformCudaHipBuiltIn,
            TBase,
            TExp,
            std::enable_if_t<std::is_floating_point_v<TBase> && std::is_floating_point_v<TExp>>>
        {
            __host__ __device__ auto operator()(
                PowUniformCudaHipBuiltIn const& /* pow_ctx */,
                TBase const& base,
                TExp const& exp)
            {
                if constexpr(is_decayed_v<TBase, float> && is_decayed_v<TExp, float>)
                    return ::powf(base, exp);
                else if constexpr(is_decayed_v<TBase, double> || is_decayed_v<TExp, double>)
                    return ::pow(static_cast<double>(base), static_cast<double>(exp));
                else
                    static_assert(!sizeof(TBase), "Unsupported data type");

                using Ret [[maybe_unused]]
                    = std::conditional_t<is_decayed_v<TBase, float> && is_decayed_v<TExp, float>, float, double>;
                ALPAKA_UNREACHABLE(Ret{});
            }
        };

        //! The CUDA pow trait specialization for complex types.
        template<typename T, typename U>
        struct Pow<PowUniformCudaHipBuiltIn, Complex<T>, Complex<U>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& base, Complex<U> const& exponent)
            {
                // Type promotion matching rules of complex std::pow but simplified given our math only supports
                // float and double, no long double.
                using Promoted
                    = Complex<std::conditional_t<is_decayed_v<T, float> && is_decayed_v<U, float>, float, double>>;
                // pow(z1, z2) = e^(z2 * log(z1))
                return exp(ctx, Promoted{exponent} * log(ctx, Promoted{base}));
            }
        };
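
        // Worked example (illustrative): pow(z1, z2) = e^(z2 * log(z1)), so for z1 = i
        // and z2 = 2 the specialization computes exp(2 * i*pi/2) = exp(i*pi), i.e.
        // approximately -1 up to floating-point rounding:
        //
        //     auto const p = pow(acc, Complex<double>{0.0, 1.0}, Complex<double>{2.0, 0.0});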

        //! The CUDA pow trait specialization for complex and real types.
        template<typename T, typename U>
        struct Pow<PowUniformCudaHipBuiltIn, Complex<T>, U>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& base, U const& exponent)
            {
                return pow(ctx, base, Complex<U>{exponent});
            }
        };

        //! The CUDA pow trait specialization for real and complex types.
        template<typename T, typename U>
        struct Pow<PowUniformCudaHipBuiltIn, T, Complex<U>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, T const& base, Complex<U> const& exponent)
            {
                return pow(ctx, Complex<T>{base}, exponent);
            }
        };

        //! The CUDA remainder trait specialization.
        template<typename Tx, typename Ty>
        struct Remainder<
            RemainderUniformCudaHipBuiltIn,
            Tx,
            Ty,
            std::enable_if_t<std::is_floating_point_v<Tx> && std::is_floating_point_v<Ty>>>
        {
            __host__ __device__ auto operator()(
                RemainderUniformCudaHipBuiltIn const& /* remainder_ctx */,
                Tx const& x,
                Ty const& y)
            {
                if constexpr(is_decayed_v<Tx, float> && is_decayed_v<Ty, float>)
                    return ::remainderf(x, y);
                else if constexpr(is_decayed_v<Tx, double> || is_decayed_v<Ty, double>)
                    return ::remainder(x, y);
                else
                    static_assert(!sizeof(Tx), "Unsupported data type");

                using Ret [[maybe_unused]]
                    = std::conditional_t<is_decayed_v<Tx, float> && is_decayed_v<Ty, float>, float, double>;
                ALPAKA_UNREACHABLE(Ret{});
            }
        };

        //! The CUDA round trait specialization.
        template<typename TArg>
        struct Round<RoundUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(RoundUniformCudaHipBuiltIn const& /* round_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::roundf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::round(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA lround trait specialization.
        template<typename TArg>
        struct Lround<RoundUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(RoundUniformCudaHipBuiltIn const& /* lround_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::lroundf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::lround(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(long{});
            }
        };

        //! The CUDA llround trait specialization.
        template<typename TArg>
        struct Llround<RoundUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(RoundUniformCudaHipBuiltIn const& /* llround_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::llroundf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::llround(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                // NVCC versions before 11.3 are unable to compile 'long long{}': "type name is not allowed".
                using Ret [[maybe_unused]] = long long;
                ALPAKA_UNREACHABLE(Ret{});
            }
        };

        //! The CUDA rsqrt trait specialization for real types.
        template<typename TArg>
        struct Rsqrt<RsqrtUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_arithmetic_v<TArg>>>
        {
            __host__ __device__ auto operator()(RsqrtUniformCudaHipBuiltIn const& /* rsqrt_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::rsqrtf(arg);
                else if constexpr(is_decayed_v<TArg, double> || std::is_integral_v<TArg>)
                    return ::rsqrt(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA rsqrt trait specialization for complex types.
        template<typename T>
        struct Rsqrt<RsqrtUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // rsqrt(z) = 1 / sqrt(z)
                return T{1.0} / sqrt(ctx, arg);
            }
        };

        //! The CUDA sin trait specialization for real types.
        template<typename TArg>
        struct Sin<SinUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(SinUniformCudaHipBuiltIn const& /* sin_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::sinf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::sin(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA sin trait specialization for complex types.
        template<typename T>
        struct Sin<SinUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // sin(z) = (exp(i * z) - exp(-i * z)) / 2i
                return (exp(ctx, Complex<T>{0.0, 1.0} * arg) - exp(ctx, Complex<T>{0.0, -1.0} * arg))
                       / Complex<T>{0.0, 2.0};
            }
        };

        //! The CUDA sinh trait specialization for real types.
        template<typename TArg>
        struct Sinh<SinhUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(SinhUniformCudaHipBuiltIn const& /* sinh_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::sinhf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::sinh(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA sinh trait specialization for complex types.
        template<typename T>
        struct Sinh<SinhUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // sinh(z) = (exp(z) - exp(-z)) / 2
                return (exp(ctx, arg) - exp(ctx, static_cast<T>(-1.0) * arg)) / static_cast<T>(2.0);
            }
        };

        //! The CUDA sincos trait specialization for real types.
        template<typename TArg>
        struct SinCos<SinCosUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(
                SinCosUniformCudaHipBuiltIn const& /* sincos_ctx */,
                TArg const& arg,
                TArg& result_sin,
                TArg& result_cos) -> void
            {
                if constexpr(is_decayed_v<TArg, float>)
                    ::sincosf(arg, &result_sin, &result_cos);
                else if constexpr(is_decayed_v<TArg, double>)
                    ::sincos(arg, &result_sin, &result_cos);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");
            }
        };

        //! The CUDA sincos trait specialization for complex types.
        template<typename T>
        struct SinCos<SinCosUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(
                TCtx const& ctx,
                Complex<T> const& arg,
                Complex<T>& result_sin,
                Complex<T>& result_cos) -> void
            {
                result_sin = sin(ctx, arg);
                result_cos = cos(ctx, arg);
            }
        };
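
        // Usage sketch (illustrative, assuming an accelerator context `acc`): for real
        // arguments both results come from a single ::sincosf call instead of separate
        // sin and cos calls:
        //
        //     float s, c;
        //     sincos(acc, 0.5f, s, c);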

        //! The CUDA sqrt trait specialization for real types.
        template<typename TArg>
        struct Sqrt<SqrtUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_arithmetic_v<TArg>>>
        {
            __host__ __device__ auto operator()(SqrtUniformCudaHipBuiltIn const& /* sqrt_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::sqrtf(arg);
                else if constexpr(is_decayed_v<TArg, double> || std::is_integral_v<TArg>)
                    return ::sqrt(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA sqrt trait specialization for complex types.
        template<typename T>
        struct Sqrt<SqrtUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& argument)
            {
                // Branch cut along the negative real axis (same as for std::complex),
                // principal value of sqrt(z) = sqrt(|z|) * e^(i * arg(z) / 2)
                auto const halfArg = T(0.5) * arg(ctx, argument);
                auto re = T{}, im = T{};
                sincos(ctx, halfArg, im, re);
                return sqrt(ctx, abs(ctx, argument)) * Complex<T>(re, im);
            }
        };
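
        // Worked example (illustrative): for z = -1, arg(z) = pi, so the polar form
        // above yields sqrt(1) * e^(i*pi/2) = i, the principal root under the branch
        // cut convention noted in the comment:
        //
        //     auto const r = sqrt(acc, Complex<double>{-1.0, 0.0}); // ~ (0, 1)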

        //! The CUDA tan trait specialization for real types.
        template<typename TArg>
        struct Tan<TanUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(TanUniformCudaHipBuiltIn const& /* tan_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::tanf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::tan(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA tan trait specialization for complex types.
        template<typename T>
        struct Tan<TanUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // tan(z) = i * (e^-iz - e^iz) / (e^-iz + e^iz) = i * (1 - e^2iz) / (1 + e^2iz)
                // Warning: this straightforward implementation can easily result in NaN as 0/0 or inf/inf.
                auto const expValue = exp(ctx, Complex<T>{0.0, 2.0} * arg);
                return Complex<T>{0.0, 1.0} * (T{1.0} - expValue) / (T{1.0} + expValue);
            }
        };

        //! The CUDA tanh trait specialization for real types.
        template<typename TArg>
        struct Tanh<TanhUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(TanhUniformCudaHipBuiltIn const& /* tanh_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::tanhf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::tanh(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };

        //! The CUDA tanh trait specialization for complex types.
        template<typename T>
        struct Tanh<TanhUniformCudaHipBuiltIn, Complex<T>>
        {
            //! Take context as original (accelerator) type, since we call other math functions
            template<typename TCtx>
            __host__ __device__ auto operator()(TCtx const& ctx, Complex<T> const& arg)
            {
                // tanh(z) = (e^z - e^-z) / (e^z + e^-z)
                return (exp(ctx, arg) - exp(ctx, static_cast<T>(-1.0) * arg))
                       / (exp(ctx, arg) + exp(ctx, static_cast<T>(-1.0) * arg));
            }
        };

        //! The CUDA trunc trait specialization.
        template<typename TArg>
        struct Trunc<TruncUniformCudaHipBuiltIn, TArg, std::enable_if_t<std::is_floating_point_v<TArg>>>
        {
            __host__ __device__ auto operator()(TruncUniformCudaHipBuiltIn const& /* trunc_ctx */, TArg const& arg)
            {
                if constexpr(is_decayed_v<TArg, float>)
                    return ::truncf(arg);
                else if constexpr(is_decayed_v<TArg, double>)
                    return ::trunc(arg);
                else
                    static_assert(!sizeof(TArg), "Unsupported data type");

                ALPAKA_UNREACHABLE(TArg{});
            }
        };
    } // namespace trait
#    endif
} // namespace alpaka::math

#endif