alpaka
Abstraction Library for Parallel Kernel Acceleration
IntrinsicUniformCudaHipBuiltIn.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Sergei Bastrakov, Andrea Bocci, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
10 
11 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
12 
13 namespace alpaka
14 {
15  //! The GPU CUDA/HIP intrinsic implementation, which forwards to the compilers' built-in device functions.
17  : public interface::Implements<ConceptIntrinsic, IntrinsicUniformCudaHipBuiltIn>
18  {
19  };
20 
21 # if !defined(ALPAKA_HOST_ONLY)
22 
23 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
24 # error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
25 # endif
26 
27 # if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
28 # error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
29 # endif
30 
31  namespace trait
32  {
33  template<>
35  {
36  __device__ static auto popcount(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::uint32_t value)
37  -> std::int32_t
38  {
    // Device-only population count of a 32-bit value: forwards to the __popc built-in and returns
    // its result as std::int32_t. The intrinsic object is unused (its parameter name is commented out).
39  // clang as CUDA compiler changed the built-in's interface to unsigned values starting with clang 18.
    // Older clang CUDA versions therefore get the value cast to int; every other compiler
    // configuration (including HIP) gets it cast to unsigned int.
40 # if BOOST_COMP_CLANG && BOOST_LANG_CUDA && BOOST_COMP_CLANG < BOOST_VERSION_NUMBER(18, 0, 0)
41  return __popc(static_cast<int>(value));
42 # else
    // The explicit cast pins the return type regardless of what the built-in returns here.
43  return static_cast<std::int32_t>(__popc(static_cast<unsigned int>(value)));
44 # endif
45  }
46 
47  __device__ static auto popcount(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::uint64_t value)
48  -> std::int32_t
49  {
    // Device-only population count of a 64-bit value: forwards to the __popcll built-in and returns
    // its result as std::int32_t. The intrinsic object is unused (its parameter name is commented out).
50  // clang as CUDA compiler changed the built-in's interface to unsigned values starting with clang 18.
    // Older clang CUDA versions therefore get the value cast to long long; every other compiler
    // configuration (including HIP) gets it cast to unsigned long long.
51 # if BOOST_COMP_CLANG && BOOST_LANG_CUDA && BOOST_COMP_CLANG < BOOST_VERSION_NUMBER(18, 0, 0)
52  return __popcll(static_cast<long long>(value));
53 # else
    // The explicit cast pins the return type regardless of what the built-in returns here.
54  return static_cast<std::int32_t>(__popcll(static_cast<unsigned long long>(value)));
55 # endif
56  }
57  };
58 
59  template<>
61  {
62  __device__ static auto ffs(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::int32_t value)
63  -> std::int32_t
64  {
    // Device-only "find first set" for a 32-bit value: forwards to the __ffs built-in and returns
    // its result as std::int32_t. The intrinsic object is unused (its parameter name is commented out).
    // NOTE(review): per the CUDA integer-intrinsic documentation, __ffs yields the 1-based position of
    // the least significant set bit and 0 when no bit is set; confirm the HIP built-in matches.
65  return static_cast<std::int32_t>(__ffs(static_cast<int>(value)));
66  }
67 
68  __device__ static auto ffs(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::int64_t value)
69  -> std::int32_t
70  {
    // Device-only "find first set" for a 64-bit value: forwards to the __ffsll built-in and returns
    // its result as std::int32_t. The intrinsic object is unused (its parameter name is commented out).
    // NOTE(review): per the CUDA integer-intrinsic documentation, __ffsll yields the 1-based position of
    // the least significant set bit and 0 when no bit is set; confirm the HIP built-in matches.
71  return static_cast<std::int32_t>(__ffsll(static_cast<long long>(value)));
72  }
73  };
74  } // namespace trait
75 
76 # endif
77 
78 } // namespace alpaka
79 
80 #endif
The alpaka accelerator library.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition: Interface.hpp:15
static __device__ auto ffs(IntrinsicUniformCudaHipBuiltIn const &, std::int64_t value) -> std::int32_t
static __device__ auto ffs(IntrinsicUniformCudaHipBuiltIn const &, std::int32_t value) -> std::int32_t
The ffs trait.
Definition: Traits.hpp:28
static __device__ auto popcount(IntrinsicUniformCudaHipBuiltIn const &, std::uint32_t value) -> std::int32_t
static __device__ auto popcount(IntrinsicUniformCudaHipBuiltIn const &, std::uint64_t value) -> std::int32_t
The popcount trait.
Definition: Traits.hpp:24