alpaka
Abstraction Library for Parallel Kernel Acceleration
IntrinsicUniformCudaHipBuiltIn.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Sergei Bastrakov, Andrea Bocci, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
10 
11 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
12 
13 namespace alpaka
14 {
15  //! The GPU CUDA/HIP intrinsic.
17  : public concepts::Implements<ConceptIntrinsic, IntrinsicUniformCudaHipBuiltIn>
18  {
19  };
20 
21 # if !defined(ALPAKA_HOST_ONLY)
22 
23 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
24 # error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
25 # endif
26 
27 # if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
28 # error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
29 # endif
30 
31  namespace trait
32  {
33  template<>
35  {
36  __device__ static auto popcount(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::uint32_t value)
37  -> std::int32_t
38  {
39 # if BOOST_COMP_CLANG && BOOST_LANG_CUDA
40  return __popc(static_cast<int>(value));
41 # else
42  return static_cast<std::int32_t>(__popc(static_cast<unsigned int>(value)));
43 # endif
44  }
45 
46  __device__ static auto popcount(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::uint64_t value)
47  -> std::int32_t
48  {
49 # if BOOST_COMP_CLANG && BOOST_LANG_CUDA
50  return __popcll(static_cast<long long>(value));
51 # else
52  return static_cast<std::int32_t>(__popcll(static_cast<unsigned long long>(value)));
53 # endif
54  }
55  };
56 
57  template<>
59  {
60  __device__ static auto ffs(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::int32_t value)
61  -> std::int32_t
62  {
63  return static_cast<std::int32_t>(__ffs(static_cast<int>(value)));
64  }
65 
66  __device__ static auto ffs(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::int64_t value)
67  -> std::int32_t
68  {
69  return static_cast<std::int32_t>(__ffsll(static_cast<long long>(value)));
70  }
71  };
72  } // namespace trait
73 
74 # endif
75 
76 } // namespace alpaka
77 
78 #endif
The alpaka accelerator library.
Tag used in class inheritance hierarchies that describes that a specific concept (TConcept) is implemented by the given base class (TBase).
Definition: Concepts.hpp:15
static __device__ auto ffs(IntrinsicUniformCudaHipBuiltIn const &, std::int64_t value) -> std::int32_t
static __device__ auto ffs(IntrinsicUniformCudaHipBuiltIn const &, std::int32_t value) -> std::int32_t
The ffs trait.
Definition: Traits.hpp:28
static __device__ auto popcount(IntrinsicUniformCudaHipBuiltIn const &, std::uint32_t value) -> std::int32_t
static __device__ auto popcount(IntrinsicUniformCudaHipBuiltIn const &, std::uint64_t value) -> std::int32_t
The popcount trait.
Definition: Traits.hpp:24