alpaka
Abstraction Library for Parallel Kernel Acceleration
IntrinsicCpu.hpp
Go to the documentation of this file.
1 /* Copyright 2023 Sergei Bastrakov, Bernhard Manfred Gruber, Jan Stephan
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
11 
12 #include <bitset>
13 #include <climits>
14 #if __has_include(<version>) // Not part of the C++17 standard but all major standard libraries include this
15 # include <version>
16 #endif
17 #ifdef __cpp_lib_bitops
18 # include <bit>
19 #endif
20 
21 #if BOOST_COMP_MSVC
22 # include <intrin.h>
23 #endif
24 
25 namespace alpaka
26 {
27  //! The CPU intrinsic.
28  class IntrinsicCpu : public concepts::Implements<ConceptIntrinsic, IntrinsicCpu>
29  {
30  };
31 
32  namespace trait
33  {
34  template<>
36  {
37  template<typename UnsignedIntegral>
38  static auto popcount(IntrinsicCpu const& /*intrinsic*/, UnsignedIntegral value) -> std::int32_t
39  {
40 #ifdef __cpp_lib_bitops
41  return std::popcount(value);
42 #elif BOOST_COMP_GNUC || BOOST_COMP_CLANG
43  if constexpr(sizeof(UnsignedIntegral) == 8)
44  return __builtin_popcountll(value);
45  else
46  return __builtin_popcount(value);
47 #elif BOOST_COMP_MSVC
48  if constexpr(sizeof(UnsignedIntegral) == 8)
49  return static_cast<std::int32_t>(__popcnt64(value));
50  else
51  return __popcnt(value);
52 #else
53  // Fallback to standard library
54  return static_cast<std::int32_t>(std::bitset<sizeof(UnsignedIntegral) * CHAR_BIT>(value).count());
55 #endif
57  }
58  };
59 
60  template<>
61  struct Ffs<IntrinsicCpu>
62  {
63  template<typename Integral>
64  static auto ffs(IntrinsicCpu const& /*intrinsic*/, Integral value) -> std::int32_t
65  {
66 #ifdef __cpp_lib_bitops
67  return value == 0 ? 0 : std::countr_zero(static_cast<std::make_unsigned_t<Integral>>(value)) + 1;
68 #elif BOOST_COMP_GNUC || BOOST_COMP_CLANG
69  if constexpr(sizeof(Integral) == 8)
70  return __builtin_ffsll(value);
71  else
72  return __builtin_ffs(value);
73 #elif BOOST_COMP_MSVC
74  // Implementation based on
75  // https://gitlab.freedesktop.org/cairo/cairo/commit/f5167dc2e1a13d8c4e5d66d7178a24b9b5e7ac7a
76  unsigned long index = 0u;
77  if constexpr(sizeof(Integral) == 8)
78  return _BitScanForward64(&index, value) == 0 ? 0 : static_cast<std::int32_t>(index + 1u);
79  else
80  return _BitScanForward(&index, value) == 0 ? 0 : static_cast<std::int32_t>(index + 1u);
81 #else
82  return alpaka::detail::ffsFallback(value);
83 #endif
85  }
86  };
87  } // namespace trait
88 } // namespace alpaka
#define ALPAKA_UNREACHABLE(...)
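Before CUDA 11.5, nvcc is unable to correctly identify return statements in 'if constexpr' branches. It will issue a false warning about a missing return statement unless it is told that the following code section is unreachable.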
Definition: Unreachable.hpp:24
The CPU intrinsic.
static auto ffsFallback(TValue value) -> std::int32_t
Fallback implementation of ffs.
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto popcount(TIntrinsic const &intrinsic, std::uint32_t value) -> std::int32_t
Returns the number of 1 bits in the given 32-bit value.
Definition: Traits.hpp:38
Tag used in class inheritance hierarchies that describes that a specific concept (TConcept) is implemented by the given base class (TBase).
Definition: Concepts.hpp:15
static auto ffs(IntrinsicCpu const &, Integral value) -> std::int32_t
The ffs trait.
Definition: Traits.hpp:28
static auto popcount(IntrinsicCpu const &, UnsignedIntegral value) -> std::int32_t
The popcount trait.
Definition: Traits.hpp:24