alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
IntrinsicCpu.hpp
Go to the documentation of this file.
1/* Copyright 2023 Sergei Bastrakov, Bernhard Manfred Gruber, Jan Stephan
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
11
12#include <bitset>
13#include <climits>
14#if __has_include(<version>) // Not part of the C++17 standard but all major standard libraries include this
15# include <version>
16#endif
17#ifdef __cpp_lib_bitops
18# include <bit>
19#endif
20
21#if BOOST_COMP_MSVC
22# include <intrin.h>
23#endif
24
25namespace alpaka
26{
27 //! The CPU intrinsic.
28 class IntrinsicCpu : public interface::Implements<ConceptIntrinsic, IntrinsicCpu>
29 {
30 };
31
32 namespace trait
33 {
34 template<>
35 struct Popcount<IntrinsicCpu>
36 {
37 template<typename UnsignedIntegral>
38 static auto popcount(IntrinsicCpu const& /*intrinsic*/, UnsignedIntegral value) -> std::int32_t
39 {
40#ifdef __cpp_lib_bitops
41 return std::popcount(value);
42#elif BOOST_COMP_GNUC || BOOST_COMP_CLANG
43 if constexpr(sizeof(UnsignedIntegral) == 8)
44 return __builtin_popcountll(value);
45 else
46 return __builtin_popcount(value);
47#elif BOOST_COMP_MSVC
48 if constexpr(sizeof(UnsignedIntegral) == 8)
49 return static_cast<std::int32_t>(__popcnt64(value));
50 else
51 return __popcnt(value);
52#else
53 // Fallback to standard library
54 return static_cast<std::int32_t>(std::bitset<sizeof(UnsignedIntegral) * CHAR_BIT>(value).count());
55#endif
57 }
58 };
59
60 template<>
61 struct Ffs<IntrinsicCpu>
62 {
63 template<typename Integral>
64 static auto ffs(IntrinsicCpu const& /*intrinsic*/, Integral value) -> std::int32_t
65 {
66#ifdef __cpp_lib_bitops
67 return value == 0 ? 0 : std::countr_zero(static_cast<std::make_unsigned_t<Integral>>(value)) + 1;
68#elif BOOST_COMP_GNUC || BOOST_COMP_CLANG
69 if constexpr(sizeof(Integral) == 8)
70 return __builtin_ffsll(value);
71 else
72 return __builtin_ffs(value);
73#elif BOOST_COMP_MSVC
74 // Implementation based on
75 // https://gitlab.freedesktop.org/cairo/cairo/commit/f5167dc2e1a13d8c4e5d66d7178a24b9b5e7ac7a
76 unsigned long index = 0u;
77 if constexpr(sizeof(Integral) == 8)
78 return _BitScanForward64(&index, value) == 0 ? 0 : static_cast<std::int32_t>(index + 1u);
79 else
80 return _BitScanForward(&index, value) == 0 ? 0 : static_cast<std::int32_t>(index + 1u);
81#else
82 return alpaka::detail::ffsFallback(value);
83#endif
85 }
86 };
87 } // namespace trait
88} // namespace alpaka
#define ALPAKA_UNREACHABLE(...)
Before CUDA 11.5, nvcc is unable to correctly identify return statements in 'if constexpr' branches.
The CPU intrinsic.
static auto ffsFallback(TValue value) -> std::int32_t
Fallback implementation of ffs.
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto ffs(TIntrinsic const &intrinsic, std::int32_t value) -> std::int32_t
Returns the 1-based position of the least significant bit set to 1 in the given 32-bit value. Returns 0 if the value is 0.
Definition Traits.hpp:65
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto popcount(TIntrinsic const &intrinsic, std::uint32_t value) -> std::int32_t
Returns the number of 1 bits in the given 32-bit value.
Definition Traits.hpp:38
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
Definition Interface.hpp:15