alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
IntrinsicUniformCudaHipBuiltIn.hpp
Go to the documentation of this file.
1/* Copyright 2022 Sergei Bastrakov, Andrea Bocci, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10
11#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
12
13namespace alpaka
14{
15 //! The GPU CUDA/HIP intrinsic.
17 : public interface::Implements<ConceptIntrinsic, IntrinsicUniformCudaHipBuiltIn>
18 {
19 };
20
21# if !defined(ALPAKA_HOST_ONLY)
22
23# if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
24# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
25# endif
26
27# if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
28# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
29# endif
30
31 namespace trait
32 {
33 template<>
34 struct Popcount<IntrinsicUniformCudaHipBuiltIn>
35 {
36 __device__ static auto popcount(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::uint32_t value)
37 -> std::int32_t
38 {
39 // clang as CUDA compiler change the interface to unsigned values for clang >=18
40# if BOOST_COMP_CLANG && BOOST_LANG_CUDA && BOOST_COMP_CLANG < BOOST_VERSION_NUMBER(18, 0, 0)
41 return __popc(static_cast<int>(value));
42# else
43 return static_cast<std::int32_t>(__popc(static_cast<unsigned int>(value)));
44# endif
45 }
46
47 __device__ static auto popcount(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::uint64_t value)
48 -> std::int32_t
49 {
50 // clang as CUDA compiler change the interface to unsigned values for clang >=18
51# if BOOST_COMP_CLANG && BOOST_LANG_CUDA && BOOST_COMP_CLANG < BOOST_VERSION_NUMBER(18, 0, 0)
52 return __popcll(static_cast<long long>(value));
53# else
54 return static_cast<std::int32_t>(__popcll(static_cast<unsigned long long>(value)));
55# endif
56 }
57 };
58
59 template<>
60 struct Ffs<IntrinsicUniformCudaHipBuiltIn>
61 {
62 __device__ static auto ffs(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::int32_t value)
63 -> std::int32_t
64 {
65 return static_cast<std::int32_t>(__ffs(static_cast<int>(value)));
66 }
67
68 __device__ static auto ffs(IntrinsicUniformCudaHipBuiltIn const& /*intrinsic*/, std::int64_t value)
69 -> std::int32_t
70 {
71 return static_cast<std::int32_t>(__ffsll(static_cast<long long>(value)));
72 }
73 };
74 } // namespace trait
75
76# endif
77
78} // namespace alpaka
79
80#endif
The alpaka accelerator library.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto ffs(TIntrinsic const &intrinsic, std::int32_t value) -> std::int32_t
Returns the 1-based position of the least significant bit set to 1 in the given 32-bit value. Returns 0 for input value 0.
Definition Traits.hpp:65
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto popcount(TIntrinsic const &intrinsic, std::uint32_t value) -> std::int32_t
Returns the number of 1 bits in the given 32-bit value.
Definition Traits.hpp:38
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is implemented by the given implementation.
Definition Interface.hpp:15