alpaka
Abstraction Library for Parallel Kernel Acceleration
Common.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Axel Hübl, Benjamin Worpitz, Matthias Werner, Jan Stephan, René Widera, Andrea Bocci, Aurora Perego
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
8 #include "alpaka/core/Debug.hpp"
9 
10 // Boost.Uuid errors with VS2017 when intrin.h is not included
11 #if defined(_MSC_VER) && _MSC_VER >= 1910
12 # include <intrin.h>
13 #endif
14 
15 #if BOOST_LANG_HIP
16 // HIP defines some keywords like __forceinline__ in header files.
17 # include <hip/hip_runtime.h>
18 #endif
19 
20 //! All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
21 //!
22 //! \code{.cpp}
23 //! Usage:
24 //! ALPAKA_FN_ACC
25 //! auto add(std::int32_t a, std::int32_t b)
26 //! -> std::int32_t;
27 //! \endcode
28 //! @{
29 #if BOOST_LANG_CUDA || BOOST_LANG_HIP
30 # if defined(ALPAKA_ACC_GPU_CUDA_ONLY_MODE) || defined(ALPAKA_ACC_GPU_HIP_ONLY_MODE)
31 # define ALPAKA_FN_ACC __device__
32 # else
33 # define ALPAKA_FN_ACC __device__ __host__
34 # endif
35 # define ALPAKA_FN_HOST_ACC __device__ __host__
36 # define ALPAKA_FN_HOST __host__
37 #else
38 # define ALPAKA_FN_ACC
39 # define ALPAKA_FN_HOST_ACC
40 # define ALPAKA_FN_HOST
41 #endif
42 //! @}
43 
44 //! All functions marked with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC that are exported to / imported from different
45 //! translation units have to be attributed with ALPAKA_FN_EXTERN. Note that this needs to be applied to both the
46 //! declaration and the definition.
47 //!
48 //! Usage:
49 //! ALPAKA_FN_ACC ALPAKA_FN_EXTERN auto add(std::int32_t a, std::int32_t b) -> std::int32_t;
50 //!
51 //! Warning: If this is used together with the SYCL back-end make sure that your SYCL runtime supports generic
52 //! address spaces. Otherwise it is forbidden to use pointers as parameter or return type for functions marked
53 //! with ALPAKA_FN_EXTERN.
54 #ifdef ALPAKA_ACC_SYCL_ENABLED
55 /*
56  This is required by the SYCL standard, section 5.10.1 "SYCL functions and member functions linkage":
57 
58  The default behavior in SYCL applications is that all the definitions and declarations of the functions and member
59  functions are available to the SYCL compiler, in the same translation unit. When this is not the case, all the
60  symbols that need to be exported to a SYCL library or from a C++ library to a SYCL application need to be defined
61  using the macro: SYCL_EXTERNAL.
62 */
63 # define ALPAKA_FN_EXTERN SYCL_EXTERNAL
64 #else
65 # define ALPAKA_FN_EXTERN
66 #endif
67 
68 //! Disable nvcc warning:
69 //! 'calling a __host__ function from __host__ __device__ function.'
70 //! Usage:
71 //! ALPAKA_NO_HOST_ACC_WARNING
72 //! ALPAKA_FN_HOST_ACC function_declaration()
73 //! WARNING: Only use this method if there is no other way.
74 //! Most cases can be solved by #if BOOST_ARCH_PTX or #if BOOST_LANG_CUDA.
75 #if(BOOST_LANG_CUDA && !BOOST_COMP_CLANG_CUDA)
76 # if BOOST_COMP_MSVC || defined(BOOST_COMP_MSVC_EMULATED)
77 # define ALPAKA_NO_HOST_ACC_WARNING __pragma(hd_warning_disable)
78 # else
79 # define ALPAKA_NO_HOST_ACC_WARNING _Pragma("hd_warning_disable")
80 # endif
81 #else
82 # define ALPAKA_NO_HOST_ACC_WARNING
83 #endif
84 
85 //! Macro defining the inline function attribute.
86 //!
87 //! The macro should stay on the left hand side of keywords, e.g. 'static', 'constexpr', 'explicit' or the return type.
88 #if BOOST_LANG_CUDA || BOOST_LANG_HIP
89 # define ALPAKA_FN_INLINE __forceinline__
90 #elif BOOST_COMP_MSVC || defined(BOOST_COMP_MSVC_EMULATED)
91 // TODO: With C++20 [[msvc::forceinline]] can be used.
92 # define ALPAKA_FN_INLINE __forceinline
93 #else
94 // For gcc, clang, and clang-based compilers like Intel icpx
95 # define ALPAKA_FN_INLINE [[gnu::always_inline]] inline
96 #endif
97 
98 //! This macro defines a variable lying in global accelerator device memory.
99 //!
100 //! Example:
101 //! ALPAKA_STATIC_ACC_MEM_GLOBAL alpaka::DevGlobal<TAcc, int> variable;
102 //!
103 //! Those variables behave like ordinary variables when used in file-scope,
104 //! but inside kernels the get() method must be used to access the variable.
105 //! They are declared inline to resolve to a single instance across multiple
106 //! translation units.
107 //! Like ordinary variables, only one definition is allowed (ODR)
108 //! Failure to do so might lead to linker errors.
109 //!
110 //! In contrast to ordinary variables, you can not define such variables
111 //! as static compilation unit local variables with internal linkage
112 //! because this is forbidden by CUDA.
113 //!
114 //! \attention It is not allowed to initialize the variable together with the declaration.
115 //! To initialize the variable alpaka::memcpy must be used.
116 //! \code{.cpp}
117 //! ALPAKA_STATIC_ACC_MEM_GLOBAL alpaka::DevGlobal<TAcc, int> foo;
118 //!
119 //! struct DeviceMemoryKernel
120 //! {
121 //! ALPAKA_NO_HOST_ACC_WARNING
122 //! template<typename TAcc>
123 //! ALPAKA_FN_ACC void operator()(TAcc const& acc) const
124 //! {
125 //! auto a = foo<TAcc>.get();
126 //! }
127 //! }
128 //!
129 //! void initFoo() {
130 //! auto extent = alpaka::Vec<alpaka::DimInt<1u>, size_t>{1};
131 //! int initialValue = 42;
132 //! alpaka::ViewPlainPtr<DevHost, int, alpaka::DimInt<1u>, size_t> bufHost(&initialValue, devHost, extent);
133 //! alpaka::memcpy(queue, foo<Acc>, bufHost, extent);
134 //! }
135 //! \endcode
136 #if((BOOST_LANG_CUDA && BOOST_COMP_CLANG_CUDA) || (BOOST_LANG_CUDA && BOOST_COMP_NVCC && BOOST_ARCH_PTX) \
137  || BOOST_LANG_HIP)
138 # if defined(__CUDACC_RDC__) || defined(__CLANG_RDC__)
139 # define ALPAKA_STATIC_ACC_MEM_GLOBAL \
140  template<typename TAcc> \
141  __device__ inline
142 # else
143 # define ALPAKA_STATIC_ACC_MEM_GLOBAL \
144  template<typename TAcc> \
145  __device__ static
146 # endif
147 #else
148 # define ALPAKA_STATIC_ACC_MEM_GLOBAL \
149  template<typename TAcc> \
150  inline
151 #endif
152 
153 //! This macro defines a variable lying in constant accelerator device memory.
154 //!
155 //! Example:
156 //! ALPAKA_STATIC_ACC_MEM_CONSTANT alpaka::DevGlobal<TAcc, const int> variable;
157 //!
158 //! Those variables behave like ordinary variables when used in file-scope,
159 //! but inside kernels the get() method must be used to access the variable.
160 //! They are declared inline to resolve to a single instance across multiple
161 //! translation units.
162 //! Like ordinary variables, only one definition is allowed (ODR)
163 //! Failure to do so might lead to linker errors.
164 //!
165 //! In contrast to ordinary variables, you can not define such variables
166 //! as static compilation unit local variables with internal linkage
167 //! because this is forbidden by CUDA.
168 //!
169 //! \attention It is not allowed to initialize the variable together with the declaration.
170 //! To initialize the variable alpaka::memcpy must be used.
171 //! \code{.cpp}
172 //! ALPAKA_STATIC_ACC_MEM_CONSTANT alpaka::DevGlobal<TAcc, const int> foo;
173 //!
174 //! struct DeviceMemoryKernel
175 //! {
176 //! ALPAKA_NO_HOST_ACC_WARNING
177 //! template<typename TAcc>
178 //! ALPAKA_FN_ACC void operator()(TAcc const& acc) const
179 //! {
180 //! auto a = foo<TAcc>.get();
181 //! }
182 //! }
183 //!
184 //! void initFoo() {
185 //! auto extent = alpaka::Vec<alpaka::DimInt<1u>, size_t>{1};
186 //! int initialValue = 42;
187 //! alpaka::ViewPlainPtr<DevHost, int, alpaka::DimInt<1u>, size_t> bufHost(&initialValue, devHost, extent);
188 //! alpaka::memcpy(queue, foo<Acc>, bufHost, extent);
189 //! }
190 //! \endcode
191 #if((BOOST_LANG_CUDA && BOOST_COMP_CLANG_CUDA) || (BOOST_LANG_CUDA && BOOST_COMP_NVCC && BOOST_ARCH_PTX) \
192  || BOOST_LANG_HIP)
193 # if defined(__CUDACC_RDC__) || defined(__CLANG_RDC__)
194 # define ALPAKA_STATIC_ACC_MEM_CONSTANT \
195  template<typename TAcc> \
196  __constant__ inline
197 # else
198 # define ALPAKA_STATIC_ACC_MEM_CONSTANT \
199  template<typename TAcc> \
200  __constant__ static
201 # endif
202 #else
203 # define ALPAKA_STATIC_ACC_MEM_CONSTANT \
204  template<typename TAcc> \
205  inline
206 #endif
207 
208 //! This macro disables memory optimizations for annotated device memory.
209 //!
210 //! Example:
211 //! ALPAKA_DEVICE_VOLATILE float* ptr;
212 //!
213 //! This is useful for pointers, (shared) variables and shared memory which are used in combination with
214 //! the alpaka::mem_fence() function. It ensures that memory annotated with this macro will always be written directly
215 //! to memory (and not to a register or cache because of compiler optimizations).
216 #if(BOOST_LANG_CUDA && BOOST_ARCH_PTX) \
217  || (BOOST_LANG_HIP && defined(__HIP_DEVICE_COMPILE__) && __HIP_DEVICE_COMPILE__ == 1)
218 # define ALPAKA_DEVICE_VOLATILE volatile
219 #else
220 # define ALPAKA_DEVICE_VOLATILE
221 #endif