alpaka
Abstraction Library for Parallel Kernel Acceleration
Common.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Axel Hübl, Benjamin Worpitz, Matthias Werner, Jan Stephan, René Widera, Andrea Bocci, Aurora Perego
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
8 #include "alpaka/core/Debug.hpp"
9 
10 // Boost.Uuid errors with VS2017 when intrin.h is not included
11 #if defined(_MSC_VER) && _MSC_VER >= 1910
12 # include <intrin.h>
13 #endif
14 
15 #if BOOST_LANG_HIP
16 // HIP defines some keywords like __forceinline__ in header files.
17 # include <hip/hip_runtime.h>
18 #endif
19 
20 //! All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
21 //!
22 //! \code{.cpp}
23 //! Usage:
24 //! ALPAKA_FN_ACC
25 //! auto add(std::int32_t a, std::int32_t b)
26 //! -> std::int32_t;
27 //! \endcode
28 //! @{
29 #if BOOST_LANG_CUDA || BOOST_LANG_HIP
30 # if defined(ALPAKA_ACC_GPU_CUDA_ONLY_MODE) || defined(ALPAKA_ACC_GPU_HIP_ONLY_MODE)
31 # define ALPAKA_FN_ACC __device__
32 # else
33 # define ALPAKA_FN_ACC __device__ __host__
34 # endif
35 # define ALPAKA_FN_HOST_ACC __device__ __host__
36 # define ALPAKA_FN_HOST __host__
37 #else
38 # define ALPAKA_FN_ACC
39 # define ALPAKA_FN_HOST_ACC
40 # define ALPAKA_FN_HOST
41 #endif
42 //! @}
43 
44 //! All functions marked with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC that are exported to / imported from different
45 //! translation units have to be attributed with ALPAKA_FN_EXTERN. Note that this needs to be applied to both the
46 //! declaration and the definition.
47 //!
48 //! Usage:
49 //! ALPAKA_FN_ACC ALPAKA_FN_EXTERN auto add(std::int32_t a, std::int32_t b) -> std::int32_t;
50 //!
51 //! Warning: If this is used together with the SYCL back-end make sure that your SYCL runtime supports generic
52 //! address spaces. Otherwise it is forbidden to use pointers as parameter or return type for functions marked
53 //! with ALPAKA_FN_EXTERN.
54 #ifdef ALPAKA_ACC_SYCL_ENABLED
55 /*
56  This is required by the SYCL standard, section 5.10.1 "SYCL functions and member functions linkage":
57 
58  The default behavior in SYCL applications is that all the definitions and declarations of the functions and member
59  functions are available to the SYCL compiler, in the same translation unit. When this is not the case, all the
60  symbols that need to be exported to a SYCL library or from a C++ library to a SYCL application need to be defined
61  using the macro: SYCL_EXTERNAL.
62 */
63 # define ALPAKA_FN_EXTERN SYCL_EXTERNAL
64 #else
65 # define ALPAKA_FN_EXTERN
66 #endif
67 
68 //! Disable nvcc warning:
69 //! 'calling a __host__ function from __host__ __device__ function.'
70 //! Usage:
71 //! ALPAKA_NO_HOST_ACC_WARNING
72 //! ALPAKA_FN_HOST_ACC function_declaration()
73 //! WARNING: Only use this method if there is no other way.
74 //! Most cases can be solved by #if BOOST_ARCH_PTX or #if BOOST_LANG_CUDA.
75 #if(BOOST_LANG_CUDA && !BOOST_COMP_CLANG_CUDA)
76 # if BOOST_COMP_MSVC || defined(BOOST_COMP_MSVC_EMULATED)
77 # define ALPAKA_NO_HOST_ACC_WARNING __pragma(hd_warning_disable)
78 # else
79 # define ALPAKA_NO_HOST_ACC_WARNING _Pragma("hd_warning_disable")
80 # endif
81 #else
82 # define ALPAKA_NO_HOST_ACC_WARNING
83 #endif
84 
85 //! Macro defining the inline function attribute.
86 //!
87 //! The macro should stay on the left hand side of keywords, e.g. 'static', 'constexpr', 'explicit' or the return type.
88 #if BOOST_LANG_CUDA || BOOST_LANG_HIP
89 # define ALPAKA_FN_INLINE __forceinline__
90 #elif BOOST_COMP_MSVC || defined(BOOST_COMP_MSVC_EMULATED)
91 // TODO: With C++20 [[msvc::forceinline]] can be used.
92 # define ALPAKA_FN_INLINE __forceinline
93 #else
94 // For gcc, clang, and clang-based compilers like Intel icpx
95 # define ALPAKA_FN_INLINE [[gnu::always_inline]] inline
96 #endif
97 
98 //! This macro defines a variable lying in global accelerator device memory.
99 //!
100 //! Example:
101 //! ALPAKA_STATIC_ACC_MEM_GLOBAL alpaka::DevGlobal<TAcc, int> variable;
102 //!
103 //! Those variables behave like ordinary variables when used in file-scope,
104 //! but inside kernels the get() method must be used to access the variable.
105 //! They are declared inline to resolve to a single instance across multiple
106 //! translation units.
107 //! Like ordinary variables, only one definition is allowed (ODR)
108 //! Failure to do so might lead to linker errors.
109 //!
110 //! In contrast to ordinary variables, you can not define such variables
111 //! as static compilation unit local variables with internal linkage
112 //! because this is forbidden by CUDA.
113 //!
114 //! \attention It is not allowed to initialize the variable together with the declaration.
115 //! To initialize the variable alpaka::memcpy must be used.
116 //! \code{.cpp}
117 //! ALPAKA_STATIC_ACC_MEM_GLOBAL alpaka::DevGlobal<TAcc, int> foo;
118 //!
119 //! struct DeviceMemoryKernel
120 //! {
121 //! ALPAKA_NO_HOST_ACC_WARNING
122 //! template<typename TAcc>
123 //! ALPAKA_FN_ACC void operator()(TAcc const& acc) const
124 //! {
125 //! auto a = foo<TAcc>.get();
126 //! }
127 //! }
128 //!
129 //! void initFoo() {
130 //! auto extent = alpaka::Vec<alpaka::DimInt<1u>, size_t>{1};
131 //! int initialValue = 42;
132 //! alpaka::ViewPlainPtr<DevHost, int, alpaka::DimInt<1u>, size_t> bufHost(&initialValue, devHost, extent);
133 //! alpaka::memcpy(queue, foo<Acc>, bufHost, extent);
134 //! }
135 //! \endcode
136 #if((BOOST_LANG_CUDA && BOOST_COMP_CLANG_CUDA) || (BOOST_LANG_CUDA && BOOST_COMP_NVCC && BOOST_ARCH_PTX) \
137  || BOOST_LANG_HIP)
138 # define ALPAKA_STATIC_ACC_MEM_GLOBAL \
139  template<typename TAcc> \
140  inline __device__
141 #else
142 # define ALPAKA_STATIC_ACC_MEM_GLOBAL \
143  template<typename TAcc> \
144  inline
145 #endif
146 
147 //! This macro defines a variable lying in constant accelerator device memory.
148 //!
149 //! Example:
150 //! ALPAKA_STATIC_ACC_MEM_CONSTANT alpaka::DevGlobal<TAcc, const int> variable;
151 //!
152 //! Those variables behave like ordinary variables when used in file-scope,
153 //! but inside kernels the get() method must be used to access the variable.
154 //! They are declared inline to resolve to a single instance across multiple
155 //! translation units.
156 //! Like ordinary variables, only one definition is allowed (ODR)
157 //! Failure to do so might lead to linker errors.
158 //!
159 //! In contrast to ordinary variables, you can not define such variables
160 //! as static compilation unit local variables with internal linkage
161 //! because this is forbidden by CUDA.
162 //!
163 //! \attention It is not allowed to initialize the variable together with the declaration.
164 //! To initialize the variable alpaka::memcpy must be used.
165 //! \code{.cpp}
166 //! ALPAKA_STATIC_ACC_MEM_CONSTANT alpaka::DevGlobal<TAcc, const int> foo;
167 //!
168 //! struct DeviceMemoryKernel
169 //! {
170 //! ALPAKA_NO_HOST_ACC_WARNING
171 //! template<typename TAcc>
172 //! ALPAKA_FN_ACC void operator()(TAcc const& acc) const
173 //! {
174 //! auto a = foo<TAcc>.get();
175 //! }
176 //! }
177 //!
178 //! void initFoo() {
179 //! auto extent = alpaka::Vec<alpaka::DimInt<1u>, size_t>{1};
180 //! int initialValue = 42;
181 //! alpaka::ViewPlainPtr<DevHost, int, alpaka::DimInt<1u>, size_t> bufHost(&initialValue, devHost, extent);
182 //! alpaka::memcpy(queue, foo<Acc>, bufHost, extent);
183 //! }
184 //! \endcode
185 #if((BOOST_LANG_CUDA && BOOST_COMP_CLANG_CUDA) || (BOOST_LANG_CUDA && BOOST_COMP_NVCC && BOOST_ARCH_PTX) \
186  || BOOST_LANG_HIP)
187 # define ALPAKA_STATIC_ACC_MEM_CONSTANT \
188  template<typename TAcc> \
189  inline __constant__
190 #else
191 # define ALPAKA_STATIC_ACC_MEM_CONSTANT \
192  template<typename TAcc> \
193  inline
194 #endif
195 
196 //! This macro disables memory optimizations for annotated device memory.
197 //!
198 //! Example:
199 //! ALPAKA_DEVICE_VOLATILE float* ptr;
200 //!
201 //! This is useful for pointers, (shared) variables and shared memory which are used in combination with
202 //! the alpaka::mem_fence() function. It ensures that memory annotated with this macro will always be written directly
203 //! to memory (and not to a register or cache because of compiler optimizations).
204 #if(BOOST_LANG_CUDA && BOOST_ARCH_PTX) \
205  || (BOOST_LANG_HIP && defined(__HIP_DEVICE_COMPILE__) && __HIP_DEVICE_COMPILE__ == 1)
206 # define ALPAKA_DEVICE_VOLATILE volatile
207 #else
208 # define ALPAKA_DEVICE_VOLATILE
209 #endif