alpaka
Abstraction Library for Parallel Kernel Acceleration
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages Concepts
SyclSubgroupSize.hpp
Go to the documentation of this file.
1/* Copyright 2023 Andrea Bocci, Aurora Perego
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#ifdef ALPAKA_ACC_SYCL_ENABLED // when this macro is not defined, this file defines nothing
6
7# ifdef __SYCL_DEVICE_ONLY__ // device compilation; the matching # else branch defines (0) for host compilation
8 /* Each test below has the form (defined(M) && M): it matches only when the target macro M is defined and non-zero. */
9# if /* Broadwell Intel graphics architecture */ \
10 (defined(__SYCL_TARGET_INTEL_GPU_BDW__) && __SYCL_TARGET_INTEL_GPU_BDW__) \
11 || /* Skylake Intel graphics architecture */ \
12 (defined(__SYCL_TARGET_INTEL_GPU_SKL__) && __SYCL_TARGET_INTEL_GPU_SKL__) \
13 || /* Kaby Lake Intel graphics architecture */ \
14 (defined(__SYCL_TARGET_INTEL_GPU_KBL__) && __SYCL_TARGET_INTEL_GPU_KBL__) \
15 || /* Coffee Lake Intel graphics architecture */ \
16 (defined(__SYCL_TARGET_INTEL_GPU_CFL__) && __SYCL_TARGET_INTEL_GPU_CFL__) \
17 || /* Apollo Lake Intel graphics architecture */ \
18 (defined(__SYCL_TARGET_INTEL_GPU_APL__) && __SYCL_TARGET_INTEL_GPU_APL__) \
19 || /* Gemini Lake Intel graphics architecture */ \
20 (defined(__SYCL_TARGET_INTEL_GPU_GLK__) && __SYCL_TARGET_INTEL_GPU_GLK__) \
21 || /* Whiskey Lake Intel graphics architecture */ \
22 (defined(__SYCL_TARGET_INTEL_GPU_WHL__) && __SYCL_TARGET_INTEL_GPU_WHL__) \
23 || /* Amber Lake Intel graphics architecture */ \
24 (defined(__SYCL_TARGET_INTEL_GPU_AML__) && __SYCL_TARGET_INTEL_GPU_AML__) \
25 || /* Comet Lake Intel graphics architecture */ \
26 (defined(__SYCL_TARGET_INTEL_GPU_CML__) && __SYCL_TARGET_INTEL_GPU_CML__) \
27 || /* Ice Lake Intel graphics architecture */ \
28 (defined(__SYCL_TARGET_INTEL_GPU_ICLLP__) && __SYCL_TARGET_INTEL_GPU_ICLLP__) \
29 || /* Elkhart Lake or Jasper Lake Intel graphics architecture */ \
30 (defined(__SYCL_TARGET_INTEL_GPU_EHL__) && __SYCL_TARGET_INTEL_GPU_EHL__) \
31 || /* Tiger Lake Intel graphics architecture */ \
32 (defined(__SYCL_TARGET_INTEL_GPU_TGLLP__) && __SYCL_TARGET_INTEL_GPU_TGLLP__) \
33 || /* Rocket Lake Intel graphics architecture */ \
34 (defined(__SYCL_TARGET_INTEL_GPU_RKL__) && __SYCL_TARGET_INTEL_GPU_RKL__) \
35 || /* Alder Lake S or Raptor Lake S Intel graphics architecture */ \
36 (defined(__SYCL_TARGET_INTEL_GPU_ADL_S__) && __SYCL_TARGET_INTEL_GPU_ADL_S__) \
37 || /* Alder Lake P Intel graphics architecture */ \
38 (defined(__SYCL_TARGET_INTEL_GPU_ADL_P__) && __SYCL_TARGET_INTEL_GPU_ADL_P__) \
39 || /* Alder Lake N Intel graphics architecture */ \
40 (defined(__SYCL_TARGET_INTEL_GPU_ADL_N__) && __SYCL_TARGET_INTEL_GPU_ADL_N__) \
41 || /* DG1 Intel graphics architecture */ \
42 (defined(__SYCL_TARGET_INTEL_GPU_DG1__) && __SYCL_TARGET_INTEL_GPU_DG1__) \
43 || /* Alchemist G10 Intel graphics architecture */ \
44 (defined(__SYCL_TARGET_INTEL_GPU_ACM_G10__) && __SYCL_TARGET_INTEL_GPU_ACM_G10__) \
45 || /* Alchemist G11 Intel graphics architecture */ \
46 (defined(__SYCL_TARGET_INTEL_GPU_ACM_G11__) && __SYCL_TARGET_INTEL_GPU_ACM_G11__) \
47 || /* Alchemist G12 Intel graphics architecture */ \
48 (defined(__SYCL_TARGET_INTEL_GPU_ACM_G12__) && __SYCL_TARGET_INTEL_GPU_ACM_G12__) \
49 || /* Meteor Lake U/S or Arrow Lake U/S Intel graphics architecture */ \
50 (defined(__SYCL_TARGET_INTEL_GPU_MTL_U__) && __SYCL_TARGET_INTEL_GPU_MTL_U__) \
51 || /* Meteor Lake H Intel graphics architecture */ \
52 (defined(__SYCL_TARGET_INTEL_GPU_MTL_H__) && __SYCL_TARGET_INTEL_GPU_MTL_H__) \
53 || /* Arrow Lake H Intel graphics architecture */ \
54 (defined(__SYCL_TARGET_INTEL_GPU_ARL_H__) && __SYCL_TARGET_INTEL_GPU_ARL_H__) \
55 || /* Battlemage G21 Intel graphics architecture */ \
56 (defined(__SYCL_TARGET_INTEL_GPU_BMG_G21__) && __SYCL_TARGET_INTEL_GPU_BMG_G21__) \
57 || /* Lunar Lake Intel graphics architecture */ \
58 (defined(__SYCL_TARGET_INTEL_GPU_LNL_M__) && __SYCL_TARGET_INTEL_GPU_LNL_M__)
59 /* Bitwise OR of 8, 16 and 32 (one bit per value); presumably the sub-group sizes usable on the Intel GPU targets listed above - TODO confirm against the code that consumes this macro. */
60# define SYCL_SUBGROUP_SIZE (8 | 16 | 32)
61
62# elif /* Ponte Vecchio Intel graphics architecture */ \
63 (defined(__SYCL_TARGET_INTEL_GPU_PVC__) && __SYCL_TARGET_INTEL_GPU_PVC__) \
64 || /* Ponte Vecchio VG Intel graphics architecture */ \
65 (defined(__SYCL_TARGET_INTEL_GPU_PVC_VG__) && __SYCL_TARGET_INTEL_GPU_PVC_VG__)
66 /* Bitwise OR of 16 and 32 only: unlike the other Intel GPU branch of this conditional, the value 8 is not included for the Ponte Vecchio targets. */
67# define SYCL_SUBGROUP_SIZE (16 | 32)
68
69# elif(/* generate code ahead of time for x86_64 CPUs */ \
70 defined(__SYCL_TARGET_INTEL_X86_64__) && __SYCL_TARGET_INTEL_X86_64__)
71 /* Bitwise OR of 4, 8, 16, 32 and 64: this branch combines more values than any other branch of this conditional. */
72# define SYCL_SUBGROUP_SIZE (4 | 8 | 16 | 32 | 64)
73
74# elif /* NVIDIA Maxwell architecture (compute capability 5.0) */ \
75 (defined(__SYCL_TARGET_NVIDIA_GPU_SM50__) && __SYCL_TARGET_NVIDIA_GPU_SM50__) \
76 || /* NVIDIA Maxwell architecture (compute capability 5.2) */ \
77 (defined(__SYCL_TARGET_NVIDIA_GPU_SM52__) && __SYCL_TARGET_NVIDIA_GPU_SM52__) \
78 || /* NVIDIA Jetson TX1 / Nano (compute capability 5.3) */ \
79 (defined(__SYCL_TARGET_NVIDIA_GPU_SM53__) && __SYCL_TARGET_NVIDIA_GPU_SM53__) \
80 || /* NVIDIA Pascal architecture (compute capability 6.0) */ \
81 (defined(__SYCL_TARGET_NVIDIA_GPU_SM60__) && __SYCL_TARGET_NVIDIA_GPU_SM60__) \
82 || /* NVIDIA Pascal architecture (compute capability 6.1) */ \
83 (defined(__SYCL_TARGET_NVIDIA_GPU_SM61__) && __SYCL_TARGET_NVIDIA_GPU_SM61__) \
84 || /* NVIDIA Jetson TX2 (compute capability 6.2) */ \
85 (defined(__SYCL_TARGET_NVIDIA_GPU_SM62__) && __SYCL_TARGET_NVIDIA_GPU_SM62__) \
86 || /* NVIDIA Volta architecture (compute capability 7.0) */ \
87 (defined(__SYCL_TARGET_NVIDIA_GPU_SM70__) && __SYCL_TARGET_NVIDIA_GPU_SM70__) \
88 || /* NVIDIA Jetson AGX (compute capability 7.2) */ \
89 (defined(__SYCL_TARGET_NVIDIA_GPU_SM72__) && __SYCL_TARGET_NVIDIA_GPU_SM72__) \
90 || /* NVIDIA Turing architecture (compute capability 7.5) */ \
91 (defined(__SYCL_TARGET_NVIDIA_GPU_SM75__) && __SYCL_TARGET_NVIDIA_GPU_SM75__) \
92 || /* NVIDIA Ampere architecture (compute capability 8.0) */ \
93 (defined(__SYCL_TARGET_NVIDIA_GPU_SM80__) && __SYCL_TARGET_NVIDIA_GPU_SM80__) \
94 || /* NVIDIA Ampere architecture (compute capability 8.6) */ \
95 (defined(__SYCL_TARGET_NVIDIA_GPU_SM86__) && __SYCL_TARGET_NVIDIA_GPU_SM86__) \
96 || /* NVIDIA Jetson/Drive AGX Orin (compute capability 8.7) */ \
97 (defined(__SYCL_TARGET_NVIDIA_GPU_SM87__) && __SYCL_TARGET_NVIDIA_GPU_SM87__) \
98 || /* NVIDIA Ada Lovelace architecture (compute capability 8.9) */ \
99 (defined(__SYCL_TARGET_NVIDIA_GPU_SM89__) && __SYCL_TARGET_NVIDIA_GPU_SM89__) \
100 || /* NVIDIA Hopper architecture (compute capability 9.0) */ \
101 (defined(__SYCL_TARGET_NVIDIA_GPU_SM90__) && __SYCL_TARGET_NVIDIA_GPU_SM90__)
102
103# define SYCL_SUBGROUP_SIZE (32) /* CUDA supports warp size 32; this single value is used for every compute capability listed above (5.0 to 9.0) */
104
105# elif /* AMD GCN 2.0 Sea Islands architecture (gfx 7.0) */ \
106 (defined(__SYCL_TARGET_AMD_GPU_GFX700__) && __SYCL_TARGET_AMD_GPU_GFX700__) \
107 || /* AMD GCN 2.0 Sea Islands architecture (gfx 7.0) */ \
108 (defined(__SYCL_TARGET_AMD_GPU_GFX701__) && __SYCL_TARGET_AMD_GPU_GFX701__) \
109 || /* AMD GCN 2.0 Sea Islands architecture (gfx 7.0) */ \
110 (defined(__SYCL_TARGET_AMD_GPU_GFX702__) && __SYCL_TARGET_AMD_GPU_GFX702__) \
111 || /* AMD GCN 3.0 Volcanic Islands architecture (gfx 8.0) */ \
112 (defined(__SYCL_TARGET_AMD_GPU_GFX801__) && __SYCL_TARGET_AMD_GPU_GFX801__) \
113 || /* AMD GCN 3.0 Volcanic Islands architecture (gfx 8.0) */ \
114 (defined(__SYCL_TARGET_AMD_GPU_GFX802__) && __SYCL_TARGET_AMD_GPU_GFX802__) \
115 || /* AMD GCN 4.0 Arctic Islands architecture (gfx 8.0) */ \
116 (defined(__SYCL_TARGET_AMD_GPU_GFX803__) && __SYCL_TARGET_AMD_GPU_GFX803__) \
117 || /* AMD GCN 3.0 Volcanic Islands architecture (gfx 8.0) */ \
118 (defined(__SYCL_TARGET_AMD_GPU_GFX805__) && __SYCL_TARGET_AMD_GPU_GFX805__) \
119 || /* AMD GCN 3.0 Volcanic Islands architecture (gfx 8.1) */ \
120 (defined(__SYCL_TARGET_AMD_GPU_GFX810__) && __SYCL_TARGET_AMD_GPU_GFX810__) \
121 || /* AMD GCN 5.0 Vega architecture (gfx 9.0) */ \
122 (defined(__SYCL_TARGET_AMD_GPU_GFX900__) && __SYCL_TARGET_AMD_GPU_GFX900__) \
123 || /* AMD GCN 5.0 Vega architecture (gfx 9.0) */ \
124 (defined(__SYCL_TARGET_AMD_GPU_GFX902__) && __SYCL_TARGET_AMD_GPU_GFX902__) \
125 || /* AMD GCN 5.0 Vega architecture (gfx 9.0) */ \
126 (defined(__SYCL_TARGET_AMD_GPU_GFX904__) && __SYCL_TARGET_AMD_GPU_GFX904__) \
127 || /* AMD GCN 5.1 Vega II architecture (gfx 9.0) */ \
128 (defined(__SYCL_TARGET_AMD_GPU_GFX906__) && __SYCL_TARGET_AMD_GPU_GFX906__) \
129 || /* AMD CDNA 1.0 Arcturus architecture (gfx 9.0) */ \
130 (defined(__SYCL_TARGET_AMD_GPU_GFX908__) && __SYCL_TARGET_AMD_GPU_GFX908__) \
131 || /* AMD GCN 5.0 Raven 2 architecture (gfx 9.0) */ \
132 (defined(__SYCL_TARGET_AMD_GPU_GFX909__) && __SYCL_TARGET_AMD_GPU_GFX909__) \
133 || /* AMD CDNA 2.0 Aldebaran architecture (gfx 9.0) */ \
134 (defined(__SYCL_TARGET_AMD_GPU_GFX90A__) && __SYCL_TARGET_AMD_GPU_GFX90A__) \
135 || /* AMD GCN 5.1 Renoir architecture (gfx 9.0) */ \
136 (defined(__SYCL_TARGET_AMD_GPU_GFX90C__) && __SYCL_TARGET_AMD_GPU_GFX90C__) \
137 || /* AMD CDNA 3.0 Aqua Vanjaram architecture (gfx 9.4) */ \
138 (defined(__SYCL_TARGET_AMD_GPU_GFX940__) && __SYCL_TARGET_AMD_GPU_GFX940__) \
139 || /* AMD CDNA 3.0 Aqua Vanjaram architecture (gfx 9.4) */ \
140 (defined(__SYCL_TARGET_AMD_GPU_GFX941__) && __SYCL_TARGET_AMD_GPU_GFX941__) \
141 || /* AMD CDNA 3.0 Aqua Vanjaram architecture (gfx 9.4) */ \
142 (defined(__SYCL_TARGET_AMD_GPU_GFX942__) && __SYCL_TARGET_AMD_GPU_GFX942__)
143
144# define SYCL_SUBGROUP_SIZE (64) /* up to gfx9 (the gfx7xx, gfx8xx and gfx9xx targets listed above), HIP supports wavefront size 64 */
145
146# elif /* AMD RDNA 1.0 Navi 10 architecture (gfx 10.1) */ \
147 (defined(__SYCL_TARGET_AMD_GPU_GFX1010__) && __SYCL_TARGET_AMD_GPU_GFX1010__) \
148 || /* AMD RDNA 1.0 Navi 12 architecture (gfx 10.1) */ \
149 (defined(__SYCL_TARGET_AMD_GPU_GFX1011__) && __SYCL_TARGET_AMD_GPU_GFX1011__) \
150 || /* AMD RDNA 1.0 Navi 14 architecture (gfx 10.1) */ \
151 (defined(__SYCL_TARGET_AMD_GPU_GFX1012__) && __SYCL_TARGET_AMD_GPU_GFX1012__) \
152 || /* AMD RDNA 2.0 Oberon architecture (gfx 10.1) */ \
153 (defined(__SYCL_TARGET_AMD_GPU_GFX1013__) && __SYCL_TARGET_AMD_GPU_GFX1013__) \
154 || /* AMD RDNA 2.0 Navi 21 architecture (gfx 10.3) */ \
155 (defined(__SYCL_TARGET_AMD_GPU_GFX1030__) && __SYCL_TARGET_AMD_GPU_GFX1030__) \
156 || /* AMD RDNA 2.0 Navi 22 architecture (gfx 10.3) */ \
157 (defined(__SYCL_TARGET_AMD_GPU_GFX1031__) && __SYCL_TARGET_AMD_GPU_GFX1031__) \
158 || /* AMD RDNA 2.0 Navi 23 architecture (gfx 10.3) */ \
159 (defined(__SYCL_TARGET_AMD_GPU_GFX1032__) && __SYCL_TARGET_AMD_GPU_GFX1032__) \
160 || /* AMD RDNA 2.0 Van Gogh architecture (gfx 10.3) */ \
161 (defined(__SYCL_TARGET_AMD_GPU_GFX1033__) && __SYCL_TARGET_AMD_GPU_GFX1033__) \
162 || /* AMD RDNA 2.0 Navi 24 architecture (gfx 10.3) */ \
163 (defined(__SYCL_TARGET_AMD_GPU_GFX1034__) && __SYCL_TARGET_AMD_GPU_GFX1034__) \
164 || /* AMD RDNA 2.0 Rembrandt Mobile architecture (gfx 10.3) */ \
165 (defined(__SYCL_TARGET_AMD_GPU_GFX1035__) && __SYCL_TARGET_AMD_GPU_GFX1035__) \
166 || /* AMD RDNA 2.0 Raphael architecture (gfx 10.3) */ \
167 (defined(__SYCL_TARGET_AMD_GPU_GFX1036__) && __SYCL_TARGET_AMD_GPU_GFX1036__) \
168 || /* AMD RDNA 3.0 Navi 31 architecture (gfx 11.0) */ \
169 (defined(__SYCL_TARGET_AMD_GPU_GFX1100__) && __SYCL_TARGET_AMD_GPU_GFX1100__) \
170 || /* AMD RDNA 3.0 Navi 32 architecture (gfx 11.0) */ \
171 (defined(__SYCL_TARGET_AMD_GPU_GFX1101__) && __SYCL_TARGET_AMD_GPU_GFX1101__) \
172 || /* AMD RDNA 3.0 Navi 33 architecture (gfx 11.0) */ \
173 (defined(__SYCL_TARGET_AMD_GPU_GFX1102__) && __SYCL_TARGET_AMD_GPU_GFX1102__) \
174 || /* AMD RDNA 3.0 Phoenix mobile architecture (gfx 11.0) */ \
175 (defined(__SYCL_TARGET_AMD_GPU_GFX1103__) && __SYCL_TARGET_AMD_GPU_GFX1103__) \
176 || /* AMD RDNA 3.5 Strix Point architecture (gfx 11.5) */ \
177 (defined(__SYCL_TARGET_AMD_GPU_GFX1150__) && __SYCL_TARGET_AMD_GPU_GFX1150__) \
178 || /* AMD RDNA 3.5 Strix Halo architecture (gfx 11.5) */ \
179 (defined(__SYCL_TARGET_AMD_GPU_GFX1151__) && __SYCL_TARGET_AMD_GPU_GFX1151__) \
180 || /* AMD RDNA 4.0 Navi 44 architecture (gfx 12.0) */ \
181 (defined(__SYCL_TARGET_AMD_GPU_GFX1200__) && __SYCL_TARGET_AMD_GPU_GFX1200__) \
182 || /* AMD RDNA 4.0 Navi 48 architecture (gfx 12.0) */ \
183 (defined(__SYCL_TARGET_AMD_GPU_GFX1201__) && __SYCL_TARGET_AMD_GPU_GFX1201__)
184
185# define SYCL_SUBGROUP_SIZE (32) /* starting from gfx10 (the gfx10xx, gfx11xx and gfx12xx targets listed above), HIP supports wavefront size 32 */
186
187# else // __SYCL_TARGET_*: no target macro tested above is defined and non-zero
188
189# define SYCL_SUBGROUP_SIZE (0) /* unknown target: the value is zero, so no size bit is set */
190
191# endif // __SYCL_TARGET_*
192
193# else // __SYCL_DEVICE_ONLY__ is not defined
194
195# define SYCL_SUBGROUP_SIZE (0) /* host compilation: no __SYCL_TARGET_* macro is inspected on this path and the value is zero */
196
197# endif // __SYCL_DEVICE_ONLY__
198
199#endif // ALPAKA_ACC_SYCL_ENABLED