alpaka
Abstraction Library for Parallel Kernel Acceleration
Toggle main menu visibility
Main Page
Related Pages
Namespaces
Namespace List
Namespace Members
All
a
b
c
d
e
f
g
h
i
l
m
n
o
p
q
r
s
t
u
v
w
Functions
a
b
c
d
e
f
g
h
i
l
m
n
o
p
r
s
t
u
v
w
Variables
a
b
d
e
h
i
l
n
o
p
s
u
w
Typedefs
a
b
c
d
e
f
h
i
l
m
n
p
q
r
t
u
Enumerations
Concepts
Classes
Class List
Class Hierarchy
Class Members
All
:
a
b
c
d
e
f
g
h
i
k
l
m
n
o
p
q
r
s
t
u
v
w
z
~
Functions
a
b
c
d
e
f
g
h
i
k
l
m
n
o
p
q
r
s
t
u
v
w
z
~
Variables
a
b
c
d
e
f
g
h
i
k
l
m
n
p
q
r
s
t
v
w
Typedefs
a
b
c
d
e
f
h
i
k
l
m
p
q
r
s
t
v
w
Enumerations
Enumerator
Related Symbols
Files
File List
File Members
All
a
b
c
i
m
p
t
u
Functions
Typedefs
Macros
a
b
c
m
p
t
u
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Pages
Concepts
Loading...
Searching...
No Matches
SyclSubgroupSize.hpp
Go to the documentation of this file.
1
/* Copyright 2023 Andrea Bocci, Aurora Perego
2
* SPDX-License-Identifier: MPL-2.0
3
*/
4
5
#ifdef ALPAKA_ACC_SYCL_ENABLED
6
7
# ifdef __SYCL_DEVICE_ONLY__
8
9
# if
/* Broadwell Intel graphics architecture */
\
10
(defined(__SYCL_TARGET_INTEL_GPU_BDW__) && __SYCL_TARGET_INTEL_GPU_BDW__) \
11
||
/* Skylake Intel graphics architecture */
\
12
(defined(__SYCL_TARGET_INTEL_GPU_SKL__) && __SYCL_TARGET_INTEL_GPU_SKL__) \
13
||
/* Kaby Lake Intel graphics architecture */
\
14
(defined(__SYCL_TARGET_INTEL_GPU_KBL__) && __SYCL_TARGET_INTEL_GPU_KBL__) \
15
||
/* Coffee Lake Intel graphics architecture */
\
16
(defined(__SYCL_TARGET_INTEL_GPU_CFL__) && __SYCL_TARGET_INTEL_GPU_CFL__) \
17
||
/* Apollo Lake Intel graphics architecture */
\
18
(defined(__SYCL_TARGET_INTEL_GPU_APL__) && __SYCL_TARGET_INTEL_GPU_APL__) \
19
||
/* Gemini Lake Intel graphics architecture */
\
20
(defined(__SYCL_TARGET_INTEL_GPU_GLK__) && __SYCL_TARGET_INTEL_GPU_GLK__) \
21
||
/* Whiskey Lake Intel graphics architecture */
\
22
(defined(__SYCL_TARGET_INTEL_GPU_WHL__) && __SYCL_TARGET_INTEL_GPU_WHL__) \
23
||
/* Amber Lake Intel graphics architecture */
\
24
(defined(__SYCL_TARGET_INTEL_GPU_AML__) && __SYCL_TARGET_INTEL_GPU_AML__) \
25
||
/* Comet Lake Intel graphics architecture */
\
26
(defined(__SYCL_TARGET_INTEL_GPU_CML__) && __SYCL_TARGET_INTEL_GPU_CML__) \
27
||
/* Ice Lake Intel graphics architecture */
\
28
(defined(__SYCL_TARGET_INTEL_GPU_ICLLP__) && __SYCL_TARGET_INTEL_GPU_ICLLP__) \
29
||
/* Elkhart Lake or Jasper Lake Intel graphics architecture */
\
30
(defined(__SYCL_TARGET_INTEL_GPU_EHL__) && __SYCL_TARGET_INTEL_GPU_EHL__) \
31
||
/* Tiger Lake Intel graphics architecture */
\
32
(defined(__SYCL_TARGET_INTEL_GPU_TGLLP__) && __SYCL_TARGET_INTEL_GPU_TGLLP__) \
33
||
/* Rocket Lake Intel graphics architecture */
\
34
(defined(__SYCL_TARGET_INTEL_GPU_RKL__) && __SYCL_TARGET_INTEL_GPU_RKL__) \
35
||
/* Alder Lake S or Raptor Lake S Intel graphics architecture */
\
36
(defined(__SYCL_TARGET_INTEL_GPU_ADL_S__) && __SYCL_TARGET_INTEL_GPU_ADL_S__) \
37
||
/* Alder Lake P Intel graphics architecture */
\
38
(defined(__SYCL_TARGET_INTEL_GPU_ADL_P__) && __SYCL_TARGET_INTEL_GPU_ADL_P__) \
39
||
/* Alder Lake N Intel graphics architecture */
\
40
(defined(__SYCL_TARGET_INTEL_GPU_ADL_N__) && __SYCL_TARGET_INTEL_GPU_ADL_N__) \
41
||
/* DG1 Intel graphics architecture */
\
42
(defined(__SYCL_TARGET_INTEL_GPU_DG1__) && __SYCL_TARGET_INTEL_GPU_DG1__) \
43
||
/* Alchemist G10 Intel graphics architecture */
\
44
(defined(__SYCL_TARGET_INTEL_GPU_ACM_G10__) && __SYCL_TARGET_INTEL_GPU_ACM_G10__) \
45
||
/* Alchemist G11 Intel graphics architecture */
\
46
(defined(__SYCL_TARGET_INTEL_GPU_ACM_G11__) && __SYCL_TARGET_INTEL_GPU_ACM_G11__) \
47
||
/* Alchemist G12 Intel graphics architecture */
\
48
(defined(__SYCL_TARGET_INTEL_GPU_ACM_G12__) && __SYCL_TARGET_INTEL_GPU_ACM_G12__) \
49
||
/* Meteor Lake U/S or Arrow Lake U/S Intel graphics architecture */
\
50
(defined(__SYCL_TARGET_INTEL_GPU_MTL_U__) && __SYCL_TARGET_INTEL_GPU_MTL_U__) \
51
||
/* Meteor Lake H Intel graphics architecture */
\
52
(defined(__SYCL_TARGET_INTEL_GPU_MTL_H__) && __SYCL_TARGET_INTEL_GPU_MTL_H__) \
53
||
/* Arrow Lake H Intel graphics architecture */
\
54
(defined(__SYCL_TARGET_INTEL_GPU_ARL_H__) && __SYCL_TARGET_INTEL_GPU_ARL_H__) \
55
||
/* Battlemage G21 Intel graphics architecture */
\
56
(defined(__SYCL_TARGET_INTEL_GPU_BMG_G21__) && __SYCL_TARGET_INTEL_GPU_BMG_G21__) \
57
||
/* Lunar Lake Intel graphics architecture */
\
58
(defined(__SYCL_TARGET_INTEL_GPU_LNL_M__) && __SYCL_TARGET_INTEL_GPU_LNL_M__)
59
60
# define SYCL_SUBGROUP_SIZE (8 | 16 | 32)
61
62
# elif
/* Ponte Vecchio Intel graphics architecture */
\
63
(defined(__SYCL_TARGET_INTEL_GPU_PVC__) && __SYCL_TARGET_INTEL_GPU_PVC__) \
64
||
/* Ponte Vecchio VG Intel graphics architecture */
\
65
(defined(__SYCL_TARGET_INTEL_GPU_PVC_VG__) && __SYCL_TARGET_INTEL_GPU_PVC_VG__)
66
67
# define SYCL_SUBGROUP_SIZE (16 | 32)
68
69
# elif(
/* generate code ahead of time for x86_64 CPUs */
\
70
defined(__SYCL_TARGET_INTEL_X86_64__) && __SYCL_TARGET_INTEL_X86_64__)
71
72
# define SYCL_SUBGROUP_SIZE (4 | 8 | 16 | 32 | 64)
73
74
# elif
/* NVIDIA Maxwell architecture (compute capability 5.0) */
\
75
(defined(__SYCL_TARGET_NVIDIA_GPU_SM50__) && __SYCL_TARGET_NVIDIA_GPU_SM50__) \
76
||
/* NVIDIA Maxwell architecture (compute capability 5.2) */
\
77
(defined(__SYCL_TARGET_NVIDIA_GPU_SM52__) && __SYCL_TARGET_NVIDIA_GPU_SM52__) \
78
||
/* NVIDIA Jetson TX1 / Nano (compute capability 5.3) */
\
79
(defined(__SYCL_TARGET_NVIDIA_GPU_SM53__) && __SYCL_TARGET_NVIDIA_GPU_SM53__) \
80
||
/* NVIDIA Pascal architecture (compute capability 6.0) */
\
81
(defined(__SYCL_TARGET_NVIDIA_GPU_SM60__) && __SYCL_TARGET_NVIDIA_GPU_SM60__) \
82
||
/* NVIDIA Pascal architecture (compute capability 6.1) */
\
83
(defined(__SYCL_TARGET_NVIDIA_GPU_SM61__) && __SYCL_TARGET_NVIDIA_GPU_SM61__) \
84
||
/* NVIDIA Jetson TX2 (compute capability 6.2) */
\
85
(defined(__SYCL_TARGET_NVIDIA_GPU_SM62__) && __SYCL_TARGET_NVIDIA_GPU_SM62__) \
86
||
/* NVIDIA Volta architecture (compute capability 7.0) */
\
87
(defined(__SYCL_TARGET_NVIDIA_GPU_SM70__) && __SYCL_TARGET_NVIDIA_GPU_SM70__) \
88
||
/* NVIDIA Jetson AGX Xavier (compute capability 7.2) */
\
89
(defined(__SYCL_TARGET_NVIDIA_GPU_SM72__) && __SYCL_TARGET_NVIDIA_GPU_SM72__) \
90
||
/* NVIDIA Turing architecture (compute capability 7.5) */
\
91
(defined(__SYCL_TARGET_NVIDIA_GPU_SM75__) && __SYCL_TARGET_NVIDIA_GPU_SM75__) \
92
||
/* NVIDIA Ampere architecture (compute capability 8.0) */
\
93
(defined(__SYCL_TARGET_NVIDIA_GPU_SM80__) && __SYCL_TARGET_NVIDIA_GPU_SM80__) \
94
||
/* NVIDIA Ampere architecture (compute capability 8.6) */
\
95
(defined(__SYCL_TARGET_NVIDIA_GPU_SM86__) && __SYCL_TARGET_NVIDIA_GPU_SM86__) \
96
||
/* NVIDIA Jetson/Drive AGX Orin (compute capability 8.7) */
\
97
(defined(__SYCL_TARGET_NVIDIA_GPU_SM87__) && __SYCL_TARGET_NVIDIA_GPU_SM87__) \
98
||
/* NVIDIA Ada Lovelace architecture (compute capability 8.9) */
\
99
(defined(__SYCL_TARGET_NVIDIA_GPU_SM89__) && __SYCL_TARGET_NVIDIA_GPU_SM89__) \
100
||
/* NVIDIA Hopper architecture (compute capability 9.0) */
\
101
(defined(__SYCL_TARGET_NVIDIA_GPU_SM90__) && __SYCL_TARGET_NVIDIA_GPU_SM90__)
102
103
# define SYCL_SUBGROUP_SIZE (32)
/* CUDA supports warp size 32 */
104
105
# elif
/* AMD GCN 2.0 Sea Islands architecture (gfx 7.0) */
\
106
(defined(__SYCL_TARGET_AMD_GPU_GFX700__) && __SYCL_TARGET_AMD_GPU_GFX700__) \
107
||
/* AMD GCN 2.0 Sea Islands architecture (gfx 7.0) */
\
108
(defined(__SYCL_TARGET_AMD_GPU_GFX701__) && __SYCL_TARGET_AMD_GPU_GFX701__) \
109
||
/* AMD GCN 2.0 Sea Islands architecture (gfx 7.0) */
\
110
(defined(__SYCL_TARGET_AMD_GPU_GFX702__) && __SYCL_TARGET_AMD_GPU_GFX702__) \
111
||
/* AMD GCN 3.0 Volcanic Islands architecture (gfx 8.0) */
\
112
(defined(__SYCL_TARGET_AMD_GPU_GFX801__) && __SYCL_TARGET_AMD_GPU_GFX801__) \
113
||
/* AMD GCN 3.0 Volcanic Islands architecture (gfx 8.0) */
\
114
(defined(__SYCL_TARGET_AMD_GPU_GFX802__) && __SYCL_TARGET_AMD_GPU_GFX802__) \
115
||
/* AMD GCN 4.0 Arctic Islands architecture (gfx 8.0) */
\
116
(defined(__SYCL_TARGET_AMD_GPU_GFX803__) && __SYCL_TARGET_AMD_GPU_GFX803__) \
117
||
/* AMD GCN 3.0 Volcanic Islands architecture (gfx 8.0) */
\
118
(defined(__SYCL_TARGET_AMD_GPU_GFX805__) && __SYCL_TARGET_AMD_GPU_GFX805__) \
119
||
/* AMD GCN 3.0 Volcanic Islands architecture (gfx 8.1) */
\
120
(defined(__SYCL_TARGET_AMD_GPU_GFX810__) && __SYCL_TARGET_AMD_GPU_GFX810__) \
121
||
/* AMD GCN 5.0 Vega architecture (gfx 9.0) */
\
122
(defined(__SYCL_TARGET_AMD_GPU_GFX900__) && __SYCL_TARGET_AMD_GPU_GFX900__) \
123
||
/* AMD GCN 5.0 Vega architecture (gfx 9.0) */
\
124
(defined(__SYCL_TARGET_AMD_GPU_GFX902__) && __SYCL_TARGET_AMD_GPU_GFX902__) \
125
||
/* AMD GCN 5.0 Vega architecture (gfx 9.0) */
\
126
(defined(__SYCL_TARGET_AMD_GPU_GFX904__) && __SYCL_TARGET_AMD_GPU_GFX904__) \
127
||
/* AMD GCN 5.1 Vega II architecture (gfx 9.0) */
\
128
(defined(__SYCL_TARGET_AMD_GPU_GFX906__) && __SYCL_TARGET_AMD_GPU_GFX906__) \
129
||
/* AMD CDNA 1.0 Arcturus architecture (gfx 9.0) */
\
130
(defined(__SYCL_TARGET_AMD_GPU_GFX908__) && __SYCL_TARGET_AMD_GPU_GFX908__) \
131
||
/* AMD GCN 5.0 Raven 2 architecture (gfx 9.0) */
\
132
(defined(__SYCL_TARGET_AMD_GPU_GFX909__) && __SYCL_TARGET_AMD_GPU_GFX909__) \
133
||
/* AMD CDNA 2.0 Aldebaran architecture (gfx 9.0) */
\
134
(defined(__SYCL_TARGET_AMD_GPU_GFX90A__) && __SYCL_TARGET_AMD_GPU_GFX90A__) \
135
||
/* AMD GCN 5.1 Renoir architecture (gfx 9.0) */
\
136
(defined(__SYCL_TARGET_AMD_GPU_GFX90C__) && __SYCL_TARGET_AMD_GPU_GFX90C__) \
137
||
/* AMD CDNA 3.0 Aqua Vanjaram architecture (gfx 9.4) */
\
138
(defined(__SYCL_TARGET_AMD_GPU_GFX940__) && __SYCL_TARGET_AMD_GPU_GFX940__) \
139
||
/* AMD CDNA 3.0 Aqua Vanjaram architecture (gfx 9.4) */
\
140
(defined(__SYCL_TARGET_AMD_GPU_GFX941__) && __SYCL_TARGET_AMD_GPU_GFX941__) \
141
||
/* AMD CDNA 3.0 Aqua Vanjaram architecture (gfx 9.4) */
\
142
(defined(__SYCL_TARGET_AMD_GPU_GFX942__) && __SYCL_TARGET_AMD_GPU_GFX942__)
143
144
# define SYCL_SUBGROUP_SIZE (64)
/* up to gfx9, HIP supports wavefront size 64 */
145
146
# elif
/* AMD RDNA 1.0 Navi 10 architecture (gfx 10.1) */
\
147
(defined(__SYCL_TARGET_AMD_GPU_GFX1010__) && __SYCL_TARGET_AMD_GPU_GFX1010__) \
148
||
/* AMD RDNA 1.0 Navi 12 architecture (gfx 10.1) */
\
149
(defined(__SYCL_TARGET_AMD_GPU_GFX1011__) && __SYCL_TARGET_AMD_GPU_GFX1011__) \
150
||
/* AMD RDNA 1.0 Navi 14 architecture (gfx 10.1) */
\
151
(defined(__SYCL_TARGET_AMD_GPU_GFX1012__) && __SYCL_TARGET_AMD_GPU_GFX1012__) \
152
||
/* AMD RDNA 2.0 Oberon architecture (gfx 10.1) */
\
153
(defined(__SYCL_TARGET_AMD_GPU_GFX1013__) && __SYCL_TARGET_AMD_GPU_GFX1013__) \
154
||
/* AMD RDNA 2.0 Navi 21 architecture (gfx 10.3) */
\
155
(defined(__SYCL_TARGET_AMD_GPU_GFX1030__) && __SYCL_TARGET_AMD_GPU_GFX1030__) \
156
||
/* AMD RDNA 2.0 Navi 22 architecture (gfx 10.3) */
\
157
(defined(__SYCL_TARGET_AMD_GPU_GFX1031__) && __SYCL_TARGET_AMD_GPU_GFX1031__) \
158
||
/* AMD RDNA 2.0 Navi 23 architecture (gfx 10.3) */
\
159
(defined(__SYCL_TARGET_AMD_GPU_GFX1032__) && __SYCL_TARGET_AMD_GPU_GFX1032__) \
160
||
/* AMD RDNA 2.0 Van Gogh architecture (gfx 10.3) */
\
161
(defined(__SYCL_TARGET_AMD_GPU_GFX1033__) && __SYCL_TARGET_AMD_GPU_GFX1033__) \
162
||
/* AMD RDNA 2.0 Navi 24 architecture (gfx 10.3) */
\
163
(defined(__SYCL_TARGET_AMD_GPU_GFX1034__) && __SYCL_TARGET_AMD_GPU_GFX1034__) \
164
||
/* AMD RDNA 2.0 Rembrandt Mobile architecture (gfx 10.3) */
\
165
(defined(__SYCL_TARGET_AMD_GPU_GFX1035__) && __SYCL_TARGET_AMD_GPU_GFX1035__) \
166
||
/* AMD RDNA 2.0 Raphael architecture (gfx 10.3) */
\
167
(defined(__SYCL_TARGET_AMD_GPU_GFX1036__) && __SYCL_TARGET_AMD_GPU_GFX1036__) \
168
||
/* AMD RDNA 3.0 Navi 31 architecture (gfx 11.0) */
\
169
(defined(__SYCL_TARGET_AMD_GPU_GFX1100__) && __SYCL_TARGET_AMD_GPU_GFX1100__) \
170
||
/* AMD RDNA 3.0 Navi 32 architecture (gfx 11.0) */
\
171
(defined(__SYCL_TARGET_AMD_GPU_GFX1101__) && __SYCL_TARGET_AMD_GPU_GFX1101__) \
172
||
/* AMD RDNA 3.0 Navi 33 architecture (gfx 11.0) */
\
173
(defined(__SYCL_TARGET_AMD_GPU_GFX1102__) && __SYCL_TARGET_AMD_GPU_GFX1102__) \
174
||
/* AMD RDNA 3.0 Phoenix mobile architecture (gfx 11.0) */
\
175
(defined(__SYCL_TARGET_AMD_GPU_GFX1103__) && __SYCL_TARGET_AMD_GPU_GFX1103__) \
176
||
/* AMD RDNA 3.5 Strix Point architecture (gfx 11.5) */
\
177
(defined(__SYCL_TARGET_AMD_GPU_GFX1150__) && __SYCL_TARGET_AMD_GPU_GFX1150__) \
178
||
/* AMD RDNA 3.5 Strix Halo architecture (gfx 11.5) */
\
179
(defined(__SYCL_TARGET_AMD_GPU_GFX1151__) && __SYCL_TARGET_AMD_GPU_GFX1151__) \
180
||
/* AMD RDNA 4.0 Navi 44 architecture (gfx 12.0) */
\
181
(defined(__SYCL_TARGET_AMD_GPU_GFX1200__) && __SYCL_TARGET_AMD_GPU_GFX1200__) \
182
||
/* AMD RDNA 4.0 Navi 48 architecture (gfx 12.0) */
\
183
(defined(__SYCL_TARGET_AMD_GPU_GFX1201__) && __SYCL_TARGET_AMD_GPU_GFX1201__)
184
185
# define SYCL_SUBGROUP_SIZE (32)
/* starting from gfx10, HIP supports wavefront size 32 */
186
187
# else
// __SYCL_TARGET_*
188
189
# define SYCL_SUBGROUP_SIZE (0)
/* unknown target */
190
191
# endif
// __SYCL_TARGET_*
192
193
# else
194
195
# define SYCL_SUBGROUP_SIZE (0)
/* host compilation */
196
197
# endif
// __SYCL_DEVICE_ONLY__
198
199
#endif
// ALPAKA_ACC_SYCL_ENABLED
include
alpaka
kernel
SyclSubgroupSize.hpp
Generated on Thu Mar 27 2025 15:41:11 for alpaka by
Doxygen 1.9.8