alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
IndependentElements.hpp
Go to the documentation of this file.
1#pragma once
2
5
6#include <algorithm>
7#include <cstddef>
8#include <type_traits>
9
10namespace alpaka
11{
12
13 namespace detail
14 {
15
16 /* IndependentGroupsAlong
17 *
18 * `IndependentGroupsAlong<TAcc, Dim>(acc, groups)` returns a one-dimensional iterable range that spans the
19 * group indices from 0 to `groups`; the groups are assigned to the blocks along the `Dim` dimension. If
20 * `groups` is not specified, it defaults to the number of blocks along the `Dim` dimension.
21 *
22 * `independentGroupsAlong<Dim>(acc, ...)` is a shorthand for `IndependentGroupsAlong<TAcc, Dim>(acc, ...)`
23 * that can infer the accelerator type from the argument.
24 *
25 * In a 1-dimensional kernel, `independentGroups(acc, ...)` is a shorthand for `IndependentGroupsAlong<TAcc,
26 * 0>(acc, ...)`.
27 *
28 * In an N-dimensional kernel, dimension 0 is the one that increases more slowly (e.g. the outer loop),
29 * followed by dimension 1, up to dimension N-1 that increases fastest (e.g. the inner loop). For convenience
30 * when converting CUDA or HIP code, `independentGroupsAlongX(acc, ...)`, `Y` and `Z` are shorthands for
31 * `IndependentGroupsAlong<TAcc, N-1>(acc, ...)`, `<N-2>` and `<N-3>`.
32 *
33 * `independentGroupsAlong<Dim>(acc, ...)` should be called consistently by all the threads in a block. All
34 * threads in a block see the same loop iterations, while threads in different blocks may see a different
35 * number of iterations.
36 * If the work division has more blocks than the required number of groups, the first blocks will perform one
37 * iteration of the loop, while the other blocks will exit the loop immediately.
38 * If the work division has fewer blocks than the required number of groups, some of the blocks will perform
39 * more than one iteration, in order to cover the whole problem space.
40 *
41 * For example,
42 *
43 * for (auto group: independentGroupsAlong<Dim>(acc, 7))
44 *
45 * will return the group range from 0 to 6, distributed across all blocks in the work division.
46 * If the work division has more than 7 blocks, the first 7 will perform one iteration of the loop, while the
47 * other blocks will exit the loop immediately. For example if the work division has 8 blocks, the blocks from
48 * 0 to 6 will process one group while block 7 will not process any.
49 * If the work division has fewer than 7 blocks, some of the blocks will perform more than one iteration of the
50 * loop, in order to cover the whole problem space. For example if the work division has 4 blocks, block 0
51 * will process the groups 0 and 4, block 1 will process groups 1 and 5, block 2 will process groups 2 and 6,
52 * and block 3 will process group 3.
53 */
54
55 template<concepts::Acc TAcc, std::size_t Dim>
56 requires(alpaka::Dim<TAcc>::value >= Dim)
58 {
59 public:
61
62 ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc)
63 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
64 , stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
65 , extent_{stride_}
66 {
67 }
68
69 ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc, Idx groups)
70 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
71 , stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
72 , extent_{groups}
73 {
74 }
75
76 class const_iterator;
78
80 {
81 return const_iterator(stride_, extent_, first_);
82 }
83
85 {
86 return const_iterator(stride_, extent_, extent_);
87 }
88
90 {
92
93 ALPAKA_FN_ACC inline const_iterator(Idx stride, Idx extent, Idx first)
94 : stride_{stride}
95 , extent_{extent}
96 , first_{std::min(first, extent)}
97 {
98 }
99
100 public:
102 {
103 return first_;
104 }
105
106 // pre-increment the iterator
108 {
109 // increment the first-element-in-block index by the grid stride
110 first_ += stride_;
111 if(first_ < extent_)
112 return *this;
113
114 // the iterator has reached or passed the end of the extent, clamp it to the extent
115 first_ = extent_;
116 return *this;
117 }
118
119 // post-increment the iterator
121 {
122 const_iterator old = *this;
123 ++(*this);
124 return old;
125 }
126
127 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const
128 {
129 return (first_ == other.first_);
130 }
131
132 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const
133 {
134 return not(*this == other);
135 }
136
137 private:
138 // non-const to support iterator copy and assignment
139 Idx stride_;
140 Idx extent_;
141 // modified by the pre/post-increment operator
142 Idx first_;
143 };
144
145 private:
146 Idx const first_;
147 Idx const stride_;
148 Idx const extent_;
149 };
150
151 } // namespace detail
152
153 /* independentGroups
154 *
155 * `independentGroups(acc, groups)` returns a one-dimensional iterable range that spans the group indices from 0
156 * to `groups`. If `groups` is not specified, it defaults to the number of blocks.
157 *
158 * `independentGroups(acc, ...)` is a shorthand for `detail::IndependentGroupsAlong<TAcc, 0>(acc, ...)`.
159 *
160 * `independentGroups(acc, ...)` should be called consistently by all the threads in a block. All threads in a
161 * block see the same loop iterations, while threads in different blocks may see a different number of iterations.
162 * If the work division has more blocks than the required number of groups, the first blocks will perform one
163 * iteration of the loop, while the other blocks will exit the loop immediately.
164 * If the work division has fewer blocks than the required number of groups, some of the blocks will perform more
165 * than one iteration, in order to cover the whole problem space.
166 *
167 * For example,
168 *
169 * for (auto group: independentGroups(acc, 7))
170 *
171 * will return the group range from 0 to 6, distributed across all blocks in the work division.
172 * If the work division has more than 7 blocks, the first 7 will perform one iteration of the loop, while the other
173 * blocks will exit the loop immediately. For example if the work division has 8 blocks, the blocks from 0 to 6
174 * will process one group while block 7 will not process any.
175 * If the work division has fewer than 7 blocks, some of the blocks will perform more than one iteration of the
176 * loop, in order to cover the whole problem space. For example if the work division has 4 blocks, block 0 will
177 * process the groups 0 and 4, block 1 will process groups 1 and 5, block 2 will process groups 2 and 6, and block
178 * 3 will process group 3.
179 *
180 * Note that `independentGroups(acc, ...)` is only suitable for one-dimensional kernels. For N-dimensional kernels,
181 * use
182 * - `independentGroupsAlong<Dim>(acc, ...)` to perform the iteration explicitly along dimension `Dim`;
183 * - `independentGroupsAlongX(acc, ...)`, `independentGroupsAlongY(acc, ...)`, or `independentGroupsAlongZ(acc,
184 * ...)` to loop along the fastest, second-fastest, or third-fastest dimension.
185 */
186
187 template<concepts::Acc TAcc, typename... TArgs>
188 requires(alpaka::Dim<TAcc>::value == 1)
189 ALPAKA_FN_ACC inline auto independentGroups(TAcc const& acc, TArgs... args)
190 {
191 using Idx = alpaka::Idx<TAcc>;
192 return detail::IndependentGroupsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
193 }
194
195 /* independentGroupsAlong<Dim>
196 *
197 * `independentGroupsAlong<Dim>(acc, ...)` is a shorthand for `detail::IndependentGroupsAlong<TAcc, Dim>(acc, ...)`
198 * that can infer the accelerator type from the argument.
199 */
200
201 template<std::size_t Dim, concepts::Acc TAcc, typename... TArgs>
202 requires(alpaka::Dim<TAcc>::value >= Dim)
203 ALPAKA_FN_ACC inline auto independentGroupsAlong(TAcc const& acc, TArgs... args)
204 {
205 using Idx = alpaka::Idx<TAcc>;
206 return detail::IndependentGroupsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
207 }
208
209 /* independentGroupsAlongX, Y, Z
210 *
211 * Like `independentGroups` for N-dimensional kernels, along the fastest, second-fastest, and third-fastest
212 * dimensions.
213 */
214
215 template<concepts::Acc TAcc, typename... TArgs>
216 requires(alpaka::Dim<TAcc>::value > 0)
217 ALPAKA_FN_ACC inline auto independentGroupsAlongX(TAcc const& acc, TArgs... args)
218 {
219 using Idx = alpaka::Idx<TAcc>;
220 return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
221 }
222
223 template<concepts::Acc TAcc, typename... TArgs>
224 requires(alpaka::Dim<TAcc>::value > 1)
225 ALPAKA_FN_ACC inline auto independentGroupsAlongY(TAcc const& acc, TArgs... args)
226 {
227 using Idx = alpaka::Idx<TAcc>;
228 return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
229 }
230
231 template<concepts::Acc TAcc, typename... TArgs>
232 requires(alpaka::Dim<TAcc>::value > 2)
233 ALPAKA_FN_ACC inline auto independentGroupsAlongZ(TAcc const& acc, TArgs... args)
234 {
235 using Idx = alpaka::Idx<TAcc>;
236 return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
237 }
238
239 namespace detail
240 {
241
242 /* IndependentGroupElementsAlong
243 *
244 * `independentGroupElementsAlong<Dim>(acc, ...)` is a shorthand for `IndependentGroupElementsAlong<TAcc,
245 * Dim>(acc, ...)` that can infer the accelerator type from the argument.
246 */
247
248 template<concepts::Acc TAcc, std::size_t Dim>
249 requires(alpaka::Dim<TAcc>::value >= Dim)
250 class IndependentGroupElementsAlong
251 {
252 public:
253 using Idx = alpaka::Idx<TAcc>;
254
255 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc)
256 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
257 , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
258 , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
259 , extent_{stride_}
260 {
261 }
262
263 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx extent)
264 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
265 , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
266 , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
267 , extent_{extent}
268 {
269 }
270
271 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx first, Idx extent)
272 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
273 , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_ + first}
274 , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
275 , extent_{extent}
276 {
277 }
278
279 class const_iterator;
280 using iterator = const_iterator;
281
282 ALPAKA_FN_ACC inline const_iterator begin() const
283 {
284 return const_iterator(elements_, stride_, extent_, thread_);
285 }
286
287 ALPAKA_FN_ACC inline const_iterator end() const
288 {
289 return const_iterator(elements_, stride_, extent_, extent_);
290 }
291
293 {
294 friend class IndependentGroupElementsAlong;
295
296 ALPAKA_FN_ACC inline const_iterator(Idx elements, Idx stride, Idx extent, Idx first)
297 : elements_{elements}
298 ,
299 // we need to reduce the stride by on element range because index_ is later increased with each
300 // increment
301 stride_{stride - elements}
302 , extent_{extent}
303 , index_{std::min(first, extent)}
304 {
305 }
306
307 public:
308 ALPAKA_FN_ACC inline Idx operator*() const
309 {
310 return index_;
311 }
312
313 // pre-increment the iterator
315 {
316 ++indexElem_;
317 ++index_;
318 if(indexElem_ >= elements_)
319 {
320 indexElem_ = 0;
321 index_ += stride_;
322 }
323 if(index_ >= extent_)
324 index_ = extent_;
325
326 return *this;
327 }
328
329 // post-increment the iterator
331 {
332 const_iterator old = *this;
333 ++(*this);
334 return old;
335 }
336
337 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const
338 {
339 return (*(*this) == *other);
340 }
341
342 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const
343 {
344 return not(*this == other);
345 }
346
347 private:
348 // non-const to support iterator copy and assignment
349 Idx elements_;
350 Idx stride_;
351 Idx extent_;
352 // modified by the pre/post-increment operator
353 Idx index_;
354 Idx indexElem_ = 0;
355 };
356
357 private:
358 Idx const elements_;
359 Idx const thread_;
360 Idx const stride_;
361 Idx const extent_;
362 };
363
364 } // namespace detail
365
366 /* independentGroupElements
367 */
368
369 template<concepts::Acc TAcc, typename... TArgs>
370 requires(alpaka::Dim<TAcc>::value == 1)
371 ALPAKA_FN_ACC inline auto independentGroupElements(TAcc const& acc, TArgs... args)
372 {
373 using Idx = alpaka::Idx<TAcc>;
374 return detail::IndependentGroupElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
375 }
376
377 /* independentGroupElementsAlong<Dim>
378 *
379 * `independentGroupElementsAlong<Dim>(acc, ...)` is a shorthand for `detail::IndependentGroupElementsAlong<TAcc,
380 * Dim>(acc, ...)` that can infer the accelerator type from the argument.
381 */
382
383 template<std::size_t Dim, concepts::Acc TAcc, typename... TArgs>
384 requires(alpaka::Dim<TAcc>::value >= Dim)
385 ALPAKA_FN_ACC inline auto independentGroupElementsAlong(TAcc const& acc, TArgs... args)
386 {
387 using Idx = alpaka::Idx<TAcc>;
388 return detail::IndependentGroupElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
389 }
390
391 /* independentGroupElementsAlongX, Y, Z
392 *
393 * Like `independentGroupElements` for N-dimensional kernels, along the fastest, second-fastest, and third-fastest
394 * dimensions.
395 */
396
397 template<concepts::Acc TAcc, typename... TArgs>
398 requires(alpaka::Dim<TAcc>::value > 0)
399 ALPAKA_FN_ACC inline auto independentGroupElementsAlongX(TAcc const& acc, TArgs... args)
400 {
401 using Idx = alpaka::Idx<TAcc>;
402 return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(
403 acc,
404 static_cast<Idx>(args)...);
405 }
406
407 template<concepts::Acc TAcc, typename... TArgs>
408 requires(alpaka::Dim<TAcc>::value > 1)
409 ALPAKA_FN_ACC inline auto independentGroupElementsAlongY(TAcc const& acc, TArgs... args)
410 {
411 using Idx = alpaka::Idx<TAcc>;
412 return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(
413 acc,
414 static_cast<Idx>(args)...);
415 }
416
417 template<concepts::Acc TAcc, typename... TArgs>
418 requires(alpaka::Dim<TAcc>::value > 2)
419 ALPAKA_FN_ACC inline auto independentGroupElementsAlongZ(TAcc const& acc, TArgs... args)
420 {
421 using Idx = alpaka::Idx<TAcc>;
422 return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(
423 acc,
424 static_cast<Idx>(args)...);
425 }
426
427} // namespace alpaka
ALPAKA_FN_ACC bool operator!=(const_iterator const &other) const
ALPAKA_FN_ACC bool operator==(const_iterator const &other) const
ALPAKA_FN_ACC bool operator==(const_iterator const &other) const
ALPAKA_FN_ACC bool operator!=(const_iterator const &other) const
ALPAKA_FN_ACC const_iterator begin() const
ALPAKA_FN_ACC const_iterator end() const
ALPAKA_FN_ACC IndependentGroupsAlong(TAcc const &acc)
ALPAKA_FN_ACC IndependentGroupsAlong(TAcc const &acc, Idx groups)
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition Common.hpp:38
ALPAKA_FN_HOST auto end(TView &view) -> Iterator< TView >
Definition Iterator.hpp:133
ALPAKA_FN_HOST auto begin(TView &view) -> Iterator< TView >
Definition Iterator.hpp:127
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getWorkDiv(TWorkDiv const &workDiv) -> Vec< Dim< TWorkDiv >, Idx< TWorkDiv > >
Get the extent requested.
Definition Traits.hpp:33
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getIdx(TIdx const &idx, TWorkDiv const &workDiv) -> Vec< Dim< TWorkDiv >, Idx< TIdx > >
Get the indices requested.
Definition Accessors.hpp:23
ALPAKA_FN_ACC auto independentGroupsAlongY(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongZ(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongY(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroups(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongX(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlong(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElements(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlongX(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlong(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlongZ(TAcc const &acc, TArgs... args)
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition Traits.hpp:19
STL namespace.