alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
IndependentElements.hpp
Go to the documentation of this file.
1#pragma once
2
5
6#include <algorithm>
7#include <cstddef>
8#include <type_traits>
9
10namespace alpaka
11{
12
13 namespace detail
14 {
15
16 /* IndependentGroupsAlong
17 *
18 * `IndependentGroupsAlong<TAcc, Dim>(acc, groups)` returns a one-dimensional iterable range that spans the
19 * group indices from 0 to `groups` (excluded); the groups are assigned to the blocks along the `Dim` dimension. If
20 * `groups` is not specified, it defaults to the number of blocks along the `Dim` dimension.
21 *
22 * `independentGroupsAlong<Dim>(acc, ...)` is a shorthand for `IndependentGroupsAlong<TAcc, Dim>(acc, ...)`
23 * that can infer the accelerator type from the argument.
24 *
25 * In a 1-dimensional kernel, `independentGroups(acc, ...)` is a shorthand for `IndependentGroupsAlong<TAcc,
26 * 0>(acc, ...)`.
27 *
28 * In an N-dimensional kernel, dimension 0 is the one that increases more slowly (e.g. the outer loop),
29 * followed by dimension 1, up to dimension N-1 that increases fastest (e.g. the inner loop). For convenience
30 * when converting CUDA or HIP code, `independentGroupsAlongX(acc, ...)`, `Y` and `Z` are shorthands for
31 * `IndependentGroupsAlong<TAcc, N-1>(acc, ...)`, `<N-2>` and `<N-3>`.
32 *
33 * `independentGroupsAlong<Dim>(acc, ...)` should be called consistently by all the threads in a block. All
34 * threads in a block see the same loop iterations, while threads in different blocks may see a different
35 * number of iterations.
36 * If the work division has more blocks than the required number of groups, the first blocks will perform one
37 * iteration of the loop, while the other blocks will exit the loop immediately.
38 * If the work division has fewer blocks than the required number of groups, some of the blocks will perform
39 * more than one iteration, in order to cover the whole problem space.
40 *
41 * For example,
42 *
43 * for (auto group: independentGroupsAlong<Dim>(acc, 7))
44 *
45 * will return the group range from 0 to 6, distributed across all blocks in the work division.
46 * If the work division has more than 7 blocks, the first 7 will perform one iteration of the loop, while the
47 * other blocks will exit the loop immediately. For example if the work division has 8 blocks, the blocks from
48 * 0 to 6 will process one group while block 7 will not process any.
49 * If the work division has fewer than 7 blocks, some of the blocks will perform more than one iteration of the
50 * loop, in order to cover the whole problem space. For example if the work division has 4 blocks, block 0
51 * will process the groups 0 and 4, block 1 will process groups 1 and 5, block 2 will process groups 2 and 6,
52 * and block 3 will process group 3.
53 */
54 // Range over the group indices that the calling block processes along dimension `Dim`.
55 template<
56 typename TAcc,
57 std::size_t Dim,
58 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>> // NOTE(review): admits Dim == Dim<TAcc>::value although the constructors index with [Dim] - confirm whether `>` was intended
60 { // NOTE(review): listing lines 59 and 62 are absent from this page (presumably the class head and the Idx alias) - confirm against the repository
61 public:
63
64 ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc) // no explicit group count: the extent defaults to the grid size in blocks along Dim
65 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]} // index of this block in the grid along Dim
66 , stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]} // number of blocks in the grid along Dim
67 , extent_{stride_} // reads stride_, which is declared (and therefore initialised) before extent_
68 {
69 }
70
71 ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc, Idx groups) // explicit group count
72 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
73 , stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
74 , extent_{groups} // iteration stops once the group index reaches `groups`
75 {
76 }
77
78 class const_iterator;
80
82 { // NOTE(review): the signature (listing line 81) is absent from this page
83 return const_iterator(stride_, extent_, first_); // starts at this block's index; the iterator constructor clamps it to the extent
84 }
85
87 { // NOTE(review): the signature (listing line 86) is absent from this page
88 return const_iterator(stride_, extent_, extent_); // the past-the-end position is the extent itself
89 }
90
92 { // NOTE(review): listing lines 91 and 93 are absent from this page (presumably the iterator class head) - confirm
94
95 ALPAKA_FN_ACC inline const_iterator(Idx stride, Idx extent, Idx first)
96 : stride_{stride}
97 , extent_{extent}
98 , first_{std::min(first, extent)} // clamped so that a start at or beyond the extent compares equal to the end position
99 {
100 }
101
102 public:
104 { // NOTE(review): the signature (listing line 103) is absent from this page
105 return first_; // the current group index
106 }
107
108 // pre-increment the iterator
110 {
111 // advance the group index by the stride, i.e. the number of blocks along Dim
112 first_ += stride_;
113 if(first_ < extent_)
114 return *this;
115
116 // the iterator has reached or passed the end of the extent, clamp it to the extent
117 first_ = extent_;
118 return *this;
119 }
120
121 // post-increment the iterator
123 {
124 const_iterator old = *this; // copy returned to the caller, taken before advancing
125 ++(*this);
126 return old;
127 }
128
129 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const
130 {
131 return (first_ == other.first_); // only the position is compared; stride_ and extent_ are ignored
132 }
133
134 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const
135 {
136 return not(*this == other);
137 }
138
139 private:
140 // non-const to support iterator copy and assignment
141 Idx stride_;
142 Idx extent_;
143 // modified by the pre/post-increment operator
144 Idx first_;
145 };
146
147 private:
148 Idx const first_; // index of the first group for this block
149 Idx const stride_; // distance between two groups processed by the same block
150 Idx const extent_; // one past the last group index
151 };
152
153 } // namespace detail
154
155 /* independentGroups
156 *
157 * `independentGroups(acc, groups)` returns a one-dimensional iterable range that spans the group indices from 0
158 * to `groups` (excluded). If `groups` is not specified, it defaults to the number of blocks.
159 *
160 * `independentGroups(acc, ...)` is a shorthand for `detail::IndependentGroupsAlong<TAcc, 0>(acc, ...)`.
161 *
162 * `independentGroups(acc, ...)` should be called consistently by all the threads in a block. All threads in a
163 * block see the same loop iterations, while threads in different blocks may see a different number of iterations.
164 * If the work division has more blocks than the required number of groups, the first blocks will perform one
165 * iteration of the loop, while the other blocks will exit the loop immediately.
166 * If the work division has fewer blocks than the required number of groups, some of the blocks will perform more
167 * than one iteration, in order to cover the whole problem space.
168 *
169 * For example,
170 *
171 * for (auto group: independentGroups(acc, 7))
172 *
173 * will return the group range from 0 to 6, distributed across all blocks in the work division.
174 * If the work division has more than 7 blocks, the first 7 will perform one iteration of the loop, while the other
175 * blocks will exit the loop immediately. For example if the work division has 8 blocks, the blocks from 0 to 6
176 * will process one group while block 7 will not process any.
177 * If the work division has fewer than 7 blocks, some of the blocks will perform more than one iteration of the
178 * loop, in order to cover the whole problem space. For example if the work division has 4 blocks, block 0 will
179 * process the groups 0 and 4, block 1 will process groups 1 and 5, block 2 will process groups 2 and 6, and block
180 * 3 will process group 3.
181 *
182 * Note that `independentGroups(acc, ...)` is only suitable for one-dimensional kernels. For N-dimensional kernels,
183 * use
184 * - `independentGroupsAlong<Dim>(acc, ...)` to perform the iteration explicitly along dimension `Dim`;
185 * - `independentGroupsAlongX(acc, ...)`, `independentGroupsAlongY(acc, ...)`, or `independentGroupsAlongZ(acc,
186 * ...)` to loop along the fastest, second-fastest, or third-fastest dimension.
187 */
188
189 template<
190 typename TAcc,
191 typename... TArgs,
192 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> && (alpaka::Dim<TAcc>::value == 1)>>
193 ALPAKA_FN_ACC inline auto independentGroups(TAcc const& acc, TArgs... args)
194 {
195 using Range = detail::IndependentGroupsAlong<TAcc, 0>; // a one-dimensional kernel only has dimension 0
196 return Range(acc, static_cast<alpaka::Idx<TAcc>>(args)...); // every optional argument is converted to the index type
197 }
198
199 /* independentGroupsAlong<Dim>
200 *
201 * Shorthand for `detail::IndependentGroupsAlong<TAcc, Dim>(acc, ...)` that infers `TAcc` from `acc` and casts the
202 * other arguments to `alpaka::Idx<TAcc>`. `Dim` is used as an index, so it must be below `alpaka::Dim<TAcc>::value`.
203 */
204
205 template<
206 std::size_t Dim,
207 typename TAcc,
208 typename... TArgs,
209 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > Dim)>> // strict: Dim == N would index one past the last dimension
210 ALPAKA_FN_ACC inline auto independentGroupsAlong(TAcc const& acc, TArgs... args)
211 {
212 using Idx = alpaka::Idx<TAcc>;
213 return detail::IndependentGroupsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
214 }
215
216 /* independentGroupsAlongX, Y, Z
217 *
218 * Like `independentGroups` for N-dimensional kernels, along the fastest, second-fastest, and third-fastest
219 * dimensions.
220 */
221
222 template<
223 typename TAcc,
224 typename... TArgs,
225 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> && (alpaka::Dim<TAcc>::value > 0)>>
226 ALPAKA_FN_ACC inline auto independentGroupsAlongX(TAcc const& acc, TArgs... args)
227 {
228 using Range = detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>; // X: the last, i.e. fastest, dimension
229 return Range(acc, static_cast<alpaka::Idx<TAcc>>(args)...); // optional arguments are converted to the index type
230 }
231
232 template<
233 typename TAcc,
234 typename... TArgs,
235 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> && (alpaka::Dim<TAcc>::value > 1)>>
236 ALPAKA_FN_ACC inline auto independentGroupsAlongY(TAcc const& acc, TArgs... args)
237 {
238 using Range = detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>; // Y: the second-fastest dimension
239 return Range(acc, static_cast<alpaka::Idx<TAcc>>(args)...); // optional arguments are converted to the index type
240 }
241
242 template<
243 typename TAcc,
244 typename... TArgs,
245 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> && (alpaka::Dim<TAcc>::value > 2)>>
246 ALPAKA_FN_ACC inline auto independentGroupsAlongZ(TAcc const& acc, TArgs... args)
247 {
248 using Range = detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>; // Z: the third-fastest dimension
249 return Range(acc, static_cast<alpaka::Idx<TAcc>>(args)...); // optional arguments are converted to the index type
250 }
251
252 namespace detail
253 {
254
255 /* IndependentGroupElementsAlong
256 *
257 * `independentGroupElementsAlong<Dim>(acc, ...)` is a shorthand for `IndependentGroupElementsAlong<TAcc,
258 * Dim>(acc, ...)` that can infer the accelerator type from the argument.
259 */
260 // Range over the element indices that the calling thread visits along `Dim`, up to (excluding) the extent.
261 template<
262 typename TAcc,
263 std::size_t Dim,
264 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>> // NOTE(review): admits Dim == Dim<TAcc>::value although the constructors index with [Dim] - confirm whether `>` was intended
265 class IndependentGroupElementsAlong
266 {
267 public:
268 using Idx = alpaka::Idx<TAcc>;
269
270 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc) // no explicit extent: it defaults to stride_
271 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]} // elements per thread along Dim
272 , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_} // first element of this thread: thread index in the block times elements per thread
273 , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_} // threads per block times elements per thread
274 , extent_{stride_} // reads stride_, which is declared (and therefore initialised) before extent_
275 {
276 }
277
278 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx extent) // explicit extent
279 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
280 , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
281 , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
282 , extent_{extent}
283 {
284 }
285
286 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx first, Idx extent) // explicit start offset and extent
287 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
288 , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_ + first} // the thread's start is shifted by `first`
289 , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
290 , extent_{extent}
291 {
292 }
293
294 class const_iterator;
295 using iterator = const_iterator;
296
297 ALPAKA_FN_ACC inline const_iterator begin() const
298 {
299 return const_iterator(elements_, stride_, extent_, thread_); // starts at this thread's first element; the iterator constructor clamps it to the extent
300 }
301
302 ALPAKA_FN_ACC inline const_iterator end() const
303 {
304 return const_iterator(elements_, stride_, extent_, extent_); // the past-the-end position is the extent itself
305 }
306
308 { // NOTE(review): listing lines 307 and 309 are absent from this page (presumably the iterator class head) - confirm against the repository
310
311 ALPAKA_FN_ACC inline const_iterator(Idx elements, Idx stride, Idx extent, Idx first)
312 : elements_{elements}
313 ,
314 // the stride is reduced by one element range because operator++ already advances index_ by one on
315 // every increment, i.e. by `elements` in total before the stride is applied
316 stride_{stride - elements}
317 , extent_{extent}
318 , index_{std::min(first, extent)} // clamped so that a start at or beyond the extent compares equal to the end position
319 {
320 }
321
322 public:
323 ALPAKA_FN_ACC inline Idx operator*() const
324 {
325 return index_; // the current element index
326 }
327
328 // pre-increment the iterator
330 {
331 ++indexElem_; // position inside the current run of `elements_` consecutive indices
332 ++index_;
333 if(indexElem_ >= elements_)
334 {
335 indexElem_ = 0; // the run is complete: start a new one
336 index_ += stride_; // skip ahead by the reduced stride
337 }
338 if(index_ >= extent_)
339 index_ = extent_; // clamp so that the iterator compares equal to the end position
340
341 return *this;
342 }
343
344 // post-increment the iterator
346 {
347 const_iterator old = *this; // copy returned to the caller, taken before advancing
348 ++(*this);
349 return old;
350 }
351
352 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const
353 {
354 return (*(*this) == *other); // compares the dereferenced indices only; the other members are ignored
355 }
356
357 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const
358 {
359 return not(*this == other);
360 }
361
362 private:
363 // non-const to support iterator copy and assignment
364 Idx elements_;
365 Idx stride_;
366 Idx extent_;
367 // modified by the pre/post-increment operator
368 Idx index_;
369 Idx indexElem_ = 0;
370 };
371
372 private:
373 Idx const elements_; // elements per thread
374 Idx const thread_; // first element index for this thread
375 Idx const stride_; // threads per block times elements per thread
376 Idx const extent_; // one past the last element index
377 };
378
379 } // namespace detail
380
381 /* independentGroupElements: for one-dimensional kernels, `independentGroupElements(acc, ...)` is a shorthand for
382 * `detail::IndependentGroupElementsAlong<TAcc, 0>(acc, ...)`, with every argument cast to `alpaka::Idx<TAcc>`. */
383
384 template<
385 typename TAcc,
386 typename... TArgs,
387 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> && (alpaka::Dim<TAcc>::value == 1)>>
388 ALPAKA_FN_ACC inline auto independentGroupElements(TAcc const& acc, TArgs... args)
389 {
390 using Range = detail::IndependentGroupElementsAlong<TAcc, 0>; // a one-dimensional kernel only has dimension 0
391 return Range(acc, static_cast<alpaka::Idx<TAcc>>(args)...);
392 }
393
394 /* independentGroupElementsAlong<Dim>
395 *
396 * Shorthand for `detail::IndependentGroupElementsAlong<TAcc, Dim>(acc, ...)` that infers `TAcc` from `acc` and casts
397 * the other arguments to `alpaka::Idx<TAcc>`. `Dim` is used as an index: it must be below `alpaka::Dim<TAcc>::value`.
398 */
399
400 template<
401 std::size_t Dim,
402 typename TAcc,
403 typename... TArgs,
404 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > Dim)>> // strict: Dim == N would index one past the last dimension
405 ALPAKA_FN_ACC inline auto independentGroupElementsAlong(TAcc const& acc, TArgs... args)
406 {
407 using Idx = alpaka::Idx<TAcc>;
408 return detail::IndependentGroupElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
409 }
410
411 /* independentGroupElementsAlongX, Y, Z
412 *
413 * Like `independentGroupElements` for N-dimensional kernels, along the fastest, second-fastest, and third-fastest
414 * dimensions.
415 */
416
417 template<
418 typename TAcc,
419 typename... TArgs,
420 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> && (alpaka::Dim<TAcc>::value > 0)>>
421 ALPAKA_FN_ACC inline auto independentGroupElementsAlongX(TAcc const& acc, TArgs... args)
422 {
423 // X is the last, i.e. fastest, dimension of the kernel
424 constexpr auto dim = alpaka::Dim<TAcc>::value - 1;
425 using Range = detail::IndependentGroupElementsAlong<TAcc, dim>;
426 return Range(acc, static_cast<alpaka::Idx<TAcc>>(args)...);
427 }
428
429 template<
430 typename TAcc,
431 typename... TArgs,
432 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> && (alpaka::Dim<TAcc>::value > 1)>>
433 ALPAKA_FN_ACC inline auto independentGroupElementsAlongY(TAcc const& acc, TArgs... args)
434 {
435 // Y is the second-fastest dimension of the kernel
436 constexpr auto dim = alpaka::Dim<TAcc>::value - 2;
437 using Range = detail::IndependentGroupElementsAlong<TAcc, dim>;
438 return Range(acc, static_cast<alpaka::Idx<TAcc>>(args)...);
439 }
440
441 template<
442 typename TAcc,
443 typename... TArgs,
444 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> && (alpaka::Dim<TAcc>::value > 2)>>
445 ALPAKA_FN_ACC inline auto independentGroupElementsAlongZ(TAcc const& acc, TArgs... args)
446 {
447 // Z is the third-fastest dimension of the kernel
448 constexpr auto dim = alpaka::Dim<TAcc>::value - 3;
449 using Range = detail::IndependentGroupElementsAlong<TAcc, dim>;
450 return Range(acc, static_cast<alpaka::Idx<TAcc>>(args)...);
451 }
452
453} // namespace alpaka
ALPAKA_FN_ACC bool operator!=(const_iterator const &other) const
ALPAKA_FN_ACC bool operator==(const_iterator const &other) const
ALPAKA_FN_ACC bool operator==(const_iterator const &other) const
ALPAKA_FN_ACC bool operator!=(const_iterator const &other) const
ALPAKA_FN_ACC IndependentGroupsAlong(TAcc const &acc)
ALPAKA_FN_ACC const_iterator begin() const
ALPAKA_FN_ACC IndependentGroupsAlong(TAcc const &acc, Idx groups)
ALPAKA_FN_ACC const_iterator end() const
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition Common.hpp:38
ALPAKA_FN_HOST auto end(TView &view) -> Iterator< TView >
Definition Iterator.hpp:139
ALPAKA_FN_HOST auto begin(TView &view) -> Iterator< TView >
Definition Iterator.hpp:133
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getWorkDiv(TWorkDiv const &workDiv) -> Vec< Dim< TWorkDiv >, Idx< TWorkDiv > >
Get the extent requested.
Definition Traits.hpp:33
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getIdx(TIdx const &idx, TWorkDiv const &workDiv) -> Vec< Dim< TWorkDiv >, Idx< TIdx > >
Get the indices requested.
Definition Accessors.hpp:23
ALPAKA_FN_ACC auto independentGroupsAlongZ(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroups(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElements(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongX(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlongX(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlong(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongY(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlongY(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlong(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongZ(TAcc const &acc, TArgs... args)
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition Traits.hpp:19
STL namespace.