alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
IndependentElements.hpp
Go to the documentation of this file.
1#pragma once
2
5
6#include <algorithm>
7#include <ciso646> // workaround for MSVC in c++17 mode - TODO: remove once we move to c++20
8#include <cstddef>
9#include <type_traits>
10
11namespace alpaka
12{
13
14 namespace detail
15 {
16
17 /* IndependentGroupsAlong
18 *
19 * `IndependentGroupsAlong<TAcc, Dim>(acc, groups)` returns a one-dimensional iterable range that spans the
20 * group indices from 0 to `groups`; the groups are assigned to the blocks along the `Dim` dimension. If
21 * `groups` is not specified, it defaults to the number of blocks along the `Dim` dimension.
22 *
23 * `independentGroupsAlong<Dim>(acc, ...)` is a shorthand for `IndependentGroupsAlong<TAcc, Dim>(acc, ...)`
24 * that can infer the accelerator type from the argument.
25 *
26 * In a 1-dimensional kernel, `independentGroups(acc, ...)` is a shorthand for `IndependentGroupsAlong<TAcc,
27 * 0>(acc, ...)`.
28 *
29 * In an N-dimensional kernel, dimension 0 is the one that increases more slowly (e.g. the outer loop),
30 * followed by dimension 1, up to dimension N-1 that increases fastest (e.g. the inner loop). For convenience
31 * when converting CUDA or HIP code, `independentGroupsAlongX(acc, ...)`, `Y` and `Z` are shorthands for
32 * `IndependentGroupsAlong<TAcc, N-1>(acc, ...)`, `<N-2>` and `<N-3>`.
33 *
34 * `independentGroupsAlong<Dim>(acc, ...)` should be called consistently by all the threads in a block. All
35 * threads in a block see the same loop iterations, while threads in different blocks may see a different
36 * number of iterations.
37 * If the work division has more blocks than the required number of groups, the first blocks will perform one
38 * iteration of the loop, while the other blocks will exit the loop immediately.
39 * If the work division has less blocks than the required number of groups, some of the blocks will perform
40 * more than one iteration, in order to cover the whole problem space.
41 *
42 * For example,
43 *
44 * for (auto group: independentGroupsAlong<Dim>(acc, 7))
45 *
46 * will return the group range from 0 to 6, distributed across all blocks in the work division.
47 * If the work division has more than 7 blocks, the first 7 will perform one iteration of the loop, while the
48 * other blocks will exit the loop immediately. For example if the work division has 8 blocks, the blocks from
49 * 0 to 6 will process one group while block 7 will not process any.
50 * If the work division has less than 7 blocks, some of the blocks will perform more than one iteration of the
51 * loop, in order to cover the whole problem space. For example if the work division has 4 blocks, block 0
52 * will process the groups 0 and 4, block 1 will process groups 1 and 5, block 2 will process groups 2 and 6,
53 * and block 3 will process group 3.
54 */
55
56 template<
57 typename TAcc,
58 std::size_t Dim,
59 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
61 {
62 public:
64
65 ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc)
66 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
67 , stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
68 , extent_{stride_}
69 {
70 }
71
72 ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc, Idx groups)
73 : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
74 , stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
75 , extent_{groups}
76 {
77 }
78
79 class const_iterator;
81
83 {
84 return const_iterator(stride_, extent_, first_);
85 }
86
88 {
89 return const_iterator(stride_, extent_, extent_);
90 }
91
93 {
95
96 ALPAKA_FN_ACC inline const_iterator(Idx stride, Idx extent, Idx first)
97 : stride_{stride}
98 , extent_{extent}
99 , first_{std::min(first, extent)}
100 {
101 }
102
103 public:
105 {
106 return first_;
107 }
108
109 // pre-increment the iterator
111 {
112 // increment the first-element-in-block index by the grid stride
113 first_ += stride_;
114 if(first_ < extent_)
115 return *this;
116
117 // the iterator has reached or passed the end of the extent, clamp it to the extent
118 first_ = extent_;
119 return *this;
120 }
121
122 // post-increment the iterator
124 {
125 const_iterator old = *this;
126 ++(*this);
127 return old;
128 }
129
130 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const
131 {
132 return (first_ == other.first_);
133 }
134
135 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const
136 {
137 return not(*this == other);
138 }
139
140 private:
141 // non-const to support iterator copy and assignment
142 Idx stride_;
143 Idx extent_;
144 // modified by the pre/post-increment operator
145 Idx first_;
146 };
147
148 private:
149 Idx const first_;
150 Idx const stride_;
151 Idx const extent_;
152 };
153
154 } // namespace detail
155
156 /* independentGroups
157 *
158 * `independentGroups(acc, groups)` returns a one-dimensional iterable range that spans the group indices from 0
159 * to `groups`. If `groups` is not specified, it defaults to the number of blocks.
160 *
161 * `independentGroups(acc, ...)` is a shorthand for `detail::IndependentGroupsAlong<TAcc, 0>(acc, ...)`.
162 *
163 * `independentGroups(acc, ...)` should be called consistently by all the threads in a block. All threads in a
164 * block see the same loop iterations, while threads in different blocks may see a different number of iterations.
165 * If the work division has more blocks than the required number of groups, the first blocks will perform one
166 * iteration of the loop, while the other blocks will exit the loop immediately.
167 * If the work division has less blocks than the required number of groups, some of the blocks will perform more
168 * than one iteration, in order to cover the whole problem space.
169 *
170 * For example,
171 *
172 * for (auto group: independentGroups(acc, 7))
173 *
174 * will return the group range from 0 to 6, distributed across all blocks in the work division.
175 * If the work division has more than 7 blocks, the first 7 will perform one iteration of the loop, while the other
176 * blocks will exit the loop immediately. For example if the work division has 8 blocks, the blocks from 0 to 6
177 * will process one group while block 7 will not process any.
178 * If the work division has less than 7 blocks, some of the blocks will perform more than one iteration of the
179 * loop, in order to cover the whole problem space. For example if the work division has 4 blocks, block 0 will
180 * process the groups 0 and 4, block 1 will process groups 1 and 5, block 2 will process groups 2 and 6, and block
181 * 3 will process group 3.
182 *
183 * Note that `independentGroups(acc, ...)` is only suitable for one-dimensional kernels. For N-dimensional kernels,
184 * use
185 * - `independentGroupsAlong<Dim>(acc, ...)` to perform the iteration explicitly along dimension `Dim`;
186 * - `independentGroupsAlongX(acc, ...)`, `independentGroupsAlongY(acc, ...)`, or `independentGroupsAlongZ(acc,
187 * ...)` to loop along the fastest, second-fastest, or third-fastest dimension.
188 */
189
190 template<
191 typename TAcc,
192 typename... TArgs,
193 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
194 ALPAKA_FN_ACC inline auto independentGroups(TAcc const& acc, TArgs... args)
195 {
196 using Idx = alpaka::Idx<TAcc>;
197 return detail::IndependentGroupsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
198 }
199
200 /* independentGroupsAlong<Dim>
201 *
202 * `independentGroupsAlong<Dim>(acc, ...)` is a shorthand for `detail::IndependentGroupsAlong<TAcc, Dim>(acc, ...)`
203 * that can infer the accelerator type from the argument.
204 */
205
206 template<
207 std::size_t Dim,
208 typename TAcc,
209 typename... TArgs,
210 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
211 ALPAKA_FN_ACC inline auto independentGroupsAlong(TAcc const& acc, TArgs... args)
212 {
213 using Idx = alpaka::Idx<TAcc>;
214 return detail::IndependentGroupsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
215 }
216
217 /* independentGroupsAlongX, Y, Z
218 *
219 * Like `independentGroups` for N-dimensional kernels, along the fastest, second-fastest, and third-fastest
220 * dimensions.
221 */
222
223 template<
224 typename TAcc,
225 typename... TArgs,
226 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
227 ALPAKA_FN_ACC inline auto independentGroupsAlongX(TAcc const& acc, TArgs... args)
228 {
229 using Idx = alpaka::Idx<TAcc>;
230 return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
231 }
232
233 template<
234 typename TAcc,
235 typename... TArgs,
236 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
237 ALPAKA_FN_ACC inline auto independentGroupsAlongY(TAcc const& acc, TArgs... args)
238 {
239 using Idx = alpaka::Idx<TAcc>;
240 return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
241 }
242
243 template<
244 typename TAcc,
245 typename... TArgs,
246 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
247 ALPAKA_FN_ACC inline auto independentGroupsAlongZ(TAcc const& acc, TArgs... args)
248 {
249 using Idx = alpaka::Idx<TAcc>;
250 return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
251 }
252
253 namespace detail
254 {
255
256 /* IndependentGroupElementsAlong
257 *
258 * `independentGroupElementsAlong<Dim>(acc, ...)` is a shorthand for `IndependentGroupElementsAlong<TAcc,
259 * Dim>(acc, ...)` that can infer the accelerator type from the argument.
260 */
261
262 template<
263 typename TAcc,
264 std::size_t Dim,
265 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
266 class IndependentGroupElementsAlong
267 {
268 public:
269 using Idx = alpaka::Idx<TAcc>;
270
271 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc)
272 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
273 , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
274 , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
275 , extent_{stride_}
276 {
277 }
278
279 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx extent)
280 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
281 , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
282 , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
283 , extent_{extent}
284 {
285 }
286
287 ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx first, Idx extent)
288 : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
289 , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_ + first}
290 , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
291 , extent_{extent}
292 {
293 }
294
295 class const_iterator;
296 using iterator = const_iterator;
297
298 ALPAKA_FN_ACC inline const_iterator begin() const
299 {
300 return const_iterator(elements_, stride_, extent_, thread_);
301 }
302
303 ALPAKA_FN_ACC inline const_iterator end() const
304 {
305 return const_iterator(elements_, stride_, extent_, extent_);
306 }
307
309 {
311
312 ALPAKA_FN_ACC inline const_iterator(Idx elements, Idx stride, Idx extent, Idx first)
313 : elements_{elements}
314 ,
315 // we need to reduce the stride by on element range because index_ is later increased with each
316 // increment
317 stride_{stride - elements}
318 , extent_{extent}
319 , index_{std::min(first, extent)}
320 {
321 }
322
323 public:
324 ALPAKA_FN_ACC inline Idx operator*() const
325 {
326 return index_;
327 }
328
329 // pre-increment the iterator
331 {
332 ++indexElem_;
333 ++index_;
334 if(indexElem_ >= elements_)
335 {
336 indexElem_ = 0;
337 index_ += stride_;
338 }
339 if(index_ >= extent_)
340 index_ = extent_;
341
342 return *this;
343 }
344
345 // post-increment the iterator
347 {
348 const_iterator old = *this;
349 ++(*this);
350 return old;
351 }
352
353 ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const
354 {
355 return (*(*this) == *other);
356 }
357
358 ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const
359 {
360 return not(*this == other);
361 }
362
363 private:
364 // non-const to support iterator copy and assignment
365 Idx elements_;
366 Idx stride_;
367 Idx extent_;
368 // modified by the pre/post-increment operator
369 Idx index_;
370 Idx indexElem_ = 0;
371 };
372
373 private:
374 Idx const elements_;
375 Idx const thread_;
376 Idx const stride_;
377 Idx const extent_;
378 };
379
380 } // namespace detail
381
382 /* independentGroupElements
383 */
384
385 template<
386 typename TAcc,
387 typename... TArgs,
388 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
389 ALPAKA_FN_ACC inline auto independentGroupElements(TAcc const& acc, TArgs... args)
390 {
391 using Idx = alpaka::Idx<TAcc>;
392 return detail::IndependentGroupElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
393 }
394
395 /* independentGroupElementsAlong<Dim>
396 *
397 * `independentGroupElementsAlong<Dim>(acc, ...)` is a shorthand for `detail::IndependentGroupElementsAlong<TAcc,
398 * Dim>(acc, ...)` that can infer the accelerator type from the argument.
399 */
400
401 template<
402 std::size_t Dim,
403 typename TAcc,
404 typename... TArgs,
405 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
406 ALPAKA_FN_ACC inline auto independentGroupElementsAlong(TAcc const& acc, TArgs... args)
407 {
408 using Idx = alpaka::Idx<TAcc>;
409 return detail::IndependentGroupElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
410 }
411
412 /* independentGroupElementsAlongX, Y, Z
413 *
414 * Like `independentGroupElements` for N-dimensional kernels, along the fastest, second-fastest, and third-fastest
415 * dimensions.
416 */
417
418 template<
419 typename TAcc,
420 typename... TArgs,
421 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
422 ALPAKA_FN_ACC inline auto independentGroupElementsAlongX(TAcc const& acc, TArgs... args)
423 {
424 using Idx = alpaka::Idx<TAcc>;
425 return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(
426 acc,
427 static_cast<Idx>(args)...);
428 }
429
430 template<
431 typename TAcc,
432 typename... TArgs,
433 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
434 ALPAKA_FN_ACC inline auto independentGroupElementsAlongY(TAcc const& acc, TArgs... args)
435 {
436 using Idx = alpaka::Idx<TAcc>;
437 return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(
438 acc,
439 static_cast<Idx>(args)...);
440 }
441
442 template<
443 typename TAcc,
444 typename... TArgs,
445 typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
446 ALPAKA_FN_ACC inline auto independentGroupElementsAlongZ(TAcc const& acc, TArgs... args)
447 {
448 using Idx = alpaka::Idx<TAcc>;
449 return detail::IndependentGroupElementsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(
450 acc,
451 static_cast<Idx>(args)...);
452 }
453
454} // namespace alpaka
ALPAKA_FN_ACC bool operator!=(const_iterator const &other) const
ALPAKA_FN_ACC bool operator==(const_iterator const &other) const
ALPAKA_FN_ACC bool operator==(const_iterator const &other) const
ALPAKA_FN_ACC bool operator!=(const_iterator const &other) const
ALPAKA_FN_ACC IndependentGroupsAlong(TAcc const &acc)
ALPAKA_FN_ACC const_iterator begin() const
ALPAKA_FN_ACC IndependentGroupsAlong(TAcc const &acc, Idx groups)
ALPAKA_FN_ACC const_iterator end() const
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition Common.hpp:38
ALPAKA_FN_HOST auto end(TView &view) -> Iterator< TView >
Definition Iterator.hpp:139
ALPAKA_FN_HOST auto begin(TView &view) -> Iterator< TView >
Definition Iterator.hpp:133
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getWorkDiv(TWorkDiv const &workDiv) -> Vec< Dim< TWorkDiv >, Idx< TWorkDiv > >
Get the extent requested.
Definition Traits.hpp:33
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getIdx(TIdx const &idx, TWorkDiv const &workDiv) -> Vec< Dim< TWorkDiv >, Idx< TIdx > >
Get the indices requested.
Definition Accessors.hpp:23
ALPAKA_FN_ACC auto independentGroupsAlongZ(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroups(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElements(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongX(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlongX(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlong(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongY(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlongY(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlong(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongZ(TAcc const &acc, TArgs... args)
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition Traits.hpp:19
STL namespace.