alpaka
Abstraction Library for Parallel Kernel Acceleration
IndependentElements.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include "alpaka/acc/Traits.hpp"
5 
6 #include <algorithm>
7 #include <ciso646> // workaround for MSVC in c++17 mode - TODO: remove once we move to c++20
8 #include <cstddef>
9 #include <type_traits>
10 
11 namespace alpaka
12 {
13 
14  namespace detail
15  {
16 
17  /* IndependentGroupsAlong
18  *
19  * `IndependentGroupsAlong<TAcc, Dim>(acc, groups)` returns a one-dimensional iterable range that spans the
20  * group indices from 0 to `groups` (exclusive); the groups are assigned to the blocks along the `Dim` dimension. If
21  * `groups` is not specified, it defaults to the number of blocks along the `Dim` dimension.
22  *
23  * `independentGroupsAlong<Dim>(acc, ...)` is a shorthand for `IndependentGroupsAlong<TAcc, Dim>(acc, ...)`
24  * that can infer the accelerator type from the argument.
25  *
26  * In a 1-dimensional kernel, `independentGroups(acc, ...)` is a shorthand for `IndependentGroupsAlong<TAcc,
27  * 0>(acc, ...)`.
28  *
29  * In an N-dimensional kernel, dimension 0 is the one that increases more slowly (e.g. the outer loop),
30  * followed by dimension 1, up to dimension N-1 that increases fastest (e.g. the inner loop). For convenience
31  * when converting CUDA or HIP code, `independentGroupsAlongX(acc, ...)`, `Y` and `Z` are shorthands for
32  * `IndependentGroupsAlong<TAcc, N-1>(acc, ...)`, `<N-2>` and `<N-3>`.
33  *
34  * `independentGroupsAlong<Dim>(acc, ...)` should be called consistently by all the threads in a block. All
35  * threads in a block see the same loop iterations, while threads in different blocks may see a different
36  * number of iterations.
37  * If the work division has more blocks than the required number of groups, the first blocks will perform one
38  * iteration of the loop, while the other blocks will exit the loop immediately.
39  * If the work division has fewer blocks than the required number of groups, some of the blocks will perform
40  * more than one iteration, in order to cover the whole problem space.
41  *
42  * For example,
43  *
44  * for (auto group: independentGroupsAlong<Dim>(acc, 7))
45  *
46  * will return the group range from 0 to 6, distributed across all blocks in the work division.
47  * If the work division has more than 7 blocks, the first 7 will perform one iteration of the loop, while the
48  * other blocks will exit the loop immediately. For example if the work division has 8 blocks, the blocks from
49  * 0 to 6 will process one group while block 7 will not process any.
50  * If the work division has fewer than 7 blocks, some of the blocks will perform more than one iteration of the
51  * loop, in order to cover the whole problem space. For example if the work division has 4 blocks, block 0
52  * will process the groups 0 and 4, block 1 will process groups 1 and 5, block 2 will process groups 2 and 6,
53  * and block 3 will process group 3.
54  */
55 
56  template<
57  typename TAcc,
58  std::size_t Dim,
59  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
61  {
62  public:
64 
65  ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc)
66  : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
67  , stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
68  , extent_{stride_}
69  {
70  }
71 
72  ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc, Idx groups)
73  : first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
74  , stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}
75  , extent_{groups}
76  {
77  }
78 
79  class const_iterator;
81 
83  {
84  return const_iterator(stride_, extent_, first_);
85  }
86 
88  {
89  return const_iterator(stride_, extent_, extent_);
90  }
91 
93  {
94  friend class IndependentGroupsAlong;
95 
96  ALPAKA_FN_ACC inline const_iterator(Idx stride, Idx extent, Idx first)
97  : stride_{stride}
98  , extent_{extent}
99  , first_{std::min(first, extent)}
100  {
101  }
102 
103  public:
104  ALPAKA_FN_ACC inline Idx operator*() const
105  {
106  return first_;
107  }
108 
109  // pre-increment the iterator
111  {
112  // increment the first-element-in-block index by the grid stride
113  first_ += stride_;
114  if(first_ < extent_)
115  return *this;
116 
117  // the iterator has reached or passed the end of the extent, clamp it to the extent
118  first_ = extent_;
119  return *this;
120  }
121 
122  // post-increment the iterator
124  {
125  const_iterator old = *this;
126  ++(*this);
127  return old;
128  }
129 
130  ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const
131  {
132  return (first_ == other.first_);
133  }
134 
135  ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const
136  {
137  return not(*this == other);
138  }
139 
140  private:
141  // non-const to support iterator copy and assignment
142  Idx stride_;
143  Idx extent_;
144  // modified by the pre/post-increment operator
145  Idx first_;
146  };
147 
148  private:
149  Idx const first_;
150  Idx const stride_;
151  Idx const extent_;
152  };
153 
154  } // namespace detail
155 
156  /* independentGroups
157  *
158  * `independentGroups(acc, groups)` returns a one-dimensional iterable range that spans the group indices from 0
159  * to `groups` (exclusive). If `groups` is not specified, it defaults to the number of blocks.
160  *
161  * `independentGroups(acc, ...)` is a shorthand for `detail::IndependentGroupsAlong<TAcc, 0>(acc, ...)`.
162  *
163  * `independentGroups(acc, ...)` should be called consistently by all the threads in a block. All threads in a
164  * block see the same loop iterations, while threads in different blocks may see a different number of iterations.
165  * If the work division has more blocks than the required number of groups, the first blocks will perform one
166  * iteration of the loop, while the other blocks will exit the loop immediately.
167  * If the work division has fewer blocks than the required number of groups, some of the blocks will perform more
168  * than one iteration, in order to cover the whole problem space.
169  *
170  * For example,
171  *
172  * for (auto group: independentGroups(acc, 7))
173  *
174  * will return the group range from 0 to 6, distributed across all blocks in the work division.
175  * If the work division has more than 7 blocks, the first 7 will perform one iteration of the loop, while the other
176  * blocks will exit the loop immediately. For example if the work division has 8 blocks, the blocks from 0 to 6
177  * will process one group while block 7 will not process any.
178  * If the work division has fewer than 7 blocks, some of the blocks will perform more than one iteration of the
179  * loop, in order to cover the whole problem space. For example if the work division has 4 blocks, block 0 will
180  * process the groups 0 and 4, block 1 will process groups 1 and 5, block 2 will process groups 2 and 6, and block
181  * 3 will process group 3.
182  *
183  * Note that `independentGroups(acc, ...)` is only suitable for one-dimensional kernels. For N-dimensional kernels,
184  * use
185  * - `independentGroupsAlong<Dim>(acc, ...)` to perform the iteration explicitly along dimension `Dim`;
186  * - `independentGroupsAlongX(acc, ...)`, `independentGroupsAlongY(acc, ...)`, or `independentGroupsAlongZ(acc,
187  * ...)` to loop along the fastest, second-fastest, or third-fastest dimension.
188  */
189 
190  template<
191  typename TAcc,
192  typename... TArgs,
193  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
194  ALPAKA_FN_ACC inline auto independentGroups(TAcc const& acc, TArgs... args)
195  {
196  using Idx = alpaka::Idx<TAcc>;
197  return detail::IndependentGroupsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
198  }
199 
200  /* independentGroupsAlong<Dim>
201  *
202  * `independentGroupsAlong<Dim>(acc, ...)` is a shorthand for `detail::IndependentGroupsAlong<TAcc, Dim>(acc, ...)`
203  * that can infer the accelerator type from the argument.
204  */
205 
206  template<
207  std::size_t Dim,
208  typename TAcc,
209  typename... TArgs,
210  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
211  ALPAKA_FN_ACC inline auto independentGroupsAlong(TAcc const& acc, TArgs... args)
212  {
213  using Idx = alpaka::Idx<TAcc>;
214  return detail::IndependentGroupsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
215  }
216 
217  /* independentGroupsAlongX, Y, Z
218  *
219  * Like `independentGroups` for N-dimensional kernels, along the fastest, second-fastest, and third-fastest
220  * dimensions.
221  */
222 
223  template<
224  typename TAcc,
225  typename... TArgs,
226  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
227  ALPAKA_FN_ACC inline auto independentGroupsAlongX(TAcc const& acc, TArgs... args)
228  {
229  using Idx = alpaka::Idx<TAcc>;
230  return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 1>(acc, static_cast<Idx>(args)...);
231  }
232 
233  template<
234  typename TAcc,
235  typename... TArgs,
236  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
237  ALPAKA_FN_ACC inline auto independentGroupsAlongY(TAcc const& acc, TArgs... args)
238  {
239  using Idx = alpaka::Idx<TAcc>;
240  return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 2>(acc, static_cast<Idx>(args)...);
241  }
242 
243  template<
244  typename TAcc,
245  typename... TArgs,
246  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
247  ALPAKA_FN_ACC inline auto independentGroupsAlongZ(TAcc const& acc, TArgs... args)
248  {
249  using Idx = alpaka::Idx<TAcc>;
250  return detail::IndependentGroupsAlong<TAcc, alpaka::Dim<TAcc>::value - 3>(acc, static_cast<Idx>(args)...);
251  }
252 
253  namespace detail
254  {
255 
256  /* IndependentGroupElementsAlong
257  *
258  * `independentGroupElementsAlong<Dim>(acc, ...)` is a shorthand for `IndependentGroupElementsAlong<TAcc,
259  * Dim>(acc, ...)` that can infer the accelerator type from the argument.
260  */
261 
262  template<
263  typename TAcc,
264  std::size_t Dim,
265  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
267  {
268  public:
270 
272  : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
273  , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
274  , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
275  , extent_{stride_}
276  {
277  }
278 
279  ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx extent)
280  : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
281  , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
282  , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
283  , extent_{extent}
284  {
285  }
286 
287  ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx first, Idx extent)
288  : elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}
289  , thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_ + first}
290  , stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}
291  , extent_{extent}
292  {
293  }
294 
295  class const_iterator;
297 
299  {
300  return const_iterator(elements_, stride_, extent_, thread_);
301  }
302 
304  {
305  return const_iterator(elements_, stride_, extent_, extent_);
306  }
307 
309  {
311 
312  ALPAKA_FN_ACC inline const_iterator(Idx elements, Idx stride, Idx extent, Idx first)
313  : elements_{elements}
314  ,
315  // we need to reduce the stride by on element range because index_ is later increased with each
316  // increment
317  stride_{stride - elements}
318  , extent_{extent}
319  , index_{std::min(first, extent)}
320  {
321  }
322 
323  public:
324  ALPAKA_FN_ACC inline Idx operator*() const
325  {
326  return index_;
327  }
328 
329  // pre-increment the iterator
331  {
332  ++indexElem_;
333  ++index_;
334  if(indexElem_ >= elements_)
335  {
336  indexElem_ = 0;
337  index_ += stride_;
338  }
339  if(index_ >= extent_)
340  index_ = extent_;
341 
342  return *this;
343  }
344 
345  // post-increment the iterator
347  {
348  const_iterator old = *this;
349  ++(*this);
350  return old;
351  }
352 
353  ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const
354  {
355  return (*(*this) == *other);
356  }
357 
358  ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const
359  {
360  return not(*this == other);
361  }
362 
363  private:
364  // non-const to support iterator copy and assignment
365  Idx elements_;
366  Idx stride_;
367  Idx extent_;
368  // modified by the pre/post-increment operator
369  Idx index_;
370  Idx indexElem_ = 0;
371  };
372 
373  private:
374  Idx const elements_;
375  Idx const thread_;
376  Idx const stride_;
377  Idx const extent_;
378  };
379 
380  } // namespace detail
381 
382  /* independentGroupElements
383  */
384 
385  template<
386  typename TAcc,
387  typename... TArgs,
388  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value == 1>>
389  ALPAKA_FN_ACC inline auto independentGroupElements(TAcc const& acc, TArgs... args)
390  {
391  using Idx = alpaka::Idx<TAcc>;
392  return detail::IndependentGroupElementsAlong<TAcc, 0>(acc, static_cast<Idx>(args)...);
393  }
394 
395  /* independentGroupElementsAlong<Dim>
396  *
397  * `independentGroupElementsAlong<Dim>(acc, ...)` is a shorthand for `detail::IndependentGroupElementsAlong<TAcc,
398  * Dim>(acc, ...)` that can infer the accelerator type from the argument.
399  */
400 
401  template<
402  std::size_t Dim,
403  typename TAcc,
404  typename... TArgs,
405  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>>
406  ALPAKA_FN_ACC inline auto independentGroupElementsAlong(TAcc const& acc, TArgs... args)
407  {
408  using Idx = alpaka::Idx<TAcc>;
409  return detail::IndependentGroupElementsAlong<TAcc, Dim>(acc, static_cast<Idx>(args)...);
410  }
411 
412  /* independentGroupElementsAlongX, Y, Z
413  *
414  * Like `independentGroupElements` for N-dimensional kernels, along the fastest, second-fastest, and third-fastest
415  * dimensions.
416  */
417 
418  template<
419  typename TAcc,
420  typename... TArgs,
421  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 0)>>
422  ALPAKA_FN_ACC inline auto independentGroupElementsAlongX(TAcc const& acc, TArgs... args)
423  {
424  using Idx = alpaka::Idx<TAcc>;
426  acc,
427  static_cast<Idx>(args)...);
428  }
429 
430  template<
431  typename TAcc,
432  typename... TArgs,
433  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 1)>>
434  ALPAKA_FN_ACC inline auto independentGroupElementsAlongY(TAcc const& acc, TArgs... args)
435  {
436  using Idx = alpaka::Idx<TAcc>;
438  acc,
439  static_cast<Idx>(args)...);
440  }
441 
442  template<
443  typename TAcc,
444  typename... TArgs,
445  typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and (alpaka::Dim<TAcc>::value > 2)>>
446  ALPAKA_FN_ACC inline auto independentGroupElementsAlongZ(TAcc const& acc, TArgs... args)
447  {
448  using Idx = alpaka::Idx<TAcc>;
450  acc,
451  static_cast<Idx>(args)...);
452  }
453 
454 } // namespace alpaka
ALPAKA_FN_ACC bool operator!=(const_iterator const &other) const
ALPAKA_FN_ACC bool operator==(const_iterator const &other) const
ALPAKA_FN_ACC const_iterator begin() const
ALPAKA_FN_ACC IndependentGroupElementsAlong(TAcc const &acc, Idx extent)
ALPAKA_FN_ACC IndependentGroupElementsAlong(TAcc const &acc)
ALPAKA_FN_ACC IndependentGroupElementsAlong(TAcc const &acc, Idx first, Idx extent)
ALPAKA_FN_ACC const_iterator end() const
ALPAKA_FN_ACC bool operator==(const_iterator const &other) const
ALPAKA_FN_ACC bool operator!=(const_iterator const &other) const
ALPAKA_FN_ACC IndependentGroupsAlong(TAcc const &acc)
ALPAKA_FN_ACC const_iterator begin() const
ALPAKA_FN_ACC IndependentGroupsAlong(TAcc const &acc, Idx groups)
ALPAKA_FN_ACC const_iterator end() const
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_F...
Definition: Common.hpp:38
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto min(T const &min_ctx, Tx const &x, Ty const &y)
Returns the smaller of two arguments. NaNs are treated as missing data (between a NaN and a numeric v...
Definition: Traits.hpp:1280
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition: Traits.hpp:29
ALPAKA_FN_ACC auto independentGroupsAlongZ(TAcc const &acc, TArgs... args)
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getWorkDiv(TWorkDiv const &workDiv) -> Vec< Dim< TWorkDiv >, Idx< TWorkDiv >>
Get the extent requested.
Definition: Traits.hpp:33
ALPAKA_FN_ACC auto independentGroups(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElements(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongX(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlongX(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlong(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongY(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlongY(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupsAlong(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto independentGroupElementsAlongZ(TAcc const &acc, TArgs... args)
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getIdx(TIdx const &idx, TWorkDiv const &workDiv) -> Vec< Dim< TWorkDiv >, Idx< TIdx >>
Get the indices requested.
Definition: Accessors.hpp:23
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19