alpaka
Abstraction Library for Parallel Kernel Acceleration
SetKernel.hpp
Go to the documentation of this file.
1 /* Copyright 2022 Jeffrey Kelling, Bernhard Manfred Gruber
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
8 #include "alpaka/idx/MapIdx.hpp"
9 #include "alpaka/idx/Traits.hpp"
11 #include "alpaka/meta/Fold.hpp"
12 
13 namespace alpaka
14 {
15  //! any device ND memory set kernel.
17  {
18  public:
19  //! The kernel entry point.
20  //!
    //! Fills the part of the target memory assigned to the calling thread with \p val,
    //! one std::uint8_t at a time.
21  //! All but the last element of threadElemExtent must be one.
22  //!
23  //! \tparam TAcc The accelerator environment to be executed on.
24  //! \tparam TExtent The type of the extent (indexed per dimension).
    //! \tparam TPitch The type of the pitch argument.
25  //! \param acc The accelerator to be executed on.
26  //! \param val The byte value to write.
27  //! \param dst Pointer to the start of the target memory; written as *(dst + idx).
28  //! \param extent The extent of the area to set; a thread whose first element lies outside it writes nothing.
    //! \param pitch Pitch information handed to mapIdxPitchBytes to turn the ND element index into a
    //!        linear offset into \p dst (presumably pitches in bytes, judging by the helper's name - confirm).
30  template<typename TAcc, typename TExtent, typename TPitch>
    // NOTE(review): the listing line carrying the function name is missing here; per the definition
    // summary further down this page it reads
    // `ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto operator()(`.
32  TAcc const& acc,
33  std::uint8_t const val,
34  std::uint8_t* dst,
35  TExtent extent,
36  TPitch pitch) const -> void
37  {
    // NOTE(review): `Idx` (used in the static_cast below) is not declared in the visible lines;
    // listing line 38 is absent and presumably holds a local alias for the index type - confirm.

    // Index of the calling thread within the whole grid.
39  auto const gridThreadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc));
    // Per-dimension number of elements handled by a single thread.
40  auto const threadElemExtent(alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc));
    // ND index of the first element this thread computes.
41  auto const idxThreadFirstElem = getIdxThreadFirstElem(acc, gridThreadIdx, threadElemExtent);
    // Collapse the ND first-element index to one dimension using the pitch; component 0 is the
    // linear offset applied to dst.
42  auto idx = mapIdxPitchBytes<1u, Dim<TAcc>::value>(idxThreadFirstElem, pitch)[0];
43  constexpr auto lastDim = Dim<TAcc>::value - 1;
    // One-past-the-end offset for this thread: at most threadElemExtent[lastDim] elements, clamped
    // to what remains of the extent along the last dimension after the thread's first element.
44  auto const lastIdx = idx
45  + std::min(
46  threadElemExtent[lastDim],
47  static_cast<Idx>(extent[lastDim] - idxThreadFirstElem[lastDim]));
48 
    // Write only when the first element index is below the extent in all dimensions; the
    // comparison result is reduced with logical AND.
    // NOTE(review): assumes `<` on these index vectors compares component-wise - confirm.
49  if((idxThreadFirstElem < extent).foldrAll(std::logical_and<bool>()))
50  {
    // Store val into every position in [idx, lastIdx).
51  for(; idx < lastIdx; ++idx)
52  {
53  *(dst + idx) = val;
54  }
55  }
56  }
57  };
58 } // namespace alpaka
any device ND memory set kernel.
Definition: SetKernel.hpp:17
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto operator()(TAcc const &acc, std::uint8_t const val, std::uint8_t *dst, TExtent extent, TPitch pitch) const -> void
The kernel entry point.
Definition: SetKernel.hpp:31
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
Definition: Common.hpp:38
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC function_declaration()
Definition: Common.hpp:82
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto min(T const &min_ctx, Tx const &x, Ty const &y)
Returns the smaller of two arguments. NaNs are treated as missing data (between a NaN and a numeric value, the numeric value is chosen).
Definition: Traits.hpp:1280
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition: Traits.hpp:29
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getIdxThreadFirstElem([[maybe_unused]] TIdxWorkDiv const &idxWorkDiv, TGridThreadIdx const &gridThreadIdx, TThreadElemExtent const &threadElemExtent) -> Vec< Dim< TIdxWorkDiv >, Idx< TIdxWorkDiv >>
Get the index of the first element this thread computes.
Definition: Accessors.hpp:89
The idx type trait.
Definition: Traits.hpp:25