alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
SetKernel.hpp
Go to the documentation of this file.
1/* Copyright 2022 Jeffrey Kelling, Bernhard Manfred Gruber
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
11#include "alpaka/meta/Fold.hpp"
12
13namespace alpaka
14{
15 //! any device ND memory set kernel.
17 {
18 public:
19 //! The kernel entry point.
20 //!
   //! Writes \p val to a run of consecutive bytes of \p dst. Each thread starts at the linear offset of
   //! its own first element and covers at most threadElemExtent[lastDim] bytes, clamped so that the run
   //! does not extend past \p extent in the last dimension.
   //!
21 //! All but the last element of threadElemExtent must be one.
22 //!
23 //! \tparam TAcc The accelerator environment to be executed on.
24 //! \tparam TExtent extent type.
   //! \tparam TPitch type of the pitch argument.
25 //! \param acc The accelerator to be executed on.
26 //! \param val value to set.
27 //! \param dst target mem ptr.
28 //! \param extent area to set.
   //! \param pitch passed to mapIdxPitchBytes together with the thread's first-element index to obtain
   //!        the linear offset into \p dst.
   // NOTE(review): this listing skips original source lines 29, 31 and 38 (the numbering jumps), so the
   // function-name line of the signature and the declaration of `Idx` used in the static_cast below are
   // not visible here - confirm against the real header.
30 template<typename TAcc, typename TExtent, typename TPitch>
32 TAcc const& acc,
33 std::uint8_t const val,
34 std::uint8_t* dst,
35 TExtent extent,
36 TPitch pitch) const -> void
37 {
   // Index of this thread in the grid and the number of elements assigned to each thread.
39 auto const gridThreadIdx(alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc));
40 auto const threadElemExtent(alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc));
   // ND index of the first element this thread is responsible for.
41 auto const idxThreadFirstElem = getIdxThreadFirstElem(acc, gridThreadIdx, threadElemExtent);
   // Collapse that ND index to a single linear offset using the pitch; it is added to the
   // std::uint8_t pointer dst below, so it counts bytes.
42 auto idx = mapIdxPitchBytes<1u, Dim<TAcc>::value>(idxThreadFirstElem, pitch)[0];
43 constexpr auto lastDim = Dim<TAcc>::value - 1;
   // One-past-the-end offset for this thread: the smaller of the per-thread element count and the
   // elements remaining up to extent, both taken in the last dimension. It is computed before the
   // bounds check but only read inside the guarded loop.
44 auto const lastIdx = idx
45 + std::min(
46 threadElemExtent[lastDim],
47 static_cast<Idx>(extent[lastDim] - idxThreadFirstElem[lastDim]));
48
   // Only threads whose first element lies inside extent in every dimension write anything.
49 if((idxThreadFirstElem < extent).foldrAll(std::logical_and<bool>()))
50 {
51 for(; idx < lastIdx; ++idx)
52 {
53 *(dst + idx) = val;
54 }
55 }
56 }
57 };
58} // namespace alpaka
any device ND memory set kernel.
Definition SetKernel.hpp:17
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto operator()(TAcc const &acc, std::uint8_t const val, std::uint8_t *dst, TExtent extent, TPitch pitch) const -> void
The kernel entry point.
Definition SetKernel.hpp:31
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
Definition Common.hpp:38
#define ALPAKA_NO_HOST_ACC_WARNING
Disable nvcc warning: 'calling a host function from host device function.' Usage: ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC function_declaration()
Definition Common.hpp:82
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getIdxThreadFirstElem(TIdxWorkDiv const &idxWorkDiv, TGridThreadIdx const &gridThreadIdx, TThreadElemExtent const &threadElemExtent) -> Vec< Dim< TIdxWorkDiv >, Idx< TIdxWorkDiv > >
Get the index of the first element this thread computes.
Definition Accessors.hpp:89
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition Traits.hpp:19
The idx type trait.
Definition Traits.hpp:25