alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Transform.hpp
Go to the documentation of this file.
1/* Copyright 2025 Andrea Bocci, Simeon Ehrig
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
7#include "alpaka/acc/Tag.hpp"
10#include "alpaka/dim/Traits.hpp"
14#include "alpaka/idx/Traits.hpp"
17#include "alpaka/vec/Vec.hpp"
19
20#include <iterator>
21#include <type_traits>
22
23namespace alpaka
24{
25
26 namespace detail
27 {
28
29 template<typename TFn>
31 {
32 TFn fn;
33
34 template<typename TAcc, typename T>
35 ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* in_ptr, T* out_ptr, alpaka::Idx<TAcc> size) const
36 {
37 static_assert(std::is_invocable_r_v<T, TFn, T> or std::is_invocable_r_v<T, TFn, TAcc const&, T>);
38
39 static_assert(alpaka::Dim<TAcc>::value == 1u);
40 using Idx = alpaka::Idx<TAcc>;
41
42 for(Idx i : alpaka::uniformElements(acc, size))
43 {
44 if constexpr(std::is_invocable_r_v<T, TFn, T>)
45 {
46 // std::is_invocable_r_v<T, TFn, T>
47 out_ptr[i] = fn(in_ptr[i]);
48 }
49 else
50 {
51 // std::is_invocable_r_v<T, TFn, TAcc const&, T>
52 out_ptr[i] = fn(acc, in_ptr[i]);
53 }
54 }
55 }
56 };
57
58 template<typename TFn>
60 {
61 TFn fn;
62
63 template<typename TAcc, typename T>
65 TAcc const& acc,
66 T const* in_ptr,
68 T* out_ptr,
71 {
72 static_assert(std::is_invocable_r_v<T, TFn, T> or std::is_invocable_r_v<T, TFn, TAcc const&, T>);
73
74 using Dim = alpaka::Dim<TAcc>;
75 using Idx = alpaka::Idx<TAcc>;
77
78 for(Vec idx : alpaka::uniformElementsND(acc, in_size))
79 {
80 auto p_in = reinterpret_cast<T const*>(
81 reinterpret_cast<uintptr_t>(in_ptr) + static_cast<uintptr_t>((idx * in_pithces).sum()));
82 auto p_out = reinterpret_cast<T*>(
83 reinterpret_cast<uintptr_t>(out_ptr) + static_cast<uintptr_t>((idx * out_pitches).sum()));
84 if constexpr(std::is_invocable_r_v<T, TFn, T>)
85 {
86 // std::is_invocable_r_v<T, TFn, T>
87 *p_out = fn(*p_in);
88 }
89 else
90 {
91 // std::is_invocable_r_v<T, TFn, TAcc const&, T>
92 *p_out = fn(acc, *p_in);
93 }
94 }
95 }
96 };
97
98 } // namespace detail
99
100 /*
101 * Asynchronously applies the given function `fn` to the elements of the input range starting at `in`,
102 * and stores the results in the half-open output range [`out_begin`, `out_end`), using the accelerator
103 * back-end identified by `TTag`.
104 */
105 template<alpaka::concepts::Tag TTag, typename TQueue, typename T, typename TFn>
106 void transform(TQueue& queue, T* out_begin, T* out_end, TFn&& fn, T* in)
107 {
108 using Idx = typename std::iterator_traits<T*>::difference_type;
110
111 static_assert(
112 std::is_invocable_r_v<T, TFn, T> or std::is_invocable_r_v<T, TFn, Acc1D const&, T>,
113 "TFn must accept either one argument (of type T) or two arguments (an accelerator and an argument of type "
114 "T), and return a value of type T.");
115
116 Idx size = std::distance(out_begin, out_end);
118
119 // Find a valid work division. This could be further optimised.
120 auto const config
122 auto const grid = alpaka::getValidWorkDiv(config, alpaka::getDev(queue), kernel, in, out_begin, size);
123
124 // Apply the fn function to all elements of the input range.
125 alpaka::exec<Acc1D>(queue, grid, kernel, in, out_begin, size);
126 }
127
128 /*
129 * Asynchronously applies the given function `fn` to the elements of the input buffer `in`,
130 * and stores the results in the corresponding elements of the output buffer `out`,
131 * using the accelerator back-end identified by `TTag`.
132 */
133 template<alpaka::concepts::Tag TTag, typename TQueue, typename TBuf, typename TFn, typename TConstBuf>
134 void transform(TQueue& queue, TBuf& out, TFn&& fn, TConstBuf const& in)
135 {
136 // Check that the input and output buffers have compatible types.
138 static_assert(
139 std::is_same_v<alpaka::Idx<TBuf>, Idx>,
140 "The input and output buffers must have the same index type.");
142 static_assert(
143 std::is_same_v<alpaka::Dim<TBuf>, Dim>,
144 "The input and output buffers must have the same dimension.");
145 using In = std::remove_const_t<alpaka::Elem<TConstBuf>>;
146 using Out = alpaka::Elem<TBuf>;
149
150 static_assert(
151 std::is_invocable_r_v<Out, TFn, In const> or std::is_invocable_r_v<Out, TFn, Acc const&, In const>,
152 "TFn must accept either one argument (of the buffer's element type) or two arguments (an accelerator and "
153 "the element type), and return a value of the buffer's element type.");
154
155 // Check that the input and output buffers have the same size.
156 Vec size = alpaka::getExtents(in);
157 assert(alpaka::getExtents(out) == size and "The input and output buffers must have the same extents.");
158
159 // Pass details of the input and output buffers to the kernel:
160 // - address of the first elements
161 // - pitches (in bytes) along all dimensions
162 // - number of elements along all dimensions
164
165 // Find a valid work division. This could be further optimised.
166 auto const config = alpaka::KernelCfg<Acc>{
167 size,
168 Vec::ones(),
169 false,
171 auto const grid = alpaka::getValidWorkDiv(
172 config,
173 alpaka::getDev(queue),
174 kernel,
175 in.data(),
177 out.data(),
179 size);
180
181 // Apply the fn function to all elements of the input buffer.
182 alpaka::exec<Acc>(
183 queue,
184 grid,
185 kernel,
186 in.data(),
188 out.data(),
190 size);
191 }
192
193} // namespace alpaka
An n-dimensional vector.
Definition Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition Vec.hpp:106
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
Definition Common.hpp:38
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
ALPAKA_FN_HOST auto getValidWorkDiv(KernelCfg< TAcc, TGridElemExtent, TThreadElemExtent > const &kernelCfg, TDev const &dev, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> WorkDivMembers< Dim< TAcc >, Idx< TAcc > >
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
Definition Traits.hpp:225
ALPAKA_FN_ACC auto uniformElements(TAcc const &acc, TArgs... args)
ALPAKA_FN_ACC auto uniformElementsND(TAcc const &acc)
@ Unrestricted
The block thread extent will not have any restrictions.
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition Traits.hpp:21
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T > >
Definition Traits.hpp:59
ALPAKA_FN_HOST auto getDev(T const &t)
Definition Traits.hpp:68
void transform(TQueue &queue, T *out_begin, T *out_end, TFn &&fn, T *in)
typename trait::AccType< T >::type Acc
The accelerator type trait alias template to remove the ::type.
Definition Traits.hpp:83
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition Traits.hpp:19
typename trait::TagToAcc< TTag, TDim, TIdx >::type TagToAcc
maps a tag type to an acc type
Definition Tag.hpp:74
Kernel start configuration to determine a valid work division.
ALPAKA_FN_ACC void operator()(TAcc const &acc, T const *in_ptr, alpaka::Vec< alpaka::Dim< TAcc >, alpaka::Idx< TAcc > > in_pithces, T *out_ptr, alpaka::Vec< alpaka::Dim< TAcc >, alpaka::Idx< TAcc > > out_pitches, alpaka::Vec< alpaka::Dim< TAcc >, alpaka::Idx< TAcc > > in_size) const
Definition Transform.hpp:64
ALPAKA_FN_ACC void operator()(TAcc const &acc, T const *in_ptr, T *out_ptr, alpaka::Idx< TAcc > size) const
Definition Transform.hpp:35