alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Fill.hpp
Go to the documentation of this file.
1/* Copyright 2025 Maria Michailidi, Anna Polova, Abdulrahman Al Marzouqi
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10#include "alpaka/core/Hip.hpp"
11#include "alpaka/dev/Traits.hpp"
22
23#include <iostream>
24#include <type_traits>
25
26#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
27
28namespace alpaka
29{
30 template<typename TApi>
31 class DevUniformCudaHipRt;
32
33 namespace detail
34 {
35 template<typename TElem, typename TExtent, typename TPitchBytes>
37 {
38 template<typename TAcc>
40 TAcc const& acc,
41 TElem* ptr,
42 TElem value,
43 TExtent extent,
44 TPitchBytes pitchBytes) const
45 {
46 for(auto const& idx : alpaka::uniformElementsND(acc, extent))
47 {
48 // Byte offset of element idx = sum over dimensions of (pitch * index). The host code checks that the pitches are a multiple of TElem's alignment, which is what the alignment assumption below relies on.
49 std::uintptr_t offsetBytes = static_cast<std::uintptr_t>((pitchBytes * idx).sum());
50 TElem* elem = reinterpret_cast<TElem*>(
51 __builtin_assume_aligned(reinterpret_cast<std::uint8_t*>(ptr) + offsetBytes, alignof(TElem)));
52
53 // Write the fill value at the computed element address.
54 *elem = value;
55 }
56 }
57 };
58
59 template<typename TElem>
61 {
62 template<typename TAcc>
63 ALPAKA_FN_ACC void operator()([[maybe_unused]] TAcc const& acc, TElem* ptr, TElem value) const
64 {
65 // A zero-dimensional buffer always has a single element, so it is written directly through ptr; acc is not needed.
66 *ptr = value;
67 }
68 };
69
70
71 } // namespace detail
72
73 namespace trait
74 {
75 template<typename TDim, typename TApi>
77 {
78 template<typename TExtent, typename TViewFwd, typename TValue>
79 ALPAKA_FN_HOST static auto createTaskFill(TViewFwd&& view, TValue const& value, TExtent const& extent)
80 {
81 using View = std::remove_reference_t<TViewFwd>;
82 using Idx = alpaka::Idx<View>;
87 static_assert(
88 std::is_trivially_copyable_v<Elem>,
89 "Only trivially copyable types are supported for fill");
90
91 if constexpr(TDim::value == 0)
92 {
93 // A zero-dimensional buffer always has a single element, so the work division is built from default-constructed Vec extents.
94 WorkDiv grid{Vec{}, Vec{}, Vec{}};
95 return alpaka::createTaskKernel<Acc>(
96 grid,
98 std::data(view),
99 value);
100 }
101 else
102 {
103 // TODO: compute an efficient work division. For now: all-ones blocks, all-ones elements per thread, and all-ones threads except threads.x() = 64.
104 Vec const elements = Vec::ones();
105 Vec threads = Vec::ones();
106 threads.x() = 64;
107 Vec const blocks = Vec::ones();
108 WorkDiv grid = WorkDiv(blocks, threads, elements);
109
110 // Check that the pitches are a multiple of Elem's alignment; the N-dimensional fill kernel assumes element addresses derived from these pitches are aligned.
111 auto pitches = getPitchesInBytes(view);
112 for([[maybe_unused]] auto pitch : pitches)
113 {
114 ALPAKA_ASSERT(static_cast<std::size_t>(pitch) % alignof(Elem) == 0);
115 }
116 return alpaka::createTaskKernel<Acc>(
117 grid,
119 std::data(view),
120 value,
121 extent,
122 pitches);
123 }
124 }
125 };
126 } // namespace trait
127} // namespace alpaka
128
129#endif
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
Definition Assert.hpp:13
The CUDA/HIP RT device handle.
An n-dimensional vector.
Definition Vec.hpp:38
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition Vec.hpp:106
A basic class holding the work division as grid block extent, block thread and thread element extent.
#define ALPAKA_FN_ACC
All functions that can be used on an accelerator have to be attributed with ALPAKA_FN_ACC or ALPAKA_FN_HOST_ACC.
Definition Common.hpp:38
#define ALPAKA_FN_HOST
Definition Common.hpp:40
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition Traits.hpp:29
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
Definition Traits.hpp:199
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition Traits.hpp:21
ALPAKA_FN_ACC auto uniformElementsND(TAcc const &acc)
typename trait::AccType< T >::type Acc
The accelerator type trait alias template to remove the ::type.
Definition Traits.hpp:78
ALPAKA_FN_ACC void operator()(TAcc const &acc, TElem *ptr, TElem value) const
Definition Fill.hpp:63
ALPAKA_FN_ACC void operator()(TAcc const &acc, TElem *ptr, TElem value, TExtent extent, TPitchBytes pitchBytes) const
Definition Fill.hpp:39
static ALPAKA_FN_HOST auto createTaskFill(TViewFwd &&view, TValue const &value, TExtent const &extent)
Definition Fill.hpp:79