alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
Set.hpp
Go to the documentation of this file.
1/* Copyright 2023 Benjamin Worpitz, Erik Zenker, Matthias Werner, René Widera, Andrea Bocci, Bernhard Manfred Gruber,
2 * Antonio Di Pilato, Jan Stephan
3 * SPDX-License-Identifier: MPL-2.0
4 */
5
6#pragma once
7
10#include "alpaka/core/Hip.hpp"
11#include "alpaka/dev/Traits.hpp"
19
20#include <cstddef>
21
22#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
23
24namespace alpaka
25{
//! Forward declaration of the DevUniformCudaHipRt class template (parameterized on the API type TApi).
//! NOTE(review): no use of this name is visible in this listing; presumably it appears on a line the
//! listing dropped — confirm against the original header.
26 template<typename TApi>
27 class DevUniformCudaHipRt;
28
29 namespace detail
30 {
31 //! The CUDA/HIP memory set task base.
//!
//! Holds the state shared by the dimension-specific set tasks: a reference to the destination view,
//! a copy of the byte value to write and a copy of the extent to fill.
//! NOTE(review): this listing does not show the line that names the struct (between the template header
//! and the opening brace), nor one member-initializer line (listing line 39). Presumably the missing
//! initializer sets m_iDevice, which is a const member with no visible initializer — confirm against the
//! original header.
32 template<typename TApi, typename TDim, typename TView, typename TExtent>
34 {
//! Binds \p view by reference and copies \p byte and \p extent into the task.
35 TaskSetUniformCudaHipBase(TView& view, std::uint8_t const& byte, TExtent const& extent)
36 : m_view(view)
37 , m_byte(byte)
38 , m_extent(extent)
40 {
41 }
42
43 protected:
44 TView& m_view; //!< Destination view; stored as a reference, so the view must outlive the task.
45 std::uint8_t const m_byte; //!< Byte value written by the derived tasks.
46 TExtent const m_extent; //!< Extent of the region to set (copied at construction).
47 std::int32_t const m_iDevice; //!< NOTE(review): initializer not visible in this listing; presumably a device handle/index — confirm.
48 };
49
50 //! The CUDA/HIP memory set task.
//! NOTE(review): the declaration that should follow this template header (listing line 52) is not visible
//! in this listing. Presumably it declares the primary TaskSetUniformCudaHip template that the DimInt<N>
//! partial specializations in this file specialize — confirm against the original header.
51 template<typename TApi, typename TDim, typename TView, typename TExtent>
53
54 //! The scalar CUDA/HIP memory set task.
55 template<typename TApi, typename TView, typename TExtent>
56 struct TaskSetUniformCudaHip<TApi, DimInt<0u>, TView, TExtent>
57 : public TaskSetUniformCudaHipBase<TApi, DimInt<0u>, TView, TExtent>
58 {
59 template<typename TViewFwd>
60 TaskSetUniformCudaHip(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
61 : TaskSetUniformCudaHipBase<TApi, DimInt<0u>, TView, TExtent>(
62 std::forward<TViewFwd>(view),
63 byte,
64 extent)
65 {
66 }
67
68 template<typename TQueue>
69 auto enqueue(TQueue& queue) const -> void
70 {
71 // Initiate the memory set.
72 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memsetAsync(
73 getPtrNative(this->m_view),
74 static_cast<int>(this->m_byte),
75 sizeof(Elem<TView>),
76 queue.getNativeHandle()));
77 }
78 };
79
80 //! The 1D CUDA/HIP memory set task.
81 template<typename TApi, typename TView, typename TExtent>
82 struct TaskSetUniformCudaHip<TApi, DimInt<1u>, TView, TExtent>
83 : public TaskSetUniformCudaHipBase<TApi, DimInt<1u>, TView, TExtent>
84 {
85 template<typename TViewFwd>
86 TaskSetUniformCudaHip(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
87 : TaskSetUniformCudaHipBase<TApi, DimInt<1u>, TView, TExtent>(
88 std::forward<TViewFwd>(view),
89 byte,
90 extent)
91 {
92 }
93
94 template<typename TQueue>
95 auto enqueue(TQueue& queue) const -> void
96 {
97 auto& view = this->m_view;
98 auto const& extent = this->m_extent;
99
100 auto const extentWidth = getWidth(extent);
101 ALPAKA_ASSERT(extentWidth <= getWidth(view));
102
103 if(extentWidth == 0)
104 {
105 return;
106 }
107
108 // Initiate the memory set.
109 auto const extentWidthBytes = static_cast<std::size_t>(extentWidth) * sizeof(Elem<TView>);
110 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memsetAsync(
111 getPtrNative(view),
112 static_cast<int>(this->m_byte),
113 extentWidthBytes,
114 queue.getNativeHandle()));
115 }
116 };
117
118 //! The 2D CUDA/HIP memory set task.
119 template<typename TApi, typename TView, typename TExtent>
120 struct TaskSetUniformCudaHip<TApi, DimInt<2u>, TView, TExtent>
121 : public TaskSetUniformCudaHipBase<TApi, DimInt<2u>, TView, TExtent>
122 {
123 template<typename TViewFwd>
124 TaskSetUniformCudaHip(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
125 : TaskSetUniformCudaHipBase<TApi, DimInt<2u>, TView, TExtent>(
126 std::forward<TViewFwd>(view),
127 byte,
128 extent)
129 {
130 }
131
132 template<typename TQueue>
133 auto enqueue(TQueue& queue) const -> void
134 {
135 auto& view = this->m_view;
136 auto const& extent = this->m_extent;
137
138 auto const extentWidth = getWidth(extent);
139 auto const extentHeight = getHeight(extent);
140
141 if(extentWidth == 0 || extentHeight == 0)
142 {
143 return;
144 }
145
146 auto const extentWidthBytes = static_cast<std::size_t>(extentWidth) * sizeof(Elem<TView>);
147
148# if !defined(NDEBUG)
149 auto const dstWidth = getWidth(view);
150 auto const dstHeight = getHeight(view);
151# endif
152 auto const dstRowPitchBytes = static_cast<std::size_t>(getPitchesInBytes(view)[0]);
153 auto const dstNativePtr = reinterpret_cast<void*>(getPtrNative(view));
154 ALPAKA_ASSERT(extentWidth <= dstWidth);
155 ALPAKA_ASSERT(extentHeight <= dstHeight);
156
157 // Initiate the memory set.
158 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memset2DAsync(
159 dstNativePtr,
160 dstRowPitchBytes,
161 static_cast<int>(this->m_byte),
162 extentWidthBytes,
163 static_cast<std::size_t>(extentHeight),
164 queue.getNativeHandle()));
165 }
166 };
167
168 //! The 3D CUDA/HIP memory set task.
169 template<typename TApi, typename TView, typename TExtent>
170 struct TaskSetUniformCudaHip<TApi, DimInt<3u>, TView, TExtent>
171 : public TaskSetUniformCudaHipBase<TApi, DimInt<3u>, TView, TExtent>
172 {
173 template<typename TViewFwd>
174 TaskSetUniformCudaHip(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
175 : TaskSetUniformCudaHipBase<TApi, DimInt<3u>, TView, TExtent>(
176 std::forward<TViewFwd>(view),
177 byte,
178 extent)
179 {
180 }
181
182 template<typename TQueue>
183 auto enqueue(TQueue& queue) const -> void
184 {
186
187 auto& view = this->m_view;
188 auto const& extent = this->m_extent;
189
190 auto const extentWidth = getWidth(extent);
191 auto const extentHeight = getHeight(extent);
192 auto const extentDepth = getDepth(extent);
193
194 // This is not only an optimization but also prevents a division by zero.
195 if(extentWidth == 0 || extentHeight == 0 || extentDepth == 0)
196 {
197 return;
198 }
199
200 auto const dstWidth = getWidth(view);
201# if !defined(NDEBUG)
202 auto const dstHeight = getHeight(view);
203 auto const dstDepth = getDepth(view);
204# endif
205 auto const [dstSlicePitchBytes, dstRowPitchBytes, _] = getPitchesInBytes(view);
206 auto const dstNativePtr = reinterpret_cast<void*>(getPtrNative(view));
207 ALPAKA_ASSERT(extentWidth <= dstWidth);
208 ALPAKA_ASSERT(extentHeight <= dstHeight);
209 ALPAKA_ASSERT(extentDepth <= dstDepth);
210
211 // Fill CUDA parameter structures.
212 typename TApi::PitchedPtr_t const pitchedPtrVal = TApi::makePitchedPtr(
213 dstNativePtr,
214 static_cast<std::size_t>(dstRowPitchBytes),
215 static_cast<std::size_t>(dstWidth) * sizeof(Elem),
216 static_cast<std::size_t>(dstSlicePitchBytes / dstRowPitchBytes));
217
218 typename TApi::Extent_t const extentVal = TApi::makeExtent(
219 static_cast<std::size_t>(extentWidth) * sizeof(Elem),
220 static_cast<std::size_t>(extentHeight),
221 static_cast<std::size_t>(extentDepth));
222
223 // Initiate the memory set.
224 ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memset3DAsync(
225 pitchedPtrVal,
226 static_cast<int>(this->m_byte),
227 extentVal,
228 queue.getNativeHandle()));
229 }
230 };
231 } // namespace detail
232
233 namespace trait
234 {
235 //! The CUDA device memory set trait specialization.
//!
//! NOTE(review): this listing does not show the line naming the specialized trait struct (listing
//! line 237), the trailing return type (line 241) or the body statement (line 243). The symbol index at
//! the end of this listing records the return type of createTaskMemset as
//! alpaka::detail::TaskSetUniformCudaHip<TApi, TDim, TView, TExtent>; presumably the body constructs that
//! task from (view, byte, extent) — confirm against the original header.
236 template<typename TApi, typename TDim>
238 {
//! Creates the memory set task for \p view, \p byte and \p extent (host-only function).
239 template<typename TExtent, typename TView>
240 ALPAKA_FN_HOST static auto createTaskMemset(TView& view, std::uint8_t const& byte, TExtent const& extent)
242 {
244 }
245 };
246
247 //! The CUDA non-blocking device queue scalar set enqueue trait specialization.
//!
//! The visible body forwards to task.enqueue(queue) and does not call wait().
//! NOTE(review): the queue template argument, the head of the enqueue() signature and the first body
//! line are not visible in this listing. The symbol index at the end of this listing records the signature as
//! enqueue(QueueUniformCudaHipRtNonBlocking<TApi>& queue, TaskSetUniformCudaHip<TApi, DimInt<0u>, TView, TExtent> const& task) -> void
//! — confirm against the original header.
248 template<typename TApi, typename TView, typename TExtent>
249 struct Enqueue<
251 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<0u>, TView, TExtent>>
252 {
255 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<0u>, TView, TExtent> const& task) -> void
256 {
258
259 task.enqueue(queue);
260 }
261 };
262
263 //! The CUDA blocking device queue scalar set enqueue trait specialization.
//!
//! The visible body forwards to task.enqueue(queue) and then calls wait(queue) before returning.
//! NOTE(review): the queue template argument, the head of the enqueue() signature and the first body
//! line are not visible in this listing. The symbol index at the end of this listing records the signature as
//! enqueue(QueueUniformCudaHipRtBlocking<TApi>& queue, TaskSetUniformCudaHip<TApi, DimInt<0u>, TView, TExtent> const& task) -> void
//! — confirm against the original header.
264 template<typename TApi, typename TView, typename TExtent>
265 struct Enqueue<
267 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<0u>, TView, TExtent>>
268 {
271 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<0u>, TView, TExtent> const& task) -> void
272 {
274
275 task.enqueue(queue);
276
// Block until the queue has finished, which is what makes this specialization blocking.
277 wait(queue);
278 }
279 };
280
281 //! The CUDA non-blocking device queue 1D set enqueue trait specialization.
//!
//! The visible body forwards to task.enqueue(queue) and does not call wait().
//! NOTE(review): the queue template argument, the head of the enqueue() signature and the first body
//! line are not visible in this listing. The symbol index at the end of this listing records the signature as
//! enqueue(QueueUniformCudaHipRtNonBlocking<TApi>& queue, TaskSetUniformCudaHip<TApi, DimInt<1u>, TView, TExtent> const& task) -> void
//! — confirm against the original header.
282 template<typename TApi, typename TView, typename TExtent>
283 struct Enqueue<
285 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<1u>, TView, TExtent>>
286 {
289 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<1u>, TView, TExtent> const& task) -> void
290 {
292
293 task.enqueue(queue);
294 }
295 };
296
297 //! The CUDA blocking device queue 1D set enqueue trait specialization.
//!
//! The visible body forwards to task.enqueue(queue) and then calls wait(queue) before returning.
//! NOTE(review): the queue template argument, the head of the enqueue() signature and the first body
//! line are not visible in this listing. The symbol index at the end of this listing records the signature as
//! enqueue(QueueUniformCudaHipRtBlocking<TApi>& queue, TaskSetUniformCudaHip<TApi, DimInt<1u>, TView, TExtent> const& task) -> void
//! — confirm against the original header.
298 template<typename TApi, typename TView, typename TExtent>
299 struct Enqueue<
301 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<1u>, TView, TExtent>>
302 {
305 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<1u>, TView, TExtent> const& task) -> void
306 {
308
309 task.enqueue(queue);
310
// Block until the queue has finished, which is what makes this specialization blocking.
311 wait(queue);
312 }
313 };
314
315 //! The CUDA non-blocking device queue 2D set enqueue trait specialization.
//!
//! The visible body forwards to task.enqueue(queue) and does not call wait().
//! NOTE(review): the queue template argument, the head of the enqueue() signature and the first body
//! line are not visible in this listing. The symbol index at the end of this listing records the signature as
//! enqueue(QueueUniformCudaHipRtNonBlocking<TApi>& queue, TaskSetUniformCudaHip<TApi, DimInt<2u>, TView, TExtent> const& task) -> void
//! — confirm against the original header.
316 template<typename TApi, typename TView, typename TExtent>
317 struct Enqueue<
319 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<2u>, TView, TExtent>>
320 {
323 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<2u>, TView, TExtent> const& task) -> void
324 {
326
327 task.enqueue(queue);
328 }
329 };
330
331 //! The CUDA blocking device queue 2D set enqueue trait specialization.
//!
//! The visible body forwards to task.enqueue(queue) and then calls wait(queue) before returning.
//! NOTE(review): the queue template argument, the head of the enqueue() signature and the first body
//! line are not visible in this listing. The symbol index at the end of this listing records the signature as
//! enqueue(QueueUniformCudaHipRtBlocking<TApi>& queue, TaskSetUniformCudaHip<TApi, DimInt<2u>, TView, TExtent> const& task) -> void
//! — confirm against the original header.
332 template<typename TApi, typename TView, typename TExtent>
333 struct Enqueue<
335 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<2u>, TView, TExtent>>
336 {
339 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<2u>, TView, TExtent> const& task) -> void
340 {
342
343 task.enqueue(queue);
344
// Block until the queue has finished, which is what makes this specialization blocking.
345 wait(queue);
346 }
347 };
348
349 //! The CUDA non-blocking device queue 3D set enqueue trait specialization.
//!
//! The visible body forwards to task.enqueue(queue) and does not call wait().
//! NOTE(review): the queue template argument, the head of the enqueue() signature and the first body
//! line are not visible in this listing. The symbol index at the end of this listing records the signature as
//! enqueue(QueueUniformCudaHipRtNonBlocking<TApi>& queue, TaskSetUniformCudaHip<TApi, DimInt<3u>, TView, TExtent> const& task) -> void
//! — confirm against the original header.
350 template<typename TApi, typename TView, typename TExtent>
351 struct Enqueue<
353 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<3u>, TView, TExtent>>
354 {
357 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<3u>, TView, TExtent> const& task) -> void
358 {
360
361 task.enqueue(queue);
362 }
363 };
364
365 //! The CUDA blocking device queue 3D set enqueue trait specialization.
//!
//! The visible body forwards to task.enqueue(queue) and then calls wait(queue) before returning.
//! NOTE(review): the queue template argument, the head of the enqueue() signature and the first body
//! line are not visible in this listing. The symbol index at the end of this listing records the signature as
//! enqueue(QueueUniformCudaHipRtBlocking<TApi>& queue, TaskSetUniformCudaHip<TApi, DimInt<3u>, TView, TExtent> const& task) -> void
//! — confirm against the original header.
366 template<typename TApi, typename TView, typename TExtent>
367 struct Enqueue<
369 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<3u>, TView, TExtent>>
370 {
373 alpaka::detail::TaskSetUniformCudaHip<TApi, DimInt<3u>, TView, TExtent> const& task) -> void
374 {
376
377 task.enqueue(queue);
378
// Block until the queue has finished, which is what makes this specialization blocking.
379 wait(queue);
380 }
381 };
382 } // namespace trait
383} // namespace alpaka
384
385#endif
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
Definition Assert.hpp:13
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The CUDA/HIP RT device handle.
#define ALPAKA_FN_HOST
Definition Common.hpp:40
The alpaka accelerator library.
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView > >
Definition Traits.hpp:196
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getHeight(TExtent const &extent=TExtent()) -> Idx< TExtent >
Definition Traits.hpp:108
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition Traits.hpp:136
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
Definition Traits.hpp:21
ALPAKA_FN_HOST auto getDev(T const &t)
Definition Traits.hpp:68
ALPAKA_FN_HOST auto getNativeHandle(TImpl const &impl)
Get the native handle of the alpaka object. It will return the alpaka object handle if there is any,...
Definition Traits.hpp:29
std::integral_constant< std::size_t, N > DimInt
ALPAKA_FN_HOST auto wait(TAwaited const &awaited) -> void
Blocks the calling thread until the given awaited action has completed.
Definition Traits.hpp:34
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getDepth(TExtent const &extent=TExtent()) -> Idx< TExtent >
Definition Traits.hpp:121
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getWidth(TExtent const &extent=TExtent()) -> Idx< TExtent >
Definition Traits.hpp:95
STL namespace.
The CUDA/HIP memory set task base.
Definition Set.hpp:34
TaskSetUniformCudaHipBase(TView &view, std::uint8_t const &byte, TExtent const &extent)
Definition Set.hpp:35
TaskSetUniformCudaHip(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent)
Definition Set.hpp:60
TaskSetUniformCudaHip(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent)
Definition Set.hpp:86
TaskSetUniformCudaHip(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent)
Definition Set.hpp:124
TaskSetUniformCudaHip(TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent)
Definition Set.hpp:174
The CUDA/HIP memory set task.
Definition Set.hpp:52
static ALPAKA_FN_HOST auto createTaskMemset(TView &view, std::uint8_t const &byte, TExtent const &extent) -> alpaka::detail::TaskSetUniformCudaHip< TApi, TDim, TView, TExtent >
Definition Set.hpp:240
The memory set task trait.
Definition Traits.hpp:114
static ALPAKA_FN_HOST auto enqueue(QueueUniformCudaHipRtBlocking< TApi > &queue, alpaka::detail::TaskSetUniformCudaHip< TApi, DimInt< 0u >, TView, TExtent > const &task) -> void
Definition Set.hpp:269
static ALPAKA_FN_HOST auto enqueue(QueueUniformCudaHipRtBlocking< TApi > &queue, alpaka::detail::TaskSetUniformCudaHip< TApi, DimInt< 3u >, TView, TExtent > const &task) -> void
Definition Set.hpp:371
static ALPAKA_FN_HOST auto enqueue(QueueUniformCudaHipRtBlocking< TApi > &queue, alpaka::detail::TaskSetUniformCudaHip< TApi, DimInt< 1u >, TView, TExtent > const &task) -> void
Definition Set.hpp:303
static ALPAKA_FN_HOST auto enqueue(QueueUniformCudaHipRtBlocking< TApi > &queue, alpaka::detail::TaskSetUniformCudaHip< TApi, DimInt< 2u >, TView, TExtent > const &task) -> void
Definition Set.hpp:337
static ALPAKA_FN_HOST auto enqueue(QueueUniformCudaHipRtNonBlocking< TApi > &queue, alpaka::detail::TaskSetUniformCudaHip< TApi, DimInt< 1u >, TView, TExtent > const &task) -> void
Definition Set.hpp:287
static ALPAKA_FN_HOST auto enqueue(QueueUniformCudaHipRtNonBlocking< TApi > &queue, alpaka::detail::TaskSetUniformCudaHip< TApi, DimInt< 2u >, TView, TExtent > const &task) -> void
Definition Set.hpp:321
static ALPAKA_FN_HOST auto enqueue(QueueUniformCudaHipRtNonBlocking< TApi > &queue, alpaka::detail::TaskSetUniformCudaHip< TApi, DimInt< 0u >, TView, TExtent > const &task) -> void
Definition Set.hpp:253
static ALPAKA_FN_HOST auto enqueue(QueueUniformCudaHipRtNonBlocking< TApi > &queue, alpaka::detail::TaskSetUniformCudaHip< TApi, DimInt< 3u >, TView, TExtent > const &task) -> void
Definition Set.hpp:355
The queue enqueue trait.
Definition Traits.hpp:27