alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
BufGenericSyclTraits.hpp
Go to the documentation of this file.
1/* Copyright 2025 Jan Stephan, Luca Ferragina, Aurora Perego, Andrea Bocci, Anton Reinhard
2 * SPDX-License-Identifier: MPL-2.0
3 */
4
5#pragma once
6
10
11#ifdef ALPAKA_ACC_SYCL_ENABLED
12
13namespace alpaka::trait
14{
15 //! The SYCL device memory buffer type trait specialization.
16 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
17 struct BufType<DevGenericSycl<TTag>, TElem, TDim, TIdx>
18 {
19 using type = BufGenericSycl<TElem, TDim, TIdx, TTag>;
20 };
21
22 //! The BufGenericSycl device type trait specialization.
23 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
24 struct DevType<BufGenericSycl<TElem, TDim, TIdx, TTag>>
25 {
26 using type = DevGenericSycl<TTag>;
27 };
28
29 //! The BufGenericSycl device get trait specialization.
30 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
31 struct GetDev<BufGenericSycl<TElem, TDim, TIdx, TTag>>
32 {
33 ALPAKA_FN_HOST static auto getDev(BufGenericSycl<TElem, TDim, TIdx, TTag> const& buf) -> DevGenericSycl<TTag>
34 {
35 return buf.m_spBufImpl->m_dev;
36 }
37 };
38
39 //! The BufGenericSycl dimension getter trait.
40 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
41 struct DimType<BufGenericSycl<TElem, TDim, TIdx, TTag>>
42 {
43 using type = TDim;
44 };
45
46 //! The BufGenericSycl memory element type get trait specialization.
47 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
48 struct ElemType<BufGenericSycl<TElem, TDim, TIdx, TTag>>
49 {
50 using type = TElem;
51 };
52
53 //! The BufGenericSycl width get trait specialization.
54 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
55 struct GetExtents<BufGenericSycl<TElem, TDim, TIdx, TTag>>
56 {
57 ALPAKA_FN_HOST auto operator()(BufGenericSycl<TElem, TDim, TIdx, TTag> const& buf)
58 {
59 return buf.m_spBufImpl->m_extentElements;
60 }
61 };
62
63 //! The BufGenericSycl native pointer get trait specialization.
64 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
65 struct GetPtrNative<BufGenericSycl<TElem, TDim, TIdx, TTag>>
66 {
67 ALPAKA_FN_HOST static auto getPtrNative(BufGenericSycl<TElem, TDim, TIdx, TTag> const& buf) -> TElem const*
68 {
69 return buf.m_spBufImpl->m_pMem;
70 }
71
72 ALPAKA_FN_HOST static auto getPtrNative(BufGenericSycl<TElem, TDim, TIdx, TTag>& buf) -> TElem*
73 {
74 return buf.m_spBufImpl->m_pMem;
75 }
76 };
77
78 //! The BufGenericSycl pointer on device get trait specialization.
79 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
80 struct GetPtrDev<BufGenericSycl<TElem, TDim, TIdx, TTag>, DevGenericSycl<TTag>>
81 {
82 ALPAKA_FN_HOST static auto getPtrDev(
83 BufGenericSycl<TElem, TDim, TIdx, TTag> const& buf,
84 DevGenericSycl<TTag> const& dev) -> TElem const*
85 {
86 if(dev == getDev(buf))
87 {
88 return buf.m_spBufImpl->m_pMem;
89 }
90 else
91 {
92 throw std::runtime_error("The buffer is not accessible from the given device!");
93 }
94 }
95
96 ALPAKA_FN_HOST static auto getPtrDev(
97 BufGenericSycl<TElem, TDim, TIdx, TTag>& buf,
98 DevGenericSycl<TTag> const& dev) -> TElem*
99 {
100 if(dev == getDev(buf))
101 {
102 return buf.m_spBufImpl->m_pMem;
103 }
104 else
105 {
106 throw std::runtime_error("The buffer is not accessible from the given device!");
107 }
108 }
109 };
110
111 //! The BufGenericSycl offset get trait specialization.
112 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
113 struct GetOffsets<BufGenericSycl<TElem, TDim, TIdx, TTag>>
114 {
115 ALPAKA_FN_HOST auto operator()(BufGenericSycl<TElem, TDim, TIdx, TTag> const& /*buf*/) const -> Vec<TDim, TIdx>
116 {
117 return Vec<TDim, TIdx>::zeros();
118 }
119 };
120
121 //! The BufGenericSycl idx type trait specialization.
122 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
123 struct IdxType<BufGenericSycl<TElem, TDim, TIdx, TTag>>
124 {
125 using type = TIdx;
126 };
127
128 //! The MakeConstBuf trait for Sycl buffers.
129 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
130 struct MakeConstBuf<BufGenericSycl<TElem, TDim, TIdx, TTag>>
131 {
132 ALPAKA_FN_HOST static auto makeConstBuf(BufGenericSycl<TElem, TDim, TIdx, TTag> const& buf)
133 -> ConstBufGenericSycl<TElem, TDim, TIdx, TTag>
134 {
135 return ConstBufGenericSycl<TElem, TDim, TIdx, TTag>(buf);
136 }
137
138 ALPAKA_FN_HOST static auto makeConstBuf(BufGenericSycl<TElem, TDim, TIdx, TTag>&& buf)
139 -> ConstBufGenericSycl<TElem, TDim, TIdx, TTag>
140 {
141 return ConstBufGenericSycl<TElem, TDim, TIdx, TTag>(std::move(buf));
142 }
143 };
144
145 //! The SYCL memory allocation trait specialization.
146 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
147 struct BufAlloc<TElem, TDim, TIdx, DevGenericSycl<TTag>>
148 {
149 template<typename TExtent>
150 ALPAKA_FN_HOST static auto allocBuf(DevGenericSycl<TTag> const& dev, TExtent const& extent)
151 -> BufGenericSycl<TElem, TDim, TIdx, TTag>
152 {
154
155# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
156 if constexpr(TDim::value == 0)
157 std::cout << __func__ << " ewb: " << sizeof(TElem) << '\n';
158 else if constexpr(TDim::value == 1)
159 {
160 auto const width = getWidth(extent);
161
162 auto const widthBytes = width * static_cast<TIdx>(sizeof(TElem));
163 std::cout << __func__ << " ew: " << width << " ewb: " << widthBytes << '\n';
164 }
165 else if constexpr(TDim::value == 2)
166 {
167 auto const width = getWidth(extent);
168 auto const height = getHeight(extent);
169
170 auto const widthBytes = width * static_cast<TIdx>(sizeof(TElem));
171 std::cout << __func__ << " ew: " << width << " eh: " << height << " ewb: " << widthBytes
172 << " pitch: " << widthBytes << '\n';
173 }
174 else if constexpr(TDim::value == 3)
175 {
176 auto const width = getWidth(extent);
177 auto const height = getHeight(extent);
178 auto const depth = getDepth(extent);
179
180 auto const widthBytes = width * static_cast<TIdx>(sizeof(TElem));
181 std::cout << __func__ << " ew: " << width << " eh: " << height << " ed: " << depth
182 << " ewb: " << widthBytes << " pitch: " << widthBytes << '\n';
183 }
184# endif
185
186 auto const& [nativeDev, nativeContext] = dev.getNativeHandle();
187 TElem* memPtr = sycl::malloc_device<TElem>(
188 static_cast<std::size_t>(getExtentProduct(extent)),
189 nativeDev,
190 nativeContext);
191 auto deleter = [ctx = nativeContext](TElem* ptr) { sycl::free(ptr, ctx); };
192
193 return BufGenericSycl<TElem, TDim, TIdx, TTag>(dev, memPtr, std::move(deleter), extent);
194 }
195 };
196
197 //! The BufGenericSycl stream-ordered memory allocation capability trait specialization.
198 template<typename TDim, concepts::Tag TTag>
199 struct HasAsyncBufSupport<TDim, DevGenericSycl<TTag>> : std::true_type
200 {
201 };
202
203 //! The BufGenericSycl stream-ordered memory allocation trait specialization.
204 template<typename TElem, typename TDim, typename TIdx, concepts::Tag TTag>
205 struct AsyncBufAlloc<TElem, TDim, TIdx, DevGenericSycl<TTag>>
206 {
207 template<bool TBlocking, typename TExtent>
208 ALPAKA_FN_HOST static auto allocAsyncBuf(
209 detail::QueueGenericSyclBase<TTag, TBlocking> queue,
210 TExtent const& extent) -> BufGenericSycl<TElem, TDim, TIdx, TTag>
211 {
213
214# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
215 if constexpr(TDim::value == 0)
216 std::cout << __func__ << " ewb: " << sizeof(TElem) << '\n';
217 else if constexpr(TDim::value == 1)
218 {
219 auto const width = getWidth(extent);
220
221 auto const widthBytes = width * static_cast<TIdx>(sizeof(TElem));
222 std::cout << __func__ << " ew: " << width << " ewb: " << widthBytes << '\n';
223 }
224 else if constexpr(TDim::value == 2)
225 {
226 auto const width = getWidth(extent);
227 auto const height = getHeight(extent);
228
229 auto const widthBytes = width * static_cast<TIdx>(sizeof(TElem));
230 std::cout << __func__ << " ew: " << width << " eh: " << height << " ewb: " << widthBytes
231 << " pitch: " << widthBytes << '\n';
232 }
233 else if constexpr(TDim::value == 3)
234 {
235 auto const width = getWidth(extent);
236 auto const height = getHeight(extent);
237 auto const depth = getDepth(extent);
238
239 auto const widthBytes = width * static_cast<TIdx>(sizeof(TElem));
240 std::cout << __func__ << " ew: " << width << " eh: " << height << " ed: " << depth
241 << " ewb: " << widthBytes << " pitch: " << widthBytes << '\n';
242 }
243# endif
244
245 sycl::queue q = queue.getNativeHandle();
246 TElem* memPtr = sycl::malloc_device<TElem>(static_cast<std::size_t>(getExtentProduct(extent)), q);
247 auto deleter = [q](TElem* ptr) mutable
248 {
249 if constexpr(TBlocking)
250 {
251 // If the queue is blocking, all operations submitted when the buffer goes out of scope should
252 // always have completed. Free the memory immediately, and wait for the free operation to complete.
253 // From SYCL 4.8.3.6: Note: Whether free is blocking or non-blocking is unspecified. Applications
254 // should not rely on free for synchronization, nor assume that free cannot cause deadlocks.
255 sycl::free(ptr, q);
256 q.wait();
257 }
258 else
259 {
260 // If the queue is non-blocking, enqueue the free() operation in a host_task.
261 q.submit([&](sycl::handler& cgh) { //
262 cgh.host_task([=]() { sycl::free(ptr, q); });
263 });
264 }
265 };
266
267 return BufGenericSycl<TElem, TDim, TIdx, TTag>(getDev(queue), memPtr, std::move(deleter), extent);
268 }
269 };
270
271 //! The pinned/mapped memory allocation capability trait specialization.
272 template<concepts::Tag TTag>
273 struct HasMappedBufSupport<PlatformGenericSycl<TTag>> : public std::true_type
274 {
275 };
276
277 //! The pinned/mapped memory allocation trait specialization for the SYCL devices.
278 template<concepts::Tag TTag, typename TElem, typename TDim, typename TIdx>
279 struct BufAllocMapped<PlatformGenericSycl<TTag>, TElem, TDim, TIdx>
280 {
281 template<typename TExtent>
282 ALPAKA_FN_HOST static auto allocMappedBuf(
283 DevCpu const& host,
284 PlatformGenericSycl<TTag> const& platform,
285 TExtent const& extent) -> BufCpu<TElem, TDim, TIdx>
286 {
288
289 // Allocate SYCL page-locked memory on the host, mapped into the SYCL platform's address space and
290 // accessible to all devices in the SYCL platform.
291 auto ctx = platform.syclContext();
292 TElem* memPtr = sycl::malloc_host<TElem>(static_cast<std::size_t>(getExtentProduct(extent)), ctx);
293 auto deleter = [ctx](TElem* ptr) { sycl::free(ptr, ctx); };
294
295 return BufCpu<TElem, TDim, TIdx>(host, memPtr, std::move(deleter), extent);
296 }
297 };
298
299} // namespace alpaka::trait
300
301#endif
#define ALPAKA_DEBUG_MINIMAL_LOG_SCOPE
Definition Debug.hpp:55
ALPAKA_NO_HOST_ACC_WARNING static ALPAKA_FN_HOST_ACC constexpr auto zeros() -> Vec< TDim, TVal >
Zero value constructor.
Definition Vec.hpp:99
#define ALPAKA_FN_HOST
Definition Common.hpp:40
The accelerator traits.
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtentProduct(T const &object) -> Idx< T >
Definition Traits.hpp:134
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getHeight(TExtent const &extent=TExtent()) -> Idx< TExtent >
Definition Traits.hpp:108
ALPAKA_FN_HOST auto makeConstBuf(TBuf const &buf)
Creates a constant buffer from the given mutable buffer.
Definition Traits.hpp:212
ALPAKA_FN_HOST auto allocMappedBuf(DevCpu const &host, TPlatform const &platform, TExtent const &extent=TExtent())
Allocates pinned/mapped host memory, accessible by all devices in the given platform.
Definition Traits.hpp:151
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition Traits.hpp:165
ALPAKA_FN_HOST auto allocBuf(TDev const &dev, TExtent const &extent=TExtent())
Allocates memory on the given device.
Definition Traits.hpp:77
ALPAKA_FN_HOST auto getDev(T const &t)
Definition Traits.hpp:68
ALPAKA_FN_HOST auto getPtrDev(TView const &view, TDev const &dev) -> Elem< TView > const *
Gets the pointer to the view on the given device.
Definition Traits.hpp:186
ALPAKA_FN_HOST auto allocAsyncBuf(TQueue queue, TExtent const &extent=TExtent())
Allocates stream-ordered memory on the given device.
Definition Traits.hpp:92
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getDepth(TExtent const &extent=TExtent()) -> Idx< TExtent >
Definition Traits.hpp:121
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getWidth(TExtent const &extent=TExtent()) -> Idx< TExtent >
Definition Traits.hpp:95