23 #include <type_traits>
25 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
32 template<
typename TApi,
typename TDim,
typename TViewDst,
typename TViewSrc,
typename TExtent>
36 template<
typename TApi,
typename TViewDst,
typename TViewSrc,
typename TExtent>
41 template<
typename TViewDstFwd>
43 TViewDstFwd&& viewDst,
44 TViewSrc
const& viewSrc,
45 [[maybe_unused]] TExtent
const& extent,
46 typename TApi::MemcpyKind_t
const& uniformMemCpyKind,
47 int const& iDstDevice,
48 int const& iSrcDevice)
49 : m_uniformMemCpyKind(uniformMemCpyKind)
50 , m_iDstDevice(iDstDevice)
51 , m_iSrcDevice(iSrcDevice)
52 , m_dstMemNative(reinterpret_cast<void*>(
getPtrNative(viewDst)))
53 , m_srcMemNative(reinterpret_cast<void const*>(
getPtrNative(viewSrc)))
55 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
60 template<
typename TQueue>
63 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
76 queue.getNativeHandle()));
80 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
83 std::cout << __func__ <<
" ddev: " << m_iDstDevice <<
" ew: " <<
Idx(1u)
85 <<
" dptr: " << m_dstMemNative <<
" sdev: " << m_iSrcDevice <<
" sw: " <<
Idx(1u)
86 <<
" sptr: " << m_srcMemNative << std::endl;
90 typename TApi::MemcpyKind_t m_uniformMemCpyKind;
94 void const* m_srcMemNative;
98 template<
typename TApi,
typename TViewDst,
typename TViewSrc,
typename TExtent>
103 template<
typename TViewDstFwd>
105 TViewDstFwd&& viewDst,
106 TViewSrc
const& viewSrc,
107 TExtent
const& extent,
108 typename TApi::MemcpyKind_t
const& uniformMemCpyKind,
109 int const& iDstDevice,
110 int const& iSrcDevice)
111 : m_uniformMemCpyKind(uniformMemCpyKind)
112 , m_iDstDevice(iDstDevice)
113 , m_iSrcDevice(iSrcDevice)
119 , m_extentWidthBytes(static_cast<std::size_t>(
getWidth(extent)) * sizeof(
Elem<TViewDst>))
120 , m_dstMemNative(reinterpret_cast<void*>(
getPtrNative(viewDst)))
121 , m_srcMemNative(reinterpret_cast<void const*>(
getPtrNative(viewSrc)))
123 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
129 template<
typename TQueue>
132 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
135 if(m_extentWidthBytes == std::size_t{0})
150 queue.getNativeHandle()));
154 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
157 std::cout << __func__ <<
" ddev: " << m_iDstDevice <<
" ew: " << m_extentWidth
158 <<
" ewb: " << m_extentWidthBytes <<
" dw: " << m_dstWidth <<
" dptr: " << m_dstMemNative
159 <<
" sdev: " << m_iSrcDevice <<
" sw: " << m_srcWidth <<
" sptr: " << m_srcMemNative
164 typename TApi::MemcpyKind_t m_uniformMemCpyKind;
167 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
172 std::size_t m_extentWidthBytes;
173 void* m_dstMemNative;
174 void const* m_srcMemNative;
178 template<
typename TApi,
typename TViewDst,
typename TViewSrc,
typename TExtent>
183 template<
typename TViewDstFwd>
185 TViewDstFwd&& viewDst,
186 TViewSrc
const& viewSrc,
187 TExtent
const& extent,
188 typename TApi::MemcpyKind_t
const& uniformMemcpyKind,
189 int const& iDstDevice,
190 int const& iSrcDevice)
191 : m_uniformMemCpyKind(uniformMemcpyKind)
192 , m_iDstDevice(iDstDevice)
193 , m_iSrcDevice(iSrcDevice)
197 , m_extentWidthBytes(static_cast<std::size_t>(
getWidth(extent)) * sizeof(
Elem<TViewDst>))
207 , m_dstMemNative(reinterpret_cast<void*>(
getPtrNative(viewDst)))
208 , m_srcMemNative(reinterpret_cast<void const*>(
getPtrNative(viewSrc)))
210 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
220 template<
typename TQueue>
223 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
227 if(m_extentWidthBytes == std::size_t{0} || m_extentHeight == 0)
243 static_cast<std::size_t
>(m_extentHeight),
245 queue.getNativeHandle()));
249 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
252 std::cout << __func__ <<
" ew: " << m_extentWidth <<
" eh: " << m_extentHeight
253 <<
" ewb: " << m_extentWidthBytes <<
" ddev: " << m_iDstDevice <<
" dw: " << m_dstWidth
254 <<
" dh: " << m_dstHeight <<
" dptr: " << m_dstMemNative <<
" dpitch: " << m_dstRowPitchBytes
255 <<
" sdev: " << m_iSrcDevice <<
" sw: " << m_srcWidth <<
" sh: " << m_srcHeight
256 <<
" sptr: " << m_srcMemNative <<
" spitch: " << m_srcRowPitchBytes << std::endl;
260 typename TApi::MemcpyKind_t m_uniformMemCpyKind;
263 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
266 std::size_t m_extentWidthBytes;
271 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
275 std::size_t m_dstRowPitchBytes;
276 std::size_t m_srcRowPitchBytes;
278 void* m_dstMemNative;
279 void const* m_srcMemNative;
283 template<
typename TApi,
typename TViewDst,
typename TViewSrc,
typename TExtent>
288 template<
typename TViewDstFwd>
290 TViewDstFwd&& viewDst,
291 TViewSrc
const& viewSrc,
292 TExtent
const& extent,
293 typename TApi::MemcpyKind_t
const& uniformMemcpyKind,
294 int const& iDstDevice,
295 int const& iSrcDevice)
296 : m_uniformMemCpyKind(uniformMemcpyKind)
297 , m_iDstDevice(iDstDevice)
298 , m_iSrcDevice(iSrcDevice)
300 , m_extentWidthBytes(static_cast<std::size_t>(m_extentWidth) * sizeof(
Elem<TViewDst>))
315 , m_dstMemNative(reinterpret_cast<void*>(
getPtrNative(viewDst)))
316 , m_srcMemNative(reinterpret_cast<void const*>(
getPtrNative(viewSrc)))
318 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
330 template<
typename TQueue>
333 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
337 if(m_extentWidthBytes == std::size_t{0} || m_extentHeight == 0 || m_extentDepth == 0)
343 typename TApi::Memcpy3DParms_t
const uniformCudaHipMemCpy3DParms(buildUniformCudaHipMemcpy3DParms());
351 TApi::memcpy3DAsync(&uniformCudaHipMemCpy3DParms, queue.getNativeHandle()));
355 ALPAKA_FN_HOST auto buildUniformCudaHipMemcpy3DParms() const -> typename TApi::Memcpy3DParms_t
360 typename TApi::Memcpy3DParms_t memCpy3DParms{};
361 memCpy3DParms.srcPtr = TApi::makePitchedPtr(
362 const_cast<void*
>(m_srcMemNative),
364 static_cast<std::size_t
>(m_srcWidth),
365 m_srcSlicePitchBytes / m_srcRowPitchBytes);
366 memCpy3DParms.dstPtr = TApi::makePitchedPtr(
369 static_cast<std::size_t
>(m_dstWidth),
370 m_dstSlicePitchBytes / m_dstRowPitchBytes);
371 memCpy3DParms.extent = TApi::makeExtent(
373 static_cast<std::size_t
>(m_extentHeight),
374 static_cast<std::size_t
>(m_extentDepth));
375 memCpy3DParms.kind = m_uniformMemCpyKind;
376 return memCpy3DParms;
379 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
382 std::cout << __func__ <<
" ew: " << m_extentWidth <<
" eh: " << m_extentHeight
383 <<
" ed: " << m_extentDepth <<
" ewb: " << m_extentWidthBytes <<
" ddev: " << m_iDstDevice
384 <<
" dw: " << m_dstWidth <<
" dh: " << m_dstHeight <<
" dd: " << m_dstDepth
385 <<
" dptr: " << m_dstMemNative <<
" drowpitch: " << m_dstRowPitchBytes
386 <<
" dslicepitch: " << m_dstSlicePitchBytes <<
" sdev: " << m_iSrcDevice
387 <<
" sw: " << m_srcWidth <<
" sh: " << m_srcHeight <<
" sd: " << m_srcDepth
388 <<
" sptr: " << m_srcMemNative <<
" srowpitch: " << m_srcRowPitchBytes
389 <<
" sslicepitch: " << m_srcSlicePitchBytes << std::endl;
392 typename TApi::MemcpyKind_t m_uniformMemCpyKind;
397 std::size_t m_extentWidthBytes;
403 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
409 std::size_t m_dstRowPitchBytes;
410 std::size_t m_srcRowPitchBytes;
411 std::size_t m_dstSlicePitchBytes;
412 std::size_t m_srcSlicePitchBytes;
414 void* m_dstMemNative;
415 void const* m_srcMemNative;
423 template<
typename TApi,
typename TDim>
426 template<
typename TExtent,
typename TViewSrc,
typename TViewDstFwd>
428 TViewDstFwd&& viewDst,
429 TViewSrc
const& viewSrc,
430 TExtent
const& extent) -> alpaka::detail::
431 TaskCopyUniformCudaHip<TApi, TDim, std::remove_reference_t<TViewDstFwd>, TViewSrc, TExtent>
435 auto const iDevice =
getDev(viewSrc).getNativeHandle();
438 std::forward<TViewDstFwd>(viewDst),
441 TApi::memcpyDeviceToHost,
448 template<
typename TApi,
typename TDim>
451 template<
typename TExtent,
typename TViewSrc,
typename TViewDstFwd>
453 TViewDstFwd&& viewDst,
454 TViewSrc
const& viewSrc,
455 TExtent
const& extent) -> alpaka::detail::
456 TaskCopyUniformCudaHip<TApi, TDim, std::remove_reference_t<TViewDstFwd>, TViewSrc, TExtent>
460 auto const iDevice =
getDev(viewDst).getNativeHandle();
463 std::forward<TViewDstFwd>(viewDst),
466 TApi::memcpyHostToDevice,
473 template<
typename TApi,
typename TDim>
476 template<
typename TExtent,
typename TViewSrc,
typename TViewDstFwd>
478 TViewDstFwd&& viewDst,
479 TViewSrc
const& viewSrc,
480 TExtent
const& extent) -> alpaka::detail::
481 TaskCopyUniformCudaHip<TApi, TDim, std::remove_reference_t<TViewDstFwd>, TViewSrc, TExtent>
485 auto const iDstDevice =
getDev(viewDst).getNativeHandle();
488 std::forward<TViewDstFwd>(viewDst),
491 TApi::memcpyDeviceToDevice,
493 getDev(viewSrc).getNativeHandle()};
498 template<
typename TApi,
typename TExtent,
typename TViewSrc,
typename TViewDst>
515 template<
typename TApi,
typename TExtent,
typename TViewSrc,
typename TViewDst>
534 template<
typename TApi,
typename TExtent,
typename TViewSrc,
typename TViewDst>
551 template<
typename TApi,
typename TExtent,
typename TViewSrc,
typename TViewDst>
570 template<
typename TApi,
typename TExtent,
typename TViewSrc,
typename TViewDst>
587 template<
typename TApi,
typename TExtent,
typename TViewSrc,
typename TViewDst>
606 template<
typename TApi,
typename TExtent,
typename TViewSrc,
typename TViewDst>
623 template<
typename TApi,
typename TExtent,
typename TViewSrc,
typename TViewDst>
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
#define ALPAKA_DEBUG
Set the minimum log level if it is not defined.
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
#define ALPAKA_DEBUG_FULL
The full debug level.
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtentProduct(T const &object) -> Idx< T >
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getHeight(TExtent const &extent=TExtent()) -> Idx< TExtent >
ALPAKA_FN_HOST auto getPitchesInBytes(TView const &view) -> Vec< Dim< TView >, Idx< TView >>
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
std::remove_volatile_t< typename trait::ElemType< TView >::type > Elem
The element type trait alias template to remove the ::type.
ALPAKA_FN_HOST auto getDev(T const &t)
std::integral_constant< std::size_t, N > DimInt
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getDepth(TExtent const &extent=TExtent()) -> Idx< TExtent >
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getWidth(TExtent const &extent=TExtent()) -> Idx< TExtent >
ALPAKA_FN_HOST TaskCopyUniformCudaHip(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, [[maybe_unused]] TExtent const &extent, typename TApi::MemcpyKind_t const &uniformMemCpyKind, int const &iDstDevice, int const &iSrcDevice)
alpaka::Idx< TExtent > Idx
auto enqueue(TQueue &queue) const -> void
ALPAKA_FN_HOST TaskCopyUniformCudaHip(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent, typename TApi::MemcpyKind_t const &uniformMemCpyKind, int const &iDstDevice, int const &iSrcDevice)
alpaka::Idx< TExtent > Idx
auto enqueue(TQueue &queue) const -> void
auto enqueue(TQueue &queue) const -> void
alpaka::Idx< TExtent > Idx
ALPAKA_FN_HOST TaskCopyUniformCudaHip(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent, typename TApi::MemcpyKind_t const &uniformMemcpyKind, int const &iDstDevice, int const &iSrcDevice)
auto enqueue(TQueue &queue) const -> void
ALPAKA_FN_HOST TaskCopyUniformCudaHip(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent, typename TApi::MemcpyKind_t const &uniformMemcpyKind, int const &iDstDevice, int const &iSrcDevice)
alpaka::Idx< TExtent > Idx
The memory copy task trait.