alpaka
Abstraction Library for Parallel Kernel Acceleration
DeviceGlobalUniformCudaHipBuiltIn.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Aurora Perego
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
11 
12 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
13 
14 namespace alpaka
15 {
16 
17  namespace detail
18  {
  // Two per-backend template definitions follow; the inline comments label them as the CUDA and the
  // HIP/ROCm variant respectively.
  // NOTE(review): this listing jumps from line 19 to 21 and from 22 to 24 (likewise 26 -> 28 and 29 -> 31),
  // so the declaration header and the single body line of each definition are not present in this copy.
  // Restore them from the upstream header before attempting to build; do not infer them from this text.
19  template<typename T>
21  {
22  // CUDA implementation
24  };
25 
26  template<typename T>
28  {
29  // HIP/ROCm implementation
31  };
32  } // namespace detail
33 
34  // from device to host
  // Copy overload without an explicit extent: the extent is read from the destination view.
  // It asks TApi::getSymbolAddress for the address belonging to `viewSrc`, wraps that address in an
  // alpaka::ViewPlainPtr on the queue's device, and enqueues a memcpy task from that view into `viewDst`.
35  template<
36  typename TTag,
37  typename TApi,
38  bool TBlocking,
39  typename TViewDst,
40  typename TTypeSrc,
  // SFINAE guard: the overload only participates when TTag/TApi are the CUDA tag/API pair (if
  // ALPAKA_ACC_GPU_CUDA_ENABLED is defined) or the HIP tag/API pair (otherwise).
41  typename std::enable_if_t<
42 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
43  (std::is_same_v<TTag, TagGpuCudaRt> && std::is_same_v<TApi, ApiCudaRt>)
44 # else
45  (std::is_same_v<TTag, TagGpuHipRt> && std::is_same_v<TApi, ApiHipRt>)
46 # endif
47  ,
48  int>
49  = 0>
  // NOTE(review): listing lines 50, 51 and 53 are missing from this copy. They should hold the function
  // name and the declarations of `queue` and `viewSrc`, both of which the body uses. The symbol list at
  // the end of the page suggests an `ALPAKA_FN_HOST auto memcpy(...)` signature - confirm upstream.
52  TViewDst& viewDst,
54  {
  // Type: TTypeSrc with all array extents and then const removed (the element type).
55  using Type = std::remove_const_t<std::remove_all_extents_t<TTypeSrc>>;
  // TypeExt: TTypeSrc with only const removed (array extents kept).
56  using TypeExt = std::remove_const_t<TTypeSrc>;
  // The number of elements to copy is taken from the destination view.
57  auto extent = getExtents(viewDst);
  // NOTE(review): the other three overloads declare this pointer as `Type*`. The value only travels
  // through reinterpret_casts here, so the difference looks cosmetic - confirm and unify.
58  TypeExt* pMemAcc(nullptr);
  // NOTE(review): listing line 59 is missing; the surplus `)` below shows this call is wrapped in an outer
  // call that opened there (presumably the ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK macro listed at the end of
  // the page - confirm). The const_cast strips constness from `viewSrc` before it is handed to the API.
60  TApi::getSymbolAddress(reinterpret_cast<void**>(&pMemAcc), *(const_cast<TypeExt*>(&viewSrc))));
61 
  // Wrap the raw address in a non-owning plain-pointer view so it can be used as the copy source.
  // NOTE(review): listing line 63 (the first template argument of ViewPlainPtr) is missing.
62  auto view = alpaka::ViewPlainPtr<
64  Type,
65  alpaka::Dim<decltype(extent)>,
66  alpaka::Idx<decltype(extent)>>(reinterpret_cast<Type*>(pMemAcc), alpaka::getDev(queue), extent);
  // NOTE(review): `viewDst` is declared as `TViewDst&`, not as a forwarding reference, so
  // std::forward<TViewDst> casts it to an rvalue here - confirm this is intended.
67  enqueue(queue, createTaskMemcpy(std::forward<TViewDst>(viewDst), view, extent));
68  }
69 
70  // from host to device
  // Copy overload without an explicit extent: the extent is read from the source view.
  // It asks TApi::getSymbolAddress for the address belonging to `viewDst`, wraps that address in an
  // alpaka::ViewPlainPtr on the queue's device, and enqueues a memcpy task from `viewSrc` into that view.
71  template<
72  typename TTag,
73  typename TApi,
74  bool TBlocking,
75  typename TTypeDst,
76  typename TViewSrc,
  // SFINAE guard: the overload only participates when TTag/TApi are the CUDA tag/API pair (if
  // ALPAKA_ACC_GPU_CUDA_ENABLED is defined) or the HIP tag/API pair (otherwise).
77  typename std::enable_if_t<
78 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
79  (std::is_same_v<TTag, TagGpuCudaRt> && std::is_same_v<TApi, ApiCudaRt>)
80 # else
81  (std::is_same_v<TTag, TagGpuHipRt> && std::is_same_v<TApi, ApiHipRt>)
82 # endif
83  ,
84  int>
85  = 0>
  // NOTE(review): listing lines 86-88 are missing from this copy. They should hold the function name and
  // the declarations of `queue` and `viewDst`, both of which the body uses. The symbol list at the end of
  // the page suggests an `ALPAKA_FN_HOST auto memcpy(...)` signature - confirm upstream.
89  TViewSrc const& viewSrc)
90  {
  // Type: TTypeDst with all array extents and then const removed (the element type).
91  using Type = std::remove_const_t<std::remove_all_extents_t<TTypeDst>>;
  // TypeExt: TTypeDst with only const removed (array extents kept).
92  using TypeExt = std::remove_const_t<TTypeDst>;
  // The number of elements to copy is taken from the source view.
93  auto extent = getExtents(viewSrc);
94  Type* pMemAcc(nullptr);
  // NOTE(review): listing line 95 is missing; the surplus `)` below shows this call is wrapped in an outer
  // call that opened there (presumably the ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK macro listed at the end of
  // the page - confirm). The const_cast strips constness from `viewDst` before it is handed to the API.
96  TApi::getSymbolAddress(reinterpret_cast<void**>(&pMemAcc), *(const_cast<TypeExt*>(&viewDst))));
97 
  // Wrap the raw address in a non-owning plain-pointer view so it can be used as the copy destination.
  // NOTE(review): listing line 99 (the first template argument of ViewPlainPtr) is missing.
98  auto view = alpaka::ViewPlainPtr<
100  Type,
101  alpaka::Dim<decltype(extent)>,
102  alpaka::Idx<decltype(extent)>>(reinterpret_cast<Type*>(pMemAcc), alpaka::getDev(queue), extent);
  // `view` is a local object, so std::forward<decltype(view)> passes it on as an rvalue.
103  enqueue(queue, createTaskMemcpy(std::forward<decltype(view)>(view), viewSrc, extent));
104  }
105 
106  // from device to host
  // Copy overload with a caller-supplied extent.
  // It asks TApi::getSymbolAddress for the address belonging to `viewSrc`, builds a view from that address,
  // the queue's device and `extent`, and enqueues a memcpy task from that view into `viewDst`.
107  template<
108  typename TTag,
109  typename TApi,
110  bool TBlocking,
111  typename TViewDst,
112  typename TTypeSrc,
113  typename TExtent,
  // SFINAE guard: the overload only participates when TTag/TApi are the CUDA tag/API pair (if
  // ALPAKA_ACC_GPU_CUDA_ENABLED is defined) or the HIP tag/API pair (otherwise).
114  typename std::enable_if_t<
115 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
116  (std::is_same_v<TTag, TagGpuCudaRt> && std::is_same_v<TApi, ApiCudaRt>)
117 # else
118  (std::is_same_v<TTag, TagGpuHipRt> && std::is_same_v<TApi, ApiHipRt>)
119 # endif
120  ,
121  int>
122  = 0>
  // NOTE(review): listing lines 123, 124 and 126 are missing from this copy. They should hold the function
  // name and the declarations of `queue` and `viewSrc`, both of which the body uses. The symbol list at
  // the end of the page suggests an `ALPAKA_FN_HOST auto memcpy(...)` signature - confirm upstream.
125  TViewDst& viewDst,
127  TExtent extent)
128  {
  // Type: TTypeSrc with all array extents and then const removed (the element type).
129  using Type = std::remove_const_t<std::remove_all_extents_t<TTypeSrc>>;
  // TypeExt: TTypeSrc with only const removed (array extents kept).
130  using TypeExt = std::remove_const_t<TTypeSrc>;
131  Type* pMemAcc(nullptr);
  // NOTE(review): listing line 132 is missing; the surplus `)` below shows this call is wrapped in an outer
  // call that opened there (presumably the ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK macro listed at the end of
  // the page - confirm). The const_cast strips constness from `viewSrc` before it is handed to the API.
133  TApi::getSymbolAddress(reinterpret_cast<void**>(&pMemAcc), *(const_cast<TypeExt*>(&viewSrc))));
134 
  // NOTE(review): listing line 135 is missing; it must declare `view` (used in the enqueue call below) and
  // open the call whose three arguments follow. Presumably an alpaka::ViewPlainPtr as in the overloads
  // without an explicit extent - confirm upstream.
136  reinterpret_cast<Type*>(pMemAcc),
137  alpaka::getDev(queue),
138  extent);
  // NOTE(review): `viewDst` is declared as `TViewDst&`, not as a forwarding reference, so
  // std::forward<TViewDst> casts it to an rvalue here - confirm this is intended.
139  enqueue(queue, createTaskMemcpy(std::forward<TViewDst>(viewDst), view, extent));
140  }
141 
142  // from host to device
  // Copy overload with a caller-supplied extent.
  // It asks TApi::getSymbolAddress for the address belonging to `viewDst`, builds a view from that address,
  // the queue's device and `extent`, and enqueues a memcpy task from `viewSrc` into that view.
143  template<
144  typename TTag,
145  typename TApi,
146  bool TBlocking,
147  typename TTypeDst,
148  typename TViewSrc,
149  typename TExtent,
  // SFINAE guard: the overload only participates when TTag/TApi are the CUDA tag/API pair (if
  // ALPAKA_ACC_GPU_CUDA_ENABLED is defined) or the HIP tag/API pair (otherwise).
150  typename std::enable_if_t<
151 # if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
152  (std::is_same_v<TTag, TagGpuCudaRt> && std::is_same_v<TApi, ApiCudaRt>)
153 # else
154  (std::is_same_v<TTag, TagGpuHipRt> && std::is_same_v<TApi, ApiHipRt>)
155 # endif
156  ,
157  int>
158  = 0>
  // NOTE(review): listing lines 159-161 are missing from this copy. They should hold the function name and
  // the declarations of `queue` and `viewDst`, both of which the body uses. The symbol list at the end of
  // the page suggests an `ALPAKA_FN_HOST auto memcpy(...)` signature - confirm upstream.
162  TViewSrc const& viewSrc,
163  TExtent extent)
164  {
  // Type: TTypeDst with all array extents and then const removed (the element type).
165  using Type = std::remove_const_t<std::remove_all_extents_t<TTypeDst>>;
  // TypeExt: TTypeDst with only const removed (array extents kept).
166  using TypeExt = std::remove_const_t<TTypeDst>;
167  Type* pMemAcc(nullptr);
  // NOTE(review): listing line 168 is missing; the surplus `)` below shows this call is wrapped in an outer
  // call that opened there (presumably the ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK macro listed at the end of
  // the page - confirm). The const_cast strips constness from `viewDst` before it is handed to the API.
169  TApi::getSymbolAddress(reinterpret_cast<void**>(&pMemAcc), *(const_cast<TypeExt*>(&viewDst))));
170 
  // NOTE(review): listing line 171 is missing; it must declare `view` (used in the enqueue call below) and
  // open the call whose three arguments follow. Presumably an alpaka::ViewPlainPtr as in the overloads
  // without an explicit extent - confirm upstream.
172  reinterpret_cast<Type*>(pMemAcc),
173  alpaka::getDev(queue),
174  extent);
  // The freshly built `view` is the copy destination; `viewSrc` is the source.
175  enqueue(queue, createTaskMemcpy(std::forward<decltype(view)>(view), viewSrc, extent));
176  }
177 } // namespace alpaka
178 
179 #endif
#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)
CUDA/HIP runtime error checking with log and exception.
The CUDA/HIP RT device handle.
#define ALPAKA_FN_HOST
Definition: Common.hpp:40
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition: Traits.hpp:29
ALPAKA_FN_HOST auto memcpy(TQueue &queue, alpaka::detail::DevGlobalImplGeneric< TTag, TTypeDst > &viewDst, TViewSrc const &viewSrc) -> void
ALPAKA_FN_HOST auto createTaskMemcpy(TViewDstFwd &&viewDst, TViewSrc const &viewSrc, TExtent const &extent)
Creates a memory copy task.
Definition: Traits.hpp:253
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T >>
Definition: Traits.hpp:59
ALPAKA_FN_HOST auto getDev(T const &t)
Definition: Traits.hpp:68
ALPAKA_FN_HOST auto enqueue(TQueue &queue, TTask &&task) -> void
Queues the given task in the given queue.
Definition: Traits.hpp:47
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19
The memory view to wrap plain pointers.