39 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
43 template<
typename TApi,
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
44 class TaskKernelGpuUniformCudaHipRt;
49 template<
typename TApi,
typename TDim,
typename TIdx>
55 AtomicUniformCudaHipBuiltIn,
56 AtomicUniformCudaHipBuiltIn,
57 AtomicUniformCudaHipBuiltIn>
64 # ifdef ALPAKA_DISABLE_VENDOR_RNG
73 sizeof(TIdx) >=
sizeof(
int),
74 "Index type is not supported, consider using int or a larger type.");
91 template<
typename TApi,
typename TDim,
typename TIdx>
98 template<
typename TApi,
typename TDim,
typename TIdx>
104 template<
typename TApi,
typename TDim,
typename TIdx>
110 template<
typename TApi,
typename TDim,
typename TIdx>
115 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
118 int multiProcessorCount = {};
120 &multiProcessorCount,
121 TApi::deviceAttributeMultiprocessorCount,
122 dev.getNativeHandle()));
124 int maxGridSize[3] = {};
127 TApi::deviceAttributeMaxGridDimX,
128 dev.getNativeHandle()));
131 TApi::deviceAttributeMaxGridDimY,
132 dev.getNativeHandle()));
135 TApi::deviceAttributeMaxGridDimZ,
136 dev.getNativeHandle()));
138 int maxBlockDim[3] = {};
141 TApi::deviceAttributeMaxBlockDimX,
142 dev.getNativeHandle()));
145 TApi::deviceAttributeMaxBlockDimY,
146 dev.getNativeHandle()));
149 TApi::deviceAttributeMaxBlockDimZ,
150 dev.getNativeHandle()));
152 int maxThreadsPerBlock = {};
155 TApi::deviceAttributeMaxThreadsPerBlock,
156 dev.getNativeHandle()));
158 int sharedMemSizeBytes = {};
161 TApi::deviceAttributeMaxSharedMemoryPerBlock,
162 dev.getNativeHandle()));
165 alpaka::core::clipCast<TIdx>(multiProcessorCount),
168 alpaka::core::clipCast<TIdx>(maxGridSize[2u]),
169 alpaka::core::clipCast<TIdx>(maxGridSize[1u]),
170 alpaka::core::clipCast<TIdx>(maxGridSize[0u]))),
175 alpaka::core::clipCast<TIdx>(maxBlockDim[2u]),
176 alpaka::core::clipCast<TIdx>(maxBlockDim[1u]),
177 alpaka::core::clipCast<TIdx>(maxBlockDim[0u]))),
179 alpaka::core::clipCast<TIdx>(maxThreadsPerBlock),
185 static_cast<size_t>(sharedMemSizeBytes),
190 typename TApi::DeviceProp_t properties;
194 alpaka::core::clipCast<TIdx>(properties.multiProcessorCount),
197 alpaka::core::clipCast<TIdx>(properties.maxGridSize[2u]),
198 alpaka::core::clipCast<TIdx>(properties.maxGridSize[1u]),
199 alpaka::core::clipCast<TIdx>(properties.maxGridSize[0u]))),
204 alpaka::core::clipCast<TIdx>(properties.maxThreadsDim[2u]),
205 alpaka::core::clipCast<TIdx>(properties.maxThreadsDim[1u]),
206 alpaka::core::clipCast<TIdx>(properties.maxThreadsDim[0u]))),
208 alpaka::core::clipCast<TIdx>(properties.maxThreadsPerBlock),
214 static_cast<size_t>(properties.sharedMemPerBlock),
222 template<
typename TApi,
typename TDim,
typename TIdx>
227 return std::string(
"AccGpu") + TApi::name +
"Rt<" + std::to_string(TDim::value) +
","
228 + core::demangled<TIdx> +
">";
233 template<
typename TApi,
typename TDim,
typename TIdx>
240 template<
typename TApi,
typename TDim,
typename TIdx>
254 template<
typename TApi,
typename TDim,
typename TIdx>
257 template<
typename TKernelFnObj,
typename... TArgs>
272 typename TKernelFnObj,
277 TWorkDiv
const& workDiv,
278 TKernelFnObj
const& kernelFnObj,
287 TArgs...>(workDiv, kernelFnObj, std::forward<TArgs>(args)...);
292 template<
typename TApi,
typename TDim,
typename TIdx>
299 template<
typename TApi,
typename TDim,
typename TIdx>
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
#define ALPAKA_FN_HOST_ACC
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric value, the numeric value is chosen).
The alpaka accelerator library.
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
std::integral_constant< std::size_t, N > DimInt
The acceleration properties on a device.
Check that the return of TKernelFnObj is void.
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is implemented by the given base class (TBase).
The accelerator type trait.
The kernel execution task creation trait.
The dimension getter type trait.
The device properties get trait.
The accelerator name trait.
The multi thread accelerator trait.
The single thread accelerator trait.