39 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
// Forward declaration of the class template TaskKernelGpuUniformCudaHipRt.
// Only the name and template parameter list are introduced here; the definition
// is not part of this excerpt.
// Template parameters, as declared: TApi, TAcc, TDim, TIdx, TKernelFnObj and the
// parameter pack TArgs.
// NOTE(review): presumably TKernelFnObj is the kernel function object type and
// TArgs the types of its arguments - confirm against the class definition.
43 template<
typename TApi,
typename TAcc,
typename TDim,
typename TIdx,
typename TKernelFnObj,
typename... TArgs>
44 class TaskKernelGpuUniformCudaHipRt;
49 template<
typename TApi,
typename TDim,
typename TIdx>
55 AtomicUniformCudaHipBuiltIn,
56 AtomicUniformCudaHipBuiltIn,
57 AtomicUniformCudaHipBuiltIn>
64 # ifdef ALPAKA_DISABLE_VENDOR_RNG
73 sizeof(TIdx) >=
sizeof(
int),
74 "Index type is not supported, consider using int or a larger type.");
91 template<
typename TApi,
typename TDim,
typename TIdx>
98 template<
typename TApi,
typename TDim,
typename TIdx>
103 # ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
106 int multiProcessorCount = {};
108 &multiProcessorCount,
109 TApi::deviceAttributeMultiprocessorCount,
110 dev.getNativeHandle()));
112 int maxGridSize[3] = {};
115 TApi::deviceAttributeMaxGridDimX,
116 dev.getNativeHandle()));
119 TApi::deviceAttributeMaxGridDimY,
120 dev.getNativeHandle()));
123 TApi::deviceAttributeMaxGridDimZ,
124 dev.getNativeHandle()));
126 int maxBlockDim[3] = {};
129 TApi::deviceAttributeMaxBlockDimX,
130 dev.getNativeHandle()));
133 TApi::deviceAttributeMaxBlockDimY,
134 dev.getNativeHandle()));
137 TApi::deviceAttributeMaxBlockDimZ,
138 dev.getNativeHandle()));
140 int maxThreadsPerBlock = {};
143 TApi::deviceAttributeMaxThreadsPerBlock,
144 dev.getNativeHandle()));
146 int sharedMemSizeBytes = {};
149 TApi::deviceAttributeMaxSharedMemoryPerBlock,
150 dev.getNativeHandle()));
153 alpaka::core::clipCast<TIdx>(multiProcessorCount),
156 alpaka::core::clipCast<TIdx>(maxGridSize[2u]),
157 alpaka::core::clipCast<TIdx>(maxGridSize[1u]),
158 alpaka::core::clipCast<TIdx>(maxGridSize[0u]))),
163 alpaka::core::clipCast<TIdx>(maxBlockDim[2u]),
164 alpaka::core::clipCast<TIdx>(maxBlockDim[1u]),
165 alpaka::core::clipCast<TIdx>(maxBlockDim[0u]))),
167 alpaka::core::clipCast<TIdx>(maxThreadsPerBlock),
173 static_cast<size_t>(sharedMemSizeBytes),
178 typename TApi::DeviceProp_t properties;
182 alpaka::core::clipCast<TIdx>(properties.multiProcessorCount),
185 alpaka::core::clipCast<TIdx>(properties.maxGridSize[2u]),
186 alpaka::core::clipCast<TIdx>(properties.maxGridSize[1u]),
187 alpaka::core::clipCast<TIdx>(properties.maxGridSize[0u]))),
192 alpaka::core::clipCast<TIdx>(properties.maxThreadsDim[2u]),
193 alpaka::core::clipCast<TIdx>(properties.maxThreadsDim[1u]),
194 alpaka::core::clipCast<TIdx>(properties.maxThreadsDim[0u]))),
196 alpaka::core::clipCast<TIdx>(properties.maxThreadsPerBlock),
202 static_cast<size_t>(properties.sharedMemPerBlock),
210 template<
typename TApi,
typename TDim,
typename TIdx>
215 return std::string(
"AccGpu") + TApi::name +
"Rt<" + std::to_string(TDim::value) +
","
216 + core::demangled<TIdx> +
">";
221 template<
typename TApi,
typename TDim,
typename TIdx>
228 template<
typename TApi,
typename TDim,
typename TIdx>
242 template<
typename TApi,
typename TDim,
typename TIdx>
245 template<
typename TKernelFnObj,
typename... TArgs>
260 typename TKernelFnObj,
265 TWorkDiv
const& workDiv,
266 TKernelFnObj
const& kernelFnObj,
275 TArgs...>(workDiv, kernelFnObj, std::forward<TArgs>(args)...);
280 template<
typename TApi,
typename TDim,
typename TIdx>
287 template<
typename TApi,
typename TDim,
typename TIdx>
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
#define ALPAKA_FN_HOST_ACC
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto max(T const &max_ctx, Tx const &x, Ty const &y)
Returns the larger of two arguments. NaNs are treated as missing data (between a NaN and a numeric value, the numeric value is chosen).
The alpaka accelerator library.
ALPAKA_FN_HOST auto getMemBytes(TDev const &dev) -> std::size_t
std::integral_constant< std::size_t, N > DimInt
The acceleration properties on a device.
Tag used in class inheritance hierarchies that describes that a specific concept (TConcept) is implemented by the given base class (TBase).
Check that the return of TKernelFnObj is void.
The accelerator type trait.
The kernel execution task creation trait.
The dimension getter type trait.
The device properties get trait.
The accelerator name trait.