19 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
28 template<
typename TApi>
31 # if defined(BOOST_COMP_GNUC) && BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(11, 0, 0) \
32 && BOOST_COMP_GNUC < BOOST_VERSION_NUMBER(12, 0, 0)
42 template<
typename TApi>
49 template<
typename TApi>
57 typename TApi::Error_t error = TApi::getDeviceCount(&iNumDevices);
58 if(error != TApi::success)
61 return static_cast<std::size_t
>(iNumDevices);
66 template<
typename TApi>
76 if(devIdx >= devCount)
78 std::stringstream ssErr;
79 ssErr <<
"Unable to return device handle for device " << devIdx <<
". There are only " << devCount
81 throw std::runtime_error(ssErr.str());
84 if(isDevUsable(devIdx))
89 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
90 typename TApi::DeviceProp_t devProp;
93 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
94 printDeviceProperties(devProp);
95 # elif ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
96 std::cout << __func__ << devProp.name << std::endl;
102 std::stringstream ssErr;
103 ssErr <<
"Unable to return device handle for device " << devIdx <<
". It is not accessible!";
104 throw std::runtime_error(ssErr.str());
// Checks whether the device with index iDevice can be used.
// Visible steps: select the device through TApi::setDevice and, if that succeeded, try to create a
// stream on it through TApi::streamCreate.
// \param iDevice Index of the device to probe; it is narrowed to int for the TApi call.
// \return bool.
// NOTE(review): the branch bodies after the second success check and the return statements are not
// visible in this listing - confirm the exact result and stream clean-up against the full source.
110 ALPAKA_FN_HOST static auto isDevUsable(std::size_t iDevice) ->
bool
// Make iDevice the current device; rc carries the API status forward.
112 typename TApi::Error_t rc = TApi::setDevice(
static_cast<int>(iDevice));
// Value-initialized stream handle that streamCreate fills in below.
113 typename TApi::Stream_t queue = {};
// Only attempt stream creation when selecting the device succeeded.
117 if(rc == TApi::success)
119 rc = TApi::streamCreate(&queue);
// rc now holds the status of setDevice or, if that succeeded, of streamCreate.
122 if(rc == TApi::success)
// The value returned by getLastError is deliberately discarded.
// NOTE(review): presumably this clears the API's pending error state on the failure path - confirm.
133 std::ignore = TApi::getLastError();
138 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
// Prints the fields of a device property structure to std::cout, one "name: value" line per field.
//
// Fields read by every instantiation are printed first. The group that follows is guarded by
// `if constexpr(std::is_same_v<TApi, ApiCudaRt>)`, and the last group reads fields such as
// gcnArchName and arch.*.
// NOTE(review): the brace/else lines separating these groups, and some `<< std::endl;` continuation
// lines, are not visible in this listing - confirm them against the full source.
//
// \param devProp The device properties to print.
140 ALPAKA_FN_HOST static auto printDeviceProperties(
typename TApi::DeviceProp_t
const& devProp) ->
void
// Divisors used for the values labelled " KiB" / " MiB" below.
144 constexpr
auto KiB = std::size_t{1024};
145 constexpr
auto MiB = KiB * KiB;
// Fields printed regardless of TApi.
146 std::cout <<
"name: " << devProp.name << std::endl;
147 std::cout <<
"totalGlobalMem: " << devProp.totalGlobalMem / MiB <<
" MiB" << std::endl;
148 std::cout <<
"sharedMemPerBlock: " << devProp.sharedMemPerBlock / KiB <<
" KiB" << std::endl;
149 std::cout <<
"regsPerBlock: " << devProp.regsPerBlock << std::endl;
150 std::cout <<
"warpSize: " << devProp.warpSize << std::endl;
151 std::cout <<
"maxThreadsPerBlock: " << devProp.maxThreadsPerBlock << std::endl;
152 std::cout <<
"maxThreadsDim[3]: (" << devProp.maxThreadsDim[0] <<
", " << devProp.maxThreadsDim[1]
153 <<
", " << devProp.maxThreadsDim[2] <<
")" << std::endl;
154 std::cout <<
"maxGridSize[3]: (" << devProp.maxGridSize[0] <<
", " << devProp.maxGridSize[1] <<
", "
155 << devProp.maxGridSize[2] <<
")" << std::endl;
156 std::cout <<
"clockRate: " << devProp.clockRate <<
" kHz" << std::endl;
157 std::cout <<
"totalConstMem: " << devProp.totalConstMem / KiB <<
" KiB" << std::endl;
158 std::cout <<
"major: " << devProp.major << std::endl;
159 std::cout <<
"minor: " << devProp.minor << std::endl;
162 std::cout <<
"multiProcessorCount: " << devProp.multiProcessorCount << std::endl;
163 std::cout <<
"integrated: " << devProp.integrated << std::endl;
164 std::cout <<
"canMapHostMemory: " << devProp.canMapHostMemory << std::endl;
165 std::cout <<
"computeMode: " << devProp.computeMode << std::endl;
166 std::cout <<
"concurrentKernels: " << devProp.concurrentKernels << std::endl;
167 std::cout <<
"pciBusID: " << devProp.pciBusID << std::endl;
168 std::cout <<
"pciDeviceID: " << devProp.pciDeviceID << std::endl;
169 std::cout <<
"pciDomainID: " << devProp.pciDomainID << std::endl;
170 std::cout <<
"memoryClockRate: " << devProp.memoryClockRate <<
" kHz" << std::endl;
171 std::cout <<
"memoryBusWidth: " << devProp.memoryBusWidth <<
" b" << std::endl;
172 std::cout <<
"l2CacheSize: " << devProp.l2CacheSize <<
" B" << std::endl;
173 std::cout <<
"maxThreadsPerMultiProcessor: " << devProp.maxThreadsPerMultiProcessor << std::endl;
174 std::cout <<
"isMultiGpuBoard: " << devProp.isMultiGpuBoard << std::endl;
// Compile-time selection: the following group is only compiled when TApi is ApiCudaRt.
175 if constexpr(std::is_same_v<TApi, ApiCudaRt>)
177 std::cout <<
"memPitch: " << devProp.memPitch <<
" B" << std::endl;
178 std::cout <<
"textureAlignment: " << devProp.textureAlignment << std::endl;
179 std::cout <<
"texturePitchAlignment: " << devProp.texturePitchAlignment << std::endl;
180 std::cout <<
"kernelExecTimeoutEnabled: " << devProp.kernelExecTimeoutEnabled << std::endl;
181 std::cout <<
"unifiedAddressing: " << devProp.unifiedAddressing << std::endl;
182 std::cout <<
"multiGpuBoardGroupID: " << devProp.multiGpuBoardGroupID << std::endl;
183 std::cout <<
"singleToDoublePrecisionPerfRatio: " << devProp.singleToDoublePrecisionPerfRatio
185 std::cout <<
"pageableMemoryAccess: " << devProp.pageableMemoryAccess << std::endl;
186 std::cout <<
"concurrentManagedAccess: " << devProp.concurrentManagedAccess << std::endl;
187 std::cout <<
"computePreemptionSupported: " << devProp.computePreemptionSupported << std::endl;
188 std::cout <<
"canUseHostPointerForRegisteredMem: " << devProp.canUseHostPointerForRegisteredMem
190 std::cout <<
"cooperativeLaunch: " << devProp.cooperativeLaunch << std::endl;
191 std::cout <<
"cooperativeMultiDeviceLaunch: " << devProp.cooperativeMultiDeviceLaunch << std::endl;
192 std::cout <<
"maxTexture1D: " << devProp.maxTexture1D << std::endl;
193 std::cout <<
"maxTexture1DLinear: " << devProp.maxTexture1DLinear << std::endl;
194 std::cout <<
"maxTexture2D[2]: " << devProp.maxTexture2D[0] <<
"x" << devProp.maxTexture2D[1]
196 std::cout <<
"maxTexture2DLinear[3]: " << devProp.maxTexture2DLinear[0] <<
"x"
197 << devProp.maxTexture2DLinear[1] <<
"x" << devProp.maxTexture2DLinear[2] << std::endl;
198 std::cout <<
"maxTexture2DGather[2]: " << devProp.maxTexture2DGather[0] <<
"x"
199 << devProp.maxTexture2DGather[1] << std::endl;
200 std::cout <<
"maxTexture3D[3]: " << devProp.maxTexture3D[0] <<
"x" << devProp.maxTexture3D[1]
201 <<
"x" << devProp.maxTexture3D[2] << std::endl;
202 std::cout <<
"maxTextureCubemap: " << devProp.maxTextureCubemap << std::endl;
203 std::cout <<
"maxTexture1DLayered[2]: " << devProp.maxTexture1DLayered[0] <<
"x"
204 << devProp.maxTexture1DLayered[1] << std::endl;
205 std::cout <<
"maxTexture2DLayered[3]: " << devProp.maxTexture2DLayered[0] <<
"x"
206 << devProp.maxTexture2DLayered[1] <<
"x" << devProp.maxTexture2DLayered[2] << std::endl;
207 std::cout <<
"maxTextureCubemapLayered[2]: " << devProp.maxTextureCubemapLayered[0] <<
"x"
208 << devProp.maxTextureCubemapLayered[1] << std::endl;
209 std::cout <<
"maxSurface1D: " << devProp.maxSurface1D << std::endl;
210 std::cout <<
"maxSurface2D[2]: " << devProp.maxSurface2D[0] <<
"x" << devProp.maxSurface2D[1]
212 std::cout <<
"maxSurface3D[3]: " << devProp.maxSurface3D[0] <<
"x" << devProp.maxSurface3D[1]
213 <<
"x" << devProp.maxSurface3D[2] << std::endl;
214 std::cout <<
"maxSurface1DLayered[2]: " << devProp.maxSurface1DLayered[0] <<
"x"
215 << devProp.maxSurface1DLayered[1] << std::endl;
216 std::cout <<
"maxSurface2DLayered[3]: " << devProp.maxSurface2DLayered[0] <<
"x"
217 << devProp.maxSurface2DLayered[1] <<
"x" << devProp.maxSurface2DLayered[2] << std::endl;
218 std::cout <<
"maxSurfaceCubemap: " << devProp.maxSurfaceCubemap << std::endl;
219 std::cout <<
"maxSurfaceCubemapLayered[2]: " << devProp.maxSurfaceCubemapLayered[0] <<
"x"
220 << devProp.maxSurfaceCubemapLayered[1] << std::endl;
221 std::cout <<
"surfaceAlignment: " << devProp.surfaceAlignment << std::endl;
222 std::cout <<
"ECCEnabled: " << devProp.ECCEnabled << std::endl;
223 std::cout <<
"tccDriver: " << devProp.tccDriver << std::endl;
224 std::cout <<
"asyncEngineCount: " << devProp.asyncEngineCount << std::endl;
225 std::cout <<
"streamPrioritiesSupported: " << devProp.streamPrioritiesSupported << std::endl;
226 std::cout <<
"globalL1CacheSupported: " << devProp.globalL1CacheSupported << std::endl;
227 std::cout <<
"localL1CacheSupported: " << devProp.localL1CacheSupported << std::endl;
228 std::cout <<
"sharedMemPerMultiprocessor: " << devProp.sharedMemPerMultiprocessor << std::endl;
229 std::cout <<
"regsPerMultiprocessor: " << devProp.regsPerMultiprocessor << std::endl;
230 std::cout <<
"managedMemory: " << devProp.managedMemory << std::endl;
// NOTE(review): from here on the fields (clockInstructionRate, gcnArchName, arch.*) presumably
// belong to the non-CUDA branch; the separating lines are not visible here - confirm.
// The unit suffix carries a leading space so the value and unit are separated like every other
// unit-suffixed line in this function.
234 std::cout <<
"clockInstructionRate: " << devProp.clockInstructionRate <<
" kHz" << std::endl;
235 std::cout <<
"maxSharedMemoryPerMultiProcessor: " << devProp.maxSharedMemoryPerMultiProcessor / KiB
236 <<
" KiB" << std::endl;
237 std::cout <<
"gcnArchName: " << devProp.gcnArchName << std::endl;
// Architecture feature flags, printed indented under an "arch: " heading line.
238 std::cout <<
"arch: " << std::endl;
239 std::cout <<
" hasGlobalInt32Atomics: " << devProp.arch.hasGlobalInt32Atomics << std::endl;
240 std::cout <<
" hasGlobalFloatAtomicExch: " << devProp.arch.hasGlobalFloatAtomicExch
242 std::cout <<
" hasSharedInt32Atomics: " << devProp.arch.hasSharedInt32Atomics << std::endl;
243 std::cout <<
" hasSharedFloatAtomicExch: " << devProp.arch.hasSharedFloatAtomicExch
245 std::cout <<
" hasFloatAtomicAdd: " << devProp.arch.hasFloatAtomicAdd << std::endl;
246 std::cout <<
" hasGlobalInt64Atomics: " << devProp.arch.hasGlobalInt64Atomics << std::endl;
247 std::cout <<
" hasSharedInt64Atomics: " << devProp.arch.hasSharedInt64Atomics << std::endl;
248 std::cout <<
" hasDoubles: " << devProp.arch.hasDoubles << std::endl;
249 std::cout <<
" hasWarpVote: " << devProp.arch.hasWarpVote << std::endl;
250 std::cout <<
" hasWarpBallot: " << devProp.arch.hasWarpBallot << std::endl;
251 std::cout <<
" hasWarpShuffle: " << devProp.arch.hasWarpShuffle << std::endl;
252 std::cout <<
" hasFunnelShift: " << devProp.arch.hasFunnelShift << std::endl;
253 std::cout <<
" hasThreadFenceSystem: " << devProp.arch.hasThreadFenceSystem << std::endl;
254 std::cout <<
" hasSyncThreadsExt: " << devProp.arch.hasSyncThreadsExt << std::endl;
255 std::cout <<
" hasSurfaceFuncs: " << devProp.arch.hasSurfaceFuncs << std::endl;
256 std::cout <<
" has3dGrid: " << devProp.arch.has3dGrid << std::endl;
257 std::cout <<
" hasDynamicParallelism: " << devProp.arch.hasDynamicParallelism << std::endl;
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
The alpaka accelerator library.
ALPAKA_FN_HOST auto getDevCount(TPlatform const &platform)
Tag used in class inheritance hierarchies that describes that a specific interface (TInterface) is im...
The device count get trait.