15 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
22 #ifdef ALPAKA_ACC_SYCL_ENABLED
25 # pragma clang diagnostic push
26 # pragma clang diagnostic ignored "-Wswitch-default"
29 # include <sycl/sycl.hpp>
35 template<concepts::Tag TTag>
36 struct SYCLDeviceSelector;
40 template<concepts::Tag TTag>
41 struct PlatformGenericSycl : interface::Implements<ConceptPlatform, PlatformGenericSycl<TTag>>
44 : platform{detail::SYCLDeviceSelector<TTag>{}}
45 , devices(platform.get_devices())
46 , context{sycl::context{
48 [](sycl::exception_list exceptions)
50 auto ss_err = std::stringstream{};
51 ss_err <<
"Caught asynchronous SYCL exception(s):\n";
52 for(std::exception_ptr
e : exceptions)
56 std::rethrow_exception(
e);
58 catch(sycl::exception
const& err)
60 ss_err << err.what() <<
" (" << err.code() <<
")\n";
63 throw std::runtime_error(ss_err.str());
68 [[nodiscard]]
auto syclPlatform() -> sycl::platform&
73 [[nodiscard]]
auto syclPlatform() const -> sycl::platform const&
78 [[nodiscard]]
auto syclDevices() -> std::vector<sycl::device>&
83 [[nodiscard]]
auto syclDevices() const -> std::vector<sycl::device> const&
88 [[nodiscard]]
auto syclContext() -> sycl::context&
93 [[nodiscard]]
auto syclContext() const -> sycl::context const&
99 sycl::platform platform;
100 std::vector<sycl::device> devices;
101 sycl::context context;
107 template<concepts::Tag TTag>
108 struct DevType<PlatformGenericSycl<TTag>>
110 using type = DevGenericSycl<TTag>;
114 template<concepts::Tag TTag>
115 struct GetDevCount<PlatformGenericSycl<TTag>>
117 static auto getDevCount(PlatformGenericSycl<TTag>
const& platform) -> std::size_t
121 return platform.syclDevices().size();
126 template<concepts::Tag TTag>
127 struct GetDevByIdx<PlatformGenericSycl<TTag>>
129 static auto getDevByIdx(PlatformGenericSycl<TTag>
const& platform, std::size_t
const& devIdx)
133 auto const& devices = platform.syclDevices();
134 if(devIdx >= devices.size())
136 auto ss_err = std::stringstream{};
137 ss_err <<
"Unable to return device handle for device " << devIdx <<
". There are only "
138 << devices.size() <<
" SYCL devices!";
139 throw std::runtime_error(ss_err.str());
142 auto sycl_dev = devices.at(devIdx);
145 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
146 printDeviceProperties(sycl_dev);
147 # elif ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
148 std::cout << __func__ << sycl_dev.template get_info<sycl::info::device::name>() <<
'\n';
150 using SyclPlatform = alpaka::PlatformGenericSycl<TTag>;
151 return typename DevType<SyclPlatform>::type{sycl_dev, platform.syclContext()};
155 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
157 static auto printDeviceProperties(sycl::device
const& device) ->
void
161 constexpr
auto KiB = std::size_t{1024};
162 constexpr
auto MiB = KiB * KiB;
164 std::cout <<
"Device type: ";
165 switch(device.get_info<sycl::info::device::device_type>())
167 case sycl::info::device_type::cpu:
171 case sycl::info::device_type::gpu:
175 case sycl::info::device_type::accelerator:
176 std::cout <<
"Accelerator";
179 case sycl::info::device_type::custom:
180 std::cout <<
"Custom";
183 case sycl::info::device_type::automatic:
184 std::cout <<
"Automatic";
187 case sycl::info::device_type::host:
200 std::cout <<
"Name: " << device.get_info<sycl::info::device::name>() <<
'\n';
202 std::cout <<
"Vendor: " << device.get_info<sycl::info::device::vendor>() <<
'\n';
204 std::cout <<
"Vendor ID: " << device.get_info<sycl::info::device::vendor_id>() <<
'\n';
206 std::cout <<
"Driver version: " << device.get_info<sycl::info::device::driver_version>() <<
'\n';
208 std::cout <<
"SYCL version: " << device.get_info<sycl::info::device::version>() <<
'\n';
210 # if !defined(BOOST_COMP_ICPX)
212 std::cout <<
"Backend version: " << device.get_info<sycl::info::device::backend_version>() <<
'\n';
215 std::cout <<
"Aspects: " <<
'\n';
217 # if defined(BOOST_COMP_ICPX)
218 # if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0)
220 if(device.has(sycl::aspect::emulated))
221 std::cout <<
"\t* emulated\n";
223 if(device.has(sycl::aspect::host_debuggable))
224 std::cout <<
"\t* debuggable using standard debuggers\n";
228 if(device.has(sycl::aspect::fp16))
229 std::cout <<
"\t* supports sycl::half precision\n";
231 if(device.has(sycl::aspect::fp64))
232 std::cout <<
"\t* supports double precision\n";
234 if(device.has(sycl::aspect::atomic64))
235 std::cout <<
"\t* supports 64-bit atomics\n";
237 if(device.has(sycl::aspect::image))
238 std::cout <<
"\t* supports images\n";
240 if(device.has(sycl::aspect::online_compiler))
241 std::cout <<
"\t* supports online compilation of device code\n";
243 if(device.has(sycl::aspect::online_linker))
244 std::cout <<
"\t* supports online linking of device code\n";
246 if(device.has(sycl::aspect::queue_profiling))
247 std::cout <<
"\t* supports queue profiling\n";
249 if(device.has(sycl::aspect::usm_device_allocations))
250 std::cout <<
"\t* supports explicit USM allocations\n";
252 if(device.has(sycl::aspect::usm_host_allocations))
253 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::host\n";
255 if(device.has(sycl::aspect::usm_atomic_host_allocations))
256 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::host atomically\n";
258 if(device.has(sycl::aspect::usm_shared_allocations))
259 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::shared\n";
261 if(device.has(sycl::aspect::usm_atomic_shared_allocations))
262 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::shared atomically\n";
264 if(device.has(sycl::aspect::usm_system_allocations))
265 std::cout <<
"\t* can access memory allocated by the system allocator\n";
267 std::cout <<
"Available compute units: " << device.get_info<sycl::info::device::max_compute_units>()
270 std::cout <<
"Maximum work item dimensions: ";
271 auto dims = device.get_info<sycl::info::device::max_work_item_dimensions>();
272 std::cout << dims << std::endl;
274 std::cout <<
"Maximum number of work items:\n";
275 auto const wi_1D = device.get_info<sycl::info::device::max_work_item_sizes<1>>();
276 auto const wi_2D = device.get_info<sycl::info::device::max_work_item_sizes<2>>();
277 auto const wi_3D = device.get_info<sycl::info::device::max_work_item_sizes<3>>();
278 std::cout <<
"\t* 1D: (" << wi_1D.get(0) <<
")\n";
279 std::cout <<
"\t* 2D: (" << wi_2D.get(0) <<
", " << wi_2D.get(1) <<
")\n";
280 std::cout <<
"\t* 3D: (" << wi_3D.get(0) <<
", " << wi_3D.get(1) <<
", " << wi_3D.get(2) <<
")\n";
282 std::cout <<
"Maximum number of work items per work-group: "
283 << device.get_info<sycl::info::device::max_work_group_size>() <<
'\n';
285 std::cout <<
"Maximum number of sub-groups per work-group: "
286 << device.get_info<sycl::info::device::max_num_sub_groups>() <<
'\n';
288 std::cout <<
"Supported sub-group sizes: ";
289 auto const sg_sizes = device.get_info<sycl::info::device::sub_group_sizes>();
290 for(
auto const& sz : sg_sizes)
291 std::cout << sz <<
", ";
294 std::cout <<
"Preferred native vector width (char): "
295 << device.get_info<sycl::info::device::preferred_vector_width_char>() <<
'\n';
297 std::cout <<
"Native ISA vector width (char): "
298 << device.get_info<sycl::info::device::native_vector_width_char>() <<
'\n';
300 std::cout <<
"Preferred native vector width (short): "
301 << device.get_info<sycl::info::device::preferred_vector_width_short>() <<
'\n';
303 std::cout <<
"Native ISA vector width (short): "
304 << device.get_info<sycl::info::device::native_vector_width_short>() <<
'\n';
306 std::cout <<
"Preferred native vector width (int): "
307 << device.get_info<sycl::info::device::preferred_vector_width_int>() <<
'\n';
309 std::cout <<
"Native ISA vector width (int): "
310 << device.get_info<sycl::info::device::native_vector_width_int>() <<
'\n';
312 std::cout <<
"Preferred native vector width (long): "
313 << device.get_info<sycl::info::device::preferred_vector_width_long>() <<
'\n';
315 std::cout <<
"Native ISA vector width (long): "
316 << device.get_info<sycl::info::device::native_vector_width_long>() <<
'\n';
318 std::cout <<
"Preferred native vector width (float): "
319 << device.get_info<sycl::info::device::preferred_vector_width_float>() <<
'\n';
321 std::cout <<
"Native ISA vector width (float): "
322 << device.get_info<sycl::info::device::native_vector_width_float>() <<
'\n';
324 if(device.has(sycl::aspect::fp64))
326 std::cout <<
"Preferred native vector width (double): "
327 << device.get_info<sycl::info::device::preferred_vector_width_double>() <<
'\n';
329 std::cout <<
"Native ISA vector width (double): "
330 << device.get_info<sycl::info::device::native_vector_width_double>() <<
'\n';
333 if(device.has(sycl::aspect::fp16))
335 std::cout <<
"Preferred native vector width (half): "
336 << device.get_info<sycl::info::device::preferred_vector_width_half>() <<
'\n';
338 std::cout <<
"Native ISA vector width (half): "
339 << device.get_info<sycl::info::device::native_vector_width_half>() <<
'\n';
342 std::cout <<
"Maximum clock frequency: " << device.get_info<sycl::info::device::max_clock_frequency>()
345 std::cout <<
"Address space size: " << device.get_info<sycl::info::device::address_bits>() <<
"-bit\n";
347 std::cout <<
"Maximum size of memory object allocation: "
348 << device.get_info<sycl::info::device::max_mem_alloc_size>() <<
" bytes\n";
350 if(device.has(sycl::aspect::image))
352 std::cout <<
"Maximum number of simultaneous image object reads per kernel: "
353 << device.get_info<sycl::info::device::max_read_image_args>() <<
'\n';
355 std::cout <<
"Maximum number of simultaneous image writes per kernel: "
356 << device.get_info<sycl::info::device::max_write_image_args>() <<
'\n';
358 std::cout <<
"Maximum 1D/2D image width: "
359 << device.get_info<sycl::info::device::image2d_max_width>() <<
" px\n";
361 std::cout <<
"Maximum 2D image height: "
362 << device.get_info<sycl::info::device::image2d_max_height>() <<
" px\n";
364 std::cout <<
"Maximum 3D image width: " << device.get_info<sycl::info::device::image3d_max_width>()
367 std::cout <<
"Maximum 3D image height: "
368 << device.get_info<sycl::info::device::image3d_max_height>() <<
" px\n";
370 std::cout <<
"Maximum 3D image depth: " << device.get_info<sycl::info::device::image3d_max_depth>()
373 std::cout <<
"Maximum number of samplers per kernel: "
374 << device.get_info<sycl::info::device::max_samplers>() <<
'\n';
377 std::cout <<
"Maximum kernel argument size: "
378 << device.get_info<sycl::info::device::max_parameter_size>() <<
" bytes\n";
380 std::cout <<
"Memory base address alignment: "
381 << device.get_info<sycl::info::device::mem_base_addr_align>() <<
" bit\n";
383 auto print_fp_config = [](std::string
const& fp, std::vector<sycl::info::fp_config>
const& conf)
385 std::cout << fp <<
" precision floating-point capabilities:\n";
387 auto find_and_print = [&](sycl::info::fp_config val)
389 auto it = std::find(
begin(conf),
end(conf), val);
390 std::cout << (it ==
std::end(conf) ?
"No" :
"Yes") <<
'\n';
393 std::cout <<
"\t* denorm support: ";
394 find_and_print(sycl::info::fp_config::denorm);
396 std::cout <<
"\t* INF & quiet NaN support: ";
397 find_and_print(sycl::info::fp_config::inf_nan);
399 std::cout <<
"\t* round to nearest even support: ";
400 find_and_print(sycl::info::fp_config::round_to_nearest);
402 std::cout <<
"\t* round to zero support: ";
403 find_and_print(sycl::info::fp_config::round_to_zero);
405 std::cout <<
"\t* round to infinity support: ";
406 find_and_print(sycl::info::fp_config::round_to_inf);
408 std::cout <<
"\t* IEEE754-2008 FMA support: ";
411 std::cout <<
"\t* correctly rounded divide/sqrt support: ";
412 find_and_print(sycl::info::fp_config::correctly_rounded_divide_sqrt);
414 std::cout <<
"\t* software-implemented floating point operations: ";
415 find_and_print(sycl::info::fp_config::soft_float);
418 if(device.has(sycl::aspect::fp16))
420 auto const fp16_conf = device.get_info<sycl::info::device::half_fp_config>();
421 print_fp_config(
"Half", fp16_conf);
424 auto const fp32_conf = device.get_info<sycl::info::device::single_fp_config>();
425 print_fp_config(
"Single", fp32_conf);
427 if(device.has(sycl::aspect::fp64))
429 auto const fp64_conf = device.get_info<sycl::info::device::double_fp_config>();
430 print_fp_config(
"Double", fp64_conf);
433 std::cout <<
"Global memory cache type: ";
434 auto has_global_mem_cache =
false;
435 switch(device.get_info<sycl::info::device::global_mem_cache_type>())
437 case sycl::info::global_mem_cache_type::none:
441 case sycl::info::global_mem_cache_type::read_only:
442 std::cout <<
"read-only";
443 has_global_mem_cache =
true;
446 case sycl::info::global_mem_cache_type::read_write:
447 std::cout <<
"read-write";
448 has_global_mem_cache =
true;
453 if(has_global_mem_cache)
455 std::cout <<
"Global memory cache line size: "
456 << device.get_info<sycl::info::device::global_mem_cache_line_size>() <<
" bytes\n";
458 std::cout <<
"Global memory cache size: "
459 << device.get_info<sycl::info::device::global_mem_cache_size>() / KiB <<
" KiB\n";
462 std::cout <<
"Global memory size: " << device.get_info<sycl::info::device::global_mem_size>() / MiB
463 <<
" MiB" << std::endl;
465 std::cout <<
"Local memory type: ";
466 auto has_local_memory =
false;
467 switch(device.get_info<sycl::info::device::local_mem_type>())
469 case sycl::info::local_mem_type::none:
473 case sycl::info::local_mem_type::local:
474 std::cout <<
"local";
475 has_local_memory =
true;
478 case sycl::info::local_mem_type::global:
479 std::cout <<
"global";
480 has_local_memory =
true;
486 std::cout <<
"Local memory size: " << device.get_info<sycl::info::device::local_mem_size>() / KiB
489 std::cout <<
"Error correction support: "
490 << (device.get_info<sycl::info::device::error_correction_support>() ?
"Yes" :
"No") <<
'\n';
492 auto print_memory_orders = [](std::vector<sycl::memory_order>
const& mem_orders)
494 for(
auto const& cap : mem_orders)
498 case sycl::memory_order::relaxed:
499 std::cout <<
"relaxed";
502 case sycl::memory_order::acquire:
503 std::cout <<
"acquire";
506 case sycl::memory_order::release:
507 std::cout <<
"release";
510 case sycl::memory_order::acq_rel:
511 std::cout <<
"acq_rel";
514 case sycl::memory_order::seq_cst:
515 std::cout <<
"seq_cst";
517 # if defined(BOOST_COMP_ICPX)
519 case sycl::memory_order::__consume_unsupported:
528 std::cout <<
"Supported memory orderings for atomic operations: ";
529 auto const mem_orders = device.get_info<sycl::info::device::atomic_memory_order_capabilities>();
530 print_memory_orders(mem_orders);
532 # if defined(BOOST_COMP_ICPX)
533 # if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0)
535 std::cout <<
"Supported memory orderings for sycl::atomic_fence: ";
536 auto const fence_orders = device.get_info<sycl::info::device::atomic_fence_order_capabilities>();
537 print_memory_orders(fence_orders);
541 auto print_memory_scopes = [](std::vector<sycl::memory_scope>
const& mem_scopes)
543 for(
auto const& cap : mem_scopes)
547 case sycl::memory_scope::work_item:
548 std::cout <<
"work-item";
551 case sycl::memory_scope::sub_group:
552 std::cout <<
"sub-group";
555 case sycl::memory_scope::work_group:
556 std::cout <<
"work-group";
559 case sycl::memory_scope::device:
560 std::cout <<
"device";
563 case sycl::memory_scope::system:
564 std::cout <<
"system";
572 std::cout <<
"Supported memory scopes for atomic operations: ";
573 auto const mem_scopes = device.get_info<sycl::info::device::atomic_memory_scope_capabilities>();
574 print_memory_scopes(mem_scopes);
576 # if defined(BOOST_COMP_ICPX)
577 # if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0)
579 std::cout <<
"Supported memory scopes for sycl::atomic_fence: ";
580 auto const fence_scopes = device.get_info<sycl::info::device::atomic_fence_scope_capabilities>();
581 print_memory_scopes(fence_scopes);
585 std::cout <<
"Device timer resolution: "
586 << device.get_info<sycl::info::device::profiling_timer_resolution>() <<
" ns\n";
588 std::cout <<
"Built-in kernels: ";
589 auto const builtins = device.get_info<sycl::info::device::built_in_kernel_ids>();
590 for(
auto const& b : builtins)
591 std::cout << b.get_name() <<
", ";
594 std::cout <<
"Maximum number of subdevices: ";
595 auto const max_subs = device.get_info<sycl::info::device::partition_max_sub_devices>();
596 std::cout << max_subs <<
'\n';
600 std::cout <<
"Supported partition properties: ";
601 auto const part_props = device.get_info<sycl::info::device::partition_properties>();
602 auto has_affinity_domains =
false;
603 for(
auto const& prop : part_props)
607 case sycl::info::partition_property::no_partition:
608 std::cout <<
"no partition";
611 case sycl::info::partition_property::partition_equally:
612 std::cout <<
"equally";
615 case sycl::info::partition_property::partition_by_counts:
616 std::cout <<
"by counts";
619 case sycl::info::partition_property::partition_by_affinity_domain:
620 std::cout <<
"by affinity domain";
621 has_affinity_domains =
true;
623 # if defined(BOOST_COMP_ICPX)
624 case sycl::info::partition_property::ext_intel_partition_by_cslice:
625 std::cout <<
"by compute slice (Intel extension; deprecated)";
633 if(has_affinity_domains)
635 std::cout <<
"Supported partition affinity domains: ";
636 auto const aff_doms = device.get_info<sycl::info::device::partition_affinity_domains>();
637 for(
auto const& dom : aff_doms)
641 case sycl::info::partition_affinity_domain::not_applicable:
642 std::cout <<
"not applicable";
645 case sycl::info::partition_affinity_domain::numa:
649 case sycl::info::partition_affinity_domain::L4_cache:
650 std::cout <<
"L4 cache";
653 case sycl::info::partition_affinity_domain::L3_cache:
654 std::cout <<
"L3 cache";
657 case sycl::info::partition_affinity_domain::L2_cache:
658 std::cout <<
"L2 cache";
661 case sycl::info::partition_affinity_domain::L1_cache:
662 std::cout <<
"L1 cache";
665 case sycl::info::partition_affinity_domain::next_partitionable:
666 std::cout <<
"next partitionable";
674 std::cout <<
"Current partition property: ";
675 switch(device.get_info<sycl::info::device::partition_type_property>())
677 case sycl::info::partition_property::no_partition:
678 std::cout <<
"no partition";
681 case sycl::info::partition_property::partition_equally:
682 std::cout <<
"partitioned equally";
685 case sycl::info::partition_property::partition_by_counts:
686 std::cout <<
"partitioned by counts";
689 case sycl::info::partition_property::partition_by_affinity_domain:
690 std::cout <<
"partitioned by affinity domain";
693 # if defined(BOOST_COMP_ICPX)
694 case sycl::info::partition_property::ext_intel_partition_by_cslice:
695 std::cout <<
"partitioned by compute slice (Intel extension; deprecated)";
701 std::cout <<
"Current partition affinity domain: ";
702 switch(device.get_info<sycl::info::device::partition_type_affinity_domain>())
704 case sycl::info::partition_affinity_domain::not_applicable:
705 std::cout <<
"not applicable";
708 case sycl::info::partition_affinity_domain::numa:
712 case sycl::info::partition_affinity_domain::L4_cache:
713 std::cout <<
"L4 cache";
716 case sycl::info::partition_affinity_domain::L3_cache:
717 std::cout <<
"L3 cache";
720 case sycl::info::partition_affinity_domain::L2_cache:
721 std::cout <<
"L2 cache";
724 case sycl::info::partition_affinity_domain::L1_cache:
725 std::cout <<
"L1 cache";
728 case sycl::info::partition_affinity_domain::next_partitionable:
729 std::cout <<
"next partitionable";
742 # if BOOST_COMP_CLANG
743 # pragma clang diagnostic pop
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto fma(T const &fma_ctx, Tx const &x, Ty const &y, Tz const &z)
Computes x * y + z as if to infinite precision and rounded only once to fit the result type.
ALPAKA_FN_HOST auto end(TView &view) -> Iterator< TView >
ALPAKA_FN_HOST auto begin(TView &view) -> Iterator< TView >
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
The alpaka accelerator library.
ALPAKA_FN_HOST auto getDevCount(TPlatform const &platform)
ALPAKA_FN_HOST auto getDevByIdx(TPlatform const &platform, std::size_t const &devIdx) -> Dev< TPlatform >