15#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
22#ifdef ALPAKA_ACC_SYCL_ENABLED
25# pragma clang diagnostic push
26# pragma clang diagnostic ignored "-Wswitch-default"
29# include <sycl/sycl.hpp>
35 template<concepts::Tag TTag>
36 struct SYCLDeviceSelector;
40 template<concepts::Tag TTag>
41 struct PlatformGenericSycl : interface::Implements<ConceptPlatform, PlatformGenericSycl<TTag>>
47 m_platform = sycl::platform(detail::SYCLDeviceSelector<TTag>{});
48 m_devices = m_platform->get_devices();
49 m_context = sycl::context{
51 [](sycl::exception_list exceptions)
53 auto ss_err = std::stringstream{};
54 ss_err <<
"Caught asynchronous SYCL exception(s):\n";
55 for(std::exception_ptr e : exceptions)
59 std::rethrow_exception(e);
61 catch(sycl::exception
const& err)
63 ss_err << err.what() <<
" (" << err.code() <<
")\n";
66 throw std::runtime_error(ss_err.str());
69 catch(sycl::exception
const&)
80 [[nodiscard]]
auto syclPlatform() -> sycl::platform&
82 if(not m_platform.has_value())
83 throw std::runtime_error(
"The underlying SYCL platform is empty and invalid.");
85 return m_platform.value();
88 [[nodiscard]]
auto syclPlatform() const -> sycl::platform const&
90 if(not m_platform.has_value())
91 throw std::runtime_error(
"The underlying SYCL platform is empty and invalid.");
93 return m_platform.value();
96 [[nodiscard]]
auto syclDevices() -> std::vector<sycl::device>&
101 [[nodiscard]]
auto syclDevices() const ->
std::vector<sycl::device> const&
106 [[nodiscard]]
auto syclContext() -> sycl::context&
108 if(not m_context.has_value())
109 throw std::runtime_error(
"The underlying SYCL platform is empty and invalid.");
111 return m_context.value();
114 [[nodiscard]]
auto syclContext() const -> sycl::context const&
116 if(not m_context.has_value())
117 throw std::runtime_error(
"The underlying SYCL platform is empty and invalid.");
119 return m_context.value();
123 std::optional<sycl::platform> m_platform;
124 std::vector<sycl::device> m_devices;
125 std::optional<sycl::context> m_context;
131 template<concepts::Tag TTag>
132 struct DevType<PlatformGenericSycl<TTag>>
134 using type = DevGenericSycl<TTag>;
138 template<concepts::Tag TTag>
139 struct GetDevCount<PlatformGenericSycl<TTag>>
141 static auto getDevCount(PlatformGenericSycl<TTag>
const& platform) -> std::size_t
145 return platform.syclDevices().size();
150 template<concepts::Tag TTag>
151 struct GetDevByIdx<PlatformGenericSycl<TTag>>
153 static auto getDevByIdx(PlatformGenericSycl<TTag>
const& platform, std::size_t
const& devIdx)
157 auto const& devices = platform.syclDevices();
158 if(devIdx >= devices.size())
160 auto ss_err = std::stringstream{};
161 ss_err <<
"Unable to return device handle for device " << devIdx <<
". There are only "
162 << devices.size() <<
" SYCL devices!";
163 throw std::runtime_error(ss_err.str());
166 auto sycl_dev = devices.at(devIdx);
169# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
170 printDeviceProperties(sycl_dev);
171# elif ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
172 std::cout << __func__ << sycl_dev.template get_info<sycl::info::device::name>() <<
'\n';
174 using SyclPlatform = alpaka::PlatformGenericSycl<TTag>;
175 return typename DevType<SyclPlatform>::type{sycl_dev, platform.syclContext()};
179# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
181 static auto printDeviceProperties(sycl::device
const& device) ->
void
185 constexpr auto KiB = std::size_t{1024};
186 constexpr auto MiB = KiB * KiB;
188 std::cout <<
"Device type: ";
189 switch(device.get_info<sycl::info::device::device_type>())
191 case sycl::info::device_type::cpu:
195 case sycl::info::device_type::gpu:
199 case sycl::info::device_type::accelerator:
200 std::cout <<
"Accelerator";
203 case sycl::info::device_type::custom:
204 std::cout <<
"Custom";
207 case sycl::info::device_type::automatic:
208 std::cout <<
"Automatic";
211 case sycl::info::device_type::host:
218 case sycl::info::device_type::all:
224 std::cout <<
"Name: " << device.get_info<sycl::info::device::name>() <<
'\n';
226 std::cout <<
"Vendor: " << device.get_info<sycl::info::device::vendor>() <<
'\n';
228 std::cout <<
"Vendor ID: " << device.get_info<sycl::info::device::vendor_id>() <<
'\n';
230 std::cout <<
"Driver version: " << device.get_info<sycl::info::device::driver_version>() <<
'\n';
232 std::cout <<
"SYCL version: " << device.get_info<sycl::info::device::version>() <<
'\n';
234# if !defined(ALPAKA_COMP_ICPX)
236 std::cout <<
"Backend version: " << device.get_info<sycl::info::device::backend_version>() <<
'\n';
239 std::cout <<
"Aspects: " <<
'\n';
241# if defined(ALPAKA_COMP_ICPX)
242# if ALPAKA_COMP_ICPX >= ALPAKA_VERSION_NUMBER(53, 2, 0)
244 if(device.has(sycl::aspect::emulated))
245 std::cout <<
"\t* emulated\n";
247 if(device.has(sycl::aspect::host_debuggable))
248 std::cout <<
"\t* debuggable using standard debuggers\n";
252 if(device.has(sycl::aspect::fp16))
253 std::cout <<
"\t* supports sycl::half precision\n";
255 if(device.has(sycl::aspect::fp64))
256 std::cout <<
"\t* supports double precision\n";
258 if(device.has(sycl::aspect::atomic64))
259 std::cout <<
"\t* supports 64-bit atomics\n";
261 if(device.has(sycl::aspect::image))
262 std::cout <<
"\t* supports images\n";
264 if(device.has(sycl::aspect::online_compiler))
265 std::cout <<
"\t* supports online compilation of device code\n";
267 if(device.has(sycl::aspect::online_linker))
268 std::cout <<
"\t* supports online linking of device code\n";
270 if(device.has(sycl::aspect::queue_profiling))
271 std::cout <<
"\t* supports queue profiling\n";
273 if(device.has(sycl::aspect::usm_device_allocations))
274 std::cout <<
"\t* supports explicit USM allocations\n";
276 if(device.has(sycl::aspect::usm_host_allocations))
277 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::host\n";
279 if(device.has(sycl::aspect::usm_atomic_host_allocations))
280 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::host atomically\n";
282 if(device.has(sycl::aspect::usm_shared_allocations))
283 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::shared\n";
285 if(device.has(sycl::aspect::usm_atomic_shared_allocations))
286 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::shared atomically\n";
288 if(device.has(sycl::aspect::usm_system_allocations))
289 std::cout <<
"\t* can access memory allocated by the system allocator\n";
291 std::cout <<
"Available compute units: " << device.get_info<sycl::info::device::max_compute_units>()
294 std::cout <<
"Maximum work item dimensions: ";
295 auto dims = device.get_info<sycl::info::device::max_work_item_dimensions>();
296 std::cout << dims << std::endl;
298 std::cout <<
"Maximum number of work items:\n";
299 auto const wi_1D = device.get_info<sycl::info::device::max_work_item_sizes<1>>();
300 auto const wi_2D = device.get_info<sycl::info::device::max_work_item_sizes<2>>();
301 auto const wi_3D = device.get_info<sycl::info::device::max_work_item_sizes<3>>();
302 std::cout <<
"\t* 1D: (" << wi_1D.get(0) <<
")\n";
303 std::cout <<
"\t* 2D: (" << wi_2D.get(0) <<
", " << wi_2D.get(1) <<
")\n";
304 std::cout <<
"\t* 3D: (" << wi_3D.get(0) <<
", " << wi_3D.get(1) <<
", " << wi_3D.get(2) <<
")\n";
306 std::cout <<
"Maximum number of work items per work-group: "
307 << device.get_info<sycl::info::device::max_work_group_size>() <<
'\n';
309 std::cout <<
"Maximum number of sub-groups per work-group: "
310 << device.get_info<sycl::info::device::max_num_sub_groups>() <<
'\n';
312 std::cout <<
"Supported sub-group sizes: ";
313 auto const sg_sizes = device.get_info<sycl::info::device::sub_group_sizes>();
314 for(
auto const& sz : sg_sizes)
315 std::cout << sz <<
", ";
318 std::cout <<
"Preferred native vector width (char): "
319 << device.get_info<sycl::info::device::preferred_vector_width_char>() <<
'\n';
321 std::cout <<
"Native ISA vector width (char): "
322 << device.get_info<sycl::info::device::native_vector_width_char>() <<
'\n';
324 std::cout <<
"Preferred native vector width (short): "
325 << device.get_info<sycl::info::device::preferred_vector_width_short>() <<
'\n';
327 std::cout <<
"Native ISA vector width (short): "
328 << device.get_info<sycl::info::device::native_vector_width_short>() <<
'\n';
330 std::cout <<
"Preferred native vector width (int): "
331 << device.get_info<sycl::info::device::preferred_vector_width_int>() <<
'\n';
333 std::cout <<
"Native ISA vector width (int): "
334 << device.get_info<sycl::info::device::native_vector_width_int>() <<
'\n';
336 std::cout <<
"Preferred native vector width (long): "
337 << device.get_info<sycl::info::device::preferred_vector_width_long>() <<
'\n';
339 std::cout <<
"Native ISA vector width (long): "
340 << device.get_info<sycl::info::device::native_vector_width_long>() <<
'\n';
342 std::cout <<
"Preferred native vector width (float): "
343 << device.get_info<sycl::info::device::preferred_vector_width_float>() <<
'\n';
345 std::cout <<
"Native ISA vector width (float): "
346 << device.get_info<sycl::info::device::native_vector_width_float>() <<
'\n';
348 if(device.has(sycl::aspect::fp64))
350 std::cout <<
"Preferred native vector width (double): "
351 << device.get_info<sycl::info::device::preferred_vector_width_double>() <<
'\n';
353 std::cout <<
"Native ISA vector width (double): "
354 << device.get_info<sycl::info::device::native_vector_width_double>() <<
'\n';
357 if(device.has(sycl::aspect::fp16))
359 std::cout <<
"Preferred native vector width (half): "
360 << device.get_info<sycl::info::device::preferred_vector_width_half>() <<
'\n';
362 std::cout <<
"Native ISA vector width (half): "
363 << device.get_info<sycl::info::device::native_vector_width_half>() <<
'\n';
366 std::cout <<
"Maximum clock frequency: " << device.get_info<sycl::info::device::max_clock_frequency>()
369 std::cout <<
"Address space size: " << device.get_info<sycl::info::device::address_bits>() <<
"-bit\n";
371 std::cout <<
"Maximum size of memory object allocation: "
372 << device.get_info<sycl::info::device::max_mem_alloc_size>() <<
" bytes\n";
374 if(device.has(sycl::aspect::image))
376 std::cout <<
"Maximum number of simultaneous image object reads per kernel: "
377 << device.get_info<sycl::info::device::max_read_image_args>() <<
'\n';
379 std::cout <<
"Maximum number of simultaneous image writes per kernel: "
380 << device.get_info<sycl::info::device::max_write_image_args>() <<
'\n';
382 std::cout <<
"Maximum 1D/2D image width: "
383 << device.get_info<sycl::info::device::image2d_max_width>() <<
" px\n";
385 std::cout <<
"Maximum 2D image height: "
386 << device.get_info<sycl::info::device::image2d_max_height>() <<
" px\n";
388 std::cout <<
"Maximum 3D image width: " << device.get_info<sycl::info::device::image3d_max_width>()
391 std::cout <<
"Maximum 3D image height: "
392 << device.get_info<sycl::info::device::image3d_max_height>() <<
" px\n";
394 std::cout <<
"Maximum 3D image depth: " << device.get_info<sycl::info::device::image3d_max_depth>()
397 std::cout <<
"Maximum number of samplers per kernel: "
398 << device.get_info<sycl::info::device::max_samplers>() <<
'\n';
401 std::cout <<
"Maximum kernel argument size: "
402 << device.get_info<sycl::info::device::max_parameter_size>() <<
" bytes\n";
404 std::cout <<
"Memory base address alignment: "
405 << device.get_info<sycl::info::device::mem_base_addr_align>() <<
" bit\n";
407 auto print_fp_config = [](std::string
const& fp, std::vector<sycl::info::fp_config>
const& conf)
409 std::cout << fp <<
" precision floating-point capabilities:\n";
411 auto find_and_print = [&](sycl::info::fp_config val)
413 auto it = std::find(
begin(conf),
end(conf), val);
414 std::cout << (it == std::end(conf) ?
"No" :
"Yes") <<
'\n';
417 std::cout <<
"\t* denorm support: ";
418 find_and_print(sycl::info::fp_config::denorm);
420 std::cout <<
"\t* INF & quiet NaN support: ";
421 find_and_print(sycl::info::fp_config::inf_nan);
423 std::cout <<
"\t* round to nearest even support: ";
424 find_and_print(sycl::info::fp_config::round_to_nearest);
426 std::cout <<
"\t* round to zero support: ";
427 find_and_print(sycl::info::fp_config::round_to_zero);
429 std::cout <<
"\t* round to infinity support: ";
430 find_and_print(sycl::info::fp_config::round_to_inf);
432 std::cout <<
"\t* IEEE754-2008 FMA support: ";
433 find_and_print(sycl::info::fp_config::fma);
435 std::cout <<
"\t* correctly rounded divide/sqrt support: ";
436 find_and_print(sycl::info::fp_config::correctly_rounded_divide_sqrt);
438 std::cout <<
"\t* software-implemented floating point operations: ";
439 find_and_print(sycl::info::fp_config::soft_float);
442 if(device.has(sycl::aspect::fp16))
444 auto const fp16_conf = device.get_info<sycl::info::device::half_fp_config>();
445 print_fp_config(
"Half", fp16_conf);
448 auto const fp32_conf = device.get_info<sycl::info::device::single_fp_config>();
449 print_fp_config(
"Single", fp32_conf);
451 if(device.has(sycl::aspect::fp64))
453 auto const fp64_conf = device.get_info<sycl::info::device::double_fp_config>();
454 print_fp_config(
"Double", fp64_conf);
457 std::cout <<
"Global memory cache type: ";
458 auto has_global_mem_cache =
false;
459 switch(device.get_info<sycl::info::device::global_mem_cache_type>())
461 case sycl::info::global_mem_cache_type::none:
465 case sycl::info::global_mem_cache_type::read_only:
466 std::cout <<
"read-only";
467 has_global_mem_cache =
true;
470 case sycl::info::global_mem_cache_type::read_write:
471 std::cout <<
"read-write";
472 has_global_mem_cache =
true;
477 if(has_global_mem_cache)
479 std::cout <<
"Global memory cache line size: "
480 << device.get_info<sycl::info::device::global_mem_cache_line_size>() <<
" bytes\n";
482 std::cout <<
"Global memory cache size: "
483 << device.get_info<sycl::info::device::global_mem_cache_size>() / KiB <<
" KiB\n";
486 std::cout <<
"Global memory size: " << device.get_info<sycl::info::device::global_mem_size>() / MiB
487 <<
" MiB" << std::endl;
489 std::cout <<
"Local memory type: ";
490 auto has_local_memory =
false;
491 switch(device.get_info<sycl::info::device::local_mem_type>())
493 case sycl::info::local_mem_type::none:
497 case sycl::info::local_mem_type::local:
498 std::cout <<
"local";
499 has_local_memory =
true;
502 case sycl::info::local_mem_type::global:
503 std::cout <<
"global";
504 has_local_memory =
true;
510 std::cout <<
"Local memory size: " << device.get_info<sycl::info::device::local_mem_size>() / KiB
513 std::cout <<
"Error correction support: "
514 << (device.get_info<sycl::info::device::error_correction_support>() ?
"Yes" :
"No") <<
'\n';
516 auto print_memory_orders = [](std::vector<sycl::memory_order>
const& mem_orders)
518 for(
auto const& cap : mem_orders)
522 case sycl::memory_order::relaxed:
523 std::cout <<
"relaxed";
526 case sycl::memory_order::acquire:
527 std::cout <<
"acquire";
530 case sycl::memory_order::release:
531 std::cout <<
"release";
534 case sycl::memory_order::acq_rel:
535 std::cout <<
"acq_rel";
538 case sycl::memory_order::seq_cst:
539 std::cout <<
"seq_cst";
541# if defined(ALPAKA_COMP_ICPX)
543 case sycl::memory_order::__consume_unsupported:
552 std::cout <<
"Supported memory orderings for atomic operations: ";
553 auto const mem_orders = device.get_info<sycl::info::device::atomic_memory_order_capabilities>();
554 print_memory_orders(mem_orders);
556# if defined(ALPAKA_COMP_ICPX)
557# if ALPAKA_COMP_ICPX >= ALPAKA_VERSION_NUMBER(53, 2, 0)
559 std::cout <<
"Supported memory orderings for sycl::atomic_fence: ";
560 auto const fence_orders = device.get_info<sycl::info::device::atomic_fence_order_capabilities>();
561 print_memory_orders(fence_orders);
565 auto print_memory_scopes = [](std::vector<sycl::memory_scope>
const& mem_scopes)
567 for(
auto const& cap : mem_scopes)
571 case sycl::memory_scope::work_item:
572 std::cout <<
"work-item";
575 case sycl::memory_scope::sub_group:
576 std::cout <<
"sub-group";
579 case sycl::memory_scope::work_group:
580 std::cout <<
"work-group";
583 case sycl::memory_scope::device:
584 std::cout <<
"device";
587 case sycl::memory_scope::system:
588 std::cout <<
"system";
596 std::cout <<
"Supported memory scopes for atomic operations: ";
597 auto const mem_scopes = device.get_info<sycl::info::device::atomic_memory_scope_capabilities>();
598 print_memory_scopes(mem_scopes);
600# if defined(ALPAKA_COMP_ICPX)
601# if ALPAKA_COMP_ICPX >= ALPAKA_VERSION_NUMBER(53, 2, 0)
603 std::cout <<
"Supported memory scopes for sycl::atomic_fence: ";
604 auto const fence_scopes = device.get_info<sycl::info::device::atomic_fence_scope_capabilities>();
605 print_memory_scopes(fence_scopes);
609 std::cout <<
"Device timer resolution: "
610 << device.get_info<sycl::info::device::profiling_timer_resolution>() <<
" ns\n";
612 std::cout <<
"Built-in kernels: ";
613 auto const builtins = device.get_info<sycl::info::device::built_in_kernel_ids>();
614 for(
auto const& b : builtins)
615 std::cout << b.get_name() <<
", ";
618 std::cout <<
"Maximum number of subdevices: ";
619 auto const max_subs = device.get_info<sycl::info::device::partition_max_sub_devices>();
620 std::cout << max_subs <<
'\n';
624 std::cout <<
"Supported partition properties: ";
625 auto const part_props = device.get_info<sycl::info::device::partition_properties>();
626 auto has_affinity_domains =
false;
627 for(
auto const& prop : part_props)
631 case sycl::info::partition_property::no_partition:
632 std::cout <<
"no partition";
635 case sycl::info::partition_property::partition_equally:
636 std::cout <<
"equally";
639 case sycl::info::partition_property::partition_by_counts:
640 std::cout <<
"by counts";
643 case sycl::info::partition_property::partition_by_affinity_domain:
644 std::cout <<
"by affinity domain";
645 has_affinity_domains =
true;
647# if defined(ALPAKA_COMP_ICPX)
648 case sycl::info::partition_property::ext_intel_partition_by_cslice:
649 std::cout <<
"by compute slice (Intel extension; deprecated)";
657 if(has_affinity_domains)
659 std::cout <<
"Supported partition affinity domains: ";
660 auto const aff_doms = device.get_info<sycl::info::device::partition_affinity_domains>();
661 for(
auto const& dom : aff_doms)
665 case sycl::info::partition_affinity_domain::not_applicable:
666 std::cout <<
"not applicable";
669 case sycl::info::partition_affinity_domain::numa:
673 case sycl::info::partition_affinity_domain::L4_cache:
674 std::cout <<
"L4 cache";
677 case sycl::info::partition_affinity_domain::L3_cache:
678 std::cout <<
"L3 cache";
681 case sycl::info::partition_affinity_domain::L2_cache:
682 std::cout <<
"L2 cache";
685 case sycl::info::partition_affinity_domain::L1_cache:
686 std::cout <<
"L1 cache";
689 case sycl::info::partition_affinity_domain::next_partitionable:
690 std::cout <<
"next partitionable";
698 std::cout <<
"Current partition property: ";
699 switch(device.get_info<sycl::info::device::partition_type_property>())
701 case sycl::info::partition_property::no_partition:
702 std::cout <<
"no partition";
705 case sycl::info::partition_property::partition_equally:
706 std::cout <<
"partitioned equally";
709 case sycl::info::partition_property::partition_by_counts:
710 std::cout <<
"partitioned by counts";
713 case sycl::info::partition_property::partition_by_affinity_domain:
714 std::cout <<
"partitioned by affinity domain";
717# if defined(ALPAKA_COMP_ICPX)
718 case sycl::info::partition_property::ext_intel_partition_by_cslice:
719 std::cout <<
"partitioned by compute slice (Intel extension; deprecated)";
725 std::cout <<
"Current partition affinity domain: ";
726 switch(device.get_info<sycl::info::device::partition_type_affinity_domain>())
728 case sycl::info::partition_affinity_domain::not_applicable:
729 std::cout <<
"not applicable";
732 case sycl::info::partition_affinity_domain::numa:
736 case sycl::info::partition_affinity_domain::L4_cache:
737 std::cout <<
"L4 cache";
740 case sycl::info::partition_affinity_domain::L3_cache:
741 std::cout <<
"L3 cache";
744 case sycl::info::partition_affinity_domain::L2_cache:
745 std::cout <<
"L2 cache";
748 case sycl::info::partition_affinity_domain::L1_cache:
749 std::cout <<
"L1 cache";
752 case sycl::info::partition_affinity_domain::next_partitionable:
753 std::cout <<
"next partitionable";
766# if ALPAKA_COMP_CLANG
767# pragma clang diagnostic pop
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
ALPAKA_FN_HOST auto end(TView &view) -> Iterator< TView >
ALPAKA_FN_HOST auto begin(TView &view) -> Iterator< TView >
The alpaka accelerator library.
ALPAKA_FN_HOST auto getDevCount(TPlatform const &platform)
ALPAKA_FN_HOST auto getDevByIdx(TPlatform const &platform, std::size_t const &devIdx) -> Dev< TPlatform >