14 #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
21 #ifdef ALPAKA_ACC_SYCL_ENABLED
23 # include <sycl/sycl.hpp>
28 template<
typename TSelector>
29 struct PlatformGenericSycl : concepts::Implements<ConceptPlatform, PlatformGenericSycl<TSelector>>
32 : platform{TSelector{}}
33 , devices(platform.get_devices())
34 , context{sycl::context{
36 [](sycl::exception_list exceptions)
38 auto ss_err = std::stringstream{};
39 ss_err <<
"Caught asynchronous SYCL exception(s):\n";
40 for(std::exception_ptr
e : exceptions)
44 std::rethrow_exception(
e);
46 catch(sycl::exception
const& err)
48 ss_err << err.what() <<
" (" << err.code() <<
")\n";
51 throw std::runtime_error(ss_err.str());
56 [[nodiscard]]
auto syclPlatform() -> sycl::platform&
61 [[nodiscard]]
auto syclPlatform() const -> sycl::platform const&
66 [[nodiscard]]
auto syclDevices() -> std::vector<sycl::device>&
71 [[nodiscard]]
auto syclDevices() const -> std::vector<sycl::device> const&
76 [[nodiscard]]
auto syclContext() -> sycl::context&
81 [[nodiscard]]
auto syclContext() const -> sycl::context const&
87 sycl::platform platform;
88 std::vector<sycl::device> devices;
89 sycl::context context;
96 template<
typename TSelector>
97 struct GetDevCount<PlatformGenericSycl<TSelector>>
99 static auto getDevCount(PlatformGenericSycl<TSelector>
const& platform) -> std::size_t
103 return platform.syclDevices().size();
108 template<
typename TSelector>
109 struct GetDevByIdx<
alpaka::PlatformGenericSycl<TSelector>>
111 static auto getDevByIdx(PlatformGenericSycl<TSelector>
const& platform, std::size_t
const& devIdx)
115 auto const& devices = platform.syclDevices();
116 if(devIdx >= devices.size())
118 auto ss_err = std::stringstream{};
119 ss_err <<
"Unable to return device handle for device " << devIdx <<
". There are only "
120 << devices.size() <<
" SYCL devices!";
121 throw std::runtime_error(ss_err.str());
124 auto sycl_dev = devices.at(devIdx);
127 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
128 printDeviceProperties(sycl_dev);
129 # elif ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
130 std::cout << __func__ << sycl_dev.template get_info<sycl::info::device::name>() <<
'\n';
132 using SyclPlatform = alpaka::PlatformGenericSycl<TSelector>;
133 return typename DevType<SyclPlatform>::type{sycl_dev, platform.syclContext()};
137 # if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
139 static auto printDeviceProperties(sycl::device
const& device) ->
void
143 constexpr
auto KiB = std::size_t{1024};
144 constexpr
auto MiB = KiB * KiB;
146 std::cout <<
"Device type: ";
147 switch(device.get_info<sycl::info::device::device_type>())
149 case sycl::info::device_type::cpu:
153 case sycl::info::device_type::gpu:
157 case sycl::info::device_type::accelerator:
158 std::cout <<
"Accelerator";
161 case sycl::info::device_type::custom:
162 std::cout <<
"Custom";
165 case sycl::info::device_type::automatic:
166 std::cout <<
"Automatic";
169 case sycl::info::device_type::host:
182 std::cout <<
"Name: " << device.get_info<sycl::info::device::name>() <<
'\n';
184 std::cout <<
"Vendor: " << device.get_info<sycl::info::device::vendor>() <<
'\n';
186 std::cout <<
"Vendor ID: " << device.get_info<sycl::info::device::vendor_id>() <<
'\n';
188 std::cout <<
"Driver version: " << device.get_info<sycl::info::device::driver_version>() <<
'\n';
190 std::cout <<
"SYCL version: " << device.get_info<sycl::info::device::version>() <<
'\n';
192 # if !defined(BOOST_COMP_ICPX)
194 std::cout <<
"Backend version: " << device.get_info<sycl::info::device::backend_version>() <<
'\n';
197 std::cout <<
"Aspects: " <<
'\n';
199 # if defined(BOOST_COMP_ICPX)
200 # if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0)
202 if(device.has(sycl::aspect::emulated))
203 std::cout <<
"\t* emulated\n";
205 if(device.has(sycl::aspect::host_debuggable))
206 std::cout <<
"\t* debuggable using standard debuggers\n";
210 if(device.has(sycl::aspect::fp16))
211 std::cout <<
"\t* supports sycl::half precision\n";
213 if(device.has(sycl::aspect::fp64))
214 std::cout <<
"\t* supports double precision\n";
216 if(device.has(sycl::aspect::atomic64))
217 std::cout <<
"\t* supports 64-bit atomics\n";
219 if(device.has(sycl::aspect::image))
220 std::cout <<
"\t* supports images\n";
222 if(device.has(sycl::aspect::online_compiler))
223 std::cout <<
"\t* supports online compilation of device code\n";
225 if(device.has(sycl::aspect::online_linker))
226 std::cout <<
"\t* supports online linking of device code\n";
228 if(device.has(sycl::aspect::queue_profiling))
229 std::cout <<
"\t* supports queue profiling\n";
231 if(device.has(sycl::aspect::usm_device_allocations))
232 std::cout <<
"\t* supports explicit USM allocations\n";
234 if(device.has(sycl::aspect::usm_host_allocations))
235 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::host\n";
237 if(device.has(sycl::aspect::usm_atomic_host_allocations))
238 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::host atomically\n";
240 if(device.has(sycl::aspect::usm_shared_allocations))
241 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::shared\n";
243 if(device.has(sycl::aspect::usm_atomic_shared_allocations))
244 std::cout <<
"\t* can access USM memory allocated by sycl::usm::alloc::shared atomically\n";
246 if(device.has(sycl::aspect::usm_system_allocations))
247 std::cout <<
"\t* can access memory allocated by the system allocator\n";
249 std::cout <<
"Available compute units: " << device.get_info<sycl::info::device::max_compute_units>()
252 std::cout <<
"Maximum work item dimensions: ";
253 auto dims = device.get_info<sycl::info::device::max_work_item_dimensions>();
254 std::cout << dims << std::endl;
256 std::cout <<
"Maximum number of work items:\n";
257 auto const wi_1D = device.get_info<sycl::info::device::max_work_item_sizes<1>>();
258 auto const wi_2D = device.get_info<sycl::info::device::max_work_item_sizes<2>>();
259 auto const wi_3D = device.get_info<sycl::info::device::max_work_item_sizes<3>>();
260 std::cout <<
"\t* 1D: (" << wi_1D.get(0) <<
")\n";
261 std::cout <<
"\t* 2D: (" << wi_2D.get(0) <<
", " << wi_2D.get(1) <<
")\n";
262 std::cout <<
"\t* 3D: (" << wi_3D.get(0) <<
", " << wi_3D.get(1) <<
", " << wi_3D.get(2) <<
")\n";
264 std::cout <<
"Maximum number of work items per work-group: "
265 << device.get_info<sycl::info::device::max_work_group_size>() <<
'\n';
267 std::cout <<
"Maximum number of sub-groups per work-group: "
268 << device.get_info<sycl::info::device::max_num_sub_groups>() <<
'\n';
270 std::cout <<
"Supported sub-group sizes: ";
271 auto const sg_sizes = device.get_info<sycl::info::device::sub_group_sizes>();
272 for(
auto const& sz : sg_sizes)
273 std::cout << sz <<
", ";
276 std::cout <<
"Preferred native vector width (char): "
277 << device.get_info<sycl::info::device::preferred_vector_width_char>() <<
'\n';
279 std::cout <<
"Native ISA vector width (char): "
280 << device.get_info<sycl::info::device::native_vector_width_char>() <<
'\n';
282 std::cout <<
"Preferred native vector width (short): "
283 << device.get_info<sycl::info::device::preferred_vector_width_short>() <<
'\n';
285 std::cout <<
"Native ISA vector width (short): "
286 << device.get_info<sycl::info::device::native_vector_width_short>() <<
'\n';
288 std::cout <<
"Preferred native vector width (int): "
289 << device.get_info<sycl::info::device::preferred_vector_width_int>() <<
'\n';
291 std::cout <<
"Native ISA vector width (int): "
292 << device.get_info<sycl::info::device::native_vector_width_int>() <<
'\n';
294 std::cout <<
"Preferred native vector width (long): "
295 << device.get_info<sycl::info::device::preferred_vector_width_long>() <<
'\n';
297 std::cout <<
"Native ISA vector width (long): "
298 << device.get_info<sycl::info::device::native_vector_width_long>() <<
'\n';
300 std::cout <<
"Preferred native vector width (float): "
301 << device.get_info<sycl::info::device::preferred_vector_width_float>() <<
'\n';
303 std::cout <<
"Native ISA vector width (float): "
304 << device.get_info<sycl::info::device::native_vector_width_float>() <<
'\n';
306 if(device.has(sycl::aspect::fp64))
308 std::cout <<
"Preferred native vector width (double): "
309 << device.get_info<sycl::info::device::preferred_vector_width_double>() <<
'\n';
311 std::cout <<
"Native ISA vector width (double): "
312 << device.get_info<sycl::info::device::native_vector_width_double>() <<
'\n';
315 if(device.has(sycl::aspect::fp16))
317 std::cout <<
"Preferred native vector width (half): "
318 << device.get_info<sycl::info::device::preferred_vector_width_half>() <<
'\n';
320 std::cout <<
"Native ISA vector width (half): "
321 << device.get_info<sycl::info::device::native_vector_width_half>() <<
'\n';
324 std::cout <<
"Maximum clock frequency: " << device.get_info<sycl::info::device::max_clock_frequency>()
327 std::cout <<
"Address space size: " << device.get_info<sycl::info::device::address_bits>() <<
"-bit\n";
329 std::cout <<
"Maximum size of memory object allocation: "
330 << device.get_info<sycl::info::device::max_mem_alloc_size>() <<
" bytes\n";
332 if(device.has(sycl::aspect::image))
334 std::cout <<
"Maximum number of simultaneous image object reads per kernel: "
335 << device.get_info<sycl::info::device::max_read_image_args>() <<
'\n';
337 std::cout <<
"Maximum number of simultaneous image writes per kernel: "
338 << device.get_info<sycl::info::device::max_write_image_args>() <<
'\n';
340 std::cout <<
"Maximum 1D/2D image width: " << device.get_info<sycl::info::device::image2d_max_width>()
343 std::cout <<
"Maximum 2D image height: " << device.get_info<sycl::info::device::image2d_max_height>()
346 std::cout <<
"Maximum 3D image width: " << device.get_info<sycl::info::device::image3d_max_width>()
349 std::cout <<
"Maximum 3D image height: " << device.get_info<sycl::info::device::image3d_max_height>()
352 std::cout <<
"Maximum 3D image depth: " << device.get_info<sycl::info::device::image3d_max_depth>()
355 std::cout <<
"Maximum number of samplers per kernel: "
356 << device.get_info<sycl::info::device::max_samplers>() <<
'\n';
359 std::cout <<
"Maximum kernel argument size: " << device.get_info<sycl::info::device::max_parameter_size>()
362 std::cout <<
"Memory base address alignment: "
363 << device.get_info<sycl::info::device::mem_base_addr_align>() <<
" bit\n";
365 auto print_fp_config = [](std::string
const& fp, std::vector<sycl::info::fp_config>
const& conf)
367 std::cout << fp <<
" precision floating-point capabilities:\n";
369 auto find_and_print = [&](sycl::info::fp_config val)
371 auto it = std::find(
begin(conf),
end(conf), val);
372 std::cout << (it ==
std::end(conf) ?
"No" :
"Yes") <<
'\n';
375 std::cout <<
"\t* denorm support: ";
376 find_and_print(sycl::info::fp_config::denorm);
378 std::cout <<
"\t* INF & quiet NaN support: ";
379 find_and_print(sycl::info::fp_config::inf_nan);
381 std::cout <<
"\t* round to nearest even support: ";
382 find_and_print(sycl::info::fp_config::round_to_nearest);
384 std::cout <<
"\t* round to zero support: ";
385 find_and_print(sycl::info::fp_config::round_to_zero);
387 std::cout <<
"\t* round to infinity support: ";
388 find_and_print(sycl::info::fp_config::round_to_inf);
390 std::cout <<
"\t* IEEE754-2008 FMA support: ";
393 std::cout <<
"\t* correctly rounded divide/sqrt support: ";
394 find_and_print(sycl::info::fp_config::correctly_rounded_divide_sqrt);
396 std::cout <<
"\t* software-implemented floating point operations: ";
397 find_and_print(sycl::info::fp_config::soft_float);
400 if(device.has(sycl::aspect::fp16))
402 auto const fp16_conf = device.get_info<sycl::info::device::half_fp_config>();
403 print_fp_config(
"Half", fp16_conf);
406 auto const fp32_conf = device.get_info<sycl::info::device::single_fp_config>();
407 print_fp_config(
"Single", fp32_conf);
409 if(device.has(sycl::aspect::fp64))
411 auto const fp64_conf = device.get_info<sycl::info::device::double_fp_config>();
412 print_fp_config(
"Double", fp64_conf);
415 std::cout <<
"Global memory cache type: ";
416 auto has_global_mem_cache =
false;
417 switch(device.get_info<sycl::info::device::global_mem_cache_type>())
419 case sycl::info::global_mem_cache_type::none:
423 case sycl::info::global_mem_cache_type::read_only:
424 std::cout <<
"read-only";
425 has_global_mem_cache =
true;
428 case sycl::info::global_mem_cache_type::read_write:
429 std::cout <<
"read-write";
430 has_global_mem_cache =
true;
435 if(has_global_mem_cache)
437 std::cout <<
"Global memory cache line size: "
438 << device.get_info<sycl::info::device::global_mem_cache_line_size>() <<
" bytes\n";
440 std::cout <<
"Global memory cache size: "
441 << device.get_info<sycl::info::device::global_mem_cache_size>() / KiB <<
" KiB\n";
444 std::cout <<
"Global memory size: " << device.get_info<sycl::info::device::global_mem_size>() / MiB
445 <<
" MiB" << std::endl;
447 std::cout <<
"Local memory type: ";
448 auto has_local_memory =
false;
449 switch(device.get_info<sycl::info::device::local_mem_type>())
451 case sycl::info::local_mem_type::none:
455 case sycl::info::local_mem_type::local:
456 std::cout <<
"local";
457 has_local_memory =
true;
460 case sycl::info::local_mem_type::global:
461 std::cout <<
"global";
462 has_local_memory =
true;
468 std::cout <<
"Local memory size: " << device.get_info<sycl::info::device::local_mem_size>() / KiB
471 std::cout <<
"Error correction support: "
472 << (device.get_info<sycl::info::device::error_correction_support>() ?
"Yes" :
"No") <<
'\n';
474 auto print_memory_orders = [](std::vector<sycl::memory_order>
const& mem_orders)
476 for(
auto const& cap : mem_orders)
480 case sycl::memory_order::relaxed:
481 std::cout <<
"relaxed";
484 case sycl::memory_order::acquire:
485 std::cout <<
"acquire";
488 case sycl::memory_order::release:
489 std::cout <<
"release";
492 case sycl::memory_order::acq_rel:
493 std::cout <<
"acq_rel";
496 case sycl::memory_order::seq_cst:
497 std::cout <<
"seq_cst";
499 # if defined(BOOST_COMP_ICPX)
501 case sycl::memory_order::__consume_unsupported:
510 std::cout <<
"Supported memory orderings for atomic operations: ";
511 auto const mem_orders = device.get_info<sycl::info::device::atomic_memory_order_capabilities>();
512 print_memory_orders(mem_orders);
514 # if defined(BOOST_COMP_ICPX)
515 # if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0)
517 std::cout <<
"Supported memory orderings for sycl::atomic_fence: ";
518 auto const fence_orders = device.get_info<sycl::info::device::atomic_fence_order_capabilities>();
519 print_memory_orders(fence_orders);
523 auto print_memory_scopes = [](std::vector<sycl::memory_scope>
const& mem_scopes)
525 for(
auto const& cap : mem_scopes)
529 case sycl::memory_scope::work_item:
530 std::cout <<
"work-item";
533 case sycl::memory_scope::sub_group:
534 std::cout <<
"sub-group";
537 case sycl::memory_scope::work_group:
538 std::cout <<
"work-group";
541 case sycl::memory_scope::device:
542 std::cout <<
"device";
545 case sycl::memory_scope::system:
546 std::cout <<
"system";
554 std::cout <<
"Supported memory scopes for atomic operations: ";
555 auto const mem_scopes = device.get_info<sycl::info::device::atomic_memory_scope_capabilities>();
556 print_memory_scopes(mem_scopes);
558 # if defined(BOOST_COMP_ICPX)
559 # if BOOST_COMP_ICPX >= BOOST_VERSION_NUMBER(53, 2, 0)
561 std::cout <<
"Supported memory scopes for sycl::atomic_fence: ";
562 auto const fence_scopes = device.get_info<sycl::info::device::atomic_fence_scope_capabilities>();
563 print_memory_scopes(fence_scopes);
567 std::cout <<
"Device timer resolution: "
568 << device.get_info<sycl::info::device::profiling_timer_resolution>() <<
" ns\n";
570 std::cout <<
"Built-in kernels: ";
571 auto const builtins = device.get_info<sycl::info::device::built_in_kernel_ids>();
572 for(
auto const& b : builtins)
573 std::cout << b.get_name() <<
", ";
576 std::cout <<
"Maximum number of subdevices: ";
577 auto const max_subs = device.get_info<sycl::info::device::partition_max_sub_devices>();
578 std::cout << max_subs <<
'\n';
582 std::cout <<
"Supported partition properties: ";
583 auto const part_props = device.get_info<sycl::info::device::partition_properties>();
584 auto has_affinity_domains =
false;
585 for(
auto const& prop : part_props)
589 case sycl::info::partition_property::no_partition:
590 std::cout <<
"no partition";
593 case sycl::info::partition_property::partition_equally:
594 std::cout <<
"equally";
597 case sycl::info::partition_property::partition_by_counts:
598 std::cout <<
"by counts";
601 case sycl::info::partition_property::partition_by_affinity_domain:
602 std::cout <<
"by affinity domain";
603 has_affinity_domains =
true;
605 # if defined(BOOST_COMP_ICPX)
606 case sycl::info::partition_property::ext_intel_partition_by_cslice:
607 std::cout <<
"by compute slice (Intel extension; deprecated)";
615 if(has_affinity_domains)
617 std::cout <<
"Supported partition affinity domains: ";
618 auto const aff_doms = device.get_info<sycl::info::device::partition_affinity_domains>();
619 for(
auto const& dom : aff_doms)
623 case sycl::info::partition_affinity_domain::not_applicable:
624 std::cout <<
"not applicable";
627 case sycl::info::partition_affinity_domain::numa:
631 case sycl::info::partition_affinity_domain::L4_cache:
632 std::cout <<
"L4 cache";
635 case sycl::info::partition_affinity_domain::L3_cache:
636 std::cout <<
"L3 cache";
639 case sycl::info::partition_affinity_domain::L2_cache:
640 std::cout <<
"L2 cache";
643 case sycl::info::partition_affinity_domain::L1_cache:
644 std::cout <<
"L1 cache";
647 case sycl::info::partition_affinity_domain::next_partitionable:
648 std::cout <<
"next partitionable";
656 std::cout <<
"Current partition property: ";
657 switch(device.get_info<sycl::info::device::partition_type_property>())
659 case sycl::info::partition_property::no_partition:
660 std::cout <<
"no partition";
663 case sycl::info::partition_property::partition_equally:
664 std::cout <<
"partitioned equally";
667 case sycl::info::partition_property::partition_by_counts:
668 std::cout <<
"partitioned by counts";
671 case sycl::info::partition_property::partition_by_affinity_domain:
672 std::cout <<
"partitioned by affinity domain";
675 # if defined(BOOST_COMP_ICPX)
676 case sycl::info::partition_property::ext_intel_partition_by_cslice:
677 std::cout <<
"partitioned by compute slice (Intel extension; deprecated)";
683 std::cout <<
"Current partition affinity domain: ";
684 switch(device.get_info<sycl::info::device::partition_type_affinity_domain>())
686 case sycl::info::partition_affinity_domain::not_applicable:
687 std::cout <<
"not applicable";
690 case sycl::info::partition_affinity_domain::numa:
694 case sycl::info::partition_affinity_domain::L4_cache:
695 std::cout <<
"L4 cache";
698 case sycl::info::partition_affinity_domain::L3_cache:
699 std::cout <<
"L3 cache";
702 case sycl::info::partition_affinity_domain::L2_cache:
703 std::cout <<
"L2 cache";
706 case sycl::info::partition_affinity_domain::L1_cache:
707 std::cout <<
"L1 cache";
710 case sycl::info::partition_affinity_domain::next_partitionable:
711 std::cout <<
"next partitionable";
#define ALPAKA_DEBUG_FULL_LOG_SCOPE
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto fma(T const &fma_ctx, Tx const &x, Ty const &y, Tz const &z)
Computes x * y + z as if to infinite precision and rounded only once to fit the result type.
ALPAKA_FN_HOST auto end(TView &view) -> Iterator< TView >
ALPAKA_FN_HOST auto begin(TView &view) -> Iterator< TView >
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_ACC auto all(TWarp const &warp, std::int32_t predicate) -> std::int32_t
Evaluates predicate for all active threads of the warp and returns non-zero if and only if predicate ...
The alpaka accelerator library.
ALPAKA_FN_HOST auto getDevCount(TPlatform const &platform)
ALPAKA_FN_HOST auto getDevByIdx(TPlatform const &platform, std::size_t const &devIdx) -> Dev< TPlatform >