alpaka
Abstraction Library for Parallel Kernel Acceleration
KernelExecutionFixture.hpp
Go to the documentation of this file.
1 /* Copyright 2024 Benjamin Worpitz, Andrea Bocci, Bernhard Manfred Gruber, Jan Stephan, Aurora Perego
2  * SPDX-License-Identifier: MPL-2.0
3  */
4 
5 #pragma once
6 
7 #include "alpaka/alpaka.hpp"
8 
9 #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
10 # error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
11 #endif
12 
13 #if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
14 # error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
15 #endif
16 
17 #include "alpaka/test/Check.hpp"
19 
20 #include <utility>
21 
22 namespace alpaka::test
23 {
24  //! The fixture for executing a kernel on a given accelerator.
25  template<typename TAcc>
27  {
28  public:
29  using Acc = TAcc;
33  using Device = Dev<Acc>;
36 
37  KernelExecutionFixture(WorkDiv workDiv) : m_queue{m_device}, m_workDiv{std::move(workDiv)} // Creates the queue on m_device and stores the caller-supplied work division.
38  { // NOTE(review): single-argument constructor is not explicit - confirm implicit conversion from WorkDiv is intended.
39  }
40 
41  template<typename TExtent>
42  KernelExecutionFixture(TExtent const& extent) : m_queue{m_device} // Creates the queue on m_device.
43  , m_extent{extent} // Stored extent is later used to build the KernelCfg in operator().
44  {
45  }
46 
48  : m_platform{} // if the platform is not stateless, this is wrong; we ignore it because it is not used
49  , m_device{alpaka::getDev(queue)}
50  , m_queue{std::move(queue)}
51  , m_workDiv{std::move(workDiv)}
52  {
53  }
54 
55  template<typename TExtent>
56  KernelExecutionFixture(Queue queue, TExtent const& extent) // Uses a caller-provided queue instead of creating one.
57  : m_platform{} // if the platform is not stateless, this is wrong; we ignore it because it is not used
58  , m_device{alpaka::getDev(queue)} // Device is taken from the queue, replacing the default getDevByIdx(m_platform, 0).
59  , m_queue{std::move(queue)} // queue is moved from only after getDev(queue) above has read it.
60  , m_extent{extent} // Stored extent is later used to build the KernelCfg in operator().
61  {
62  }
63 
64  template<typename TKernelFnObj, typename... TArgs>
65  auto operator()(TKernelFnObj kernelFnObj, TArgs&&... args) -> bool // Runs kernelFnObj on the accelerator and returns the bool it leaves in the result buffer.
66  {
67  // Allocate the result value
68  auto bufAccResult = allocBuf<bool, Idx>(m_device, static_cast<Idx>(1u)); // One bool on the accelerator device.
69  memset(m_queue, bufAccResult, static_cast<std::uint8_t>(true)); // Fill the buffer bytes with 1 so the result starts out as true.
70 
71 
72  alpaka::KernelCfg<Acc> const kernelCfg = {m_extent, Vec<Dim, Idx>::ones()}; // Configuration handed to getValidWorkDiv: stored extent plus an all-ones vector.
73 
74  // Set the work division if it has not been set before.
76  m_workDiv = alpaka::getValidWorkDiv( // NOTE(review): no guarding condition is visible in this listing (original line 75 is absent) - confirm against the original file.
77  kernelCfg,
78  m_device,
79  kernelFnObj,
80  getPtrNative(bufAccResult),
81  std::forward<TArgs>(args)...); // NOTE(review): args are forwarded here and again to exec below - confirm this call does not move from them.
82 
83  exec<Acc>(m_queue, m_workDiv, kernelFnObj, getPtrNative(bufAccResult), std::forward<TArgs>(args)...); // The kernel receives the native result pointer as its first argument, followed by args.
84 
85  // Copy the result value to the host
86  auto bufHostResult = allocBuf<bool, Idx>(m_devHost, static_cast<Idx>(1u)); // One bool on the host CPU device.
87  memcpy(m_queue, bufHostResult, bufAccResult);
88  wait(m_queue); // Wait on the queue before reading the host buffer.
89 
90  auto const result = *getPtrNative(bufHostResult);
91 
92  return result;
93  }
94 
95  private:
96  PlatformCpu m_platformHost{}; // CPU platform used for the host side.
97  DevCpu m_devHost{getDevByIdx(m_platformHost, 0)}; // Host device 0; holds the host copy of the result in operator().
98  Platform m_platform{}; // Platform of the accelerator under test.
99  Device m_device{getDevByIdx(m_platform, 0)}; // Defaults to device 0 of m_platform; the queue-taking constructors replace it with the queue's device.
100  Queue m_queue; // Queue used for the memset, kernel execution and copy in operator().
102  Vec<Dim, Idx> m_extent; // Extent used to build the KernelCfg in operator().
103  };
104 
105 } // namespace alpaka::test
The CPU device handle.
Definition: DevCpu.hpp:56
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto ones() -> Vec< TDim, TVal >
One value constructor.
Definition: Vec.hpp:133
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
Definition: Vec.hpp:116
The fixture for executing a kernel on a given accelerator.
KernelExecutionFixture(Queue queue, WorkDiv workDiv)
KernelExecutionFixture(Queue queue, TExtent const &extent)
auto operator()(TKernelFnObj kernelFnObj, TArgs &&... args) -> bool
The test specifics.
Definition: TestAccs.hpp:27
typename trait::DefaultQueueType< TDev >::type DefaultQueue
The queue type that should be used for the given device.
Definition: Queue.hpp:108
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
Definition: Traits.hpp:29
ALPAKA_FN_HOST auto memcpy(TQueue &queue, alpaka::detail::DevGlobalImplGeneric< TTag, TTypeDst > &viewDst, TViewSrc const &viewSrc) -> void
typename trait::DevType< T >::type Dev
The device type trait alias template to remove the ::type.
Definition: Traits.hpp:56
ALPAKA_FN_HOST auto getValidWorkDiv(KernelCfg< TAcc, TGridElemExtent, TThreadElemExtent > const &kernelCfg, [[maybe_unused]] TDev const &dev, TKernelFnObj const &kernelFnObj, TArgs &&... args) -> WorkDivMembers< Dim< TAcc >, Idx< TAcc >>
ALPAKA_FN_HOST auto getPtrNative(TView const &view) -> Elem< TView > const *
Gets the native pointer of the memory view.
Definition: Traits.hpp:136
ALPAKA_FN_HOST auto memset(TQueue &queue, TViewFwd &&view, std::uint8_t const &byte, TExtent const &extent) -> void
Sets the bytes of the memory of view, described by extent, to the given value.
Definition: Traits.hpp:231
ALPAKA_FN_HOST auto getDevByIdx(TPlatform const &platform, std::size_t const &devIdx) -> Dev< TPlatform >
Definition: Traits.hpp:62
ALPAKA_FN_HOST auto getDev(T const &t)
Definition: Traits.hpp:68
ALPAKA_FN_HOST auto wait(TAwaited const &awaited) -> void
Blocks the calling thread until the given awaited action has completed.
Definition: Traits.hpp:34
typename trait::PlatformType< T >::type Platform
The platform type trait alias template to remove the ::type.
Definition: Traits.hpp:51
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
Definition: Traits.hpp:19
Kernel start configuration to determine a valid work division.
The CPU device platform.
Definition: PlatformCpu.hpp:18