alpaka
Abstraction Library for Parallel Kernel Acceleration
Loading...
Searching...
No Matches
alpaka::detail Namespace Reference

Classes

struct  AtomicHierarchyConceptType
 
struct  AtomicHierarchyConceptType< hierarchy::Blocks >
 
struct  AtomicHierarchyConceptType< hierarchy::Grids >
 
struct  AtomicHierarchyConceptType< hierarchy::Threads >
 
struct  BlockSharedMemDynMemberStatic
 "namespace" for static constexpr members that should be in BlockSharedMemDynMember but cannot be because having a static const member breaks GCC 10 OpenMP target: type not mappable. More...
 
class  BlockSharedMemStMemberImpl
 Implementation of static block shared memory provider. More...
 
class  BufCpuImpl
 The CPU memory buffer. More...
 
class  BufUniformCudaHipRtImpl
 The Uniform Cuda/HIP memory buffer implementation. More...
 
struct  CheckFnReturnType
 Check that the return of TKernelFnObj is void. More...
 
struct  CheckFnReturnType< AccGpuUniformCudaHipRt< TApi, TDim, TIdx > >
 specialization of the TKernelFnObj return type evaluation More...
 
struct  DevGenericImpl
 The CPU/GPU device queue registry implementation. More...
 
struct  DevGlobalImplGeneric
 
struct  DevGlobalTrait
 
struct  DevGlobalTrait< TagCpuOmp2Blocks, T >
 
struct  DevGlobalTrait< TagCpuOmp2Threads, T >
 
struct  DevGlobalTrait< TagCpuSerial, T >
 
struct  DevGlobalTrait< TagCpuTbbBlocks, T >
 
struct  DevGlobalTrait< TagCpuThreads, T >
 
struct  DevGlobalTrait< TagGpuCudaRt, T >
 
struct  DevGlobalTrait< TagGpuHipRt, T >
 
class  IndependentGroupsAlong
 
struct  ParallelFor
 Executor of parallel OpenMP loop. More...
 
struct  ParallelFor< TKernel, omp::Schedule >
 Executor of parallel OpenMP loop. More...
 
struct  ParallelFor< TKernel, TSchedule, UseScheduleKind< TKernel, TSchedule > >
 Executor of parallel OpenMP loop. More...
 
struct  ParallelForDynamicImpl
 Helper executor of parallel OpenMP loop with the dynamic schedule. More...
 
struct  ParallelForDynamicImpl< TKernel, TSchedule, HasScheduleChunkSize< TKernel > >
 Helper executor of parallel OpenMP loop with the dynamic schedule. More...
 
struct  ParallelForGuidedImpl
 Helper executor of parallel OpenMP loop with the guided schedule. More...
 
struct  ParallelForGuidedImpl< TKernel, TSchedule, HasScheduleChunkSize< TKernel > >
 Helper executor of parallel OpenMP loop with the guided schedule. More...
 
struct  ParallelForImpl
 Executor of parallel OpenMP loop with the given schedule. More...
 
struct  ParallelForImpl< TKernel, omp::Schedule, omp::Schedule::Dynamic >
 Executor of parallel OpenMP loop with the dynamic schedule. More...
 
struct  ParallelForImpl< TKernel, omp::Schedule, omp::Schedule::Guided >
 Executor of parallel OpenMP loop with the guided schedule. More...
 
struct  ParallelForImpl< TKernel, omp::Schedule, omp::Schedule::Static >
 Executor of parallel OpenMP loop with the static schedule. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::Auto >
 Executor of parallel OpenMP loop with auto schedule set. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::Dynamic >
 Executor of parallel OpenMP loop with the dynamic schedule. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::Guided >
 Executor of parallel OpenMP loop with the guided schedule. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::NoSchedule >
 Executor of parallel OpenMP loop with no schedule set. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::Runtime >
 Executor of parallel OpenMP loop with runtime schedule set. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::Static >
 Executor of parallel OpenMP loop with the static schedule. More...
 
struct  ParallelForStaticImpl
 Helper executor of parallel OpenMP loop with the static schedule. More...
 
struct  ParallelForStaticImpl< TKernel, TSchedule, HasScheduleChunkSize< TKernel > >
 Helper executor of parallel OpenMP loop with the static schedule. More...
 
struct  PitchHolder
 
struct  PitchHolder< TDim, std::enable_if_t< TDim::value >= 2 > >
 
struct  Print
 
struct  Print< DimInt< Dim< TView >::value - 1u >, TView >
 
struct  TaskCopyCpu
 The CPU device ND memory copy task. More...
 
struct  TaskCopyCpu< DimInt< 0u >, TViewDst, TViewSrc, TExtent >
 The CPU device scalar memory copy task. More...
 
struct  TaskCopyCpu< DimInt< 1u >, TViewDst, TViewSrc, TExtent >
 The CPU device 1D memory copy task. More...
 
struct  TaskCopyCpuBase
 The CPU device memory copy task base. More...
 
struct  TaskCopyUniformCudaHip
 The CUDA/HIP memory copy trait. More...
 
struct  TaskCopyUniformCudaHip< TApi, DimInt< 0u >, TViewDst, TViewSrc, TExtent >
 The scalar CUDA/HIP memory copy trait. More...
 
struct  TaskCopyUniformCudaHip< TApi, DimInt< 1u >, TViewDst, TViewSrc, TExtent >
 The 1D CUDA/HIP memory copy trait. More...
 
struct  TaskCopyUniformCudaHip< TApi, DimInt< 2u >, TViewDst, TViewSrc, TExtent >
 The 2D CUDA/HIP memory copy trait. More...
 
struct  TaskCopyUniformCudaHip< TApi, DimInt< 3u >, TViewDst, TViewSrc, TExtent >
 The 3D CUDA/HIP memory copy trait. More...
 
struct  TaskSetCpu
 The CPU device ND memory set task. More...
 
struct  TaskSetCpu< DimInt< 0u >, TView, TExtent >
 The CPU device scalar memory set task. More...
 
struct  TaskSetCpu< DimInt< 1u >, TView, TExtent >
 The CPU device 1D memory set task. More...
 
struct  TaskSetCpuBase
 The CPU device ND memory set task base. More...
 
struct  TaskSetUniformCudaHip
 The CUDA/HIP memory set task. More...
 
struct  TaskSetUniformCudaHip< TApi, DimInt< 0u >, TView, TExtent >
 The scalar CUDA/HIP memory set task. More...
 
struct  TaskSetUniformCudaHip< TApi, DimInt< 1u >, TView, TExtent >
 The 1D CUDA/HIP memory set task. More...
 
struct  TaskSetUniformCudaHip< TApi, DimInt< 2u >, TView, TExtent >
 The 2D CUDA/HIP memory set task. More...
 
struct  TaskSetUniformCudaHip< TApi, DimInt< 3u >, TView, TExtent >
 The 3D CUDA/HIP memory set task. More...
 
struct  TaskSetUniformCudaHipBase
 The CUDA/HIP memory set task base. More...
 
class  UniformElementsAlong
 

Typedefs

template<typename T >
using atomic_ref = boost::atomic_ref< T >
 
using CudaHipBuiltinTypes = meta::Concatenate< CudaHipBuiltinTypes1, CudaHipBuiltinTypes2, CudaHipBuiltinTypes3, CudaHipBuiltinTypes4 >
 
using CudaHipBuiltinTypes1 = std::tuple< char1, double1, float1, int1, long1, longlong1, short1, uchar1, uint1, ulong1, ulonglong1, ushort1 >
 
using CudaHipBuiltinTypes2 = std::tuple< char2, double2, float2, int2, long2, longlong2, short2, uchar2, uint2, ulong2, ulonglong2, ushort2 >
 
using CudaHipBuiltinTypes3 = std::tuple< char3, dim3, double3, float3, int3, long3, longlong3, short3, uchar3, uint3, ulong3, ulonglong3, ushort3 >
 
using CudaHipBuiltinTypes4 = std::tuple< char4, double4, float4, int4, long4, longlong4, short4, uchar4, uint4, ulong4, ulonglong4, ushort4 >
 
template<typename TKernel >
using HasScheduleChunkSize = std::void_t< decltype(TKernel::ompScheduleChunkSize)>
 Helper type to check if TKernel has member ompScheduleChunkSize.
 
template<typename TSchedule >
using IsOmpScheduleTraitSpecialized = std::integral_constant< bool, std::is_same< TSchedule, omp::Schedule >::value >
 Helper type to check if TSchedule is a type originating from OmpSchedule trait definition.
 
template<typename TKernel , typename TSchedule >
using UseScheduleKind = std::enable_if_t< sizeof(TKernel::ompScheduleKind) &&!IsOmpScheduleTraitSpecialized< TSchedule >::value >
 Helper type to check if member ompScheduleKind of TKernel should be used.
 

Functions

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
ALPAKA_FN_HOST auto allDivisorsLessOrEqual (T const &val, T const &maxDivisor) -> std::set< T >
 
template<typename T >
void assertKernelArgIsTriviallyCopyable ()
 
template<typename TElem , typename TDim , typename TIdx >
ALPAKA_FN_HOST_ACC constexpr auto calculatePitchesFromExtents (Vec< TDim, TIdx > const &extent)
 Calculate the pitches purely from the extents.
 
template<typename TValue >
static auto ffsFallback (TValue value) -> std::int32_t
 Fallback implementation of ffs.
 
template<typename TKernelFnObj , typename TAcc , typename... TArgs>
__global__ void gpuKernel (Vec< Dim< TAcc >, Idx< TAcc > > const threadElemExtent, TKernelFnObj const kernelFnObj, TArgs... args)
 The GPU CUDA/HIP kernel entry point.
 
template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
ALPAKA_FN_HOST auto nextDivisorLowerOrEqual (T const &dividend, T const &maxDivisor) -> T
 Finds the largest divisor where dividend % divisor == 0.
 
template<typename TKernel , typename TLoopBody , typename TIdx , typename TSchedule >
ALPAKA_FN_HOST ALPAKA_FN_INLINE void parallelFor (TKernel const &kernel, TLoopBody &&loopBody, TIdx const numIterations, TSchedule const &schedule)
 Run parallel OpenMP loop.
 
template<typename TValue >
static auto popcountFallback (TValue value) -> std::int32_t
 Fallback implementation of popcount.
 
auto trim (std::string s) -> std::string
 

Variables

template<typename T >
constexpr auto isCudaHipBuiltInType = meta::Contains<CudaHipBuiltinTypes, T>::value
 
template<typename TKernelFnObj , typename TAcc , typename... TArgs>
void(* kernelName )(Vec< Dim< TAcc >, Idx< TAcc > > const, TKernelFnObj const, remove_restrict_t< std::decay_t< TArgs > >...) = gpuKernel<TKernelFnObj, TAcc, TArgs...>
 

Typedef Documentation

◆ atomic_ref

template<typename T >
using alpaka::detail::atomic_ref = typedef boost::atomic_ref<T>

Definition at line 28 of file AtomicAtomicRef.hpp.

◆ CudaHipBuiltinTypes

◆ CudaHipBuiltinTypes1

using alpaka::detail::CudaHipBuiltinTypes1 = typedef std::tuple<char1, double1, float1, int1, long1, longlong1, short1, uchar1, uint1, ulong1, ulonglong1, ushort1>

Definition at line 33 of file CudaHipCommon.hpp.

◆ CudaHipBuiltinTypes2

using alpaka::detail::CudaHipBuiltinTypes2 = typedef std::tuple<char2, double2, float2, int2, long2, longlong2, short2, uchar2, uint2, ulong2, ulonglong2, ushort2>

Definition at line 35 of file CudaHipCommon.hpp.

◆ CudaHipBuiltinTypes3

using alpaka::detail::CudaHipBuiltinTypes3 = typedef std::tuple< char3, dim3, double3, float3, int3, long3, longlong3, short3, uchar3, uint3, ulong3, ulonglong3, ushort3 >

Definition at line 37 of file CudaHipCommon.hpp.

◆ CudaHipBuiltinTypes4

using alpaka::detail::CudaHipBuiltinTypes4 = typedef std::tuple<char4, double4, float4, int4, long4, longlong4, short4, uchar4, uint4, ulong4, ulonglong4, ushort4>

Definition at line 61 of file CudaHipCommon.hpp.

◆ HasScheduleChunkSize

template<typename TKernel >
using alpaka::detail::HasScheduleChunkSize = typedef std::void_t<decltype(TKernel::ompScheduleChunkSize)>

Helper type to check if TKernel has member ompScheduleChunkSize.

Is void for those types, ill-formed otherwise.

Template Parameters
TKernel: The kernel type.

Definition at line 197 of file TaskKernelCpuOmp2Blocks.hpp.

◆ IsOmpScheduleTraitSpecialized

template<typename TSchedule >
using alpaka::detail::IsOmpScheduleTraitSpecialized = typedef std::integral_constant<bool, std::is_same<TSchedule, omp::Schedule>::value>

Helper type to check if TSchedule is a type originating from OmpSchedule trait definition.

Template Parameters
TSchedule: The schedule type.

Definition at line 716 of file TaskKernelCpuOmp2Blocks.hpp.

◆ UseScheduleKind

template<typename TKernel , typename TSchedule >
using alpaka::detail::UseScheduleKind = typedef std::enable_if_t<sizeof(TKernel::ompScheduleKind) && !IsOmpScheduleTraitSpecialized<TSchedule>::value>

Helper type to check if member ompScheduleKind of TKernel should be used.

For that it has to be present, and no OmpSchedule trait specialized. Is void for those types, ill-formed otherwise.

Template Parameters
TKernel: The kernel type.
TSchedule: The schedule type.

Definition at line 727 of file TaskKernelCpuOmp2Blocks.hpp.

Function Documentation

◆ allDivisorsLessOrEqual()

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
ALPAKA_FN_HOST auto alpaka::detail::allDivisorsLessOrEqual ( T const &  val,
T const &  maxDivisor 
) -> std::set<T>
Parameters
val: The value to find divisors of.
maxDivisor: The maximum.
Returns
A list of all divisors less than or equal to the given maximum.

Definition at line 66 of file WorkDivHelpers.hpp.

◆ assertKernelArgIsTriviallyCopyable()

template<typename T >
void alpaka::detail::assertKernelArgIsTriviallyCopyable ( )
inline

Definition at line 288 of file Traits.hpp.

◆ calculatePitchesFromExtents()

template<typename TElem , typename TDim , typename TIdx >
ALPAKA_FN_HOST_ACC constexpr auto alpaka::detail::calculatePitchesFromExtents ( Vec< TDim, TIdx > const &  extent)
inline constexpr

Calculate the pitches purely from the extents.

Definition at line 36 of file Traits.hpp.

◆ ffsFallback()

template<typename TValue >
static auto alpaka::detail::ffsFallback ( TValue  value) -> std::int32_t
static

Fallback implementation of ffs.

Definition at line 28 of file IntrinsicFallback.hpp.

◆ gpuKernel()

template<typename TKernelFnObj , typename TAcc , typename... TArgs>
__global__ void alpaka::detail::gpuKernel ( Vec< Dim< TAcc >, Idx< TAcc > > const  threadElemExtent,
TKernelFnObj const  kernelFnObj,
TArgs...  args 
)

The GPU CUDA/HIP kernel entry point.

Definition at line 63 of file TaskKernelGpuUniformCudaHipRt.hpp.

◆ nextDivisorLowerOrEqual()

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
ALPAKA_FN_HOST auto alpaka::detail::nextDivisorLowerOrEqual ( T const &  dividend,
T const &  maxDivisor 
) -> T

Finds the largest divisor where dividend % divisor == 0.

Parameters
dividend: The dividend.
maxDivisor: The maximum divisor.
Returns
The biggest number that satisfies the following conditions: 1) dividend % ret == 0 2) ret <= maxDivisor

Definition at line 50 of file WorkDivHelpers.hpp.

◆ parallelFor()

template<typename TKernel , typename TLoopBody , typename TIdx , typename TSchedule >
ALPAKA_FN_HOST ALPAKA_FN_INLINE void alpaka::detail::parallelFor ( TKernel const &  kernel,
TLoopBody &&  loopBody,
TIdx const  numIterations,
TSchedule const &  schedule 
)

Run parallel OpenMP loop.

Template Parameters
TKernel: The kernel type.
TLoopBody: The loop body functor type.
TIdx: The index type.
TSchedule: The schedule type (not necessarily omp::Schedule).
Parameters
kernel: The kernel instance reference, not perfect-forwarded to shorten SFINAE internally.
loopBody: The loop body functor instance, takes iteration index as input.
numIterations: The number of loop iterations.
schedule: The schedule object.

Definition at line 778 of file TaskKernelCpuOmp2Blocks.hpp.

◆ popcountFallback()

template<typename TValue >
static auto alpaka::detail::popcountFallback ( TValue  value) -> std::int32_t
static

Fallback implementation of popcount.

Definition at line 15 of file IntrinsicFallback.hpp.

◆ trim()

auto alpaka::detail::trim ( std::string  s) -> std::string
inline

Definition at line 76 of file Traits.hpp.

Variable Documentation

◆ isCudaHipBuiltInType

template<typename T >
constexpr auto alpaka::detail::isCudaHipBuiltInType = meta::Contains<CudaHipBuiltinTypes, T>::value
inline constexpr

Definition at line 67 of file CudaHipCommon.hpp.

◆ kernelName

template<typename TKernelFnObj , typename TAcc , typename... TArgs>
void(* alpaka::detail::kernelName) (Vec< Dim< TAcc >, Idx< TAcc > > const, TKernelFnObj const, remove_restrict_t< std::decay_t< TArgs > >...) = gpuKernel<TKernelFnObj, TAcc, TArgs...>
inline

Definition at line 83 of file TaskKernelGpuUniformCudaHipRt.hpp.