alpaka
Abstraction Library for Parallel Kernel Acceleration
alpaka::detail Namespace Reference

Classes

struct  AtomicHierarchyConceptType
 
struct  AtomicHierarchyConceptType< hierarchy::Blocks >
 
struct  AtomicHierarchyConceptType< hierarchy::Grids >
 
struct  AtomicHierarchyConceptType< hierarchy::Threads >
 
struct  BlockSharedMemDynMemberStatic
 "namespace" for static constexpr members that should be in BlockSharedMemDynMember but cannot be because having a static const member breaks GCC 10 OpenMP target: type not mappable. More...
 
class  BlockSharedMemStMemberImpl
 Implementation of static block shared memory provider. More...
 
class  BufCpuImpl
 The CPU memory buffer. More...
 
struct  CheckFnReturnType
 Check that the return of TKernelFnObj is void. More...
 
struct  CheckFnReturnType< AccGpuUniformCudaHipRt< TApi, TDim, TIdx > >
 specialization of the TKernelFnObj return type evaluation More...
 
struct  DevGlobalImplGeneric
 
struct  DevGlobalTrait
 
struct  DevGlobalTrait< TagCpuOmp2Blocks, T >
 
struct  DevGlobalTrait< TagCpuOmp2Threads, T >
 
struct  DevGlobalTrait< TagCpuSerial, T >
 
struct  DevGlobalTrait< TagCpuTbbBlocks, T >
 
struct  DevGlobalTrait< TagCpuThreads, T >
 
struct  DevGlobalTrait< TagGpuCudaRt, T >
 
struct  DevGlobalTrait< TagGpuHipRt, T >
 
struct  ParallelFor
 Executor of parallel OpenMP loop. More...
 
struct  ParallelFor< TKernel, omp::Schedule >
 Executor of parallel OpenMP loop. More...
 
struct  ParallelFor< TKernel, TSchedule, UseScheduleKind< TKernel, TSchedule > >
 Executor of parallel OpenMP loop. More...
 
struct  ParallelForDynamicImpl
 Helper executor of parallel OpenMP loop with the dynamic schedule. More...
 
struct  ParallelForDynamicImpl< TKernel, TSchedule, HasScheduleChunkSize< TKernel > >
 Helper executor of parallel OpenMP loop with the dynamic schedule. More...
 
struct  ParallelForGuidedImpl
 Helper executor of parallel OpenMP loop with the guided schedule. More...
 
struct  ParallelForGuidedImpl< TKernel, TSchedule, HasScheduleChunkSize< TKernel > >
 Helper executor of parallel OpenMP loop with the guided schedule. More...
 
struct  ParallelForImpl
 Executor of parallel OpenMP loop with the given schedule. More...
 
struct  ParallelForImpl< TKernel, omp::Schedule, omp::Schedule::Dynamic >
 Executor of parallel OpenMP loop with the dynamic schedule. More...
 
struct  ParallelForImpl< TKernel, omp::Schedule, omp::Schedule::Guided >
 Executor of parallel OpenMP loop with the guided schedule. More...
 
struct  ParallelForImpl< TKernel, omp::Schedule, omp::Schedule::Static >
 Executor of parallel OpenMP loop with the static schedule. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::Auto >
 Executor of parallel OpenMP loop with auto schedule set. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::Dynamic >
 Executor of parallel OpenMP loop with the dynamic schedule. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::Guided >
 Executor of parallel OpenMP loop with the guided schedule. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::NoSchedule >
 Executor of parallel OpenMP loop with no schedule set. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::Runtime >
 Executor of parallel OpenMP loop with runtime schedule set. More...
 
struct  ParallelForImpl< TKernel, TSchedule, omp::Schedule::Static >
 Executor of parallel OpenMP loop with the static schedule. More...
 
struct  ParallelForStaticImpl
 Helper executor of parallel OpenMP loop with the static schedule. More...
 
struct  ParallelForStaticImpl< TKernel, TSchedule, HasScheduleChunkSize< TKernel > >
 Helper executor of parallel OpenMP loop with the static schedule. More...
 
struct  PitchHolder
 
struct  PitchHolder< TDim, std::enable_if_t< TDim::value >=2 > >
 
struct  Print
 
struct  Print< DimInt< Dim< TView >::value - 1u >, TView >
 
struct  QueueRegistry
 The CPU/GPU device queue registry implementation. More...
 
struct  TaskCopyCpu
 The CPU device ND memory copy task. More...
 
struct  TaskCopyCpu< DimInt< 0u >, TViewDst, TViewSrc, TExtent >
 The CPU device scalar memory copy task. More...
 
struct  TaskCopyCpu< DimInt< 1u >, TViewDst, TViewSrc, TExtent >
 The CPU device 1D memory copy task. More...
 
struct  TaskCopyCpuBase
 The CPU device memory copy task base. More...
 
struct  TaskCopyUniformCudaHip
 The CUDA/HIP memory copy trait. More...
 
struct  TaskCopyUniformCudaHip< TApi, DimInt< 0u >, TViewDst, TViewSrc, TExtent >
 The scalar CUDA/HIP memory copy trait. More...
 
struct  TaskCopyUniformCudaHip< TApi, DimInt< 1u >, TViewDst, TViewSrc, TExtent >
 The 1D CUDA/HIP memory copy trait. More...
 
struct  TaskCopyUniformCudaHip< TApi, DimInt< 2u >, TViewDst, TViewSrc, TExtent >
 The 2D CUDA/HIP memory copy trait. More...
 
struct  TaskCopyUniformCudaHip< TApi, DimInt< 3u >, TViewDst, TViewSrc, TExtent >
 The 3D CUDA/HIP memory copy trait. More...
 
struct  TaskSetCpu
 The CPU device ND memory set task. More...
 
struct  TaskSetCpu< DimInt< 0u >, TView, TExtent >
 The CPU device scalar memory set task. More...
 
struct  TaskSetCpu< DimInt< 1u >, TView, TExtent >
 The CPU device 1D memory set task. More...
 
struct  TaskSetCpuBase
 The CPU device ND memory set task base. More...
 
struct  TaskSetUniformCudaHip
 The CUDA/HIP memory set task. More...
 
struct  TaskSetUniformCudaHip< TApi, DimInt< 0u >, TView, TExtent >
 The scalar CUDA/HIP memory set task. More...
 
struct  TaskSetUniformCudaHip< TApi, DimInt< 1u >, TView, TExtent >
 The 1D CUDA/HIP memory set task. More...
 
struct  TaskSetUniformCudaHip< TApi, DimInt< 2u >, TView, TExtent >
 The 2D CUDA/HIP memory set task. More...
 
struct  TaskSetUniformCudaHip< TApi, DimInt< 3u >, TView, TExtent >
 The 3D CUDA/HIP memory set task. More...
 
struct  TaskSetUniformCudaHipBase
 The CUDA/HIP memory set task base. More...
 

Typedefs

template<typename T >
using atomic_ref = boost::atomic_ref< T >
 
using CudaHipBuiltinTypes = meta::Concatenate< CudaHipBuiltinTypes1, CudaHipBuiltinTypes2, CudaHipBuiltinTypes3, CudaHipBuiltinTypes4 >
 
using CudaHipBuiltinTypes1 = std::tuple< char1, double1, float1, int1, long1, longlong1, short1, uchar1, uint1, ulong1, ulonglong1, ushort1 >
 
using CudaHipBuiltinTypes2 = std::tuple< char2, double2, float2, int2, long2, longlong2, short2, uchar2, uint2, ulong2, ulonglong2, ushort2 >
 
using CudaHipBuiltinTypes3 = std::tuple< char3, dim3, double3, float3, int3, long3, longlong3, short3, uchar3, uint3, ulong3, ulonglong3, ushort3 >
 
using CudaHipBuiltinTypes4 = std::tuple< char4, double4, float4, int4, long4, longlong4, short4, uchar4, uint4, ulong4, ulonglong4, ushort4 >
 
template<typename TKernel >
using HasScheduleChunkSize = std::void_t< decltype(TKernel::ompScheduleChunkSize)>
 Helper type to check if TKernel has member ompScheduleChunkSize. More...
 
template<typename TSchedule >
using IsOmpScheduleTraitSpecialized = std::integral_constant< bool, std::is_same< TSchedule, omp::Schedule >::value >
 Helper type to check if TSchedule is a type originating from OmpSchedule trait definition. More...
 
template<typename TKernel , typename TSchedule >
using UseScheduleKind = std::enable_if_t< sizeof(TKernel::ompScheduleKind) &&!IsOmpScheduleTraitSpecialized< TSchedule >::value >
 Helper type to check if member ompScheduleKind of TKernel should be used. More...
 

Functions

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
ALPAKA_FN_HOST auto allDivisorsLessOrEqual (T const &val, T const &maxDivisor) -> std::set< T >
 
template<typename T >
void assertKernelArgIsTriviallyCopyable ()
 
template<typename TElem , typename TDim , typename TIdx >
constexpr ALPAKA_FN_HOST_ACC auto calculatePitchesFromExtents (Vec< TDim, TIdx > const &extent)
 Calculate the pitches purely from the extents. More...
 
template<typename TValue >
static auto ffsFallback (TValue value) -> std::int32_t
 Fallback implementation of ffs. More...
 
template<typename TKernelFnObj , typename TApi , typename TAcc , typename TDim , typename TIdx , typename... TArgs>
__global__ void gpuKernel (Vec< TDim, TIdx > const threadElemExtent, TKernelFnObj const kernelFnObj, TArgs... args)
 The GPU CUDA/HIP kernel entry point. More...
 
template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
ALPAKA_FN_HOST auto nextDivisorLowerOrEqual (T const &dividend, T const &maxDivisor) -> T
 Finds the largest divisor where dividend % divisor == 0. More...
 
template<typename TKernel , typename TLoopBody , typename TIdx , typename TSchedule >
ALPAKA_FN_HOST ALPAKA_FN_INLINE void parallelFor (TKernel const &kernel, TLoopBody &&loopBody, TIdx const numIterations, TSchedule const &schedule)
 Run parallel OpenMP loop. More...
 
template<typename TValue >
static auto popcountFallback (TValue value) -> std::int32_t
 Fallback implementation of popcount. More...
 
auto trim (std::string s) -> std::string
 

Variables

template<typename T >
constexpr auto isCudaHipBuiltInType = meta::Contains<CudaHipBuiltinTypes, T>::value
 

Typedef Documentation

◆ atomic_ref

template<typename T >
using alpaka::detail::atomic_ref = typedef boost::atomic_ref<T>

Definition at line 28 of file AtomicAtomicRef.hpp.

◆ CudaHipBuiltinTypes

◆ CudaHipBuiltinTypes1

using alpaka::detail::CudaHipBuiltinTypes1 = typedef std::tuple<char1, double1, float1, int1, long1, longlong1, short1, uchar1, uint1, ulong1, ulonglong1, ushort1>

Definition at line 33 of file CudaHipCommon.hpp.

◆ CudaHipBuiltinTypes2

using alpaka::detail::CudaHipBuiltinTypes2 = typedef std::tuple<char2, double2, float2, int2, long2, longlong2, short2, uchar2, uint2, ulong2, ulonglong2, ushort2>

Definition at line 35 of file CudaHipCommon.hpp.

◆ CudaHipBuiltinTypes3

using alpaka::detail::CudaHipBuiltinTypes3 = typedef std::tuple< char3, dim3, double3, float3, int3, long3, longlong3, short3, uchar3, uint3, ulong3, ulonglong3, ushort3 >

Definition at line 37 of file CudaHipCommon.hpp.

◆ CudaHipBuiltinTypes4

using alpaka::detail::CudaHipBuiltinTypes4 = typedef std::tuple<char4, double4, float4, int4, long4, longlong4, short4, uchar4, uint4, ulong4, ulonglong4, ushort4>

Definition at line 61 of file CudaHipCommon.hpp.

◆ HasScheduleChunkSize

template<typename TKernel >
using alpaka::detail::HasScheduleChunkSize = typedef std::void_t<decltype(TKernel::ompScheduleChunkSize)>

Helper type to check if TKernel has member ompScheduleChunkSize.

Is void for those types, ill-formed otherwise.

Template Parameters
TKernel - The kernel type.

Definition at line 190 of file TaskKernelCpuOmp2Blocks.hpp.

◆ IsOmpScheduleTraitSpecialized

template<typename TSchedule >
using alpaka::detail::IsOmpScheduleTraitSpecialized = typedef std::integral_constant<bool, std::is_same<TSchedule, omp::Schedule>::value>

Helper type to check if TSchedule is a type originating from OmpSchedule trait definition.

Template Parameters
TSchedule - The schedule type.

Definition at line 709 of file TaskKernelCpuOmp2Blocks.hpp.

◆ UseScheduleKind

template<typename TKernel , typename TSchedule >
using alpaka::detail::UseScheduleKind = typedef std::enable_if_t<sizeof(TKernel::ompScheduleKind) && !IsOmpScheduleTraitSpecialized<TSchedule>::value>

Helper type to check if member ompScheduleKind of TKernel should be used.

For that, the member has to be present and TSchedule must not originate from an OmpSchedule trait specialization. Is void for those types, ill-formed otherwise.

Template Parameters
TKernel - The kernel type.
TSchedule - The schedule type.

Definition at line 720 of file TaskKernelCpuOmp2Blocks.hpp.

Function Documentation

◆ allDivisorsLessOrEqual()

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
ALPAKA_FN_HOST auto alpaka::detail::allDivisorsLessOrEqual ( T const &  val,
T const &  maxDivisor 
) -> std::set<T>
Parameters
val - The value to find divisors of.
maxDivisor - The maximum.
Returns
A set of all divisors less than or equal to the given maximum.

Definition at line 59 of file WorkDivHelpers.hpp.

◆ assertKernelArgIsTriviallyCopyable()

template<typename T >
void alpaka::detail::assertKernelArgIsTriviallyCopyable ( )
inline

Definition at line 245 of file Traits.hpp.

◆ calculatePitchesFromExtents()

template<typename TElem , typename TDim , typename TIdx >
constexpr ALPAKA_FN_HOST_ACC auto alpaka::detail::calculatePitchesFromExtents ( Vec< TDim, TIdx > const &  extent)
inline constexpr

Calculate the pitches purely from the extents.

Definition at line 36 of file Traits.hpp.

◆ ffsFallback()

template<typename TValue >
static auto alpaka::detail::ffsFallback ( TValue  value) -> std::int32_t
static

Fallback implementation of ffs.

Definition at line 28 of file IntrinsicFallback.hpp.

◆ gpuKernel()

template<typename TKernelFnObj , typename TApi , typename TAcc , typename TDim , typename TIdx , typename... TArgs>
__global__ void alpaka::detail::gpuKernel ( Vec< TDim, TIdx > const  threadElemExtent,
TKernelFnObj const  kernelFnObj,
TArgs...  args 
)

The GPU CUDA/HIP kernel entry point.

Definition at line 62 of file TaskKernelGpuUniformCudaHipRt.hpp.

◆ nextDivisorLowerOrEqual()

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
ALPAKA_FN_HOST auto alpaka::detail::nextDivisorLowerOrEqual ( T const &  dividend,
T const &  maxDivisor 
) -> T

Finds the largest divisor where dividend % divisor == 0.

Parameters
dividend - The dividend.
maxDivisor - The maximum divisor.
Returns
The biggest number that satisfies the following conditions: 1) dividend % ret == 0 2) ret <= maxDivisor

Definition at line 43 of file WorkDivHelpers.hpp.

◆ parallelFor()

template<typename TKernel , typename TLoopBody , typename TIdx , typename TSchedule >
ALPAKA_FN_HOST ALPAKA_FN_INLINE void alpaka::detail::parallelFor ( TKernel const &  kernel,
TLoopBody &&  loopBody,
TIdx const  numIterations,
TSchedule const &  schedule 
)

Run parallel OpenMP loop.

Template Parameters
TKernel - The kernel type.
TLoopBody - The loop body functor type.
TIdx - The index type.
TSchedule - The schedule type (not necessarily omp::Schedule).
Parameters
kernel - The kernel instance reference, not perfect-forwarded to shorten SFINAE internally.
loopBody - The loop body functor instance, takes iteration index as input.
numIterations - The number of loop iterations.
schedule - The schedule object.

Definition at line 771 of file TaskKernelCpuOmp2Blocks.hpp.

◆ popcountFallback()

template<typename TValue >
static auto alpaka::detail::popcountFallback ( TValue  value) -> std::int32_t
static

Fallback implementation of popcount.

Definition at line 15 of file IntrinsicFallback.hpp.

◆ trim()

auto alpaka::detail::trim ( std::string  s) -> std::string
inline

Definition at line 76 of file Traits.hpp.

Variable Documentation

◆ isCudaHipBuiltInType

template<typename T >
constexpr auto alpaka::detail::isCudaHipBuiltInType = meta::Contains<CudaHipBuiltinTypes, T>::value
inline constexpr

Definition at line 67 of file CudaHipCommon.hpp.