Classes
struct	AtomicHierarchyConceptType

struct	AtomicHierarchyConceptType< hierarchy::Blocks >

struct	AtomicHierarchyConceptType< hierarchy::Grids >

struct	AtomicHierarchyConceptType< hierarchy::Threads >

struct	BlockSharedMemDynMemberStatic
	"namespace" for static constexpr members that should be in BlockSharedMemDynMember but cannot be because having a static const member breaks GCC 10 OpenMP target: type not mappable. More...

class	BlockSharedMemStMemberImpl
	Implementation of static block shared memory provider. More...

class	BufCpuImpl
	The CPU memory buffer. More...

class	BufUniformCudaHipRtImpl
	The Uniform Cuda/HIP memory buffer implementation. More...

struct	CheckFnReturnType
	Check that the return of TKernelFnObj is void. More...

struct	CheckFnReturnType< AccGpuUniformCudaHipRt< TApi, TDim, TIdx > >
	specialization of the TKernelFnObj return type evaluation More...

struct	DevGenericImpl
	The CPU/GPU device queue registry implementation. More...

struct	DevGlobalImplGeneric

struct	DevGlobalTrait

struct	DevGlobalTrait< TagCpuOmp2Blocks, T >

struct	DevGlobalTrait< TagCpuOmp2Threads, T >

struct	DevGlobalTrait< TagCpuSerial, T >

struct	DevGlobalTrait< TagCpuTbbBlocks, T >

struct	DevGlobalTrait< TagCpuThreads, T >

struct	DevGlobalTrait< TagGpuCudaRt, T >

struct	DevGlobalTrait< TagGpuHipRt, T >

class	IndependentGroupsAlong

struct	ParallelFor
	Executor of parallel OpenMP loop. More...

struct	ParallelFor< TKernel, omp::Schedule >
	Executor of parallel OpenMP loop. More...

struct	ParallelFor< TKernel, TSchedule, UseScheduleKind< TKernel, TSchedule > >
	Executor of parallel OpenMP loop. More...

struct	ParallelForDynamicImpl
	Helper executor of parallel OpenMP loop with the dynamic schedule. More...

struct	ParallelForDynamicImpl< TKernel, TSchedule, HasScheduleChunkSize< TKernel > >
	Helper executor of parallel OpenMP loop with the dynamic schedule. More...

struct	ParallelForGuidedImpl
	Helper executor of parallel OpenMP loop with the guided schedule. More...

struct	ParallelForGuidedImpl< TKernel, TSchedule, HasScheduleChunkSize< TKernel > >
	Helper executor of parallel OpenMP loop with the guided schedule. More...

struct	ParallelForImpl
	Executor of parallel OpenMP loop with the given schedule. More...

struct	ParallelForImpl< TKernel, omp::Schedule, omp::Schedule::Dynamic >
	Executor of parallel OpenMP loop with the dynamic schedule. More...

struct	ParallelForImpl< TKernel, omp::Schedule, omp::Schedule::Guided >
	Executor of parallel OpenMP loop with the guided schedule. More...

struct	ParallelForImpl< TKernel, omp::Schedule, omp::Schedule::Static >
	Executor of parallel OpenMP loop with the static schedule. More...

struct	ParallelForImpl< TKernel, TSchedule, omp::Schedule::Auto >
	Executor of parallel OpenMP loop with auto schedule set. More...

struct	ParallelForImpl< TKernel, TSchedule, omp::Schedule::Dynamic >
	Executor of parallel OpenMP loop with the dynamic schedule. More...

struct	ParallelForImpl< TKernel, TSchedule, omp::Schedule::Guided >
	Executor of parallel OpenMP loop with the guided schedule. More...

struct	ParallelForImpl< TKernel, TSchedule, omp::Schedule::NoSchedule >
	Executor of parallel OpenMP loop with no schedule set. More...

struct	ParallelForImpl< TKernel, TSchedule, omp::Schedule::Runtime >
	Executor of parallel OpenMP loop with runtime schedule set. More...

struct	ParallelForImpl< TKernel, TSchedule, omp::Schedule::Static >
	Executor of parallel OpenMP loop with the static schedule. More...

struct	ParallelForStaticImpl
	Helper executor of parallel OpenMP loop with the static schedule. More...

struct	ParallelForStaticImpl< TKernel, TSchedule, HasScheduleChunkSize< TKernel > >
	Helper executor of parallel OpenMP loop with the static schedule. More...

struct	PitchHolder

struct	PitchHolder< TDim, std::enable_if_t< TDim::value >= 2 > >

struct	Print

struct	Print< DimInt< Dim< TView >::value - 1u >, TView >

struct	TaskCopyCpu
	The CPU device ND memory copy task. More...

struct	TaskCopyCpu< DimInt< 0u >, TViewDst, TViewSrc, TExtent >
	The CPU device scalar memory copy task. More...

struct	TaskCopyCpu< DimInt< 1u >, TViewDst, TViewSrc, TExtent >
	The CPU device 1D memory copy task. More...

struct	TaskCopyCpuBase
	The CPU device memory copy task base. More...

struct	TaskCopyUniformCudaHip
	The CUDA/HIP memory copy trait. More...

struct	TaskCopyUniformCudaHip< TApi, DimInt< 0u >, TViewDst, TViewSrc, TExtent >
	The scalar CUDA/HIP memory copy trait. More...

struct	TaskCopyUniformCudaHip< TApi, DimInt< 1u >, TViewDst, TViewSrc, TExtent >
	The 1D CUDA/HIP memory copy trait. More...

struct	TaskCopyUniformCudaHip< TApi, DimInt< 2u >, TViewDst, TViewSrc, TExtent >
	The 2D CUDA/HIP memory copy trait. More...

struct	TaskCopyUniformCudaHip< TApi, DimInt< 3u >, TViewDst, TViewSrc, TExtent >
	The 3D CUDA/HIP memory copy trait. More...

struct	TaskSetCpu
	The CPU device ND memory set task. More...

struct	TaskSetCpu< DimInt< 0u >, TView, TExtent >
	The CPU device scalar memory set task. More...

struct	TaskSetCpu< DimInt< 1u >, TView, TExtent >
	The CPU device 1D memory set task. More...

struct	TaskSetCpuBase
	The CPU device ND memory set task base. More...

struct	TaskSetUniformCudaHip
	The CUDA/HIP memory set task. More...

struct	TaskSetUniformCudaHip< TApi, DimInt< 0u >, TView, TExtent >
	The scalar CUDA/HIP memory set task. More...

struct	TaskSetUniformCudaHip< TApi, DimInt< 1u >, TView, TExtent >
	The 1D CUDA/HIP memory set task. More...

struct	TaskSetUniformCudaHip< TApi, DimInt< 2u >, TView, TExtent >
	The 2D CUDA/HIP memory set task. More...

struct	TaskSetUniformCudaHip< TApi, DimInt< 3u >, TView, TExtent >
	The 3D CUDA/HIP memory set task. More...

struct	TaskSetUniformCudaHipBase
	The CUDA/HIP memory set task base. More...

class	UniformElementsAlong

Typedefs
template<typename T >
using	atomic_ref = boost::atomic_ref< T >

using	CudaHipBuiltinTypes = meta::Concatenate< CudaHipBuiltinTypes1, CudaHipBuiltinTypes2, CudaHipBuiltinTypes3, CudaHipBuiltinTypes4 >

using	CudaHipBuiltinTypes1 = std::tuple< char1, double1, float1, int1, long1, longlong1, short1, uchar1, uint1, ulong1, ulonglong1, ushort1 >

using	CudaHipBuiltinTypes2 = std::tuple< char2, double2, float2, int2, long2, longlong2, short2, uchar2, uint2, ulong2, ulonglong2, ushort2 >

using	CudaHipBuiltinTypes3 = std::tuple< char3, dim3, double3, float3, int3, long3, longlong3, short3, uchar3, uint3, ulong3, ulonglong3, ushort3 >

using	CudaHipBuiltinTypes4 = std::tuple< char4, double4, float4, int4, long4, longlong4, short4, uchar4, uint4, ulong4, ulonglong4, ushort4 >

template<typename TKernel >
using	HasScheduleChunkSize = std::void_t< decltype(TKernel::ompScheduleChunkSize)>
	Helper type to check if TKernel has member ompScheduleChunkSize.

template<typename TSchedule >
using	IsOmpScheduleTraitSpecialized = std::integral_constant< bool, std::is_same< TSchedule, omp::Schedule >::value >
	Helper type to check if TSchedule is a type originating from OmpSchedule trait definition.

template<typename TKernel , typename TSchedule >
using	UseScheduleKind = std::enable_if_t< sizeof(TKernel::ompScheduleKind) && !IsOmpScheduleTraitSpecialized< TSchedule >::value >
	Helper type to check if member ompScheduleKind of TKernel should be used.

Functions
template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
ALPAKA_FN_HOST auto	allDivisorsLessOrEqual (T const &val, T const &maxDivisor) -> std::set< T >

template<typename T >
void	assertKernelArgIsTriviallyCopyable ()

template<typename TElem , typename TDim , typename TIdx >
ALPAKA_FN_HOST_ACC constexpr auto	calculatePitchesFromExtents (Vec< TDim, TIdx > const &extent)
	Calculate the pitches purely from the extents.

template<typename TValue >
static auto	ffsFallback (TValue value) -> std::int32_t
	Fallback implementation of ffs.

template<typename TKernelFnObj , typename TAcc , typename... TArgs>
__global__ void	gpuKernel (Vec< Dim< TAcc >, Idx< TAcc > > const threadElemExtent, TKernelFnObj const kernelFnObj, TArgs... args)
	The GPU CUDA/HIP kernel entry point.

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>
ALPAKA_FN_HOST auto	nextDivisorLowerOrEqual (T const &dividend, T const &maxDivisor) -> T
	Finds the largest divisor where dividend % divisor == 0.

template<typename TKernel , typename TLoopBody , typename TIdx , typename TSchedule >
ALPAKA_FN_HOST ALPAKA_FN_INLINE void	parallelFor (TKernel const &kernel, TLoopBody &&loopBody, TIdx const numIterations, TSchedule const &schedule)
	Run parallel OpenMP loop.

template<typename TValue >
static auto	popcountFallback (TValue value) -> std::int32_t
	Fallback implementation of popcount.

auto	trim (std::string s) -> std::string

Variables
template<typename T >
constexpr auto	isCudaHipBuiltInType = meta::Contains<CudaHipBuiltinTypes, T>::value

template<typename TKernelFnObj , typename TAcc , typename... TArgs>
void(*	kernelName )(Vec< Dim< TAcc >, Idx< TAcc > > const, TKernelFnObj const, remove_restrict_t< std::decay_t< TArgs > >...) = gpuKernel<TKernelFnObj, TAcc, TArgs...>

Typedef Documentation

◆ atomic_ref

template<typename T >

using alpaka::detail::atomic_ref = typedef boost::atomic_ref<T>

Definition at line 28 of file AtomicAtomicRef.hpp.

◆ CudaHipBuiltinTypes

using alpaka::detail::CudaHipBuiltinTypes = typedef meta::Concatenate<CudaHipBuiltinTypes1, CudaHipBuiltinTypes2, CudaHipBuiltinTypes3, CudaHipBuiltinTypes4>

Definition at line 63 of file CudaHipCommon.hpp.

◆ CudaHipBuiltinTypes1

using alpaka::detail::CudaHipBuiltinTypes1 = typedef std::tuple<char1, double1, float1, int1, long1, longlong1, short1, uchar1, uint1, ulong1, ulonglong1, ushort1>

Definition at line 33 of file CudaHipCommon.hpp.

◆ CudaHipBuiltinTypes2

using alpaka::detail::CudaHipBuiltinTypes2 = typedef std::tuple<char2, double2, float2, int2, long2, longlong2, short2, uchar2, uint2, ulong2, ulonglong2, ushort2>

Definition at line 35 of file CudaHipCommon.hpp.

◆ CudaHipBuiltinTypes3

using alpaka::detail::CudaHipBuiltinTypes3 = typedef std::tuple< char3, dim3, double3, float3, int3, long3, longlong3, short3, uchar3, uint3, ulong3, ulonglong3, ushort3 >

Definition at line 37 of file CudaHipCommon.hpp.

◆ CudaHipBuiltinTypes4

using alpaka::detail::CudaHipBuiltinTypes4 = typedef std::tuple<char4, double4, float4, int4, long4, longlong4, short4, uchar4, uint4, ulong4, ulonglong4, ushort4>

Definition at line 61 of file CudaHipCommon.hpp.

◆ HasScheduleChunkSize

template<typename TKernel >

using alpaka::detail::HasScheduleChunkSize = typedef std::void_t<decltype(TKernel::ompScheduleChunkSize)>

Helper type to check if TKernel has member ompScheduleChunkSize.

Is void for those types, ill-formed otherwise.

Template Parameters

TKernel The kernel type.

Definition at line 197 of file TaskKernelCpuOmp2Blocks.hpp.

◆ IsOmpScheduleTraitSpecialized

template<typename TSchedule >

using alpaka::detail::IsOmpScheduleTraitSpecialized = typedef std::integral_constant<bool, std::is_same<TSchedule, omp::Schedule>::value>

Helper type to check if TSchedule is a type originating from OmpSchedule trait definition.

Template Parameters

TSchedule The schedule type.

Definition at line 716 of file TaskKernelCpuOmp2Blocks.hpp.

◆ UseScheduleKind

template<typename TKernel , typename TSchedule >

using alpaka::detail::UseScheduleKind = typedef std::enable_if_t<sizeof(TKernel::ompScheduleKind) && !IsOmpScheduleTraitSpecialized<TSchedule>::value>

Helper type to check if member ompScheduleKind of TKernel should be used.

For that, the member has to be present and the OmpSchedule trait must not be specialized. Is void for those types, ill-formed otherwise.

Template Parameters

TKernel	The kernel type.
TSchedule	The schedule type.

Definition at line 727 of file TaskKernelCpuOmp2Blocks.hpp.

Function Documentation

◆ allDivisorsLessOrEqual()

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>

ALPAKA_FN_HOST auto alpaka::detail::allDivisorsLessOrEqual	(	T const &	val,
		T const &	maxDivisor
	)		-> std::set<T>

Parameters

val	The value to find divisors of.
maxDivisor	The maximum divisor.

Returns: A set of all divisors less than or equal to the given maximum.

Definition at line 66 of file WorkDivHelpers.hpp.

◆ assertKernelArgIsTriviallyCopyable()

template<typename T >

void alpaka::detail::assertKernelArgIsTriviallyCopyable ( )

inline

Definition at line 288 of file Traits.hpp.

◆ calculatePitchesFromExtents()

template<typename TElem , typename TDim , typename TIdx >

ALPAKA_FN_HOST_ACC constexpr auto alpaka::detail::calculatePitchesFromExtents ( Vec< TDim, TIdx > const & extent )

inline constexpr

Calculate the pitches purely from the extents.

Definition at line 36 of file Traits.hpp.

◆ ffsFallback()

template<typename TValue >

static auto alpaka::detail::ffsFallback ( TValue value ) -> std::int32_t

static

Fallback implementation of ffs.

Definition at line 28 of file IntrinsicFallback.hpp.

◆ gpuKernel()

template<typename TKernelFnObj , typename TAcc , typename... TArgs>

__global__ void alpaka::detail::gpuKernel	(	Vec< Dim< TAcc >, Idx< TAcc > > const	threadElemExtent,
		TKernelFnObj const	kernelFnObj,
		TArgs...	args
	)

The GPU CUDA/HIP kernel entry point.

Definition at line 63 of file TaskKernelGpuUniformCudaHipRt.hpp.

◆ nextDivisorLowerOrEqual()

template<typename T , typename = std::enable_if_t<std::is_integral_v<T>>>

ALPAKA_FN_HOST auto alpaka::detail::nextDivisorLowerOrEqual	(	T const &	dividend,
		T const &	maxDivisor
	)		-> T

Finds the largest divisor where dividend % divisor == 0.

Parameters

dividend	The dividend.
maxDivisor	The maximum divisor.

Returns: The biggest number that satisfies the following conditions: 1) dividend % ret == 0 2) ret <= maxDivisor

Definition at line 50 of file WorkDivHelpers.hpp.

◆ parallelFor()

template<typename TKernel , typename TLoopBody , typename TIdx , typename TSchedule >

ALPAKA_FN_HOST ALPAKA_FN_INLINE void alpaka::detail::parallelFor	(	TKernel const &	kernel,
		TLoopBody &&	loopBody,
		TIdx const	numIterations,
		TSchedule const &	schedule
	)

Run parallel OpenMP loop.

Template Parameters

TKernel	The kernel type.
TLoopBody	The loop body functor type.
TIdx	The index type.
TSchedule	The schedule type (not necessarily omp::Schedule).

Parameters

kernel	The kernel instance reference, not perfect-forwarded to shorten SFINAE internally.
loopBody	The loop body functor instance, takes iteration index as input.
numIterations	The number of loop iterations.
schedule	The schedule object.

Definition at line 778 of file TaskKernelCpuOmp2Blocks.hpp.

◆ popcountFallback()

template<typename TValue >

static auto alpaka::detail::popcountFallback ( TValue value ) -> std::int32_t

static

Fallback implementation of popcount.

Definition at line 15 of file IntrinsicFallback.hpp.

◆ trim()

auto alpaka::detail::trim ( std::string s ) -> std::string

inline

Definition at line 76 of file Traits.hpp.

Variable Documentation

◆ isCudaHipBuiltInType

template<typename T >

constexpr auto alpaka::detail::isCudaHipBuiltInType = meta::Contains<CudaHipBuiltinTypes, T>::value

inline constexpr

Definition at line 67 of file CudaHipCommon.hpp.

◆ kernelName

template<typename TKernelFnObj , typename TAcc , typename... TArgs>

void(* alpaka::detail::kernelName)(Vec< Dim< TAcc >, Idx< TAcc > > const, TKernelFnObj const, remove_restrict_t< std::decay_t< TArgs > >...) = gpuKernel<TKernelFnObj, TAcc, TArgs...>

inline

Definition at line 83 of file TaskKernelGpuUniformCudaHipRt.hpp.

Classes

Typedefs

Functions

Variables

Typedef Documentation

◆ atomic_ref

◆ CudaHipBuiltinTypes

◆ CudaHipBuiltinTypes1

◆ CudaHipBuiltinTypes2

◆ CudaHipBuiltinTypes3

◆ CudaHipBuiltinTypes4

◆ HasScheduleChunkSize

◆ IsOmpScheduleTraitSpecialized

◆ UseScheduleKind

Function Documentation

◆ allDivisorsLessOrEqual()

◆ assertKernelArgIsTriviallyCopyable()

◆ calculatePitchesFromExtents()

◆ ffsFallback()

◆ gpuKernel()

◆ nextDivisorLowerOrEqual()

◆ parallelFor()

◆ popcountFallback()

◆ trim()

Variable Documentation

◆ isCudaHipBuiltInType

◆ kernelName