21 #include <type_traits>
42 template<
typename T,
typename = std::enable_if_t<std::is_
integral_v<T>>>
49 T divisor = maxDivisor;
50 while(dividend % divisor != 0)
58 template<
typename T,
typename = std::enable_if_t<std::is_
integral_v<T>>>
61 std::set<T> divisorSet;
67 for(T i(1); i <=
std::min(val, maxDivisor); ++i)
71 divisorSet.insert(
static_cast<T
>(val / i));
83 template<
typename TDim,
typename TIdx>
87 if((accDevProps.m_gridBlockCountMax < 1) || (accDevProps.m_blockThreadCountMax < 1)
88 || (accDevProps.m_threadElemCountMax < 1))
94 auto const gridBlockExtentMax = subVecEnd<TDim>(accDevProps.m_gridBlockExtentMax);
95 auto const blockThreadExtentMax = subVecEnd<TDim>(accDevProps.m_blockThreadExtentMax);
96 auto const threadElemExtentMax = subVecEnd<TDim>(accDevProps.m_threadElemExtentMax);
99 for(
typename TDim::value_type i(0); i < TDim::value; ++i)
102 if((gridBlockExtentMax[i] < 1) || (blockThreadExtentMax[i] < 1) || (threadElemExtentMax[i] < 1))
128 template<
typename TDim,
typename TIdx>
133 bool blockThreadMustDivideGridThreadExtent =
true,
138 using DimLoopInd =
typename TDim::value_type;
140 for(DimLoopInd i(0); i < TDim::value; ++i)
144 ALPAKA_ASSERT(threadElemExtent[i] <= accDevProps.m_threadElemExtentMax[i]);
146 ALPAKA_ASSERT(threadElemExtent.prod() <= accDevProps.m_threadElemCountMax);
151 auto const clippedThreadElemExtent =
elementwise_min(threadElemExtent, gridElemExtent);
152 auto const gridThreadExtent = [&]
155 for(DimLoopInd i(0u); i < TDim::value; ++i)
156 r[i] =
core::divCeil(gridElemExtent[i], clippedThreadElemExtent[i]);
166 auto blockThreadExtent =
elementwise_min(accDevProps.m_blockThreadExtentMax, gridThreadExtent);
171 blockThreadExtent =
Vec::all(blockThreadExtent.min());
174 auto const& blockThreadCountMax = accDevProps.m_blockThreadCountMax;
175 if(blockThreadExtent.prod() > blockThreadCountMax)
177 switch(gridBlockExtentSubDivRestrictions)
184 while(blockThreadExtent.prod() > blockThreadCountMax)
185 blockThreadExtent[blockThreadExtent.maxElem()] /= TIdx{2};
189 while(blockThreadExtent.prod() > blockThreadCountMax)
191 auto const it = std::min_element(
192 blockThreadExtent.begin(),
193 blockThreadExtent.end() - 1,
194 [](TIdx
const& a, TIdx
const& b)
209 if(blockThreadMustDivideGridThreadExtent)
211 switch(gridBlockExtentSubDivRestrictions)
218 std::array<std::set<TIdx>, TDim::value> gridThreadExtentDivisors;
219 for(DimLoopInd i(0u); i < TDim::value; ++i)
221 gridThreadExtentDivisors[i]
225 std::set<TIdx> intersects[2u];
226 for(DimLoopInd i(1u); i < TDim::value; ++i)
228 intersects[(i - 1u) % 2u] = gridThreadExtentDivisors[0];
229 intersects[(i) % 2u].clear();
232 std::end(intersects[(i - 1u) % 2u]),
234 std::end(gridThreadExtentDivisors[i]),
235 std::inserter(intersects[i % 2],
std::begin(intersects[i % 2u])));
237 TIdx
const maxCommonDivisor = *(--
std::end(intersects[(TDim::value - 1) % 2u]));
238 blockThreadExtent =
Vec::all(maxCommonDivisor);
244 for(DimLoopInd i(0u); i < TDim::value; ++i)
251 auto const gridBlockExtent = [&]
254 for(DimLoopInd i = 0; i < TDim::value; ++i)
255 r[i] =
core::divCeil(gridThreadExtent[i], blockThreadExtent[i]);
259 return WorkDivMembers<TDim, TIdx>(gridBlockExtent, blockThreadExtent, clippedThreadElemExtent);
282 typename TGridElemExtent = Vec<Dim<TAcc>, Idx<TAcc>>,
283 typename TThreadElemExtent = Vec<Dim<TAcc>, Idx<TAcc>>>
285 [[maybe_unused]] TDev
const& dev,
288 [[maybe_unused]]
bool blockThreadMustDivideGridThreadExtent =
true,
290 = GridBlockExtentSubDivRestrictions::Unrestricted)
295 "The dimension of TAcc and the dimension of TGridElemExtent have to be identical!");
298 "The dimension of TAcc and the dimension of TThreadElemExtent have to be identical!");
301 "The idx type of TAcc and the idx type of TGridElemExtent have to be identical!");
304 "The idx type of TAcc and the idx type of TThreadElemExtent have to be identical!");
317 getAccDevProps<TAcc>(dev),
318 blockThreadMustDivideGridThreadExtent,
319 gridBlockExtentSubDivRestrictions);
330 template<
typename TDim,
typename TIdx,
typename TWorkDiv>
334 auto const gridBlockExtent = getWorkDiv<Grid, Blocks>(workDiv);
335 auto const blockThreadExtent = getWorkDiv<Block, Threads>(workDiv);
336 auto const threadElemExtent = getWorkDiv<Block, Threads>(workDiv);
339 if(accDevProps.m_gridBlockCountMax < gridBlockExtent.prod())
343 if(accDevProps.m_blockThreadCountMax < blockThreadExtent.prod())
347 if(accDevProps.m_threadElemCountMax < threadElemExtent.prod())
356 auto const gridBlockExtentMax = subVecEnd<Dim<TWorkDiv>>(accDevProps.m_gridBlockExtentMax);
357 auto const blockThreadExtentMax = subVecEnd<Dim<TWorkDiv>>(accDevProps.m_blockThreadExtentMax);
358 auto const threadElemExtentMax = subVecEnd<Dim<TWorkDiv>>(accDevProps.m_threadElemExtentMax);
363 if((gridBlockExtent[i] < 1) || (blockThreadExtent[i] < 1) || (threadElemExtent[i] < 1)
364 || (gridBlockExtentMax[i] < gridBlockExtent[i]) || (blockThreadExtentMax[i] < blockThreadExtent[i])
365 || (threadElemExtentMax[i] < threadElemExtent[i]))
379 template<
typename TAcc,
typename TDev,
typename TWorkDiv>
#define ALPAKA_ASSERT(...)
The assert can be explicitly disabled by defining NDEBUG.
#define ALPAKA_UNREACHABLE(...)
Before CUDA 11.5 nvcc is unable to correctly identify return statements in 'if constexpr' branches....
ALPAKA_NO_HOST_ACC_WARNING static constexpr ALPAKA_FN_HOST_ACC auto all(TVal const &val) -> Vec< TDim, TVal >
Single value constructor.
A basic class holding the work division as grid block extent, block thread and thread element extent.
constexpr ALPAKA_FN_HOST_ACC auto divCeil(Integral a, Integral b) -> Integral
Returns the ceiling of a / b, as integer.
constexpr ALPAKA_FN_HOST_ACC auto nthRootFloor(Integral value, Integral n) -> Integral
Computes the floor of the nth root of value, in integers.
ALPAKA_NO_HOST_ACC_WARNING constexpr ALPAKA_FN_HOST_ACC auto assertValueUnsigned(TArg const &arg) -> void
This method checks whether integral values are greater than or equal to zero. The implementation prevents war...
ALPAKA_FN_HOST auto nextDivisorLowerOrEqual(T const &dividend, T const &maxDivisor) -> T
Finds the largest divisor where dividend % divisor == 0.
ALPAKA_FN_HOST auto allDivisorsLessOrEqual(T const &val, T const &maxDivisor) -> std::set< T >
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto min(T const &min_ctx, Tx const &x, Ty const &y)
Returns the smaller of two arguments. NaNs are treated as missing data (between a NaN and a numeric v...
ALPAKA_FN_HOST auto end(TView &view) -> Iterator< TView >
ALPAKA_FN_HOST auto begin(TView &view) -> Iterator< TView >
The alpaka accelerator library.
typename trait::IdxType< T >::type Idx
ALPAKA_FN_HOST auto getValidWorkDiv([[maybe_unused]] TDev const &dev, [[maybe_unused]] TGridElemExtent const &gridElemExtent=Vec< Dim< TAcc >, Idx< TAcc >>::ones(), [[maybe_unused]] TThreadElemExtent const &threadElemExtents=Vec< Dim< TAcc >, Idx< TAcc >>::ones(), [[maybe_unused]] bool blockThreadMustDivideGridThreadExtent=true, [[maybe_unused]] GridBlockExtentSubDivRestrictions gridBlockExtentSubDivRestrictions=GridBlockExtentSubDivRestrictions::Unrestricted) -> WorkDivMembers< Dim< TGridElemExtent >, Idx< TGridElemExtent >>
ALPAKA_FN_HOST auto isValidWorkDiv(TDev const &dev, TWorkDiv const &workDiv) -> bool
ALPAKA_NO_HOST_ACC_WARNING ALPAKA_FN_HOST_ACC auto getExtents(T const &object) -> Vec< Dim< T >, Idx< T >>
ALPAKA_FN_HOST auto isValidAccDevProps(AccDevProps< TDim, TIdx > const &accDevProps) -> bool
ALPAKA_FN_HOST auto subDivideGridElems(Vec< TDim, TIdx > const &gridElemExtent, Vec< TDim, TIdx > const &threadElemExtent, AccDevProps< TDim, TIdx > const &accDevProps, bool blockThreadMustDivideGridThreadExtent=true, GridBlockExtentSubDivRestrictions gridBlockExtentSubDivRestrictions=GridBlockExtentSubDivRestrictions::Unrestricted) -> WorkDivMembers< TDim, TIdx >
Subdivides the given grid thread extent into blocks restricted by the maxima allowed.
GridBlockExtentSubDivRestrictions
The grid block extent subdivision restrictions.
@ Unrestricted
The block thread extent will not have any restrictions.
@ CloseToEqualExtent
The block thread extent will be as close to equal as possible in all dimensions.
@ EqualExtent
The block thread extent will be equal in all dimensions.
ALPAKA_NO_HOST_ACC_WARNING constexpr ALPAKA_FN_HOST_ACC auto elementwise_min(Vec< TDim, TVal > const &p, Vecs const &... qs) -> Vec< TDim, TVal >
Vec(TFirstIndex &&, TRestIndices &&...) -> Vec< DimInt< 1+sizeof...(TRestIndices)>, std::decay_t< TFirstIndex >>
typename trait::DimType< T >::type Dim
The dimension type trait alias template to remove the ::type.
The acceleration properties on a device.