13 #include <type_traits>
31 template<
typename Simd,
typename SFINAE =
void>
34 static_assert(
sizeof(
Simd) == 0,
"Please specialize SimdTraits for the type Simd");
39 struct SimdTraits<T, std::enable_if_t<std::is_arithmetic_v<T>>>
43 inline static constexpr std::size_t lanes = 1;
57 return mem[indices[0]];
69 template<
typename Simd,
typename SFINAE =
void>
77 template<
typename RecordDim,
template<
typename>
typename MakeSimd,
typename BinaryReductionFunction>
81 std::size_t lanes = simdLanes<MakeSimd<mp_first<FRD>>>;
82 mp_for_each_inline<mp_transform<std::add_pointer_t, mp_drop_c<FRD, 1>>>(
85 using T = std::remove_reference_t<decltype(*t)>;
86 lanes = reduce(lanes,
simdLanes<MakeSimd<T>>);
99 template<
typename RecordDim,
template<
typename>
typename MakeSimd>
101 = chooseSimdLanes<RecordDim, MakeSimd>([](
auto a,
auto b) {
return std::max(a, b); });
110 template<
typename RecordDim,
template<
typename>
typename MakeSimd>
112 = chooseSimdLanes<RecordDim, MakeSimd>([](
auto a,
auto b) {
return std::min(a, b); });
116 template<std::size_t N,
template<
typename,
auto>
typename MakeSizedSimd>
120 using fn = MakeSizedSimd<U, N>;
126 template<
typename,
auto>
127 typename MakeSizedSimd>
133 template<
typename RecordDim,
template<
typename,
auto>
typename MakeSizedSimd>
144 template<
typename RecordDim, std::size_t N,
template<
typename,
auto>
typename MakeSizedSimd>
150 template<
typename RecordDim,
template<
typename>
typename MakeSimd>
158 template<
typename T, std::size_t N,
template<
typename,
auto>
typename MakeSizedSimd>
159 using SimdN =
typename std::conditional_t<
161 std::conditional_t<N == 1, mp_identity<One<T>>, mp_identity<One<SimdizeN<T, N, MakeSizedSimd>>>>,
162 std::conditional_t<N == 1, mp_identity<T>, mp_identity<SimdizeN<T, N, MakeSizedSimd>>>>::type;
167 template<
typename T,
template<
typename>
typename MakeSimd>
169 conditional_t<isRecordDim<T>, mp_identity<One<Simdize<T, MakeSimd>>>, mp_identity<Simdize<T, MakeSimd>>>::type;
173 template<std::
size_t S>
176 template<
typename Simd>
177 using fn = std::bool_constant<simdLanes<Simd> == S>;
184 template<
typename Simd>
185 inline constexpr std::size_t simdLanes<Simd, std::enable_if_t<isRecordRef<Simd>>> = []
188 using FirstFieldType = mp_first<FRD>;
190 return simdLanes<FirstFieldType>;
195 template<
typename AoSMapping,
typename ElementType, std::
size_t Lanes>
201 /
sizeof(ElementType);
202 std::array<int, Lanes> indices{};
203 for(
int i = 0; i < static_cast<int>(Lanes); i++)
204 indices[i] = i * stride;
208 template<
typename T,
typename Simd,
typename SrcRC,
typename DstRC>
212 using ElementSimd = std::decay_t<decltype(dstSimd(dstRC))>;
215 auto loadElementWise = [&]
218 for(std::size_t i = 0; i < Traits::lanes; i++)
219 reinterpret_cast<FieldType*
>(&dstSimd(dstRC))[i]
220 = srcRef.view(*b++)(
cat(
typename T::BoundRecordCoord{}, srcRC));
224 using Mapping =
typename T::View::Mapping;
225 if constexpr(mapping::isSoA<Mapping>)
228 dstSimd(dstRC) = Traits::loadUnaligned(&srcRef(srcRC));
231 else if constexpr(mapping::isAoSoA<typename T::View::Mapping>)
234 if(T::View::Mapping::ArrayExtents::rank == 1 && srcRef.arrayIndex()[0] % Traits::lanes == 0
235 && T::View::Mapping::lanes >= Traits::lanes)
238 dstSimd(dstRC) = Traits::loadUnaligned(&srcRef(srcRC));
244 else if constexpr(mapping::isAoS<Mapping>)
247 dstSimd(dstRC) = Traits::gather(&srcRef(srcRC), aosStridedIndices<Mapping, FieldType, Traits::lanes>);
254 template<
typename Simd,
typename TFwd,
typename SrcRC,
typename DstRC>
257 using T = std::remove_reference_t<TFwd>;
259 using ElementSimd = std::decay_t<decltype(srcSimd(srcRC))>;
262 auto storeElementWise = [&]
267 for(std::size_t i = 0; i < Traits::lanes; i++)
268 dstRef.view (*b++)(
cat(
typename T::BoundRecordCoord{}, dstRC))
269 =
reinterpret_cast<const FieldType*
>(&srcSimd(srcRC))[i];
273 using Mapping =
typename std::remove_reference_t<T>::View::Mapping;
274 if constexpr(mapping::isSoA<Mapping>)
277 Traits::storeUnaligned(srcSimd(srcRC), &dstRef(dstRC));
280 else if constexpr(mapping::isAoSoA<typename T::View::Mapping>)
283 if(T::View::Mapping::ArrayExtents::rank == 1 && dstRef.arrayIndex()[0] % Traits::lanes == 0
284 && T::View::Mapping::lanes >= Traits::lanes)
287 Traits::storeUnaligned(srcSimd(srcRC), &dstRef(dstRC));
293 else if constexpr(mapping::isAoS<Mapping>)
296 Traits::scatter(srcSimd(srcRC), &dstRef(dstRC), aosStridedIndices<Mapping, FieldType, Traits::lanes>);
309 template<
typename T,
typename Simd>
313 if constexpr(isRecordRef<Simd> && isRecordRef<T>)
315 if constexpr(simdLanes<Simd> == simdLanes<T>)
319 using SrcARD =
typename T::AccessibleRecordDim;
320 using DstArd =
typename Simd::AccessibleRecordDim;
321 if constexpr(std::is_same_v<SrcARD, DstArd>)
328 forEachLeafCoord<SrcARD>(
331 using SrcInnerCoord = decltype(srcRC);
332 forEachLeafCoord<DstArd>(
335 using DstInnerCoord = decltype(dstRC);
336 if constexpr(hasSameTags<SrcARD, SrcInnerCoord, DstArd, DstInnerCoord>)
346 else if constexpr(!isRecordRef<Simd> && !isRecordRef<T>)
355 static_assert(
sizeof(
Simd) == 0,
"Invalid combination of Simd type and reference type");
364 template<
typename Simd,
typename TFwd>
367 using T = std::decay_t<TFwd>;
369 if constexpr(isRecordRef<Simd> && isRecordRef<T>)
371 if constexpr(simdLanes<Simd> == simdLanes<T>)
375 using SrcARD =
typename Simd::AccessibleRecordDim;
376 using DstArd =
typename T::AccessibleRecordDim;
377 if constexpr(std::is_same_v<SrcARD, DstArd>)
384 forEachLeafCoord<SrcARD>(
387 using SrcInnerCoord = decltype(srcRC);
388 forEachLeafCoord<DstArd>(
391 using DstInnerCoord = decltype(dstRC);
392 if constexpr(hasSameTags<SrcARD, SrcInnerCoord, DstArd, DstInnerCoord>)
402 else if constexpr(!isRecordRef<Simd> && !isRecordRef<T>)
411 static_assert(
sizeof(
Simd) == 0,
"Invalid combination of Simd type and reference type");
418 template<
typename,
auto>
419 typename MakeSizedSimd,
421 typename UnarySimdFunction>
424 using IndexType =
typename View::Mapping::ArrayExtents::value_type;
426 auto it = view.
begin();
429 while(i + IndexType{N} <= total)
433 if constexpr(std::is_void_v<decltype(f(simd))>)
444 if constexpr(std::is_void_v<decltype(f(scalar))>)
457 template<
typename,
auto>
458 typename MakeSizedSimd,
460 typename UnarySimdFunction>
463 constexpr
auto n = llama::simdLanesWithFullVectorsFor<typename View::RecordDim, MakeSimd>;
464 simdForEachN<n, MakeSizedSimd>(view, f);
#define LLAMA_LAMBDA_INLINE
Gives a strong indication to the compiler to inline the attributed lambda.
#define LLAMA_BEGIN_SUPPRESS_HOST_DEVICE_WARNING
#define LLAMA_CONSTEVAL
Expands to consteval if the compiler supports the keyword, otherwise to constexpr.
#define LLAMA_FN_HOST_ACC_INLINE
#define LLAMA_END_SUPPRESS_HOST_DEVICE_WARNING
void loadSimdFromField(const T &srcRef, Simd &dstSimd, SrcRC srcRC, DstRC dstRC)
void storeSimdToField(const Simd &srcSimd, TFwd &&dstRef, SrcRC srcRC, DstRC dstRC)
constexpr auto aosStridedIndices
typename internal::FlattenRecordDimImpl< RecordDim >::type FlatRecordDim
Returns a flat type list containing all leaf field types of the given record dimension.
typename std::conditional_t< isRecordDim< T >, std::conditional_t< N==1, mp_identity< One< T > >, mp_identity< One< SimdizeN< T, N, MakeSizedSimd > >> >, std::conditional_t< N==1, mp_identity< T >, mp_identity< SimdizeN< T, N, MakeSizedSimd > >> >::type SimdN
constexpr std::size_t simdLanesWithFullVectorsFor
constexpr auto cat(RecordCoords...)
Concatenate a set of RecordCoords instances.
void simdForEachN(View &view, UnarySimdFunction f)
void loadSimd(const T &srcRef, Simd &dstSimd)
TransformLeavesWithCoord< RecordDim, internal::MakePassSecond< FieldTypeFunctor >::template fn > TransformLeaves
typename internal::SimdizeNImpl< RecordDim, N, MakeSizedSimd >::type SimdizeN
constexpr auto product(Array< T, N > a) -> T
constexpr auto chooseSimdLanes(BinaryReductionFunction reduce) -> std::size_t
constexpr std::size_t flatSizeOf
The size a type list would have if its elements were members of a normal struct.
constexpr std::size_t simdLanesWithLeastRegistersFor
typename internal::GetTypeImpl< RecordDim, RecordCoordOrTags... >::type GetType
void simdForEach(View &view, UnarySimdFunction f)
void storeSimd(const Simd &srcSimd, TFwd &&dstRef)
typename std::conditional_t< isRecordDim< T >, mp_identity< One< Simdize< T, MakeSimd > >>, mp_identity< Simdize< T, MakeSimd > >>::type Simd
TransformLeaves< RecordDim, MakeSimd > Simdize
Iterator supporting ArrayIndexRange.
static auto gather(const value_type *mem, std::array< int, lanes > indices) -> T
static void scatter(T t, value_type *mem, std::array< int, lanes > indices)
static void storeUnaligned(T t, T *mem)
static auto loadUnaligned(const T *mem) -> T
auto mapping() -> Mapping &
TransformLeaves< RecordDim, internal::BindMakeSizedSimd< N, MakeSizedSimd >::template fn > type
std::bool_constant< simdLanes< Simd >==S > fn