// Evaluates the wrapped expression at `index` and returns N elements.
// The stored `vtable` is viewed as the expression_vtable specialisation for
// this T and N, and its `get` entry is called with `instance`, `index` and
// the output vector. The cinput_t and vec_t<T, N> parameters are unnamed and
// unused in the body.
CMT_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N>) const
{
    static_assert(is_poweroftwo(N), "N must be a power of two");

    using vtable_type = expression_vtable<T, N>;
    const vtable_type* const table = static_cast<const vtable_type*>(vtable);

    vec<T, N> out; // written by the vtable's `get` entry below
    table->get(instance, index, out);
    return out;
}
#include "function.hpp"
#include <algorithm>
#include <utility>

namespace kfr
{

namespace internal
{

// Recursion terminator: a one-element vector reduces to its only element.
// The reducer argument is accepted for signature uniformity and is not used.
template <typename T, typename ReduceFn>
CMT_INLINE T horizontal_impl(const vec<T, 1>& value, ReduceFn&&)
{
    return T(value[0]);
}

// Power-of-two width (N > 1): combines low(value) and high(value) with
// `reduce` and recurses on the combined vector until the one-element
// overload is reached.
// NOTE(review): low()/high() presumably return the lower and upper halves of
// `value`, so each step halves the width - confirm against their definitions.
template <typename T, size_t N, typename ReduceFn, KFR_ENABLE_IF(N > 1 && is_poweroftwo(N))>
CMT_INLINE T horizontal_impl(const vec<T, N>& value, ReduceFn&& reduce)
{
    return horizontal_impl(reduce(low(value), high(value)), std::forward<ReduceFn>(reduce));
}

// Non-power-of-two width (N > 1): widens `value` to next_poweroftwo(N)
// elements and forwards to the power-of-two overload.
template <typename T, size_t N, typename ReduceFn, KFR_ENABLE_IF(N > 1 && !is_poweroftwo(N))>
CMT_INLINE T horizontal_impl(const vec<T, N>& value, ReduceFn&& reduce)
{
    // The reducer is called with an initialvalue<T> tag to obtain the value
    // handed to widen().
    // NOTE(review): presumably this is the reducer's neutral element and
    // widen() uses it to fill the added lanes - confirm.
    const T initial = reduce(initialvalue<T>());
    return horizontal_impl(widen<next_poweroftwo(N)>(value, initial), std::forward<ReduceFn>(reduce));
}
} // namespace internal

template <typename T, size_t N, typename ReduceFn>
CMT_INLINE T horizontal(const vec<T, N>& value, ReduceFn&& reduce)
{
template <size_t N, typename T> KFR_INTRINSIC simd<T, N> simd_read(const T* src) CMT_NOEXCEPT { return reinterpret_cast<typename simd_storage<T, N, false>::const_pointer>(src)->value; } template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(is_poweroftwo(N))> KFR_INTRINSIC vec<T, N> read(cunaligned_t, csize_t<N>, const T* src) CMT_NOEXCEPT { // Clang requires a separate function returning vector (simd). // Direct returning vec causes aligned read instruction return simd_read<N>(src); } template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(!is_poweroftwo(N)), typename = void> KFR_INTRINSIC vec<T, N> read(cunaligned_t, csize_t<N>, const T* src) CMT_NOEXCEPT { constexpr size_t first = prev_poweroftwo(N); return concat(read(cunaligned, csize<first>, src), read(cunaligned, csize<N - first>, src + first)); } template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(is_poweroftwo(N))> KFR_INTRINSIC void write(cunaligned_t, T* dest, const vec<T, N>& x) CMT_NOEXCEPT { reinterpret_cast<typename simd_storage<T, N, A>::pointer>(dest)->value = x.v; } template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(!is_poweroftwo(N)), size_t Nlow = prev_poweroftwo(N - 1)> KFR_INTRINSIC void write(cunaligned_t, T* dest, const vec<T, N>& x) CMT_NOEXCEPT