// Builds one vector of 8-bit output from the three row pointers held in `buffer`
// (src0, src1, src2).
//
// Two chunks are processed: the one starting at `offset` and the one starting
// HA elements later. For each chunk the three rows are loaded, combined with
// BinomialSum and passed through DivideBy16; the two 16-bit results are then
// narrowed into a single vector with vec_pack.
//
// `align` is forwarded to Load for every read.
// NOTE(review): BinomialSum is presumably the 1-2-1 weighting (a + 2*b + c) and
// DivideBy16 the matching normalisation — confirm against their definitions.
template<bool align> SIMD_INLINE v128_u8 BlurRow(const Buffer & buffer, size_t offset)
{
    // Position of the second chunk.
    const size_t next = offset + HA;

    v128_u16 first = DivideBy16(BinomialSum(
        Load<align>(buffer.src0 + offset),
        Load<align>(buffer.src1 + offset),
        Load<align>(buffer.src2 + offset)));

    v128_u16 second = DivideBy16(BinomialSum(
        Load<align>(buffer.src0 + next),
        Load<align>(buffer.src1 + next),
        Load<align>(buffer.src2 + next)));

    return vec_pack(first, second);
}
// Horizontal Sobel response for a 3x3 neighbourhood of byte vectors.
//
// For each of the three rows a[r], the difference between column 2 and
// column 0 is formed by ExtendSub<sequence>; the three row differences are
// then combined with BinomialSum, and the result goes through
// ConditionalAbs<abs>.
//
// NOTE(review): `sequence` presumably selects which half of the byte vector is
// widened to 16 bits, and `abs` whether the absolute value is taken — confirm
// against ExtendSub / ConditionalAbs.
template<bool abs, int sequence> SIMD_INLINE v8i16 SobelDx(v16u8 a[3][3])
{
    v8i16 top = ExtendSub<sequence>(a[0][2], a[0][0]);
    v8i16 middle = ExtendSub<sequence>(a[1][2], a[1][0]);
    v8i16 bottom = ExtendSub<sequence>(a[2][2], a[2][0]);

    v8i16 weighted = BinomialSum(top, middle, bottom);
    return ConditionalAbs<abs>(weighted);
}
// Combines three byte vectors a[0], a[1], a[2] with BinomialSum and writes the
// results to `b`.
//
// The UnpackLoU8 parts of the inputs produce the vector stored at `b`; the
// UnpackHiU8 parts produce the vector stored at `b + HA`.
//
// `align` is forwarded to Store for both writes.
// NOTE(review): BinomialSum is presumably a + 2*b + c on the widened values —
// confirm against its definition.
template<bool align> SIMD_INLINE void BlurCol(v128_u8 a[3], uint16_t * b)
{
    uint16_t * const lowerDst = b;
    uint16_t * const upperDst = b + HA;

    Store<align>(lowerDst, BinomialSum(
        UnpackLoU8(a[0]),
        UnpackLoU8(a[1]),
        UnpackLoU8(a[2])));

    Store<align>(upperDst, BinomialSum(
        UnpackHiU8(a[0]),
        UnpackHiU8(a[1]),
        UnpackHiU8(a[2])));
}
// Merges three rows of 16-bit values into one vector of 8-bit values.
//
// `lo` and `hi` each hold three vectors (one per row). Each triple is combined
// with BinomialSum and passed through DivideBy16<compensation>; the two
// results are then narrowed into one vector with vec_packsu.
//
// NOTE(review): `compensation` presumably toggles a rounding term inside
// DivideBy16 — confirm against its definition.
template <bool compensation> SIMD_INLINE v128_u8 ReduceRow(const v128_u16 lo[3], const v128_u16 hi[3])
{
    const v128_u16 loSum = BinomialSum(lo[0], lo[1], lo[2]);
    const v128_u16 hiSum = BinomialSum(hi[0], hi[1], hi[2]);

    return vec_packsu(
        DivideBy16<compensation>(loSum),
        DivideBy16<compensation>(hiSum));
}
// Horizontal combination step for a position away from the start of the row.
//
// Two byte vectors are read: one starting at `p - 1` (always through
// Load<false>) and one starting at `p` (through Load<align>). The vec_mule
// product of the shifted vector, and the vec_mule and vec_mulo products of the
// vector at `p`, all taken against K8_01, are fed to BinomialSum.
//
// NOTE(review): assuming K8_01 is a vector of byte ones, the multiplies act as
// even/odd byte-to-16-bit widening, giving p[2i-1], p[2i], p[2i+1] per lane —
// confirm the constant and the element ordering for the target endianness.
template<bool align> SIMD_INLINE v128_u16 ReduceColBody(const uint8_t * p)
{
    const v128_u8 shifted = Load<false>(p - 1);
    const v128_u8 current = Load<align>(p);

    return BinomialSum(
        vec_mule(shifted, K8_01),
        vec_mule(current, K8_01),
        vec_mulo(current, K8_01));
}
// Horizontal combination step for the first vector of a row.
//
// Only the vector at `p` is read from memory (through Load<align>). The
// operand that would otherwise come from before `p` is derived from that same
// vector with LoadBeforeFirst<1>. The vec_mule product of the derived vector,
// and the vec_mule and vec_mulo products of the loaded vector, all taken
// against K8_01, are fed to BinomialSum.
//
// NOTE(review): LoadBeforeFirst<1> presumably shifts the vector by one byte
// while replicating the first element, avoiding a read before the row start —
// confirm against its definition.
template<bool align> SIMD_INLINE v128_u16 ReduceColNose(const uint8_t * p)
{
    const v128_u8 current = Load<align>(p);

    return BinomialSum(
        vec_mule(LoadBeforeFirst<1>(current), K8_01),
        vec_mule(current, K8_01),
        vec_mulo(current, K8_01));
}