// Produces one v128_u8 of output from the three source rows held in `buffer`.
//
// For each of two consecutive positions (offset and offset + HA) a vector is
// loaded from src0, src1 and src2, the three are combined with BinomialSum,
// and the result is passed through DivideBy16. The two v128_u16 results are
// then narrowed into a single v128_u8 with vec_pack.
//
// align  - forwarded to Load<align> to select the load variant.
// buffer - supplies the src0/src1/src2 row pointers.
// offset - element offset into each row at which the first load happens.
//
// NOTE(review): presumably the vertical step of a 3x3 binomial (Gaussian)
// blur, with HA being half a vector's worth of 16-bit elements - confirm.
template<bool align> SIMD_INLINE v128_u8 BlurRow(const Buffer & buffer, size_t offset)
{
    const size_t first = offset;
    const size_t second = offset + HA;

    v128_u16 lowerPart = DivideBy16(BinomialSum(
        Load<align>(buffer.src0 + first),
        Load<align>(buffer.src1 + first),
        Load<align>(buffer.src2 + first)));

    v128_u16 upperPart = DivideBy16(BinomialSum(
        Load<align>(buffer.src0 + second),
        Load<align>(buffer.src1 + second),
        Load<align>(buffer.src2 + second)));

    return vec_pack(lowerPart, upperPart);
}
// Computes one __m128i of intermediate blur output from the three source rows
// held in `buffer`.
//
// One vector is loaded from each of src0, src1 and src2 at `offset`; the three
// are combined with BinomialSum16 and the result is passed through DivideBy16.
//
// align  - forwarded to Load<align> to select the load variant.
// buffer - supplies the src0/src1/src2 row pointers.
// offset - element offset added to each row pointer before the load.
//
// NOTE(review): presumably the rows hold 16-bit horizontal sums and this is
// the vertical step of a 3x3 binomial blur - confirm against the caller.
template<bool align> SIMD_INLINE __m128i BlurRow16(const Buffer & buffer, size_t offset)
{
    const __m128i row0 = Load<align>((__m128i*)(buffer.src0 + offset));
    const __m128i row1 = Load<align>((__m128i*)(buffer.src1 + offset));
    const __m128i row2 = Load<align>((__m128i*)(buffer.src2 + offset));

    const __m128i weighted = BinomialSum16(row0, row1, row2);
    return DivideBy16(weighted);
}