Пример #1
0
		// NEON variant: loads one vector from each of buffer.src0..src3 at 'offset',
		// combines them with BinomialSum16, scales the result with DivideBy64 and
		// narrows every 16-bit lane to 8 bits.
		// 'align' is forwarded to Load to select its template variant.
		template <bool align> SIMD_INLINE uint8x8_t ReduceRow(const Buffer & buffer, size_t offset)
		{
			const uint16x8_t scaled = DivideBy64(BinomialSum16(
				Load<align>(buffer.src0 + offset),
				Load<align>(buffer.src1 + offset),
				Load<align>(buffer.src2 + offset),
				Load<align>(buffer.src3 + offset)));

			// vmovn_u16 keeps the low 8 bits of each 16-bit lane.
			return vmovn_u16(scaled);
		}
 // SSE2 variant: loads one 128-bit vector from each of buffer.src0..src3 at
 // 'offset', combines them with BinomialSum16 and scales with DivideBy64.
 // The result is masked with K16_00FF and packed to bytes with K_ZERO as the
 // second pack operand, so the reduced values land in the lower 8 bytes.
 // 'align' is forwarded to Load to select its template variant.
 template <bool align> SIMD_INLINE __m128i ReduceRow(const Buffer & buffer, size_t offset)
 {
     const __m128i scaled = DivideBy64(BinomialSum16(
         Load<align>((__m128i*)(buffer.src0 + offset)),
         Load<align>((__m128i*)(buffer.src1 + offset)),
         Load<align>((__m128i*)(buffer.src2 + offset)),
         Load<align>((__m128i*)(buffer.src3 + offset))));

     // Mask each 16-bit lane before the saturating pack.
     // NOTE(review): K16_00FF presumably holds 0x00FF in every lane - confirm.
     const __m128i masked = _mm_and_si128(scaled, K16_00FF);

     // NOTE(review): K_ZERO is presumably all zeros, leaving the upper 8 bytes cleared - confirm.
     return _mm_packus_epi16(masked, K_ZERO);
 }
 // AVX2 variant: loads one 256-bit vector from each of buffer.src0..src3 at
 // 'offset', combines them with BinomialSum16, scales with DivideBy64 and masks
 // the result with K16_00FF. The result stays in 16-bit lanes; no packing to
 // bytes happens here.
 // 'align' is forwarded to Load to select its template variant.
 template <bool align> SIMD_INLINE __m256i ReduceRow16(const Buffer & buffer, size_t offset)
 {
     const __m256i scaled = DivideBy64(BinomialSum16(
         Load<align>((__m256i*)(buffer.src0 + offset)),
         Load<align>((__m256i*)(buffer.src1 + offset)),
         Load<align>((__m256i*)(buffer.src2 + offset)),
         Load<align>((__m256i*)(buffer.src3 + offset))));

     // NOTE(review): K16_00FF presumably holds 0x00FF in every 16-bit lane - confirm.
     return _mm256_and_si256(scaled, K16_00FF);
 }