// Builds a per-byte filter mask from four rows/columns of unsigned 8-bit
// pixels. For each of the 16 byte lanes the test is
//   2 * abs(p0 - q0) + abs(p1 - q1) / 2 <= thresh
// evaluated with unsigned saturating 8-bit arithmetic. On return each byte of
// *mask is 0xFF where the test holds and 0x00 where it does not.
//
// MM_ABS is defined elsewhere in this file; it is used here as the per-byte
// absolute difference of its two operands.
//
// All four inputs are loaded before *mask is stored, and *mask is stored
// exactly once, so the result stays correct even if 'mask' points at one of
// the inputs.
//
// NOTE(review): thresh is broadcast with _mm_set1_epi8, which takes an 8-bit
// argument -- presumably callers keep it within 0..255; confirm.
static void NeedsFilter(const __m128i* const p1, const __m128i* const p0,
                        const __m128i* const q0, const __m128i* const q1,
                        int thresh, __m128i* const mask) {
  const __m128i m_thresh = _mm_set1_epi8(thresh);
  const __m128i t1 = MM_ABS(*p1, *q1);         // abs(p1 - q1)
  const __m128i t4 = MM_ABS(*p0, *q0);         // abs(p0 - q0)

  // The shift below works on 16-bit lanes. Clearing bit 0 of every byte first
  // keeps the high byte's low bit from sliding into the low byte.
  const __m128i kFE = _mm_set1_epi8(0xFE);
  const __m128i t2 = _mm_and_si128(t1, kFE);
  const __m128i t3 = _mm_srli_epi16(t2, 1);    // abs(p1 - q1) / 2

  const __m128i t5 = _mm_adds_epu8(t4, t4);    // abs(p0 - q0) * 2
  const __m128i t6 = _mm_adds_epu8(t5, t3);    // abs(p0-q0)*2 + abs(p1-q1)/2

  // Saturating subtract yields zero exactly where t6 <= m_thresh.
  const __m128i t7 = _mm_subs_epu8(t6, m_thresh);
  *mask = _mm_cmpeq_epi8(t7, _mm_setzero_si128());
}
// input/output is uint8_t static WEBP_INLINE void GetNotHEV(const __m128i* const p1, const __m128i* const p0, const __m128i* const q0, const __m128i* const q1, int hev_thresh, __m128i* const not_hev) { const __m128i zero = _mm_setzero_si128(); const __m128i t_1 = MM_ABS(*p1, *p0); const __m128i t_2 = MM_ABS(*q1, *q0); const __m128i h = _mm_set1_epi8(hev_thresh); const __m128i t_3 = _mm_subs_epu8(t_1, h); // abs(p1 - p0) - hev_tresh const __m128i t_4 = _mm_subs_epu8(t_2, h); // abs(q1 - q0) - hev_tresh *not_hev = _mm_or_si128(t_3, t_4); *not_hev = _mm_cmpeq_epi8(*not_hev, zero); // not_hev <= t1 && not_hev <= t2 }
// input pixels are uint8_t static WEBP_INLINE void NeedsFilter(const __m128i* const p1, const __m128i* const p0, const __m128i* const q0, const __m128i* const q1, int thresh, __m128i* const mask) { const __m128i m_thresh = _mm_set1_epi8(thresh); const __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1) const __m128i kFE = _mm_set1_epi8(0xFE); const __m128i t2 = _mm_and_si128(t1, kFE); // set lsb of each byte to zero const __m128i t3 = _mm_srli_epi16(t2, 1); // abs(p1 - q1) / 2 const __m128i t4 = MM_ABS(*p0, *q0); // abs(p0 - q0) const __m128i t5 = _mm_adds_epu8(t4, t4); // abs(p0 - q0) * 2 const __m128i t6 = _mm_adds_epu8(t5, t3); // abs(p0-q0)*2 + abs(p1-q1)/2 const __m128i t7 = _mm_subs_epu8(t6, m_thresh); // mask <= m_thresh *mask = _mm_cmpeq_epi8(t7, _mm_setzero_si128()); }