Example #1
// Computes a per-byte filter mask into *mask: a byte is 0xFF where
//   2 * abs(p0 - q0) + abs(p1 - q1) / 2 <= thresh
// and 0x00 otherwise. The additions are unsigned and saturate at 255.
//
// p1, p0, q0, q1: vectors of 16 unsigned bytes each (read only).
// thresh:         broadcast to every byte lane by _mm_set1_epi8.
// mask:           output; also reused as scratch storage while computing.
//
// NOTE(review): MM_ABS is defined outside this fragment; per the inline
// comments it is assumed to yield the per-byte absolute difference -- confirm.
static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,
                        const __m128i* q1, int thresh, __m128i *mask) {
    __m128i t1 = MM_ABS(*p1, *q1);        // abs(p1 - q1)
    *mask = _mm_set1_epi8(0xFE);          // *mask temporarily holds the 0xFE constant
    // The shift below works on 16-bit lanes, so the lsb of each byte is cleared
    // first: otherwise the high byte's lsb would leak into the low byte's msb.
    t1 = _mm_and_si128(t1, *mask);        // set lsb of each byte to zero
    t1 = _mm_srli_epi16(t1, 1);           // abs(p1 - q1) / 2

    *mask = MM_ABS(*p0, *q0);             // abs(p0 - q0)
    *mask = _mm_adds_epu8(*mask, *mask);  // abs(p0 - q0) * 2 (saturating)
    *mask = _mm_adds_epu8(*mask, t1);     // abs(p0 - q0) * 2 + abs(p1 - q1) / 2

    t1 = _mm_set1_epi8(thresh);
    // Saturating subtract: the result byte is zero exactly when sum <= thresh.
    *mask = _mm_subs_epu8(*mask, t1);
    // Turn "is zero" into an all-ones byte: 0xFF where sum <= thresh.
    *mask = _mm_cmpeq_epi8(*mask, _mm_setzero_si128());
Example #2
// input/output is uint8_t
// Computes into *not_hev a per-byte mask that is 0xFF where both
//   abs(p1 - p0) <= hev_thresh  and  abs(q1 - q0) <= hev_thresh
// hold, and 0x00 where either difference exceeds hev_thresh.
//
// p1, p0, q0, q1: vectors of 16 unsigned bytes each (read only).
// hev_thresh:     broadcast to every byte lane by _mm_set1_epi8.
// not_hev:        output mask.
//
// NOTE(review): MM_ABS is defined outside this fragment; it is assumed to
// yield the per-byte absolute difference of its two arguments -- confirm.
static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
                                  const __m128i* const p0,
                                  const __m128i* const q0,
                                  const __m128i* const q1,
                                  int hev_thresh, __m128i* const not_hev) {
    const __m128i zero = _mm_setzero_si128();
    const __m128i t_1 = MM_ABS(*p1, *p0);
    const __m128i t_2 = MM_ABS(*q1, *q0);

    const __m128i h = _mm_set1_epi8(hev_thresh);
    // Saturating subtracts: each byte is zero exactly when the difference
    // does not exceed hev_thresh.
    const __m128i t_3 = _mm_subs_epu8(t_1, h);  // abs(p1 - p0) - hev_thresh
    const __m128i t_4 = _mm_subs_epu8(t_2, h);  // abs(q1 - q0) - hev_thresh

    // The OR is zero only where both t_3 and t_4 are zero.
    *not_hev = _mm_or_si128(t_3, t_4);
    // 0xFF where abs(p1 - p0) <= hev_thresh && abs(q1 - q0) <= hev_thresh
    *not_hev = _mm_cmpeq_epi8(*not_hev, zero);
Example #3
// input pixels are uint8_t
// Computes a per-byte filter mask into *mask: a byte is 0xFF where
//   2 * abs(p0 - q0) + abs(p1 - q1) / 2 <= thresh
// and 0x00 otherwise. The additions are unsigned and saturate at 255.
//
// p1, p0, q0, q1: vectors of 16 unsigned bytes each (read only).
// thresh:         broadcast to every byte lane by _mm_set1_epi8.
// mask:           output mask, written once at the end.
//
// NOTE(review): MM_ABS is defined outside this fragment; per the inline
// comments it is assumed to yield the per-byte absolute difference -- confirm.
static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
                                    const __m128i* const p0,
                                    const __m128i* const q0,
                                    const __m128i* const q1,
                                    int thresh, __m128i* const mask) {
    const __m128i m_thresh = _mm_set1_epi8(thresh);
    const __m128i t1 = MM_ABS(*p1, *q1);        // abs(p1 - q1)
    const __m128i kFE = _mm_set1_epi8(0xFE);
    // The shift below works on 16-bit lanes, so the lsb of each byte is cleared
    // first: otherwise the high byte's lsb would leak into the low byte's msb.
    const __m128i t2 = _mm_and_si128(t1, kFE);  // set lsb of each byte to zero
    const __m128i t3 = _mm_srli_epi16(t2, 1);   // abs(p1 - q1) / 2

    const __m128i t4 = MM_ABS(*p0, *q0);        // abs(p0 - q0)
    const __m128i t5 = _mm_adds_epu8(t4, t4);   // abs(p0 - q0) * 2 (saturating)
    const __m128i t6 = _mm_adds_epu8(t5, t3);   // abs(p0-q0)*2 + abs(p1-q1)/2

    // Saturating subtract: the result byte is zero exactly when t6 <= m_thresh.
    const __m128i t7 = _mm_subs_epu8(t6, m_thresh);
    // Turn "is zero" into an all-ones byte: 0xFF where t6 <= m_thresh.
    *mask = _mm_cmpeq_epi8(t7, _mm_setzero_si128());