コード例 #1
0
ファイル: dec_sse2.c プロジェクト: keenliu/cuzySample
// Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2).
// All six pixel vectors are updated in place.
//   p2..q2:     pixel vectors on both sides of the edge (read and written).
//   mask:       per-lane mask; the filter value is AND-ed with it before use.
//   hev_thresh: threshold forwarded to GET_NOTHEV.
// Lanes are split in two disjoint groups using 'not_hev': the "hev" lanes get
// the simple filter on p0/q0 only, the "not hev" lanes get the strong filter
// on all six pixels.
// NOTE(review): GET_NOTHEV, FLIP_SIGN_BIT2/4, GET_BASE_DELTA, DO_SIMPLE_FILTER
// and UPDATE_2PIXELS are macros defined outside this excerpt. 'sign_bit' has no
// explicit use below, so it is presumably captured by name inside them: do not
// rename or remove it without checking the macro definitions.
static WEBP_INLINE void DoFilter6(__m128i* p2, __m128i* p1, __m128i* p0,
                                  __m128i* q0, __m128i* q1, __m128i* q2,
                                  const __m128i* mask, int hev_thresh) {
    __m128i a, not_hev;
    // 0x80 (128) does not fit in a signed char; cast explicitly so the
    // narrowing is intentional and does not trigger constant-conversion
    // warnings.
    const __m128i sign_bit = _mm_set1_epi8((char)0x80);

    // compute hev mask (done before the sign flip, on the original values)
    GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);

    // convert to signed values
    FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
    FLIP_SIGN_BIT2(*p2, *q2);

    GET_BASE_DELTA(*p1, *p0, *q0, *q1, a);

    {   // do simple filter on pixels with hev
        const __m128i m = _mm_andnot_si128(not_hev, *mask);  // ~not_hev & mask
        const __m128i f = _mm_and_si128(a, m);
        DO_SIMPLE_FILTER(*p0, *q0, f);
    }
    {   // do strong filter on pixels with not hev
        const __m128i zero = _mm_setzero_si128();
        const __m128i nine = _mm_set1_epi16(0x0900);
        const __m128i sixty_three = _mm_set1_epi16(63);

        const __m128i m = _mm_and_si128(not_hev, *mask);
        const __m128i f = _mm_and_si128(a, m);
        // Interleaving with zero puts each filter byte in the high half of a
        // 16-bit lane (i.e. f << 8). The high-half multiply by 0x0900 (9 << 8)
        // then yields f * 9 as a signed 16-bit value.
        const __m128i f_lo = _mm_unpacklo_epi8(zero, f);
        const __m128i f_hi = _mm_unpackhi_epi8(zero, f);

        const __m128i f9_lo = _mm_mulhi_epi16(f_lo, nine);   // Filter (lo) * 9
        const __m128i f9_hi = _mm_mulhi_epi16(f_hi, nine);   // Filter (hi) * 9
        const __m128i f18_lo = _mm_add_epi16(f9_lo, f9_lo);  // Filter (lo) * 18
        const __m128i f18_hi = _mm_add_epi16(f9_hi, f9_hi);  // Filter (hi) * 18

        const __m128i a2_lo = _mm_add_epi16(f9_lo, sixty_three);  // Filter * 9 + 63
        const __m128i a2_hi = _mm_add_epi16(f9_hi, sixty_three);  // Filter * 9 + 63

        const __m128i a1_lo = _mm_add_epi16(f18_lo, sixty_three);  // Filter * 18 + 63
        const __m128i a1_hi = _mm_add_epi16(f18_hi, sixty_three);  // Filter * 18 + 63

        const __m128i a0_lo = _mm_add_epi16(f18_lo, a2_lo);  // Filter * 27 + 63
        const __m128i a0_hi = _mm_add_epi16(f18_hi, a2_hi);  // Filter * 27 + 63

        // Outermost pair gets the smallest correction, innermost the largest.
        UPDATE_2PIXELS(*p2, *q2, a2_lo, a2_hi);
        UPDATE_2PIXELS(*p1, *q1, a1_lo, a1_hi);
        UPDATE_2PIXELS(*p0, *q0, a0_lo, a0_hi);
    }

    // unoffset: undo the sign flip applied before filtering
    FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
    FLIP_SIGN_BIT2(*p2, *q2);
}
コード例 #2
0
ファイル: dec_sse2.c プロジェクト: 8l/insieme
// Applies filter on 2 pixels (p0 and q0).
// Only *p0 and *q0 are written by this function's own statements; p1 and q1
// are read here to build sign-flipped copies and are passed to NeedsFilter.
//   p1, p0, q0, q1: pixel vectors around the edge.
//   thresh:         threshold forwarded to NeedsFilter, which produces the
//                   per-lane mask applied to the filter value.
// NOTE(review): NeedsFilter, GetBaseDelta, DoSimpleFilter and FLIP_SIGN_BIT2
// are defined outside this excerpt. 'sign_bit' is presumably also referenced
// by name inside FLIP_SIGN_BIT2 -- keep the identifier as is.
static WEBP_INLINE void DoFilter2(__m128i* const p1, __m128i* const p0,
                                  __m128i* const q0, __m128i* const q1,
                                  int thresh) {
    __m128i a, mask;
    // 0x80 (128) does not fit in a signed char; cast explicitly so the
    // narrowing is intentional and does not trigger constant-conversion
    // warnings.
    const __m128i sign_bit = _mm_set1_epi8((char)0x80);
    // convert p1/q1 to int8_t (for GetBaseDelta); the originals are left
    // untouched because NeedsFilter below still receives them
    __m128i p1s = _mm_xor_si128(*p1, sign_bit);   // insieme: dropped const
    __m128i q1s = _mm_xor_si128(*q1, sign_bit);   // insieme: dropped const

    // the mask is computed before p0/q0 are sign-flipped
    NeedsFilter(p1, p0, q0, q1, thresh, &mask);

    // convert p0/q0 to signed values, in place
    FLIP_SIGN_BIT2(*p0, *q0);
    GetBaseDelta(&p1s, p0, q0, &q1s, &a);
    a = _mm_and_si128(a, mask);     // mask filter values we don't care about
    DoSimpleFilter(p0, q0, &a);
    // unoffset: undo the sign flip on p0/q0
    FLIP_SIGN_BIT2(*p0, *q0);
}
コード例 #3
0
ファイル: dec_sse2.c プロジェクト: keenliu/cuzySample
// Applies filter on 2 pixels (p0 and q0).
// *p0 and *q0 are updated in place; *p1 and *q1 are only read.
//   p1, q1: outer pixel vectors (read-only).
//   p0, q0: pixel vectors adjacent to the edge (read and written).
//   thresh: threshold forwarded to NeedsFilter, which produces the per-lane
//           mask applied to the filter value.
// NOTE(review): NeedsFilter and the macros FLIP_SIGN_BIT2, GET_BASE_DELTA and
// DO_SIMPLE_FILTER are defined outside this excerpt. The macros presumably
// reference 'sign_bit' by name -- do not rename it without checking them.
static WEBP_INLINE void DoFilter2(const __m128i* p1, __m128i* p0, __m128i* q0,
                                  const __m128i* q1, int thresh) {
    __m128i a, mask;
    // 0x80 (128) does not fit in a signed char; cast explicitly so the
    // narrowing is intentional and does not trigger constant-conversion
    // warnings.
    const __m128i sign_bit = _mm_set1_epi8((char)0x80);
    // sign-flipped copies of p1/q1; the originals stay untouched
    const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
    const __m128i q1s = _mm_xor_si128(*q1, sign_bit);

    // the mask is computed before p0/q0 are sign-flipped
    NeedsFilter(p1, p0, q0, q1, thresh, &mask);

    // convert to signed values
    FLIP_SIGN_BIT2(*p0, *q0);

    GET_BASE_DELTA(p1s, *p0, *q0, q1s, a);
    a = _mm_and_si128(a, mask);     // mask filter values we don't care about
    DO_SIMPLE_FILTER(*p0, *q0, a);

    // unoffset
    FLIP_SIGN_BIT2(*p0, *q0);
}
コード例 #4
0
ファイル: dec_sse2.c プロジェクト: 8l/insieme
// Applies filter on 4 pixels (p1, p0, q0 and q1).
// All four pixel vectors are updated in place.
//   p1, p0, q0, q1: pixel vectors around the edge (read and written).
//   mask:           per-lane mask AND-ed with the filter value.
//   hev_thresh:     threshold forwarded to GetNotHEV.
// p0/q0 are adjusted in every masked lane; p1/q1 are adjusted only in lanes
// where 'not_hev' is set.
// NOTE(review): GetNotHEV, SignedShift8b and the FLIP_SIGN_BIT2/4 macros are
// defined outside this excerpt; the macros presumably reference 'sign_bit' by
// name, so keep that identifier unchanged.
static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
                                  __m128i* const q0, __m128i* const q1,
                                  const __m128i* const mask, int hev_thresh) {
    // 0x80 (128) does not fit in a signed char; cast explicitly so the
    // narrowing is intentional and does not trigger constant-conversion
    // warnings.
    const __m128i sign_bit = _mm_set1_epi8((char)0x80);
    const __m128i k64 = _mm_set1_epi8(0x40);
    const __m128i zero = _mm_setzero_si128();
    __m128i not_hev;
    __m128i t1, t2, t3;

    // compute hev mask
    GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);

    // convert to signed values
    FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);

    t1 = _mm_subs_epi8(*p1, *q1);        // p1 - q1
    t1 = _mm_andnot_si128(not_hev, t1);  // hev(p1 - q1)
    t2 = _mm_subs_epi8(*q0, *p0);        // q0 - p0
    // three saturating additions rather than a multiply, so every partial sum
    // is clamped to the int8_t range
    t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 1 * (q0 - p0)
    t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 2 * (q0 - p0)
    t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 3 * (q0 - p0)
    t1 = _mm_and_si128(t1, *mask);       // mask filter values we don't care about

    t2 = _mm_set1_epi8(3);
    t3 = _mm_set1_epi8(4);
    t2 = _mm_adds_epi8(t1, t2);        // 3 * (q0 - p0) + hev(p1 - q1) + 3
    t3 = _mm_adds_epi8(t1, t3);        // 3 * (q0 - p0) + hev(p1 - q1) + 4
    SignedShift8b(&t2);                // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
    SignedShift8b(&t3);                // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
    *p0 = _mm_adds_epi8(*p0, t2);      // p0 += t2
    *q0 = _mm_subs_epi8(*q0, t3);      // q0 -= t3
    // p0/q0 are final: undo their sign flip now (p1/q1 stay signed for now)
    FLIP_SIGN_BIT2(*p0, *q0);

    // this is equivalent to signed (a + 1) >> 1 calculation:
    // adding sign_bit flips the top bit (signed -> offset-unsigned), the
    // unsigned average with zero gives (x + 1) >> 1, and subtracting 0x40
    // removes the halved 0x80 offset again.
    t2 = _mm_add_epi8(t3, sign_bit);
    t3 = _mm_avg_epu8(t2, zero);
    t3 = _mm_sub_epi8(t3, k64);

    t3 = _mm_and_si128(not_hev, t3);   // if !hev
    *q1 = _mm_subs_epi8(*q1, t3);      // q1 -= t3
    *p1 = _mm_adds_epi8(*p1, t3);      // p1 += t3
    // undo the sign flip on p1/q1
    FLIP_SIGN_BIT2(*p1, *q1);
}
コード例 #5
0
ファイル: dec_sse2.c プロジェクト: 8l/insieme
// Updates values of 2 pixels at MB edge during complex filtering.
// Update operations:
// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
// Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip).
//   pi, qi:       pixel vectors, updated in place.
//   a0_lo, a0_hi: 16-bit filter terms for the low and high halves of the
//                 vector (read-only).
// NOTE(review): FLIP_SIGN_BIT2 is a macro defined outside this excerpt.
// 'sign_bit' has no other use in this function, so the macro presumably
// references it by name -- do not rename or remove it without checking.
static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
                                      const __m128i* const a0_lo,
                                      const __m128i* const a0_hi) {
    // arithmetic (sign-preserving) shift of each 16-bit lane
    const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7);
    const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7);
    // narrow back to sixteen 8-bit lanes with signed saturation
    const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi);
    // 0x80 (128) does not fit in a signed char; cast explicitly so the
    // narrowing is intentional and does not trigger constant-conversion
    // warnings.
    const __m128i sign_bit = _mm_set1_epi8((char)0x80);
    *pi = _mm_adds_epi8(*pi, delta);   // p += delta (saturating)
    *qi = _mm_subs_epi8(*qi, delta);   // q -= delta (saturating)
    FLIP_SIGN_BIT2(*pi, *qi);
}
コード例 #6
0
ファイル: dec_sse2.c プロジェクト: 8l/insieme
// Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2).
// All six pixel vectors are updated in place.
//   p2..q2:     pixel vectors on both sides of the edge (read and written).
//   mask:       per-lane mask; the filter value is AND-ed with it before use.
//   hev_thresh: threshold forwarded to GetNotHEV.
// Lanes are split in two disjoint groups using 'not_hev': the "hev" lanes get
// the simple filter on p0/q0 only, the "not hev" lanes get the strong filter
// on all six pixels.
// Unlike the sign flip on entry, there is no explicit flip back at the end of
// this function: per the documented contract of Update2Pixels ("int8_t on
// input, uint8_t on output"), that helper performs it for each pixel pair.
// NOTE(review): GetNotHEV, GetBaseDelta, DoSimpleFilter, Update2Pixels and the
// FLIP_SIGN_BIT2/4 macros are defined outside this block. 'sign_bit' has no
// explicit use below, so the macros presumably reference it by name -- do not
// rename or remove it without checking.
static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
                                  __m128i* const p0, __m128i* const q0,
                                  __m128i* const q1, __m128i* const q2,
                                  const __m128i* const mask, int hev_thresh) {
    const __m128i zero = _mm_setzero_si128();
    // 0x80 (128) does not fit in a signed char; cast explicitly so the
    // narrowing is intentional and does not trigger constant-conversion
    // warnings.
    const __m128i sign_bit = _mm_set1_epi8((char)0x80);
    __m128i a, not_hev;

    // compute hev mask
    GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);

    // convert to signed values
    FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
    FLIP_SIGN_BIT2(*p2, *q2);
    GetBaseDelta(p1, p0, q0, q1, &a);

    {   // do simple filter on pixels with hev
        const __m128i m = _mm_andnot_si128(not_hev, *mask);  // ~not_hev & mask
        __m128i f = _mm_and_si128(a, m);   // insieme: dropped const
        DoSimpleFilter(p0, q0, &f);
    }

    {   // do strong filter on pixels with not hev
        const __m128i k9 = _mm_set1_epi16(0x0900);
        const __m128i k63 = _mm_set1_epi16(63);

        const __m128i m = _mm_and_si128(not_hev, *mask);
        const __m128i f = _mm_and_si128(a, m);

        // Interleaving with zero puts each filter byte in the high half of a
        // 16-bit lane (i.e. f << 8). The high-half multiply by 0x0900 (9 << 8)
        // then yields f * 9 as a signed 16-bit value.
        const __m128i f_lo = _mm_unpacklo_epi8(zero, f);
        const __m128i f_hi = _mm_unpackhi_epi8(zero, f);

        const __m128i f9_lo = _mm_mulhi_epi16(f_lo, k9);    // Filter (lo) * 9
        const __m128i f9_hi = _mm_mulhi_epi16(f_hi, k9);    // Filter (hi) * 9

        __m128i a2_lo = _mm_add_epi16(f9_lo, k63);    // Filter * 9 + 63   // insieme: dropped const
        __m128i a2_hi = _mm_add_epi16(f9_hi, k63);    // Filter * 9 + 63   // insieme: dropped const

        __m128i a1_lo = _mm_add_epi16(a2_lo, f9_lo);  // Filter * 18 + 63   // insieme: dropped const
        __m128i a1_hi = _mm_add_epi16(a2_hi, f9_hi);  // Filter * 18 + 63   // insieme: dropped const

        __m128i a0_lo = _mm_add_epi16(a1_lo, f9_lo);  // Filter * 27 + 63   // insieme: dropped const
        __m128i a0_hi = _mm_add_epi16(a1_hi, f9_hi);  // Filter * 27 + 63   // insieme: dropped const

        // Outermost pair gets the smallest correction, innermost the largest.
        Update2Pixels(p2, q2, &a2_lo, &a2_hi);
        Update2Pixels(p1, q1, &a1_lo, &a1_hi);
        Update2Pixels(p0, q0, &a0_lo, &a0_hi);
    }
}