Пример #1
0
// Re-interleaves four column vectors into row order and hands the result to
// Store4x4 in four groups of four rows.
// In the layout notes below, "rc" is the byte for row r (hex digit 0..f) and
// column c (0..3); lanes are listed from most significant to least significant.
//   p1, p0, q0, q1: columns 0, 1, 2 and 3 respectively, one byte per row.
//                   Only read here, never written.
//   r0:     destination given to Store4x4 for rows 0..7.
//   r8:     destination given to Store4x4 for rows 8..f.
//   stride: forwarded to Store4x4; the second call on each destination is
//           offset by 4 * stride (presumably the byte distance between rows).
static WEBP_INLINE void Store16x4(const __m128i* const p1,
                                  const __m128i* const p0,
                                  const __m128i* const q0,
                                  const __m128i* const q1,
                                  uint8_t* r0, uint8_t* r8,
                                  int stride) {
    // Byte interleave, columns 0 and 1:
    //   cols01_lo = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
    //   cols01_hi = f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
    __m128i cols01_lo = _mm_unpacklo_epi8(*p1, *p0);
    __m128i cols01_hi = _mm_unpackhi_epi8(*p1, *p0);

    // Byte interleave, columns 2 and 3:
    //   cols23_lo = 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
    //   cols23_hi = f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
    __m128i cols23_lo = _mm_unpacklo_epi8(*q0, *q1);
    __m128i cols23_hi = _mm_unpackhi_epi8(*q0, *q1);

    // 16-bit interleave: every 32-bit lane now holds one complete 4-byte row.
    //   rows_0_3 = 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
    //   rows_4_7 = 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
    //   rows_8_b = b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
    //   rows_c_f = f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
    // These stay non-const: their addresses are passed to Store4x4, whose
    // prototype is not visible from here.
    __m128i rows_0_3 = _mm_unpacklo_epi16(cols01_lo, cols23_lo);
    __m128i rows_4_7 = _mm_unpackhi_epi16(cols01_lo, cols23_lo);
    __m128i rows_8_b = _mm_unpacklo_epi16(cols01_hi, cols23_hi);
    __m128i rows_c_f = _mm_unpackhi_epi16(cols01_hi, cols23_hi);

    // Same call order as before: rows 0-3, 4-7, 8-b, c-f.
    Store4x4(&rows_0_3, r0, stride);
    Store4x4(&rows_4_7, r0 + 4 * stride, stride);
    Store4x4(&rows_8_b, r8, stride);
    Store4x4(&rows_c_f, r8 + 4 * stride, stride);
}
Пример #2
0
// Re-interleaves four column vectors into row order, in place, and hands the
// result to Store4x4 in four groups of four rows.
// In the layout notes below, "rc" is the byte for row r (hex digit 0..f) and
// column c (0..3); lanes are listed from most significant to least significant.
//   r0:     destination given to Store4x4 for rows 0..7.
//   r8:     destination given to Store4x4 for rows 8..f.
//   stride: forwarded to Store4x4; the second call on each destination is
//           offset by 4 * stride (presumably the byte distance between rows).
//   p1, p0, q0, q1: on entry, columns 0, 1, 2 and 3 respectively (one byte per
//           row). They are overwritten with the transposed rows, so the caller
//           must not expect the column data to survive the call.
//           NOTE(review): written assuming the four pointers reference
//           distinct vectors -- confirm against callers.
static WEBP_INLINE void Store16x4(uint8_t* r0, uint8_t* r8, int stride,
                                  __m128i* p1, __m128i* p0,
                                  __m128i* q0, __m128i* q1) {
    // Byte interleave, columns 0 and 1:
    //   cols01_lo = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
    //   cols01_hi = f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
    const __m128i cols01_lo = _mm_unpacklo_epi8(*p1, *p0);
    const __m128i cols01_hi = _mm_unpackhi_epi8(*p1, *p0);

    // Byte interleave, columns 2 and 3:
    //   cols23_lo = 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
    //   cols23_hi = f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
    const __m128i cols23_lo = _mm_unpacklo_epi8(*q0, *q1);
    const __m128i cols23_hi = _mm_unpackhi_epi8(*q0, *q1);

    // 16-bit interleave written back through the caller's pointers; every
    // 32-bit lane now holds one complete 4-byte row.
    //   *p0 = 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
    //   *q0 = 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
    //   *p1 = b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
    //   *q1 = f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
    *p0 = _mm_unpacklo_epi16(cols01_lo, cols23_lo);
    *q0 = _mm_unpackhi_epi16(cols01_lo, cols23_lo);
    *p1 = _mm_unpacklo_epi16(cols01_hi, cols23_hi);
    *q1 = _mm_unpackhi_epi16(cols01_hi, cols23_hi);

    // Same call order and same vector pointers as before: rows 0-3, 4-7,
    // 8-b, c-f.
    Store4x4(p0, r0, stride);
    Store4x4(q0, r0 + 4 * stride, stride);
    Store4x4(p1, r8, stride);
    Store4x4(q1, r8 + 4 * stride, stride);
}