Exemple #1
0
static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
                                  uint8_t* dst) {
  const __m128i* in = (const __m128i*)src;
  __m128i* out = (__m128i*)dst;

  while (num_pixels >= 32) {
    // Load the BGRA buffers.
    __m128i in0 = _mm_loadu_si128(in + 0);
    __m128i in1 = _mm_loadu_si128(in + 1);
    __m128i in2 = _mm_loadu_si128(in + 2);
    __m128i in3 = _mm_loadu_si128(in + 3);
    __m128i in4 = _mm_loadu_si128(in + 4);
    __m128i in5 = _mm_loadu_si128(in + 5);
    __m128i in6 = _mm_loadu_si128(in + 6);
    __m128i in7 = _mm_loadu_si128(in + 7);
    VP8L32bToPlanar_SSE2(&in0, &in1, &in2, &in3);
    VP8L32bToPlanar_SSE2(&in4, &in5, &in6, &in7);
    // At this points, in1/in5 contains red only, in2/in6 green only ...
    // Pack the colors in 24b RGB.
    VP8PlanarTo24b_SSE2(&in1, &in5, &in2, &in6, &in3, &in7);
    _mm_storeu_si128(out + 0, in1);
    _mm_storeu_si128(out + 1, in5);
    _mm_storeu_si128(out + 2, in2);
    _mm_storeu_si128(out + 3, in6);
    _mm_storeu_si128(out + 4, in3);
    _mm_storeu_si128(out + 5, in7);
    in += 8;
    out += 6;
    num_pixels -= 32;
  }
  // left-overs
  if (num_pixels > 0) {
    VP8LConvertBGRAToRGB_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
  }
}
static void ConvertBGRAToRGB(const uint32_t* src,
                             int num_pixels, uint8_t* dst) {
  const uint32_t* const end = src + (num_pixels & ~15);
  for (; src < end; src += 16) {
    const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
    const uint8x16x3_t tmp = { { pixel.val[2], pixel.val[1], pixel.val[0] } };
    vst3q_u8(dst, tmp);
    dst += 48;
  }
  VP8LConvertBGRAToRGB_C(src, num_pixels & 15, dst);  // left-overs
}
static void ConvertBGRAToRGB(const uint32_t* src,
                             int num_pixels, uint8_t* dst) {
  const uint32_t* const end = src + (num_pixels & ~7);
  const uint8x8_t shuffle0 = vld1_u8(kRGBShuffle[0]);
  const uint8x8_t shuffle1 = vld1_u8(kRGBShuffle[1]);
  const uint8x8_t shuffle2 = vld1_u8(kRGBShuffle[2]);
  for (; src < end; src += 8) {
    uint8x8x4_t pixels;
    INIT_VECTOR4(pixels,
                 vld1_u8((const uint8_t*)(src + 0)),
                 vld1_u8((const uint8_t*)(src + 2)),
                 vld1_u8((const uint8_t*)(src + 4)),
                 vld1_u8((const uint8_t*)(src + 6)));
    vst1_u8(dst +  0, vtbl4_u8(pixels, shuffle0));
    vst1_u8(dst +  8, vtbl4_u8(pixels, shuffle1));
    vst1_u8(dst + 16, vtbl4_u8(pixels, shuffle2));
    dst += 8 * 3;
  }
  VP8LConvertBGRAToRGB_C(src, num_pixels & 7, dst);  // left-overs
}