Example #1
0
static void ConvertBGR24ToY_SSE41(const uint8_t* bgr, uint8_t* y, int width) {
  const int max_width = width & ~31;
  int i;
  for (i = 0; i < max_width; bgr += 3 * 16 * 2) {
    __m128i bgr_plane[6];
    int j;

    RGB24PackedToPlanar_SSE41(bgr, bgr_plane);

    for (j = 0; j < 2; ++j, i += 16) {
      const __m128i zero = _mm_setzero_si128();
      __m128i r, g, b, Y0, Y1;

      // Convert to 16-bit Y.
      b = _mm_unpacklo_epi8(bgr_plane[0 + j], zero);
      g = _mm_unpacklo_epi8(bgr_plane[2 + j], zero);
      r = _mm_unpacklo_epi8(bgr_plane[4 + j], zero);
      ConvertRGBToY_SSE41(&r, &g, &b, &Y0);

      // Convert to 16-bit Y.
      b = _mm_unpackhi_epi8(bgr_plane[0 + j], zero);
      g = _mm_unpackhi_epi8(bgr_plane[2 + j], zero);
      r = _mm_unpackhi_epi8(bgr_plane[4 + j], zero);
      ConvertRGBToY_SSE41(&r, &g, &b, &Y1);

      // Cast to 8-bit and store.
      STORE_16(_mm_packus_epi16(Y0, Y1), y + i);
    }
  }
  for (; i < width; ++i, bgr += 3) {  // left-over
    y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
  }
}
Example #2
0
static void ConvertBGR24ToY_NEON(const uint8_t* bgr, uint8_t* y, int width) {
  int i;
  for (i = 0; i + 8 <= width; i += 8, bgr += 3 * 8) {
    const uint8x8x3_t BGR = vld3_u8(bgr);
    const uint8x8_t Y = ConvertRGBToY_NEON(BGR.val[2], BGR.val[1], BGR.val[0]);
    vst1_u8(y + i, Y);
  }
  for (; i < width; ++i, bgr += 3) {  // left-over
    y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
  }
}
Example #3
0
static void ConvertRGB24ToY_NEON(const uint8_t* rgb, uint8_t* y, int width) {
  int i;
  for (i = 0; i + 8 <= width; i += 8, rgb += 3 * 8) {
    const uint8x8x3_t RGB = vld3_u8(rgb);
    const uint8x8_t Y = ConvertRGBToY_NEON(RGB.val[0], RGB.val[1], RGB.val[2]);
    vst1_u8(y + i, Y);
  }
  for (; i < width; ++i, rgb += 3) {   // left-over
    y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
  }
}
Example #4
0
void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
  const int red = (background_rgb >> 16) & 0xff;
  const int green = (background_rgb >> 8) & 0xff;
  const int blue = (background_rgb >> 0) & 0xff;
  int x, y;
  if (pic == NULL) return;
  if (!pic->use_argb) {
    const int uv_width = (pic->width >> 1);  // omit last pixel during u/v loop
    const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF);
    // VP8RGBToU/V expects the u/v values summed over four pixels
    const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
    const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
    const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT;
    if (!has_alpha || pic->a == NULL) return;    // nothing to do
    for (y = 0; y < pic->height; ++y) {
      // Luma blending
      uint8_t* const y_ptr = pic->y + y * pic->y_stride;
      uint8_t* const a_ptr = pic->a + y * pic->a_stride;
      for (x = 0; x < pic->width; ++x) {
        const int alpha = a_ptr[x];
        if (alpha < 0xff) {
          y_ptr[x] = BLEND(Y0, y_ptr[x], a_ptr[x]);
        }
      }
      // Chroma blending every even line
      if ((y & 1) == 0) {
        uint8_t* const u = pic->u + (y >> 1) * pic->uv_stride;
        uint8_t* const v = pic->v + (y >> 1) * pic->uv_stride;
        uint8_t* const a_ptr2 =
            (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride;
        for (x = 0; x < uv_width; ++x) {
          // Average four alpha values into a single blending weight.
          // TODO(skal): might lead to visible contouring. Can we do better?
          const int alpha =
              a_ptr[2 * x + 0] + a_ptr[2 * x + 1] +
              a_ptr2[2 * x + 0] + a_ptr2[2 * x + 1];
          u[x] = BLEND_10BIT(U0, u[x], alpha);
          v[x] = BLEND_10BIT(V0, v[x], alpha);
        }
        if (pic->width & 1) {   // rightmost pixel
          const int alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]);
          u[x] = BLEND_10BIT(U0, u[x], alpha);
          v[x] = BLEND_10BIT(V0, v[x], alpha);
        }
      }
      memset(a_ptr, 0xff, pic->width);
    }
  } else {