Beispiel #1
0
// Predictor10: average of (average of (L,TL), average of (T, TR)).
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
                                int num_pixels, uint32_t* out) {
  int i, j;
  __m128i L = _mm_cvtsi32_si128(out[-1]);
  for (i = 0; i + 4 <= num_pixels; i += 4) {
    __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
    __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
    const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
    const __m128i TR = _mm_loadu_si128((const __m128i*)&upper[i + 1]);
    __m128i avgTTR;
    Average2_m128i(&T, &TR, &avgTTR);
    for (j = 0; j < 4; ++j) {
      __m128i avgLTL, avg;
      Average2_m128i(&L, &TL, &avgLTL);
      Average2_m128i(&avgTTR, &avgLTL, &avg);
      L = _mm_add_epi8(avg, src);
      out[i + j] = _mm_cvtsi128_si32(L);
      // Rotate the pre-computed values for the next iteration.
      avgTTR = _mm_srli_si128(avgTTR, 4);
      TL = _mm_srli_si128(TL, 4);
      src = _mm_srli_si128(src, 4);
    }
  }
  if (i != num_pixels) {
    VP8LPredictorsAdd_C[10](in + i, upper + i, num_pixels - i, out + i);
  }
}
Beispiel #2
0
// Predictor5: avg2(avg2(L, TR), T)
static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
                               int num_pixels, uint32_t* out) {
  int i;
  for (i = 0; i + 4 <= num_pixels; i += 4) {
    const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
    const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
    const __m128i TR = _mm_loadu_si128((const __m128i*)&upper[i + 1]);
    const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
    __m128i avg, pred, res;
    Average2_m128i(&L, &TR, &avg);
    Average2_m128i(&avg, &T, &pred);
    res = _mm_sub_epi8(src, pred);
    _mm_storeu_si128((__m128i*)&out[i], res);
  }
  if (i != num_pixels) {
    VP8LPredictorsSub_C[5](in + i, upper + i, num_pixels - i, out + i);
  }
}
Beispiel #3
0
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
                                int num_pixels, uint32_t* out) {
  int i;
  __m128i L = _mm_cvtsi32_si128(out[-1]);
  for (i = 0; i + 4 <= num_pixels; i += 4) {
    __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
    __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
    const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
    const __m128i TR = _mm_loadu_si128((const __m128i*)&upper[i + 1]);
    __m128i avgTTR;
    Average2_m128i(&T, &TR, &avgTTR);
    DO_PRED10(0);
    DO_PRED10_SHIFT;
    DO_PRED10(1);
    DO_PRED10_SHIFT;
    DO_PRED10(2);
    DO_PRED10_SHIFT;
    DO_PRED10(3);
  }
  if (i != num_pixels) {
    VP8LPredictorsAdd_C[10](in + i, upper + i, num_pixels - i, out + i);
  }
}