// Predictor10: average of (average of (L,TL), average of (T, TR)). static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i, j; __m128i L = _mm_cvtsi32_si128(out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]); const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]); const __m128i TR = _mm_loadu_si128((const __m128i*)&upper[i + 1]); __m128i avgTTR; Average2_m128i(&T, &TR, &avgTTR); for (j = 0; j < 4; ++j) { __m128i avgLTL, avg; Average2_m128i(&L, &TL, &avgLTL); Average2_m128i(&avgTTR, &avgLTL, &avg); L = _mm_add_epi8(avg, src); out[i + j] = _mm_cvtsi128_si32(L); // Rotate the pre-computed values for the next iteration. avgTTR = _mm_srli_si128(avgTTR, 4); TL = _mm_srli_si128(TL, 4); src = _mm_srli_si128(src, 4); } } if (i != num_pixels) { VP8LPredictorsAdd_C[10](in + i, upper + i, num_pixels - i, out + i); } }
// Predictor5: avg2(avg2(L, TR), T) static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; for (i = 0; i + 4 <= num_pixels; i += 4) { const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]); const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]); const __m128i TR = _mm_loadu_si128((const __m128i*)&upper[i + 1]); const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); __m128i avg, pred, res; Average2_m128i(&L, &TR, &avg); Average2_m128i(&avg, &T, &pred); res = _mm_sub_epi8(src, pred); _mm_storeu_si128((__m128i*)&out[i], res); } if (i != num_pixels) { VP8LPredictorsSub_C[5](in + i, upper + i, num_pixels - i, out + i); } }
// Inverse predictor #10 for a row segment, SSE2 version with the per-pixel
// step unrolled through the DO_PRED10 / DO_PRED10_SHIFT macros.
//   in         : input values for the current row segment (loaded as 'src').
//   upper      : row above; the loads below touch upper[i - 1] .. upper[i + 4].
//   num_pixels : number of pixels to process.
//   out        : destination; out[-1] is read to initialize 'L'.
// Pixels are handled in groups of four; whatever is left after the last full
// group is delegated to VP8LPredictorsAdd_C[10].
// NOTE(review): DO_PRED10 and DO_PRED10_SHIFT are defined outside this view.
// They presumably use the locals L, TL, avgTTR, src, out and i by name
// (DO_PRED10(k) producing out[i + k], DO_PRED10_SHIFT moving the vectors to
// the next pixel) -- confirm against the macro definitions before renaming
// any local here.
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) {
  int i;
  // Start with the pixel to the left of the segment in the low 32 bits.
  __m128i L = _mm_cvtsi32_si128(out[-1]);
  for (i = 0; i + 4 <= num_pixels; i += 4) {
    __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
    __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
    const __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
    const __m128i TR = _mm_loadu_si128((const __m128i*)&upper[i + 1]);
    __m128i avgTTR;
    // Combine T and TR once for the four pixels of this group; the result
    // lands in avgTTR (Average2_m128i is defined elsewhere in the file).
    Average2_m128i(&T, &TR, &avgTTR);
    // Four per-pixel steps, with a shift between consecutive ones.
    DO_PRED10(0);
    DO_PRED10_SHIFT;
    DO_PRED10(1);
    DO_PRED10_SHIFT;
    DO_PRED10(2);
    DO_PRED10_SHIFT;
    DO_PRED10(3);
  }
  // Fewer than four pixels remain: fall back to the C implementation.
  if (i != num_pixels) {
    VP8LPredictorsAdd_C[10](in + i, upper + i, num_pixels - i, out + i);
  }
}