Exemplo n.º 1
0
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
                             int start_block, int end_block,
                             VP8Histogram* const histo) {
  int j;
  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
    {
      int k;
      v8i16 coeff0, coeff1;
      const v8i16 zero = { 0 };
      const v8i16 max_coeff_thr = __msa_ldi_h(MAX_COEFF_THRESH);
      LD_SH2(&out[0], 8, coeff0, coeff1);
      coeff0 = __msa_add_a_h(coeff0, zero);
      coeff1 = __msa_add_a_h(coeff1, zero);
      SRAI_H2_SH(coeff0, coeff1, 3);
      coeff0 = __msa_min_s_h(coeff0, max_coeff_thr);
      coeff1 = __msa_min_s_h(coeff1, max_coeff_thr);
      ST_SH2(coeff0, coeff1, &out[0], 8);
      for (k = 0; k < 16; ++k) {
        ++distribution[out[k]];
      }
    }
  }
  VP8SetHistogramData(distribution, histo);
}
Exemplo n.º 2
0
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
                             int start_block, int end_block,
                             VP8Histogram* const histo) {
  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
  int j;
  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    int k;

    FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);

    // Convert coefficients to bin (within out[]).
    {
      // Load.
      const __m128i out0 = _mm_loadu_si128((__m128i*)&out[0]);
      const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]);
      // sign(out) = out >> 15  (0x0000 if positive, 0xffff if negative)
      const __m128i sign0 = _mm_srai_epi16(out0, 15);
      const __m128i sign1 = _mm_srai_epi16(out1, 15);
      // abs(out) = (out ^ sign) - sign
      const __m128i xor0 = _mm_xor_si128(out0, sign0);
      const __m128i xor1 = _mm_xor_si128(out1, sign1);
      const __m128i abs0 = _mm_sub_epi16(xor0, sign0);
      const __m128i abs1 = _mm_sub_epi16(xor1, sign1);
      // v = abs(out) >> 3
      const __m128i v0 = _mm_srai_epi16(abs0, 3);
      const __m128i v1 = _mm_srai_epi16(abs1, 3);
      // bin = min(v, MAX_COEFF_THRESH)
      const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
      const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
      // Store.
      _mm_storeu_si128((__m128i*)&out[0], bin0);
      _mm_storeu_si128((__m128i*)&out[8], bin1);
    }

    // Convert coefficients to bin.
    for (k = 0; k < 16; ++k) {
      ++distribution[out[k]];
    }
  }
  VP8SetHistogramData(distribution, histo);
}
Exemplo n.º 3
0
static void CollectHistogram_SSE41(const uint8_t* ref, const uint8_t* pred,
                                   int start_block, int end_block,
                                   VP8Histogram* const histo) {
  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
  int j;
  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
  for (j = start_block; j < end_block; ++j) {
    int16_t out[16];
    int k;

    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);

    // Convert coefficients to bin (within out[]).
    {
      // Load.
      const __m128i out0 = _mm_loadu_si128((__m128i*)&out[0]);
      const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]);
      // v = abs(out) >> 3
      const __m128i abs0 = _mm_abs_epi16(out0);
      const __m128i abs1 = _mm_abs_epi16(out1);
      const __m128i v0 = _mm_srai_epi16(abs0, 3);
      const __m128i v1 = _mm_srai_epi16(abs1, 3);
      // bin = min(v, MAX_COEFF_THRESH)
      const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
      const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
      // Store.
      _mm_storeu_si128((__m128i*)&out[0], bin0);
      _mm_storeu_si128((__m128i*)&out[8], bin1);
    }

    // Convert coefficients to bin.
    for (k = 0; k < 16; ++k) {
      ++distribution[out[k]];
    }
  }
  VP8SetHistogramData(distribution, histo);
}