// Accumulates a histogram of transformed-coefficient magnitudes over the
// blocks [start_block, end_block) and passes it to VP8SetHistogramData().
//
// For every block index, the forward transform of 'ref' vs. 'pred' (both
// offset by VP8DspScan[index]) produces 16 coefficients. Each coefficient is
// replaced by min(|c| >> 3, MAX_COEFF_THRESH) and the bin with that index is
// incremented.
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
                             int start_block, int end_block,
                             VP8Histogram* const histo) {
  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
  // Loop-invariant vector constants.
  const v8i16 zero = { 0 };
  const v8i16 max_coeff_thr = __msa_ldi_h(MAX_COEFF_THRESH);
  int block = start_block;

  while (block < end_block) {
    int16_t coeffs[16];
    v8i16 lo, hi;
    int i;

    VP8FTransform(ref + VP8DspScan[block], pred + VP8DspScan[block], coeffs);

    // Process the 16 coefficients as two vectors of 8 halfwords.
    LD_SH2(&coeffs[0], 8, lo, hi);
    // add_a sums absolute values, so adding zero yields |coefficient|.
    lo = __msa_add_a_h(lo, zero);
    hi = __msa_add_a_h(hi, zero);
    SRAI_H2_SH(lo, hi, 3);
    // Clamp so that every value fits the histogram's last bin at most.
    lo = __msa_min_s_h(lo, max_coeff_thr);
    hi = __msa_min_s_h(hi, max_coeff_thr);
    ST_SH2(lo, hi, &coeffs[0], 8);

    // Binning is scalar: each clamped magnitude indexes 'distribution'.
    for (i = 0; i < 16; ++i) {
      distribution[coeffs[i]] += 1;
    }
    ++block;
  }
  VP8SetHistogramData(distribution, histo);
}
// Vectorized transform of 16 input values into 16 output values.
//
// Reads the 16 values in[16 * n], n = 0..15 (one value every 16 int16_t
// entries), runs them through four add/sub butterfly stages separated by
// halfword shuffles, arithmetic-shifts the result right by one and stores
// 16 int16_t values to 'out'.
static void FTransformWHT(const int16_t* in, int16_t* out) {
  // Shuffle-control vectors used to regroup lanes between butterfly stages.
  const v8i16 perm0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
  const v8i16 perm1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
  const v8i16 perm2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
  const v8i16 perm3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
  v8i16 src_a = { 0 };
  v8i16 src_b = { 0 };
  v8i16 sum1, diff1, mix1_a, mix1_b;
  v8i16 sum2, diff2, mix2_a, mix2_b;
  v8i16 sum3, diff3, mix3_a, mix3_b;
  v8i16 res0, res1;

  // Gather: src_a holds inputs n = {0, 4, 8, 12, 1, 5, 9, 13}.
  src_a = __msa_insert_h(src_a, 0, in[16 * 0]);
  src_a = __msa_insert_h(src_a, 1, in[16 * 4]);
  src_a = __msa_insert_h(src_a, 2, in[16 * 8]);
  src_a = __msa_insert_h(src_a, 3, in[16 * 12]);
  src_a = __msa_insert_h(src_a, 4, in[16 * 1]);
  src_a = __msa_insert_h(src_a, 5, in[16 * 5]);
  src_a = __msa_insert_h(src_a, 6, in[16 * 9]);
  src_a = __msa_insert_h(src_a, 7, in[16 * 13]);
  // Gather: src_b holds inputs n = {3, 7, 11, 15, 2, 6, 10, 14}.
  src_b = __msa_insert_h(src_b, 0, in[16 * 3]);
  src_b = __msa_insert_h(src_b, 1, in[16 * 7]);
  src_b = __msa_insert_h(src_b, 2, in[16 * 11]);
  src_b = __msa_insert_h(src_b, 3, in[16 * 15]);
  src_b = __msa_insert_h(src_b, 4, in[16 * 2]);
  src_b = __msa_insert_h(src_b, 5, in[16 * 6]);
  src_b = __msa_insert_h(src_b, 6, in[16 * 10]);
  src_b = __msa_insert_h(src_b, 7, in[16 * 14]);

  // Stage 1.
  ADDSUB2(src_a, src_b, sum1, diff1);
  VSHF_H2_SH(sum1, diff1, sum1, diff1, perm0, perm1, mix1_a, mix1_b);
  // Stage 2.
  ADDSUB2(mix1_a, mix1_b, sum2, diff2);
  VSHF_H2_SH(sum2, diff2, sum2, diff2, perm2, perm3, mix2_a, mix2_b);
  // Stage 3.
  ADDSUB2(mix2_a, mix2_b, sum3, diff3);
  VSHF_H2_SH(sum3, diff3, sum3, diff3, perm0, perm1, mix3_a, mix3_b);
  // Stage 4, followed by the final halving.
  ADDSUB2(mix3_a, mix3_b, res0, res1);
  SRAI_H2_SH(res0, res1, 1);

  ST_SH2(res0, res1, out, 8);
}