Пример #1
0
/*
 * Blends a block of src into dst in place using fixed-point weights:
 * each output lane is (src * src_weight + dst * dst_weight) passed through
 * SRARI_H2_SH with a shift of MFQE_PRECISION, where
 * dst_weight = (1 << MFQE_PRECISION) - src_weight.
 *
 * The block is processed as 4 groups of 4 rows (16 rows in total), one
 * vector per row.
 *
 * src_ptr / src_stride: source rows and the distance between them.
 * dst_ptr / dst_stride: destination rows (read, blended, written back).
 * src_weight:           weight given to the source pixels.
 */
static void filter_by_weight16x16_msa(const uint8_t *src_ptr,
                                      int32_t src_stride, uint8_t *dst_ptr,
                                      int32_t dst_stride, int32_t src_weight) {
    const int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
    /* Both weights replicated across every 16-bit lane. */
    const v8i16 src_wt = __msa_fill_h(src_weight);
    const v8i16 dst_wt = __msa_fill_h(dst_weight);
    v16i8 src_rows[4], dst_rows[4];
    int32_t group, i;

    for (group = 0; group < 4; ++group) {
        /* Fetch four rows from each plane before any row is written back. */
        LD_SB4(src_ptr, src_stride,
               src_rows[0], src_rows[1], src_rows[2], src_rows[3]);
        src_ptr += (4 * src_stride);
        LD_SB4(dst_ptr, dst_stride,
               dst_rows[0], dst_rows[1], dst_rows[2], dst_rows[3]);

        for (i = 0; i < 4; ++i) {
            v8i16 src_r, src_l, dst_r, dst_l, sum_r, sum_l;

            /* Split each row into two vectors of 16-bit lanes. */
            UNPCK_UB_SH(src_rows[i], src_r, src_l);
            UNPCK_UB_SH(dst_rows[i], dst_r, dst_l);

            /* Weighted sum of source and destination in 16-bit lanes. */
            sum_r = (src_r * src_wt) + (dst_r * dst_wt);
            sum_l = (src_l * src_wt) + (dst_l * dst_wt);

            /* Scale back by MFQE_PRECISION, repack and store the row. */
            SRARI_H2_SH(sum_r, sum_l, MFQE_PRECISION);
            PCKEV_ST_SB(sum_r, sum_l, dst_ptr);
            dst_ptr += dst_stride;
        }
    }
}
Пример #2
0
// Reconstructs one block: runs two IDCT_1D_W passes over the coefficients in
// 'in', adds the result to four rows read from 'ref', clips, and stores the
// packed bytes to 'dst'. Both 'ref' and 'dst' are addressed with a row stride
// of BPS.
// NOTE(review): the helper macros are defined outside this view; comments on
// what each one does are inferred from its name and operand types -- confirm
// against the macro definitions.
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
                                      uint8_t* dst) {
  v8i16 input0, input1;
  v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
  v4i32 res0, res1, res2, res3;
  v16i8 dest0, dest1, dest2, dest3;
  const v16i8 zero = { 0 };

  // Load two vectors of 16-bit coefficients (second one at offset 8) and
  // widen them into four vectors of 32-bit lanes.
  LD_SH2(in, 8, input0, input1);
  UNPCK_SH_SW(input0, in0, in1);
  UNPCK_SH_SW(input1, in2, in3);
  // First 1-D pass, transpose in place, then second 1-D pass.
  IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
  TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
  IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
  // Presumably a rounding arithmetic right shift by 3 (MSA 'srari') that
  // scales the transform output; the result is then transposed back.
  SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
  TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
  // Load four rows from 'ref' and interleave them with zero twice
  // (bytes -> halfwords -> words), presumably zero-extending the pixels to
  // 32-bit lanes so they can be added to the transform output.
  LD_SB4(ref, BPS, dest0, dest1, dest2, dest3);
  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
             res0, res1, res2, res3);
  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
             res0, res1, res2, res3);
  // reference + residual, then clip (to [0, 255] per the macro name).
  ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
  CLIP_SW4_0_255(res0, res1, res2, res3);
  // Two pack-even-byte steps narrow the 32-bit lanes back down so that all
  // four rows end up in the single vector res0 (vt0/vt1 reused as scratch).
  PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
  res0 = (v4i32)__msa_pckev_b((v16i8)vt0, (v16i8)vt1);
  // Store four rows to 'dst' with stride BPS; 3, 2, 1, 0 presumably select
  // which 32-bit lane of res0 goes to each row -- TODO confirm lane order.
  ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
}