Ejemplo n.º 1
0
static void avc_dct4x4dc_msa( int16_t *p_src, int16_t *p_dst,
                              int32_t i_src_stride )
{
    v8i16 src0, src1, src2, src3, ver_res0, ver_res1, ver_res2, ver_res3;
    v4i32 src0_r, src1_r, src2_r, src3_r, tmp0, tmp1, tmp2, tmp3;
    v4i32 hor_res0, hor_res1, hor_res2, hor_res3;
    v4i32 ver_res0_r, ver_res1_r, ver_res2_r, ver_res3_r;

    LD_SH4( p_src, i_src_stride, src0, src1, src2, src3 );
    UNPCK_R_SH_SW( src0, src0_r );
    UNPCK_R_SH_SW( src1, src1_r );
    UNPCK_R_SH_SW( src2, src2_r );
    UNPCK_R_SH_SW( src3, src3_r );
    BUTTERFLY_4( src0_r, src2_r, src3_r, src1_r,
                 tmp0, tmp3, tmp2, tmp1 );
    BUTTERFLY_4( tmp0, tmp1, tmp2, tmp3,
                 hor_res0, hor_res3, hor_res2, hor_res1 );
    TRANSPOSE4x4_SW_SW( hor_res0, hor_res1, hor_res2, hor_res3,
                        hor_res0, hor_res1, hor_res2, hor_res3 );
    BUTTERFLY_4( hor_res0, hor_res2, hor_res3, hor_res1,
                 tmp0, tmp3, tmp2, tmp1 );
    BUTTERFLY_4( tmp0, tmp1, tmp2, tmp3,
                 ver_res0_r, ver_res3_r, ver_res2_r, ver_res1_r );
    SRARI_W4_SW( ver_res0_r, ver_res1_r, ver_res2_r, ver_res3_r, 1 );
    PCKEV_H4_SH( ver_res0_r, ver_res0_r, ver_res1_r, ver_res1_r,
                 ver_res2_r, ver_res2_r, ver_res3_r, ver_res3_r,
                 ver_res0, ver_res1, ver_res2, ver_res3 );
    PCKOD_D2_SH( ver_res1, ver_res0, ver_res3, ver_res2, ver_res0, ver_res2 );
    ST_SH2( ver_res0, ver_res2, p_dst, 8 );
}
Ejemplo n.º 2
0
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
                                      uint8_t* dst) {
  v8i16 input0, input1;
  v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
  v4i32 res0, res1, res2, res3;
  v16i8 dest0, dest1, dest2, dest3;
  const v16i8 zero = { 0 };

  LD_SH2(in, 8, input0, input1);
  UNPCK_SH_SW(input0, in0, in1);
  UNPCK_SH_SW(input1, in2, in3);
  IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
  TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
  IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
  SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
  TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
  LD_SB4(ref, BPS, dest0, dest1, dest2, dest3);
  ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
             res0, res1, res2, res3);
  ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
             res0, res1, res2, res3);
  ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
  CLIP_SW4_0_255(res0, res1, res2, res3);
  PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
  res0 = (v4i32)__msa_pckev_b((v16i8)vt0, (v16i8)vt1);
  ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
}