Exemplo n.º 1
0
static int TTransform(const uint8_t* in, const uint16_t* w) {
  int sum;
  uint32_t in0_m, in1_m, in2_m, in3_m;
  v16i8 src0;
  v8i16 in0, in1, tmp0, tmp1, tmp2, tmp3;
  v4i32 dst0, dst1;
  const v16i8 zero = { 0 };
  const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
  const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
  const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
  const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };

  LW4(in, BPS, in0_m, in1_m, in2_m, in3_m);
  INSERT_W4_SB(in0_m, in1_m, in2_m, in3_m, src0);
  ILVRL_B2_SH(zero, src0, tmp0, tmp1);
  VSHF_H2_SH(tmp0, tmp1, tmp0, tmp1, mask2, mask3, in0, in1);
  ADDSUB2(in0, in1, tmp0, tmp1);
  VSHF_H2_SH(tmp0, tmp1, tmp0, tmp1, mask0, mask1, tmp2, tmp3);
  ADDSUB2(tmp2, tmp3, tmp0, tmp1);
  VSHF_H2_SH(tmp0, tmp1, tmp0, tmp1, mask2, mask3, in0, in1);
  ADDSUB2(in0, in1, tmp0, tmp1);
  VSHF_H2_SH(tmp0, tmp1, tmp0, tmp1, mask0, mask1, tmp2, tmp3);
  ADDSUB2(tmp2, tmp3, tmp0, tmp1);
  tmp0 = __msa_add_a_h(tmp0, (v8i16)zero);
  tmp1 = __msa_add_a_h(tmp1, (v8i16)zero);
  LD_SH2(w, 8, tmp2, tmp3);
  DOTP_SH2_SW(tmp0, tmp1, tmp2, tmp3, dst0, dst1);
  dst0 = dst0 + dst1;
  sum = HADD_SW_S32(dst0);
  return sum;
}
Exemplo n.º 2
0
static void avc_sub4x4_dct_msa( uint8_t *p_src, int32_t i_src_stride,
                                uint8_t *p_ref, int32_t i_dst_stride,
                                int16_t *p_dst )
{
    uint32_t i_src0, i_src1, i_src2, i_src3;
    uint32_t i_ref0, i_ref1, i_ref2, i_ref3;
    v16i8 src = { 0 };
    v16i8 ref = { 0 };
    v16u8 inp0, inp1;
    v8i16 diff0, diff1, diff2, diff3;
    v8i16 temp0, temp1, temp2, temp3;

    LW4( p_src, i_src_stride, i_src0, i_src1, i_src2, i_src3 );
    LW4( p_ref, i_dst_stride, i_ref0, i_ref1, i_ref2, i_ref3 );

    INSERT_W4_SB( i_src0, i_src1, i_src2, i_src3, src );
    INSERT_W4_SB( i_ref0, i_ref1, i_ref2, i_ref3, ref );

    ILVRL_B2_UB( src, ref, inp0, inp1 );

    HSUB_UB2_SH( inp0, inp1, diff0, diff2 );

    diff1 = ( v8i16 ) __msa_ilvl_d( ( v2i64 ) diff0, ( v2i64 ) diff0 );
    diff3 = ( v8i16 ) __msa_ilvl_d( ( v2i64 ) diff2, ( v2i64 ) diff2 );

    BUTTERFLY_4( diff0, diff1, diff2, diff3, temp0, temp1, temp2, temp3 );

    diff0 = temp0 + temp1;
    diff1 = ( temp3 << 1 ) + temp2;
    diff2 = temp0 - temp1;
    diff3 = temp3 - ( temp2 << 1 );

    TRANSPOSE4x4_SH_SH( diff0, diff1, diff2, diff3,
                        temp0, temp1, temp2, temp3 );
    BUTTERFLY_4( temp0, temp1, temp2, temp3, diff0, diff1, diff2, diff3 );

    temp0 = diff0 + diff1;
    temp1 = ( diff3 << 1 ) + diff2;
    temp2 = diff0 - diff1;
    temp3 = diff3 - ( diff2 << 1 );

    ILVR_D2_UB( temp1, temp0, temp3, temp2, inp0, inp1 );
    ST_UB2( inp0, inp1, p_dst, 8 );
}
Exemplo n.º 3
0
static int32_t subtract_sum4x4_msa( uint8_t *p_src, int32_t i_src_stride,
                                    uint8_t *pred_ptr, int32_t i_pred_stride )
{
    int16_t i_sum;
    uint32_t i_src0, i_src1, i_src2, i_src3;
    uint32_t i_pred0, i_pred1, i_pred2, i_pred3;
    v16i8 src = { 0 };
    v16i8 pred = { 0 };
    v16u8 src_l0, src_l1;
    v8i16 diff0, diff1;

    LW4( p_src, i_src_stride, i_src0, i_src1, i_src2, i_src3 );
    LW4( pred_ptr, i_pred_stride, i_pred0, i_pred1, i_pred2, i_pred3 );
    INSERT_W4_SB( i_src0, i_src1, i_src2, i_src3, src );
    INSERT_W4_SB( i_pred0, i_pred1, i_pred2, i_pred3, pred );
    ILVRL_B2_UB( src, pred, src_l0, src_l1 );
    HSUB_UB2_SH( src_l0, src_l1, diff0, diff1 );
    i_sum = HADD_UH_U32( diff0 + diff1 );

    return i_sum;
}