/*
 * 4x4 inverse DCT followed by rounding and an add-and-store into dst.
 *
 * input      - 16-bit values of the 4x4 block, read with LD4x4_SH.
 * dst        - block handed to ADDBLK_ST4x4_UB together with the result.
 * dst_stride - stride handed to ADDBLK_ST4x4_UB together with dst.
 */
void aom_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst,
                            int32_t dst_stride) {
  v8i16 row0, row1, row2, row3;

  /* fetch the four vectors that hold the 4x4 block */
  LD4x4_SH(input, row0, row1, row2, row3);

  /* first pass: rows */
  TRANSPOSE4x4_SH_SH(row0, row1, row2, row3, row0, row1, row2, row3);
  AOM_IDCT4x4(row0, row1, row2, row3, row0, row1, row2, row3);

  /* second pass: columns */
  TRANSPOSE4x4_SH_SH(row0, row1, row2, row3, row0, row1, row2, row3);
  AOM_IDCT4x4(row0, row1, row2, row3, row0, row1, row2, row3);

  /* round: add 2^3, then divide by 2^4 */
  SRARI_H4_SH(row0, row1, row2, row3, 4);

  /* add block and store 4x4 */
  ADDBLK_ST4x4_UB(row0, row1, row2, row3, dst, dst_stride);
}
void vp9_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride, int32_t tx_type) { v8i16 in0, in1, in2, in3; /* load vector elements of 4x4 block */ LD4x4_SH(input, in0, in1, in2, in3); TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); switch (tx_type) { case DCT_DCT: /* DCT in horizontal */ VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); /* DCT in vertical */ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); break; case ADST_DCT: /* DCT in horizontal */ VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); /* ADST in vertical */ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3); break; case DCT_ADST: /* ADST in horizontal */ VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3); /* DCT in vertical */ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); break; case ADST_ADST: /* ADST in horizontal */ VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3); /* ADST in vertical */ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3); break; default: assert(0); break; } /* final rounding (add 2^3, divide by 2^4) and shift */ SRARI_H4_SH(in0, in1, in2, in3, 4); /* add block and store 4x4 */ ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride); }
/*
 * 4x4 inverse transform of the values at p_src, rounded with a shift of 6
 * and passed to ADDBLK_ST4x4_UB with p_dst; afterwards two zero vectors
 * are stored back through p_src.
 *
 * p_dst        - block handed to ADDBLK_ST4x4_UB together with the result.
 * p_src        - 16-bit input values, read with LD4x4_SH and then
 *                overwritten with zeros by ST_SH2.
 * i_dst_stride - stride handed to ADDBLK_ST4x4_UB together with p_dst.
 */
static void avc_idct4x4_addblk_msa( uint8_t *p_dst, int16_t *p_src,
                                    int32_t i_dst_stride )
{
    v8i16 zero_vec = { 0 };
    v8i16 coef0, coef1, coef2, coef3;
    v8i16 horz0, horz1, horz2, horz3;
    v8i16 vert0, vert1, vert2, vert3;

    LD4x4_SH( p_src, coef0, coef1, coef2, coef3 );

    /* first 1-D pass */
    AVC_ITRANS_H( coef0, coef1, coef2, coef3,
                  horz0, horz1, horz2, horz3 );

    /* transpose in place, then run the same 1-D pass a second time */
    TRANSPOSE4x4_SH_SH( horz0, horz1, horz2, horz3,
                        horz0, horz1, horz2, horz3 );
    AVC_ITRANS_H( horz0, horz1, horz2, horz3,
                  vert0, vert1, vert2, vert3 );

    /* rounding shift right by 6 */
    SRARI_H4_SH( vert0, vert1, vert2, vert3, 6 );
    ADDBLK_ST4x4_UB( vert0, vert1, vert2, vert3, p_dst, i_dst_stride );

    /* write zeros back to the source buffer
     * NOTE(review): presumably this clears all 16 coefficients - confirm
     * against the ST_SH2 definition */
    ST_SH2( zero_vec, zero_vec, p_src, 8 );
}
void vp10_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride, int32_t tx_type) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; /* load vector elements of 8x8 block */ LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); switch (tx_type) { case DCT_DCT: /* DCT in horizontal */ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); /* DCT in vertical */ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); break; case ADST_DCT: /* DCT in horizontal */ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); /* ADST in vertical */ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); break; case DCT_ADST: /* ADST in horizontal */ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); /* DCT in vertical */ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); break; case ADST_ADST: /* ADST in horizontal */ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); /* ADST in vertical */ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); break; default: assert(0); break; } /* final rounding (add 2^4, divide by 2^5) and shift */ SRARI_H4_SH(in0, in1, in2, in3, 5); SRARI_H4_SH(in4, in5, in6, in7, 5); /* add block and store 8x8 */ 
VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3); dst += (4 * dst_stride); VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); }