static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left, const uint8_t* top) { int DC; v16u8 out; if (top != NULL && left != NULL) { const v16u8 rtop = LD_UB(top); const v8u16 dctop = __msa_hadd_u_h(rtop, rtop); const v16u8 rleft = LD_UB(left); const v8u16 dcleft = __msa_hadd_u_h(rleft, rleft); const v8u16 dctemp = dctop + dcleft; DC = HADD_UH_U32(dctemp); DC = (DC + 16) >> 5; } else if (left != NULL) { // left but no top
/*
 * Signed sum of the per-pixel differences between a 4x4 source block and a
 * 4x4 prediction block.
 *
 * p_src / i_src_stride       : first source row and distance between rows.
 * pred_ptr / i_pred_stride   : first prediction row and distance between rows.
 *
 * Four 32-bit words are fetched from each block and packed into one vector
 * per block.  The two vectors are byte-interleaved and reduced with a
 * horizontal subtract, giving 16 halfword differences spread over two
 * vectors (presumably src - pred per pixel; confirm against the definitions
 * of ILVRL_B2_UB and HSUB_UB2_SH, which are not visible here).
 *
 * Returns the reduced sum narrowed to int16_t and then widened to int32_t.
 */
static int32_t subtract_sum4x4_msa( uint8_t *p_src, int32_t i_src_stride,
                                    uint8_t *pred_ptr, int32_t i_pred_stride )
{
    uint32_t i_src_row0, i_src_row1, i_src_row2, i_src_row3;
    uint32_t i_pred_row0, i_pred_row1, i_pred_row2, i_pred_row3;
    v16i8 src_vec = { 0 };
    v16i8 pred_vec = { 0 };
    v16u8 mixed_r, mixed_l;
    v8i16 delta_r, delta_l;

    /* Source block: one word per row, gathered into a single vector. */
    LW4( p_src, i_src_stride, i_src_row0, i_src_row1, i_src_row2, i_src_row3 );
    INSERT_W4_SB( i_src_row0, i_src_row1, i_src_row2, i_src_row3, src_vec );

    /* Prediction block, handled the same way. */
    LW4( pred_ptr, i_pred_stride,
         i_pred_row0, i_pred_row1, i_pred_row2, i_pred_row3 );
    INSERT_W4_SB( i_pred_row0, i_pred_row1, i_pred_row2, i_pred_row3,
                  pred_vec );

    /* Pair up source and prediction bytes, then subtract within each pair. */
    ILVRL_B2_UB( src_vec, pred_vec, mixed_r, mixed_l );
    HSUB_UB2_SH( mixed_r, mixed_l, delta_r, delta_l );

    /*
     * The reduction macro is named for unsigned halfwords, so negative
     * differences presumably enter the total as large unsigned values.
     * Narrowing to int16_t keeps only the low 16 bits, and the widening on
     * return sign-extends them; this appears to be how the signed total is
     * recovered.
     */
    return ( int16_t )HADD_UH_U32( delta_r + delta_l );
}
/* Adds up the bytes of sixteen rows, each loaded as one v16u8 vector.
 *
 * src    : address of the first row.
 * stride : distance between consecutive rows.
 *
 * The rows are loaded in two groups of eight.  Every row is first reduced
 * with HADD_UB4_UB (presumably adjacent bytes are added into halfword lanes;
 * the macro is defined elsewhere), then each row's lanes are collapsed to a
 * scalar with HADD_UH_U32 and accumulated.
 *
 * Returns the accumulated total. */
static int32_t sum_u8src_16width_msa(uint8_t *src, int32_t stride) {
  uint8_t *lower_rows = src + (8 * stride);
  v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
  v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
  uint32_t upper_total;
  uint32_t lower_total;

  LD_UB8(src, stride, row0, row1, row2, row3, row4, row5, row6, row7);
  LD_UB8(lower_rows, stride, row8, row9, row10, row11, row12, row13, row14,
         row15);

  /* In-place reduction of every row, four rows per macro call. */
  HADD_UB4_UB(row0, row1, row2, row3, row0, row1, row2, row3);
  HADD_UB4_UB(row4, row5, row6, row7, row4, row5, row6, row7);
  HADD_UB4_UB(row8, row9, row10, row11, row8, row9, row10, row11);
  HADD_UB4_UB(row12, row13, row14, row15, row12, row13, row14, row15);

  /* Rows 0..7. */
  upper_total = HADD_UH_U32(row0);
  upper_total += HADD_UH_U32(row1);
  upper_total += HADD_UH_U32(row2);
  upper_total += HADD_UH_U32(row3);
  upper_total += HADD_UH_U32(row4);
  upper_total += HADD_UH_U32(row5);
  upper_total += HADD_UH_U32(row6);
  upper_total += HADD_UH_U32(row7);

  /* Rows 8..15. */
  lower_total = HADD_UH_U32(row8);
  lower_total += HADD_UH_U32(row9);
  lower_total += HADD_UH_U32(row10);
  lower_total += HADD_UH_U32(row11);
  lower_total += HADD_UH_U32(row12);
  lower_total += HADD_UH_U32(row13);
  lower_total += HADD_UH_U32(row14);
  lower_total += HADD_UH_U32(row15);

  return upper_total + lower_total;
}
static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left, const uint8_t* top) { int DC; v16u8 out; if (top != NULL && left != NULL) { const v16u8 rtop = LD_UB(top); const v8u16 dctop = __msa_hadd_u_h(rtop, rtop); const v16u8 rleft = LD_UB(left); const v8u16 dcleft = __msa_hadd_u_h(rleft, rleft); const v8u16 dctemp = dctop + dcleft; DC = HADD_UH_U32(dctemp); DC = (DC + 16) >> 5; } else if (left != NULL) { // left but no top const v16u8 rleft = LD_UB(left); const v8u16 dcleft = __msa_hadd_u_h(rleft, rleft); DC = HADD_UH_U32(dcleft); DC = (DC + DC + 16) >> 5; } else if (top != NULL) { // top but no left const v16u8 rtop = LD_UB(top); const v8u16 dctop = __msa_hadd_u_h(rtop, rtop); DC = HADD_UH_U32(dctop); DC = (DC + DC + 16) >> 5; } else { // no top, no left, nothing. DC = 0x80; } out = (v16u8)__msa_fill_b(DC); STORE16x16(out, dst); } static void Intra16Preds(uint8_t* dst, const uint8_t* left, const uint8_t* top) {