static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput, const uint8_t* ppred, uint8_t* poutput, int stride, int size) { int w; const v16i8 zero = { 0 }; while (size >= 16) { v16u8 pred0, dst0; v8i16 a0, a1, b0, b1, c0, c1; const v16u8 tmp0 = LD_UB(ppred - 1); const v16u8 tmp1 = LD_UB(ppred - stride); const v16u8 tmp2 = LD_UB(ppred - stride - 1); const v16u8 src0 = LD_UB(pinput); ILVRL_B2_SH(zero, tmp0, a0, a1); ILVRL_B2_SH(zero, tmp1, b0, b1); ILVRL_B2_SH(zero, tmp2, c0, c1); ADD2(a0, b0, a1, b1, a0, a1); SUB2(a0, c0, a1, c1, a0, a1); CLIP_SH2_0_255(a0, a1); pred0 = (v16u8)__msa_pckev_b((v16i8)a1, (v16i8)a0); dst0 = src0 - pred0; ST_UB(dst0, poutput); ppred += 16; pinput += 16; poutput += 16; size -= 16; } for (w = 0; w < size; ++w) { const int pred = ppred[w - 1] + ppred[w - stride] - ppred[w - stride - 1]; poutput[w] = pinput[w] - (pred < 0 ? 0 : pred > 255 ? 255 : pred); } }
static void hevc_addblk_4x4_msa(int16_t *coeffs, uint8_t *dst, int32_t stride) { uint32_t dst0, dst1, dst2, dst3; v8i16 dst_r0, dst_l0, in0, in1; v4i32 dst_vec = { 0 }; v16u8 zeros = { 0 }; LD_SH2(coeffs, 8, in0, in1); LW4(dst, stride, dst0, dst1, dst2, dst3); INSERT_W4_SW(dst0, dst1, dst2, dst3, dst_vec); ILVRL_B2_SH(zeros, dst_vec, dst_r0, dst_l0); ADD2(dst_r0, in0, dst_l0, in1, dst_r0, dst_l0); CLIP_SH2_0_255(dst_r0, dst_l0); dst_vec = (v4i32) __msa_pckev_b((v16i8) dst_l0, (v16i8) dst_r0); ST4x4_UB(dst_vec, dst_vec, 0, 1, 2, 3, dst, stride); }
static void avc_idct4x4_addblk_dc_msa( uint8_t *p_dst, int16_t *p_src, int32_t i_dst_stride ) { int16_t i_dc; uint32_t i_src0, i_src1, i_src2, i_src3; v16u8 pred = { 0 }; v16i8 out; v8i16 input_dc, pred_r, pred_l; i_dc = ( p_src[0] + 32 ) >> 6; input_dc = __msa_fill_h( i_dc ); p_src[ 0 ] = 0; LW4( p_dst, i_dst_stride, i_src0, i_src1, i_src2, i_src3 ); INSERT_W4_UB( i_src0, i_src1, i_src2, i_src3, pred ); UNPCK_UB_SH( pred, pred_r, pred_l ); pred_r += input_dc; pred_l += input_dc; CLIP_SH2_0_255( pred_r, pred_l ); out = __msa_pckev_b( ( v16i8 ) pred_l, ( v16i8 ) pred_r ); ST4x4_UB( out, out, 0, 1, 2, 3, p_dst, i_dst_stride ); }