/*
 * Fill a 16x16 destination block with the constant byte value 128.
 *
 * dst        - first byte of the destination block
 * dst_stride - row pitch handed to ST_UB8 (presumably bytes between rows;
 *              confirm against the ST_UB8 macro definition)
 *
 * The block is written as two groups of eight vector stores: the first
 * group starts at dst, the second at dst + 8 * dst_stride.
 */
static void intra_predict_128dc_16x16_msa(uint8_t *dst, int32_t dst_stride)
{
    /* Every byte lane of the vector holds 128. */
    const v16u8 fill = (v16u8)__msa_ldi_b(128);
    uint8_t *const upper_rows = dst;
    uint8_t *const lower_rows = dst + 8 * dst_stride;

    ST_UB8(fill, fill, fill, fill, fill, fill, fill, fill,
           upper_rows, dst_stride);
    ST_UB8(fill, fill, fill, fill, fill, fill, fill, fill,
           lower_rows, dst_stride);
}
/*
 * Vertical prediction for a 16x16 block: load one 16-byte vector from src
 * and store that same vector into every row of the destination block.
 *
 * src        - source of the 16 bytes to replicate; only read, hence
 *              const-qualified like the other predictors' source pointers
 * dst        - first byte of the destination block
 * dst_stride - row pitch handed to ST_UB8 (presumably bytes between rows;
 *              confirm against the ST_UB8 macro definition)
 *
 * The block is written as two groups of eight vector stores, the second
 * group starting 8 * dst_stride past the first.
 */
static void intra_predict_vert_16x16_msa(const uint8_t *src, uint8_t *dst,
                                         int32_t dst_stride)
{
    const v16u8 out = LD_UB(src);

    ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
    dst += (8 * dst_stride);
    ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
}
static void intra_predict_dc_tl_16x16_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { v16u8 data, out; v8u16 sum_h; v4u32 sum_w; v2u64 sum_d; data = LD_UB(src); sum_h = __msa_hadd_u_h(data, data); sum_w = __msa_hadd_u_w(sum_h, sum_h); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4); out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); dst += (8 * dst_stride); ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); }
static void intra_predict_dc_16x16_msa(const uint8_t *src_top, const uint8_t *src_left, uint8_t *dst, int32_t dst_stride) { v16u8 top, left, out; v8u16 sum_h, sum_top, sum_left; v4u32 sum_w; v2u64 sum_d; top = LD_UB(src_top); left = LD_UB(src_left); HADD_UB2_UH(top, left, sum_top, sum_left); sum_h = sum_top + sum_left; sum_w = __msa_hadd_u_w(sum_h, sum_h); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5); out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); dst += (8 * dst_stride); ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); }