Example #1
0
/* Blends a source block into a destination block, overwriting the
 * destination.
 *
 * src_ptr / src_stride: first source row and the step between source rows.
 * dst_ptr / dst_stride: first destination row and the step between
 *                       destination rows; this is also where the result is
 *                       written.
 * src_weight:           weight applied to the source; the destination gets
 *                       (1 << MFQE_PRECISION) - src_weight.
 *
 * Sixteen rows are produced per call (four passes, four rows per pass).  For
 * every row the source and destination vectors are unpacked into two v8i16
 * halves, combined as src * src_weight + dst * dst_weight, passed through
 * SRARI_H2_SH with MFQE_PRECISION and stored with PCKEV_ST_SB.
 *
 * NOTE(review): the MSA helper macros are defined outside this block.
 * SRARI_H2_SH is presumably a rounding right shift and PCKEV_ST_SB presumably
 * packs the two halves back to bytes before storing one 16-byte row --
 * confirm against the macro definitions.
 */
static void filter_by_weight16x16_msa(const uint8_t *src_ptr,
                                      int32_t src_stride, uint8_t *dst_ptr,
                                      int32_t dst_stride, int32_t src_weight) {
    const int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
    /* Both weights replicated into vectors once, outside the loop. */
    v8i16 wt_src = __msa_fill_h(src_weight);
    v8i16 wt_dst = __msa_fill_h(dst_weight);
    v16i8 src_row0, src_row1, src_row2, src_row3;
    v16i8 dst_row0, dst_row1, dst_row2, dst_row3;
    v8i16 s_r, s_l, d_r, d_l, acc_r, acc_l;
    int32_t pass;

    for (pass = 0; pass < 4; ++pass) {
        /* Fetch four rows from each buffer up front: every destination row
         * of this pass is read before any of them is overwritten below. */
        LD_SB4(src_ptr, src_stride, src_row0, src_row1, src_row2, src_row3);
        LD_SB4(dst_ptr, dst_stride, dst_row0, dst_row1, dst_row2, dst_row3);
        src_ptr += (4 * src_stride);

        /* Row 0 of this pass. */
        UNPCK_UB_SH(src_row0, s_r, s_l);
        UNPCK_UB_SH(dst_row0, d_r, d_l);
        acc_r = (s_r * wt_src) + (d_r * wt_dst);
        acc_l = (s_l * wt_src) + (d_l * wt_dst);
        SRARI_H2_SH(acc_r, acc_l, MFQE_PRECISION);
        PCKEV_ST_SB(acc_r, acc_l, dst_ptr);
        dst_ptr += dst_stride;

        /* Row 1 of this pass. */
        UNPCK_UB_SH(src_row1, s_r, s_l);
        UNPCK_UB_SH(dst_row1, d_r, d_l);
        acc_r = (s_r * wt_src) + (d_r * wt_dst);
        acc_l = (s_l * wt_src) + (d_l * wt_dst);
        SRARI_H2_SH(acc_r, acc_l, MFQE_PRECISION);
        PCKEV_ST_SB(acc_r, acc_l, dst_ptr);
        dst_ptr += dst_stride;

        /* Row 2 of this pass. */
        UNPCK_UB_SH(src_row2, s_r, s_l);
        UNPCK_UB_SH(dst_row2, d_r, d_l);
        acc_r = (s_r * wt_src) + (d_r * wt_dst);
        acc_l = (s_l * wt_src) + (d_l * wt_dst);
        SRARI_H2_SH(acc_r, acc_l, MFQE_PRECISION);
        PCKEV_ST_SB(acc_r, acc_l, dst_ptr);
        dst_ptr += dst_stride;

        /* Row 3 of this pass. */
        UNPCK_UB_SH(src_row3, s_r, s_l);
        UNPCK_UB_SH(dst_row3, d_r, d_l);
        acc_r = (s_r * wt_src) + (d_r * wt_dst);
        acc_l = (s_l * wt_src) + (d_l * wt_dst);
        SRARI_H2_SH(acc_r, acc_l, MFQE_PRECISION);
        PCKEV_ST_SB(acc_r, acc_l, dst_ptr);
        dst_ptr += dst_stride;
    }
}
Example #2
0
/* Fills a 32-row block at dst from the row above (src_top), the column to
 * the left (src_left) and the corner byte stored just before src_top.
 *
 * src_top:    top reference row; src_top[-1] is read as the corner value and
 *             two vectors are loaded from src_top with LD_SB2 (step 16).
 * src_left:   left reference column; one byte is consumed per output row,
 *             32 bytes in total.
 * dst:        output; each row is written with two stores, at dst and at
 *             dst + 16.
 * dst_stride: step between output rows.
 *
 * Eight passes of four rows each.  Per row, the left byte is replicated
 * across a vector, interleaved with each top vector, run through
 * HADD_UB4_UH, reduced by the replicated corner value via IPRED_SUBS_UH2_UH,
 * passed to SAT_UH4_UH and stored.
 *
 * NOTE(review): the helper macros live outside this block.  The sequence
 * presumably evaluates clamp(left + top - corner) per pixel (horizontal add
 * of each left/top byte pair, saturating subtract, saturate to 8 bits) --
 * confirm against the macro definitions.
 */
static void intra_predict_tm_32x32_msa(const uint8_t *src_top,
                                       const uint8_t *src_left,
                                       uint8_t *dst, int32_t dst_stride) {
  const uint8_t corner = src_top[-1];
  uint32_t pass;
  v16i8 top_a, top_b, left0, left1, left2, left3;
  v8u16 corner_h, row_r0, row_r1, row_l0, row_l1;

  /* Loop-invariant inputs: replicated corner value and the two top vectors. */
  corner_h = (v8u16)__msa_fill_h(corner);
  LD_SB2(src_top, 16, top_a, top_b);

  for (pass = 0; pass < 8; ++pass) {
    /* One replicated left byte per output row of this pass. */
    left0 = __msa_fill_b(src_left[0]);
    left1 = __msa_fill_b(src_left[1]);
    left2 = __msa_fill_b(src_left[2]);
    left3 = __msa_fill_b(src_left[3]);

    /* Row 0 of this pass. */
    ILVR_B2_UH(left0, top_a, left0, top_b, row_r0, row_r1);
    ILVL_B2_UH(left0, top_a, left0, top_b, row_l0, row_l1);
    HADD_UB4_UH(row_r0, row_l0, row_r1, row_l1, row_r0, row_l0, row_r1, row_l1);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r0, row_l0);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r1, row_l1);
    SAT_UH4_UH(row_r0, row_l0, row_r1, row_l1, 7);
    PCKEV_ST_SB(row_r0, row_l0, dst);
    PCKEV_ST_SB(row_r1, row_l1, dst + 16);
    dst += dst_stride;

    /* Row 1 of this pass. */
    ILVR_B2_UH(left1, top_a, left1, top_b, row_r0, row_r1);
    ILVL_B2_UH(left1, top_a, left1, top_b, row_l0, row_l1);
    HADD_UB4_UH(row_r0, row_l0, row_r1, row_l1, row_r0, row_l0, row_r1, row_l1);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r0, row_l0);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r1, row_l1);
    SAT_UH4_UH(row_r0, row_l0, row_r1, row_l1, 7);
    PCKEV_ST_SB(row_r0, row_l0, dst);
    PCKEV_ST_SB(row_r1, row_l1, dst + 16);
    dst += dst_stride;

    /* Row 2 of this pass. */
    ILVR_B2_UH(left2, top_a, left2, top_b, row_r0, row_r1);
    ILVL_B2_UH(left2, top_a, left2, top_b, row_l0, row_l1);
    HADD_UB4_UH(row_r0, row_l0, row_r1, row_l1, row_r0, row_l0, row_r1, row_l1);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r0, row_l0);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r1, row_l1);
    SAT_UH4_UH(row_r0, row_l0, row_r1, row_l1, 7);
    PCKEV_ST_SB(row_r0, row_l0, dst);
    PCKEV_ST_SB(row_r1, row_l1, dst + 16);
    dst += dst_stride;

    /* Row 3 of this pass. */
    ILVR_B2_UH(left3, top_a, left3, top_b, row_r0, row_r1);
    ILVL_B2_UH(left3, top_a, left3, top_b, row_l0, row_l1);
    HADD_UB4_UH(row_r0, row_l0, row_r1, row_l1, row_r0, row_l0, row_r1, row_l1);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r0, row_l0);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r1, row_l1);
    SAT_UH4_UH(row_r0, row_l0, row_r1, row_l1, 7);
    PCKEV_ST_SB(row_r0, row_l0, dst);
    PCKEV_ST_SB(row_r1, row_l1, dst + 16);
    dst += dst_stride;

    /* Step to the left-column bytes used by the next pass. */
    src_left += 4;
  }
}
Example #3
0
/* Fills a 16-row block at dst from the row above (src_top_ptr), the column
 * to the left (src_left) and the corner byte stored just before src_top_ptr.
 *
 * src_top_ptr: top reference row; src_top_ptr[-1] is read as the corner
 *              value and one vector is loaded from src_top_ptr.
 * src_left:    left reference column; one byte is consumed per output row,
 *              16 bytes in total.
 * dst:         output; one store per row.
 * dst_stride:  step between output rows.
 *
 * Four passes of four rows each.  Per row, the left byte is replicated
 * across a vector, interleaved with the top vector, run through HADD_UB2_UH,
 * reduced by the replicated corner value via IPRED_SUBS_UH2_UH, passed to
 * SAT_UH2_UH and stored.
 *
 * NOTE(review): the helper macros live outside this block.  The sequence
 * presumably evaluates clamp(left + top - corner) per pixel -- confirm
 * against the macro definitions.
 */
static void intra_predict_tm_16x16_msa(const uint8_t *src_top_ptr,
                                       const uint8_t *src_left,
                                       uint8_t *dst, int32_t dst_stride) {
  const uint8_t corner = src_top_ptr[-1];
  uint32_t pass;
  v16i8 top_vec, left0, left1, left2, left3;
  v8u16 corner_h, row_r, row_l;

  /* Loop-invariant inputs: replicated corner value and the top vector. */
  corner_h = (v8u16)__msa_fill_h(corner);
  top_vec = LD_SB(src_top_ptr);

  for (pass = 0; pass < 4; ++pass) {
    /* One replicated left byte per output row of this pass. */
    left0 = __msa_fill_b(src_left[0]);
    left1 = __msa_fill_b(src_left[1]);
    left2 = __msa_fill_b(src_left[2]);
    left3 = __msa_fill_b(src_left[3]);

    /* Row 0 of this pass. */
    ILVRL_B2_UH(left0, top_vec, row_r, row_l);
    HADD_UB2_UH(row_r, row_l, row_r, row_l);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r, row_l);
    SAT_UH2_UH(row_r, row_l, 7);
    PCKEV_ST_SB(row_r, row_l, dst);
    dst += dst_stride;

    /* Row 1 of this pass. */
    ILVRL_B2_UH(left1, top_vec, row_r, row_l);
    HADD_UB2_UH(row_r, row_l, row_r, row_l);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r, row_l);
    SAT_UH2_UH(row_r, row_l, 7);
    PCKEV_ST_SB(row_r, row_l, dst);
    dst += dst_stride;

    /* Row 2 of this pass. */
    ILVRL_B2_UH(left2, top_vec, row_r, row_l);
    HADD_UB2_UH(row_r, row_l, row_r, row_l);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r, row_l);
    SAT_UH2_UH(row_r, row_l, 7);
    PCKEV_ST_SB(row_r, row_l, dst);
    dst += dst_stride;

    /* Row 3 of this pass. */
    ILVRL_B2_UH(left3, top_vec, row_r, row_l);
    HADD_UB2_UH(row_r, row_l, row_r, row_l);
    IPRED_SUBS_UH2_UH(corner_h, corner_h, row_r, row_l);
    SAT_UH2_UH(row_r, row_l, 7);
    PCKEV_ST_SB(row_r, row_l, dst);
    dst += dst_stride;

    /* Step to the left-column bytes used by the next pass. */
    src_left += 4;
  }
}