static void loop_filter_vertical_edge_uv_msa(uint8_t *src_u, uint8_t *src_v,
                                             int32_t pitch,
                                             const uint8_t b_limit_in,
                                             const uint8_t limit_in,
                                             const uint8_t thresh_in) {
  uint8_t *temp_src_u, *temp_src_v;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  v16u8 mask, hev, flat, thresh, limit, b_limit;
  v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
  v16u8 row9, row10, row11, row12, row13, row14, row15;
  v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;

  thresh = (v16u8)__msa_fill_b(thresh_in);
  limit = (v16u8)__msa_fill_b(limit_in);
  b_limit = (v16u8)__msa_fill_b(b_limit_in);

  LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
  LD_UB8(src_v - 4, pitch, row8, row9, row10, row11, row12, row13, row14,
         row15);
  TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
                      row9, row10, row11, row12, row13, row14, row15, p3, p2,
                      p1, p0, q0, q1, q2, q3);

  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
               mask, flat);
  VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
  ILVR_B2_SW(p0, p1, q1, q0, tmp0, tmp1);
  ILVRL_H2_SW(tmp1, tmp0, tmp2, tmp3);
  tmp0 = (v4i32)__msa_ilvl_b((v16i8)p0, (v16i8)p1);
  tmp1 = (v4i32)__msa_ilvl_b((v16i8)q1, (v16i8)q0);
  ILVRL_H2_SW(tmp1, tmp0, tmp4, tmp5);

  temp_src_u = src_u - 2;
  ST4x4_UB(tmp2, tmp2, 0, 1, 2, 3, temp_src_u, pitch);
  temp_src_u += 4 * pitch;
  ST4x4_UB(tmp3, tmp3, 0, 1, 2, 3, temp_src_u, pitch);

  temp_src_v = src_v - 2;
  ST4x4_UB(tmp4, tmp4, 0, 1, 2, 3, temp_src_v, pitch);
  temp_src_v += 4 * pitch;
  ST4x4_UB(tmp5, tmp5, 0, 1, 2, 3, temp_src_v, pitch);
}
Example #2
0
static void hevc_idct_4x4_msa(int16_t *coeffs)
{
    v8i16 in0, in1;
    v4i32 in_r0, in_l0, in_r1, in_l1;
    v4i32 sum0, sum1, sum2, sum3;
    v8i16 zeros = { 0 };

    LD_SH2(coeffs, 8, in0, in1);
    ILVRL_H2_SW(zeros, in0, in_r0, in_l0);
    ILVRL_H2_SW(zeros, in1, in_r1, in_l1);

    HEVC_IDCT4x4_COL(in_r0, in_l0, in_r1, in_l1, sum0, sum1, sum2, sum3, 7);
    TRANSPOSE4x4_SW_SW(sum0, sum1, sum2, sum3, in_r0, in_l0, in_r1, in_l1);
    HEVC_IDCT4x4_COL(in_r0, in_l0, in_r1, in_l1, sum0, sum1, sum2, sum3, 12);

    /* Pack and transpose */
    PCKEV_H2_SH(sum2, sum0, sum3, sum1, in0, in1);
    ILVRL_H2_SW(in1, in0, sum0, sum1);
    ILVRL_W2_SH(sum1, sum0, in0, in1);

    ST_SH2(in0, in1, coeffs, 8);
}
Example #3
0
static void hevc_idct_luma_4x4_msa(int16_t *coeffs)
{
    v8i16 in0, in1, dst0, dst1;
    v4i32 in_r0, in_l0, in_r1, in_l1, res0, res1, res2, res3;

    LD_SH2(coeffs, 8, in0, in1);
    UNPCK_SH_SW(in0, in_r0, in_l0);
    UNPCK_SH_SW(in1, in_r1, in_l1);
    HEVC_IDCT_LUMA4x4_COL(in_r0, in_l0, in_r1, in_l1, res0, res1, res2, res3,
                          7);
    TRANSPOSE4x4_SW_SW(res0, res1, res2, res3, in_r0, in_l0, in_r1, in_l1);
    HEVC_IDCT_LUMA4x4_COL(in_r0, in_l0, in_r1, in_l1, res0, res1, res2, res3,
                          12);

    /* Pack and transpose */
    PCKEV_H2_SH(res2, res0, res3, res1, dst0, dst1);
    ILVRL_H2_SW(dst1, dst0, res0, res1);
    ILVRL_W2_SH(res1, res0, dst0, dst1);

    ST_SH2(dst0, dst1, coeffs, 8);
}