Exemplo n.º 1
0
static void common_vt_8t_and_aver_dst_4w_msa(const uint8_t *src,
                                             int32_t src_stride, uint8_t *dst,
                                             int32_t dst_stride, int8_t *filter,
                                             int32_t height) {
  uint32_t loop_cnt;
  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  v16u8 dst0, dst1, dst2, dst3, out;
  v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
  v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776;
  v16i8 src10998, filt0, filt1, filt2, filt3;
  v8i16 filt, out10, out32;

  src -= (3 * src_stride);

  filt = LD_SH(filter);
  SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);

  LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
  src += (7 * src_stride);

  ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
             src54_r, src21_r);
  ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
  ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110,
             src4332, src6554);
  XORI_B3_128_SB(src2110, src4332, src6554);

  for (loop_cnt = (height >> 2); loop_cnt--;) {
Exemplo n.º 2
0
static void common_hv_8ht_8vt_and_aver_dst_4w_msa(const uint8_t *src,
                                                  int32_t src_stride,
                                                  uint8_t *dst,
                                                  int32_t dst_stride,
                                                  int8_t *filter_horiz,
                                                  int8_t *filter_vert,
                                                  int32_t height) {
  uint32_t loop_cnt;
  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
  v16u8 dst0, dst1, dst2, dst3, mask0, mask1, mask2, mask3, tmp0, tmp1;
  v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3;
  v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
  v8i16 hz_out7, hz_out8, hz_out9, res0, res1, vec0, vec1, vec2, vec3, vec4;
  v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3;

  mask0 = LD_UB(&mc_filt_mask_arr[16]);
  src -= (3 + 3 * src_stride);

  /* rearranging filter */
  filt = LD_SH(filter_horiz);
  SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3);

  mask1 = mask0 + 2;
  mask2 = mask0 + 4;
  mask3 = mask0 + 6;

  LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
  XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
  src += (7 * src_stride);

  hz_out0 = HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_hz0,
                            filt_hz1, filt_hz2, filt_hz3);
  hz_out2 = HORIZ_8TAP_FILT(src2, src3, mask0, mask1, mask2, mask3, filt_hz0,
                            filt_hz1, filt_hz2, filt_hz3);
  hz_out4 = HORIZ_8TAP_FILT(src4, src5, mask0, mask1, mask2, mask3, filt_hz0,
                            filt_hz1, filt_hz2, filt_hz3);
  hz_out5 = HORIZ_8TAP_FILT(src5, src6, mask0, mask1, mask2, mask3, filt_hz0,
                            filt_hz1, filt_hz2, filt_hz3);
  SLDI_B2_SH(hz_out2, hz_out4, hz_out0, hz_out2, hz_out1, hz_out3, 8);

  filt = LD_SH(filter_vert);
  SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3);

  ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
  vec2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4);

  for (loop_cnt = (height >> 2); loop_cnt--;) {