Ejemplo n.º 1
0
// Runs mbloop_filter_neon() over `count` adjacent 8-byte-wide segments of the
// rows surrounding `src`.
//
// For every segment, eight rows of 8 bytes are loaded, starting four rows
// above `src` (src - 4 * pitch) and ending three rows below it
// (src + 3 * pitch). The six vectors produced by the filter are written back
// to rows src - 3 * pitch .. src + 2 * pitch; the first and last loaded rows
// are read but never modified.
//
//   src     row the window is positioned around; segment i starts at src + 8*i
//   pitch   distance in bytes between consecutive rows
//   blimit, limit, thresh
//           each points to at least 8 readable bytes, loaded once and passed
//           unchanged to mbloop_filter_neon() for every segment
//   count   number of 8-byte segments to process; nothing is read or written
//           when it is zero or negative
void vpx_lpf_horizontal_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
                               const uint8_t *limit, const uint8_t *thresh,
                               int count) {
  uint8x8_t dblimit, dlimit, dthresh;
  uint8_t *window;
  int i, r;

  if (count <= 0) {  // no segments: skip the parameter loads as well
    return;
  }

  dblimit = vld1_u8(blimit);
  dlimit = vld1_u8(limit);
  dthresh = vld1_u8(thresh);

  // Multiply instead of shifting: left-shifting a negative pitch is undefined
  // behaviour in C, while the product is well defined.
  window = src - pitch * 4;

  for (i = 0; i < count; i++) {
    uint8_t *s = window + i * 8;
    uint8x8_t in[8];
    uint8x8_t out[6];

    // Load the eight input rows of this segment, top to bottom.
    for (r = 0; r < 8; r++) {
      in[r] = vld1_u8(s + r * pitch);
    }

    mbloop_filter_neon(dblimit, dlimit, dthresh, in[0], in[1], in[2], in[3],
                       in[4], in[5], in[6], in[7], &out[0], &out[1], &out[2],
                       &out[3], &out[4], &out[5]);

    // Results replace rows 1..6 of the window; rows 0 and 7 stay untouched.
    s += pitch;
    for (r = 0; r < 6; r++) {
      vst1_u8(s + r * pitch, out[r]);
    }
  }
}
Ejemplo n.º 2
0
// Runs mbloop_filter_neon() over the columns of one 8x8 byte block located at
// src - 4.
//
// Eight rows of 8 bytes are loaded from src - 4, src - 4 + pitch, ...; the
// block is transposed so that each vector holds one column (bytes of the same
// horizontal position across the eight rows), and the eight columns are
// handed to the filter. The six result vectors are scattered back as columns
// src - 3 .. src + 2 on each of the eight rows; the outermost columns
// (src - 4 and src + 3) are read but never written.
//
//   src     position the block is centred on in its first row
//   pitch   distance in bytes between consecutive rows
//   blimit, limit, thresh
//           each points to at least 8 readable bytes, passed unchanged to
//           mbloop_filter_neon()
void aom_lpf_vertical_8_neon(uint8_t *src, int pitch, const uint8_t *blimit,
                             const uint8_t *limit, const uint8_t *thresh) {
  uint8_t *s;
  uint8x8_t dblimit, dlimit, dthresh;
  uint8x8_t row[8];
  uint32x2x2_t w04, w15, w26, w37;
  uint16x4x2_t h0, h1, h2, h3;
  uint8x8x2_t c01, c23, c45, c67;
  uint8x8x4_t d4Result;
  uint8x8x2_t d2Result;
  int r;

  dblimit = vld1_u8(blimit);
  dlimit = vld1_u8(limit);
  dthresh = vld1_u8(thresh);

  // Load the 8x8 block, one row per vector.
  s = src - 4;
  for (r = 0; r < 8; r++) {
    row[r] = vld1_u8(s + r * pitch);
  }

  // 8x8 byte transpose in three stages: 32-bit, then 16-bit, then 8-bit
  // element swaps. Afterwards c01.val[0] is column 0, c01.val[1] column 1,
  // and so on up to c67.val[1] (column 7).
  w04 = vtrn_u32(vreinterpret_u32_u8(row[0]), vreinterpret_u32_u8(row[4]));
  w15 = vtrn_u32(vreinterpret_u32_u8(row[1]), vreinterpret_u32_u8(row[5]));
  w26 = vtrn_u32(vreinterpret_u32_u8(row[2]), vreinterpret_u32_u8(row[6]));
  w37 = vtrn_u32(vreinterpret_u32_u8(row[3]), vreinterpret_u32_u8(row[7]));

  h0 = vtrn_u16(vreinterpret_u16_u32(w04.val[0]),
                vreinterpret_u16_u32(w26.val[0]));
  h1 = vtrn_u16(vreinterpret_u16_u32(w15.val[0]),
                vreinterpret_u16_u32(w37.val[0]));
  h2 = vtrn_u16(vreinterpret_u16_u32(w04.val[1]),
                vreinterpret_u16_u32(w26.val[1]));
  h3 = vtrn_u16(vreinterpret_u16_u32(w15.val[1]),
                vreinterpret_u16_u32(w37.val[1]));

  c01 = vtrn_u8(vreinterpret_u8_u16(h0.val[0]), vreinterpret_u8_u16(h1.val[0]));
  c23 = vtrn_u8(vreinterpret_u8_u16(h0.val[1]), vreinterpret_u8_u16(h1.val[1]));
  c45 = vtrn_u8(vreinterpret_u8_u16(h2.val[0]), vreinterpret_u8_u16(h3.val[0]));
  c67 = vtrn_u8(vreinterpret_u8_u16(h2.val[1]), vreinterpret_u8_u16(h3.val[1]));

  // The filter writes its six outputs straight into the structures used by
  // the interleaving stores below: the first four into d4Result, the last two
  // into d2Result.
  mbloop_filter_neon(dblimit, dlimit, dthresh, c01.val[0], c01.val[1],
                     c23.val[0], c23.val[1], c45.val[0], c45.val[1],
                     c67.val[0], c67.val[1], &d4Result.val[0],
                     &d4Result.val[1], &d4Result.val[2], &d4Result.val[3],
                     &d2Result.val[0], &d2Result.val[1]);

  // Lane k of the four vectors becomes the 4 bytes at src - 3 .. src on row k.
  // The lane index must be a compile-time constant, hence no loop.
  s = src - 3;
  vst4_lane_u8(s, d4Result, 0);
  s += pitch;
  vst4_lane_u8(s, d4Result, 1);
  s += pitch;
  vst4_lane_u8(s, d4Result, 2);
  s += pitch;
  vst4_lane_u8(s, d4Result, 3);
  s += pitch;
  vst4_lane_u8(s, d4Result, 4);
  s += pitch;
  vst4_lane_u8(s, d4Result, 5);
  s += pitch;
  vst4_lane_u8(s, d4Result, 6);
  s += pitch;
  vst4_lane_u8(s, d4Result, 7);

  // Lane k of the remaining two vectors becomes the 2 bytes at src + 1 and
  // src + 2 on row k.
  s = src + 1;
  vst2_lane_u8(s, d2Result, 0);
  s += pitch;
  vst2_lane_u8(s, d2Result, 1);
  s += pitch;
  vst2_lane_u8(s, d2Result, 2);
  s += pitch;
  vst2_lane_u8(s, d2Result, 3);
  s += pitch;
  vst2_lane_u8(s, d2Result, 4);
  s += pitch;
  vst2_lane_u8(s, d2Result, 5);
  s += pitch;
  vst2_lane_u8(s, d2Result, 6);
  s += pitch;
  vst2_lane_u8(s, d2Result, 7);
}