/* Negative compile test for vst4_lane_u8: each call below passes a lane
   index outside the 0 - 7 range and is paired with a harness directive
   naming the diagnostic expected for it.  */
void
f_vst4_lane_u8 (uint8_t * p, uint8x8x4_t v)
{
  /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
  vst4_lane_u8 (p, v, 8);
  /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
  vst4_lane_u8 (p, v, -1);
}
/* Issues a single vst4_lane_u8 call with a valid lane index (1).
   NOTE(review): both operands are left uninitialized, so this looks like
   it is meant to be compiled rather than executed -- confirm.  */
void
test_vst4_laneu8 (void)
{
  uint8x8x4_t vectors;
  uint8_t *dest;

  vst4_lane_u8 (dest, vectors, 1);
}
void aom_lpf_vertical_8_neon(uint8_t *src, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { int i; uint8_t *s; uint8x8_t dblimit, dlimit, dthresh; uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; uint8x8_t d16u8, d17u8, d18u8; uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3; uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7; uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; uint8x8x4_t d4Result; uint8x8x2_t d2Result; dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); for (i = 0; i < 1; i++) { s = src + (i * (pitch << 3)) - 4; d3u8 = vld1_u8(s); s += pitch; d4u8 = vld1_u8(s); s += pitch; d5u8 = vld1_u8(s); s += pitch; d6u8 = vld1_u8(s); s += pitch; d7u8 = vld1_u8(s); s += pitch; d16u8 = vld1_u8(s); s += pitch; d17u8 = vld1_u8(s); s += pitch; d18u8 = vld1_u8(s); d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8), vreinterpret_u32_u8(d7u8)); d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8), vreinterpret_u32_u8(d16u8)); d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8), vreinterpret_u32_u8(d17u8)); d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8), vreinterpret_u32_u8(d18u8)); d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]), vreinterpret_u16_u32(d2tmp2.val[0])); d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]), vreinterpret_u16_u32(d2tmp3.val[0])); d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]), vreinterpret_u16_u32(d2tmp2.val[1])); d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]), vreinterpret_u16_u32(d2tmp3.val[1])); d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]), vreinterpret_u8_u16(d2tmp5.val[0])); d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]), vreinterpret_u8_u16(d2tmp5.val[1])); d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]), vreinterpret_u8_u16(d2tmp7.val[0])); d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]), vreinterpret_u8_u16(d2tmp7.val[1])); d3u8 = d2tmp8.val[0]; d4u8 = d2tmp8.val[1]; d5u8 = d2tmp9.val[0]; d6u8 = d2tmp9.val[1]; d7u8 = d2tmp10.val[0]; d16u8 = d2tmp10.val[1]; d17u8 = 
d2tmp11.val[0]; d18u8 = d2tmp11.val[1]; mbloop_filter_neon(dblimit, dlimit, dthresh, d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, &d0u8, &d1u8, &d2u8, &d3u8, &d4u8, &d5u8); d4Result.val[0] = d0u8; d4Result.val[1] = d1u8; d4Result.val[2] = d2u8; d4Result.val[3] = d3u8; d2Result.val[0] = d4u8; d2Result.val[1] = d5u8; s = src - 3; vst4_lane_u8(s, d4Result, 0); s += pitch; vst4_lane_u8(s, d4Result, 1); s += pitch; vst4_lane_u8(s, d4Result, 2); s += pitch; vst4_lane_u8(s, d4Result, 3); s += pitch; vst4_lane_u8(s, d4Result, 4); s += pitch; vst4_lane_u8(s, d4Result, 5); s += pitch; vst4_lane_u8(s, d4Result, 6); s += pitch; vst4_lane_u8(s, d4Result, 7); s = src + 1; vst2_lane_u8(s, d2Result, 0); s += pitch; vst2_lane_u8(s, d2Result, 1); s += pitch; vst2_lane_u8(s, d2Result, 2); s += pitch; vst2_lane_u8(s, d2Result, 3); s += pitch; vst2_lane_u8(s, d2Result, 4); s += pitch; vst2_lane_u8(s, d2Result, 5); s += pitch; vst2_lane_u8(s, d2Result, 6); s += pitch; vst2_lane_u8(s, d2Result, 7); } return; }
void vp9_lpf_vertical_4_neon( unsigned char *src, int pitch, unsigned char *blimit, unsigned char *limit, unsigned char *thresh, int count) { int i, pitch8; uint8_t *s; uint8x8_t dblimit, dlimit, dthresh; uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8; uint32x2x2_t d2tmp0, d2tmp1, d2tmp2, d2tmp3; uint16x4x2_t d2tmp4, d2tmp5, d2tmp6, d2tmp7; uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; uint8x8x4_t d4Result; if (count == 0) // end_vp9_lf_h_edge return; dblimit = vld1_u8(blimit); dlimit = vld1_u8(limit); dthresh = vld1_u8(thresh); pitch8 = pitch * 8; for (i = 0; i < count; i++, src += pitch8) { s = src - (i + 1) * 4; d3u8 = vld1_u8(s); s += pitch; d4u8 = vld1_u8(s); s += pitch; d5u8 = vld1_u8(s); s += pitch; d6u8 = vld1_u8(s); s += pitch; d7u8 = vld1_u8(s); s += pitch; d16u8 = vld1_u8(s); s += pitch; d17u8 = vld1_u8(s); s += pitch; d18u8 = vld1_u8(s); d2tmp0 = vtrn_u32(vreinterpret_u32_u8(d3u8), vreinterpret_u32_u8(d7u8)); d2tmp1 = vtrn_u32(vreinterpret_u32_u8(d4u8), vreinterpret_u32_u8(d16u8)); d2tmp2 = vtrn_u32(vreinterpret_u32_u8(d5u8), vreinterpret_u32_u8(d17u8)); d2tmp3 = vtrn_u32(vreinterpret_u32_u8(d6u8), vreinterpret_u32_u8(d18u8)); d2tmp4 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[0]), vreinterpret_u16_u32(d2tmp2.val[0])); d2tmp5 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[0]), vreinterpret_u16_u32(d2tmp3.val[0])); d2tmp6 = vtrn_u16(vreinterpret_u16_u32(d2tmp0.val[1]), vreinterpret_u16_u32(d2tmp2.val[1])); d2tmp7 = vtrn_u16(vreinterpret_u16_u32(d2tmp1.val[1]), vreinterpret_u16_u32(d2tmp3.val[1])); d2tmp8 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[0]), vreinterpret_u8_u16(d2tmp5.val[0])); d2tmp9 = vtrn_u8(vreinterpret_u8_u16(d2tmp4.val[1]), vreinterpret_u8_u16(d2tmp5.val[1])); d2tmp10 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[0]), vreinterpret_u8_u16(d2tmp7.val[0])); d2tmp11 = vtrn_u8(vreinterpret_u8_u16(d2tmp6.val[1]), vreinterpret_u8_u16(d2tmp7.val[1])); d3u8 = d2tmp8.val[0]; d4u8 = d2tmp8.val[1]; d5u8 = d2tmp9.val[0]; d6u8 = d2tmp9.val[1]; d7u8 = 
d2tmp10.val[0]; d16u8 = d2tmp10.val[1]; d17u8 = d2tmp11.val[0]; d18u8 = d2tmp11.val[1]; vp9_loop_filter_neon(dblimit, dlimit, dthresh, d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8, &d4u8, &d5u8, &d6u8, &d7u8); d4Result.val[0] = d4u8; d4Result.val[1] = d5u8; d4Result.val[2] = d6u8; d4Result.val[3] = d7u8; src -= 2; vst4_lane_u8(src, d4Result, 0); src += pitch; vst4_lane_u8(src, d4Result, 1); src += pitch; vst4_lane_u8(src, d4Result, 2); src += pitch; vst4_lane_u8(src, d4Result, 3); src += pitch; vst4_lane_u8(src, d4Result, 4); src += pitch; vst4_lane_u8(src, d4Result, 5); src += pitch; vst4_lane_u8(src, d4Result, 6); src += pitch; vst4_lane_u8(src, d4Result, 7); } return; }