int64_t av1_block_error_fp_neon(const int16_t *coeff, const int16_t *dqcoeff, int block_size) { int64x2_t error = vdupq_n_s64(0); assert(block_size >= 8); assert((block_size % 8) == 0); do { const int16x8_t c = vld1q_s16(coeff); const int16x8_t d = vld1q_s16(dqcoeff); const int16x8_t diff = vsubq_s16(c, d); const int16x4_t diff_lo = vget_low_s16(diff); const int16x4_t diff_hi = vget_high_s16(diff); // diff is 15-bits, the squares 30, so we can store 2 in 31-bits before // accumulating them in 64-bits. const int32x4_t err0 = vmull_s16(diff_lo, diff_lo); const int32x4_t err1 = vmlal_s16(err0, diff_hi, diff_hi); const int64x2_t err2 = vaddl_s32(vget_low_s32(err1), vget_high_s32(err1)); error = vaddq_s64(error, err2); coeff += 8; dqcoeff += 8; block_size -= 8; } while (block_size != 0); return vgetq_lane_s64(error, 0) + vgetq_lane_s64(error, 1); }
/* Runtime self-check for vdupq_n_s64: broadcast one 64-bit value into both
   lanes of an int64x2_t and read each lane back with vgetq_lane_s64.
   Calls abort () if either lane differs from the broadcast value; returns 0
   otherwise.  */
int main (void)
{
  int64_t expected = (int64_t) 0xdeadbeef;
  int64x2_t splat = vdupq_n_s64 (expected);

  /* Lane indices must be compile-time constants, so both lanes are tested
     explicitly rather than in a loop.  */
  if (vgetq_lane_s64 (splat, 0) != expected
      || vgetq_lane_s64 (splat, 1) != expected)
    {
      abort ();
    }

  return 0;
}
/* Reads lane 0 of a two-lane int64x2_t vector with vgetq_lane_s64 and stores
   it in a local that is pinned to register r0 through the register-variable
   `asm ("r0")` syntax.  The source vector is never initialized and the
   extracted value is not used afterwards.
   NOTE(review): presumably a compile-only code-generation test in which only
   the emitted instruction matters -- confirm before "fixing" the
   uninitialized read.  */
void test_vgetQ_lanes64 (void) { register int64_t out_int64_t asm ("r0"); int64x2_t arg0_int64x2_t; out_int64_t = vgetq_lane_s64 (arg0_int64x2_t, 0); }
// Returns lane 1 of the two-lane vector v1 via vgetq_lane_s64.
// NOTE(review): the comments inside the body look like FileCheck directives;
// they expect the function name followed by a `umov` that moves element d[1]
// of a vector register into an x register. Keep their text intact.
int64_t test_vgetq_lane_s64(int64x2_t v1) {
  // CHECK: test_vgetq_lane_s64
  return vgetq_lane_s64(v1, 1);
  // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
}
// Returns lane 1 of the two-lane vector a via vgetq_lane_s64.
// NOTE(review): the comments inside the body look like FileCheck directives;
// they expect the function label to be followed immediately by
// `mov.d x0, v0[1]` and then `ret`. Keep their text intact.
int64_t test_vgetq_lane_s64(int64x2_t a) {
  // CHECK-LABEL: test_vgetq_lane_s64:
  // CHECK-NEXT: mov.d x0, v0[1]
  // CHECK-NEXT: ret
  return vgetq_lane_s64(a, 1);
}
/* Negative test: calls vgetq_lane_s64 with lane index -1 on a two-lane
   vector.  The directive comment inside the body expects the compiler to
   reject the call with "lane -1 out of range 0 - 1", so this function is not
   meant to compile successfully.
   NOTE(review): the directive looks like DejaGnu syntax; its text and the
   invalid index must stay exactly as written.  */
int64_t test_vgetq_lane_s64_before (int64x2_t in) { /* { dg-error "lane -1 out of range 0 - 1" "" {target *-*-*} 0 } */ return vgetq_lane_s64 (in, -1); }
/* Negative test: calls vgetq_lane_s64 with lane index 2 on a two-lane
   vector.  The directive comment inside the body expects the compiler to
   reject the call with "lane 2 out of range 0 - 1", so this function is not
   meant to compile successfully.
   NOTE(review): the directive looks like DejaGnu syntax; its text and the
   invalid index must stay exactly as written.  */
int64_t test_vgetq_lane_s64_beyond (int64x2_t in) { /* { dg-error "lane 2 out of range 0 - 1" "" {target *-*-*} 0 } */ return vgetq_lane_s64 (in, 2); }
// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 // CHECK: ret i64 [[VGETQ_LANE]] int64_t test_vgetq_lane_s64(int64x2_t a) { return vgetq_lane_s64(a, 1); }