// Returns the byte-wise truncating average of the two 32-bit values pointed
// to by 'a' and 'b': each of the four result bytes is (x + y) >> 1.
static WEBP_INLINE uint32_t Average2(const uint32_t* const a,
                                     const uint32_t* const b) {
  // Each 32-bit input is zero-extended into a 64-bit vector, so only the low
  // four byte lanes carry data.
  const uint64x1_t wide_a = vcreate_u64(*a);
  const uint64x1_t wide_b = vcreate_u64(*b);
  // Halving add keeps the 9-bit intermediate sum, so nothing overflows.
  const uint8x8_t half_sum =
      vhadd_u8(vreinterpret_u8_u64(wide_a), vreinterpret_u8_u64(wide_b));
  // Only the low 32 bits (lane 0) hold the result.
  return vget_lane_u32(vreinterpret_u32_u8(half_sum), 0);
}
// Byte-wise three-input blend: first the truncating average of *a and *c,
// then the truncating average of that intermediate with *b.
static WEBP_INLINE uint32_t Average3(const uint32_t* const a,
                                     const uint32_t* const b,
                                     const uint32_t* const c) {
  const uint8x8_t bytes_a = vreinterpret_u8_u64(vcreate_u64(*a));
  const uint8x8_t bytes_c = vreinterpret_u8_u64(vcreate_u64(*c));
  const uint8x8_t bytes_b = vreinterpret_u8_u64(vcreate_u64(*b));
  // (a + c) >> 1 per byte.
  const uint8x8_t outer = vhadd_u8(bytes_a, bytes_c);
  // (outer + b) >> 1 per byte; the low 32 bits are the answer.
  return vget_lane_u32(vreinterpret_u32_u8(vhadd_u8(outer, bytes_b)), 0);
}
// Computes *c0 + *c1 - *c2 independently on each of the four bytes and clamps
// every byte of the result to [0, 255].
static WEBP_INLINE uint32_t ClampedAddSubtractFull(const uint32_t* const c0,
                                                   const uint32_t* const c1,
                                                   const uint32_t* const c2) {
  const uint8x8_t first = vreinterpret_u8_u64(vcreate_u64(*c0));
  const uint8x8_t second = vreinterpret_u8_u64(vcreate_u64(*c1));
  const uint8x8_t third = vreinterpret_u8_u64(vcreate_u64(*c2));
  // Widening add: the 16-bit lanes hold the exact byte sums.
  const uint16x8_t added = vaddl_u8(first, second);
  // Widen the subtrahend and subtract with saturation, so negative results
  // stop at 0.
  const uint16x8_t floored = vqsubq_u16(added, vmovl_u8(third));
  // Saturating narrow caps anything above 255.
  const uint8x8_t clamped = vqmovn_u16(floored);
  return vget_lane_u32(vreinterpret_u32_u8(clamped), 0);
}
// Per byte: avg = (*c0 + *c1) >> 1, then returns avg + (avg - *c2) / 2 with
// the result clamped to [0, 255]. The signed difference is handled as two
// unsigned saturating differences, at least one of which is always zero.
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(const uint32_t* const c0,
                                                   const uint32_t* const c1,
                                                   const uint32_t* const c2) {
  const uint8x8_t ref = vreinterpret_u8_u64(vcreate_u64(*c2));
  const uint8x8_t lhs = vreinterpret_u8_u64(vcreate_u64(*c0));
  const uint8x8_t rhs = vreinterpret_u8_u64(vcreate_u64(*c1));
  // Truncating average of c0 and c1.
  const uint8x8_t mean = vhadd_u8(lhs, rhs);
  // Half of the amount by which the mean exceeds the reference (0 if it
  // does not).
  const uint8x8_t above = vqsub_u8(mean, ref);
  const uint8x8_t half_above = vshr_n_u8(above, 1);
  // Half of the amount by which the reference exceeds the mean (0 if it
  // does not).
  const uint8x8_t below = vqsub_u8(ref, mean);
  const uint8x8_t half_below = vshr_n_u8(below, 1);
  // Apply the upward correction, then the downward one, both saturating.
  const uint8x8_t raised = vqadd_u8(mean, half_above);
  const uint8x8_t result = vqsub_u8(raised, half_below);
  return vget_lane_u32(vreinterpret_u32_u8(result), 0);
}
// Byte-wise four-input blend: truncating average of (*a, *b) and of (*c, *d),
// followed by the truncating average of those two intermediates.
static WEBP_INLINE uint32_t Average4(const uint32_t* const a,
                                     const uint32_t* const b,
                                     const uint32_t* const c,
                                     const uint32_t* const d) {
  // First pair.
  const uint8x8_t bytes_a = vreinterpret_u8_u64(vcreate_u64(*a));
  const uint8x8_t bytes_b = vreinterpret_u8_u64(vcreate_u64(*b));
  const uint8x8_t mean_ab = vhadd_u8(bytes_a, bytes_b);
  // Second pair.
  const uint8x8_t bytes_c = vreinterpret_u8_u64(vcreate_u64(*c));
  const uint8x8_t bytes_d = vreinterpret_u8_u64(vcreate_u64(*d));
  const uint8x8_t mean_cd = vhadd_u8(bytes_c, bytes_d);
  // Combine both halves and extract the low 32 bits.
  const uint8x8_t blended = vhadd_u8(mean_ab, mean_cd);
  return vget_lane_u32(vreinterpret_u32_u8(blended), 0);
}
/* Negative compile test for vqrdmulh_lane_s32: both calls below pass a lane
   index outside 0 - 1, and each is paired with the DejaGnu directive naming
   the diagnostic the compiler is expected to emit.  */
int main (int argc, char **argv)
{
  /* Fixed 64-bit patterns, reinterpreted as vectors of two signed 32-bit
     lanes to serve as operands.  */
  uint64x1_t base_a = vcreate_u64 (0x1111222233334444ULL);
  uint64x1_t base_b = vcreate_u64 (0x5555666677778888ULL);
  int32x2_t int32x2_a = vreinterpret_s32_u64 (base_a);
  int32x2_t int32x2_b = vreinterpret_s32_u64 (base_b);

  /* Lane index below the valid range.  */
  /* { dg-error "lane -1 out of range 0 - 1" "" {target *-*-*} 0 } */
  vqrdmulh_lane_s32 (int32x2_a, int32x2_b, -1);
  /* Lane index one past the last valid lane.  */
  /* { dg-error "lane 2 out of range 0 - 1" "" {target *-*-*} 0 } */
  vqrdmulh_lane_s32 (int32x2_a, int32x2_b, 2);
}
/* Negative compile test for vqdmulh_lane_s16: both calls below pass a lane
   index outside 0 - 3, and each is paired with the DejaGnu directive naming
   the diagnostic the compiler is expected to emit.  */
int main (int argc, char **argv)
{
  /* Fixed 64-bit patterns, reinterpreted as vectors of four signed 16-bit
     lanes to serve as operands.  */
  uint64x1_t base_a = vcreate_u64 (0x1111222233334444ULL);
  uint64x1_t base_b = vcreate_u64 (0x5555666677778888ULL);
  int16x4_t int16x4_a = vreinterpret_s16_u64 (base_a);
  int16x4_t int16x4_b = vreinterpret_s16_u64 (base_b);

  /* Lane index below the valid range.  */
  /* { dg-error "lane -1 out of range 0 - 3" "" {target *-*-*} 0 } */
  vqdmulh_lane_s16 (int16x4_a, int16x4_b, -1);
  /* Lane index one past the last valid lane.  */
  /* { dg-error "lane 4 out of range 0 - 3" "" {target *-*-*} 0 } */
  vqdmulh_lane_s16 (int16x4_a, int16x4_b, 4);
}
// Chooses between *c0 and *c1 using *c2 as the reference. The byte-wise
// absolute differences |c1 - c2| and |c0 - c2| are each summed over the four
// bytes; *c0 is returned when the first sum does not exceed the second,
// otherwise *c1 is returned.
static WEBP_INLINE uint32_t Select(const uint32_t* const c0,
                                   const uint32_t* const c1,
                                   const uint32_t* const c2) {
  const uint8x8_t ref = vreinterpret_u8_u64(vcreate_u64(*c2));
  const uint8x8_t cand0 = vreinterpret_u8_u64(vcreate_u64(*c0));
  const uint8x8_t cand1 = vreinterpret_u8_u64(vcreate_u64(*c1));
  // Per-byte distances to the reference.
  const uint8x8_t dist1 = vabd_u8(cand1, ref);  // |c1 - c2|
  const uint8x8_t dist0 = vabd_u8(cand0, ref);  // |c0 - c2|
  // Pairwise-widen to 16 bits; at most 510 per lane, so the signed
  // reinterpretation is lossless.
  const int16x4_t pairs1 = vreinterpret_s16_u16(vpaddl_u8(dist1));
  const int16x4_t pairs0 = vreinterpret_s16_u16(vpaddl_u8(dist0));
  // Lane 0 of the second pairwise add covers the four data bytes.
  const int32x2_t totals = vpaddl_s16(vsub_s16(pairs1, pairs0));
  if (vget_lane_s32(totals, 0) > 0) {
    return *c1;
  }
  return *c0;
}
/* Negative compile test for vqrdmulh_laneq_s16: both calls below pass a lane
   index outside 0 - 7 for the 128-bit second operand, and each is paired with
   the DejaGnu directive naming the diagnostic the compiler is expected to
   emit.  */
int main (int argc, char **argv)
{
  /* Fixed 64-bit patterns used to build the operands.  */
  uint64x1_t base_a = vcreate_u64 (0x1111222233334444ULL);
  uint64x1_t base_b = vcreate_u64 (0x5555666677778888ULL);
  uint64x1_t base_c = vcreate_u64 (0x9999aaaabbbbccccULL);
  /* Two 64-bit halves joined into the 128-bit vector that is lane-indexed.  */
  uint64x2_t baseq_b = vcombine_u64 (base_b, base_c);
  int16x4_t int16x4_a = vreinterpret_s16_u64 (base_a);
  int16x8_t int16x8_b = vreinterpretq_s16_u64 (baseq_b);

  /* Lane index below the valid range.  */
  /* { dg-error "lane -1 out of range 0 - 7" "" {target *-*-*} 0 } */
  vqrdmulh_laneq_s16 (int16x4_a, int16x8_b, -1);
  /* Lane index one past the last valid lane.  */
  /* { dg-error "lane 8 out of range 0 - 7" "" {target *-*-*} 0 } */
  vqrdmulh_laneq_s16 (int16x4_a, int16x8_b, 8);
}
/* Calls the vcreate_u64 intrinsic once and stores the result in a local that
   is never read afterwards.
   NOTE(review): arg0_uint64_t is passed without ever being assigned, so its
   value is indeterminate.  Presumably deliberate for a compile-only fixture;
   confirm before reusing this pattern in code that is actually executed.  */
void test_vcreateu64 (void)
{
  uint64x1_t out_uint64x1_t;
  uint64_t arg0_uint64_t;

  out_uint64x1_t = vcreate_u64 (arg0_uint64_t);
}
/* Negative compile test for vqdmlsl_lane_s16: both calls below pass a lane
   index outside 0 - 3, and each is paired with the DejaGnu directive naming
   the diagnostic the compiler is expected to emit.  */
int main (int argc, char **argv)
{
  /* Fixed 64-bit patterns used to build the operands.  */
  uint64x1_t base_b = vcreate_u64 (0x5555666677778888ULL);
  uint64x1_t base_c = vcreate_u64 (0x9999aaaabbbbccccULL);
  uint64x1_t base_a = vcreate_u64 (0x1111222233334444ULL);
  /* Two 64-bit halves joined into the 128-bit first operand.  */
  uint64x2_t baseq_a = vcombine_u64 (base_a, base_b);
  int16x4_t int16x4_b = vreinterpret_s16_u64 (base_b);
  int16x4_t int16x4_c = vreinterpret_s16_u64 (base_c);
  int32x4_t int32x4_a = vreinterpretq_s32_u64 (baseq_a);

  /* Lane index below the valid range.  */
  /* { dg-error "lane -1 out of range 0 - 3" "" {target *-*-*} 0 } */
  vqdmlsl_lane_s16 (int32x4_a, int16x4_b, int16x4_c, -1);
  /* Lane index one past the last valid lane.  */
  /* { dg-error "lane 4 out of range 0 - 3" "" {target *-*-*} 0 } */
  vqdmlsl_lane_s16 (int32x4_a, int16x4_b, int16x4_c, 4);
}
uint64x1_t test_vcreate_u64(uint64_t v1) { // CHECK: test_vcreate_u64 return vcreate_u64(v1); // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} }