/**
 * Converts the four float lanes of `a` to 32-bit integers, rounding to the
 * nearest integer by biasing each lane with 0.5 carrying that lane's own sign
 * before the float-to-int conversion.
 *
 * @param a  Vector of four floats.
 * @return   Vector of four int32 lanes holding the converted values.
 */
inline v_int32x4 v_round(const v_float32x4& a)
{
    // Unsigned sign-bit mask: avoids `1 << 31`, which shifts into the sign bit
    // of a signed int. Kept as a plain local (not a function-local static) so
    // no static-initialization guard is executed on every call.
    const uint32x4_t sign_mask = vdupq_n_u32(0x80000000u);

    // Per lane: sign bit taken from `a`, every other bit from 0.5f,
    // i.e. copysign(0.5f, a).
    const float32x4_t signed_half = vbslq_f32(sign_mask, a.val, vdupq_n_f32(0.5f));

    // vcvtq_s32_f32 converts toward zero, so the +/-0.5 bias added first is
    // what turns the conversion into rounding.
    return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, signed_half)));
}
/**
 * Converts four floats to 32-bit integers, rounding to the nearest integer by
 * adding 0.5 with each lane's own sign before the float-to-int conversion.
 *
 * @param v  Four float lanes to convert.
 * @return   Four int32 lanes holding the converted values.
 */
inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
{
    // Unsigned sign-bit mask: avoids `1 << 31`, which shifts into the sign bit
    // of a signed int. A plain const local replaces the former mutable
    // function-local statics, so there is no shared writable state and no
    // static-initialization guard on the call path.
    const uint32x4_t sign_mask = vdupq_n_u32(0x80000000u);

    // Per lane: sign bit taken from `v`, every other bit from 0.5f,
    // i.e. copysign(0.5f, v).
    const float32x4_t signed_half = vbslq_f32(sign_mask, v, vdupq_n_f32(0.5f));

    // vcvtq_s32_f32 converts toward zero; the bias applied first makes the
    // overall operation a round-to-nearest.
    return vcvtq_s32_f32(vaddq_f32(v, signed_half));
}
/**
 * Bitwise complement of a float vector: every bit of every lane is inverted.
 *
 * @param a  Vector of four floats.
 * @return   Vector whose lanes hold the inverted bit patterns of `a`,
 *           reinterpreted back as floats.
 */
inline v_float32x4 operator ~ (const v_float32x4& a)
{
    // The complement is a bit operation, so view the lanes as integers first.
    const int32x4_t raw_bits = vreinterpretq_s32_f32(a.val);
    const int32x4_t flipped_bits = vmvnq_s32(raw_bits);

    // Reinterpret (no value conversion) the inverted bits back to float lanes.
    return v_float32x4(vreinterpretq_f32_s32(flipped_bits));
}