inline int32x4_t cv_vrndq_s32_f32(float32x4_t v) { static int32x4_t v_sign = vdupq_n_s32(1 << 31), v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f)); int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v))); return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition))); }
void test_vandQs32 (void) { int32x4_t out_int32x4_t; int32x4_t arg0_int32x4_t; int32x4_t arg1_int32x4_t; out_int32x4_t = vandq_s32 (arg0_int32x4_t, arg1_int32x4_t); }
inline v_int32x4 v_round(const v_float32x4& a) { static const int32x4_t v_sign = vdupq_n_s32(1 << 31), v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f)); int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val))); return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition)))); }
// Update the noise estimation information. static void UpdateNoiseEstimateNeon(NoiseSuppressionFixedC* inst, int offset) { const int16_t kExp2Const = 11819; // Q13 int16_t* ptr_noiseEstLogQuantile = NULL; int16_t* ptr_noiseEstQuantile = NULL; int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const); int32x4_t twentyOne32x4 = vdupq_n_s32(21); int32x4_t constA32x4 = vdupq_n_s32(0x1fffff); int32x4_t constB32x4 = vdupq_n_s32(0x200000); int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, inst->magnLen); // Guarantee a Q-domain as high as possible and still fit in int16 inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const, tmp16, 21); int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise); for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset], ptr_noiseEstQuantile = &inst->noiseEstQuantile[0]; ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3]; ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) { // tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i]; int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile); int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4); // tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4); v32x4A = vorrq_s32(v32x4A, constB32x4); // tmp16 = (int16_t)(tmp32no2 >> 21); v32x4B = vshrq_n_s32(v32x4B, 21); // tmp16 -= 21;// shift 21 to get result in Q0 v32x4B = vsubq_s32(v32x4B, twentyOne32x4); // tmp16 += (int16_t) inst->qNoise; // shift to get result in Q(qNoise) v32x4B = vaddq_s32(v32x4B, qNoise32x4); // if (tmp16 < 0) { // tmp32no1 >>= -tmp16; // } else { // tmp32no1 <<= tmp16; // } v32x4B = vshlq_s32(v32x4A, v32x4B); // tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1); v16x4 = vqmovn_s32(v32x4B); //inst->noiseEstQuantile[i] = tmp16; vst1_s16(ptr_noiseEstQuantile, v16x4); } // Last iteration: // inst->quantile[i]=exp(inst->lquantile[offset+i]); // in Q21 int32_t tmp32no2 = kExp2Const * *ptr_noiseEstLogQuantile; int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac tmp16 = (int16_t)(tmp32no2 >> 21); tmp16 -= 21;// shift 21 to get result in Q0 tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise) if (tmp16 < 0) { tmp32no1 >>= -tmp16; } else {
inline int32x4_t vandq(const int32x4_t & v0, const int32x4_t & v1) { return vandq_s32(v0, v1); }
int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: test_vandq_s32 return vandq_s32(a, b); // CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }