/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ static OPUS_INLINE void warped_true2monic_coefs( silk_float *coefs_syn, silk_float *coefs_ana, silk_float lambda, silk_float limit, opus_int order ) { opus_int i, iter, ind = 0; silk_float tmp, maxabs, chirp, gain_syn, gain_ana; /* Convert to monic coefficients */ for( i = order - 1; i > 0; i-- ) { coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ]; coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ]; } gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] ); gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] ); for( i = 0; i < order; i++ ) { coefs_syn[ i ] *= gain_syn; coefs_ana[ i ] *= gain_ana; } /* Limit */ for( iter = 0; iter < 10; iter++ ) { /* Find maximum absolute value */ maxabs = -1.0f; for( i = 0; i < order; i++ ) { tmp = silk_max( silk_abs_float( coefs_syn[ i ] ), silk_abs_float( coefs_ana[ i ] ) ); if( tmp > maxabs ) { maxabs = tmp; ind = i; } } if( maxabs <= limit ) { /* Coefficients are within range - done */ return; } /* Convert back to true warped coefficients */ for( i = 1; i < order; i++ ) { coefs_syn[ i - 1 ] += lambda * coefs_syn[ i ]; coefs_ana[ i - 1 ] += lambda * coefs_ana[ i ]; } gain_syn = 1.0f / gain_syn; gain_ana = 1.0f / gain_ana; for( i = 0; i < order; i++ ) { coefs_syn[ i ] *= gain_syn; coefs_ana[ i ] *= gain_ana; } /* Apply bandwidth expansion */ chirp = 0.99f - ( 0.8f + 0.1f * iter ) * ( maxabs - limit ) / ( maxabs * ( ind + 1 ) ); silk_bwexpander_FLP( coefs_syn, order, chirp ); silk_bwexpander_FLP( coefs_ana, order, chirp ); /* Convert to monic warped coefficients */ for( i = order - 1; i > 0; i-- ) { coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ]; coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ]; } gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] ); gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] ); for( i = 0; i < order; i++ ) { coefs_syn[ i ] *= gain_syn; coefs_ana[ i ] *= gain_ana; } } silk_assert( 0 ); }
void silk_find_pitch_lags_FLP( silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ silk_float res[], /* O Residual */ const silk_float x[], /* I Speech signal */ int arch /* I Run-time architecture */ ) { opus_int buf_len; silk_float thrhld, res_nrg; const silk_float *x_buf_ptr, *x_buf; silk_float auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; silk_float A[ MAX_FIND_PITCH_LPC_ORDER ]; silk_float refl_coef[ MAX_FIND_PITCH_LPC_ORDER ]; silk_float Wsig[ FIND_PITCH_LPC_WIN_MAX ]; silk_float *Wsig_ptr; /******************************************/ /* Set up buffer lengths etc based on Fs */ /******************************************/ buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; /* Safety check */ silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); x_buf = x - psEnc->sCmn.ltp_mem_length; /******************************************/ /* Estimate LPC AR coeficients */ /******************************************/ /* Calculate windowed signal */ /* First LA_LTP samples */ x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length; Wsig_ptr = Wsig; silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch ); /* Middle non-windowed samples */ Wsig_ptr += psEnc->sCmn.la_pitch; x_buf_ptr += psEnc->sCmn.la_pitch; silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ) ) * sizeof( silk_float ) ); /* Last LA_LTP samples */ Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ); x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ); silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); /* Calculate autocorrelation sequence */ silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 ); /* Add white noise, as a fraction of the energy */ auto_corr[ 0 ] += auto_corr[ 0 ] * FIND_PITCH_WHITE_NOISE_FRACTION + 1; /* Calculate the reflection coefficients using Schur */ res_nrg = silk_schur_FLP( refl_coef, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder ); /* Prediction gain */ psEncCtrl->predGain = auto_corr[ 0 ] / silk_max_float( res_nrg, 1.0f ); /* Convert reflection coefficients to prediction coefficients */ silk_k2a_FLP( A, refl_coef, psEnc->sCmn.pitchEstimationLPCOrder ); /* Bandwidth expansion */ silk_bwexpander_FLP( A, psEnc->sCmn.pitchEstimationLPCOrder, FIND_PITCH_BANDWIDTH_EXPANSION ); /*****************************************/ /* LPC analysis filtering */ /*****************************************/ silk_LPC_analysis_filter_FLP( res, A, x_buf, buf_len, psEnc->sCmn.pitchEstimationLPCOrder ); if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { /* Threshold for pitch estimator */ thrhld = 0.6f; thrhld -= 0.004f * psEnc->sCmn.pitchEstimationLPCOrder; thrhld -= 0.1f * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); thrhld -= 0.15f * (psEnc->sCmn.prevSignalType >> 1); thrhld -= 0.1f * psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ); /*****************************************/ /* Call Pitch estimator */ /*****************************************/ if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f, thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 ) { psEnc->sCmn.indices.signalType = TYPE_VOICED; } else { psEnc->sCmn.indices.signalType = TYPE_UNVOICED; } } else {
/* Compute noise shaping coefficients and initial gain values */ void silk_noise_shape_analysis_FLP( silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ const silk_float *pitch_res, /* I LPC residual from pitch analysis */ const silk_float *x /* I Input signal [frame_length + la_shape] */ ) { silk_shape_state_FLP *psShapeSt = &psEnc->sShape; opus_int k, nSamples; silk_float SNR_adj_dB, HarmBoost, HarmShapeGain, Tilt; silk_float nrg, pre_nrg, log_energy, log_energy_prev, energy_variation; silk_float delta, BWExp1, BWExp2, gain_mult, gain_add, strength, b, warping; silk_float x_windowed[ SHAPE_LPC_WIN_MAX ]; silk_float auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; const silk_float *x_ptr, *pitch_res_ptr; /* Point to start of first LPC analysis block */ x_ptr = x - psEnc->sCmn.la_shape; /****************/ /* GAIN CONTROL */ /****************/ SNR_adj_dB = psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ); /* Input quality is the average of the quality in the lowest two VAD bands */ psEncCtrl->input_quality = 0.5f * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] + psEnc->sCmn.input_quality_bands_Q15[ 1 ] ) * ( 1.0f / 32768.0f ); /* Coding quality level, between 0.0 and 1.0 */ psEncCtrl->coding_quality = silk_sigmoid( 0.25f * ( SNR_adj_dB - 20.0f ) ); if( psEnc->sCmn.useCBR == 0 ) { /* Reduce coding SNR during low speech activity */ b = 1.0f - psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); SNR_adj_dB -= BG_SNR_DECR_dB * psEncCtrl->coding_quality * ( 0.5f + 0.5f * psEncCtrl->input_quality ) * b * b; } if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /* Reduce gains for periodic signals */ SNR_adj_dB += HARM_SNR_INCR_dB * psEnc->LTPCorr; } else { /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ SNR_adj_dB += ( -0.4f * psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) + 6.0f ) * ( 1.0f - psEncCtrl->input_quality ); } /*************************/ /* SPARSENESS PROCESSING */ /*************************/ /* Set quantizer offset */ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /* Initially set to 0; may be overruled in process_gains(..) */ psEnc->sCmn.indices.quantOffsetType = 0; psEncCtrl->sparseness = 0.0f; } else { /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ nSamples = 2 * psEnc->sCmn.fs_kHz; energy_variation = 0.0f; log_energy_prev = 0.0f; pitch_res_ptr = pitch_res; for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { nrg = ( silk_float )nSamples + ( silk_float )silk_energy_FLP( pitch_res_ptr, nSamples ); log_energy = silk_log2( nrg ); if( k > 0 ) { energy_variation += silk_abs_float( log_energy - log_energy_prev ); } log_energy_prev = log_energy; pitch_res_ptr += nSamples; } psEncCtrl->sparseness = silk_sigmoid( 0.4f * ( energy_variation - 5.0f ) ); /* Set quantization offset depending on sparseness measure */ if( psEncCtrl->sparseness > SPARSENESS_THRESHOLD_QNT_OFFSET ) { psEnc->sCmn.indices.quantOffsetType = 0; } else { psEnc->sCmn.indices.quantOffsetType = 1; } /* Increase coding SNR for sparse signals */ SNR_adj_dB += SPARSE_SNR_INCR_dB * ( psEncCtrl->sparseness - 0.5f ); } /*******************************/ /* Control bandwidth expansion */ /*******************************/ /* More BWE for signals with high prediction gain */ strength = FIND_PITCH_WHITE_NOISE_FRACTION * psEncCtrl->predGain; /* between 0.0 and 1.0 */ BWExp1 = BWExp2 = BANDWIDTH_EXPANSION / ( 1.0f + strength * strength ); delta = LOW_RATE_BANDWIDTH_EXPANSION_DELTA * ( 1.0f - 0.75f * psEncCtrl->coding_quality ); BWExp1 -= delta; BWExp2 += delta; /* BWExp1 will be applied after BWExp2, so make it relative */ BWExp1 /= BWExp2; if( psEnc->sCmn.warping_Q16 > 0 ) { /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f + 0.01f * psEncCtrl->coding_quality; } else { warping = 0.0f; } /********************************************/ /* Compute noise shaping AR coefs and gains */ /********************************************/ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { /* Apply window: sine slope followed by flat part followed by cosine slope */ opus_int shift, slope_part, flat_part; flat_part = psEnc->sCmn.fs_kHz * 3; slope_part = ( psEnc->sCmn.shapeWinLength - flat_part ) / 2; silk_apply_sine_window_FLP( x_windowed, x_ptr, 1, slope_part ); shift = slope_part; silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(silk_float) ); shift += flat_part; silk_apply_sine_window_FLP( x_windowed + shift, x_ptr + shift, 2, slope_part ); /* Update pointer: next LPC analysis block */ x_ptr += psEnc->sCmn.subfr_length; if( psEnc->sCmn.warping_Q16 > 0 ) { /* Calculate warped auto correlation */ silk_warped_autocorrelation_FLP( auto_corr, x_windowed, warping, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); } else { /* Calculate regular auto correlation */ silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 ); } /* Add white noise, as a fraction of energy */ auto_corr[ 0 ] += auto_corr[ 0 ] * SHAPE_WHITE_NOISE_FRACTION; /* Convert correlations to prediction coefficients, and compute residual energy */ nrg = silk_levinsondurbin_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], auto_corr, psEnc->sCmn.shapingLPCOrder ); psEncCtrl->Gains[ k ] = ( silk_float )sqrt( nrg ); if( psEnc->sCmn.warping_Q16 > 0 ) { /* Adjust gain for warping */ psEncCtrl->Gains[ k ] *= warped_gain( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], warping, psEnc->sCmn.shapingLPCOrder ); } /* Bandwidth expansion for synthesis filter shaping */ silk_bwexpander_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp2 ); /* Compute noise shaping filter coefficients */ silk_memcpy( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder * sizeof( silk_float ) ); /* Bandwidth expansion for analysis filter shaping */ silk_bwexpander_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp1 ); /* Ratio of prediction gains, in energy domain */ pre_nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder ); nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder ); psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ); /* Convert to monic warped prediction coefficients and limit absolute values */ warped_true2monic_coefs( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], warping, 3.999f, psEnc->sCmn.shapingLPCOrder ); } /*****************/ /* Gain tweaking */ /*****************/ /* Increase gains during low speech activity */ gain_mult = (silk_float)pow( 2.0f, -0.16f * SNR_adj_dB ); gain_add = (silk_float)pow( 2.0f, 0.16f * MIN_QGAIN_DB ); for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { psEncCtrl->Gains[ k ] *= gain_mult; psEncCtrl->Gains[ k ] += gain_add; } gain_mult = 1.0f + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT; for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { psEncCtrl->GainsPre[ k ] *= gain_mult; } /************************************************/ /* Control low-frequency shaping and noise tilt */ /************************************************/ /* Less low frequency shaping for noisy inputs */ strength = LOW_FREQ_SHAPING * ( 1.0f + LOW_QUALITY_LOW_FREQ_SHAPING_DECR * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] * ( 1.0f / 32768.0f ) - 1.0f ) ); strength *= psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { b = 0.2f / psEnc->sCmn.fs_kHz + 3.0f / psEncCtrl->pitchL[ k ]; psEncCtrl->LF_MA_shp[ k ] = -1.0f + b; psEncCtrl->LF_AR_shp[ k ] = 1.0f - b - b * strength; } Tilt = - HP_NOISE_COEF - (1 - HP_NOISE_COEF) * HARM_HP_NOISE_COEF * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); } else { b = 1.3f / psEnc->sCmn.fs_kHz; psEncCtrl->LF_MA_shp[ 0 ] = -1.0f + b; psEncCtrl->LF_AR_shp[ 0 ] = 1.0f - b - b * strength * 0.6f; for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { psEncCtrl->LF_MA_shp[ k ] = psEncCtrl->LF_MA_shp[ 0 ]; psEncCtrl->LF_AR_shp[ k ] = psEncCtrl->LF_AR_shp[ 0 ]; } Tilt = -HP_NOISE_COEF; } /****************************/ /* HARMONIC SHAPING CONTROL */ /****************************/ /* Control boosting of harmonic frequencies */ HarmBoost = LOW_RATE_HARMONIC_BOOST * ( 1.0f - psEncCtrl->coding_quality ) * psEnc->LTPCorr; /* More harmonic boost for noisy input signals */ HarmBoost += LOW_INPUT_QUALITY_HARMONIC_BOOST * ( 1.0f - psEncCtrl->input_quality ); if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /* Harmonic noise shaping */ HarmShapeGain = HARMONIC_SHAPING; /* More harmonic noise shaping for high bitrates or noisy input */ HarmShapeGain += HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING * ( 1.0f - ( 1.0f - psEncCtrl->coding_quality ) * psEncCtrl->input_quality ); /* Less harmonic noise shaping for less periodic signals */ HarmShapeGain *= ( silk_float )sqrt( psEnc->LTPCorr ); } else { HarmShapeGain = 0.0f; } /*************************/ /* Smooth over subframes */ /*************************/ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { psShapeSt->HarmBoost_smth += SUBFR_SMTH_COEF * ( HarmBoost - psShapeSt->HarmBoost_smth ); psEncCtrl->HarmBoost[ k ] = psShapeSt->HarmBoost_smth; psShapeSt->HarmShapeGain_smth += SUBFR_SMTH_COEF * ( HarmShapeGain - psShapeSt->HarmShapeGain_smth ); psEncCtrl->HarmShapeGain[ k ] = psShapeSt->HarmShapeGain_smth; psShapeSt->Tilt_smth += SUBFR_SMTH_COEF * ( Tilt - psShapeSt->Tilt_smth ); psEncCtrl->Tilt[ k ] = psShapeSt->Tilt_smth; } }