/* Processing of gains */ void silk_process_gains_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control */ opus_int condCoding /* I The type of conditional coding to use */ ) { silk_shape_state_FIX *psShapeSt = &psEnc->sShape; opus_int k; opus_int32 s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10; /* Gain reduction when LTP coding gain is high */ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /*s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); */ s_Q16 = -silk_sigm_Q15( silk_RSHIFT_ROUND( psEncCtrl->LTPredCodGain_Q7 - SILK_FIX_CONST( 12.0, 7 ), 4 ) ); for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { psEncCtrl->Gains_Q16[ k ] = silk_SMLAWB( psEncCtrl->Gains_Q16[ k ], psEncCtrl->Gains_Q16[ k ], s_Q16 ); } } /* Limit the quantized signal */ /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */ InvMaxSqrVal_Q16 = silk_DIV32_16( silk_log2lin( silk_SMULWB( SILK_FIX_CONST( 21 + 16 / 0.33, 7 ) - psEnc->sCmn.SNR_dB_Q7, SILK_FIX_CONST( 0.33, 16 ) ) ), psEnc->sCmn.subfr_length ); for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { /* Soft limit on ratio residual energy and squared gains */ ResNrg = psEncCtrl->ResNrg[ k ]; ResNrgPart = silk_SMULWW( ResNrg, InvMaxSqrVal_Q16 ); if( psEncCtrl->ResNrgQ[ k ] > 0 ) { ResNrgPart = silk_RSHIFT_ROUND( ResNrgPart, psEncCtrl->ResNrgQ[ k ] ); } else { if( ResNrgPart >= silk_RSHIFT( silk_int32_MAX, -psEncCtrl->ResNrgQ[ k ] ) ) { ResNrgPart = silk_int32_MAX; } else { ResNrgPart = silk_LSHIFT( ResNrgPart, -psEncCtrl->ResNrgQ[ k ] ); } } gain = psEncCtrl->Gains_Q16[ k ]; gain_squared = silk_ADD_SAT32( ResNrgPart, silk_SMMUL( gain, gain ) ); if( gain_squared < silk_int16_MAX ) { /* recalculate with higher precision */ gain_squared = silk_SMLAWW( silk_LSHIFT( ResNrgPart, 16 ), gain, gain ); silk_assert( gain_squared > 0 ); gain = silk_SQRT_APPROX( gain_squared ); /* Q8 */ gain = silk_min( gain, silk_int32_MAX >> 8 ); psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 8 ); /* Q16 */ } else {
void silk_noise_shape_analysis_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ int arch /* I Run-time architecture */ ) { silk_shape_state_FIX *psShapeSt = &psEnc->sShape; opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; VARDECL( opus_int16, x_windowed ); const opus_int16 *x_ptr, *pitch_res_ptr; SAVE_STACK; /* Point to start of first LPC analysis block */ x_ptr = x - psEnc->sCmn.la_shape; /****************/ /* GAIN CONTROL */ /****************/ SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7; /* Input quality is the average of the quality in the lowest two VAD bands */ psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ] + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 ); /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 - SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 ); /* Reduce coding SNR during low speech activity */ if( psEnc->sCmn.useCBR == 0 ) { b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8; b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 ); SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/ silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/ }
opus_int silk_VAD_GetSA_Q8( /* O Return value, 0 if success */ silk_encoder_state *psEncC, /* I/O Encoder state */ const opus_int16 pIn[] /* I PCM input */ ) { opus_int SA_Q15, pSNR_dB_Q7, input_tilt; opus_int decimated_framelength1, decimated_framelength2; opus_int decimated_framelength; opus_int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s; opus_int32 sumSquared, smooth_coef_Q16; opus_int16 HPstateTmp; VARDECL( opus_int16, X ); opus_int32 Xnrg[ VAD_N_BANDS ]; opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ]; opus_int32 speech_nrg, x_tmp; opus_int X_offset[ VAD_N_BANDS ]; opus_int ret = 0; silk_VAD_state *psSilk_VAD = &psEncC->sVAD; SAVE_STACK; /* Safety checks */ silk_assert( VAD_N_BANDS == 4 ); silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); silk_assert( psEncC->frame_length <= 512 ); silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); /***********************/ /* Filter and Decimate */ /***********************/ decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 ); decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 ); decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 ); /* Decimate into 4 bands: 0 L 3L L 3L 5L - -- - -- -- 8 8 2 4 4 [0-1 kHz| temp. |1-2 kHz| 2-4 kHz | 4-8 kHz | They're arranged to allow the minimal ( frame_length / 4 ) extra scratch space during the downsampling process */ X_offset[ 0 ] = 0; X_offset[ 1 ] = decimated_framelength + decimated_framelength2; X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength; X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2; ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 ); /* 0-8 kHz to 0-4 kHz and 4-8 kHz */ silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ], X, &X[ X_offset[ 3 ] ], psEncC->frame_length ); /* 0-4 kHz to 0-2 kHz and 2-4 kHz */ silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ], X, &X[ X_offset[ 2 ] ], decimated_framelength1 ); /* 0-2 kHz to 0-1 kHz and 1-2 kHz */ silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ], X, &X[ X_offset[ 1 ] ], decimated_framelength2 ); /*********************************************/ /* HP filter on lowest band (differentiator) */ /*********************************************/ X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 ); HPstateTmp = X[ decimated_framelength - 1 ]; for( i = decimated_framelength - 1; i > 0; i-- ) { X[ i - 1 ] = silk_RSHIFT( X[ i - 1 ], 1 ); X[ i ] -= X[ i - 1 ]; } X[ 0 ] -= psSilk_VAD->HPstate; psSilk_VAD->HPstate = HPstateTmp; /*************************************/ /* Calculate the energy in each band */ /*************************************/ for( b = 0; b < VAD_N_BANDS; b++ ) { /* Find the decimated framelength in the non-uniformly divided bands */ decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) ); /* Split length into subframe lengths */ dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 ); dec_subframe_offset = 0; /* Compute energy per sub-frame */ /* initialize with summed energy of last subframe */ Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ]; for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) { sumSquared = 0; for( i = 0; i < dec_subframe_length; i++ ) { /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2. */ /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */ x_tmp = silk_RSHIFT( X[ X_offset[ b ] + i + dec_subframe_offset ], 3 ); sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp ); /* Safety check */ silk_assert( sumSquared >= 0 ); } /* Add/saturate summed energy of current subframe */ if( s < VAD_INTERNAL_SUBFRAMES - 1 ) { Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared ); } else { /* Look-ahead subframe */ Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) ); } dec_subframe_offset += dec_subframe_length; } psSilk_VAD->XnrgSubfr[ b ] = sumSquared; } /********************/ /* Noise estimation */ /********************/ silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD ); /***********************************************/ /* Signal-plus-noise to noise ratio estimation */ /***********************************************/ sumSquared = 0; input_tilt = 0; for( b = 0; b < VAD_N_BANDS; b++ ) { speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ]; if( speech_nrg > 0 ) { /* Divide, with sufficient resolution */ if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) { NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 ); } else { NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 ); } /* Convert to log domain */ SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128; /* Sum-of-squares */ sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */ /* Tilt measure */ if( speech_nrg < ( (opus_int32)1 << 20 ) ) { /* Scale down SNR value for small subband speech energies */ SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 ); } input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 ); } else { NrgToNoiseRatio_Q8[ b ] = 256; } } /* Mean-of-squares */ sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */ /* Root-mean-square approximation, scale to dBs, and write to output pointer */ pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */ /*********************************/ /* Speech Probability Estimation */ /*********************************/ SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 ); /**************************/ /* Frequency Tilt Measure */ /**************************/ psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 ); /**************************************************/ /* Scale the sigmoid output based on power levels */ /**************************************************/ speech_nrg = 0; for( b = 0; b < VAD_N_BANDS; b++ ) { /* Accumulate signal-without-noise energies, higher frequency bands have more weight */ speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); } /* Power scaling */ if( speech_nrg <= 0 ) { SA_Q15 = silk_RSHIFT( SA_Q15, 1 ); } else if( speech_nrg < 32768 ) { if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 ); } else { speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 ); } /* square-root */ speech_nrg = silk_SQRT_APPROX( speech_nrg ); SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 ); } /* Copy the resulting speech activity in Q8 */ psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX ); /***********************************/ /* Energy Level and SNR estimation */ /***********************************/ /* Smoothing coefficient */ smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) ); if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { smooth_coef_Q16 >>= 1; }