/* Gain scalar quantization with hysteresis, uniform on log scale */ void SKP_Silk_gains_quant( SKP_int8 ind[ MAX_NB_SUBFR ], /* O gain indices */ SKP_int32 gain_Q16[ MAX_NB_SUBFR ], /* I/O gains (quantized out) */ SKP_int8 *prev_ind, /* I/O last index in previous frame */ const SKP_int conditional, /* I first gain is delta coded if 1 */ const SKP_int nb_subfr /* I number of subframes */ ) { SKP_int k, double_step_size_threshold; for( k = 0; k < nb_subfr; k++ ) { /* Add half of previous quantization error, convert to log scale, scale, floor() */ ind[ k ] = SKP_SMULWB( SCALE_Q16, SKP_Silk_lin2log( gain_Q16[ k ] ) - OFFSET ); /* Round towards previous quantized gain (hysteresis) */ if( ind[ k ] < *prev_ind ) { ind[ k ]++; } ind[ k ] = SKP_max_int( ind[ k ], 0 ); /* Compute delta indices and limit */ if( k == 0 && conditional == 0 ) { /* Full index */ ind[ k ] = SKP_LIMIT_int( ind[ k ], *prev_ind + MIN_DELTA_GAIN_QUANT, N_LEVELS_QGAIN - 1 ); *prev_ind = ind[ k ]; } else { /* Delta index */ ind[ k ] = ind[ k ] - *prev_ind; /* Double the quantization step size for large gain increases, so that the max gain level can be reached */ double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind; if( ind[ k ] > double_step_size_threshold ) { ind[ k ] = double_step_size_threshold + SKP_RSHIFT( ind[ k ] - double_step_size_threshold + 1, 1 ); } ind[ k ] = SKP_LIMIT_int( ind[ k ], MIN_DELTA_GAIN_QUANT, MAX_DELTA_GAIN_QUANT ); /* Accumulate deltas */ if( ind[ k ] > double_step_size_threshold ) { *prev_ind += SKP_LSHIFT( ind[ k ], 1 ) - double_step_size_threshold; } else { *prev_ind += ind[ k ]; } /* Shift to make non-negative */ ind[ k ] -= MIN_DELTA_GAIN_QUANT; } /* Convert to linear scale and scale */ gain_Q16[ k ] = SKP_Silk_log2lin( SKP_min_32( SKP_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */ } }
/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */ void SKP_Silk_HP_variable_cutoff_FIX( SKP_Silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ SKP_Silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ SKP_int16 *out, /* O high-pass filtered output signal */ const SKP_int16 *in /* I input signal */ ) { SKP_int quality_Q15; SKP_int32 B_Q28[ 3 ], A_Q28[ 2 ]; SKP_int32 Fc_Q19, r_Q28, r_Q22; SKP_int32 pitch_freq_Hz_Q16, pitch_freq_log_Q7, delta_freq_Q7; /*********************************************/ /* Estimate Low End of Pitch Frequency Range */ /*********************************************/ if( psEnc->sCmn.prev_sigtype == SIG_TYPE_VOICED ) { /* difference, in log domain */ pitch_freq_Hz_Q16 = SKP_DIV32_16( SKP_LSHIFT( SKP_MUL( psEnc->sCmn.fs_kHz, 1000 ), 16 ), psEnc->sCmn.prevLag ); pitch_freq_log_Q7 = SKP_Silk_lin2log( pitch_freq_Hz_Q16 ) - ( 16 << 7 ); //0x70 /* adjustment based on quality */ quality_Q15 = psEncCtrl->input_quality_bands_Q15[ 0 ]; pitch_freq_log_Q7 = SKP_SUB32( pitch_freq_log_Q7, SKP_SMULWB( SKP_SMULWB( SKP_LSHIFT( quality_Q15, 2 ), quality_Q15 ), pitch_freq_log_Q7 - SKP_LOG2_VARIABLE_HP_MIN_FREQ_Q7 ) ); pitch_freq_log_Q7 = SKP_ADD32( pitch_freq_log_Q7, SKP_RSHIFT( SKP_FIX_CONST( 0.6, 15 ) - quality_Q15, 9 ) ); //delta_freq = pitch_freq_log - psEnc->variable_HP_smth1; delta_freq_Q7 = pitch_freq_log_Q7 - SKP_RSHIFT( psEnc->variable_HP_smth1_Q15, 8 ); if( delta_freq_Q7 < 0 ) { /* less smoothing for decreasing pitch frequency, to track something close to the minimum */ delta_freq_Q7 = SKP_MUL( delta_freq_Q7, 3 ); } /* limit delta, to reduce impact of outliers */ delta_freq_Q7 = SKP_LIMIT_32( delta_freq_Q7, -SKP_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ), SKP_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ) ); /* update smoother */ psEnc->variable_HP_smth1_Q15 = SKP_SMLAWB( psEnc->variable_HP_smth1_Q15, SKP_MUL( SKP_LSHIFT( psEnc->speech_activity_Q8, 1 ), delta_freq_Q7 ), SKP_FIX_CONST( VARIABLE_HP_SMTH_COEF1, 16 ) ); } /* second smoother */ psEnc->variable_HP_smth2_Q15 = SKP_SMLAWB( psEnc->variable_HP_smth2_Q15, psEnc->variable_HP_smth1_Q15 - psEnc->variable_HP_smth2_Q15, SKP_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) ); /* convert from log scale to Hertz */ psEncCtrl->pitch_freq_low_Hz = SKP_Silk_log2lin( SKP_RSHIFT( psEnc->variable_HP_smth2_Q15, 8 ) ); /* limit frequency range */ psEncCtrl->pitch_freq_low_Hz = SKP_LIMIT_32( psEncCtrl->pitch_freq_low_Hz, SKP_FIX_CONST( VARIABLE_HP_MIN_FREQ, 0 ), SKP_FIX_CONST( VARIABLE_HP_MAX_FREQ, 0 ) ); /********************************/ /* Compute Filter Coefficients */ /********************************/ /* compute cut-off frequency, in radians */ //Fc_num = (SKP_float)( 0.45f * 2.0f * 3.14159265359 * psEncCtrl->pitch_freq_low_Hz ); //Fc_denom = (SKP_float)( 1e3f * psEnc->sCmn.fs_kHz ); SKP_assert( psEncCtrl->pitch_freq_low_Hz <= SKP_int32_MAX / SKP_RADIANS_CONSTANT_Q19 ); Fc_Q19 = SKP_DIV32_16( SKP_SMULBB( SKP_RADIANS_CONSTANT_Q19, psEncCtrl->pitch_freq_low_Hz ), psEnc->sCmn.fs_kHz ); // range: 3704 - 27787, 11-15 bits SKP_assert( Fc_Q19 >= 3704 ); SKP_assert( Fc_Q19 <= 27787 ); r_Q28 = SKP_FIX_CONST( 1.0, 28 ) - SKP_MUL( SKP_FIX_CONST( 0.92, 9 ), Fc_Q19 ); SKP_assert( r_Q28 >= 255347779 ); SKP_assert( r_Q28 <= 266690872 ); /* b = r * [ 1; -2; 1 ]; */ /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */ B_Q28[ 0 ] = r_Q28; B_Q28[ 1 ] = SKP_LSHIFT( -r_Q28, 1 ); B_Q28[ 2 ] = r_Q28; // -r * ( 2 - Fc * Fc ); r_Q22 = SKP_RSHIFT( r_Q28, 6 ); A_Q28[ 0 ] = SKP_SMULWW( r_Q22, SKP_SMULWW( Fc_Q19, Fc_Q19 ) - SKP_FIX_CONST( 2.0, 22 ) ); A_Q28[ 1 ] = SKP_SMULWW( r_Q22, r_Q22 ); /********************************/ /* High-Pass Filter */ /********************************/ SKP_Silk_biquad_alt( in, B_Q28, A_Q28, psEnc->sCmn.In_HP_State, out, psEnc->sCmn.frame_length ); }
SKP_int SKP_Silk_VAD_GetSA_Q8( /* O Return value, 0 if success */ SKP_Silk_encoder_state *psEncC, /* I/O Encoder state */ const SKP_int16 pIn[] /* I PCM input */ ) { SKP_int SA_Q15, pSNR_dB_Q7, input_tilt; SKP_int decimated_framelength, dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s; SKP_int32 sumSquared, smooth_coef_Q16; SKP_int16 HPstateTmp; SKP_int16 X[ VAD_N_BANDS ][ MAX_FRAME_LENGTH / 2 ]; SKP_int32 Xnrg[ VAD_N_BANDS ]; SKP_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ]; SKP_int32 speech_nrg, x_tmp; SKP_int ret = 0; SKP_Silk_VAD_state *psSilk_VAD = &psEncC->sVAD; /* Safety checks */ SKP_assert( VAD_N_BANDS == 4 ); SKP_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); SKP_assert( psEncC->frame_length <= 512 ); SKP_assert( psEncC->frame_length == 8 * SKP_RSHIFT( psEncC->frame_length, 3 ) ); /***********************/ /* Filter and Decimate */ /***********************/ /* 0-8 kHz to 0-4 kHz and 4-8 kHz */ SKP_Silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ], &X[ 0 ][ 0 ], &X[ 3 ][ 0 ], psEncC->frame_length ); /* 0-4 kHz to 0-2 kHz and 2-4 kHz */ SKP_Silk_ana_filt_bank_1( &X[ 0 ][ 0 ], &psSilk_VAD->AnaState1[ 0 ], &X[ 0 ][ 0 ], &X[ 2 ][ 0 ], SKP_RSHIFT( psEncC->frame_length, 1 ) ); /* 0-2 kHz to 0-1 kHz and 1-2 kHz */ SKP_Silk_ana_filt_bank_1( &X[ 0 ][ 0 ], &psSilk_VAD->AnaState2[ 0 ], &X[ 0 ][ 0 ], &X[ 1 ][ 0 ], SKP_RSHIFT( psEncC->frame_length, 2 ) ); /*********************************************/ /* HP filter on lowest band (differentiator) */ /*********************************************/ decimated_framelength = SKP_RSHIFT( psEncC->frame_length, 3 ); X[ 0 ][ decimated_framelength - 1 ] = SKP_RSHIFT( X[ 0 ][ decimated_framelength - 1 ], 1 ); HPstateTmp = X[ 0 ][ decimated_framelength - 1 ]; for( i = decimated_framelength - 1; i > 0; i-- ) { X[ 0 ][ i - 1 ] = SKP_RSHIFT( X[ 0 ][ i - 1 ], 1 ); X[ 0 ][ i ] -= X[ 0 ][ i - 1 ]; } X[ 0 ][ 0 ] -= psSilk_VAD->HPstate; psSilk_VAD->HPstate = HPstateTmp; /*************************************/ /* Calculate the energy in each band */ /*************************************/ for( b = 0; b < VAD_N_BANDS; b++ ) { /* Find the decimated framelength in the non-uniformly divided bands */ decimated_framelength = SKP_RSHIFT( psEncC->frame_length, SKP_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) ); /* Split length into subframe lengths */ dec_subframe_length = SKP_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 ); dec_subframe_offset = 0; /* Compute energy per sub-frame */ /* initialize with summed energy of last subframe */ Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ]; for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) { sumSquared = 0; for( i = 0; i < dec_subframe_length; i++ ) { /* The energy will be less than dec_subframe_length * ( SKP_int16_MIN / 8 ) ^ 2. */ /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */ x_tmp = SKP_RSHIFT( X[ b ][ i + dec_subframe_offset ], 3 ); sumSquared = SKP_SMLABB( sumSquared, x_tmp, x_tmp ); /* Safety check */ SKP_assert( sumSquared >= 0 ); } /* Add/saturate summed energy of current subframe */ if( s < VAD_INTERNAL_SUBFRAMES - 1 ) { Xnrg[ b ] = SKP_ADD_POS_SAT32( Xnrg[ b ], sumSquared ); } else { /* Look-ahead subframe */ Xnrg[ b ] = SKP_ADD_POS_SAT32( Xnrg[ b ], SKP_RSHIFT( sumSquared, 1 ) ); } dec_subframe_offset += dec_subframe_length; } psSilk_VAD->XnrgSubfr[ b ] = sumSquared; } /********************/ /* Noise estimation */ /********************/ SKP_Silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD ); /***********************************************/ /* Signal-plus-noise to noise ratio estimation */ /***********************************************/ sumSquared = 0; input_tilt = 0; for( b = 0; b < VAD_N_BANDS; b++ ) { speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ]; if( speech_nrg > 0 ) { /* Divide, with sufficient resolution */ if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) { NrgToNoiseRatio_Q8[ b ] = SKP_DIV32( SKP_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 ); } else { NrgToNoiseRatio_Q8[ b ] = SKP_DIV32( Xnrg[ b ], SKP_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 ); } /* Convert to log domain */ SNR_Q7 = SKP_Silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128; /* Sum-of-squares */ sumSquared = SKP_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */ /* Tilt measure */ if( speech_nrg < ( 1 << 20 ) ) { /* Scale down SNR value for small subband speech energies */ SNR_Q7 = SKP_SMULWB( SKP_LSHIFT( SKP_Silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 ); } input_tilt = SKP_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 ); } else { NrgToNoiseRatio_Q8[ b ] = 256; } } /* Mean-of-squares */ sumSquared = SKP_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */ /* Root-mean-square approximation, scale to dBs, and write to output pointer */ pSNR_dB_Q7 = ( SKP_int16 )( 3 * SKP_Silk_SQRT_APPROX( sumSquared ) ); /* Q7 */ /*********************************/ /* Speech Probability Estimation */ /*********************************/ SA_Q15 = SKP_Silk_sigm_Q15( SKP_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 ); /**************************/ /* Frequency Tilt Measure */ /**************************/ psEncC->input_tilt_Q15 = SKP_LSHIFT( SKP_Silk_sigm_Q15( input_tilt ) - 16384, 1 ); /**************************************************/ /* Scale the sigmoid output based on power levels */ /**************************************************/ speech_nrg = 0; for( b = 0; b < VAD_N_BANDS; b++ ) { /* Accumulate signal-without-noise energies, higher frequency bands have more weight */ speech_nrg += ( b + 1 ) * SKP_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); } /* Power scaling */ if( speech_nrg <= 0 ) { SA_Q15 = SKP_RSHIFT( SA_Q15, 1 ); } else if( speech_nrg < 32768 ) { if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { speech_nrg = SKP_LSHIFT_SAT32( speech_nrg, 16 ); } else { speech_nrg = SKP_LSHIFT_SAT32( speech_nrg, 15 ); } /* square-root */ speech_nrg = SKP_Silk_SQRT_APPROX( speech_nrg ); SA_Q15 = SKP_SMULWB( 32768 + speech_nrg, SA_Q15 ); } /* Copy the resulting speech activity in Q8 */ psEncC->speech_activity_Q8 = SKP_min_int( SKP_RSHIFT( SA_Q15, 7 ), SKP_uint8_MAX ); /***********************************/ /* Energy Level and SNR estimation */ /***********************************/ /* Smoothing coefficient */ smooth_coef_Q16 = SKP_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, SKP_SMULWB( SA_Q15, SA_Q15 ) ); if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { smooth_coef_Q16 >>= 1; }
void SKP_Silk_noise_shape_analysis_FIX( SKP_Silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ SKP_Silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ const SKP_int16 *pitch_res, /* I LPC residual from pitch analysis */ const SKP_int16 *x /* I Input signal [ frame_length + la_shape ] */ ) { SKP_Silk_shape_state_FIX *psShapeSt = &psEnc->sShape; SKP_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; SKP_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; SKP_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; SKP_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; SKP_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; SKP_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; SKP_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; SKP_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; SKP_int16 x_windowed[ SHAPE_LPC_WIN_MAX ]; const SKP_int16 *x_ptr, *pitch_res_ptr; SKP_int32 sqrt_nrg[ NB_SUBFR ], Qnrg_vec[ NB_SUBFR ]; /* Point to start of first LPC analysis block */ x_ptr = x - psEnc->sCmn.la_shape; /****************/ /* CONTROL SNR */ /****************/ /* Reduce SNR_dB values if recent bitstream has exceeded TargetRate */ psEncCtrl->current_SNR_dB_Q7 = psEnc->SNR_dB_Q7 - SKP_SMULWB( SKP_LSHIFT( ( SKP_int32 )psEnc->BufferedInChannel_ms, 7 ), SKP_FIX_CONST( 0.05, 16 ) ); /* Reduce SNR_dB if inband FEC used */ if( psEnc->speech_activity_Q8 > SKP_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) { psEncCtrl->current_SNR_dB_Q7 -= SKP_RSHIFT( psEnc->inBandFEC_SNR_comp_Q8, 1 ); } /****************/ /* GAIN CONTROL */ /****************/ /* Input quality is the average of the quality in the lowest two VAD bands */ psEncCtrl->input_quality_Q14 = ( SKP_int )SKP_RSHIFT( ( SKP_int32 )psEncCtrl->input_quality_bands_Q15[ 0 ] + psEncCtrl->input_quality_bands_Q15[ 1 ], 2 ); /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ psEncCtrl->coding_quality_Q14 = SKP_RSHIFT( SKP_Silk_sigm_Q15( SKP_RSHIFT_ROUND( psEncCtrl->current_SNR_dB_Q7 - SKP_FIX_CONST( 18.0, 7 ), 4 ) ), 1 ); /* Reduce coding SNR during low speech activity */ b_Q8 = SKP_FIX_CONST( 1.0, 8 ) - psEnc->speech_activity_Q8; b_Q8 = SKP_SMULWB( SKP_LSHIFT( b_Q8, 8 ), b_Q8 ); SNR_adj_dB_Q7 = SKP_SMLAWB( psEncCtrl->current_SNR_dB_Q7, SKP_SMULBB( SKP_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), // Q11 SKP_SMULWB( SKP_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); // Q12 if( psEncCtrl->sCmn.sigtype == SIG_TYPE_VOICED ) { /* Reduce gains for periodic signals */ SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, SKP_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 ); } else { /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, SKP_SMLAWB( SKP_FIX_CONST( 6.0, 9 ), -SKP_FIX_CONST( 0.4, 18 ), psEncCtrl->current_SNR_dB_Q7 ), SKP_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 ); } /*************************/ /* SPARSENESS PROCESSING */ /*************************/ /* Set quantizer offset */ if( psEncCtrl->sCmn.sigtype == SIG_TYPE_VOICED ) { /* Initally set to 0; may be overruled in process_gains(..) */ psEncCtrl->sCmn.QuantOffsetType = 0; psEncCtrl->sparseness_Q8 = 0; } else { /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ nSamples = SKP_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); energy_variation_Q7 = 0; log_energy_prev_Q7 = 0; pitch_res_ptr = pitch_res; for( k = 0; k < FRAME_LENGTH_MS / 2; k++ ) { SKP_Silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); nrg += SKP_RSHIFT( nSamples, scale ); // Q(-scale) log_energy_Q7 = SKP_Silk_lin2log( nrg ); if( k > 0 ) { energy_variation_Q7 += SKP_abs( log_energy_Q7 - log_energy_prev_Q7 ); } log_energy_prev_Q7 = log_energy_Q7; pitch_res_ptr += nSamples; } psEncCtrl->sparseness_Q8 = SKP_RSHIFT( SKP_Silk_sigm_Q15( SKP_SMULWB( energy_variation_Q7 - SKP_FIX_CONST( 5.0, 7 ), SKP_FIX_CONST( 0.1, 16 ) ) ), 7 ); /* Set quantization offset depending on sparseness measure */ if( psEncCtrl->sparseness_Q8 > SKP_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { psEncCtrl->sCmn.QuantOffsetType = 0; } else { psEncCtrl->sCmn.QuantOffsetType = 1; } /* Increase coding SNR for sparse signals */ SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, SKP_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SKP_FIX_CONST( 0.5, 8 ) ); } /*******************************/ /* Control bandwidth expansion */ /*******************************/ /* More BWE for signals with high prediction gain */ strength_Q16 = SKP_SMULWB( psEncCtrl->predGain_Q16, SKP_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); BWExp1_Q16 = BWExp2_Q16 = SKP_DIV32_varQ( SKP_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), SKP_SMLAWW( SKP_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); delta_Q16 = SKP_SMULWB( SKP_FIX_CONST( 1.0, 16 ) - SKP_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), SKP_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); BWExp1_Q16 = SKP_SUB32( BWExp1_Q16, delta_Q16 ); BWExp2_Q16 = SKP_ADD32( BWExp2_Q16, delta_Q16 ); /* BWExp1 will be applied after BWExp2, so make it relative */ BWExp1_Q16 = SKP_DIV32_16( SKP_LSHIFT( BWExp1_Q16, 14 ), SKP_RSHIFT( BWExp2_Q16, 2 ) ); if( psEnc->sCmn.warping_Q16 > 0 ) { /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ warping_Q16 = SKP_SMLAWB( psEnc->sCmn.warping_Q16, psEncCtrl->coding_quality_Q14, SKP_FIX_CONST( 0.01, 18 ) ); } else { warping_Q16 = 0; } /********************************************/ /* Compute noise shaping AR coefs and gains */ /********************************************/ for( k = 0; k < NB_SUBFR; k++ ) { /* Apply window: sine slope followed by flat part followed by cosine slope */ SKP_int shift, slope_part, flat_part; flat_part = psEnc->sCmn.fs_kHz * 5; slope_part = SKP_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 ); SKP_Silk_apply_sine_window_new( x_windowed, x_ptr, 1, slope_part ); shift = slope_part; SKP_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(SKP_int16) ); shift += flat_part; SKP_Silk_apply_sine_window_new( x_windowed + shift, x_ptr + shift, 2, slope_part ); /* Update pointer: next LPC analysis block */ x_ptr += psEnc->sCmn.subfr_length; if( psEnc->sCmn.warping_Q16 > 0 ) { /* Calculate warped auto correlation */ SKP_Silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); } else { /* Calculate regular auto correlation */ SKP_Silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 ); } /* Add white noise, as a fraction of energy */ auto_corr[0] = SKP_ADD32( auto_corr[0], SKP_max_32( SKP_SMULWB( SKP_RSHIFT( auto_corr[ 0 ], 4 ), SKP_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) ); /* Calculate the reflection coefficients using schur */ nrg = SKP_Silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); SKP_assert( nrg >= 0 ); /* Convert reflection coefficients to prediction coefficients */ SKP_Silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); Qnrg = -scale; // range: -12...30 SKP_assert( Qnrg >= -12 ); SKP_assert( Qnrg <= 30 ); /* Make sure that Qnrg is an even number */ if( Qnrg & 1 ) { Qnrg -= 1; nrg >>= 1; } tmp32 = SKP_Silk_SQRT_APPROX( nrg ); Qnrg >>= 1; // range: -6...15 sqrt_nrg[ k ] = tmp32; Qnrg_vec[ k ] = Qnrg; psEncCtrl->Gains_Q16[ k ] = SKP_LSHIFT_SAT32( tmp32, 16 - Qnrg ); if( psEnc->sCmn.warping_Q16 > 0 ) { /* Adjust gain for warping */ gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); SKP_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); psEncCtrl->Gains_Q16[ k ] = SKP_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); if( psEncCtrl->Gains_Q16[ k ] < 0 ) { psEncCtrl->Gains_Q16[ k ] = SKP_int32_MAX; } } /* Bandwidth expansion for synthesis filter shaping */ SKP_Silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); /* Compute noise shaping filter coefficients */ SKP_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( SKP_int32 ) ); /* Bandwidth expansion for analysis filter shaping */ SKP_assert( BWExp1_Q16 <= SKP_FIX_CONST( 1.0, 16 ) ); SKP_Silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); /* Ratio of prediction gains, in energy domain */ SKP_Silk_LPC_inverse_pred_gain_Q24( &pre_nrg_Q30, AR2_Q24, psEnc->sCmn.shapingLPCOrder ); SKP_Silk_LPC_inverse_pred_gain_Q24( &nrg, AR1_Q24, psEnc->sCmn.shapingLPCOrder ); //psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg; pre_nrg_Q30 = SKP_LSHIFT32( SKP_SMULWB( pre_nrg_Q30, SKP_FIX_CONST( 0.7, 15 ) ), 1 ); psEncCtrl->GainsPre_Q14[ k ] = ( SKP_int ) SKP_FIX_CONST( 0.3, 14 ) + SKP_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); /* Convert to monic warped prediction coefficients and limit absolute values */ limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SKP_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); /* Convert from Q24 to Q13 and store in int16 */ for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); } }
void SKP_Silk_noise_shape_analysis_FIX( SKP_Silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ SKP_Silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ const SKP_int16 *pitch_res, /* I LPC residual from pitch analysis */ const SKP_int16 *x /* I Input signal [ 2 * frame_length + la_shape ]*/ ) { SKP_Silk_shape_state_FIX *psShapeSt = &psEnc->sShape; SKP_int k, nSamples, lz, Qnrg, b_Q14, scale = 0, sz; SKP_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; SKP_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; SKP_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; SKP_int32 auto_corr[ SHAPE_LPC_ORDER_MAX + 1 ]; SKP_int32 refl_coef_Q16[ SHAPE_LPC_ORDER_MAX ]; SKP_int32 AR_Q24[ SHAPE_LPC_ORDER_MAX ]; SKP_int16 x_windowed[ SHAPE_LPC_WIN_MAX ]; const SKP_int16 *x_ptr, *pitch_res_ptr; SKP_int32 sqrt_nrg[ NB_SUBFR ], Qnrg_vec[ NB_SUBFR ]; /* Point to start of first LPC analysis block */ x_ptr = x + psEnc->sCmn.la_shape - SKP_SMULBB( SHAPE_LPC_WIN_MS, psEnc->sCmn.fs_kHz ) + psEnc->sCmn.frame_length / NB_SUBFR; /****************/ /* CONTROL SNR */ /****************/ /* Reduce SNR_dB values if recent bitstream has exceeded TargetRate */ psEncCtrl->current_SNR_dB_Q7 = psEnc->SNR_dB_Q7 - SKP_SMULWB( SKP_LSHIFT( ( SKP_int32 )psEnc->BufferedInChannel_ms, 7 ), 3277 ); /* Reduce SNR_dB if inband FEC used */ if( psEnc->speech_activity_Q8 > LBRR_SPEECH_ACTIVITY_THRES_Q8 ) { psEncCtrl->current_SNR_dB_Q7 -= SKP_RSHIFT( psEnc->inBandFEC_SNR_comp_Q8, 1 ); } /****************/ /* GAIN CONTROL */ /****************/ /* Input quality is the average of the quality in the lowest two VAD bands */ psEncCtrl->input_quality_Q14 = ( SKP_int )SKP_RSHIFT( ( SKP_int32 )psEncCtrl->input_quality_bands_Q15[ 0 ] + psEncCtrl->input_quality_bands_Q15[ 1 ], 2 ); /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ psEncCtrl->coding_quality_Q14 = SKP_RSHIFT( SKP_Silk_sigm_Q15( SKP_RSHIFT_ROUND( psEncCtrl->current_SNR_dB_Q7 - ( 18 << 7 ), 4 ) ), 1 ); /* Reduce coding SNR during low speech activity */ b_Q8 = ( 1 << 8 ) - psEnc->speech_activity_Q8; b_Q8 = SKP_SMULWB( SKP_LSHIFT( b_Q8, 8 ), b_Q8 ); SNR_adj_dB_Q7 = SKP_SMLAWB( psEncCtrl->current_SNR_dB_Q7, SKP_SMULBB( -BG_SNR_DECR_dB_Q7 >> ( 4 + 1 ), b_Q8 ), // Q11 SKP_SMULWB( ( 1 << 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); // Q12 if( psEncCtrl->sCmn.sigtype == SIG_TYPE_VOICED ) { /* Reduce gains for periodic signals */ SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, HARM_SNR_INCR_dB_Q7 << 1, psEnc->LTPCorr_Q15 ); } else { /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, SKP_SMLAWB( 6 << ( 7 + 2 ), -104856, psEncCtrl->current_SNR_dB_Q7 ), //-104856_Q18 = -0.4_Q0, Q9 ( 1 << 14 ) - psEncCtrl->input_quality_Q14 ); // Q14 } /*************************/ /* SPARSENESS PROCESSING */ /*************************/ /* Set quantizer offset */ if( psEncCtrl->sCmn.sigtype == SIG_TYPE_VOICED ) { /* Initally set to 0; may be overruled in process_gains(..) */ psEncCtrl->sCmn.QuantOffsetType = 0; psEncCtrl->sparseness_Q8 = 0; } else { /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ nSamples = SKP_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); energy_variation_Q7 = 0; log_energy_prev_Q7 = 0; pitch_res_ptr = pitch_res; for( k = 0; k < FRAME_LENGTH_MS / 2; k++ ) { SKP_Silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); nrg += SKP_RSHIFT( nSamples, scale ); // Q(-scale) log_energy_Q7 = SKP_Silk_lin2log( nrg ); if( k > 0 ) { energy_variation_Q7 += SKP_abs( log_energy_Q7 - log_energy_prev_Q7 ); } log_energy_prev_Q7 = log_energy_Q7; pitch_res_ptr += nSamples; } psEncCtrl->sparseness_Q8 = SKP_RSHIFT( SKP_Silk_sigm_Q15( SKP_SMULWB( energy_variation_Q7 - ( 5 << 7 ), 6554 ) ), 7 ); // 6554_Q16 = 0.1_Q0 /* Set quantization offset depending on sparseness measure */ if( psEncCtrl->sparseness_Q8 > SPARSENESS_THRESHOLD_QNT_OFFSET_Q8 ) { psEncCtrl->sCmn.QuantOffsetType = 0; } else { psEncCtrl->sCmn.QuantOffsetType = 1; } /* Increase coding SNR for sparse signals */ SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, SPARSE_SNR_INCR_dB_Q7 << 8, psEncCtrl->sparseness_Q8 - ( 1 << 7 ) ); } /*******************************/ /* Control bandwidth expansion */ /*******************************/ delta_Q16 = SKP_SMULWB( ( 1 << 16 ) - SKP_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), LOW_RATE_BANDWIDTH_EXPANSION_DELTA_Q16 ); BWExp1_Q16 = BANDWIDTH_EXPANSION_Q16 - delta_Q16; BWExp2_Q16 = BANDWIDTH_EXPANSION_Q16 + delta_Q16; if( psEnc->sCmn.fs_kHz == 24 ) { /* Less bandwidth expansion for super wideband */ BWExp1_Q16 = ( 1 << 16 ) - SKP_SMULWB( SWB_BANDWIDTH_EXPANSION_REDUCTION_Q16, ( 1 << 16 ) - BWExp1_Q16 ); BWExp2_Q16 = ( 1 << 16 ) - SKP_SMULWB( SWB_BANDWIDTH_EXPANSION_REDUCTION_Q16, ( 1 << 16 ) - BWExp2_Q16 ); } /* BWExp1 will be applied after BWExp2, so make it relative */ BWExp1_Q16 = SKP_DIV32_16( SKP_LSHIFT( BWExp1_Q16, 14 ), SKP_RSHIFT( BWExp2_Q16, 2 ) ); /********************************************/ /* Compute noise shaping AR coefs and gains */ /********************************************/ sz = ( SKP_int )SKP_SMULBB( SHAPE_LPC_WIN_MS, psEnc->sCmn.fs_kHz ); for( k = 0; k < NB_SUBFR; k++ ) { /* Apply window */ SKP_Silk_apply_sine_window( x_windowed, x_ptr, 0, SHAPE_LPC_WIN_MS * psEnc->sCmn.fs_kHz ); /* Update pointer: next LPC analysis block */ x_ptr += psEnc->sCmn.frame_length / NB_SUBFR; /* Calculate auto correlation */ SKP_Silk_autocorr( auto_corr, &scale, x_windowed, sz, psEnc->sCmn.shapingLPCOrder + 1 ); /* Add white noise, as a fraction of energy */ auto_corr[0] = SKP_ADD32( auto_corr[0], SKP_max_32( SKP_SMULWB( SKP_RSHIFT( auto_corr[ 0 ], 4 ), SHAPE_WHITE_NOISE_FRACTION_Q20 ), 1 ) ); /* Calculate the reflection coefficients using schur */ nrg = SKP_Silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); /* Convert reflection coefficients to prediction coefficients */ SKP_Silk_k2a_Q16( AR_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); /* Bandwidth expansion for synthesis filter shaping */ SKP_Silk_bwexpander_32( AR_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); /* Make sure to fit in Q13 SKP_int16 */ SKP_Silk_LPC_fit( &psEncCtrl->AR2_Q13[ k * SHAPE_LPC_ORDER_MAX ], AR_Q24, 13, psEnc->sCmn.shapingLPCOrder ); /* Compute noise shaping filter coefficients */ SKP_memcpy( &psEncCtrl->AR1_Q13[ k * SHAPE_LPC_ORDER_MAX ], &psEncCtrl->AR2_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder * sizeof( SKP_int16 ) ); /* Bandwidth expansion for analysis filter shaping */ SKP_assert( BWExp1_Q16 <= ( 1 << 16 ) ); // If ever breaking, use LPC_stabilize() in these cases to stay within range SKP_Silk_bwexpander( &psEncCtrl->AR1_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); /* Increase residual energy */ nrg = SKP_SMLAWB( nrg, SKP_RSHIFT( auto_corr[ 0 ], 8 ), SHAPE_MIN_ENERGY_RATIO_Q24 ); Qnrg = -scale; // range: -12...30 SKP_assert( Qnrg >= -12 ); SKP_assert( Qnrg <= 30 ); /* Make sure that Qnrg is an even number */ if( Qnrg & 1 ) { Qnrg -= 1; nrg >>= 1; } tmp32 = SKP_Silk_SQRT_APPROX( nrg ); Qnrg >>= 1; // range: -6...15 sqrt_nrg[ k ] = tmp32; Qnrg_vec[ k ] = Qnrg; psEncCtrl->Gains_Q16[ k ] = SKP_LSHIFT_SAT32( tmp32, 16 - Qnrg ); /* Ratio of prediction gains, in energy domain */ SKP_Silk_LPC_inverse_pred_gain_Q13( &pre_nrg_Q30, &psEncCtrl->AR2_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder ); SKP_Silk_LPC_inverse_pred_gain_Q13( &nrg, &psEncCtrl->AR1_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder ); lz = SKP_min_32( SKP_Silk_CLZ32( pre_nrg_Q30 ) - 1, 19 ); pre_nrg_Q30 = SKP_DIV32( SKP_LSHIFT( pre_nrg_Q30, lz ), SKP_RSHIFT( nrg, 20 - lz ) + 1 ); // Q20 pre_nrg_Q30 = SKP_RSHIFT( SKP_LSHIFT_SAT32( pre_nrg_Q30, 9 ), 1 ); /* Q28 */ psEncCtrl->GainsPre_Q14[ k ] = ( SKP_int )SKP_Silk_SQRT_APPROX( pre_nrg_Q30 ); }