void SKP_Silk_VAD_GetNoiseLevels( const SKP_int32 pX[ VAD_N_BANDS ], /* I subband energies */ SKP_Silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ ) { SKP_int k; SKP_int32 nl, nrg, inv_nrg; SKP_int coef, min_coef; /* Initially faster smoothing */ if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */ min_coef = SKP_DIV32_16( SKP_int16_MAX, SKP_RSHIFT( psSilk_VAD->counter, 4 ) + 1 ); } else { min_coef = 0; } for( k = 0; k < VAD_N_BANDS; k++ ) { /* Get old noise level estimate for current band */ nl = psSilk_VAD->NL[ k ]; SKP_assert( nl >= 0 ); /* Add bias */ nrg = SKP_ADD_POS_SAT32( pX[ k ], psSilk_VAD->NoiseLevelBias[ k ] ); SKP_assert( nrg > 0 ); /* Invert energies */ inv_nrg = SKP_DIV32( SKP_int32_MAX, nrg ); SKP_assert( inv_nrg >= 0 ); /* Less update when subband energy is high */ if( nrg > SKP_LSHIFT( nl, 3 ) ) { coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 >> 3; } else if( nrg < nl ) {
/**********************************************************************/
/* Reset the Silk VAD state and load its start-up values.             */
/* The state is zeroed first; then, per band, the noise-level bias,   */
/* the initial noise level (100 x bias), its inverse and the smoothed */
/* energy-to-noise ratio are set. The frame counter starts at 15.     */
/* Always returns 0.                                                  */
/**********************************************************************/
SKP_int SKP_Silk_VAD_Init(                          /* O    Return value, 0 if success          */
    SKP_Silk_VAD_state          *psSilk_VAD         /* I/O  Pointer to Silk VAD state           */
)
{
    SKP_int band;

    /* Start from an all-zero state */
    SKP_memset( psSilk_VAD, 0, sizeof( SKP_Silk_VAD_state ) );

    for( band = 0; band < VAD_N_BANDS; band++ ) {
        /* Approximate pink noise levels (psd proportional to inverse of frequency), never below 1 */
        psSilk_VAD->NoiseLevelBias[ band ] = SKP_max_32( SKP_DIV32_16( VAD_NOISE_LEVELS_BIAS, band + 1 ), 1 );

        /* Initial noise level estimate and its inverse */
        psSilk_VAD->NL[ band ]     = SKP_MUL( 100, psSilk_VAD->NoiseLevelBias[ band ] );
        psSilk_VAD->inv_NL[ band ] = SKP_DIV32( SKP_int32_MAX, psSilk_VAD->NL[ band ] );

        /* Smoothed energy-to-noise ratio: 100 * 256 --> 20 dB SNR */
        psSilk_VAD->NrgRatioSmth_Q8[ band ] = 100 * 256;
    }

    psSilk_VAD->counter = 15;

    return( 0 );
}
/* Glues concealed frames with newly received good frames.                    */
/* On a lost frame the energy of the concealed signal is stored in the PLC    */
/* state. On the first good frame after a loss, if the decoded signal has     */
/* more energy than the concealed one, the signal is faded in with a gain     */
/* that ramps linearly up to 1.0 (Q12) over the frame.                        */
/* psDecCtrl is not referenced by this function.                              */
void SKP_Silk_PLC_glue_frames(
    SKP_Silk_decoder_state      *psDec,             /* I/O decoder state        */
    SKP_Silk_decoder_control    *psDecCtrl,         /* I/O Decoder control      */
    SKP_int16                   signal[],           /* I/O signal               */
    SKP_int                     length              /* I length of residual     */
)
{
    SKP_Silk_PLC_struct *psPLC = &psDec->sPLC;
    SKP_int32           good_energy;
    SKP_int             good_shift, n;

    if( psDec->lossCnt ) {
        /* Concealed frame: remember its energy for the next good frame */
        SKP_Silk_sum_sqr_shift( &psPLC->conc_energy, &psPLC->conc_energy_shift, signal, length );
        psPLC->last_frame_lost = 1;
        return;
    }

    if( psPLC->last_frame_lost ) {
        /* Energy of the decoded signal following a loss */
        SKP_Silk_sum_sqr_shift( &good_energy, &good_shift, signal, length );

        /* Bring both energies to the same scaling (the larger of the two shifts) */
        if( good_shift > psPLC->conc_energy_shift ) {
            psPLC->conc_energy = SKP_RSHIFT( psPLC->conc_energy, good_shift - psPLC->conc_energy_shift );
        } else if( good_shift < psPLC->conc_energy_shift ) {
            good_energy = SKP_RSHIFT( good_energy, psPLC->conc_energy_shift - good_shift );
        }

        /* Fade in only when the new frame is louder than the concealed one */
        if( good_energy > psPLC->conc_energy ) {
            SKP_int32 frac_Q24, gain_Q12, slope_Q12;
            SKP_int32 headroom;

            /* Normalize the concealed energy, leaving one bit of headroom */
            headroom = SKP_Silk_CLZ32( psPLC->conc_energy );
            headroom = headroom - 1;
            psPLC->conc_energy = SKP_LSHIFT( psPLC->conc_energy, headroom );
            good_energy        = SKP_RSHIFT( good_energy, SKP_max_32( 24 - headroom, 0 ) );

            /* Energy ratio; the divisor is kept at 1 or more */
            frac_Q24 = SKP_DIV32( psPLC->conc_energy, SKP_max( good_energy, 1 ) );

            /* Start gain and per-sample increment towards unity */
            gain_Q12  = SKP_Silk_SQRT_APPROX( frac_Q24 );
            slope_Q12 = SKP_DIV32_16( ( 1 << 12 ) - gain_Q12, length );

            for( n = 0; n < length; n++ ) {
                signal[ n ] = SKP_RSHIFT( SKP_MUL( gain_Q12, signal[ n ] ), 12 );
                gain_Q12 += slope_Q12;
                gain_Q12 = SKP_min( gain_Q12, ( 1 << 12 ) );
            }
        }
    }

    psPLC->last_frame_lost = 0;
}
/* Control SNR of residual quantizer.                                          */
/* Clamps the requested target rate to [MIN_TARGET_RATE_BPS,                   */
/* MAX_TARGET_RATE_BPS]. If it differs from the rate stored in the encoder     */
/* state, stores it and derives SNR_dB_Q7 by linear interpolation in the rate  */
/* table selected by the internal sampling rate. Returns SILK_NO_ERROR.        */
SKP_int silk_control_SNR(
    silk_encoder_state          *psEncC,            /* I/O  Pointer to Silk encoder state       */
    SKP_int32                   TargetRate_bps      /* I    Target max bitrate (bps)            */
)
{
    SKP_int         idx;
    SKP_int32       frac_Q6, rate_lo, rate_hi;
    const SKP_int32 *rateTable;

    /* Set bitrate/coding quality */
    TargetRate_bps = SKP_LIMIT( TargetRate_bps, MIN_TARGET_RATE_BPS, MAX_TARGET_RATE_BPS );

    /* Nothing to do when the rate is unchanged */
    if( TargetRate_bps == psEncC->TargetRate_bps ) {
        return SILK_NO_ERROR;
    }
    psEncC->TargetRate_bps = TargetRate_bps;

    /* Pick the rate table for the internal sampling rate */
    switch( psEncC->fs_kHz ) {
    case 8:
        rateTable = silk_TargetRate_table_NB;
        break;
    case 12:
        rateTable = silk_TargetRate_table_MB;
        break;
    default:
        rateTable = silk_TargetRate_table_WB;
        break;
    }

    /* Reduce bitrate for 10 ms modes in these calculations */
    if( psEncC->nb_subfr == 2 ) {
        TargetRate_bps -= REDUCE_BITRATE_10_MS_BPS;
    }

    /* Find the first table interval whose upper bound covers the rate, then interpolate */
    for( idx = 1; idx < TARGET_RATE_TAB_SZ; idx++ ) {
        rate_hi = rateTable[ idx ];
        if( TargetRate_bps > rate_hi ) {
            continue;
        }
        rate_lo = rateTable[ idx - 1 ];
        frac_Q6 = SKP_DIV32( SKP_LSHIFT( TargetRate_bps - rate_lo, 6 ), rate_hi - rate_lo );
        psEncC->SNR_dB_Q7 = SKP_LSHIFT( silk_SNR_table_Q1[ idx - 1 ], 6 ) +
            SKP_MUL( frac_Q6, silk_SNR_table_Q1[ idx ] - silk_SNR_table_Q1[ idx - 1 ] );
        break;
    }

    return SILK_NO_ERROR;
}
SKP_int SKP_Silk_VAD_GetSA_Q8( /* O Return value, 0 if success */ SKP_Silk_encoder_state *psEncC, /* I/O Encoder state */ const SKP_int16 pIn[] /* I PCM input */ ) { SKP_int SA_Q15, pSNR_dB_Q7, input_tilt; SKP_int decimated_framelength, dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s; SKP_int32 sumSquared, smooth_coef_Q16; SKP_int16 HPstateTmp; SKP_int16 X[ VAD_N_BANDS ][ MAX_FRAME_LENGTH / 2 ]; SKP_int32 Xnrg[ VAD_N_BANDS ]; SKP_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ]; SKP_int32 speech_nrg, x_tmp; SKP_int ret = 0; SKP_Silk_VAD_state *psSilk_VAD = &psEncC->sVAD; /* Safety checks */ SKP_assert( VAD_N_BANDS == 4 ); SKP_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); SKP_assert( psEncC->frame_length <= 512 ); SKP_assert( psEncC->frame_length == 8 * SKP_RSHIFT( psEncC->frame_length, 3 ) ); /***********************/ /* Filter and Decimate */ /***********************/ /* 0-8 kHz to 0-4 kHz and 4-8 kHz */ SKP_Silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ], &X[ 0 ][ 0 ], &X[ 3 ][ 0 ], psEncC->frame_length ); /* 0-4 kHz to 0-2 kHz and 2-4 kHz */ SKP_Silk_ana_filt_bank_1( &X[ 0 ][ 0 ], &psSilk_VAD->AnaState1[ 0 ], &X[ 0 ][ 0 ], &X[ 2 ][ 0 ], SKP_RSHIFT( psEncC->frame_length, 1 ) ); /* 0-2 kHz to 0-1 kHz and 1-2 kHz */ SKP_Silk_ana_filt_bank_1( &X[ 0 ][ 0 ], &psSilk_VAD->AnaState2[ 0 ], &X[ 0 ][ 0 ], &X[ 1 ][ 0 ], SKP_RSHIFT( psEncC->frame_length, 2 ) ); /*********************************************/ /* HP filter on lowest band (differentiator) */ /*********************************************/ decimated_framelength = SKP_RSHIFT( psEncC->frame_length, 3 ); X[ 0 ][ decimated_framelength - 1 ] = SKP_RSHIFT( X[ 0 ][ decimated_framelength - 1 ], 1 ); HPstateTmp = X[ 0 ][ decimated_framelength - 1 ]; for( i = decimated_framelength - 1; i > 0; i-- ) { X[ 0 ][ i - 1 ] = SKP_RSHIFT( X[ 0 ][ i - 1 ], 1 ); X[ 0 ][ i ] -= X[ 0 ][ i - 1 ]; } X[ 0 ][ 0 ] -= psSilk_VAD->HPstate; psSilk_VAD->HPstate = HPstateTmp; /*************************************/ /* Calculate the 
energy in each band */ /*************************************/ for( b = 0; b < VAD_N_BANDS; b++ ) { /* Find the decimated framelength in the non-uniformly divided bands */ decimated_framelength = SKP_RSHIFT( psEncC->frame_length, SKP_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) ); /* Split length into subframe lengths */ dec_subframe_length = SKP_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 ); dec_subframe_offset = 0; /* Compute energy per sub-frame */ /* initialize with summed energy of last subframe */ Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ]; for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) { sumSquared = 0; for( i = 0; i < dec_subframe_length; i++ ) { /* The energy will be less than dec_subframe_length * ( SKP_int16_MIN / 8 ) ^ 2. */ /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */ x_tmp = SKP_RSHIFT( X[ b ][ i + dec_subframe_offset ], 3 ); sumSquared = SKP_SMLABB( sumSquared, x_tmp, x_tmp ); /* Safety check */ SKP_assert( sumSquared >= 0 ); } /* Add/saturate summed energy of current subframe */ if( s < VAD_INTERNAL_SUBFRAMES - 1 ) { Xnrg[ b ] = SKP_ADD_POS_SAT32( Xnrg[ b ], sumSquared ); } else { /* Look-ahead subframe */ Xnrg[ b ] = SKP_ADD_POS_SAT32( Xnrg[ b ], SKP_RSHIFT( sumSquared, 1 ) ); } dec_subframe_offset += dec_subframe_length; } psSilk_VAD->XnrgSubfr[ b ] = sumSquared; } /********************/ /* Noise estimation */ /********************/ SKP_Silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD ); /***********************************************/ /* Signal-plus-noise to noise ratio estimation */ /***********************************************/ sumSquared = 0; input_tilt = 0; for( b = 0; b < VAD_N_BANDS; b++ ) { speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ]; if( speech_nrg > 0 ) { /* Divide, with sufficient resolution */ if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) { NrgToNoiseRatio_Q8[ b ] = SKP_DIV32( SKP_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 ); } else { NrgToNoiseRatio_Q8[ b ] = SKP_DIV32( Xnrg[ 
b ], SKP_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 ); } /* Convert to log domain */ SNR_Q7 = SKP_Silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128; /* Sum-of-squares */ sumSquared = SKP_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */ /* Tilt measure */ if( speech_nrg < ( 1 << 20 ) ) { /* Scale down SNR value for small subband speech energies */ SNR_Q7 = SKP_SMULWB( SKP_LSHIFT( SKP_Silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 ); } input_tilt = SKP_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 ); } else { NrgToNoiseRatio_Q8[ b ] = 256; } } /* Mean-of-squares */ sumSquared = SKP_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */ /* Root-mean-square approximation, scale to dBs, and write to output pointer */ pSNR_dB_Q7 = ( SKP_int16 )( 3 * SKP_Silk_SQRT_APPROX( sumSquared ) ); /* Q7 */ /*********************************/ /* Speech Probability Estimation */ /*********************************/ SA_Q15 = SKP_Silk_sigm_Q15( SKP_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 ); /**************************/ /* Frequency Tilt Measure */ /**************************/ psEncC->input_tilt_Q15 = SKP_LSHIFT( SKP_Silk_sigm_Q15( input_tilt ) - 16384, 1 ); /**************************************************/ /* Scale the sigmoid output based on power levels */ /**************************************************/ speech_nrg = 0; for( b = 0; b < VAD_N_BANDS; b++ ) { /* Accumulate signal-without-noise energies, higher frequency bands have more weight */ speech_nrg += ( b + 1 ) * SKP_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); } /* Power scaling */ if( speech_nrg <= 0 ) { SA_Q15 = SKP_RSHIFT( SA_Q15, 1 ); } else if( speech_nrg < 32768 ) { if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { speech_nrg = SKP_LSHIFT_SAT32( speech_nrg, 16 ); } else { speech_nrg = SKP_LSHIFT_SAT32( speech_nrg, 15 ); } /* square-root */ speech_nrg = SKP_Silk_SQRT_APPROX( speech_nrg ); SA_Q15 = SKP_SMULWB( 32768 + speech_nrg, SA_Q15 ); } /* Copy the resulting speech activity in Q8 */ 
psEncC->speech_activity_Q8 = SKP_min_int( SKP_RSHIFT( SA_Q15, 7 ), SKP_uint8_MAX ); /***********************************/ /* Energy Level and SNR estimation */ /***********************************/ /* Smoothing coefficient */ smooth_coef_Q16 = SKP_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, SKP_SMULWB( SA_Q15, SA_Q15 ) ); if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { smooth_coef_Q16 >>= 1; }
/* Find the prediction coefficients (LTP and LPC) for the current frame.       */
/* Steps, in order:                                                            */
/*   1. derive per-subframe inverse gains, least-squares weights and           */
/*      local gains from psEncCtrl->Gains_Q16;                                 */
/*   2. voiced: LTP analysis, LTP gain quantization, LTP scaling control and   */
/*      LTP analysis filtering into LPC_in_pre;                                */
/*      otherwise: copy the gain-scaled input into LPC_in_pre and clear the    */
/*      LTP coefficients and LTP coding gain;                                  */
/*   3. LPC analysis on LPC_in_pre, NLSF processing, residual energy;          */
/*   4. store the NLSFs in the encoder state for the next frame.               */
void SKP_Silk_find_pred_coefs_FIX(
    SKP_Silk_encoder_state_FIX      *psEnc,         /* I/O  encoder state                       */
    SKP_Silk_encoder_control_FIX    *psEncCtrl,     /* I/O  encoder control                     */
    const SKP_int16                 res_pitch[],    /* I    Residual from pitch analysis        */
    const SKP_int16                 x[]             /* I    Speech signal                       */
)
{
    SKP_int32       WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ];
    SKP_int32       invGains_Q16[ MAX_NB_SUBFR ];
    SKP_int32       local_gains[ MAX_NB_SUBFR ];
    SKP_int32       Wght_Q15[ MAX_NB_SUBFR ];
    SKP_int         LTP_corrs_rshift[ MAX_NB_SUBFR ];
    SKP_int16       NLSF_Q15[ MAX_LPC_ORDER ];
    SKP_int16       LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ];
    SKP_int16       *dst;
    const SKP_int16 *src;
    SKP_int32       smallest_gain_Q16, inv_Q16, inv_sq;
    SKP_int         sf, block_len;

    /* Weighting for weighted least squares: start from the smallest subframe gain */
    smallest_gain_Q16 = SKP_int32_MAX >> 6;
    for( sf = 0; sf < psEnc->sCmn.nb_subfr; sf++ ) {
        smallest_gain_Q16 = SKP_min( smallest_gain_Q16, psEncCtrl->Gains_Q16[ sf ] );
    }

    for( sf = 0; sf < psEnc->sCmn.nb_subfr; sf++ ) {
        /* Divide to Q16 */
        SKP_assert( psEncCtrl->Gains_Q16[ sf ] > 0 );

        /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */
        inv_Q16 = SKP_DIV32_varQ( smallest_gain_Q16, psEncCtrl->Gains_Q16[ sf ], 16 - 2 );

        /* Floor of 363 keeps the resulting Wght_Q15 at 1 or more */
        inv_Q16 = SKP_max( inv_Q16, 363 );
        invGains_Q16[ sf ] = inv_Q16;

        /* Square the inverted gain and halve it to get the weight */
        SKP_assert( invGains_Q16[ sf ] == SKP_SAT16( invGains_Q16[ sf ] ) );
        inv_sq = SKP_SMULWB( invGains_Q16[ sf ], invGains_Q16[ sf ] );
        Wght_Q15[ sf ] = SKP_RSHIFT( inv_sq, 1 );

        /* Invert the inverted and normalized gains */
        local_gains[ sf ] = SKP_DIV32( ( 1 << 16 ), invGains_Q16[ sf ] );
    }

    if( psEnc->sCmn.indices.signalType != TYPE_VOICED ) {
        /************/
        /* UNVOICED */
        /************/
        /* Create signal with prepended subframes, scaled by inverse gains */
        block_len = psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder;
        src = x - psEnc->sCmn.predictLPCOrder;
        dst = LPC_in_pre;
        for( sf = 0; sf < psEnc->sCmn.nb_subfr; sf++ ) {
            SKP_Silk_scale_copy_vector16( dst, src, invGains_Q16[ sf ], block_len );
            dst += block_len;
            src += psEnc->sCmn.subfr_length;
        }

        /* No long-term prediction for this frame */
        SKP_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( SKP_int16 ) );
        psEncCtrl->LTPredCodGain_Q7 = 0;
    } else {
        /**********/
        /* VOICED */
        /**********/
        SKP_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );

        /* LTP analysis */
        SKP_Silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7,
            res_pitch, psEncCtrl->pitchL, Wght_Q15,
            psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift );

        /* Quantize LTP gain parameters */
        SKP_Silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
            WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);

        /* Control LTP scaling */
        SKP_Silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl );

        /* Create LTP residual */
        SKP_Silk_LTP_analysis_filter_FIX( LPC_in_pre,
            psEnc->x_buf + psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder,
            psEncCtrl->LTPCoef_Q14, psEncCtrl->pitchL, invGains_Q16,
            psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );
    }

    /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
    TIC(FIND_LPC)
    SKP_Silk_find_LPC_FIX( NLSF_Q15, &psEnc->sCmn.indices.NLSFInterpCoef_Q2, psEnc->sCmn.prev_NLSFq_Q15,
        psEnc->sCmn.useInterpolatedNLSFs, psEnc->sCmn.first_frame_after_reset, psEnc->sCmn.predictLPCOrder,
        LPC_in_pre, psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder, psEnc->sCmn.nb_subfr );
    TOC(FIND_LPC)

    /* Quantize LSFs */
    TIC(PROCESS_LSFS)
    SKP_Silk_process_NLSFs( &psEnc->sCmn, psEncCtrl->PredCoef_Q12, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 );
    TOC(PROCESS_LSFS)

    /* Calculate residual energy using quantized LPC coefficients */
    SKP_Silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12,
        local_gains, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );

    /* Copy to prediction struct for use in next frame for fluctuation reduction */
    SKP_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
}
/* Update the packet-loss-concealment state from a successfully decoded frame. */
/*                                                                             */
/* Voiced frames: walks backwards from the last subframe over the subframes    */
/* that start within one pitch lag of the frame end, keeps the LTP taps and    */
/* pitch lag of the one with the largest tap sum, then rescales the taps so    */
/* their sum lies within [V_PITCH_GAIN_START_MIN_Q14,                          */
/* V_PITCH_GAIN_START_MAX_Q14].                                                */
/* Other frames: sets the pitch lag to fs_kHz * 18 (Q8) and clears the taps.   */
/* In both cases the LPC coefficients from PredCoef_Q12[ 1 ], the LTP scale    */
/* and the subframe gains are saved.                                           */
/*                                                                             */
/* signal and length are not referenced by this function.                      */
void SKP_Silk_PLC_update(
    SKP_Silk_decoder_state      *psDec,             /* (I/O) Decoder state          */
    SKP_Silk_decoder_control    *psDecCtrl,         /* (I/O) Decoder control        */
    SKP_int16                   signal[],
    SKP_int                     length
)
{
    SKP_int32 LTP_Gain_Q14, temp_LTP_Gain_Q14;
    SKP_int   i, j;
    SKP_Silk_PLC_struct *psPLC;

    psPLC = &psDec->sPLC;

    /* Update parameters used in case of packet loss */
    psDec->prev_sigtype = psDecCtrl->sigtype;
    LTP_Gain_Q14 = 0;
    if( psDecCtrl->sigtype == SIG_TYPE_VOICED ) {
        /* Find the parameters for the last subframe which contains a pitch pulse. */
        /* The explicit j < NB_SUBFR bound keeps ( NB_SUBFR - 1 - j ) non-negative  */
        /* even if the last pitch lag exceeds NB_SUBFR * subfr_length, so the       */
        /* coefficient and lag arrays are never read before their first element.    */
        for( j = 0; j < NB_SUBFR && j * psDec->subfr_length < psDecCtrl->pitchL[ NB_SUBFR - 1 ]; j++ ) {
            /* Sum of the LTP taps of subframe ( NB_SUBFR - 1 - j ) */
            temp_LTP_Gain_Q14 = 0;
            for( i = 0; i < LTP_ORDER; i++ ) {
                temp_LTP_Gain_Q14 += psDecCtrl->LTPCoef_Q14[ ( NB_SUBFR - 1 - j ) * LTP_ORDER + i ];
            }
            if( temp_LTP_Gain_Q14 > LTP_Gain_Q14 ) {
                /* Strongest subframe so far: keep its taps and lag */
                LTP_Gain_Q14 = temp_LTP_Gain_Q14;
                SKP_memcpy( psPLC->LTPCoef_Q14,
                    &psDecCtrl->LTPCoef_Q14[ SKP_SMULBB( NB_SUBFR - 1 - j, LTP_ORDER ) ],
                    LTP_ORDER * sizeof( SKP_int16 ) );

                psPLC->pitchL_Q8 = SKP_LSHIFT( psDecCtrl->pitchL[ NB_SUBFR - 1 - j ], 8 );
            }
        }

#if USE_SINGLE_TAP
        /* Collapse the filter to a single centre tap carrying the whole gain */
        SKP_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( SKP_int16 ) );
        psPLC->LTPCoef_Q14[ LTP_ORDER / 2 ] = LTP_Gain_Q14;
#endif

        /* Limit LT coefs */
        if( LTP_Gain_Q14 < V_PITCH_GAIN_START_MIN_Q14 ) {
            /* Scale the taps up towards the minimum start gain */
            SKP_int   scale_Q10;
            SKP_int32 tmp;

            tmp = SKP_LSHIFT( V_PITCH_GAIN_START_MIN_Q14, 10 );
            scale_Q10 = SKP_DIV32( tmp, SKP_max( LTP_Gain_Q14, 1 ) );
            for( i = 0; i < LTP_ORDER; i++ ) {
                psPLC->LTPCoef_Q14[ i ] = SKP_RSHIFT( SKP_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q10 ), 10 );
            }
        } else if( LTP_Gain_Q14 > V_PITCH_GAIN_START_MAX_Q14 ) {
            /* Scale the taps down to the maximum start gain */
            SKP_int   scale_Q14;
            SKP_int32 tmp;

            tmp = SKP_LSHIFT( V_PITCH_GAIN_START_MAX_Q14, 14 );
            scale_Q14 = SKP_DIV32( tmp, SKP_max( LTP_Gain_Q14, 1 ) );
            for( i = 0; i < LTP_ORDER; i++ ) {
                psPLC->LTPCoef_Q14[ i ] = SKP_RSHIFT( SKP_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q14 ), 14 );
            }
        }
    } else {
        /* Not voiced: fixed lag of fs_kHz * 18 samples, no long-term prediction */
        psPLC->pitchL_Q8 = SKP_LSHIFT( SKP_SMULBB( psDec->fs_kHz, 18 ), 8 );
        SKP_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( SKP_int16 ));
    }

    /* Save LPC coefficients */
    SKP_memcpy( psPLC->prevLPC_Q12, psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( SKP_int16 ) );
    psPLC->prevLTP_scale_Q14 = psDecCtrl->LTP_scale_Q14;

    /* Save Gains */
    SKP_memcpy( psPLC->prevGain_Q16, psDecCtrl->Gains_Q16, NB_SUBFR * sizeof( SKP_int32 ) );
}
/* Encode one frame of speech with the fixed-point Silk encoder.                 */
/*                                                                               */
/* Runs, in order: voice activity detection, input filtering, pitch analysis,    */
/* noise shape analysis, prefiltering, prediction coefficient search, gain       */
/* processing, low bitrate redundancy (LBRR) encoding, noise shaping             */
/* quantization and parameter encoding. Once enough frames are buffered to fill  */
/* a packet of PacketSize_ms, the payload is finalized and copied to pCode,      */
/* with an older LBRR payload appended when one is flagged and there is room.    */
/*                                                                               */
/* On entry *pnBytesOut is the capacity of pCode; on return it holds the number  */
/* of bytes written, or 0 when no packet was produced for this call.             */
/*                                                                               */
/* Returns the value from the VAD call, SKP_SILK_ENC_PAYLOAD_BUF_TOO_SHORT when  */
/* the finished payload does not fit in pCode, or SKP_SILK_ENC_INTERNAL_ERROR    */
/* when the range coder reports an error.                                        */
int SKP_Silk_encode_frame_FIX(
    SKP_Silk_encoder_state_FIX *psEnc,      /* I/O  Pointer to Silk FIX encoder state           */
    uint8_t *pCode,                         /* O    Pointer to payload                          */
    int16_t *pnBytesOut,                    /* I/O  Pointer to number of payload bytes          */
                                            /*      input: max length; output: used             */
    const int16_t *pIn                      /* I    Pointer to input speech frame               */
)
{
    SKP_Silk_encoder_control_FIX sEncCtrl;
    int i, nBytes, ret = 0;
    int16_t *x_frame, *res_pitch_frame;
    int16_t xfw[MAX_FRAME_LENGTH];
    int16_t pIn_HP[MAX_FRAME_LENGTH];
    int16_t res_pitch[2 * MAX_FRAME_LENGTH + LA_PITCH_MAX];
    int LBRR_idx, frame_terminator, SNR_dB_Q7;
    const uint16_t *FrameTermination_CDF;

    /* Low bitrate redundancy parameters */
    uint8_t LBRRpayload[MAX_ARITHM_BYTES];
    int16_t nBytesLBRR;

    /* Per-frame seed: low two bits of the running frame counter */
    sEncCtrl.sCmn.Seed = psEnc->sCmn.frameCounter++ & 3;

    /**************************************************************/
    /* Setup Input Pointers, and insert frame in input buffer     */
    /**************************************************************/
    x_frame = psEnc->x_buf + psEnc->sCmn.frame_length;          /* start of frame to encode */
    res_pitch_frame = res_pitch + psEnc->sCmn.frame_length;     /* start of pitch LPC residual frame */

    /****************************/
    /* Voice Activity Detection */
    /****************************/
    ret = SKP_Silk_VAD_GetSA_Q8(&psEnc->sCmn.sVAD, &psEnc->speech_activity_Q8, &SNR_dB_Q7,
                                sEncCtrl.input_quality_bands_Q15, &sEncCtrl.input_tilt_Q15,
                                pIn, psEnc->sCmn.frame_length);

    /*******************************************/
    /* High-pass filtering of the input signal */
    /*******************************************/
#if HIGH_PASS_INPUT
    /* Variable high-pass filter */
    SKP_Silk_HP_variable_cutoff_FIX(psEnc, &sEncCtrl, pIn_HP, pIn);
#else
    SKP_memcpy(pIn_HP, pIn, psEnc->sCmn.frame_length * sizeof(int16_t));
#endif

#if SWITCH_TRANSITION_FILTERING
    /* Ensure smooth bandwidth transitions */
    SKP_Silk_LP_variable_cutoff(&psEnc->sCmn.sLP, x_frame + psEnc->sCmn.la_shape, pIn_HP,
                                psEnc->sCmn.frame_length);
#else
    SKP_memcpy(x_frame + psEnc->sCmn.la_shape, pIn_HP, psEnc->sCmn.frame_length * sizeof(int16_t));
#endif

    /*****************************************/
    /* Find pitch lags, initial LPC analysis */
    /*****************************************/
    SKP_Silk_find_pitch_lags_FIX(psEnc, &sEncCtrl, res_pitch, x_frame);

    /************************/
    /* Noise shape analysis */
    /************************/
    SKP_Silk_noise_shape_analysis_FIX(psEnc, &sEncCtrl, res_pitch_frame, x_frame);

    /*****************************************/
    /* Prefiltering for noise shaper         */
    /*****************************************/
    SKP_Silk_prefilter_FIX(psEnc, &sEncCtrl, xfw, x_frame);

    /***************************************************/
    /* Find linear prediction coefficients (LPC + LTP) */
    /***************************************************/
    SKP_Silk_find_pred_coefs_FIX(psEnc, &sEncCtrl, res_pitch);

    /****************************************/
    /* Process gains                        */
    /****************************************/
    SKP_Silk_process_gains_FIX(psEnc, &sEncCtrl);

    psEnc->sCmn.sigtype[psEnc->sCmn.nFramesInPayloadBuf] = sEncCtrl.sCmn.sigtype;
    psEnc->sCmn.QuantOffsetType[psEnc->sCmn.nFramesInPayloadBuf] = sEncCtrl.sCmn.QuantOffsetType;

    /****************************************/
    /* Low Bitrate Redundant Encoding       */
    /****************************************/
    nBytesLBRR = MAX_ARITHM_BYTES;
    SKP_Silk_LBRR_encode_FIX(psEnc, &sEncCtrl, LBRRpayload, &nBytesLBRR, xfw);

    /*****************************************/
    /* Noise shaping quantization            */
    /*****************************************/
    psEnc->NoiseShapingQuantizer(&psEnc->sCmn, &sEncCtrl.sCmn, &psEnc->sNSQ, xfw,
                                 &psEnc->sCmn.q[psEnc->sCmn.nFramesInPayloadBuf * psEnc->sCmn.frame_length],
                                 sEncCtrl.sCmn.NLSFInterpCoef_Q2, sEncCtrl.PredCoef_Q12[0],
                                 sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
                                 sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16,
                                 sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14);

    /**************************************************/
    /* Convert speech activity into VAD and DTX flags */
    /**************************************************/
    if (psEnc->speech_activity_Q8 < SPEECH_ACTIVITY_DTX_THRES_Q8) {
        psEnc->sCmn.vadFlag = NO_VOICE_ACTIVITY;
        psEnc->sCmn.noSpeechCounter++;
        if (psEnc->sCmn.noSpeechCounter > NO_SPEECH_FRAMES_BEFORE_DTX) {
            psEnc->sCmn.inDTX = 1;
        }
        if (psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX) {
            psEnc->sCmn.noSpeechCounter = 0;
            psEnc->sCmn.inDTX = 0;
        }
    } else {
        psEnc->sCmn.noSpeechCounter = 0;
        psEnc->sCmn.inDTX = 0;
        psEnc->sCmn.vadFlag = VOICE_ACTIVITY;
    }

    /****************************************/
    /* Initialize arithmetic coder          */
    /****************************************/
    if (psEnc->sCmn.nFramesInPayloadBuf == 0) {
        SKP_Silk_range_enc_init(&psEnc->sCmn.sRC);
        psEnc->sCmn.nBytesInPayloadBuf = 0;
    }

    /****************************************/
    /* Encode Parameters                    */
    /****************************************/
    if (psEnc->sCmn.bitstream_v == BIT_STREAM_V4) {
        SKP_Silk_encode_parameters_v4(&psEnc->sCmn, &sEncCtrl.sCmn, &psEnc->sCmn.sRC);
        FrameTermination_CDF = SKP_Silk_FrameTermination_v4_CDF;
    } else {
        SKP_Silk_encode_parameters(&psEnc->sCmn, &sEncCtrl.sCmn, &psEnc->sCmn.sRC,
                                   &psEnc->sCmn.q[psEnc->sCmn.nFramesInPayloadBuf * psEnc->sCmn.frame_length]);
        FrameTermination_CDF = SKP_Silk_FrameTermination_CDF;
    }

    /****************************************/
    /* Update Buffers and State             */
    /****************************************/
    /* Update Input buffer */
    SKP_memmove(psEnc->x_buf, &psEnc->x_buf[psEnc->sCmn.frame_length],
                (psEnc->sCmn.frame_length + psEnc->sCmn.la_shape) * sizeof(int16_t));

    /* parameters needed for next frame */
    psEnc->sCmn.prev_sigtype = sEncCtrl.sCmn.sigtype;
    psEnc->sCmn.prevLag = sEncCtrl.sCmn.pitchL[NB_SUBFR - 1];
    psEnc->sCmn.first_frame_after_reset = 0;

    if (psEnc->sCmn.sRC.error) {
        /* encoder returned error: clear payload buffer */
        psEnc->sCmn.nFramesInPayloadBuf = 0;
    } else {
        psEnc->sCmn.nFramesInPayloadBuf++;
    }

    /****************************************/
    /* finalize payload and copy to output  */
    /****************************************/
    if (psEnc->sCmn.nFramesInPayloadBuf * FRAME_LENGTH_MS >= psEnc->sCmn.PacketSize_ms) {

        LBRR_idx = (psEnc->sCmn.oldest_LBRR_idx + 1) & LBRR_IDX_MASK;

        /* Check if FEC information should be added */
        frame_terminator = SKP_SILK_LAST_FRAME;
        if (psEnc->sCmn.LBRR_buffer[LBRR_idx].usage == SKP_SILK_ADD_LBRR_TO_PLUS1) {
            frame_terminator = SKP_SILK_LBRR_VER1;
        }
        if (psEnc->sCmn.LBRR_buffer[psEnc->sCmn.oldest_LBRR_idx].usage == SKP_SILK_ADD_LBRR_TO_PLUS2) {
            frame_terminator = SKP_SILK_LBRR_VER2;
            LBRR_idx = psEnc->sCmn.oldest_LBRR_idx;
        }

        /* Add the frame termination info to stream */
        SKP_Silk_range_encoder(&psEnc->sCmn.sRC, frame_terminator, FrameTermination_CDF);

        if (psEnc->sCmn.bitstream_v == BIT_STREAM_V4) {
            /* Code excitation signal */
            for (i = 0; i < psEnc->sCmn.nFramesInPayloadBuf; i++) {
                SKP_Silk_encode_pulses(&psEnc->sCmn.sRC, psEnc->sCmn.sigtype[i],
                                       psEnc->sCmn.QuantOffsetType[i],
                                       &psEnc->sCmn.q[i * psEnc->sCmn.frame_length],
                                       psEnc->sCmn.frame_length);
            }
        }

        /* payload length so far */
        SKP_Silk_range_coder_get_length(&psEnc->sCmn.sRC, &nBytes);

        /* check that there is enough space in external output buffer, and move data */
        if (*pnBytesOut >= nBytes) {
            SKP_Silk_range_enc_wrap_up(&psEnc->sCmn.sRC);
            SKP_memcpy(pCode, psEnc->sCmn.sRC.buffer, nBytes * sizeof(uint8_t));

            if (frame_terminator > SKP_SILK_MORE_FRAMES &&
                *pnBytesOut >= nBytes + psEnc->sCmn.LBRR_buffer[LBRR_idx].nBytes) {
                /* Get old packet and add to payload. */
                SKP_memcpy(&pCode[nBytes], psEnc->sCmn.LBRR_buffer[LBRR_idx].payload,
                           psEnc->sCmn.LBRR_buffer[LBRR_idx].nBytes * sizeof(uint8_t));
                nBytes += psEnc->sCmn.LBRR_buffer[LBRR_idx].nBytes;
            }
            *pnBytesOut = nBytes;

            /* Update FEC buffer */
            SKP_memcpy(psEnc->sCmn.LBRR_buffer[psEnc->sCmn.oldest_LBRR_idx].payload, LBRRpayload,
                       nBytesLBRR * sizeof(uint8_t));
            psEnc->sCmn.LBRR_buffer[psEnc->sCmn.oldest_LBRR_idx].nBytes = nBytesLBRR;
            /* This field describes how the FEC payload should be used */
            psEnc->sCmn.LBRR_buffer[psEnc->sCmn.oldest_LBRR_idx].usage = sEncCtrl.sCmn.LBRR_usage;
            psEnc->sCmn.oldest_LBRR_idx = (psEnc->sCmn.oldest_LBRR_idx + 1) & LBRR_IDX_MASK;

            /* Reset number of frames in payload buffer */
            psEnc->sCmn.nFramesInPayloadBuf = 0;
        } else {
            /* Not enough space: Payload will be discarded */
            *pnBytesOut = 0;
            nBytes = 0;
            psEnc->sCmn.nFramesInPayloadBuf = 0;
            ret = SKP_SILK_ENC_PAYLOAD_BUF_TOO_SHORT;
        }
    } else {
        /* No payload this time */
        *pnBytesOut = 0;

        /* Encode that more frames follow */
        frame_terminator = SKP_SILK_MORE_FRAMES;
        SKP_Silk_range_encoder(&psEnc->sCmn.sRC, frame_terminator, FrameTermination_CDF);

        /* payload length so far */
        SKP_Silk_range_coder_get_length(&psEnc->sCmn.sRC, &nBytes);

        /* Take into account the q signal that isn't in the bitstream yet.       */
        /* nFramesInPayloadBuf is 0 here when the range coder reported an error  */
        /* above; skip the estimate then, since ( 0 - 1 ) * frame_length would   */
        /* index before the start of q.                                          */
        if (psEnc->sCmn.bitstream_v == BIT_STREAM_V4 && psEnc->sCmn.nFramesInPayloadBuf > 0) {
            nBytes += SKP_Silk_pulses_to_bytes(&psEnc->sCmn,
                                               &psEnc->sCmn.q[(psEnc->sCmn.nFramesInPayloadBuf - 1) *
                                                              psEnc->sCmn.frame_length]);
        }
    }

    /* Check for arithmetic coder errors */
    if (psEnc->sCmn.sRC.error) {
        ret = SKP_SILK_ENC_INTERNAL_ERROR;
    }

    /* simulate number of ms buffered in channel because of exceeding TargetRate */
    assert((8 * 1000 * ((int64_t) nBytes - (int64_t) psEnc->sCmn.nBytesInPayloadBuf)) ==
           SKP_SAT32(8 * 1000 * ((int64_t) nBytes - (int64_t) psEnc->sCmn.nBytesInPayloadBuf)));
    assert(psEnc->sCmn.TargetRate_bps > 0);
    psEnc->BufferedInChannel_ms +=
        SKP_DIV32(8 * 1000 * (nBytes - psEnc->sCmn.nBytesInPayloadBuf), psEnc->sCmn.TargetRate_bps);
    psEnc->BufferedInChannel_ms -= FRAME_LENGTH_MS;
    psEnc->BufferedInChannel_ms = SKP_LIMIT(psEnc->BufferedInChannel_ms, 0, 100);
    psEnc->sCmn.nBytesInPayloadBuf = nBytes;

    /* Accumulate active speech time for the bandwidth detector */
    if (psEnc->speech_activity_Q8 > WB_DETECT_ACTIVE_SPEECH_LEVEL_THRES_Q8) {
        psEnc->sCmn.sSWBdetect.ActiveSpeech_ms =
            SKP_ADD_POS_SAT32(psEnc->sCmn.sSWBdetect.ActiveSpeech_ms, FRAME_LENGTH_MS);
    }

    return (ret);
}
/* Decode a frame */ SKP_int SKP_Silk_SDK_Decode( void* decState, /* I/O: State */ SKP_SILK_SDK_DecControlStruct* decControl, /* I/O: Control structure */ SKP_int lostFlag, /* I: 0: no loss, 1 loss */ const SKP_uint8 *inData, /* I: Encoded input vector */ const SKP_int nBytesIn, /* I: Number of input Bytes */ SKP_int16 *samplesOut, /* O: Decoded output speech vector */ SKP_int16 *nSamplesOut /* I/O: Number of samples (vector/decoded) */ ) { SKP_int ret = 0, used_bytes, prev_fs_kHz; SKP_Silk_decoder_state *psDec; SKP_int16 samplesOutInternal[ MAX_API_FS_KHZ * FRAME_LENGTH_MS ]; SKP_int16 *pSamplesOutInternal; psDec = (SKP_Silk_decoder_state *)decState; /* We need this buffer to have room for an internal frame */ pSamplesOutInternal = samplesOut; if( psDec->fs_kHz * 1000 > decControl->API_sampleRate ) { pSamplesOutInternal = samplesOutInternal; } /**********************************/ /* Test if first frame in payload */ /**********************************/ if( psDec->moreInternalDecoderFrames == 0 ) { /* First Frame in Payload */ psDec->nFramesDecoded = 0; /* Used to count frames in packet */ } if( psDec->moreInternalDecoderFrames == 0 && /* First frame in packet */ lostFlag == 0 && /* Not packet loss */ nBytesIn > MAX_ARITHM_BYTES ) { /* Too long payload */ /* Avoid trying to decode a too large packet */ lostFlag = 1; ret = SKP_SILK_DEC_PAYLOAD_TOO_LARGE; } /* Save previous sample frequency */ prev_fs_kHz = psDec->fs_kHz; /* Call decoder for one frame */ ret += SKP_Silk_decode_frame( psDec, pSamplesOutInternal, nSamplesOut, inData, nBytesIn, lostFlag, &used_bytes ); if( used_bytes ) { /* Only Call if not a packet loss */ if( psDec->nBytesLeft > 0 && psDec->FrameTermination == SKP_SILK_MORE_FRAMES && psDec->nFramesDecoded < 5 ) { /* We have more frames in the Payload */ psDec->moreInternalDecoderFrames = 1; } else { /* Last frame in Payload */ psDec->moreInternalDecoderFrames = 0; psDec->nFramesInPacket = psDec->nFramesDecoded; /* Track inband FEC usage */ if( 
psDec->vadFlag == VOICE_ACTIVITY ) { if( psDec->FrameTermination == SKP_SILK_LAST_FRAME ) { psDec->no_FEC_counter++; if( psDec->no_FEC_counter > NO_LBRR_THRES ) { psDec->inband_FEC_offset = 0; } } else if( psDec->FrameTermination == SKP_SILK_LBRR_VER1 ) { psDec->inband_FEC_offset = 1; /* FEC info with 1 packet delay */ psDec->no_FEC_counter = 0; } else if( psDec->FrameTermination == SKP_SILK_LBRR_VER2 ) { psDec->inband_FEC_offset = 2; /* FEC info with 2 packets delay */ psDec->no_FEC_counter = 0; } } } } if( MAX_API_FS_KHZ * 1000 < decControl->API_sampleRate || 8000 > decControl->API_sampleRate ) { ret = SKP_SILK_DEC_INVALID_SAMPLING_FREQUENCY; return( ret ); } /* Resample if needed */ if( psDec->fs_kHz * 1000 != decControl->API_sampleRate ) { SKP_int16 samplesOut_tmp[ MAX_API_FS_KHZ * FRAME_LENGTH_MS ]; SKP_assert( psDec->fs_kHz <= MAX_API_FS_KHZ ); /* Copy to a tmp buffer as the resampling writes to samplesOut */ SKP_memcpy( samplesOut_tmp, pSamplesOutInternal, *nSamplesOut * sizeof( SKP_int16 ) ); /* (Re-)initialize resampler state when switching internal sampling frequency */ if( prev_fs_kHz != psDec->fs_kHz || psDec->prev_API_sampleRate != decControl->API_sampleRate ) { ret = SKP_Silk_resampler_init( &psDec->resampler_state, SKP_SMULBB( psDec->fs_kHz, 1000 ), decControl->API_sampleRate ); } /* Resample the output to API_sampleRate */ ret += SKP_Silk_resampler( &psDec->resampler_state, samplesOut, samplesOut_tmp, *nSamplesOut ); /* Update the number of output samples */ *nSamplesOut = SKP_DIV32( ( SKP_int32 )*nSamplesOut * decControl->API_sampleRate, psDec->fs_kHz * 1000 ); } else if( prev_fs_kHz * 1000 > decControl->API_sampleRate ) { SKP_memcpy( samplesOut, pSamplesOutInternal, *nSamplesOut * sizeof( SKP_int16 ) ); } psDec->prev_API_sampleRate = decControl->API_sampleRate; /* Copy all parameters that are needed out of internal structure to the control stucture */ decControl->frameSize = (SKP_uint16)( decControl->API_sampleRate / 50 ) ; 
decControl->framesPerPacket = ( SKP_int )psDec->nFramesInPacket; decControl->inBandFECOffset = ( SKP_int )psDec->inband_FEC_offset; decControl->moreInternalDecoderFrames = ( SKP_int )psDec->moreInternalDecoderFrames; return ret; }
void SKP_Silk_noise_shape_analysis_FIX( SKP_Silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ SKP_Silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ const SKP_int16 *pitch_res, /* I LPC residual from pitch analysis */ const SKP_int16 *x /* I Input signal [ 2 * frame_length + la_shape ]*/ ) { SKP_Silk_shape_state_FIX *psShapeSt = &psEnc->sShape; SKP_int k, nSamples, lz, Qnrg, b_Q14, scale = 0, sz; SKP_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; SKP_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; SKP_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; SKP_int32 auto_corr[ SHAPE_LPC_ORDER_MAX + 1 ]; SKP_int32 refl_coef_Q16[ SHAPE_LPC_ORDER_MAX ]; SKP_int32 AR_Q24[ SHAPE_LPC_ORDER_MAX ]; SKP_int16 x_windowed[ SHAPE_LPC_WIN_MAX ]; const SKP_int16 *x_ptr, *pitch_res_ptr; SKP_int32 sqrt_nrg[ NB_SUBFR ], Qnrg_vec[ NB_SUBFR ]; /* Point to start of first LPC analysis block */ x_ptr = x + psEnc->sCmn.la_shape - SKP_SMULBB( SHAPE_LPC_WIN_MS, psEnc->sCmn.fs_kHz ) + psEnc->sCmn.frame_length / NB_SUBFR; /****************/ /* CONTROL SNR */ /****************/ /* Reduce SNR_dB values if recent bitstream has exceeded TargetRate */ psEncCtrl->current_SNR_dB_Q7 = psEnc->SNR_dB_Q7 - SKP_SMULWB( SKP_LSHIFT( ( SKP_int32 )psEnc->BufferedInChannel_ms, 7 ), 3277 ); /* Reduce SNR_dB if inband FEC used */ if( psEnc->speech_activity_Q8 > LBRR_SPEECH_ACTIVITY_THRES_Q8 ) { psEncCtrl->current_SNR_dB_Q7 -= SKP_RSHIFT( psEnc->inBandFEC_SNR_comp_Q8, 1 ); } /****************/ /* GAIN CONTROL */ /****************/ /* Input quality is the average of the quality in the lowest two VAD bands */ psEncCtrl->input_quality_Q14 = ( SKP_int )SKP_RSHIFT( ( SKP_int32 )psEncCtrl->input_quality_bands_Q15[ 0 ] + psEncCtrl->input_quality_bands_Q15[ 1 ], 2 ); /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ psEncCtrl->coding_quality_Q14 = SKP_RSHIFT( SKP_Silk_sigm_Q15( 
SKP_RSHIFT_ROUND( psEncCtrl->current_SNR_dB_Q7 - ( 18 << 7 ), 4 ) ), 1 ); /* Reduce coding SNR during low speech activity */ b_Q8 = ( 1 << 8 ) - psEnc->speech_activity_Q8; b_Q8 = SKP_SMULWB( SKP_LSHIFT( b_Q8, 8 ), b_Q8 ); SNR_adj_dB_Q7 = SKP_SMLAWB( psEncCtrl->current_SNR_dB_Q7, SKP_SMULBB( -BG_SNR_DECR_dB_Q7 >> ( 4 + 1 ), b_Q8 ), // Q11 SKP_SMULWB( ( 1 << 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); // Q12 if( psEncCtrl->sCmn.sigtype == SIG_TYPE_VOICED ) { /* Reduce gains for periodic signals */ SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, HARM_SNR_INCR_dB_Q7 << 1, psEnc->LTPCorr_Q15 ); } else { /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, SKP_SMLAWB( 6 << ( 7 + 2 ), -104856, psEncCtrl->current_SNR_dB_Q7 ), //-104856_Q18 = -0.4_Q0, Q9 ( 1 << 14 ) - psEncCtrl->input_quality_Q14 ); // Q14 } /*************************/ /* SPARSENESS PROCESSING */ /*************************/ /* Set quantizer offset */ if( psEncCtrl->sCmn.sigtype == SIG_TYPE_VOICED ) { /* Initally set to 0; may be overruled in process_gains(..) 
*/ psEncCtrl->sCmn.QuantOffsetType = 0; psEncCtrl->sparseness_Q8 = 0; } else { /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ nSamples = SKP_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); energy_variation_Q7 = 0; log_energy_prev_Q7 = 0; pitch_res_ptr = pitch_res; for( k = 0; k < FRAME_LENGTH_MS / 2; k++ ) { SKP_Silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); nrg += SKP_RSHIFT( nSamples, scale ); // Q(-scale) log_energy_Q7 = SKP_Silk_lin2log( nrg ); if( k > 0 ) { energy_variation_Q7 += SKP_abs( log_energy_Q7 - log_energy_prev_Q7 ); } log_energy_prev_Q7 = log_energy_Q7; pitch_res_ptr += nSamples; } psEncCtrl->sparseness_Q8 = SKP_RSHIFT( SKP_Silk_sigm_Q15( SKP_SMULWB( energy_variation_Q7 - ( 5 << 7 ), 6554 ) ), 7 ); // 6554_Q16 = 0.1_Q0 /* Set quantization offset depending on sparseness measure */ if( psEncCtrl->sparseness_Q8 > SPARSENESS_THRESHOLD_QNT_OFFSET_Q8 ) { psEncCtrl->sCmn.QuantOffsetType = 0; } else { psEncCtrl->sCmn.QuantOffsetType = 1; } /* Increase coding SNR for sparse signals */ SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, SPARSE_SNR_INCR_dB_Q7 << 8, psEncCtrl->sparseness_Q8 - ( 1 << 7 ) ); } /*******************************/ /* Control bandwidth expansion */ /*******************************/ delta_Q16 = SKP_SMULWB( ( 1 << 16 ) - SKP_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), LOW_RATE_BANDWIDTH_EXPANSION_DELTA_Q16 ); BWExp1_Q16 = BANDWIDTH_EXPANSION_Q16 - delta_Q16; BWExp2_Q16 = BANDWIDTH_EXPANSION_Q16 + delta_Q16; if( psEnc->sCmn.fs_kHz == 24 ) { /* Less bandwidth expansion for super wideband */ BWExp1_Q16 = ( 1 << 16 ) - SKP_SMULWB( SWB_BANDWIDTH_EXPANSION_REDUCTION_Q16, ( 1 << 16 ) - BWExp1_Q16 ); BWExp2_Q16 = ( 1 << 16 ) - SKP_SMULWB( SWB_BANDWIDTH_EXPANSION_REDUCTION_Q16, ( 1 << 16 ) - BWExp2_Q16 ); } /* BWExp1 will be applied after BWExp2, so make it relative */ BWExp1_Q16 = SKP_DIV32_16( SKP_LSHIFT( BWExp1_Q16, 14 ), SKP_RSHIFT( BWExp2_Q16, 2 ) ); /********************************************/ /* Compute noise 
shaping AR coefs and gains */ /********************************************/ sz = ( SKP_int )SKP_SMULBB( SHAPE_LPC_WIN_MS, psEnc->sCmn.fs_kHz ); for( k = 0; k < NB_SUBFR; k++ ) { /* Apply window */ SKP_Silk_apply_sine_window( x_windowed, x_ptr, 0, SHAPE_LPC_WIN_MS * psEnc->sCmn.fs_kHz ); /* Update pointer: next LPC analysis block */ x_ptr += psEnc->sCmn.frame_length / NB_SUBFR; /* Calculate auto correlation */ SKP_Silk_autocorr( auto_corr, &scale, x_windowed, sz, psEnc->sCmn.shapingLPCOrder + 1 ); /* Add white noise, as a fraction of energy */ auto_corr[0] = SKP_ADD32( auto_corr[0], SKP_max_32( SKP_SMULWB( SKP_RSHIFT( auto_corr[ 0 ], 4 ), SHAPE_WHITE_NOISE_FRACTION_Q20 ), 1 ) ); /* Calculate the reflection coefficients using schur */ nrg = SKP_Silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); /* Convert reflection coefficients to prediction coefficients */ SKP_Silk_k2a_Q16( AR_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); /* Bandwidth expansion for synthesis filter shaping */ SKP_Silk_bwexpander_32( AR_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); /* Make sure to fit in Q13 SKP_int16 */ SKP_Silk_LPC_fit( &psEncCtrl->AR2_Q13[ k * SHAPE_LPC_ORDER_MAX ], AR_Q24, 13, psEnc->sCmn.shapingLPCOrder ); /* Compute noise shaping filter coefficients */ SKP_memcpy( &psEncCtrl->AR1_Q13[ k * SHAPE_LPC_ORDER_MAX ], &psEncCtrl->AR2_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder * sizeof( SKP_int16 ) ); /* Bandwidth expansion for analysis filter shaping */ SKP_assert( BWExp1_Q16 <= ( 1 << 16 ) ); // If ever breaking, use LPC_stabilize() in these cases to stay within range SKP_Silk_bwexpander( &psEncCtrl->AR1_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); /* Increase residual energy */ nrg = SKP_SMLAWB( nrg, SKP_RSHIFT( auto_corr[ 0 ], 8 ), SHAPE_MIN_ENERGY_RATIO_Q24 ); Qnrg = -scale; // range: -12...30 SKP_assert( Qnrg >= -12 ); SKP_assert( Qnrg <= 30 ); /* Make sure that Qnrg is an even number */ if( Qnrg & 1 ) { 
Qnrg -= 1; nrg >>= 1; } tmp32 = SKP_Silk_SQRT_APPROX( nrg ); Qnrg >>= 1; // range: -6...15 sqrt_nrg[ k ] = tmp32; Qnrg_vec[ k ] = Qnrg; psEncCtrl->Gains_Q16[ k ] = SKP_LSHIFT_SAT32( tmp32, 16 - Qnrg ); /* Ratio of prediction gains, in energy domain */ SKP_Silk_LPC_inverse_pred_gain_Q13( &pre_nrg_Q30, &psEncCtrl->AR2_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder ); SKP_Silk_LPC_inverse_pred_gain_Q13( &nrg, &psEncCtrl->AR1_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder ); lz = SKP_min_32( SKP_Silk_CLZ32( pre_nrg_Q30 ) - 1, 19 ); pre_nrg_Q30 = SKP_DIV32( SKP_LSHIFT( pre_nrg_Q30, lz ), SKP_RSHIFT( nrg, 20 - lz ) + 1 ); // Q20 pre_nrg_Q30 = SKP_RSHIFT( SKP_LSHIFT_SAT32( pre_nrg_Q30, 9 ), 1 ); /* Q28 */ psEncCtrl->GainsPre_Q14[ k ] = ( SKP_int )SKP_Silk_SQRT_APPROX( pre_nrg_Q30 ); }
SKP_int SKP_Silk_SDK_Encode( void *encState, /* I/O: State */ SKP_Silk_EncodeControlStruct *encControl, /* I: Control structure */ const SKP_int16 *samplesIn, /* I: Speech sample input vector */ SKP_int nSamplesIn, /* I: Number of samples in input vector */ ec_enc *psRangeEnc, /* I/O Compressor data structure */ SKP_int32 *nBytesOut, /* I/O: Number of bytes in payload (input: Max bytes) */ const SKP_int prefillFlag /* I: Flag to indicate prefilling buffers no coding */ ) { SKP_int tmp_payloadSize_ms, tmp_complexity, ret = 0; SKP_int nSamplesToBuffer, nBlocksOf10ms, nSamplesFromInput = 0; SKP_Silk_encoder_state_Fxx *psEnc = ( SKP_Silk_encoder_state_Fxx* )encState; ret = process_enc_control_struct( psEnc, encControl ); nBlocksOf10ms = SKP_DIV32( 100 * nSamplesIn, psEnc->sCmn.API_fs_Hz ); if( prefillFlag ) { /* Only accept input length of 10 ms */ if( nBlocksOf10ms != 1 ) { ret = SKP_SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; SKP_assert( 0 ); return( ret ); } /* Reset Encoder */ if( ret = SKP_Silk_init_encoder_Fxx( psEnc ) ) { SKP_assert( 0 ); } tmp_payloadSize_ms = encControl->payloadSize_ms; encControl->payloadSize_ms = 10; tmp_complexity = encControl->complexity; encControl->complexity = 0; ret = process_enc_control_struct( psEnc, encControl ); psEnc->sCmn.prefillFlag = 1; } else { /* Only accept input lengths that are a multiple of 10 ms */ if( nBlocksOf10ms * psEnc->sCmn.API_fs_Hz != 100 * nSamplesIn || nSamplesIn < 0 ) { ret = SKP_SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; SKP_assert( 0 ); return( ret ); } /* Make sure no more than one packet can be produced */ if( 1000 * (SKP_int32)nSamplesIn > psEnc->sCmn.PacketSize_ms * psEnc->sCmn.API_fs_Hz ) { ret = SKP_SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; SKP_assert( 0 ); return( ret ); } } /* Input buffering/resampling and encoding */ while( 1 ) { nSamplesToBuffer = psEnc->sCmn.frame_length - psEnc->sCmn.inputBufIx; if( psEnc->sCmn.API_fs_Hz == SKP_SMULBB( 1000, psEnc->sCmn.fs_kHz ) ) { nSamplesToBuffer = SKP_min_int( 
nSamplesToBuffer, nSamplesIn ); nSamplesFromInput = nSamplesToBuffer; /* Copy to buffer */ SKP_memcpy( &psEnc->sCmn.inputBuf[ psEnc->sCmn.inputBufIx ], samplesIn, nSamplesFromInput * sizeof( SKP_int16 ) ); } else { nSamplesToBuffer = SKP_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->sCmn.fs_kHz ); nSamplesFromInput = SKP_DIV32_16( nSamplesToBuffer * psEnc->sCmn.API_fs_Hz, psEnc->sCmn.fs_kHz * 1000 ); /* Resample and write to buffer */ ret += SKP_Silk_resampler( &psEnc->sCmn.resampler_state, &psEnc->sCmn.inputBuf[ psEnc->sCmn.inputBufIx ], samplesIn, nSamplesFromInput ); } samplesIn += nSamplesFromInput; nSamplesIn -= nSamplesFromInput; psEnc->sCmn.inputBufIx += nSamplesToBuffer; /* Silk encoder */ if( psEnc->sCmn.inputBufIx >= psEnc->sCmn.frame_length ) { SKP_assert( psEnc->sCmn.inputBufIx == psEnc->sCmn.frame_length ); /* Enough data in input buffer, so encode */ if( ( ret = SKP_Silk_encode_frame_Fxx( psEnc, nBytesOut, psRangeEnc ) ) != 0 ) { SKP_assert( 0 ); } psEnc->sCmn.inputBufIx = 0; psEnc->sCmn.controlled_since_last_payload = 0; if( nSamplesIn == 0 ) { break; } } else { break; } } if( prefillFlag ) { encControl->payloadSize_ms = tmp_payloadSize_ms; encControl->complexity = tmp_complexity; ret = process_enc_control_struct( psEnc, encControl ); psEnc->sCmn.prefillFlag = 0; } return ret; }
/*
 * Derive the prediction parameters of the current frame.
 *
 * Steps, in order:
 *   1. Build per-subframe inverse gains (normalised by the smallest gain),
 *      the squared weights used by the LTP search, and the re-inverted
 *      "local" gains in Q(Qx).
 *   2. Voiced frames: LTP analysis, LTP gain quantisation, LTP scale control
 *      and creation of the LTP residual.
 *      Other frames: copy the input, scaled by the inverse gains, with
 *      predictLPCOrder samples prepended to every subframe, and clear the
 *      LTP coefficients and LTP coding gain.
 *   3. LPC analysis on that signal, NLSF processing, residual energy with
 *      the quantised LPC coefficients, and storage of the NLSFs in the
 *      prediction state for the next frame.
 */
void SKP_Silk_find_pred_coefs_FIX(SKP_Silk_encoder_state_FIX * psEnc,	/* I/O  encoder state */
				  SKP_Silk_encoder_control_FIX * psEncCtrl,	/* I/O  encoder control */
				  const int16_t res_pitch[]	/* I    Residual from pitch analysis */
    )
{
	int32_t ltp_weights[NB_SUBFR * LTP_ORDER * LTP_ORDER];
	int32_t inv_gains_Q16[NB_SUBFR];
	int32_t gains_Qx[NB_SUBFR];
	int32_t weights_Q15[NB_SUBFR];
	int32_t smallest_gain_Q16, inv_Q16, inv_sqr;
	int16_t lpc_input[NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH];
	int nlsf_Q15[MAX_LPC_ORDER];
	int corr_rshifts[NB_SUBFR];
	int sf;
#if !VARQ
	int norm_shift;
#endif

	/* Smallest subframe gain, capped from above; it normalises the weights
	   of the weighted least squares problem */
	smallest_gain_Q16 = int32_t_MAX >> 6;
	for (sf = 0; sf < NB_SUBFR; sf++) {
		if (psEncCtrl->Gains_Q16[sf] < smallest_gain_Q16) {
			smallest_gain_Q16 = psEncCtrl->Gains_Q16[sf];
		}
	}

#if !VARQ
	norm_shift = SKP_Silk_CLZ32(smallest_gain_Q16) - 1;
	norm_shift = SKP_LIMIT(norm_shift, 0, 16);
	/* Two bits of headroom keep the largest inverse gain inside 16 bits */
	smallest_gain_Q16 = SKP_RSHIFT(smallest_gain_Q16, 2);
#endif

	for (sf = 0; sf < NB_SUBFR; sf++) {
		assert(psEncCtrl->Gains_Q16[sf] > 0);

		/* Ratio smallest gain / subframe gain */
#if VARQ
		/* Invert and normalize; the Q14 result stays inside 16 bits */
		inv_Q16 = SKP_DIV32_varQ(smallest_gain_Q16, psEncCtrl->Gains_Q16[sf], 16 - 2);
#else
		inv_Q16 = SKP_DIV32(SKP_LSHIFT(smallest_gain_Q16, norm_shift),
				    SKP_RSHIFT(psEncCtrl->Gains_Q16[sf], 16 - norm_shift));
#endif

		/* A floor of 363 makes the squared weight below at least 1 */
		if (inv_Q16 < 363) {
			inv_Q16 = 363;
		}
		assert(inv_Q16 == SKP_SAT16(inv_Q16));
		inv_gains_Q16[sf] = inv_Q16;

		/* Weight = squared inverse gain, halved */
		inv_sqr = SKP_SMULWB(inv_Q16, inv_Q16);
		weights_Q15[sf] = SKP_RSHIFT(inv_sqr, 1);

		/* Undo the inversion to obtain the normalised gain in Q(Qx) */
		gains_Qx[sf] = SKP_DIV32((1 << (16 + Qx)), inv_Q16);
	}

	if (psEncCtrl->sCmn.sigtype != SIG_TYPE_VOICED) {
		/* Not voiced: no long-term prediction. Each subframe is copied
		   together with the predictLPCOrder samples that precede it,
		   scaled by its inverse gain. */
		const int16_t *src = psEnc->x_buf + psEnc->sCmn.frame_length - psEnc->sCmn.predictLPCOrder;
		int16_t *dst = lpc_input;

		for (sf = 0; sf < NB_SUBFR; sf++) {
			SKP_Silk_scale_copy_vector16(dst, src, inv_gains_Q16[sf],
						     psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder);
			dst += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder;
			src += psEnc->sCmn.subfr_length;
		}

		SKP_memset(psEncCtrl->LTPCoef_Q14, 0, NB_SUBFR * LTP_ORDER * sizeof(int16_t));
		psEncCtrl->LTPredCodGain_Q7 = 0;
	} else {
		/* Voiced: estimate, quantise and apply the long-term predictor */
		assert(psEnc->sCmn.frame_length - psEnc->sCmn.predictLPCOrder >=
		       psEncCtrl->sCmn.pitchL[0] + LTP_ORDER / 2);

		/* LTP analysis */
		SKP_Silk_find_LTP_FIX(psEncCtrl->LTPCoef_Q14, ltp_weights,
				      &psEncCtrl->LTPredCodGain_Q7, res_pitch,
				      res_pitch + SKP_RSHIFT(psEnc->sCmn.frame_length, 1),
				      psEncCtrl->sCmn.pitchL, weights_Q15,
				      psEnc->sCmn.subfr_length, psEnc->sCmn.frame_length,
				      corr_rshifts);

		/* Quantize LTP gain parameters */
		SKP_Silk_quant_LTP_gains_FIX(psEncCtrl->LTPCoef_Q14, psEncCtrl->sCmn.LTPIndex,
					     &psEncCtrl->sCmn.PERIndex, ltp_weights,
					     psEnc->mu_LTP_Q8, psEnc->sCmn.LTPQuantLowComplexity);

		/* Control LTP scaling */
		SKP_Silk_LTP_scale_ctrl_FIX(psEnc, psEncCtrl);

		/* Create LTP residual */
		SKP_Silk_LTP_analysis_filter_FIX(lpc_input,
						 psEnc->x_buf + psEnc->sCmn.frame_length - psEnc->sCmn.predictLPCOrder,
						 psEncCtrl->LTPCoef_Q14, psEncCtrl->sCmn.pitchL,
						 inv_gains_Q16, 16,
						 psEnc->sCmn.subfr_length, psEnc->sCmn.predictLPCOrder);
	}

	/* lpc_input now holds the LTP-filtered input (voiced) or the gain-scaled
	   unfiltered input (otherwise). NLSF interpolation is switched off on
	   the first frame after a reset. */
	TIC(FIND_LPC)
	SKP_Silk_find_LPC_FIX(nlsf_Q15, &psEncCtrl->sCmn.NLSFInterpCoef_Q2,
			      psEnc->sPred.prev_NLSFq_Q15,
			      psEnc->sCmn.useInterpolatedNLSFs * (1 - psEnc->sCmn.first_frame_after_reset),
			      psEnc->sCmn.predictLPCOrder, lpc_input,
			      psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder);
	TOC(FIND_LPC)

	/* Quantize LSFs */
	TIC(PROCESS_LSFS)
	SKP_Silk_process_NLSFs_FIX(psEnc, psEncCtrl, nlsf_Q15);
	TOC(PROCESS_LSFS)

	/* Residual energy measured with the quantized LPC coefficients */
	SKP_Silk_residual_energy_FIX(psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, lpc_input,
				     (const int16_t(*)[])psEncCtrl->PredCoef_Q12,
				     gains_Qx, Qx,
				     psEnc->sCmn.subfr_length, psEnc->sCmn.predictLPCOrder);

	/* Keep the NLSFs for the next frame's fluctuation reduction */
	SKP_memcpy(psEnc->sPred.prev_NLSFq_Q15, nlsf_Q15, psEnc->sCmn.predictLPCOrder * sizeof(int));
}