/* Decode mid/side predictors.
 *
 * Reads one jointly coded symbol followed by two symbols per predictor from
 * the range decoder, rebuilds the two quantization-table positions and writes
 * the dequantized predictors to pred_Q13[ 0 ] and pred_Q13[ 1 ]. On return
 * pred_Q13[ 0 ] holds the first predictor minus the second.
 */
void silk_stereo_decode_pred(
    ec_dec                      *psRangeDec,        /* I/O  Compressor data structure   */
    opus_int32                  pred_Q13[]          /* O    Predictors                  */
)
{
    opus_int    ch, joint_ix, tab_ix;
    opus_int    group_ix[ 2 ], coarse_ix[ 2 ], fine_ix[ 2 ];
    opus_int32  base_Q13, half_step_Q13;

    /* Entropy decoding: the joint symbol packs one index per predictor as */
    /* quotient and remainder of a division by 5                           */
    joint_ix      = ec_dec_icdf( psRangeDec, silk_stereo_pred_joint_iCDF, 8 );
    group_ix[ 0 ] = silk_DIV32_16( joint_ix, 5 );
    group_ix[ 1 ] = joint_ix - 5 * group_ix[ 0 ];

    /* Per predictor: one symbol from the 3-level table, then one from the 5-level table */
    for( ch = 0; ch < 2; ch++ ) {
        coarse_ix[ ch ] = ec_dec_icdf( psRangeDec, silk_uniform3_iCDF, 8 );
        fine_ix[ ch ]   = ec_dec_icdf( psRangeDec, silk_uniform5_iCDF, 8 );
    }

    /* Dequantize */
    for( ch = 0; ch < 2; ch++ ) {
        /* Position in the quantization table */
        tab_ix = coarse_ix[ ch ] + 3 * group_ix[ ch ];

        /* Lower table entry, and the distance to the next entry scaled by 0.5 / STEREO_QUANT_SUB_STEPS */
        base_Q13      = silk_stereo_pred_quant_Q13[ tab_ix ];
        half_step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ tab_ix + 1 ] - base_Q13,
                                     SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) );

        /* Odd multiple of the half step above the lower entry */
        pred_Q13[ ch ] = silk_SMLABB( base_Q13, half_step_Q13, 2 * fine_ix[ ch ] + 1 );
    }

    /* Subtract second from first predictor (helps when actually applying these) */
    pred_Q13[ 0 ] -= pred_Q13[ 1 ];
}
/* Decode an NLSF vector from its codebook path.
 *
 * NLSFIndices[ 0 ] selects a first-stage codebook vector and its weight row;
 * NLSFIndices[ 1.. ] are dequantized into a residual, which is divided by the
 * per-coefficient weights and added to the first-stage vector. The result is
 * limited to [ 0, 32767 ] and then passed through silk_NLSF_stabilize().
 */
void silk_NLSF_decode(
          opus_int16            *pNLSF_Q15,         /* O    Quantized NLSF vector [ LPC_ORDER ]         */
          opus_int8             *NLSFIndices,       /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
    const silk_NLSF_CB_struct   *psNLSF_CB          /* I    Codebook object                             */
)
{
    const opus_int    order       = psNLSF_CB->order;
    const opus_int    cb1_offset  = NLSFIndices[ 0 ] * psNLSF_CB->order;
    const opus_uint8 *cb1_vec_Q8  = &psNLSF_CB->CB1_NLSF_Q8[ cb1_offset ];
    const opus_int16 *cb1_wght_Q9 = &psNLSF_CB->CB1_Wght_Q9[ cb1_offset ];
    opus_uint8        pred_Q8[ MAX_LPC_ORDER ];
    opus_int16        ec_ix[ MAX_LPC_ORDER ];
    opus_int16        res_Q10[ MAX_LPC_ORDER ];
    opus_int32        scaled_res, nlsf_tmp;
    opus_int          k;

    /* Unpack entropy table indices and predictor for the selected first-stage index */
    silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] );

    /* Predictive residual dequantizer */
    silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, order );

    /* Divide the residual by the first-stage weights and add the first-stage vector */
    for( k = 0; k < order; k++ ) {
        scaled_res     = silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ k ], 14 ), cb1_wght_Q9[ k ] );
        nlsf_tmp       = silk_ADD_LSHIFT32( scaled_res, (opus_int16)cb1_vec_Q8[ k ], 7 );
        pNLSF_Q15[ k ] = (opus_int16)silk_LIMIT( nlsf_tmp, 0, 32767 );
    }

    /* NLSF stabilization */
    silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, order );
}
/* Reset the VAD state and seed its per-band noise estimates.
 *
 * The whole state is zeroed first. Each band then gets a noise-level bias of
 * VAD_NOISE_LEVELS_BIAS / ( band + 1 ) (at least 1), a noise level of 100
 * times that bias, the matching inverse noise level, and a smoothed
 * energy-to-noise ratio of 100 * 256. The frame counter starts at 15.
 * Always returns 0.
 */
opus_int silk_VAD_Init(                             /* O    Return value, 0 if success      */
    silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state       */
)
{
    opus_int band;

    /* reset state memory */
    silk_memset( psSilk_VAD, 0, sizeof( silk_VAD_state ) );

    for( band = 0; band < VAD_N_BANDS; band++ ) {
        /* approx pink noise levels (psd proportional to inverse of frequency) */
        psSilk_VAD->NoiseLevelBias[ band ] = silk_max_32( silk_DIV32_16( VAD_NOISE_LEVELS_BIAS, band + 1 ), 1 );

        /* initial noise level and its inverse */
        psSilk_VAD->NL[ band ]     = silk_MUL( 100, psSilk_VAD->NoiseLevelBias[ band ] );
        psSilk_VAD->inv_NL[ band ] = silk_DIV32( silk_int32_MAX, psSilk_VAD->NL[ band ] );

        /* smoothed energy-to-noise ratio: 100 * 256 --> 20 dB SNR */
        psSilk_VAD->NrgRatioSmth_Q8[ band ] = 100 * 256;
    }

    psSilk_VAD->counter = 15;

    return( 0 );
}
/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics.
 *
 * Only the first encoder state is touched, and only when its previous frame
 * was voiced: the smoothed log cutoff (variable_HP_smth1_Q15) is then moved
 * towards a quality-adjusted log pitch frequency and clamped to the range
 * spanned by VARIABLE_HP_MIN_CUTOFF_HZ and VARIABLE_HP_MAX_CUTOFF_HZ.
 */
void silk_HP_variable_cutoff(silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */ )
{
    silk_encoder_state *enc = &state_Fxx[0].sCmn;
    int quality_Q15;
    int32_t pitch_Hz_Q16, pitch_log_Q7, min_cutoff_log_Q7;
    int32_t quality_weight, step_Q7, max_step_Q7, smoothed_Q15;

    /* Adaptive cutoff frequency is only updated after a voiced frame */
    if (enc->prevSignalType != TYPE_VOICED) {
        return;
    }

    /* Pitch frequency estimated from the previous lag, converted to the log domain */
    pitch_Hz_Q16 = silk_DIV32_16(silk_LSHIFT(silk_MUL(enc->fs_kHz, 1000), 16), enc->prevLag);
    pitch_log_Q7 = silk_lin2log(pitch_Hz_Q16) - (16 << 7);

    /* Adjustment based on quality: weight the distance to the minimum cutoff */
    quality_Q15 = enc->input_quality_bands_Q15[0];
    quality_weight = silk_SMULWB(silk_LSHIFT(-quality_Q15, 2), quality_Q15);
    min_cutoff_log_Q7 = silk_lin2log(SILK_FIX_CONST(VARIABLE_HP_MIN_CUTOFF_HZ, 16)) - (16 << 7);
    pitch_log_Q7 = silk_SMLAWB(pitch_log_Q7, quality_weight, pitch_log_Q7 - min_cutoff_log_Q7);

    /* Distance between the target and the current smoothed value */
    step_Q7 = pitch_log_Q7 - silk_RSHIFT(enc->variable_HP_smth1_Q15, 8);
    if (step_Q7 < 0) {
        /* less smoothing for decreasing pitch frequency, to track something close to the minimum */
        step_Q7 = silk_MUL(step_Q7, 3);
    }

    /* Limit the step, to reduce impact of outliers in pitch estimation */
    max_step_Q7 = SILK_FIX_CONST(VARIABLE_HP_MAX_DELTA_FREQ, 7);
    step_Q7 = silk_LIMIT_32(step_Q7, -max_step_Q7, max_step_Q7);

    /* Update the smoother, scaled by speech activity */
    smoothed_Q15 = silk_SMLAWB(enc->variable_HP_smth1_Q15,
                               silk_SMULBB(enc->speech_activity_Q8, step_Q7),
                               SILK_FIX_CONST(VARIABLE_HP_SMTH_COEF1, 16));

    /* Limit frequency range */
    enc->variable_HP_smth1_Q15 = silk_LIMIT_32(smoothed_Q15,
                                               silk_LSHIFT(silk_lin2log(VARIABLE_HP_MIN_CUTOFF_HZ), 8),
                                               silk_LSHIFT(silk_lin2log(VARIABLE_HP_MAX_CUTOFF_HZ), 8));
}
/* Glues concealed frames with new good received frames.
 *
 * While frames are being lost (psDec->lossCnt != 0) the energy of the frame
 * is stored in the PLC state. On the first good frame after a loss, the
 * frame's energy is compared with the stored one; if the new frame is louder
 * it is scaled by a gain that starts at sqrt( stored / new ) and ramps
 * upwards, and scaling stops once the gain exceeds 1.0 in Q16.
 */
void silk_PLC_glue_frames(
    silk_decoder_state          *psDec,             /* I/O  decoder state       */
    opus_int16                  frame[],            /* I/O  signal              */
    opus_int                    length              /* I    length of signal    */
)
{
    silk_PLC_struct *plc = &psDec->sPLC;
    opus_int32       new_energy;
    opus_int         new_shift, shift_diff, n;

    if( psDec->lossCnt ) {
        /* Calculate energy in concealed residual */
        silk_sum_sqr_shift( &plc->conc_energy, &plc->conc_energy_shift, frame, length );
        plc->last_frame_lost = 1;
        return;
    }

    if( plc->last_frame_lost ) {
        /* Calculate residual in decoded signal if last frame was lost */
        silk_sum_sqr_shift( &new_energy, &new_shift, frame, length );

        /* Normalize energies: bring both values to the larger of the two shifts */
        shift_diff = new_shift - plc->conc_energy_shift;
        if( shift_diff > 0 ) {
            plc->conc_energy = silk_RSHIFT( plc->conc_energy, shift_diff );
        } else if( shift_diff < 0 ) {
            new_energy = silk_RSHIFT( new_energy, -shift_diff );
        }

        /* Fade in the energy difference */
        if( new_energy > plc->conc_energy ) {
            const opus_int32 one_Q16 = (opus_int32)1 << 16;
            opus_int32       headroom, ratio_Q24, gain_Q16, slope_Q16;

            /* Scale the stored energy up as far as it goes, and the new energy down to match */
            headroom         = silk_CLZ32( plc->conc_energy ) - 1;
            plc->conc_energy = silk_LSHIFT( plc->conc_energy, headroom );
            new_energy       = silk_RSHIFT( new_energy, silk_max_32( 24 - headroom, 0 ) );

            ratio_Q24 = silk_DIV32( plc->conc_energy, silk_max( new_energy, 1 ) );

            gain_Q16  = silk_LSHIFT( silk_SQRT_APPROX( ratio_Q24 ), 4 );
            slope_Q16 = silk_DIV32_16( one_Q16 - gain_Q16, length );
            /* Make slope 4x steeper to avoid missing onsets after DTX */
            slope_Q16 = silk_LSHIFT( slope_Q16, 2 );

            for( n = 0; n < length; n++ ) {
                frame[ n ] = silk_SMULWB( gain_Q16, frame[ n ] );
                gain_Q16  += slope_Q16;
                if( gain_Q16 > one_Q16 ) {
                    /* Gain has passed unity: leave the rest of the frame untouched */
                    break;
                }
            }
        }
    }

    plc->last_frame_lost = 0;
}
/* uses SMLAWB(), requiring armv5E and higher. */
/* Schur recursion: turns correlations into Q15 reflection coefficients.
 *
 * The correlations are first rescaled according to the number of leading zeros
 * of c[ 0 ]. For each order the reflection coefficient is the negated ratio of
 * the current cross term to the current residual energy.
 *
 * Stability guard: if the magnitude of the cross term is not strictly below
 * the residual energy, the quotient would saturate to about +/-1.0 and every
 * later stage would work on a zero or negative energy. In that case the
 * coefficient is set to +/-0.99 in Q15, the recursion stops, and all remaining
 * coefficients are set to 0.
 *
 * Returns the residual energy, never less than 1, so callers can divide by it.
 */
opus_int32 silk_schur(                              /* O    Returns residual energy (>= 1)                  */
    opus_int16                  *rc_Q15,            /* O    reflection coefficients [order] Q15             */
    const opus_int32            *c,                 /* I    correlations [order+1]                          */
    const opus_int32            order               /* I    prediction order                                */
)
{
    const opus_int32 rc_limit_Q15 = 32440;          /* 0.99 in Q15 */
    opus_int         k, n, lz;
    opus_int32       C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
    opus_int32       Ctmp1, Ctmp2, rc_tmp_Q15;
    opus_int32       cross, energy;

    silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );

    /* Get number of leading zeros */
    lz = silk_CLZ32( c[ 0 ] );

    /* Copy correlations and adjust level to Q30 */
    if( lz < 2 ) {
        /* lz must be 1, so shift one to the right */
        for( k = 0; k < order + 1; k++ ) {
            C[ k ][ 0 ] = C[ k ][ 1 ] = silk_RSHIFT( c[ k ], 1 );
        }
    } else if( lz > 2 ) {
        /* Shift to the left */
        lz -= 2;
        for( k = 0; k < order + 1; k++ ) {
            C[ k ][ 0 ] = C[ k ][ 1 ] = silk_LSHIFT( c[ k ], lz );
        }
    } else {
        /* No need to shift */
        for( k = 0; k < order + 1; k++ ) {
            C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ];
        }
    }

    for( k = 0; k < order; k++ ) {
        cross  = C[ k + 1 ][ 0 ];
        energy = C[ 0 ][ 1 ];

        /* Check that we won't be getting an unstable rc, otherwise stop here. */
        /* Tests |cross| >= energy without negating a value that could overflow */
        if( energy <= 0 || cross >= energy || cross <= -energy ) {
            rc_Q15[ k ] = (opus_int16)( ( cross > 0 ) ? -rc_limit_Q15 : rc_limit_Q15 );
            k++;
            break;
        }

        /* Get reflection coefficient */
        rc_tmp_Q15 = -silk_DIV32_16( cross, silk_max_32( silk_RSHIFT( energy, 15 ), 1 ) );

        /* Clip (shouldn't happen for properly conditioned inputs) */
        rc_tmp_Q15 = silk_SAT16( rc_tmp_Q15 );

        /* Store */
        rc_Q15[ k ] = (opus_int16)rc_tmp_Q15;

        /* Update correlations */
        for( n = 0; n < order - k; n++ ) {
            Ctmp1 = C[ n + k + 1 ][ 0 ];
            Ctmp2 = C[ n ][ 1 ];
            C[ n + k + 1 ][ 0 ] = silk_SMLAWB( Ctmp1, silk_LSHIFT( Ctmp2, 1 ), rc_tmp_Q15 );
            C[ n ][ 1 ]         = silk_SMLAWB( Ctmp2, silk_LSHIFT( Ctmp1, 1 ), rc_tmp_Q15 );
        }
    }

    /* Coefficients after an early stop are defined as zero */
    for( ; k < order; k++ ) {
        rc_Q15[ k ] = 0;
    }

    /* return residual energy */
    return silk_max_32( 1, C[ 0 ][ 1 ] );
}
/* (Re)configure the encoder's input resampler for a new internal rate.
 *
 * Nothing is re-initialized unless the internal rate differs from fs_kHz or
 * the API rate differs from the one recorded on the previous call. On the
 * very first setup (internal rate still 0) the resampler is simply
 * initialized. Otherwise the buffered signal in x_buf is resampled to the API
 * rate with a temporary resampler and then run through the freshly
 * initialized API-to-internal resampler, overwriting the buffer.
 *
 * Returns the sum of the status codes of the resampler calls made.
 */
opus_int silk_setup_resamplers(
    silk_encoder_state_Fxx      *psEnc,             /* I/O                      */
    opus_int                    fs_kHz              /* I                        */
)
{
    opus_int       status = SILK_NO_ERROR;
    opus_int32     n_buffered;
    const opus_int rates_changed =
        ( psEnc->sCmn.fs_kHz != fs_kHz ) ||
        ( psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz );

    if( rates_changed && psEnc->sCmn.fs_kHz == 0 ) {
        /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
        status += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 );
    } else if( rates_changed ) {
        /* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */
        opus_int16                  x_buf_API_fs_Hz[ ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * MAX_API_FS_KHZ ];
        silk_resampler_state_struct temp_resampler_state;
        opus_int32                  old_fs_Hz;
#ifdef FIXED_POINT
        opus_int16 *x_bufFIX = psEnc->x_buf;
#else
        opus_int16 x_bufFIX[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];
#endif

        /* Two frames plus LA_SHAPE_MS milliseconds, at the current internal rate */
        n_buffered = silk_LSHIFT( psEnc->sCmn.frame_length, 1 ) + LA_SHAPE_MS * psEnc->sCmn.fs_kHz;
        old_fs_Hz  = silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 );

#ifndef FIXED_POINT
        silk_float2short_array( x_bufFIX, psEnc->x_buf, n_buffered );
#endif

        /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
        status += silk_resampler_init( &temp_resampler_state, old_fs_Hz, psEnc->sCmn.API_fs_Hz, 0 );

        /* Temporary resampling of x_buf data to API_fs_Hz */
        status += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, n_buffered );

        /* Calculate number of samples that has been temporarily upsampled */
        n_buffered = silk_DIV32_16( n_buffered * psEnc->sCmn.API_fs_Hz, old_fs_Hz );

        /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
        status += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 );

        /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */
        status += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, n_buffered );

#ifndef FIXED_POINT
        silk_short2float_array( psEnc->x_buf, x_bufFIX, ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * fs_kHz );
#endif
    }

    /* Remember the API rate this configuration was made for */
    psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz;

    return status;
}
/* Laroia low complexity NLSF weights.
 *
 * Each weight is the sum of two reciprocals, one for the gap below the
 * coefficient and one for the gap above it, each computed as
 * ( 1 << ( 15 + NLSF_W_Q ) ) / max( gap, 1 ). The gap below the first
 * coefficient is measured from 0 and the gap above the last one up to 1 << 15.
 * Every weight is capped at silk_int16_MAX.
 *
 * D must be positive and even (asserted).
 */
void silk_NLSF_VQ_weights_laroia(
    opus_int16                  *pNLSFW_Q_OUT,      /* O    Pointer to input vector weights [D]     */
    const opus_int16            *pNLSF_Q15,         /* I    Pointer to input vector         [D]     */
    const opus_int              D                   /* I    Input vector dimension (even)           */
)
{
    opus_int   i;
    opus_int32 gap, inv_below, inv_above;

    silk_assert( D > 0 );
    silk_assert( ( D & 1 ) == 0 );

    /* First value: gap from 0 below, gap to the second coefficient above */
    gap       = silk_max_int( pNLSF_Q15[ 0 ], 1 );
    inv_below = silk_DIV32_16( 1 << ( 15 + NLSF_W_Q ), gap );
    gap       = silk_max_int( pNLSF_Q15[ 1 ] - pNLSF_Q15[ 0 ], 1 );
    inv_above = silk_DIV32_16( 1 << ( 15 + NLSF_W_Q ), gap );
    pNLSFW_Q_OUT[ 0 ] = (opus_int16)silk_min_int( inv_below + inv_above, silk_int16_MAX );
    silk_assert( pNLSFW_Q_OUT[ 0 ] > 0 );

    /* Interior values: the reciprocal above one coefficient is the one below the next */
    for( i = 1; i < D - 1; i++ ) {
        inv_below = inv_above;
        gap       = silk_max_int( pNLSF_Q15[ i + 1 ] - pNLSF_Q15[ i ], 1 );
        inv_above = silk_DIV32_16( 1 << ( 15 + NLSF_W_Q ), gap );
        pNLSFW_Q_OUT[ i ] = (opus_int16)silk_min_int( inv_below + inv_above, silk_int16_MAX );
        silk_assert( pNLSFW_Q_OUT[ i ] > 0 );
    }

    /* Last value: gap above is measured up to 1 << 15 */
    inv_below = inv_above;
    gap       = silk_max_int( ( 1 << 15 ) - pNLSF_Q15[ D - 1 ], 1 );
    inv_above = silk_DIV32_16( 1 << ( 15 + NLSF_W_Q ), gap );
    pNLSFW_Q_OUT[ D - 1 ] = (opus_int16)silk_min_int( inv_below + inv_above, silk_int16_MAX );
    silk_assert( pNLSFW_Q_OUT[ D - 1 ] > 0 );
}
/* Convert adaptive Mid/Side representation to Left/Right stereo signal */ void silk_stereo_MS_to_LR( stereo_dec_state *state, /* I/O State */ opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ opus_int16 x2[], /* I/O Right input signal, becomes side signal */ const opus_int32 pred_Q13[], /* I Predictors */ opus_int fs_kHz, /* I Samples rate (kHz) */ opus_int frame_length /* I Number of samples */ ) { opus_int n, denom_Q16, delta0_Q13, delta1_Q13; opus_int32 sum, diff, pred0_Q13, pred1_Q13; /* Buffering */ silk_memcpy( x1, state->sMid, 2 * sizeof( opus_int16 ) ); silk_memcpy( x2, state->sSide, 2 * sizeof( opus_int16 ) ); silk_memcpy( state->sMid, &x1[ frame_length ], 2 * sizeof( opus_int16 ) ); silk_memcpy( state->sSide, &x2[ frame_length ], 2 * sizeof( opus_int16 ) ); /* Interpolate predictors and add prediction to side channel */ pred0_Q13 = state->pred_prev_Q13[ 0 ]; pred1_Q13 = state->pred_prev_Q13[ 1 ]; denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz ); delta0_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 ); delta1_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 ); for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) { pred0_Q13 += delta0_Q13; pred1_Q13 += delta1_Q13; sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); } pred0_Q13 = pred_Q13[ 0 ]; pred1_Q13 = pred_Q13[ 1 ]; for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */ sum = silk_SMLAWB( 
sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); } state->pred_prev_Q13[ 0 ] = pred_Q13[ 0 ]; state->pred_prev_Q13[ 1 ] = pred_Q13[ 1 ]; /* Convert to left/right signals */ for( n = 0; n < frame_length; n++ ) { sum = x1[ n + 1 ] + (opus_int32)x2[ n + 1 ]; diff = x1[ n + 1 ] - (opus_int32)x2[ n + 1 ]; x1[ n + 1 ] = (opus_int16)silk_SAT16( sum ); x2[ n + 1 ] = (opus_int16)silk_SAT16( diff ); } }
/* Processing of gains */ void silk_process_gains_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control */ opus_int condCoding /* I The type of conditional coding to use */ ) { silk_shape_state_FIX *psShapeSt = &psEnc->sShape; opus_int k; opus_int32 s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10; /* Gain reduction when LTP coding gain is high */ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /*s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); */ s_Q16 = -silk_sigm_Q15( silk_RSHIFT_ROUND( psEncCtrl->LTPredCodGain_Q7 - SILK_FIX_CONST( 12.0, 7 ), 4 ) ); for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { psEncCtrl->Gains_Q16[ k ] = silk_SMLAWB( psEncCtrl->Gains_Q16[ k ], psEncCtrl->Gains_Q16[ k ], s_Q16 ); } } /* Limit the quantized signal */ /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */ InvMaxSqrVal_Q16 = silk_DIV32_16( silk_log2lin( silk_SMULWB( SILK_FIX_CONST( 21 + 16 / 0.33, 7 ) - psEnc->sCmn.SNR_dB_Q7, SILK_FIX_CONST( 0.33, 16 ) ) ), psEnc->sCmn.subfr_length ); for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { /* Soft limit on ratio residual energy and squared gains */ ResNrg = psEncCtrl->ResNrg[ k ]; ResNrgPart = silk_SMULWW( ResNrg, InvMaxSqrVal_Q16 ); if( psEncCtrl->ResNrgQ[ k ] > 0 ) { ResNrgPart = silk_RSHIFT_ROUND( ResNrgPart, psEncCtrl->ResNrgQ[ k ] ); } else { if( ResNrgPart >= silk_RSHIFT( silk_int32_MAX, -psEncCtrl->ResNrgQ[ k ] ) ) { ResNrgPart = silk_int32_MAX; } else { ResNrgPart = silk_LSHIFT( ResNrgPart, -psEncCtrl->ResNrgQ[ k ] ); } } gain = psEncCtrl->Gains_Q16[ k ]; gain_squared = silk_ADD_SAT32( ResNrgPart, silk_SMMUL( gain, gain ) ); if( gain_squared < silk_int16_MAX ) { /* recalculate with higher precision */ gain_squared = silk_SMLAWW( silk_LSHIFT( ResNrgPart, 16 ), gain, gain ); silk_assert( gain_squared > 0 ); gain = silk_SQRT_APPROX( gain_squared ); /* Q8 */ gain = silk_min( gain, 
silk_int32_MAX >> 8 ); psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 8 ); /* Q16 */ } else {
/* Decode an NLSF vector from its codebook path, with weights derived on the fly.
 *
 * The first-stage codebook vector selected by NLSFIndices[ 0 ] is written to
 * the output, Laroia weights are computed from it, and the dequantized
 * residual from NLSFIndices[ 1.. ] is divided by the square root of those
 * weights and added on top. The result is limited to [ 0, 32767 ] and passed
 * through silk_NLSF_stabilize().
 */
void silk_NLSF_decode(
          opus_int16            *pNLSF_Q15,         /* O    Quantized NLSF vector [ LPC_ORDER ]         */
          opus_int8             *NLSFIndices,       /* I    Codebook path vector [ LPC_ORDER + 1 ]      */
    const silk_NLSF_CB_struct   *psNLSF_CB          /* I    Codebook object                             */
)
{
    const opus_int    order = psNLSF_CB->order;
    const opus_uint8 *cb1_vec_Q8;
    opus_uint8        pred_Q8[ MAX_LPC_ORDER ];
    opus_int16        ec_ix[ MAX_LPC_ORDER ];
    opus_int16        res_Q10[ MAX_LPC_ORDER ];
    opus_int16        wght_QW[ MAX_LPC_ORDER ];
    opus_int32        sqrt_wght_Q9, nlsf_tmp;
    opus_int          k;

    /* Decode first stage */
    cb1_vec_Q8 = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ];
    for( k = 0; k < order; k++ ) {
        pNLSF_Q15[ k ] = silk_LSHIFT( (opus_int16)cb1_vec_Q8[ k ], 7 );
    }

    /* Unpack entropy table indices and predictor for the selected first-stage index */
    silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] );

    /* Predictive residual dequantizer */
    silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, order );

    /* Weights from codebook vector */
    silk_NLSF_VQ_weights_laroia( wght_QW, pNLSF_Q15, order );

    /* Apply inverse square-rooted weights and add to output */
    for( k = 0; k < order; k++ ) {
        sqrt_wght_Q9   = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)wght_QW[ k ], 18 - NLSF_W_Q ) );
        nlsf_tmp       = silk_ADD32( pNLSF_Q15[ k ],
                                     silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ k ], 14 ), sqrt_wght_Q9 ) );
        pNLSF_Q15[ k ] = (opus_int16)silk_LIMIT( nlsf_tmp, 0, 32767 );
    }

    /* NLSF stabilization */
    silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, order );
}
/* Quantize mid/side predictors.
 *
 * For each of the two predictors the quantization levels are visited in table
 * order and the search stops at the first level that does not reduce the
 * absolute error. The chosen level replaces pred_Q13[ n ]; ix[ n ] receives
 * the table index split into ix[ n ][ 2 ] = index / 3 and
 * ix[ n ][ 0 ] = index % 3, plus the sub-step in ix[ n ][ 1 ].
 * On return pred_Q13[ 0 ] holds the first quantized predictor minus the second.
 */
void silk_stereo_quant_pred(
    opus_int32                  pred_Q13[],         /* I/O  Predictors (out: quantized)     */
    opus_int8                   ix[ 2 ][ 3 ]        /* O    Quantization indices            */
)
{
    opus_int   n, tab, sub, past_optimum;
    opus_int32 base_Q13, half_step_Q13, level_Q13, err_Q13, best_err_Q13, best_level_Q13 = 0;

    for( n = 0; n < 2; n++ ) {
        /* Brute-force search over quantization levels */
        best_err_Q13 = silk_int32_MAX;
        past_optimum = 0;

        for( tab = 0; !past_optimum && tab < STEREO_QUANT_TAB_SIZE - 1; tab++ ) {
            base_Q13      = silk_stereo_pred_quant_Q13[ tab ];
            half_step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ tab + 1 ] - base_Q13,
                                         SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) );

            for( sub = 0; sub < STEREO_QUANT_SUB_STEPS; sub++ ) {
                level_Q13 = silk_SMLABB( base_Q13, half_step_Q13, 2 * sub + 1 );
                err_Q13   = silk_abs( pred_Q13[ n ] - level_Q13 );

                if( err_Q13 >= best_err_Q13 ) {
                    /* Error increasing, so we're past the optimum */
                    past_optimum = 1;
                    break;
                }

                best_err_Q13   = err_Q13;
                best_level_Q13 = level_Q13;
                ix[ n ][ 0 ]   = tab;
                ix[ n ][ 1 ]   = sub;
            }
        }

        /* Split the table index into a group of three and a position within it */
        ix[ n ][ 2 ]  = silk_DIV32_16( ix[ n ][ 0 ], 3 );
        ix[ n ][ 0 ] -= ix[ n ][ 2 ] * 3;
        pred_Q13[ n ] = best_level_Q13;
    }

    /* Subtract second from first predictor (helps when actually applying these) */
    pred_Q13[ 0 ] -= pred_Q13[ 1 ];
}
/* Low-pass filter with a cutoff that changes over a transition of several frames.
 *
 * Does nothing when psLP->mode is 0. Otherwise the filter taps are
 * interpolated according to how far transition_frame_no is from
 * TRANSITION_FRAMES, the frame counter is advanced by mode (kept within
 * [ 0, TRANSITION_FRAMES ]), and the frame is filtered in place.
 */
void silk_LP_variable_cutoff(
    silk_LP_state               *psLP,              /* I/O  LP filter state                             */
    opus_int16                  *frame,             /* I/O  Low-pass filtered output signal             */
    const opus_int              frame_length        /* I    Frame length                                */
)
{
    opus_int32 num_Q28[ TRANSITION_NB ], den_Q28[ TRANSITION_NA ], frac_Q16;
    opus_int   tap_ix;

    silk_assert( psLP->transition_frame_no >= 0 && psLP->transition_frame_no <= TRANSITION_FRAMES );

    /* Run filter only if needed */
    if( psLP->mode == 0 ) {
        return;
    }

    /* Calculate index and interpolation factor for interpolation */
#if( TRANSITION_INT_STEPS == 64 )
    frac_Q16 = silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 - 6 );
#else
    frac_Q16 = silk_DIV32_16( silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 ), TRANSITION_FRAMES );
#endif
    /* Integer part selects the tap set, the fractional part stays in frac_Q16 */
    tap_ix    = silk_RSHIFT( frac_Q16, 16 );
    frac_Q16 -= silk_LSHIFT( tap_ix, 16 );

    silk_assert( tap_ix >= 0 );
    silk_assert( tap_ix < TRANSITION_INT_NUM );

    /* Interpolate filter coefficients */
    silk_LP_interpolate_filter_taps( num_Q28, den_Q28, tap_ix, frac_Q16 );

    /* Update transition frame number for next frame */
    psLP->transition_frame_no = silk_LIMIT( psLP->transition_frame_no + psLP->mode, 0, TRANSITION_FRAMES );

    /* ARMA low-pass filtering, input and output share the same buffer */
    silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 );
    silk_biquad_alt( frame, num_Q28, den_Q28, psLP->In_LP_State, frame, frame_length, 1);
}
opus_int silk_VAD_GetSA_Q8( /* O Return value, 0 if success */ silk_encoder_state *psEncC, /* I/O Encoder state */ const opus_int16 pIn[] /* I PCM input */ ) { opus_int SA_Q15, pSNR_dB_Q7, input_tilt; opus_int decimated_framelength1, decimated_framelength2; opus_int decimated_framelength; opus_int dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s; opus_int32 sumSquared, smooth_coef_Q16; opus_int16 HPstateTmp; VARDECL( opus_int16, X ); opus_int32 Xnrg[ VAD_N_BANDS ]; opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ]; opus_int32 speech_nrg, x_tmp; opus_int X_offset[ VAD_N_BANDS ]; opus_int ret = 0; silk_VAD_state *psSilk_VAD = &psEncC->sVAD; SAVE_STACK; /* Safety checks */ silk_assert( VAD_N_BANDS == 4 ); silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length ); silk_assert( psEncC->frame_length <= 512 ); silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) ); /***********************/ /* Filter and Decimate */ /***********************/ decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 ); decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 ); decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 ); /* Decimate into 4 bands: 0 L 3L L 3L 5L - -- - -- -- 8 8 2 4 4 [0-1 kHz| temp. 
|1-2 kHz| 2-4 kHz | 4-8 kHz | They're arranged to allow the minimal ( frame_length / 4 ) extra scratch space during the downsampling process */ X_offset[ 0 ] = 0; X_offset[ 1 ] = decimated_framelength + decimated_framelength2; X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength; X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2; ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 ); /* 0-8 kHz to 0-4 kHz and 4-8 kHz */ silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ], X, &X[ X_offset[ 3 ] ], psEncC->frame_length ); /* 0-4 kHz to 0-2 kHz and 2-4 kHz */ silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ], X, &X[ X_offset[ 2 ] ], decimated_framelength1 ); /* 0-2 kHz to 0-1 kHz and 1-2 kHz */ silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ], X, &X[ X_offset[ 1 ] ], decimated_framelength2 ); /*********************************************/ /* HP filter on lowest band (differentiator) */ /*********************************************/ X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 ); HPstateTmp = X[ decimated_framelength - 1 ]; for( i = decimated_framelength - 1; i > 0; i-- ) { X[ i - 1 ] = silk_RSHIFT( X[ i - 1 ], 1 ); X[ i ] -= X[ i - 1 ]; } X[ 0 ] -= psSilk_VAD->HPstate; psSilk_VAD->HPstate = HPstateTmp; /*************************************/ /* Calculate the energy in each band */ /*************************************/ for( b = 0; b < VAD_N_BANDS; b++ ) { /* Find the decimated framelength in the non-uniformly divided bands */ decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) ); /* Split length into subframe lengths */ dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 ); dec_subframe_offset = 0; /* Compute energy per sub-frame */ /* initialize with summed energy of last subframe */ Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ]; for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) { sumSquared = 0; for( i = 0; i < 
dec_subframe_length; i++ ) { /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2. */ /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128) */ x_tmp = silk_RSHIFT( X[ X_offset[ b ] + i + dec_subframe_offset ], 3 ); sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp ); /* Safety check */ silk_assert( sumSquared >= 0 ); } /* Add/saturate summed energy of current subframe */ if( s < VAD_INTERNAL_SUBFRAMES - 1 ) { Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared ); } else { /* Look-ahead subframe */ Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) ); } dec_subframe_offset += dec_subframe_length; } psSilk_VAD->XnrgSubfr[ b ] = sumSquared; } /********************/ /* Noise estimation */ /********************/ silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD ); /***********************************************/ /* Signal-plus-noise to noise ratio estimation */ /***********************************************/ sumSquared = 0; input_tilt = 0; for( b = 0; b < VAD_N_BANDS; b++ ) { speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ]; if( speech_nrg > 0 ) { /* Divide, with sufficient resolution */ if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) { NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 ); } else { NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 ); } /* Convert to log domain */ SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128; /* Sum-of-squares */ sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 ); /* Q14 */ /* Tilt measure */ if( speech_nrg < ( (opus_int32)1 << 20 ) ) { /* Scale down SNR value for small subband speech energies */ SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 ); } input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 ); } else { NrgToNoiseRatio_Q8[ b ] = 256; } } /* Mean-of-squares */ sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* 
Q14 */ /* Root-mean-square approximation, scale to dBs, and write to output pointer */ pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */ /*********************************/ /* Speech Probability Estimation */ /*********************************/ SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 ); /**************************/ /* Frequency Tilt Measure */ /**************************/ psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 ); /**************************************************/ /* Scale the sigmoid output based on power levels */ /**************************************************/ speech_nrg = 0; for( b = 0; b < VAD_N_BANDS; b++ ) { /* Accumulate signal-without-noise energies, higher frequency bands have more weight */ speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 ); } /* Power scaling */ if( speech_nrg <= 0 ) { SA_Q15 = silk_RSHIFT( SA_Q15, 1 ); } else if( speech_nrg < 32768 ) { if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 ); } else { speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 ); } /* square-root */ speech_nrg = silk_SQRT_APPROX( speech_nrg ); SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 ); } /* Copy the resulting speech activity in Q8 */ psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX ); /***********************************/ /* Energy Level and SNR estimation */ /***********************************/ /* Smoothing coefficient */ smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) ); if( psEncC->frame_length == 10 * psEncC->fs_kHz ) { smooth_coef_Q16 >>= 1; }
/* Initialize/reset the resampler state for a given pair of input/output sampling rates.
 *
 * The state is cleared first. Both rates must be one of 8000, 12000, 16000,
 * 24000 or 48000 Hz; otherwise the function asserts and returns -1. The
 * resampler function is then chosen from the rate ratio (copy, 2x upsampler,
 * generic upsampler, or FIR downsampler with ratio-specific coefficients) and
 * the input/output ratio is stored in invRatio_Q16, rounded up.
 *
 * Returns 0 on success, -1 on unsupported rates.
 */
opus_int silk_resampler_init(
    silk_resampler_state_struct *S,                 /* I/O  Resampler state                 */
    opus_int32                  Fs_Hz_in,           /* I    Input sampling rate (Hz)        */
    opus_int32                  Fs_Hz_out           /* I    Output sampling rate (Hz)       */
)
{
    opus_int32 upsample_2x = 0, downsample_2x = 0;
    opus_int   in_ok, out_ok;

    /* Clear state */
    silk_memset( S, 0, sizeof( silk_resampler_state_struct ) );

    /* Input checking */
    in_ok  = ( Fs_Hz_in  == 8000 || Fs_Hz_in  == 12000 || Fs_Hz_in  == 16000 || Fs_Hz_in  == 24000 || Fs_Hz_in  == 48000 );
    out_ok = ( Fs_Hz_out == 8000 || Fs_Hz_out == 12000 || Fs_Hz_out == 16000 || Fs_Hz_out == 24000 || Fs_Hz_out == 48000 );
    if( !in_ok || !out_ok ) {
        silk_assert( 0 );
        return -1;
    }

    /* Number of samples processed per batch */
    S->batchSize = silk_DIV32_16( Fs_Hz_in, 100 );

    /* Find resampler with the right sampling ratio */
    if( Fs_Hz_out == Fs_Hz_in ) {
        /* Input and output sampling rates are equal: copy */
        S->resampler_function = USE_silk_resampler_copy;
    } else if( Fs_Hz_out > Fs_Hz_in ) {
        /* Upsample */
        if( Fs_Hz_out == silk_MUL( Fs_Hz_in, 2 ) ) {
            /* Fs_out : Fs_in = 2 : 1, special case: directly use 2x upsampler */
            S->resampler_function = USE_silk_resampler_private_up2_HQ_wrapper;
        } else {
            /* Default resampler */
            S->resampler_function = USE_silk_resampler_private_IIR_FIR;
            upsample_2x = 1;
        }
    } else {
        /* Downsample: pick the FIR coefficient set that matches the ratio */
        if( silk_MUL( Fs_Hz_out, 4 ) == silk_MUL( Fs_Hz_in, 3 ) ) {
            /* Fs_out : Fs_in = 3 : 4 */
            S->FIR_Fracs = 3;
            S->Coefs     = silk_Resampler_3_4_COEFS;
        } else if( silk_MUL( Fs_Hz_out, 3 ) == silk_MUL( Fs_Hz_in, 2 ) ) {
            /* Fs_out : Fs_in = 2 : 3 */
            S->FIR_Fracs = 2;
            S->Coefs     = silk_Resampler_2_3_COEFS;
        } else if( silk_MUL( Fs_Hz_out, 2 ) == Fs_Hz_in ) {
            /* Fs_out : Fs_in = 1 : 2 */
            S->FIR_Fracs = 1;
            S->Coefs     = silk_Resampler_1_2_COEFS;
        } else if( silk_MUL( Fs_Hz_out, 3 ) == Fs_Hz_in ) {
            /* Fs_out : Fs_in = 1 : 3 */
            S->FIR_Fracs = 1;
            S->Coefs     = silk_Resampler_1_3_COEFS;
        } else if( silk_MUL( Fs_Hz_out, 4 ) == Fs_Hz_in ) {
            /* Fs_out : Fs_in = 1 : 4 */
            S->FIR_Fracs  = 1;
            downsample_2x = 1;
            S->Coefs      = silk_Resampler_1_2_COEFS;
        } else if( silk_MUL( Fs_Hz_out, 6 ) == Fs_Hz_in ) {
            /* Fs_out : Fs_in = 1 : 6 */
            S->FIR_Fracs  = 1;
            downsample_2x = 1;
            S->Coefs      = silk_Resampler_1_3_COEFS;
        } else {
            /* None available */
            silk_assert( 0 );
            return -1;
        }
        /* Every supported downsampling ratio uses the FIR downsampler */
        S->resampler_function = USE_silk_resampler_private_down_FIR;
    }

    S->input2x = upsample_2x | downsample_2x;

    /* Ratio of input/output samples */
    S->invRatio_Q16 = silk_LSHIFT32( silk_DIV32( silk_LSHIFT32( Fs_Hz_in, 14 + upsample_2x - downsample_2x ), Fs_Hz_out ), 2 );

    /* Make sure the ratio is rounded up */
    while( silk_SMULWW( S->invRatio_Q16, silk_LSHIFT32( Fs_Hz_out, downsample_2x ) ) < silk_LSHIFT32( Fs_Hz_in, upsample_2x ) ) {
        S->invRatio_Q16++;
    }

    return 0;
}
/* Write the side-information indices of one frame to the range coder.     */
/* The indices are taken from the LBRR set of frame FrameIndex when        */
/* encode_LBRR is non-zero, and from the regular set otherwise.            */
/* Updates psEncC->ec_prevLagIndex (voiced frames only) and                */
/* psEncC->ec_prevSignalType.                                              */
void silk_encode_indices(
    silk_encoder_state          *psEncC,        /* I/O  Encoder state                               */
    ec_enc                      *psRangeEnc,    /* I/O  Compressor data structure                   */
    opus_int                    FrameIndex,     /* I    Frame number                                */
    opus_int                    encode_LBRR,    /* I    Flag indicating LBRR data is being encoded  */
    opus_int                    condCoding      /* I    The type of conditional coding to use       */
)
{
    opus_int                    sf, coef, type_offset;
    opus_int                    gain0, nlsf_res, lag_delta, need_absolute_lag, half_fs_kHz;
    opus_int32                  lag_hi, lag_lo;
    opus_int16                  ec_ix[ MAX_LPC_ORDER ];
    opus_uint8                  pred_Q8[ MAX_LPC_ORDER ];
    const SideInfoIndices       *ind;
    const silk_NLSF_CB_struct   *cb;

    /* Select which set of indices is written */
    ind = encode_LBRR ? &psEncC->indices_LBRR[ FrameIndex ] : &psEncC->indices;

    /* ---- Signal type and quantizer offset ---- */
    type_offset = 2 * ind->signalType + ind->quantOffsetType;
    silk_assert( type_offset >= 0 && type_offset < 6 );
    silk_assert( encode_LBRR == 0 || type_offset >= 2 );
    if( !encode_LBRR && type_offset < 2 ) {
        ec_enc_icdf( psRangeEnc, type_offset, silk_type_offset_no_VAD_iCDF, 8 );
    } else {
        ec_enc_icdf( psRangeEnc, type_offset - 2, silk_type_offset_VAD_iCDF, 8 );
    }

    /* ---- Gains ---- */
    /* First subframe: delta-coded when coding conditionally, otherwise the */
    /* upper bits are followed by the 3 least significant bits              */
    gain0 = ind->GainsIndices[ 0 ];
    if( condCoding != CODE_CONDITIONALLY ) {
        silk_assert( gain0 >= 0 && gain0 < N_LEVELS_QGAIN );
        ec_enc_icdf( psRangeEnc, silk_RSHIFT( gain0, 3 ), silk_gain_iCDF[ ind->signalType ], 8 );
        ec_enc_icdf( psRangeEnc, gain0 & 7, silk_uniform8_iCDF, 8 );
    } else {
        silk_assert( gain0 >= 0 && gain0 < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 );
        ec_enc_icdf( psRangeEnc, gain0, silk_delta_gain_iCDF, 8 );
    }

    /* All later subframes are always delta-coded */
    for( sf = 1; sf < psEncC->nb_subfr; sf++ ) {
        silk_assert( ind->GainsIndices[ sf ] >= 0 && ind->GainsIndices[ sf ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 );
        ec_enc_icdf( psRangeEnc, ind->GainsIndices[ sf ], silk_delta_gain_iCDF, 8 );
    }

    /* ---- NLSFs ---- */
    cb = psEncC->psNLSF_CB;
    ec_enc_icdf( psRangeEnc, ind->NLSFIndices[ 0 ], &cb->CB1_iCDF[ ( ind->signalType >> 1 ) * cb->nVectors ], 8 );
    silk_NLSF_unpack( ec_ix, pred_Q8, cb, ind->NLSFIndices[ 0 ] );
    silk_assert( cb->order == psEncC->predictLPCOrder );
    for( coef = 0; coef < cb->order; coef++ ) {
        nlsf_res = ind->NLSFIndices[ coef + 1 ];
        if( nlsf_res >= NLSF_QUANT_MAX_AMPLITUDE ) {
            /* Top symbol of the table, then the excess as an extension symbol */
            ec_enc_icdf( psRangeEnc, 2 * NLSF_QUANT_MAX_AMPLITUDE, &cb->ec_iCDF[ ec_ix[ coef ] ], 8 );
            ec_enc_icdf( psRangeEnc, nlsf_res - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 );
        } else if( nlsf_res <= -NLSF_QUANT_MAX_AMPLITUDE ) {
            /* Bottom symbol of the table, then the excess as an extension symbol */
            ec_enc_icdf( psRangeEnc, 0, &cb->ec_iCDF[ ec_ix[ coef ] ], 8 );
            ec_enc_icdf( psRangeEnc, -nlsf_res - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 );
        } else {
            ec_enc_icdf( psRangeEnc, nlsf_res + NLSF_QUANT_MAX_AMPLITUDE, &cb->ec_iCDF[ ec_ix[ coef ] ], 8 );
        }
    }

    /* The NLSF interpolation factor is only written for frames with MAX_NB_SUBFR subframes */
    if( psEncC->nb_subfr == MAX_NB_SUBFR ) {
        silk_assert( ind->NLSFInterpCoef_Q2 >= 0 && ind->NLSFInterpCoef_Q2 < 5 );
        ec_enc_icdf( psRangeEnc, ind->NLSFInterpCoef_Q2, silk_NLSF_interpolation_factor_iCDF, 8 );
    }

    if( ind->signalType == TYPE_VOICED ) {
        /* ---- Pitch lag ---- */
        need_absolute_lag = 1;
        if( condCoding == CODE_CONDITIONALLY && psEncC->ec_prevSignalType == TYPE_VOICED ) {
            /* Try delta coding against the previously coded lag index */
            lag_delta = ind->lagIndex - psEncC->ec_prevLagIndex;
            if( lag_delta >= -8 && lag_delta <= 11 ) {
                lag_delta += 9;
                need_absolute_lag = 0;      /* Delta alone is sufficient */
            } else {
                /* Symbol 0 signals that an absolute lag follows */
                lag_delta = 0;
            }
            silk_assert( lag_delta >= 0 && lag_delta < 21 );
            ec_enc_icdf( psRangeEnc, lag_delta, silk_pitch_delta_iCDF, 8 );
        }
        if( need_absolute_lag ) {
            /* Absolute coding: split the lag index by fs_kHz / 2 into a high and a low part */
            half_fs_kHz = silk_RSHIFT( psEncC->fs_kHz, 1 );
            lag_hi = silk_DIV32_16( ind->lagIndex, half_fs_kHz );
            lag_lo = ind->lagIndex - silk_SMULBB( lag_hi, half_fs_kHz );
            silk_assert( lag_lo < psEncC->fs_kHz / 2 );
            silk_assert( lag_hi < 32 );
            ec_enc_icdf( psRangeEnc, lag_hi, silk_pitch_lag_iCDF, 8 );
            ec_enc_icdf( psRangeEnc, lag_lo, psEncC->pitch_lag_low_bits_iCDF, 8 );
        }
        psEncC->ec_prevLagIndex = ind->lagIndex;

        /* ---- Pitch contour ---- */
        silk_assert(   ind->contourIndex >= 0 );
        silk_assert( ( ind->contourIndex < 34 && psEncC->fs_kHz  > 8 && psEncC->nb_subfr == 4 ) ||
                     ( ind->contourIndex < 11 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 4 ) ||
                     ( ind->contourIndex < 12 && psEncC->fs_kHz  > 8 && psEncC->nb_subfr == 2 ) ||
                     ( ind->contourIndex <  3 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 2 ) );
        ec_enc_icdf( psRangeEnc, ind->contourIndex, psEncC->pitch_contour_iCDF, 8 );

        /* ---- LTP gains ---- */
        /* Periodicity index selects the codebook used for the per-subframe indices */
        silk_assert( ind->PERIndex >= 0 && ind->PERIndex < 3 );
        ec_enc_icdf( psRangeEnc, ind->PERIndex, silk_LTP_per_index_iCDF, 8 );

        for( sf = 0; sf < psEncC->nb_subfr; sf++ ) {
            silk_assert( ind->LTPIndex[ sf ] >= 0 && ind->LTPIndex[ sf ] < ( 8 << ind->PERIndex ) );
            ec_enc_icdf( psRangeEnc, ind->LTPIndex[ sf ], silk_LTP_gain_iCDF_ptrs[ ind->PERIndex ], 8 );
        }

        /* ---- LTP scaling (only written when coding independently) ---- */
        if( condCoding == CODE_INDEPENDENTLY ) {
            silk_assert( ind->LTP_scaleIndex >= 0 && ind->LTP_scaleIndex < 3 );
            ec_enc_icdf( psRangeEnc, ind->LTP_scaleIndex, silk_LTPscale_iCDF, 8 );
        }
        silk_assert( !condCoding || ind->LTP_scaleIndex == 0 );
    }
    psEncC->ec_prevSignalType = ind->signalType;

    /* ---- Seed ---- */
    silk_assert( ind->Seed >= 0 && ind->Seed < 4 );
    ec_enc_icdf( psRangeEnc, ind->Seed, silk_uniform4_iCDF, 8 );
}
/* Control internal sampling rate.
 *
 * Returns the internal sampling rate (in kHz) the encoder should use next.
 * May update the transition state in psEncC->sLP and, when a switch is
 * pending but cannot be done yet, sets encControl->switchReady and lowers
 * encControl->maxBits to make room for redundancy.
 */
int silk_control_audio_bandwidth(silk_encoder_state * psEncC,	/* I/O Pointer to Silk encoder state */
				 silk_EncControlStruct * encControl	/* I Control structure */
    )
{
	const int cur_kHz = psEncC->fs_kHz;
	const int32_t cur_Hz = silk_SMULBB(cur_kHz, 1000);
	int32_t clamped_Hz;
	int next_kHz = cur_kHz;

	/* Encoder has just been initialized: pick the starting rate */
	if (cur_Hz == 0) {
		clamped_Hz = silk_min(psEncC->desiredInternal_fs_Hz, psEncC->API_fs_Hz);
		return silk_DIV32_16(clamped_Hz, 1000);
	}

	/* Current rate is outside the allowed range: clamp it so it is not
	   higher than the external rate or the maximum, nor lower than the
	   minimum */
	if (cur_Hz > psEncC->API_fs_Hz ||
	    cur_Hz > psEncC->maxInternal_fs_Hz ||
	    cur_Hz < psEncC->minInternal_fs_Hz) {
		clamped_Hz = psEncC->API_fs_Hz;
		clamped_Hz = silk_min(clamped_Hz, psEncC->maxInternal_fs_Hz);
		clamped_Hz = silk_max(clamped_Hz, psEncC->minInternal_fs_Hz);
		return silk_DIV32_16(clamped_Hz, 1000);
	}

	/* From here on: state machine for internal sampling rate switching */

	/* A transition that has run its full length is stopped */
	if (psEncC->sLP.transition_frame_no >= TRANSITION_FRAMES) {
		psEncC->sLP.mode = 0;
	}

	/* Nothing more to do unless switching is permitted */
	if (!psEncC->allow_bandwidth_switch && !encControl->opusCanSwitch) {
		return next_kHz;
	}

	if (cur_Hz > psEncC->desiredInternal_fs_Hz) {
		/* ---- Switch down ---- */
		if (psEncC->sLP.mode == 0) {
			/* New transition: start the counter and clear the transition filter state */
			psEncC->sLP.transition_frame_no = TRANSITION_FRAMES;
			memzero(psEncC->sLP.In_LP_State, sizeof(psEncC->sLP.In_LP_State));
		}
		if (encControl->opusCanSwitch) {
			/* Stop the transition and step to the next lower rate */
			psEncC->sLP.mode = 0;
			next_kHz = (cur_kHz == 16) ? 12 : 8;
		} else if (psEncC->sLP.transition_frame_no > 0) {
			/* Direction: down (at double speed) */
			psEncC->sLP.mode = -2;
		} else {
			/* Transition finished: announce readiness and make room for redundancy */
			encControl->switchReady = 1;
			encControl->maxBits -= encControl->maxBits * 5 / (encControl->payloadSize_ms + 5);
		}
	} else if (cur_Hz < psEncC->desiredInternal_fs_Hz) {
		/* ---- Switch up ---- */
		if (encControl->opusCanSwitch) {
			/* Step to the next higher rate and start a fresh upward transition */
			next_kHz = (cur_kHz == 8) ? 12 : 16;
			psEncC->sLP.transition_frame_no = 0;
			memzero(psEncC->sLP.In_LP_State, sizeof(psEncC->sLP.In_LP_State));
			psEncC->sLP.mode = 1;
		} else if (psEncC->sLP.mode != 0) {
			/* Direction: up */
			psEncC->sLP.mode = 1;
		} else {
			/* No transition running: announce readiness and make room for redundancy */
			encControl->switchReady = 1;
			encControl->maxBits -= encControl->maxBits * 5 / (encControl->payloadSize_ms + 5);
		}
	} else if (psEncC->sLP.mode < 0) {
		/* Already at the desired rate while a downward transition is active: reverse it */
		psEncC->sLP.mode = 1;
	}

	return next_kHz;
}
/* Compute the NLSF vector for the filter coefficients in a_Q16.                                */
/* Two polynomials P and Q are set up by silk_A2NLSF_init(); their sign changes are searched    */
/* for on the grid silk_LSFCosTab_FIX_Q12[], alternating between P and Q after every root.      */
/* Each located interval is refined by BIN_DIV_STEPS_A2NLSF_FIX bisection steps followed by an  */
/* interpolation step.                                                                           */
/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */
/* After more than MAX_ITERATIONS_A2NLSF_FIX such retries, evenly spaced NLSFs are written.     */
/* Note: a_Q16 is modified in place whenever bandwidth expansion is applied.                    */
void silk_A2NLSF(
    opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
    opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */
    const opus_int d /* I Filter order (must be even) */
)
{
    opus_int i, k, m, dd, root_ix, ffrac;
    opus_int32 xlo, xhi, xmid;
    opus_int32 ylo, yhi, ymid, thr;
    opus_int32 nom, den;
    opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ];
    opus_int32 Q[ SILK_MAX_ORDER_LPC / 2 + 1 ];
    opus_int32 *PQ[ 2 ];
    opus_int32 *p;

    /* Store pointers to array */
    PQ[ 0 ] = P;
    PQ[ 1 ] = Q;

    /* Half the filter order; passed to the init and evaluation helpers */
    dd = silk_RSHIFT( d, 1 );

    silk_A2NLSF_init( a_Q16, P, Q, dd );

    /* Find roots, alternating between P and Q */
    p = P; /* Pointer to polynomial */

    xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/
    ylo = silk_A2NLSF_eval_poly( p, xlo, dd );

    if( ylo < 0 ) {
        /* Set the first NLSF to zero and move on to the next */
        NLSF[ 0 ] = 0;
        p = Q; /* Pointer to polynomial */
        ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
        root_ix = 1; /* Index of current root */
    } else {
        root_ix = 0; /* Index of current root */
    }
    k = 1; /* Loop counter */
    i = 0; /* Counter for bandwidth expansions applied */
    thr = 0; /* Set to 1 right after a root that fell exactly on a grid point (see below) */
    while( 1 ) {
        /* Evaluate polynomial */
        xhi = silk_LSFCosTab_FIX_Q12[ k ]; /* Q12 */
        yhi = silk_A2NLSF_eval_poly( p, xhi, dd );

        /* Detect zero crossing */
        if( ( ylo <= 0 && yhi >= thr ) || ( ylo >= 0 && yhi <= -thr ) ) {
            if( yhi == 0 ) {
                /* If the root lies exactly at the end of the current */
                /* interval, look for the next root in the next interval */
                thr = 1;
            } else {
                thr = 0;
            }
            /* Binary division */
            /* ffrac accumulates the position inside the grid interval; it starts */
            /* at -256 and gains 128 >> m whenever the lower bound is moved up    */
            ffrac = -256;
            for( m = 0; m < BIN_DIV_STEPS_A2NLSF_FIX; m++ ) {
                /* Evaluate polynomial */
                xmid = silk_RSHIFT_ROUND( xlo + xhi, 1 );
                ymid = silk_A2NLSF_eval_poly( p, xmid, dd );

                /* Detect zero crossing */
                if( ( ylo <= 0 && ymid >= 0 ) || ( ylo >= 0 && ymid <= 0 ) ) {
                    /* Reduce frequency */
                    xhi = xmid;
                    yhi = ymid;
                } else {
                    /* Increase frequency */
                    xlo = xmid;
                    ylo = ymid;
                    ffrac = silk_ADD_RSHIFT( ffrac, 128, m );
                }
            }

            /* Interpolate */
            /* Two variants of the same ratio ylo / ( ylo - yhi ): for small ylo the */
            /* numerator is shifted up, for large ylo the denominator is shifted down */
            if( silk_abs( ylo ) < 65536 ) {
                /* Avoid dividing by zero */
                den = ylo - yhi;
                nom = silk_LSHIFT( ylo, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) + silk_RSHIFT( den, 1 );
                if( den != 0 ) {
                    ffrac += silk_DIV32( nom, den );
                }
            } else {
                /* No risk of dividing by zero because abs(ylo - yhi) >= abs(ylo) >= 65536 */
                ffrac += silk_DIV32( ylo, silk_RSHIFT( ylo - yhi, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) );
            }
            /* Grid index scaled by 256 plus the fractional part, capped at silk_int16_MAX */
            NLSF[ root_ix ] = (opus_int16)silk_min_32( silk_LSHIFT( (opus_int32)k, 8 ) + ffrac, silk_int16_MAX );

            silk_assert( NLSF[ root_ix ] >= 0 );

            root_ix++; /* Next root */
            if( root_ix >= d ) {
                /* Found all roots */
                break;
            }
            /* Alternate pointer to polynomial */
            p = PQ[ root_ix & 1 ];

            /* Evaluate polynomial */
            /* The search for the next root restarts at the lower end of the current grid */
            /* interval. ylo is not evaluated here: it is set to +/- (1 << 12) from bit 1 */
            /* of root_ix -- NOTE(review): presumably only its sign matters, i.e. the     */
            /* sign the newly selected polynomial is expected to have at xlo; confirm.    */
            xlo = silk_LSFCosTab_FIX_Q12[ k - 1 ]; /* Q12*/
            ylo = silk_LSHIFT( 1 - ( root_ix & 2 ), 12 );
        } else {
            /* Increment loop counter */
            k++;
            xlo = xhi;
            ylo = yhi;
            thr = 0;

            if( k > LSF_COS_TAB_SZ_FIX ) {
                /* The whole grid was scanned without finding all d roots */
                i++;
                if( i > MAX_ITERATIONS_A2NLSF_FIX ) {
                    /* Set NLSFs to white spectrum and exit */
                    /* i.e. NLSF[ k ] = ( k + 1 ) * ( 2^15 / ( d + 1 ) ) */
                    NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 );
                    for( k = 1; k < d; k++ ) {
                        NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] );
                    }
                    return;
                }

                /* Error: Apply progressively more bandwidth expansion and run again */
                /* The value passed as third argument, 65536 - ( 10 + i ) * i, shrinks with every retry */
                silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/

                /* Restart the search from scratch (same set-up as before the loop) */
                silk_A2NLSF_init( a_Q16, P, Q, dd );
                p = P; /* Pointer to polynomial */
                xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/
                ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
                if( ylo < 0 ) {
                    /* Set the first NLSF to zero and move on to the next */
                    NLSF[ 0 ] = 0;
                    p = Q; /* Pointer to polynomial */
                    ylo = silk_A2NLSF_eval_poly( p, xlo, dd );
                    root_ix = 1; /* Index of current root */
                } else {
                    root_ix = 0; /* Index of current root */
                }
                k = 1; /* Reset loop counter */
            }
        }
    }
}
/* (Re)initialize the encoder's input resampler for the internal rate fs_kHz.                 */
/* Nothing but the prev_API_fs_Hz bookkeeping happens when neither the internal rate nor the  */
/* API rate changed. On the very first call (stored fs_kHz == 0) the resampler is simply      */
/* initialized. Otherwise the buffered signal in x_buf is first resampled to the API rate     */
/* with a temporary resampler and then pushed through the newly initialized resampler, which  */
/* leaves x_buf at the new rate.                                                              */
/* Returns the sum of the return values of all resampler calls made (SILK_NO_ERROR if none).  */
static opus_int silk_setup_resamplers(
    silk_encoder_state_Fxx *psEnc, /* I/O */
    opus_int fs_kHz /* I */
)
{
    opus_int ret = SILK_NO_ERROR;
    SAVE_STACK;

    if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz )
    {
        if( psEnc->sCmn.fs_kHz == 0 ) {
            /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
            ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 );
        } else {
            VARDECL( opus_int16, x_buf_API_fs_Hz );
            VARDECL( silk_resampler_state_struct, temp_resampler_state );
            /* With OPUS_FIXED_POINT defined, x_buf is used directly as the 16-bit work buffer; */
            /* otherwise a separate 16-bit buffer is allocated and converted to and from x_buf  */
#ifdef OPUS_FIXED_POINT
            opus_int16 *x_bufFIX = psEnc->x_buf;
#else
            VARDECL( opus_int16, x_bufFIX );
            opus_int32 new_buf_samples;
#endif
            opus_int32 api_buf_samples;
            opus_int32 old_buf_samples;
            opus_int32 buf_length_ms;

            /* Buffered duration: 2 * ( nb_subfr * 5 ) ms plus LA_SHAPE_MS */
            buf_length_ms = silk_LSHIFT( psEnc->sCmn.nb_subfr * 5, 1 ) + LA_SHAPE_MS;
            old_buf_samples = buf_length_ms * psEnc->sCmn.fs_kHz;

#ifndef OPUS_FIXED_POINT
            /* The work buffer must hold the signal at both the old and the new rate */
            new_buf_samples = buf_length_ms * fs_kHz;
            ALLOC( x_bufFIX, silk_max( old_buf_samples, new_buf_samples ),
                   opus_int16 );
            silk_float2short_array( x_bufFIX, psEnc->x_buf, old_buf_samples );
#endif

            /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
            ALLOC( temp_resampler_state, 1, silk_resampler_state_struct );
            ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );

            /* Calculate number of samples to temporarily upsample */
            api_buf_samples = buf_length_ms * silk_DIV32_16( psEnc->sCmn.API_fs_Hz, 1000 );

            /* Temporary resampling of x_buf data to API_fs_Hz */
            ALLOC( x_buf_API_fs_Hz, api_buf_samples, opus_int16 );
            ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, old_buf_samples );

            /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
            ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 );

            /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */
            ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, api_buf_samples );

#ifndef OPUS_FIXED_POINT
            /* Write the resampled signal back to x_buf */
            silk_short2float_array( psEnc->x_buf, x_bufFIX, new_buf_samples);
#endif
        }
    }

    /* Remember the API rate so that a later change of it is detected */
    psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz;

    RESTORE_STACK;
    return ret;
}
/* Apply packet size and internal sampling rate to the encoder state.               */
/* A changed packet size updates the framing parameters; a changed sampling rate    */
/* additionally resets part of the state and re-derives all rate-dependent values.  */
/* Returns SILK_NO_ERROR, or SILK_ENC_PACKET_SIZE_NOT_SUPPORTED when a new packet   */
/* size is not one of 10, 20, 40 or 60 ms (the state is still updated in that case). */
opus_int silk_setup_fs(
    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
    opus_int                        fs_kHz,             /* I                        */
    opus_int                        PacketSize_ms       /* I                        */
)
{
    opus_int status = SILK_NO_ERROR;
    opus_int narrowband, full_frame, win_ms;

    /* ---- Packet size ---- */
    if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) {
        switch( PacketSize_ms ) {
        case 10:
        case 20:
        case 40:
        case 60:
            break;
        default:
            status = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
            break;
        }

        /* The table choice below looks at the rate currently stored in the state */
        narrowband = ( psEnc->sCmn.fs_kHz == 8 );

        if( PacketSize_ms > 10 ) {
            /* One or more full-length frames per packet */
            psEnc->sCmn.nFramesPerPacket     = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS );
            psEnc->sCmn.nb_subfr             = MAX_NB_SUBFR;
            psEnc->sCmn.frame_length         = silk_SMULBB( 20, fs_kHz );
            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
            psEnc->sCmn.pitch_contour_iCDF   = narrowband ? silk_pitch_contour_NB_iCDF
                                                          : silk_pitch_contour_iCDF;
        } else {
            /* A single short frame per packet */
            psEnc->sCmn.nFramesPerPacket     = 1;
            psEnc->sCmn.nb_subfr             = ( PacketSize_ms == 10 ) ? 2 : 1;
            psEnc->sCmn.frame_length         = silk_SMULBB( PacketSize_ms, fs_kHz );
            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
            psEnc->sCmn.pitch_contour_iCDF   = narrowband ? silk_pitch_contour_10_ms_NB_iCDF
                                                          : silk_pitch_contour_10_ms_iCDF;
        }
        psEnc->sCmn.PacketSize_ms  = PacketSize_ms;
        psEnc->sCmn.TargetRate_bps = 0;         /* trigger new SNR computation */
    }

    /* ---- Internal sampling frequency ---- */
    silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
    silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
    if( psEnc->sCmn.fs_kHz != fs_kHz ) {
        /* Clear the parts of the state that depend on the sampling rate */
        silk_memset( &psEnc->sShape,               0, sizeof( psEnc->sShape ) );
        silk_memset( &psEnc->sPrefilt,             0, sizeof( psEnc->sPrefilt ) );
        silk_memset( &psEnc->sCmn.sNSQ,            0, sizeof( psEnc->sCmn.sNSQ ) );
        silk_memset( psEnc->sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
        silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );
        psEnc->sCmn.inputBufIx     = 0;
        psEnc->sCmn.nFramesEncoded = 0;
        psEnc->sCmn.TargetRate_bps = 0;         /* trigger new SNR computation */

        /* Members whose reset value is not zero */
        psEnc->sCmn.prevLag                 = 100;
        psEnc->sCmn.first_frame_after_reset = 1;
        psEnc->sPrefilt.lagPrev             = 100;
        psEnc->sShape.LastGainIndex         = 10;
        psEnc->sCmn.sNSQ.lagPrev            = 100;
        psEnc->sCmn.sNSQ.prev_gain_Q16      = 65536;
        psEnc->sCmn.prevSignalType          = TYPE_NO_VOICE_ACTIVITY;

        psEnc->sCmn.fs_kHz = fs_kHz;

        narrowband = ( psEnc->sCmn.fs_kHz == 8 );
        full_frame = ( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR );

        /* Pitch contour table depends on both bandwidth and frame length */
        if( full_frame ) {
            psEnc->sCmn.pitch_contour_iCDF = narrowband ? silk_pitch_contour_NB_iCDF
                                                        : silk_pitch_contour_iCDF;
        } else {
            psEnc->sCmn.pitch_contour_iCDF = narrowband ? silk_pitch_contour_10_ms_NB_iCDF
                                                        : silk_pitch_contour_10_ms_iCDF;
        }

        /* LPC order and NLSF codebook: 8 and 12 kHz share one set, every other rate uses the WB set */
        if( psEnc->sCmn.fs_kHz != 8 && psEnc->sCmn.fs_kHz != 12 ) {
            psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER;
            psEnc->sCmn.psNLSF_CB       = &silk_NLSF_CB_WB;
        } else {
            psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER;
            psEnc->sCmn.psNLSF_CB       = &silk_NLSF_CB_NB_MB;
        }

        /* Lengths expressed in samples at the new rate */
        psEnc->sCmn.subfr_length   = SUB_FRAME_LENGTH_MS * fs_kHz;
        psEnc->sCmn.frame_length   = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );
        psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
        psEnc->sCmn.la_pitch       = silk_SMULBB( LA_PITCH_MS, fs_kHz );
        psEnc->sCmn.max_pitch_lag  = silk_SMULBB( 18, fs_kHz );

        win_ms = full_frame ? FIND_PITCH_LPC_WIN_MS : FIND_PITCH_LPC_WIN_MS_2_SF;
        psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( win_ms, fs_kHz );

        /* LTP quantization weight and table for the low bits of the pitch lag */
        switch( psEnc->sCmn.fs_kHz ) {
        case 16:
            psEnc->sCmn.mu_LTP_Q9               = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 );
            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
            break;
        case 12:
            psEnc->sCmn.mu_LTP_Q9               = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 );
            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
            break;
        default:
            psEnc->sCmn.mu_LTP_Q9               = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 );
            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
            break;
        }
    }

    /* Check that settings are valid */
    silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length );

    return status;
}
/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
/* Buffer layout, as used by the code below:                                                  */
/*  - x1[ -2 ] .. x1[ frame_length - 1 ] and x2[ -2 ] .. x2[ frame_length - 1 ] are read, so  */
/*    both pointers must have two valid samples in front of them.                             */
/*  - the mid signal is written in place over x1[ -2 ] .. x1[ frame_length - 1 ].             */
/*  - the side residual is written to x2[ -1 ] .. x2[ frame_length - 2 ].                     */
/* state carries two samples of mid and side history, the smoothed width, and the previous    */
/* predictors and width between calls.                                                        */
void silk_stereo_LR_to_MS(
    stereo_enc_state *state, /* I/O State */
    opus_int16 x1[], /* I/O Left input signal, becomes mid signal */
    opus_int16 x2[], /* I/O Right input signal, becomes side signal */
    opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */
    opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */
    opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */
    opus_int32 total_rate_bps, /* I Total bitrate */
    opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
    opus_int toMono, /* I Last frame before a stereo->mono transition */
    opus_int fs_kHz, /* I Sample rate (kHz) */
    opus_int frame_length /* I Number of samples */
)
{
    opus_int n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;
    opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13;
    opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24;
    VARDECL( opus_int16, side );
    VARDECL( opus_int16, LP_mid );
    VARDECL( opus_int16, HP_mid );
    VARDECL( opus_int16, LP_side );
    VARDECL( opus_int16, HP_side );
    /* mid overlays the left channel buffer, starting two samples before x1[ 0 ] */
    opus_int16 *mid = &x1[ -2 ];
    SAVE_STACK;

    ALLOC( side, frame_length + 2, opus_int16 );
    /* Convert to basic mid/side signals */
    /* mid[ n ] occupies the same memory as x1[ n - 2 ], which has already been read into sum/diff */
    for( n = 0; n < frame_length + 2; n++ ) {
        sum = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ];
        diff = x1[ n - 2 ] - (opus_int32)x2[ n - 2 ];
        mid[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
        side[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( diff, 1 ) );
    }

    /* Buffering */
    /* The first two samples are replaced by the history kept in state, and the */
    /* two samples following the frame are stored as history for the next call  */
    silk_memcpy( mid, state->sMid, 2 * sizeof( opus_int16 ) );
    silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) );
    silk_memcpy( state->sMid, &mid[ frame_length ], 2 * sizeof( opus_int16 ) );
    silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) );

    /* LP and HP filter mid signal */
    /* The HP part is the centre sample minus the LP output, so LP + HP gives back mid[ n + 1 ] */
    ALLOC( LP_mid, frame_length, opus_int16 );
    ALLOC( HP_mid, frame_length, opus_int16 );
    for( n = 0; n < frame_length; n++ ) {
        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
        LP_mid[ n ] = sum;
        HP_mid[ n ] = mid[ n + 1 ] - sum;
    }

    /* LP and HP filter side signal */
    ALLOC( LP_side, frame_length, opus_int16 );
    ALLOC( HP_side, frame_length, opus_int16 );
    for( n = 0; n < frame_length; n++ ) {
        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
        LP_side[ n ] = sum;
        HP_side[ n ] = side[ n + 1 ] - sum;
    }

    /* Find energies and predictors */
    /* The smoothing coefficient is halved for 10 ms frames and scaled by the */
    /* squared speech activity of the previous frame                          */
    is10msFrame = frame_length == 10 * fs_kHz;
    smooth_coef_Q16 = is10msFrame ?
        SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) :
        SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF, 16 );
    smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 );

    /* One predictor for the low-pass band and one for the high-pass band */
    pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 );
    pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 );
    /* Ratio of the norms of residual and mid signals */
    frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 );
    frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) );

    /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
    total_rate_bps -= is10msFrame ? 1200 : 600; /* Subtract approximate bitrate for coding stereo parameters */
    if( total_rate_bps < 1 ) {
        total_rate_bps = 1;
    }
    min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 );
    silk_assert( min_mid_rate_bps < 32767 );
    /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. */
    /* so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_rate */
    frac_3_Q16 = silk_MUL( 3, frac_Q16 );
    mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 );
    /* If Mid bitrate below minimum, reduce stereo width */
    if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) {
        mid_side_rates_bps[ 0 ] = min_mid_rate_bps;
        mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
        /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */
        width_Q14 = silk_DIV32_varQ( silk_LSHIFT( mid_side_rates_bps[ 1 ], 1 ) - min_mid_rate_bps,
            silk_SMULWB( SILK_FIX_CONST( 1, 16 ) + frac_3_Q16, min_mid_rate_bps ), 14+2 );
        width_Q14 = silk_LIMIT( width_Q14, 0, SILK_FIX_CONST( 1, 14 ) );
    } else {
        mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
        width_Q14 = SILK_FIX_CONST( 1, 14 );
    }

    /* Smoother */
    state->smth_width_Q14 = (opus_int16)silk_SMLAWB( state->smth_width_Q14, width_Q14 - state->smth_width_Q14, smooth_coef_Q16 );

    /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
    /* Every branch below quantizes the predictors into ix via silk_stereo_quant_pred() */
    *mid_only_flag = 0;
    if( toMono ) {
        /* Last frame before stereo->mono transition; collapse stereo width */
        width_Q14 = 0;
        pred_Q13[ 0 ] = 0;
        pred_Q13[ 1 ] = 0;
        silk_stereo_quant_pred( pred_Q13, ix );
    } else if( state->width_prev_Q14 == 0 &&
        ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) )
    {
        /* Code as panned-mono; previous frame already had zero width */
        /* Scale down and quantize predictors */
        pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
        pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
        silk_stereo_quant_pred( pred_Q13, ix );
        /* Collapse stereo width */
        /* The predictors are zeroed only after ix has been produced from the scaled values */
        width_Q14 = 0;
        pred_Q13[ 0 ] = 0;
        pred_Q13[ 1 ] = 0;
        mid_side_rates_bps[ 0 ] = total_rate_bps;
        mid_side_rates_bps[ 1 ] = 0;
        *mid_only_flag = 1;
    } else if( state->width_prev_Q14 != 0 &&
        ( 8 * total_rate_bps < 11 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.02, 14 ) ) )
    {
        /* Transition to zero-width stereo */
        /* Scale down and quantize predictors */
        pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
        pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
        silk_stereo_quant_pred( pred_Q13, ix );
        /* Collapse stereo width */
        width_Q14 = 0;
        pred_Q13[ 0 ] = 0;
        pred_Q13[ 1 ] = 0;
    } else if( state->smth_width_Q14 > SILK_FIX_CONST( 0.95, 14 ) ) {
        /* Full-width stereo coding */
        silk_stereo_quant_pred( pred_Q13, ix );
        width_Q14 = SILK_FIX_CONST( 1, 14 );
    } else {
        /* Reduced-width stereo coding; scale down and quantize predictors */
        pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
        pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
        silk_stereo_quant_pred( pred_Q13, ix );
        width_Q14 = state->smth_width_Q14;
    }

    /* Make sure to keep on encoding until the tapered output has been transmitted */
    /* mid_only_flag is cleared again until silent_side_len reaches LA_SHAPE_MS * fs_kHz */
    if( *mid_only_flag == 1 ) {
        state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz;
        if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) {
            *mid_only_flag = 0;
        } else {
            /* Limit to avoid wrapping around */
            state->silent_side_len = 10000;
        }
    } else {
        state->silent_side_len = 0;
    }

    /* When the side channel is coded, both rates are kept at 1 bps or more */
    if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) {
        mid_side_rates_bps[ 1 ] = 1;
        mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]);
    }

    /* Interpolate predictors and subtract prediction from side channel */
    /* Over the first STEREO_INTERP_LEN_MS * fs_kHz samples, predictors and width */
    /* are stepped from the previous frame's values towards the new ones          */
    pred0_Q13 = -state->pred_prev_Q13[ 0 ];
    pred1_Q13 = -state->pred_prev_Q13[ 1 ];
    w_Q24 = silk_LSHIFT( state->width_prev_Q14, 10 );
    denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
    delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
    delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
    deltaw_Q24 = silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 );
    for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
        pred0_Q13 += delta0_Q13;
        pred1_Q13 += delta1_Q13;
        w_Q24 += deltaw_Q24;
        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */
        sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */
        sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
        /* Output sample n is stored at x2[ n - 1 ], so the first write goes to x2[ -1 ] */
        x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
    }

    /* Remainder of the frame: constant predictors and width */
    pred0_Q13 = -pred_Q13[ 0 ];
    pred1_Q13 = -pred_Q13[ 1 ];
    w_Q24 = silk_LSHIFT( width_Q14, 10 );
    for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */
        sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */
        sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
        x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
    }
    /* Keep this frame's predictors and width as the starting point for the next call */
    state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ];
    state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ];
    state->width_prev_Q14 = (opus_int16)width_Q14;
    RESTORE_STACK;
}
/*
 * Encode one frame with the fixed-point SILK encoder.
 *
 * The new input frame is appended to the analysis buffer.  Unless the encoder
 * is prefilling (psEnc->sCmn.prefillFlag set), the frame is analysed and then
 * quantized and entropy coded; the quantizer is re-run with rescaled gains for
 * up to maxIter further passes to steer the bit count towards maxBits.
 * The return value is always 0: ret is initialised to 0 and never modified.
 */
int silk_encode_frame_FIX(
    silk_encoder_state_FIX *psEnc,  /* I/O  Pointer to Silk FIX encoder state        */
    int32_t *pnBytesOut,            /* O    Pointer to number of payload bytes       */
    ec_enc *psRangeEnc,             /* I/O  Compressor data structure                */
    int condCoding,                 /* I    The type of conditional coding to use    */
    int maxBits,                    /* I    If > 0: maximum number of output bits    */
    int useCBR                      /* I    Flag to force constant-bitrate operation */
)
{
    silk_encoder_control_FIX sEncCtrl;
    /* "_entry" = state captured before the first quantizer pass,
       "_fit"   = state of the latest pass that stayed below the bit budget */
    silk_nsq_state nsq_entry, nsq_fit;
    ec_enc rangeEnc_entry, rangeEnc_fit;
    int16_t *x_frame;
    int16_t gainMult_Q8;
    int16_t prevLagIndex_entry;
    int prevSignalType_entry;
    int subfr, iter, maxIter, have_upper, have_lower;
    int ret = 0;
    int32_t seed_entry, nBits;
    int32_t gainsID, gainsID_lower, gainsID_upper;
    /* Zeroed only to keep compilers from warning about possibly
       uninitialised use (the original author notes it is not needed) */
    int32_t bits_lower = 0, bits_upper = 0;
    int32_t gainMult_lower = 0, gainMult_upper = 0;
    int8_t lastGainIndex_fit = 0;

    /* Seed is the low two bits of the frame counter, which is then advanced */
    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter & 3;
    psEnc->sCmn.frameCounter++;

    /**************************************************************/
    /* Set up input pointers, and insert frame in input buffer    */
    /**************************************************************/
    /* Start of frame to encode */
    x_frame = &psEnc->x_buf[psEnc->sCmn.ltp_mem_length];

    /* Ensure smooth bandwidth transitions */
    silk_LP_variable_cutoff(&psEnc->sCmn.sLP, &psEnc->sCmn.inputBuf[1],
                            psEnc->sCmn.frame_length);

    /* Copy new frame to front of input buffer */
    memcpy(&x_frame[LA_SHAPE_MS * psEnc->sCmn.fs_kHz], &psEnc->sCmn.inputBuf[1],
           psEnc->sCmn.frame_length * sizeof(int16_t));

    if (!psEnc->sCmn.prefillFlag) {
        int16_t res_pitch[psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length +
                          psEnc->sCmn.ltp_mem_length];
        /* Start of pitch LPC residual frame */
        int16_t *res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length;

        /* Find pitch lags, initial LPC analysis */
        silk_find_pitch_lags_FIX(psEnc, &sEncCtrl, res_pitch, x_frame,
                                 psEnc->sCmn.arch);

        /* Noise shape analysis */
        silk_noise_shape_analysis_FIX(psEnc, &sEncCtrl, res_pitch_frame, x_frame,
                                      psEnc->sCmn.arch);

        /* Find linear prediction coefficients (LPC + LTP) */
        silk_find_pred_coefs_FIX(psEnc, &sEncCtrl, res_pitch, x_frame, condCoding);

        /* Process gains */
        silk_process_gains_FIX(psEnc, &sEncCtrl, condCoding);

        /* Prefiltering for noise shaper */
        int32_t xfw_Q3[psEnc->sCmn.frame_length];
        silk_prefilter_FIX(psEnc, &sEncCtrl, xfw_Q3, x_frame);

        /* Low Bitrate Redundant Encoding */
        silk_LBRR_encode_FIX(psEnc, &sEncCtrl, xfw_Q3, condCoding);

        /* Loop over quantizer and entropy coding to control bitrate */
        maxIter = 6;
        gainMult_Q8 = SILK_FIX_CONST(1, 8);
        have_lower = 0;
        have_upper = 0;
        gainsID = silk_gains_ID(psEnc->sCmn.indices.GainsIndices,
                                psEnc->sCmn.nb_subfr);
        gainsID_lower = -1;
        gainsID_upper = -1;

        /* Snapshot the part of the input state that every pass starts from */
        memcpy(&rangeEnc_entry, psRangeEnc, sizeof(ec_enc));
        memcpy(&nsq_entry, &psEnc->sCmn.sNSQ, sizeof(silk_nsq_state));
        seed_entry = psEnc->sCmn.indices.Seed;
        prevLagIndex_entry = psEnc->sCmn.ec_prevLagIndex;
        prevSignalType_entry = psEnc->sCmn.ec_prevSignalType;

        uint8_t rangeBuf_fit[1275];

        for (iter = 0;; iter++) {
            if (gainsID != gainsID_lower && gainsID != gainsID_upper) {
                /* Gains not tried before: quantize and entropy code the frame */
                if (iter > 0) {
                    /* Restore part of the input state */
                    memcpy(psRangeEnc, &rangeEnc_entry, sizeof(ec_enc));
                    memcpy(&psEnc->sCmn.sNSQ, &nsq_entry, sizeof(silk_nsq_state));
                    psEnc->sCmn.indices.Seed = seed_entry;
                    psEnc->sCmn.ec_prevLagIndex = prevLagIndex_entry;
                    psEnc->sCmn.ec_prevSignalType = prevSignalType_entry;
                }

                /* Noise shaping quantization */
                if (psEnc->sCmn.nStatesDelayedDecision > 1 ||
                    psEnc->sCmn.warping_Q16 > 0) {
                    silk_NSQ_del_dec(&psEnc->sCmn, &psEnc->sCmn.sNSQ,
                                     &psEnc->sCmn.indices, xfw_Q3,
                                     psEnc->sCmn.pulses,
                                     sEncCtrl.PredCoef_Q12[0],
                                     sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13,
                                     sEncCtrl.HarmShapeGain_Q14,
                                     sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14,
                                     sEncCtrl.Gains_Q16, sEncCtrl.pitchL,
                                     sEncCtrl.Lambda_Q10,
                                     sEncCtrl.LTP_scale_Q14);
                } else {
                    silk_NSQ(&psEnc->sCmn, &psEnc->sCmn.sNSQ,
                             &psEnc->sCmn.indices, xfw_Q3,
                             psEnc->sCmn.pulses,
                             sEncCtrl.PredCoef_Q12[0],
                             sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13,
                             sEncCtrl.HarmShapeGain_Q14,
                             sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14,
                             sEncCtrl.Gains_Q16, sEncCtrl.pitchL,
                             sEncCtrl.Lambda_Q10,
                             sEncCtrl.LTP_scale_Q14);
                }

                /* Encode parameters */
                silk_encode_indices(&psEnc->sCmn, psRangeEnc,
                                    psEnc->sCmn.nFramesEncoded, 0, condCoding);

                /* Encode excitation signal */
                silk_encode_pulses(psRangeEnc, psEnc->sCmn.indices.signalType,
                                   psEnc->sCmn.indices.quantOffsetType,
                                   psEnc->sCmn.pulses, psEnc->sCmn.frame_length);

                nBits = ec_tell(psRangeEnc);

                /* Without CBR, a first pass that fits the budget is accepted */
                if (useCBR == 0 && iter == 0 && nBits <= maxBits) {
                    break;
                }
            } else if (gainsID == gainsID_lower) {
                /* Same gains as the stored "lower" pass: reuse its bit count */
                nBits = bits_lower;
            } else {
                /* Same gains as the stored "upper" pass: reuse its bit count */
                nBits = bits_upper;
            }

            if (iter == maxIter) {
                if (have_lower && (gainsID == gainsID_lower || nBits > maxBits)) {
                    /* Restore output state from earlier iteration that did meet the bitrate budget */
                    memcpy(psRangeEnc, &rangeEnc_fit, sizeof(ec_enc));
                    assert(rangeEnc_fit.offs <= 1275);
                    memcpy(psRangeEnc->buf, rangeBuf_fit, rangeEnc_fit.offs);
                    memcpy(&psEnc->sCmn.sNSQ, &nsq_fit, sizeof(silk_nsq_state));
                    psEnc->sShape.LastGainIndex = lastGainIndex_fit;
                }
                break;
            }

            /* Within 5 bits of budget: close enough */
            if (nBits <= maxBits && nBits >= maxBits - 5) {
                break;
            }

            if (nBits > maxBits) {
                if (!have_lower && iter >= 2) {
                    /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
                    sEncCtrl.Lambda_Q10 = silk_ADD_RSHIFT32(sEncCtrl.Lambda_Q10,
                                                            sEncCtrl.Lambda_Q10, 1);
                    have_upper = 0;
                    gainsID_upper = -1;
                } else {
                    have_upper = 1;
                    bits_upper = nBits;
                    gainMult_upper = gainMult_Q8;
                    gainsID_upper = gainsID;
                }
            } else {
                /* More than 5 bits below the budget */
                have_lower = 1;
                bits_lower = nBits;
                gainMult_lower = gainMult_Q8;
                if (gainsID != gainsID_lower) {
                    gainsID_lower = gainsID;
                    /* Copy part of the output state */
                    memcpy(&rangeEnc_fit, psRangeEnc, sizeof(ec_enc));
                    assert(psRangeEnc->offs <= 1275);
                    memcpy(rangeBuf_fit, psRangeEnc->buf, psRangeEnc->offs);
                    memcpy(&nsq_fit, &psEnc->sCmn.sNSQ, sizeof(silk_nsq_state));
                    lastGainIndex_fit = psEnc->sShape.LastGainIndex;
                }
            }

            if (have_lower && have_upper) {
                /* Adjust gain by interpolating */
                assert(bits_upper != bits_lower);

                const int32_t span = gainMult_upper - gainMult_lower;
                const int32_t limit_hi = silk_ADD_RSHIFT32(gainMult_lower, span, 2);
                const int32_t limit_lo = silk_SUB_RSHIFT32(gainMult_upper, span, 2);

                gainMult_Q8 = gainMult_lower +
                              silk_DIV32_16(silk_MUL(span, maxBits - bits_lower),
                                            bits_upper - bits_lower);

                /* New gain multiplier must be between 25% and 75% of old range
                   (note that gainMult_upper < gainMult_lower) */
                if (gainMult_Q8 > limit_hi) {
                    gainMult_Q8 = limit_hi;
                } else if (gainMult_Q8 < limit_lo) {
                    gainMult_Q8 = limit_lo;
                }
            } else {
                /* Adjust gain according to high-rate rate/distortion curve */
                int32_t gain_factor_Q16;

                gain_factor_Q16 = silk_log2lin(silk_LSHIFT(nBits - maxBits, 7) /
                                                   psEnc->sCmn.frame_length +
                                               SILK_FIX_CONST(16, 7));
                gain_factor_Q16 = silk_min_32(gain_factor_Q16, SILK_FIX_CONST(2, 16));
                if (nBits > maxBits) {
                    gain_factor_Q16 = silk_max_32(gain_factor_Q16,
                                                  SILK_FIX_CONST(1.3, 16));
                }
                gainMult_Q8 = silk_SMULWB(gain_factor_Q16, gainMult_Q8);
            }

            /* Rescale the unquantized gains with the new multiplier */
            for (subfr = 0; subfr < psEnc->sCmn.nb_subfr; subfr++) {
                sEncCtrl.Gains_Q16[subfr] =
                    silk_LSHIFT_SAT32(silk_SMULWB(sEncCtrl.GainsUnq_Q16[subfr],
                                                  gainMult_Q8), 8);
            }

            /* Quantize gains */
            psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
            silk_gains_quant(psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16,
                             &psEnc->sShape.LastGainIndex,
                             condCoding == CODE_CONDITIONALLY,
                             psEnc->sCmn.nb_subfr);

            /* Unique identifier of gains vector */
            gainsID = silk_gains_ID(psEnc->sCmn.indices.GainsIndices,
                                    psEnc->sCmn.nb_subfr);
        }
    }

    /* Update input buffer */
    memmove(psEnc->x_buf, &psEnc->x_buf[psEnc->sCmn.frame_length],
            (psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz) *
                sizeof(int16_t));

    /* Exit without entropy coding */
    if (psEnc->sCmn.prefillFlag) {
        /* No payload */
        *pnBytesOut = 0;
        return ret;
    }

    /* Parameters needed for next frame */
    psEnc->sCmn.prevLag = sEncCtrl.pitchL[psEnc->sCmn.nb_subfr - 1];
    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;

    /* Finalize payload */
    psEnc->sCmn.first_frame_after_reset = 0;

    /* Payload size: bits used so far, rounded up to whole bytes */
    *pnBytesOut = silk_RSHIFT(ec_tell(psRangeEnc) + 7, 3);

    return ret;
}
/* Initialize/reset the resampler state for a given pair of input/output sampling rates. */
/* Returns 0 on success, or -1 when the rate pair is rejected or no resampler matches.   */
opus_int silk_resampler_init(
    silk_resampler_state_struct *S,         /* I/O  Resampler state                         */
    opus_int32                  Fs_Hz_in,   /* I    Input sampling rate (Hz)                */
    opus_int32                  Fs_Hz_out,  /* I    Output sampling rate (Hz)               */
    opus_int                    forEnc      /* I    If 1: encoder; if 0: decoder            */
)
{
    /* Set to 1 only for the default upsampler; adds one bit of shift to the input rate in the ratio below */
    opus_int   ratio_shift = 0;
    opus_int32 scaled_Fs_in;

    /* Membership in the two accepted rate sets: {8, 12, 16} kHz and {8, 12, 16, 24, 48} kHz */
    const opus_int in_is_low   = ( Fs_Hz_in  == 8000 || Fs_Hz_in  == 12000 || Fs_Hz_in  == 16000 );
    const opus_int out_is_low  = ( Fs_Hz_out == 8000 || Fs_Hz_out == 12000 || Fs_Hz_out == 16000 );
    const opus_int in_is_any   = ( in_is_low  || Fs_Hz_in  == 24000 || Fs_Hz_in  == 48000 );
    const opus_int out_is_any  = ( out_is_low || Fs_Hz_out == 24000 || Fs_Hz_out == 48000 );

    /* Clear state */
    silk_memset( S, 0, sizeof( silk_resampler_state_struct ) );

    /* Input checking: the encoder takes any accepted rate in and produces a low rate; */
    /* the decoder takes a low rate in and produces any accepted rate                  */
    if( forEnc ) {
        if( !in_is_any || !out_is_low ) {
            silk_assert( 0 );
            return -1;
        }
        S->inputDelay = delay_matrix_enc[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ];
    } else {
        if( !in_is_low || !out_is_any ) {
            silk_assert( 0 );
            return -1;
        }
        S->inputDelay = delay_matrix_dec[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ];
    }

    S->Fs_in_kHz  = silk_DIV32_16( Fs_Hz_in,  1000 );
    S->Fs_out_kHz = silk_DIV32_16( Fs_Hz_out, 1000 );

    /* Number of samples processed per batch */
    S->batchSize = S->Fs_in_kHz * RESAMPLER_MAX_BATCH_SIZE_MS;

    /* Find resampler with the right sampling ratio */
    if( Fs_Hz_out == Fs_Hz_in ) {
        /* Input and output sampling rates are equal: copy */
        S->resampler_function = USE_silk_resampler_copy;
    } else if( Fs_Hz_out > Fs_Hz_in ) {
        /* Upsample */
        if( Fs_Hz_out == silk_MUL( Fs_Hz_in, 2 ) ) {
            /* Fs_out : Fs_in = 2 : 1, special case: directly use 2x upsampler */
            S->resampler_function = USE_silk_resampler_private_up2_HQ_wrapper;
        } else {
            /* Default resampler */
            S->resampler_function = USE_silk_resampler_private_IIR_FIR;
            ratio_shift = 1;
        }
    } else {
        /* Downsample */
        S->resampler_function = USE_silk_resampler_private_down_FIR;
        if( silk_MUL( Fs_Hz_out, 4 ) == silk_MUL( Fs_Hz_in, 3 ) ) {
            /* Fs_out : Fs_in = 3 : 4 */
            S->FIR_Fracs = 3;
            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0;
            S->Coefs = silk_Resampler_3_4_COEFS;
        } else if( silk_MUL( Fs_Hz_out, 3 ) == silk_MUL( Fs_Hz_in, 2 ) ) {
            /* Fs_out : Fs_in = 2 : 3 */
            S->FIR_Fracs = 2;
            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0;
            S->Coefs = silk_Resampler_2_3_COEFS;
        } else if( silk_MUL( Fs_Hz_out, 2 ) == Fs_Hz_in ) {
            /* Fs_out : Fs_in = 1 : 2 */
            S->FIR_Fracs = 1;
            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR1;
            S->Coefs = silk_Resampler_1_2_COEFS;
        } else {
            /* Remaining integer ratios differ only in their coefficient table */
            if( silk_MUL( Fs_Hz_out, 3 ) == Fs_Hz_in ) {
                /* Fs_out : Fs_in = 1 : 3 */
                S->Coefs = silk_Resampler_1_3_COEFS;
            } else if( silk_MUL( Fs_Hz_out, 4 ) == Fs_Hz_in ) {
                /* Fs_out : Fs_in = 1 : 4 */
                S->Coefs = silk_Resampler_1_4_COEFS;
            } else if( silk_MUL( Fs_Hz_out, 6 ) == Fs_Hz_in ) {
                /* Fs_out : Fs_in = 1 : 6 */
                S->Coefs = silk_Resampler_1_6_COEFS;
            } else {
                /* None available */
                silk_assert( 0 );
                return -1;
            }
            S->FIR_Fracs = 1;
            S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2;
        }
    }

    /* Ratio of input/output samples */
    S->invRatio_Q16 = silk_LSHIFT32( silk_DIV32( silk_LSHIFT32( Fs_Hz_in, 14 + ratio_shift ), Fs_Hz_out ), 2 );

    /* Make sure the ratio is rounded up */
    scaled_Fs_in = silk_LSHIFT32( Fs_Hz_in, ratio_shift );
    while( silk_SMULWW( S->invRatio_Q16, Fs_Hz_out ) < scaled_Fs_in ) {
        S->invRatio_Q16++;
    }

    return 0;
}
/*
 * Buffer, resample and encode the supplied input samples.
 *
 * Input is consumed in a loop: samples are resampled into the per-channel
 * input buffers and, each time channel 0 has a full frame buffered, that
 * frame is encoded into psRangeEnc.
 *
 * Note: if prefillFlag is set, the input must contain exactly one 10 ms block,
 * irrespective of what encControl->payloadSize_ms is set to. For the duration
 * of the call payloadSize_ms is forced to 10 and complexity to 0; both are
 * restored before returning.
 *
 * Returns 0 on success. A non-zero value is either the error reported by
 * check_control_input() / silk_control_encoder(),
 * SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES for a rejected input length, or
 * whatever has accumulated in / been assigned to ret by the helpers below.
 */
opus_int silk_Encode( /* O Returns error code */
    void *encState, /* I/O State */
    silk_EncControlStruct *encControl, /* I Control status */
    const opus_int16 *samplesIn, /* I Speech sample input vector */
    opus_int nSamplesIn, /* I Number of samples in input vector */
    ec_enc *psRangeEnc, /* I/O Compressor data structure */
    opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
    const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */
)
{
    opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
    opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
    opus_int nSamplesFromInput = 0, nSamplesFromInputMax;
    opus_int speech_act_thr_for_switch_Q8;
    opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
    silk_encoder *psEnc = ( silk_encoder * )encState;
    VARDECL( opus_int16, buf );
    opus_int transition, curr_block, tot_blocks;
    SAVE_STACK;

    /* With reduced dependency requested, flag both channel states as "first frame after reset" */
    if (encControl->reducedDependency) {
        psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
        psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
    }
    /* The per-call frame counters of both channel states start at zero */
    psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;

    /* Check values in encoder control structure */
    if( ( ret = check_control_input( encControl ) ) != 0 ) {
        silk_assert( 0 );
        RESTORE_STACK;
        return ret;
    }

    encControl->switchReady = 0;

    if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
        /* Mono -> Stereo transition: init state of second channel and stereo state */
        ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch );
        silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
        silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
        psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
        psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1;
        psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0;
        psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1;
        psEnc->sStereo.width_prev_Q14 = 0;
        psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 );
        /* If the API was already stereo, start channel 1 from channel 0's resampler and HP filter state */
        if( psEnc->nChannelsAPI == 2 ) {
            silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) );
            silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) );
        }
    }

    /* A change of packet size or of internal channel count counts as a transition (used below to clear LBRR flags) */
    transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal);

    psEnc->nChannelsAPI = encControl->nChannelsAPI;
    psEnc->nChannelsInternal = encControl->nChannelsInternal;

    nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate );
    /* Half the number of 10 ms blocks, but at least 1; used below to split maxBits across the frames of this call */
    tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1;
    curr_block = 0;
    if( prefillFlag ) {
        /* Only accept input length of 10 ms */
        if( nBlocksOf10ms != 1 ) {
            silk_assert( 0 );
            RESTORE_STACK;
            return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
        }
        /* Reset Encoder */
        for( n = 0; n < encControl->nChannelsInternal; n++ ) {
            ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
            silk_assert( !ret );
        }
        /* Temporarily force 10 ms payloads and complexity 0; the caller's values are restored at the end */
        tmp_payloadSize_ms = encControl->payloadSize_ms;
        encControl->payloadSize_ms = 10;
        tmp_complexity = encControl->complexity;
        encControl->complexity = 0;
        for( n = 0; n < encControl->nChannelsInternal; n++ ) {
            psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
            psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
        }
    } else {
        /* Only accept input lengths that are a multiple of 10 ms */
        if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {
            silk_assert( 0 );
            RESTORE_STACK;
            return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
        }
        /* Make sure no more than one packet can be produced */
        if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {
            silk_assert( 0 );
            RESTORE_STACK;
            return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
        }
    }

    /* Per-channel rate: total bitrate shifted right by ( nChannelsInternal - 1 ) */
    TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 );
    for( n = 0; n < encControl->nChannelsInternal; n++ ) {
        /* Force the side channel to the same rate as the mid */
        opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
        if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
            silk_assert( 0 );
            RESTORE_STACK;
            return ret;
        }
        /* Drop pending LBRR flags on the first frame after a reset or on a transition */
        if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
            for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
                psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
            }
        }
        psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
    }
    silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );

    /* Input buffering/resampling and encoding */
    nSamplesToBufferMax = 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
    nSamplesFromInputMax = silk_DIV32_16( nSamplesToBufferMax * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
    /* Scratch buffer holding one channel of input samples before resampling */
    ALLOC( buf, nSamplesFromInputMax, opus_int16 );
    while( 1 ) {
        /* Internal-rate samples still missing from channel 0's frame, and the matching number of input samples */
        nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
        nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax );
        nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
        /* Resample and write to buffer (output lands at inputBuf[ inputBufIx + 2 ] onwards) */
        if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
            opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
            /* De-interleave the even-indexed input samples for channel 0 */
            for( n = 0; n < nSamplesFromInput; n++ ) {
                buf[ n ] = samplesIn[ 2 * n ];
            }
            /* Making sure to start both resamplers from the same state when switching from mono to stereo */
            if( psEnc->nPrevChannelsInternal == 1 && id==0 ) {
                silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
            }
            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;

            /* Same for channel 1; nSamplesFromInput computed for channel 0 is reused */
            nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
            nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
            /* De-interleave the odd-indexed input samples for channel 1 */
            for( n = 0; n < nSamplesFromInput; n++ ) {
                buf[ n ] = samplesIn[ 2 * n + 1 ];
            }
            ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
            psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
        } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
            /* Combine left and right channels before resampling */
            for( n = 0; n < nSamplesFromInput; n++ ) {
                sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ];
                buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
            }
            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
            /* On the first mono frame, average the results for the two resampler states */
            if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) {
                ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
                for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) {
                    psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] = silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
                }
            }
            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
        } else {
            /* Mono in, mono internally: resample the input as-is */
            silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
            silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
            ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
            psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
        }

        /* Advance past the consumed (interleaved) input */
        samplesIn += nSamplesFromInput * encControl->nChannelsAPI;
        nSamplesIn -= nSamplesFromInput;

        /* Default */
        psEnc->allowBandwidthSwitch = 0;

        /* Silk encoder */
        if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) {
            /* Enough data in input buffer, so encode */
            silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length );
            silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length );

            /* Deal with LBRR data */
            if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) {
                /* Create space at start of payload for VAD and FEC flags */
                opus_uint8 iCDF[ 2 ] = { 0, 0 };
                iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
                ec_enc_icdf( psRangeEnc, 0, iCDF, 8 );

                /* Encode any LBRR data from previous packet */
                /* Encode LBRR flags */
                for( n = 0; n < encControl->nChannelsInternal; n++ ) {
                    /* Pack the per-frame LBRR flags of this channel into a bit mask (frame i -> bit i) */
                    LBRR_symbol = 0;
                    for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
                        LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i );
                    }
                    psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0;
                    /* The mask itself is only coded for multi-frame packets */
                    if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) {
                        ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 );
                    }
                }

                /* Code LBRR indices and excitation signals */
                for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
                    for( n = 0; n < encControl->nChannelsInternal; n++ ) {
                        if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) {
                            opus_int condCoding;
                            if( encControl->nChannelsInternal == 2 && n == 0 ) {
                                silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] );
                                /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
                                if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) {
                                    silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] );
                                }
                            }
                            /* Use conditional coding if previous frame available */
                            if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) {
                                condCoding = CODE_CONDITIONALLY;
                            } else {
                                condCoding = CODE_INDEPENDENTLY;
                            }
                            silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding );
                            silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType, psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length );
                        }
                    }
                }

                /* Reset LBRR flags */
                for( n = 0; n < encControl->nChannelsInternal; n++ ) {
                    silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) );
                }
                /* Remember how many bits the LBRR section consumed */
                psEnc->nBitsUsedLBRR = ec_tell( psRangeEnc );
            }

            silk_HP_variable_cutoff( psEnc->state_Fxx );

            /* Total target bits for packet */
            nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
            /* Subtract bits used for LBRR */
            if( !prefillFlag ) {
                nBits -= psEnc->nBitsUsedLBRR;
            }
            /* Divide by the number of frames per packet */
            nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket );
            /* Convert to bits/second */
            if( encControl->payloadSize_ms == 10 ) {
                TargetRate_bps = silk_SMULBB( nBits, 100 );
            } else {
                TargetRate_bps = silk_SMULBB( nBits, 50 );
            }
            /* Subtract fraction of bits in excess of target in previous frames and packets */
            TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
            if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) {
                /* Compare actual vs target bits so far in this packet */
                opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
                TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
            }
            /* Never exceed input bitrate */
            /* NOTE(review): presumably silk_LIMIT clamps to the range spanned by bitRate and 5000 regardless of argument order - confirm */
            TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 );

            /* Convert Left/Right to Mid/Side */
            if( encControl->nChannelsInternal == 2 ) {
                silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ], psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
                if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
                    /* Reset side channel encoder memory for first frame with side coding */
                    if( psEnc->prev_decode_only_middle == 1 ) {
                        silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
                        silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) );
                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
                        silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
                        silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
                        psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100;
                        psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100;
                        psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10;
                        psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
                        psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536;
                        psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
                    }
                    silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
                } else {
                    /* Mid-only frame: the side channel's VAD flag for this frame is cleared */
                    psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
                }
                if( !prefillFlag ) {
                    silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
                    /* The mid-only flag is only coded when the side channel's VAD flag is 0 */
                    if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
                        silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
                    }
                }
            } else {
                /* Buffering: the first two inputBuf entries come from sStereo.sMid, which is then refilled with the two entries at inputBuf[ frame_length ] */
                silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
                silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
            }
            silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );

            /* Encode */
            for( n = 0; n < encControl->nChannelsInternal; n++ ) {
                opus_int maxBits, useCBR;

                /* Handling rate constraints */
                maxBits = encControl->maxBits;
                if( tot_blocks == 2 && curr_block == 0 ) {
                    maxBits = maxBits * 3 / 5;
                } else if( tot_blocks == 3 ) {
                    if( curr_block == 0 ) {
                        maxBits = maxBits * 2 / 5;
                    } else if( curr_block == 1 ) {
                        maxBits = maxBits * 3 / 4;
                    }
                }
                /* CBR is only enforced on the last block of the call */
                useCBR = encControl->useCBR && curr_block == tot_blocks - 1;

                if( encControl->nChannelsInternal == 1 ) {
                    channelRate_bps = TargetRate_bps;
                } else {
                    channelRate_bps = MStargetRates_bps[ n ];
                    if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) {
                        useCBR = 0;
                        /* Give mid up to 1/2 of the max bits for that frame */
                        maxBits -= encControl->maxBits / ( tot_blocks * 2 );
                    }
                }

                /* A channel with a non-positive rate is not encoded for this frame */
                if( channelRate_bps > 0 ) {
                    opus_int condCoding;
                    silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps );
                    /* Use independent coding if no previous frame available */
                    if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) {
                        condCoding = CODE_INDEPENDENTLY;
                    } else if( n > 0 && psEnc->prev_decode_only_middle ) {
                        /* If we skipped a side frame in this packet, we don't need LTP scaling; the LTP state is well-defined. */
                        condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
                    } else {
                        condCoding = CODE_CONDITIONALLY;
                    }
                    /* NOTE(review): this assignment replaces whatever ret accumulated from the resampler calls above - confirm that is intended */
                    if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) {
                        silk_assert( 0 );
                    }
                }
                psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
                psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
                psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
            }
            /* Remember the mid-only flag of the frame just encoded */
            psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ];

            /* Insert VAD and FEC flags at beginning of bitstream */
            if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) {
                /* Per channel: one VAD bit per frame followed by the LBRR bit */
                flags = 0;
                for( n = 0; n < encControl->nChannelsInternal; n++ ) {
                    for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
                        flags = silk_LSHIFT( flags, 1 );
                        flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ];
                    }
                    flags = silk_LSHIFT( flags, 1 );
                    flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
                }
                if( !prefillFlag ) {
                    ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
                }

                /* Return zero bytes if all channels DTXed */
                if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
                    *nBytesOut = 0;
                }

                /* Track bits spent beyond the nominal packet budget, limited to 0..10000 */
                psEnc->nBitsExceeded += *nBytesOut * 8;
                psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
                psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 );

                /* Update flag indicating if bandwidth switching is allowed */
                speech_act_thr_for_switch_Q8 = (opus_int) silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ), SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms );
                if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) {
                    psEnc->allowBandwidthSwitch = 1;
                    psEnc->timeSinceSwitchAllowed_ms = 0;
                } else {
                    psEnc->allowBandwidthSwitch = 0;
                    psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms;
                }
            }

            /* All input consumed: done */
            if( nSamplesIn == 0 ) {
                break;
            }
        } else {
            /* Not enough samples buffered for a full frame yet */
            break;
        }
        curr_block++;
    }

    psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;

    /* Report encoder status back through the control structure */
    encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
    encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;
    encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
    encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14;
    if( prefillFlag ) {
        /* Undo the temporary prefill overrides */
        encControl->payloadSize_ms = tmp_payloadSize_ms;
        encControl->complexity = tmp_complexity;
        for( n = 0; n < encControl->nChannelsInternal; n++ ) {
            psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
            psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0;
        }
    }

    RESTORE_STACK;
    return ret;
}