/* Add noise to matrix diagonal */
void silk_regularize_correlations_FIX(
    opus_int32                  *XX,                /* I/O  Correlation matrices            */
    opus_int32                  *xx,                /* I/O  Correlation values              */
    opus_int32                  noise,              /* I    Noise to add                    */
    opus_int                    D                   /* I    Dimension of XX                 */
)
{
    opus_int i;
    for( i = 0; i < D; i++ ) {
        matrix_ptr( &XX[ 0 ], i, i, D ) = silk_ADD32( matrix_ptr( &XX[ 0 ], i, i, D ), noise );
    }
    xx[ 0 ] += noise;
}
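/*
 * Illustrative sketch (not part of SILK): the function above is plain diagonal
 * loading, XX := XX + noise * I, which keeps the correlation matrix well
 * conditioned before the LDL solve. A floating-point analogue, with
 * hypothetical names, could look like this:
 */
static void regularize_correlations_float( float *XX, float *xx, float noise, int D )
{
    int i;
    for( i = 0; i < D; i++ ) {
        XX[ i * D + i ] += noise;   /* add noise to every diagonal element */
    }
    *xx += noise;                   /* and to the zero-lag correlation     */
}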
/* Split signal into two decimated bands using first-order allpass filters */
void silk_ana_filt_bank_1(
    const opus_int16            *in,                /* I    Input signal [N]                */
    opus_int32                  *S,                 /* I/O  State vector [2]                */
    opus_int16                  *outL,              /* O    Low band [N/2]                  */
    opus_int16                  *outH,              /* O    High band [N/2]                 */
    const opus_int32            N                   /* I    Number of input samples         */
)
{
    opus_int k, N2 = silk_RSHIFT( N, 1 );
    opus_int32 in32, X, Y, out_1, out_2;

    /* Internal variables and state are in Q10 format */
    for( k = 0; k < N2; k++ ) {
        /* Convert to Q10 */
        in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 );

        /* All-pass section for even input sample */
        Y      = silk_SUB32( in32, S[ 0 ] );
        X      = silk_SMLAWB( Y, Y, A_fb1_21 );
        out_1  = silk_ADD32( S[ 0 ], X );
        S[ 0 ] = silk_ADD32( in32, X );

        /* Convert to Q10 */
        in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 );

        /* All-pass section for odd input sample, and add to output of previous section */
        Y      = silk_SUB32( in32, S[ 1 ] );
        X      = silk_SMULWB( Y, A_fb1_20 );
        out_2  = silk_ADD32( S[ 1 ], X );
        S[ 1 ] = silk_ADD32( in32, X );

        /* Add/subtract, convert back to int16 and store to output */
        outL[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_ADD32( out_2, out_1 ), 11 ) );
        outH[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SUB32( out_2, out_1 ), 11 ) );
    }
}
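/*
 * Sketch (not part of SILK): each branch above is a first-order, one-multiplier
 * allpass section; even input samples feed one branch, odd samples the other,
 * and the half-rate low/high bands are the scaled sum and difference of the two
 * branch outputs (the >> 11 combines the Q10 conversion with the factor 1/2).
 * The even branch folds a unity term into its coefficient via silk_SMLAWB.
 * A floating-point reference of one section, for a generic coefficient c:
 */
static float allpass1_float( float in, float *state, float c )
{
    float y   = in - *state;    /* Y   = in - S */
    float x   = c * y;          /* X   = c * Y  */
    float out = *state + x;     /* out = S + X  */
    *state    = in + x;         /* S   = in + X */
    return out;
}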
static OPUS_INLINE void silk_LS_divide_Q16_FIX(
    opus_int32                  T[],                /* I/O  Numerator vector                */
    inv_D_t                     *inv_D,             /* I    1 / D vector                    */
    opus_int                    M                   /* I    Dimension                       */
)
{
    opus_int i;
    opus_int32 tmp_32;
    opus_int32 one_div_diag_Q36, one_div_diag_Q48;

    for( i = 0; i < M; i++ ) {
        one_div_diag_Q36 = inv_D[ i ].Q36_part;
        one_div_diag_Q48 = inv_D[ i ].Q48_part;

        tmp_32 = T[ i ];
        T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
    }
}
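/*
 * Sketch (not part of SILK): the divide T[i] / D[i] is replaced by a multiply
 * with a reciprocal stored in two parts, a coarse Q36 term plus a Q48
 * correction (both produced by silk_LDL_factorize_FIX below). The combination
 * (t * q36) >> 20  +  (t * q48) >> 32 yields the quotient in Q16 with close to
 * twice the precision of a single 32-bit reciprocal. With stdint types standing
 * in for opus_int32/opus_int64:
 */
#include <stdint.h>
static int32_t mul_by_split_reciprocal( int32_t t, int32_t recip_Q36, int32_t recip_Q48 )
{
    int32_t main_part = (int32_t)( ( ( (int64_t)t * recip_Q36 ) >> 16 ) >> 4 ); /* silk_SMULWW(), then >> 4 */
    int32_t corr_part = (int32_t)( ( (int64_t)t * recip_Q48 ) >> 32 );          /* silk_SMMUL()             */
    return main_part + corr_part;   /* approximately ( t / d ) in Q16 */
}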
void silk_NLSF_decode( opus_int16 *pNLSF_Q15, /* O Quantized NLSF vector [ LPC_ORDER ] */ opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ const silk_NLSF_CB_struct *psNLSF_CB /* I Codebook object */ ) { opus_int i; opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; opus_int16 ec_ix[ MAX_LPC_ORDER ]; opus_int16 res_Q10[ MAX_LPC_ORDER ]; opus_int16 W_tmp_QW[ MAX_LPC_ORDER ]; opus_int32 W_tmp_Q9, NLSF_Q15_tmp; const opus_uint8 *pCB_element; /* Decode first stage */ pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ]; for( i = 0; i < psNLSF_CB->order; i++ ) { pNLSF_Q15[ i ] = silk_LSHIFT( (opus_int16)pCB_element[ i ], 7 ); } /* Unpack entropy table indices and predictor for current CB1 index */ silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] ); /* Predictive residual dequantizer */ silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->order ); /* Weights from codebook vector */ silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, psNLSF_CB->order ); /* Apply inverse square-rooted weights and add to output */ for( i = 0; i < psNLSF_CB->order; i++ ) { W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) ); NLSF_Q15_tmp = silk_ADD32( pNLSF_Q15[ i ], silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), W_tmp_Q9 ) ); pNLSF_Q15[ i ] = (opus_int16)silk_LIMIT( NLSF_Q15_tmp, 0, 32767 ); } /* NLSF stabilization */ silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order ); }
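/*
 * Sketch (not part of SILK): in floating point the residual back-scaling above
 * is simply "add the quantization residual divided by the square root of the
 * Laroia weight to the first-stage codebook vector", with the NLSF domain
 * normalized to [0, 1) instead of Q15. Names are hypothetical; stabilization is
 * handled separately by silk_NLSF_stabilize.
 */
#include <math.h>
static void nlsf_add_residual_float( float *nlsf, const float *res, const float *w, int order )
{
    int i;
    for( i = 0; i < order; i++ ) {
        nlsf[ i ] += res[ i ] / sqrtf( w[ i ] );       /* inverse square-root weighting            */
        if( nlsf[ i ] < 0.0f ) nlsf[ i ] = 0.0f;       /* counterpart of silk_LIMIT( ., 0, 32767 ) */
        if( nlsf[ i ] > 1.0f ) nlsf[ i ] = 1.0f;
    }
}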
static OPUS_INLINE void silk_LDL_factorize_FIX( opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ opus_int M, /* I Size of Matrix */ opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ ) { opus_int i, j, k, status, loop_count; const opus_int32 *ptr1, *ptr2; opus_int32 diag_min_value, tmp_32, err; opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ]; opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48; silk_assert( M <= MAX_MATRIX_SIZE ); status = 1; diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 ); for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) { status = 0; for( j = 0; j < M; j++ ) { ptr1 = matrix_adr( L_Q16, j, 0, M ); tmp_32 = 0; for( i = 0; i < j; i++ ) { v_Q0[ i ] = silk_SMULWW( D_Q0[ i ], ptr1[ i ] ); /* Q0 */ tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */ } tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 ); if( tmp_32 < diag_min_value ) { tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 ); /* Matrix not positive semi-definite, or ill conditioned */ for( i = 0; i < M; i++ ) { matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 ); } status = 1; break; } D_Q0[ j ] = tmp_32; /* always < max(Correlation) */ /* two-step division */ one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 ); /* Q36 */ one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 ); /* Q40 */ err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) ); /* Q24 */ one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 ); /* Q48 */ /* Save 1/Ds */ inv_D[ j ].Q36_part = one_div_diag_Q36; inv_D[ j ].Q48_part = one_div_diag_Q48; matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */ ptr1 = matrix_adr( A, j, 0, M ); ptr2 = matrix_adr( L_Q16, j + 1, 0, M ); for( i = j + 1; i < M; i++ ) { tmp_32 = 0; for( k = 0; k < j; k++ ) { tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */ } tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */ /* tmp_32 / D_Q0[j] : Divide to Q16 */ matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); /* go to next column */ ptr2 += M; } } } silk_assert( status == 0 ); }
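/*
 * Sketch (not part of SILK): silk_LDL_factorize_FIX computes A = L * D * L'
 * with a unit-diagonal lower-triangular L, retrying with extra diagonal
 * loading whenever a pivot falls below diag_min_value. A plain floating-point
 * LDL' reference, without the conditioning and the two-part reciprocals:
 */
static void ldl_float( const float *A, int M, float *L, float *D )
{
    int i, j, k;
    for( j = 0; j < M; j++ ) {
        float sum = A[ j * M + j ];
        for( k = 0; k < j; k++ ) {
            sum -= L[ j * M + k ] * L[ j * M + k ] * D[ k ];
        }
        D[ j ] = sum;                                   /* pivot d_j */
        L[ j * M + j ] = 1.0f;
        for( i = j + 1; i < M; i++ ) {
            sum = A[ i * M + j ];
            for( k = 0; k < j; k++ ) {
                sum -= L[ i * M + k ] * L[ j * M + k ] * D[ k ];
            }
            L[ i * M + j ] = sum / D[ j ];              /* column j below the diagonal */
        }
    }
}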
static inline void silk_PLC_conceal( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I/O Decoder control */ opus_int16 frame[] /* O LPC residual signal */ ) { opus_int i, j, k; opus_int lag, idx, sLTP_buf_idx, shift1, shift2; opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15, inv_gain_Q16, inv_gain_Q30; opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr; opus_int32 LPC_exc_Q14, LPC_pred_Q10, LTP_pred_Q12; opus_int16 rand_scale_Q14; opus_int16 *B_Q14, *exc_buf_ptr; opus_int32 *sLPC_Q14_ptr; opus_int16 exc_buf[ 2 * MAX_SUB_FRAME_LENGTH ]; opus_int16 A_Q12[ MAX_LPC_ORDER ]; opus_int16 sLTP[ MAX_FRAME_LENGTH ]; opus_int32 sLTP_Q14[ 2 * MAX_FRAME_LENGTH ]; silk_PLC_struct *psPLC = &psDec->sPLC; if (psDec->first_frame_after_reset) silk_memset(psPLC->prevLPC_Q12, 0, MAX_LPC_ORDER*sizeof(psPLC->prevLPC_Q12[ 0 ])); /* Find random noise component */ /* Scale previous excitation signal */ exc_buf_ptr = exc_buf; for( k = 0; k < 2; k++ ) { for( i = 0; i < psPLC->subfr_length; i++ ) { exc_buf_ptr[ i ] = ( opus_int16 )silk_RSHIFT( silk_SMULWW( psDec->exc_Q10[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], psPLC->prevGain_Q16[ k ] ), 10 ); } exc_buf_ptr += psPLC->subfr_length; } /* Find the subframe with lowest energy of the last two and use that as random noise generator */ silk_sum_sqr_shift( &energy1, &shift1, exc_buf, psPLC->subfr_length ); silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length ); if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) { /* First sub-frame has lowest energy */ rand_ptr = &psDec->exc_Q10[ silk_max_int( 0, ( psPLC->nb_subfr - 1 ) * psPLC->subfr_length - RAND_BUF_SIZE ) ]; } else { /* Second sub-frame has lowest energy */ rand_ptr = &psDec->exc_Q10[ silk_max_int( 0, psPLC->nb_subfr * psPLC->subfr_length - RAND_BUF_SIZE ) ]; } /* Setup Gain to random noise component */ B_Q14 = psPLC->LTPCoef_Q14; rand_scale_Q14 = psPLC->randScale_Q14; /* Setup attenuation gains */ harm_Gain_Q15 = HARM_ATT_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; if( psDec->prevSignalType == TYPE_VOICED ) { rand_Gain_Q15 = PLC_RAND_ATTENUATE_V_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; } else { rand_Gain_Q15 = PLC_RAND_ATTENUATE_UV_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; } /* LPC concealment. Apply BWE to previous LPC */ silk_bwexpander( psPLC->prevLPC_Q12, psDec->LPC_order, SILK_FIX_CONST( BWE_COEF, 16 ) ); /* Preload LPC coeficients to array on stack. 
Gives small performance gain */ silk_memcpy( A_Q12, psPLC->prevLPC_Q12, psDec->LPC_order * sizeof( opus_int16 ) ); /* First Lost frame */ if( psDec->lossCnt == 0 ) { rand_scale_Q14 = 1 << 14; /* Reduce random noise Gain for voiced frames */ if( psDec->prevSignalType == TYPE_VOICED ) { for( i = 0; i < LTP_ORDER; i++ ) { rand_scale_Q14 -= B_Q14[ i ]; } rand_scale_Q14 = silk_max_16( 3277, rand_scale_Q14 ); /* 0.2 */ rand_scale_Q14 = ( opus_int16 )silk_RSHIFT( silk_SMULBB( rand_scale_Q14, psPLC->prevLTP_scale_Q14 ), 14 ); } else { /* Reduce random noise for unvoiced frames with high LPC gain */ opus_int32 invGain_Q30, down_scale_Q30; silk_LPC_inverse_pred_gain( &invGain_Q30, psPLC->prevLPC_Q12, psDec->LPC_order ); down_scale_Q30 = silk_min_32( silk_RSHIFT( 1 << 30, LOG2_INV_LPC_GAIN_HIGH_THRES ), invGain_Q30 ); down_scale_Q30 = silk_max_32( silk_RSHIFT( 1 << 30, LOG2_INV_LPC_GAIN_LOW_THRES ), down_scale_Q30 ); down_scale_Q30 = silk_LSHIFT( down_scale_Q30, LOG2_INV_LPC_GAIN_HIGH_THRES ); rand_Gain_Q15 = silk_RSHIFT( silk_SMULWB( down_scale_Q30, rand_Gain_Q15 ), 14 ); } } rand_seed = psPLC->rand_seed; lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 ); sLTP_buf_idx = psDec->ltp_mem_length; /* Rewhiten LTP state */ idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2; silk_assert( idx > 0 ); silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order ); /* Scale LTP state */ inv_gain_Q16 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 32 ); inv_gain_Q16 = silk_min( inv_gain_Q16, silk_int16_MAX ); inv_gain_Q30 = silk_LSHIFT( inv_gain_Q16, 14 ); for( i = idx + psDec->LPC_order; i < psDec->ltp_mem_length; i++ ) { sLTP_Q14[ i ] = silk_SMULWB( inv_gain_Q30, sLTP[ i ] ); } /***************************/ /* LTP synthesis filtering */ /***************************/ for( k = 0; k < psDec->nb_subfr; k++ ) { /* Setup pointer */ pred_lag_ptr = &sLTP_Q14[ sLTP_buf_idx - lag + LTP_ORDER / 2 ]; for( i = 0; i < psDec->subfr_length; i++ ) { /* Unrolled loop */ LTP_pred_Q12 = silk_SMULWB( pred_lag_ptr[ 0 ], B_Q14[ 0 ] ); LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -1 ], B_Q14[ 1 ] ); LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -2 ], B_Q14[ 2 ] ); LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -3 ], B_Q14[ 3 ] ); LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -4 ], B_Q14[ 4 ] ); pred_lag_ptr++; /* Generate LPC excitation */ rand_seed = silk_RAND( rand_seed ); idx = silk_RSHIFT( rand_seed, 25 ) & RAND_BUF_MASK; LPC_exc_Q14 = silk_LSHIFT32( silk_SMULWB( rand_ptr[ idx ], rand_scale_Q14 ), 6 ); /* Random noise part */ LPC_exc_Q14 = silk_ADD32( LPC_exc_Q14, silk_LSHIFT32( LTP_pred_Q12, 2 ) ); /* Harmonic part */ sLTP_Q14[ sLTP_buf_idx ] = LPC_exc_Q14; sLTP_buf_idx++; } /* Gradually reduce LTP gain */ for( j = 0; j < LTP_ORDER; j++ ) { B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 ); } /* Gradually reduce excitation gain */ rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 ); /* Slowly increase pitch lag */ psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 ); psPLC->pitchL_Q8 = silk_min_32( psPLC->pitchL_Q8, silk_LSHIFT( silk_SMULBB( MAX_PITCH_LAG_MS, psDec->fs_kHz ), 8 ) ); lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 ); } /***************************/ /* LPC synthesis filtering */ /***************************/ sLPC_Q14_ptr = &sLTP_Q14[ psDec->ltp_mem_length - MAX_LPC_ORDER ]; /* Copy LPC state */ silk_memcpy( 
sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) ); silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */ for( i = 0; i < psDec->frame_length; i++ ) { /* partly unrolled */ LPC_pred_Q10 = silk_SMULWB( sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] ); for( j = 10; j < psDec->LPC_order; j++ ) { LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - j - 1 ], A_Q12[ j ] ); } /* Add prediction to LPC excitation */ sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 ); /* Scale with Gain */ frame[ i ] = ( opus_int16 )silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], psPLC->prevGain_Q16[ 1 ] ), 14 ) ); } /* Save LPC state */ silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); /**************************************/ /* Update states */ /**************************************/ psPLC->rand_seed = rand_seed; psPLC->randScale_Q14 = rand_scale_Q14; for( i = 0; i < MAX_NB_SUBFR; i++ ) { psDecCtrl->pitchL[ i ] = lag; } }
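/*
 * Sketch (not part of SILK): one concealed excitation sample in the LTP loop
 * above is the sum of a 5-tap LTP (harmonic) prediction over the rewhitened
 * history and a scaled pseudo-random noise sample, with both gains attenuated
 * further for every additional lost frame. In floating point, with hypothetical
 * names and LTP_ORDER / 2 = 2 samples of look-ahead around the pitch lag:
 */
static float plc_excitation_sample_float( const float *sLTP, int idx, int lag,
                                          const float B[ 5 ], float noise, float noise_scale )
{
    const float *p    = &sLTP[ idx - lag + 2 ];
    float        pred = B[ 0 ] * p[ 0 ] + B[ 1 ] * p[ -1 ] + B[ 2 ] * p[ -2 ]
                      + B[ 3 ] * p[ -3 ] + B[ 4 ] * p[ -4 ];
    return pred + noise_scale * noise;
}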
void silk_find_LTP_FIX( opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ const opus_int subfr_length, /* I subframe length */ const opus_int nb_subfr, /* I number of subframes */ const opus_int mem_offset, /* I number of samples in LTP memory */ opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */ ) { opus_int i, k, lshift; const opus_int16 *r_ptr, *lag_ptr; opus_int16 *b_Q14_ptr; opus_int32 regu; opus_int32 *WLTP_ptr; opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26; opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits; opus_int32 temp32, denom32; opus_int extra_shifts; opus_int rr_shifts, maxRshifts, maxRshifts_wxtra, LZs; opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16; opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ]; opus_int32 wd, m_Q12; b_Q14_ptr = b_Q14; WLTP_ptr = WLTP; r_ptr = &r_lpc[ mem_offset ]; for( k = 0; k < nb_subfr; k++ ) { lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */ /* Assure headroom */ LZs = silk_CLZ32( rr[k] ); if( LZs < LTP_CORRS_HEAD_ROOM ) { rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs ); rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs ); } corr_rshifts[ k ] = rr_shifts; silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ] ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */ /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */ silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ] ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */ if( corr_rshifts[ k ] > rr_shifts ) { rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */ } silk_assert( rr[ k ] >= 0 ); regu = 1; regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER ); silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */ /* Limit and store in Q14 */ silk_fit_LTP( b_Q16, b_Q14_ptr ); /* Calculate residual energy */ nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k ] ) */ /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */ extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM ); denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */ silk_RSHIFT( silk_SMULWB( subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts ); /* Q( -corr_rshifts[ k ] + extra_shifts ) */ denom32 = silk_max( denom32, 1 ); silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX ); /* Wght always < 0.5 in Q0 */ temp32 = silk_DIV32( 
silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 ); /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */ temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 ); /* Q26 */ /* Limit temp such that the below scaling never wraps around */ WLTP_max = 0; for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max ); } lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */ silk_assert( 26 - 18 + lshift >= 0 ); if( 26 - 18 + lshift < 31 ) { temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) ); } silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */ w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */ silk_assert( w[k] >= 0 ); r_ptr += subfr_length; b_Q14_ptr += LTP_ORDER; WLTP_ptr += LTP_ORDER * LTP_ORDER; } maxRshifts = 0; for( k = 0; k < nb_subfr; k++ ) { maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts ); } /* Compute LTP coding gain */ if( LTPredCodGain_Q7 != NULL ) { LPC_LTP_res_nrg = 0; LPC_res_nrg = 0; silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */ for( k = 0; k < nb_subfr; k++ ) { LPC_res_nrg = silk_ADD32( LPC_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ } LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */ div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 ); *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ); silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) ); } /* smoothing */ /* d = sum( B, 1 ); */ b_Q14_ptr = b_Q14; for( k = 0; k < nb_subfr; k++ ) { d_Q14[ k ] = 0; for( i = 0; i < LTP_ORDER; i++ ) { d_Q14[ k ] += b_Q14_ptr[ i ]; } b_Q14_ptr += LTP_ORDER; } /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */ max_abs_d_Q14 = 0; max_w_bits = 0; for( k = 0; k < nb_subfr; k++ ) { max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) ); /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */ /* Find bits needed in Q( 18 - maxRshifts ) */ max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts ); } /* max_abs_d_Q14 = (5 << 15); worst case, i.e. 
LTP_ORDER * -silk_int16_MIN */ silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) ); /* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */ extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14; /* Subtract what we got available; bits in output var plus maxRshifts */ extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */ extra_shifts = silk_max_int( extra_shifts, 0 ); maxRshifts_wxtra = maxRshifts + extra_shifts; temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */ wd = 0; for( k = 0; k < nb_subfr; k++ ) { /* w has at least 2 bits of headroom so no overflow should happen */ temp32 = silk_ADD32( temp32, silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) ); /* Q( 18 - maxRshifts_wxtra ) */ wd = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */ } m_Q12 = silk_DIV32_varQ( wd, temp32, 12 ); b_Q14_ptr = b_Q14; for( k = 0; k < nb_subfr; k++ ) { /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */ if( 2 - corr_rshifts[k] > 0 ) { temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] ); } else { temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 ); } g_Q26 = silk_MUL( silk_DIV32( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ), /* Q10 */ silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) ); /* Q16 */ temp32 = 0; for( i = 0; i < LTP_ORDER; i++ ) { delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 ); /* 1638_Q14 = 0.1_Q0 */ temp32 += delta_b_Q14[ i ]; /* Q14 */ } temp32 = silk_DIV32( g_Q26, temp32 ); /* Q14 -> Q12 */ for( i = 0; i < LTP_ORDER; i++ ) { b_Q14_ptr[ i ] = silk_LIMIT_32( (opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 ); } b_Q14_ptr += LTP_ORDER; } }
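/*
 * Sketch (not part of SILK): the LTP coding gain computed above reduces, in
 * floating point, to 3 * log2( E_lpc / E_lpc_ltp ); the fixed-point code
 * reports it in Q7 via silk_lin2log(). Assuming C99 math:
 */
#include <math.h>
static float ltp_coding_gain_float( float lpc_res_nrg, float lpc_ltp_res_nrg )
{
    if( lpc_ltp_res_nrg < 1e-9f ) {
        lpc_ltp_res_nrg = 1e-9f;            /* mirror the "avoid division by zero" clamp */
    }
    return 3.0f * log2f( lpc_res_nrg / lpc_ltp_res_nrg );
}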
/* notch filter just above Nyquist. */ void silk_resampler_private_up2_HQ( opus_int32 *S, /* I/O Resampler state [ 6 ] */ opus_int16 *out, /* O Output signal [ 2 * len ] */ const opus_int16 *in, /* I Input signal [ len ] */ opus_int32 len /* I Number of input samples */ ) { opus_int32 k; opus_int32 in32, out32_1, out32_2, Y, X; silk_assert( silk_resampler_up2_hq_0[ 0 ] > 0 ); silk_assert( silk_resampler_up2_hq_0[ 1 ] > 0 ); silk_assert( silk_resampler_up2_hq_0[ 2 ] < 0 ); silk_assert( silk_resampler_up2_hq_1[ 0 ] > 0 ); silk_assert( silk_resampler_up2_hq_1[ 1 ] > 0 ); silk_assert( silk_resampler_up2_hq_1[ 2 ] < 0 ); /* Internal variables and state are in Q10 format */ for( k = 0; k < len; k++ ) { /* Convert to Q10 */ in32 = silk_LSHIFT( (opus_int32)in[ k ], 10 ); /* First all-pass section for even output sample */ Y = silk_SUB32( in32, S[ 0 ] ); X = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 0 ] ); out32_1 = silk_ADD32( S[ 0 ], X ); S[ 0 ] = silk_ADD32( in32, X ); /* Second all-pass section for even output sample */ Y = silk_SUB32( out32_1, S[ 1 ] ); X = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 1 ] ); out32_2 = silk_ADD32( S[ 1 ], X ); S[ 1 ] = silk_ADD32( out32_1, X ); /* Third all-pass section for even output sample */ Y = silk_SUB32( out32_2, S[ 2 ] ); X = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_0[ 2 ] ); out32_1 = silk_ADD32( S[ 2 ], X ); S[ 2 ] = silk_ADD32( out32_2, X ); /* Apply gain in Q15, convert back to int16 and store to output */ out[ 2 * k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) ); /* First all-pass section for odd output sample */ Y = silk_SUB32( in32, S[ 3 ] ); X = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 0 ] ); out32_1 = silk_ADD32( S[ 3 ], X ); S[ 3 ] = silk_ADD32( in32, X ); /* Second all-pass section for odd output sample */ Y = silk_SUB32( out32_1, S[ 4 ] ); X = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 1 ] ); out32_2 = silk_ADD32( S[ 4 ], X ); S[ 4 ] = silk_ADD32( out32_1, X ); /* Third all-pass section for odd output sample */ Y = silk_SUB32( out32_2, S[ 5 ] ); X = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_1[ 2 ] ); out32_1 = silk_ADD32( S[ 5 ], X ); S[ 5 ] = silk_ADD32( out32_2, X ); /* Apply gain in Q15, convert back to int16 and store to output */ out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) ); } }
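/*
 * Sketch (not part of SILK): silk_resampler_private_up2_HQ upsamples by a
 * factor of 2; each output phase runs the input through a cascade of three
 * first-order allpass sections (the third folds a unity term into its
 * coefficient via silk_SMLAWB), and the two phase cascades together form a
 * half-band pair with a notch just above Nyquist. Reusing the allpass1_float()
 * sketch shown after silk_ana_filt_bank_1, one phase could be written as:
 */
static float up2_phase_float( float in, float S[ 3 ], const float c[ 3 ] )
{
    float y = allpass1_float( in, &S[ 0 ], c[ 0 ] );
    y       = allpass1_float( y,  &S[ 1 ], c[ 1 ] );
    return    allpass1_float( y,  &S[ 2 ], c[ 2 ] );
}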
/* Calculates correlation matrix X'*X */ void silk_corrMatrix_FIX( const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ const opus_int L, /* I Length of vectors */ const opus_int order, /* I Max lag for correlation */ const opus_int head_room, /* I Desired headroom */ opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ opus_int *rshifts /* I/O Right shifts of correlations */ ) { opus_int i, j, lag, rshifts_local, head_room_rshifts; opus_int32 energy; const opus_int16 *ptr1, *ptr2; /* Calculate energy to find shift used to fit in 32 bits */ silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 ); /* Add shifts to get the desired head room */ head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 ); energy = silk_RSHIFT32( energy, head_room_rshifts ); rshifts_local += head_room_rshifts; /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */ /* Remove contribution of first order - 1 samples */ for( i = 0; i < order - 1; i++ ) { energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local ); } if( rshifts_local < *rshifts ) { /* Adjust energy */ energy = silk_RSHIFT32( energy, *rshifts - rshifts_local ); rshifts_local = *rshifts; } /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */ /* Fill out the diagonal of the correlation matrix */ matrix_ptr( XX, 0, 0, order ) = energy; ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */ for( j = 1; j < order; j++ ) { energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) ); energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) ); matrix_ptr( XX, j, j, order ) = energy; } ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */ /* Calculate the remaining elements of the correlation matrix */ if( rshifts_local > 0 ) { /* Right shifting used */ for( lag = 1; lag < order; lag++ ) { /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ energy = 0; for( i = 0; i < L; i++ ) { energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local ); } /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ matrix_ptr( XX, lag, 0, order ) = energy; matrix_ptr( XX, 0, lag, order ) = energy; for( j = 1; j < ( order - lag ); j++ ) { energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) ); energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) ); matrix_ptr( XX, lag + j, j, order ) = energy; matrix_ptr( XX, j, lag + j, order ) = energy; } ptr2--; /* Update pointer to first sample of next column (lag) in X */ } } else { for( lag = 1; lag < order; lag++ ) { /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ energy = silk_inner_prod_aligned( ptr1, ptr2, L ); matrix_ptr( XX, lag, 0, order ) = energy; matrix_ptr( XX, 0, lag, order ) = energy; /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ for( j = 1; j < ( order - lag ); j++ ) { energy = silk_SUB32( energy, silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ) ); energy = silk_SMLABB( energy, ptr1[ -j ], ptr2[ -j ] ); matrix_ptr( XX, lag + j, j, order ) = energy; matrix_ptr( XX, j, lag + j, order ) = energy; } ptr2--;/* Update pointer to first sample of next column (lag) in X */ } } *rshifts = rshifts_local; }
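/*
 * Sketch (not part of SILK): column j of the data matrix X starts at
 * x[ order - 1 - j ] and has length L; the code above computes one full inner
 * product per diagonal of X' * X and then slides along the diagonal with O(1)
 * updates per element. A naive O(order^2 * L) floating-point reference for
 * checking the result:
 */
static void corr_matrix_float( const float *x, int L, int order, float *XX )
{
    int i, j, n;
    for( i = 0; i < order; i++ ) {
        for( j = 0; j < order; j++ ) {
            float acc = 0.0f;
            for( n = 0; n < L; n++ ) {
                acc += x[ order - 1 - i + n ] * x[ order - 1 - j + n ];
            }
            XX[ i * order + j ] = acc;     /* matrix_ptr( XX, i, j, order ) analogue */
        }
    }
}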
static inline opus_int16 *silk_resampler_private_down_FIR_INTERPOL( opus_int16 *out, opus_int32 *buf, const opus_int16 *FIR_Coefs, opus_int FIR_Order, opus_int FIR_Fracs, opus_int32 max_index_Q16, opus_int32 index_increment_Q16 ) { opus_int32 index_Q16, res_Q6; opus_int32 *buf_ptr; opus_int32 interpol_ind; const opus_int16 *interpol_ptr; switch( FIR_Order ) { case RESAMPLER_DOWN_ORDER_FIR0: for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { /* Integer part gives pointer to buffered input */ buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); /* Fractional part gives interpolation coefficients */ interpol_ind = silk_SMULWB( index_Q16 & 0xFFFF, FIR_Fracs ); /* Inner product */ interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * interpol_ind ]; res_Q6 = silk_SMULWB( buf_ptr[ 0 ], interpol_ptr[ 0 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], interpol_ptr[ 1 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], interpol_ptr[ 2 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], interpol_ptr[ 3 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], interpol_ptr[ 4 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 5 ], interpol_ptr[ 5 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 6 ], interpol_ptr[ 6 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 7 ], interpol_ptr[ 7 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 8 ], interpol_ptr[ 8 ] ); interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * ( FIR_Fracs - 1 - interpol_ind ) ]; res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 17 ], interpol_ptr[ 0 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 16 ], interpol_ptr[ 1 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 15 ], interpol_ptr[ 2 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 14 ], interpol_ptr[ 3 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 13 ], interpol_ptr[ 4 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 12 ], interpol_ptr[ 5 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 11 ], interpol_ptr[ 6 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 10 ], interpol_ptr[ 7 ] ); res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 9 ], interpol_ptr[ 8 ] ); /* Scale down, saturate and store in output array */ *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); } break; case RESAMPLER_DOWN_ORDER_FIR1: for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { /* Integer part gives pointer to buffered input */ buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); /* Inner product */ res_Q6 = silk_SMULWB( silk_ADD32( buf_ptr[ 0 ], buf_ptr[ 23 ] ), FIR_Coefs[ 0 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 1 ], buf_ptr[ 22 ] ), FIR_Coefs[ 1 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 2 ], buf_ptr[ 21 ] ), FIR_Coefs[ 2 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 3 ], buf_ptr[ 20 ] ), FIR_Coefs[ 3 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 4 ], buf_ptr[ 19 ] ), FIR_Coefs[ 4 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 5 ], buf_ptr[ 18 ] ), FIR_Coefs[ 5 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 6 ], buf_ptr[ 17 ] ), FIR_Coefs[ 6 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 7 ], buf_ptr[ 16 ] ), FIR_Coefs[ 7 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 8 ], buf_ptr[ 15 ] ), FIR_Coefs[ 8 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 9 ], buf_ptr[ 14 ] ), FIR_Coefs[ 9 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 13 ] ), FIR_Coefs[ 10 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 12 ] ), FIR_Coefs[ 11 ] ); /* Scale down, saturate and store in output array */ 
*out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); } break; case RESAMPLER_DOWN_ORDER_FIR2: for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { /* Integer part gives pointer to buffered input */ buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); /* Inner product */ res_Q6 = silk_SMULWB( silk_ADD32( buf_ptr[ 0 ], buf_ptr[ 35 ] ), FIR_Coefs[ 0 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 1 ], buf_ptr[ 34 ] ), FIR_Coefs[ 1 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 2 ], buf_ptr[ 33 ] ), FIR_Coefs[ 2 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 3 ], buf_ptr[ 32 ] ), FIR_Coefs[ 3 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 4 ], buf_ptr[ 31 ] ), FIR_Coefs[ 4 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 5 ], buf_ptr[ 30 ] ), FIR_Coefs[ 5 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 6 ], buf_ptr[ 29 ] ), FIR_Coefs[ 6 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 7 ], buf_ptr[ 28 ] ), FIR_Coefs[ 7 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 8 ], buf_ptr[ 27 ] ), FIR_Coefs[ 8 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 9 ], buf_ptr[ 26 ] ), FIR_Coefs[ 9 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 25 ] ), FIR_Coefs[ 10 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 24 ] ), FIR_Coefs[ 11 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 12 ], buf_ptr[ 23 ] ), FIR_Coefs[ 12 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 13 ], buf_ptr[ 22 ] ), FIR_Coefs[ 13 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 14 ], buf_ptr[ 21 ] ), FIR_Coefs[ 14 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 15 ], buf_ptr[ 20 ] ), FIR_Coefs[ 15 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 16 ], buf_ptr[ 19 ] ), FIR_Coefs[ 16 ] ); res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 17 ], buf_ptr[ 18 ] ), FIR_Coefs[ 17 ] ); /* Scale down, saturate and store in output array */ *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); } break; default: silk_assert( 0 ); } return out; }
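/*
 * Sketch (not part of SILK): all three cases above are driven by the same Q16
 * phase accumulator; its integer part indexes the buffered input and, for the
 * FIR0 case, its fractional part selects one of FIR_Fracs coefficient phases
 * (FIR1/FIR2 use a single symmetric phase). With stdint types standing in for
 * opus_int32/opus_int64:
 */
#include <stdint.h>
static void split_q16_index( int32_t index_Q16, int32_t n_fracs,
                             int32_t *buf_index, int32_t *interpol_ind )
{
    *buf_index    = index_Q16 >> 16;                                                    /* silk_RSHIFT( index_Q16, 16 )                 */
    *interpol_ind = (int32_t)( ( (int64_t)( index_Q16 & 0xFFFF ) * n_fracs ) >> 16 );   /* silk_SMULWB( index_Q16 & 0xFFFF, FIR_Fracs ) */
}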
/* Finds LPC vector from correlations, and converts to NLSF */ void silk_find_LPC_FIX( silk_encoder_state *psEncC, /* I/O Encoder state */ opus_int16 NLSF_Q15[], /* O NLSFs */ const opus_int16 x[], /* I Input signal */ const opus_int32 minInvGain_Q30 /* I Inverse of max prediction gain */ ) { opus_int k, subfr_length; opus_int32 a_Q16[ MAX_LPC_ORDER ]; opus_int isInterpLower, shift; opus_int32 res_nrg0, res_nrg1; opus_int rshift0, rshift1; /* Used only for LSF interpolation */ opus_int32 a_tmp_Q16[ MAX_LPC_ORDER ], res_nrg_interp, res_nrg, res_tmp_nrg; opus_int res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q; opus_int16 a_tmp_Q12[ MAX_LPC_ORDER ]; opus_int16 NLSF0_Q15[ MAX_LPC_ORDER ]; SAVE_STACK; subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder; /* Default: no interpolation */ psEncC->indices.NLSFInterpCoef_Q2 = 4; /* Burg AR analysis for the full frame */ silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch ); if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) { VARDECL( opus_int16, LPC_res ); /* Optimal solution for last 10 ms */ silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch ); /* subtract residual energy here, as that's easier than adding it to the */ /* residual energy of the first 10 ms in each iteration of the search below */ shift = res_tmp_nrg_Q - res_nrg_Q; if( shift >= 0 ) { if( shift < 32 ) { res_nrg = res_nrg - silk_RSHIFT( res_tmp_nrg, shift ); } } else { silk_assert( shift > -32 ); res_nrg = silk_RSHIFT( res_nrg, -shift ) - res_tmp_nrg; res_nrg_Q = res_tmp_nrg_Q; } /* Convert to NLSFs */ silk_A2NLSF( NLSF_Q15, a_tmp_Q16, psEncC->predictLPCOrder ); ALLOC( LPC_res, 2 * subfr_length, opus_int16 ); /* Search over interpolation indices to find the one with lowest residual energy */ for( k = 3; k >= 0; k-- ) { /* Interpolate NLSFs for first half */ silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder ); /* Convert to LPC for residual energy evaluation */ silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder ); /* Calculate residual energy with NLSF interpolation */ silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder ); silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder ); silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder ); /* Add subframe energies from first half frame */ shift = rshift0 - rshift1; if( shift >= 0 ) { res_nrg1 = silk_RSHIFT( res_nrg1, shift ); res_nrg_interp_Q = -rshift0; } else { res_nrg0 = silk_RSHIFT( res_nrg0, -shift ); res_nrg_interp_Q = -rshift1; } res_nrg_interp = silk_ADD32( res_nrg0, res_nrg1 ); /* Compare with first half energy without NLSF interpolation, or best interpolated value so far */ shift = res_nrg_interp_Q - res_nrg_Q; if( shift >= 0 ) { if( silk_RSHIFT( res_nrg_interp, shift ) < res_nrg ) { isInterpLower = silk_TRUE; } else { isInterpLower = silk_FALSE; } } else { if( -shift < 32 ) { if( res_nrg_interp < silk_RSHIFT( res_nrg, -shift ) ) { isInterpLower = silk_TRUE; } else { isInterpLower = silk_FALSE; } } else { isInterpLower = silk_FALSE; } } /* Determine whether current interpolated NLSFs are best so far */ if( isInterpLower == silk_TRUE ) { /* Interpolation has lower 
residual energy */ res_nrg = res_nrg_interp; res_nrg_Q = res_nrg_interp_Q; psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k; } } } if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) { /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */ silk_A2NLSF( NLSF_Q15, a_Q16, psEncC->predictLPCOrder ); } silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); RESTORE_STACK; }
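/*
 * Sketch (not part of SILK): residual energies in the interpolation search are
 * carried as a 32-bit mantissa plus a Q exponent, value = nrg * 2^(-Q). The
 * candidates are compared by shifting one mantissa to the other's exponent,
 * as in the shift/compare ladder above (hypothetical helper):
 */
#include <stdint.h>
static int energy_is_lower( int32_t nrg_a, int q_a, int32_t nrg_b, int q_b )
{
    int shift = q_a - q_b;
    if( shift >= 0 ) {
        return ( shift < 32 ) ? ( ( nrg_a >> shift ) < nrg_b ) : 1;
    } else {
        return ( -shift < 32 ) ? ( nrg_a < ( nrg_b >> -shift ) ) : 0;
    }
}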
/* Compute reflection coefficients from input signal */ void silk_burg_modified( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceeding samples) */ const opus_int nb_subfr, /* I Number of subframes stacked in x */ const opus_int32 WhiteNoiseFrac_Q32, /* I Fraction added to zero-lag autocorrelation */ const opus_int D /* I Order */ ) { opus_int k, n, s, lz, rshifts, rshifts_extra; opus_int32 C0, num, nrg, rc_Q31, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; const opus_int16 *x_ptr; opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); silk_assert( nb_subfr <= MAX_NB_SUBFR ); /* Compute autocorrelations, added over subframes */ silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); if( rshifts > MAX_RSHIFTS ) { C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); silk_assert( C0 > 0 ); rshifts = MAX_RSHIFTS; } else { lz = silk_CLZ32( C0 ) - 1; rshifts_extra = N_BITS_HEAD_ROOM - lz; if( rshifts_extra > 0 ) { rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); C0 = silk_RSHIFT32( C0, rshifts_extra ); } else { rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); C0 = silk_LSHIFT32( C0, -rshifts_extra ); } rshifts += rshifts_extra; } silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); if( rshifts > 0 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts ); } } } else { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr + n, subfr_length - n ), -rshifts ); } } } silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); /* Initialize */ CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( WhiteNoiseFrac_Q32, C0 ) + 1; /* Q(-rshifts)*/ for( n = 0; n < D; n++ ) { /* Update first row of correlation matrix (without first element) */ /* Update last row of correlation matrix (without last element, stored in reversed order) */ /* Update C * Af */ /* Update C * flipud(Af) (stored in reversed order) */ if( rshifts > -2 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts)*/ x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts)*/ tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16)*/ tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16)*/ for( k = 0; k < n; k++ ) { C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts )*/ C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts )*/ Atmp_QA = Af_QA[ k ]; tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16)*/ tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16)*/ } tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* 
Q(16-rshifts)*/ tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts)*/ for( k = 0; k <= n; k++ ) { CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift )*/ CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift )*/ } } } else { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts )*/ x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts )*/ tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17*/ tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17*/ for( k = 0; k < n; k++ ) { C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts )*/ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts )*/ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17*/ tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17*/ tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17*/ } tmp1 = -tmp1; /* Q17*/ tmp2 = -tmp2; /* Q17*/ for( k = 0; k <= n; k++ ) { CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift )*/ CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift )*/ } } } /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ tmp1 = C_first_row[ n ]; /* Q( -rshifts )*/ tmp2 = C_last_row[ n ]; /* Q( -rshifts )*/ num = 0; /* Q( -rshifts )*/ nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts )*/ for( k = 0; k < n; k++ ) { Atmp_QA = Af_QA[ k ]; lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; lz = silk_min( 32 - QA, lz ); Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz )*/ tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts )*/ tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts )*/ num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts )*/ nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts )*/ } CAf[ n + 1 ] = tmp1; /* Q( -rshifts )*/ CAb[ n + 1 ] = tmp2; /* Q( -rshifts )*/ num = silk_ADD32( num, tmp2 ); /* Q( -rshifts )*/ num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts )*/ /* Calculate the next order reflection (parcor) coefficient */ if( silk_abs( num ) < nrg ) { rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); } else { /* Negative energy or ratio too high; set remaining coefficients to zero and exit loop */ silk_memset( &Af_QA[ n ], 0, ( D - n ) * sizeof( opus_int32 ) ); silk_assert( 0 ); break; } /* Update the AR coefficients */ for( k = 0; k < (n + 1) >> 1; k++ ) { tmp1 = Af_QA[ k ]; /* QA*/ tmp2 = Af_QA[ n - k - 1 ]; /* QA*/ Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA*/ Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA*/ } Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA*/ /* Update C * Af and C * Ab */ for( k = 0; k <= n + 1; k++ ) { tmp1 = CAf[ k ]; /* Q( -rshifts )*/ tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts )*/ CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts )*/ CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts )*/ } } /* Return residual energy */ 
nrg = CAf[ 0 ]; /* Q( -rshifts )*/ tmp1 = 1 << 16; /* Q16*/ for( k = 0; k < D; k++ ) { Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16*/ nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts )*/ tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16*/ A_Q16[ k ] = -Atmp1; } *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( WhiteNoiseFrac_Q32, C0 ), -tmp1 ); /* Q( -rshifts )*/ *res_nrg_Q = -rshifts; }
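/*
 * Sketch (not part of SILK): silk_burg_modified() above is a fixed-point Burg
 * recursion with per-subframe correlation accumulation, white-noise
 * conditioning of the zero-lag term and dynamic scaling. The textbook Burg
 * method it is derived from, in plain floating point for a single frame,
 * looks roughly like this (returns the residual energy):
 */
#define BURG_SKETCH_MAX_N 512
#define BURG_SKETCH_MAX_D 24
static float burg_float( const float *x, int N, int D, float a[ BURG_SKETCH_MAX_D ] )
{
    /* Error-filter convention: e[n] = x[n] + a[0]*x[n-1] + ... + a[D-1]*x[n-D] */
    float f[ BURG_SKETCH_MAX_N ], b[ BURG_SKETCH_MAX_N ], a_prev[ BURG_SKETCH_MAX_D ];
    float E = 0.0f;
    int   m, n, i;

    for( n = 0; n < N; n++ ) {
        f[ n ] = b[ n ] = x[ n ];                   /* forward / backward prediction errors */
        E += x[ n ] * x[ n ];
    }
    for( m = 0; m < D; m++ ) {
        float num = 0.0f, den = 1e-12f, k;          /* tiny den avoids division by zero     */
        for( n = m + 1; n < N; n++ ) {
            num += f[ n ] * b[ n - 1 ];
            den += f[ n ] * f[ n ] + b[ n - 1 ] * b[ n - 1 ];
        }
        k = -2.0f * num / den;                      /* reflection (parcor) coefficient      */
        for( i = 0; i < m; i++ ) {
            a_prev[ i ] = a[ i ];
        }
        for( i = 0; i < m; i++ ) {
            a[ i ] = a_prev[ i ] + k * a_prev[ m - 1 - i ];  /* Levinson-style update       */
        }
        a[ m ] = k;
        for( n = N - 1; n > m; n-- ) {              /* update errors, newest sample first   */
            float fn = f[ n ] + k * b[ n - 1 ];
            b[ n ]   = b[ n - 1 ] + k * f[ n ];
            f[ n ]   = fn;
        }
        E *= ( 1.0f - k * k );                      /* residual energy recursion            */
    }
    return E;
}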
/* Compute reflection coefficients from input signal */ void silk_burg_modified_sse4_1( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ const opus_int16 x[], /* I Input signal, length: nb_subfr * (D + subfr_length) */ const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ const opus_int nb_subfr, /* I Number of subframes stacked in x */ const opus_int D, /* I Order */ int arch /* I Run-time architecture */ ) { opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; const opus_int16 *x_ptr; opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; __m128i CONST1 = _mm_set1_epi32(1); silk_assert(subfr_length * nb_subfr <= MAX_FRAME_SIZE); /* Compute autocorrelations, added over subframes */ silk_sum_sqr_shift(&C0, &rshifts, x, nb_subfr * subfr_length); if(rshifts > MAX_RSHIFTS) { C0 = silk_LSHIFT32(C0, rshifts - MAX_RSHIFTS); silk_assert(C0 > 0); rshifts = MAX_RSHIFTS; } else { lz = silk_CLZ32(C0) - 1; rshifts_extra = N_BITS_HEAD_ROOM - lz; if(rshifts_extra > 0) { rshifts_extra = silk_min(rshifts_extra, MAX_RSHIFTS - rshifts); C0 = silk_RSHIFT32(C0, rshifts_extra); } else { rshifts_extra = silk_max(rshifts_extra, MIN_RSHIFTS - rshifts); C0 = silk_LSHIFT32(C0, -rshifts_extra); } rshifts += rshifts_extra; } CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL(SILK_FIX_CONST(FIND_LPC_COND_FAC, 32), C0) + 1; /* Q(-rshifts) */ silk_memset(C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof(opus_int32)); if(rshifts > 0) { for(s = 0; s < nb_subfr; s++) { x_ptr = x + s * subfr_length; for(n = 1; n < D + 1; n++) { C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64(x_ptr, x_ptr + n, subfr_length - n, arch), rshifts); } } } else { for(s = 0; s < nb_subfr; s++) { int i; opus_int32 d; x_ptr = x + s * subfr_length; celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch); for(n = 1; n < D + 1; n++) { for (i = n + subfr_length - D, d = 0; i < subfr_length; i++) d = MAC16_16(d, x_ptr[ i ], x_ptr[ i - n ]); xcorr[ n - 1 ] += d; } for(n = 1; n < D + 1; n++) { C_first_row[ n - 1 ] += silk_LSHIFT32(xcorr[ n - 1 ], -rshifts); } } } silk_memcpy(C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof(opus_int32)); /* Initialize */ CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL(SILK_FIX_CONST(FIND_LPC_COND_FAC, 32), C0) + 1; /* Q(-rshifts) */ invGain_Q30 = (opus_int32)1 << 30; reached_max_gain = 0; for(n = 0; n < D; n++) { /* Update first row of correlation matrix (without first element) */ /* Update last row of correlation matrix (without last element, stored in reversed order) */ /* Update C * Af */ /* Update C * flipud(Af) (stored in reversed order) */ if(rshifts > -2) { for(s = 0; s < nb_subfr; s++) { x_ptr = x + s * subfr_length; x1 = -silk_LSHIFT32((opus_int32)x_ptr[ n ], 16 - rshifts); /* Q(16-rshifts) */ x2 = -silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts); /* Q(16-rshifts) */ tmp1 = silk_LSHIFT32((opus_int32)x_ptr[ n ], QA - 16); /* Q(QA-16) */ tmp2 = 
silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16); /* Q(QA-16) */ for(k = 0; k < n; k++) { C_first_row[ k ] = silk_SMLAWB(C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q(-rshifts) */ C_last_row[ k ] = silk_SMLAWB(C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ]); /* Q(-rshifts) */ Atmp_QA = Af_QA[ k ]; tmp1 = silk_SMLAWB(tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ tmp2 = silk_SMLAWB(tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ]); /* Q(QA-16) */ } tmp1 = silk_LSHIFT32(-tmp1, 32 - QA - rshifts); /* Q(16-rshifts) */ tmp2 = silk_LSHIFT32(-tmp2, 32 - QA - rshifts); /* Q(16-rshifts) */ for(k = 0; k <= n; k++) { CAf[ k ] = silk_SMLAWB(CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q(-rshift) */ CAb[ k ] = silk_SMLAWB(CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ]); /* Q(-rshift) */ } } } else { for(s = 0; s < nb_subfr; s++) { x_ptr = x + s * subfr_length; x1 = -silk_LSHIFT32((opus_int32)x_ptr[ n ], -rshifts); /* Q(-rshifts) */ x2 = -silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts); /* Q(-rshifts) */ tmp1 = silk_LSHIFT32((opus_int32)x_ptr[ n ], 17); /* Q17 */ tmp2 = silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], 17); /* Q17 */ X1_3210 = _mm_set1_epi32(x1); X2_3210 = _mm_set1_epi32(x2); TMP1_3210 = _mm_setzero_si128(); TMP2_3210 = _mm_setzero_si128(); for(k = 0; k < n - 3; k += 4) { PTR_3210 = OP_CVTEPI16_EPI32_M64(&x_ptr[ n - k - 1 - 3 ]); SUBFR_3210 = OP_CVTEPI16_EPI32_M64(&x_ptr[ subfr_length - n + k ]); FIRST_3210 = _mm_loadu_si128((__m128i *)&C_first_row[ k ]); PTR_3210 = _mm_shuffle_epi32(PTR_3210, _MM_SHUFFLE(0, 1, 2, 3)); LAST_3210 = _mm_loadu_si128((__m128i *)&C_last_row[ k ]); ATMP_3210 = _mm_loadu_si128((__m128i *)&Af_QA[ k ]); T1_3210 = _mm_mullo_epi32(PTR_3210, X1_3210); T2_3210 = _mm_mullo_epi32(SUBFR_3210, X2_3210); ATMP_3210 = _mm_srai_epi32(ATMP_3210, 7); ATMP_3210 = _mm_add_epi32(ATMP_3210, CONST1); ATMP_3210 = _mm_srai_epi32(ATMP_3210, 1); FIRST_3210 = _mm_add_epi32(FIRST_3210, T1_3210); LAST_3210 = _mm_add_epi32(LAST_3210, T2_3210); PTR_3210 = _mm_mullo_epi32(ATMP_3210, PTR_3210); SUBFR_3210 = _mm_mullo_epi32(ATMP_3210, SUBFR_3210); _mm_storeu_si128((__m128i *)&C_first_row[ k ], FIRST_3210); _mm_storeu_si128((__m128i *)&C_last_row[ k ], LAST_3210); TMP1_3210 = _mm_add_epi32(TMP1_3210, PTR_3210); TMP2_3210 = _mm_add_epi32(TMP2_3210, SUBFR_3210); } TMP1_3210 = _mm_add_epi32(TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210)); TMP2_3210 = _mm_add_epi32(TMP2_3210, _mm_unpackhi_epi64(TMP2_3210, TMP2_3210)); TMP1_3210 = _mm_add_epi32(TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E)); TMP2_3210 = _mm_add_epi32(TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E)); tmp1 += _mm_cvtsi128_si32(TMP1_3210); tmp2 += _mm_cvtsi128_si32(TMP2_3210); for(; k < n; k++) { C_first_row[ k ] = silk_MLA(C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q(-rshifts) */ C_last_row[ k ] = silk_MLA(C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ]); /* Q(-rshifts) */ Atmp1 = silk_RSHIFT_ROUND(Af_QA[ k ], QA - 17); /* Q17 */ tmp1 = silk_MLA(tmp1, x_ptr[ n - k - 1 ], Atmp1); /* Q17 */ tmp2 = silk_MLA(tmp2, x_ptr[ subfr_length - n + k ], Atmp1); /* Q17 */ } tmp1 = -tmp1; /* Q17 */ tmp2 = -tmp2; /* Q17 */ { __m128i xmm_tmp1, xmm_tmp2; __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1; __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1; xmm_tmp1 = _mm_set1_epi32(tmp1); xmm_tmp2 = _mm_set1_epi32(tmp2); for(k = 0; k <= n - 3; k += 4) { xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64(&x_ptr[ n - k - 3 ]); xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64(&x_ptr[ subfr_length - n + k - 1 ]); 
xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32(xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE(0, 1, 2, 3)); xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32(xmm_x_ptr_n_k_x2x0, -rshifts - 1); xmm_x_ptr_sub_x2x0 = _mm_slli_epi32(xmm_x_ptr_sub_x2x0, -rshifts - 1); /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4)*/ xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32(xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE(0, 3, 2, 1)); xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32(xmm_x_ptr_sub_x2x0, _MM_SHUFFLE(0, 3, 2, 1)); xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32(xmm_x_ptr_n_k_x2x0, xmm_tmp1); xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32(xmm_x_ptr_n_k_x3x1, xmm_tmp1); xmm_x_ptr_sub_x2x0 = _mm_mul_epi32(xmm_x_ptr_sub_x2x0, xmm_tmp2); xmm_x_ptr_sub_x3x1 = _mm_mul_epi32(xmm_x_ptr_sub_x3x1, xmm_tmp2); xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64(xmm_x_ptr_n_k_x2x0, 16); xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64(xmm_x_ptr_n_k_x3x1, 16); xmm_x_ptr_sub_x2x0 = _mm_srli_epi64(xmm_x_ptr_sub_x2x0, 16); xmm_x_ptr_sub_x3x1 = _mm_slli_epi64(xmm_x_ptr_sub_x3x1, 16); xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16(xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC); xmm_x_ptr_sub_x2x0 = _mm_blend_epi16(xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC); X1_3210 = _mm_loadu_si128((__m128i *)&CAf[ k ]); PTR_3210 = _mm_loadu_si128((__m128i *)&CAb[ k ]); X1_3210 = _mm_add_epi32(X1_3210, xmm_x_ptr_n_k_x2x0); PTR_3210 = _mm_add_epi32(PTR_3210, xmm_x_ptr_sub_x2x0); _mm_storeu_si128((__m128i *)&CAf[ k ], X1_3210); _mm_storeu_si128((__m128i *)&CAb[ k ], PTR_3210); } for(; k <= n; k++) { CAf[ k ] = silk_SMLAWW(CAf[ k ], tmp1, silk_LSHIFT32((opus_int32)x_ptr[ n - k ], -rshifts - 1)); /* Q(-rshift) */ CAb[ k ] = silk_SMLAWW(CAb[ k ], tmp2, silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1)); /* Q(-rshift) */ } } } } /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ tmp1 = C_first_row[ n ]; /* Q(-rshifts) */ tmp2 = C_last_row[ n ]; /* Q(-rshifts) */ num = 0; /* Q(-rshifts) */ nrg = silk_ADD32(CAb[ 0 ], CAf[ 0 ]); /* Q(1-rshifts) */ for(k = 0; k < n; k++) { Atmp_QA = Af_QA[ k ]; lz = silk_CLZ32(silk_abs(Atmp_QA)) - 1; lz = silk_min(32 - QA, lz); Atmp1 = silk_LSHIFT32(Atmp_QA, lz); /* Q(QA + lz) */ tmp1 = silk_ADD_LSHIFT32(tmp1, silk_SMMUL(C_last_row[ n - k - 1 ], Atmp1), 32 - QA - lz); /* Q(-rshifts) */ tmp2 = silk_ADD_LSHIFT32(tmp2, silk_SMMUL(C_first_row[ n - k - 1 ], Atmp1), 32 - QA - lz); /* Q(-rshifts) */ num = silk_ADD_LSHIFT32(num, silk_SMMUL(CAb[ n - k ], Atmp1), 32 - QA - lz); /* Q(-rshifts) */ nrg = silk_ADD_LSHIFT32(nrg, silk_SMMUL(silk_ADD32(CAb[ k + 1 ], CAf[ k + 1 ]), Atmp1), 32 - QA - lz); /* Q(1-rshifts) */ } CAf[ n + 1 ] = tmp1; /* Q(-rshifts) */ CAb[ n + 1 ] = tmp2; /* Q(-rshifts) */ num = silk_ADD32(num, tmp2); /* Q(-rshifts) */ num = silk_LSHIFT32(-num, 1); /* Q(1-rshifts) */ /* Calculate the next order reflection (parcor) coefficient */ if(silk_abs(num) < nrg) { rc_Q31 = silk_DIV32_varQ(num, nrg, 31); } else { rc_Q31 = (num > 0) ? 
silk_int32_MAX : silk_int32_MIN; } /* Update inverse prediction gain */ tmp1 = ((opus_int32)1 << 30) - silk_SMMUL(rc_Q31, rc_Q31); tmp1 = silk_LSHIFT(silk_SMMUL(invGain_Q30, tmp1), 2); if(tmp1 <= minInvGain_Q30) { /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ tmp2 = ((opus_int32)1 << 30) - silk_DIV32_varQ(minInvGain_Q30, invGain_Q30, 30); /* Q30 */ rc_Q31 = silk_SQRT_APPROX(tmp2); /* Q15 */ /* Newton-Raphson iteration */ rc_Q31 = silk_RSHIFT32(rc_Q31 + silk_DIV32(tmp2, rc_Q31), 1); /* Q15 */ rc_Q31 = silk_LSHIFT32(rc_Q31, 16); /* Q31 */ if(num < 0) { /* Ensure adjusted reflection coefficients has the original sign */ rc_Q31 = -rc_Q31; } invGain_Q30 = minInvGain_Q30; reached_max_gain = 1; } else { invGain_Q30 = tmp1; } /* Update the AR coefficients */ for(k = 0; k < (n + 1) >> 1; k++) { tmp1 = Af_QA[ k ]; /* QA */ tmp2 = Af_QA[ n - k - 1 ]; /* QA */ Af_QA[ k ] = silk_ADD_LSHIFT32(tmp1, silk_SMMUL(tmp2, rc_Q31), 1); /* QA */ Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32(tmp2, silk_SMMUL(tmp1, rc_Q31), 1); /* QA */ } Af_QA[ n ] = silk_RSHIFT32(rc_Q31, 31 - QA); /* QA */ if(reached_max_gain) { /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ for(k = n + 1; k < D; k++) { Af_QA[ k ] = 0; } break; } /* Update C * Af and C * Ab */ for(k = 0; k <= n + 1; k++) { tmp1 = CAf[ k ]; /* Q(-rshifts) */ tmp2 = CAb[ n - k + 1 ]; /* Q(-rshifts) */ CAf[ k ] = silk_ADD_LSHIFT32(tmp1, silk_SMMUL(tmp2, rc_Q31), 1); /* Q(-rshifts) */ CAb[ n - k + 1 ] = silk_ADD_LSHIFT32(tmp2, silk_SMMUL(tmp1, rc_Q31), 1); /* Q(-rshifts) */ } } if(reached_max_gain) { for(k = 0; k < D; k++) { /* Scale coefficients */ A_Q16[ k ] = -silk_RSHIFT_ROUND(Af_QA[ k ], QA - 16); } /* Subtract energy of preceding samples from C0 */ if(rshifts > 0) { for(s = 0; s < nb_subfr; s++) { x_ptr = x + s * subfr_length; C0 -= (opus_int32)silk_RSHIFT64(silk_inner_prod16_aligned_64(x_ptr, x_ptr, D, arch), rshifts); } } else { for(s = 0; s < nb_subfr; s++) { x_ptr = x + s * subfr_length; C0 -= silk_LSHIFT32(silk_inner_prod_aligned(x_ptr, x_ptr, D, arch), -rshifts); } } /* Approximate residual energy */ *res_nrg = silk_LSHIFT(silk_SMMUL(invGain_Q30, C0), 2); *res_nrg_Q = -rshifts; } else { /* Return residual energy */ nrg = CAf[ 0 ]; /* Q(-rshifts) */ tmp1 = (opus_int32)1 << 16; /* Q16 */ for(k = 0; k < D; k++) { Atmp1 = silk_RSHIFT_ROUND(Af_QA[ k ], QA - 16); /* Q16 */ nrg = silk_SMLAWW(nrg, CAf[ k + 1 ], Atmp1); /* Q(-rshifts) */ tmp1 = silk_SMLAWW(tmp1, Atmp1, Atmp1); /* Q16 */ A_Q16[ k ] = -Atmp1; } *res_nrg = silk_SMLAWW(nrg, silk_SMMUL(SILK_FIX_CONST(FIND_LPC_COND_FAC, 32), C0), -tmp1);/* Q(-rshifts) */ *res_nrg_Q = -rshifts; } }
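/*
 * Sketch (not part of SILK): the SSE4.1 path above vectorizes the
 * silk_SMLAWW() accumulations (and approximates the Af_QA scaling) four lanes
 * at a time. The scalar operation it reproduces is, semantically, the
 * following, with stdint types standing in for opus_int32/opus_int64:
 */
#include <stdint.h>
static int32_t smlaww_ref( int32_t a, int32_t b, int32_t c )
{
    return (int32_t)( a + ( ( (int64_t)b * c ) >> 16 ) );  /* a + ((b * c) >> 16), 64-bit intermediate */
}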
/* Compute reflection coefficients from input signal */
void silk_burg_modified(
    opus_int32          *res_nrg,           /* O    Residual energy                                          */
    opus_int            *res_nrg_Q,         /* O    Residual energy Q value                                  */
    opus_int32          A_Q16[],            /* O    Prediction coefficients (length order)                   */
    const opus_int16    x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )    */
    const opus_int32    minInvGain_Q30,     /* I    Inverse of max prediction gain                           */
    const opus_int      subfr_length,       /* I    Input signal subframe length (incl. D preceding samples) */
    const opus_int      nb_subfr,           /* I    Number of subframes stacked in x                         */
    const opus_int      D                   /* I    Order                                                    */
)
{
    opus_int            k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
    opus_int32          C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
    const opus_int16    *x_ptr;
    opus_int32          C_first_row[ SILK_MAX_ORDER_LPC ];
    opus_int32          C_last_row[ SILK_MAX_ORDER_LPC ];
    opus_int32          Af_QA[ SILK_MAX_ORDER_LPC ];
    opus_int32          CAf[ SILK_MAX_ORDER_LPC + 1 ];
    opus_int32          CAb[ SILK_MAX_ORDER_LPC + 1 ];

    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );

    /* Compute autocorrelations, added over subframes */
    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
    if( rshifts > MAX_RSHIFTS ) {
        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
        silk_assert( C0 > 0 );
        rshifts = MAX_RSHIFTS;
    } else {
        lz = silk_CLZ32( C0 ) - 1;
        rshifts_extra = N_BITS_HEAD_ROOM - lz;
        if( rshifts_extra > 0 ) {
            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
            C0 = silk_RSHIFT32( C0, rshifts_extra );
        } else {
            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
            C0 = silk_LSHIFT32( C0, -rshifts_extra );
        }
        rshifts += rshifts_extra;
    }
    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;    /* Q(-rshifts) */
    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
    if( rshifts > 0 ) {
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            for( n = 1; n < D + 1; n++ ) {
                C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
            }
        }
    } else {
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            for( n = 1; n < D + 1; n++ ) {
                C_first_row[ n - 1 ] += silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr + n, subfr_length - n ), -rshifts );
            }
        }
    }
    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );

    /* Initialize */
    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;    /* Q(-rshifts) */

    invGain_Q30 = (opus_int32)1 << 30;
    reached_max_gain = 0;
    for( n = 0; n < D; n++ ) {
        /* Update first row of correlation matrix (without first element) */
        /* Update last row of correlation matrix (without last element, stored in reversed order) */
        /* Update C * Af */
        /* Update C * flipud(Af) (stored in reversed order) */
        if( rshifts > -2 ) {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                x1   = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );    /* Q(16-rshifts) */
                x2   = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );    /* Q(16-rshifts) */
                tmp1 =  silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );         /* Q(QA-16) */
                tmp2 =  silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );         /* Q(QA-16) */
                for( k = 0; k < n; k++ ) {
                    C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] );            /* Q( -rshifts ) */
                    C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                    Atmp_QA = Af_QA[ k ];
                    tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] );                               /* Q(QA-16) */
                    tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                    /* Q(QA-16) */
                }
                tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );    /* Q(16-rshifts) */
                tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );    /* Q(16-rshifts) */
                for( k = 0; k <= n; k++ ) {
                    CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] );                              /* Q( -rshift ) */
                    CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );           /* Q( -rshift ) */
                }
            }
        } else {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                x1   = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );    /* Q( -rshifts ) */
                x2   = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );    /* Q( -rshifts ) */
                tmp1 =  silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );          /* Q17 */
                tmp2 =  silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );          /* Q17 */
                for( k = 0; k < n; k++ ) {
                    C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] );            /* Q( -rshifts ) */
                    C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                    Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
                }
                tmp1 = -tmp1;    /* Q17 */
                tmp2 = -tmp2;    /* Q17 */
                for( k = 0; k <= n; k++ ) {
                    CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
                    CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
                }
            }
        }

        /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */
        tmp1 = C_first_row[ n ];                                 /* Q( -rshifts ) */
        tmp2 = C_last_row[ n ];                                  /* Q( -rshifts ) */
        num  = 0;                                                /* Q( -rshifts ) */
        nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                 /* Q( 1-rshifts ) */
        for( k = 0; k < n; k++ ) {
            Atmp_QA = Af_QA[ k ];
            lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
            lz = silk_min( 32 - QA, lz );
            Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                /* Q( QA + lz ) */
            tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ],  Atmp1 ), 32 - QA - lz );                 /* Q( -rshifts ) */
            tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );                 /* Q( -rshifts ) */
            num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );                 /* Q( -rshifts ) */
            nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */
        }
        CAf[ n + 1 ] = tmp1;                                     /* Q( -rshifts ) */
        CAb[ n + 1 ] = tmp2;                                     /* Q( -rshifts ) */
        num = silk_ADD32( num, tmp2 );                           /* Q( -rshifts ) */
        num = silk_LSHIFT32( -num, 1 );                          /* Q( 1-rshifts ) */

        /* Calculate the next order reflection (parcor) coefficient */
        if( silk_abs( num ) < nrg ) {
            rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
        } else {
            rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
        }

        /* Update inverse prediction gain */
        tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
        tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
        if( tmp1 <= minInvGain_Q30 ) {
            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
            tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );    /* Q30 */
            rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                      /* Q15 */
            /* Newton-Raphson iteration */
            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                       /* Q15 */
            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                                   /* Q31 */
            if( num < 0 ) {
                /* Ensure the adjusted reflection coefficient has the original sign */
                rc_Q31 = -rc_Q31;
            }
            invGain_Q30 = minInvGain_Q30;
            reached_max_gain = 1;
        } else {
            invGain_Q30 = tmp1;
        }

        /* Update the AR coefficients */
        for( k = 0; k < ( n + 1 ) >> 1; k++ ) {
            tmp1 = Af_QA[ k ];                                                              /* QA */
            tmp2 = Af_QA[ n - k - 1 ];                                                      /* QA */
            Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );  /* QA */
            Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );  /* QA */
        }
        Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                      /* QA */

        if( reached_max_gain ) {
            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
            for( k = n + 1; k < D; k++ ) {
                Af_QA[ k ] = 0;
            }
            break;
        }

        /* Update C * Af and C * Ab */
        for( k = 0; k <= n + 1; k++ ) {
            tmp1 = CAf[ k ];                                                                /* Q( -rshifts ) */
            tmp2 = CAb[ n - k + 1 ];                                                        /* Q( -rshifts ) */
            CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );    /* Q( -rshifts ) */
            CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );    /* Q( -rshifts ) */
        }
    }

    if( reached_max_gain ) {
        for( k = 0; k < D; k++ ) {
            /* Scale coefficients */
            A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
        }
        /* Subtract energy of preceding samples from C0 */
        if( rshifts > 0 ) {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
            }
        } else {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
            }
        }
        /* Approximate residual energy */
        *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
        *res_nrg_Q = -rshifts;
    } else {
        /* Return residual energy */
        nrg  = CAf[ 0 ];                                                                    /* Q( -rshifts ) */
        tmp1 = (opus_int32)1 << 16;                                                         /* Q16 */
        for( k = 0; k < D; k++ ) {
            Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                               /* Q16 */
            nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                 /* Q( -rshifts ) */
            tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                       /* Q16 */
            A_Q16[ k ] = -Atmp1;
        }
        *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );    /* Q( -rshifts ) */
        *res_nrg_Q = -rshifts;
    }
}
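/* Illustrative caller (not from the SILK sources) showing the I/O conventions of silk_burg_modified:
   A_Q16[] comes back in Q16 and res_nrg at the Q value *res_nrg_Q, following SILK's convention that a
   Qn value equals the real value scaled by 2^n. The EXAMPLE_* sizes, example_lpc_from_burg and the
   minInvGain_Q30 value are hypothetical choices made for this sketch, not values used by the encoder. */

#define EXAMPLE_ORDER      16                       /* hypothetical LPC order (D) */
#define EXAMPLE_NB_SUBFR   2                        /* hypothetical number of stacked subframes */
#define EXAMPLE_SUBFR_LEN  ( 80 + EXAMPLE_ORDER )   /* hypothetical subframe length incl. the D preceding samples */

static void example_lpc_from_burg(
    const opus_int16    x[ EXAMPLE_NB_SUBFR * EXAMPLE_SUBFR_LEN ],  /* I    stacked subframes           */
    double              lpc[ EXAMPLE_ORDER ]                       /* O    coefficients as doubles     */
)
{
    opus_int32 res_nrg, A_Q16[ EXAMPLE_ORDER ];
    opus_int   res_nrg_Q, k;
    /* Hypothetical prediction-gain cap: an inverse gain of 1e-4 expressed in Q30 */
    opus_int32 minInvGain_Q30 = (opus_int32)( 1e-4 * 1073741824.0 );

    silk_burg_modified( &res_nrg, &res_nrg_Q, A_Q16, x, minInvGain_Q30,
                        EXAMPLE_SUBFR_LEN, EXAMPLE_NB_SUBFR, EXAMPLE_ORDER );

    /* A_Q16[ k ] is a Q16 value: divide by 2^16 to recover the coefficient.             */
    /* By the same convention the residual energy is roughly res_nrg * 2^( -res_nrg_Q ). */
    for( k = 0; k < EXAMPLE_ORDER; k++ ) {
        lpc[ k ] = A_Q16[ k ] / 65536.0;
    }
    (void)res_nrg;
    (void)res_nrg_Q;
}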