/* Polynomial evaluation using Horner's scheme, with the order-8 case fully unrolled */
static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16 */
    opus_int32          *p,     /* I    Polynomial, Q16         */
    const opus_int32    x,      /* I    Evaluation point, Q12   */
    const opus_int      dd      /* I    Order                   */
)
{
    opus_int   idx;
    opus_int32 arg_Q16, acc;

    /* Start from the highest-order coefficient and bring x from Q12 up to Q16 */
    acc     = p[ dd ];              /* Q16 */
    arg_Q16 = silk_LSHIFT( x, 4 );

    if( opus_likely( 8 == dd ) ) {
        /* Expected case: straight-line evaluation, coefficient 7 down to 0 */
        acc = (opus_int32) silk_SMLAWW( p[ 7 ], acc, arg_Q16 );
        acc = (opus_int32) silk_SMLAWW( p[ 6 ], acc, arg_Q16 );
        acc = (opus_int32) silk_SMLAWW( p[ 5 ], acc, arg_Q16 );
        acc = (opus_int32) silk_SMLAWW( p[ 4 ], acc, arg_Q16 );
        acc = (opus_int32) silk_SMLAWW( p[ 3 ], acc, arg_Q16 );
        acc = (opus_int32) silk_SMLAWW( p[ 2 ], acc, arg_Q16 );
        acc = (opus_int32) silk_SMLAWW( p[ 1 ], acc, arg_Q16 );
        acc = (opus_int32) silk_SMLAWW( p[ 0 ], acc, arg_Q16 );
        return acc;
    }

    /* Any other order: same recurrence, coefficient dd - 1 down to 0 */
    for( idx = dd; idx > 0; idx-- ) {
        acc = (opus_int32) silk_SMLAWW( p[ idx - 1 ], acc, arg_Q16 );    /* Q16 */
    }
    return acc;
}
/* Polynomial evaluation: Horner recurrence from the highest coefficient down to p[ 0 ] */
static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16 */
    opus_int32          *p,     /* I    Polynomial, Q16         */
    const opus_int32    x,      /* I    Evaluation point, Q12   */
    const opus_int      dd      /* I    Order                   */
)
{
    opus_int   idx;
    opus_int32 arg_Q16, acc;

    acc     = p[ dd ];              /* Q16 */
    arg_Q16 = silk_LSHIFT( x, 4 );  /* Q12 -> Q16 */

    /* idx is decremented before the body runs, so the body sees dd - 1 ... 0 */
    idx = dd;
    while( idx-- > 0 ) {
        acc = silk_SMLAWW( p[ idx ], acc, arg_Q16 );    /* Q16 */
    }

    return acc;
}
/* Polynomial evaluation: Horner recurrence over the dd + 1 coefficients of p */
static inline int32_t silk_A2NLSF_eval_poly(    /* return the polynomial evaluation, in Q16 */
    int32_t * p,        /* I    Polynomial, Q16         */
    const int32_t x,    /* I    Evaluation point, Q12   */
    const int dd        /* I    Order                   */
)
{
    int remaining;
    int32_t arg_Q16, acc;

    acc = p[dd];                    /* Q16 */
    arg_Q16 = silk_LSHIFT(x, 4);    /* Q12 -> Q16 */

    /* 'remaining' counts the coefficients still to fold in; the next one is p[remaining - 1] */
    for (remaining = dd; remaining > 0; remaining--) {
        acc = silk_SMLAWW(p[remaining - 1], acc, arg_Q16);  /* Q16 */
    }

    return acc;
}
/* Processing of gains */ void silk_process_gains_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control */ opus_int condCoding /* I The type of conditional coding to use */ ) { silk_shape_state_FIX *psShapeSt = &psEnc->sShape; opus_int k; opus_int32 s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10; /* Gain reduction when LTP coding gain is high */ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { /*s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); */ s_Q16 = -silk_sigm_Q15( silk_RSHIFT_ROUND( psEncCtrl->LTPredCodGain_Q7 - SILK_FIX_CONST( 12.0, 7 ), 4 ) ); for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { psEncCtrl->Gains_Q16[ k ] = silk_SMLAWB( psEncCtrl->Gains_Q16[ k ], psEncCtrl->Gains_Q16[ k ], s_Q16 ); } } /* Limit the quantized signal */ /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */ InvMaxSqrVal_Q16 = silk_DIV32_16( silk_log2lin( silk_SMULWB( SILK_FIX_CONST( 21 + 16 / 0.33, 7 ) - psEnc->sCmn.SNR_dB_Q7, SILK_FIX_CONST( 0.33, 16 ) ) ), psEnc->sCmn.subfr_length ); for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { /* Soft limit on ratio residual energy and squared gains */ ResNrg = psEncCtrl->ResNrg[ k ]; ResNrgPart = silk_SMULWW( ResNrg, InvMaxSqrVal_Q16 ); if( psEncCtrl->ResNrgQ[ k ] > 0 ) { ResNrgPart = silk_RSHIFT_ROUND( ResNrgPart, psEncCtrl->ResNrgQ[ k ] ); } else { if( ResNrgPart >= silk_RSHIFT( silk_int32_MAX, -psEncCtrl->ResNrgQ[ k ] ) ) { ResNrgPart = silk_int32_MAX; } else { ResNrgPart = silk_LSHIFT( ResNrgPart, -psEncCtrl->ResNrgQ[ k ] ); } } gain = psEncCtrl->Gains_Q16[ k ]; gain_squared = silk_ADD_SAT32( ResNrgPart, silk_SMMUL( gain, gain ) ); if( gain_squared < silk_int16_MAX ) { /* recalculate with higher precision */ gain_squared = silk_SMLAWW( silk_LSHIFT( ResNrgPart, 16 ), gain, gain ); silk_assert( gain_squared > 0 ); gain = silk_SQRT_APPROX( gain_squared ); /* Q8 */ gain = silk_min( gain, 
silk_int32_MAX >> 8 ); psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 8 ); /* Q16 */ } else {
/* Step up function, converts reflection coefficients to prediction coefficients */
void silk_k2a_Q16(
    opus_int32          *A_Q24,     /* O    Prediction coefficients [order] Q24     */
    const opus_int32    *rc_Q16,    /* I    Reflection coefficients [order] Q16     */
    const opus_int32    order       /* I    Prediction order                        */
)
{
    opus_int   stage, idx;
    opus_int32 prev[ SILK_MAX_ORDER_LPC ];

    stage = 0;
    while( stage < order ) {
        /* Snapshot the coefficients produced by the previous stages */
        for( idx = 0; idx < stage; idx++ ) {
            prev[ idx ] = A_Q24[ idx ];
        }

        /* Each coefficient is combined with its mirrored counterprt from the snapshot */
        for( idx = 0; idx < stage; idx++ ) {
            A_Q24[ idx ] = silk_SMLAWW( A_Q24[ idx ], prev[ stage - idx - 1 ], rc_Q16[ stage ] );
        }

        /* The newest coefficient is the negated reflection coefficient, shifted from Q16 to Q24 */
        A_Q24[ stage ] = -silk_LSHIFT( rc_Q16[ stage ], 8 );

        stage++;
    }
}
/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal (back substitution) */
static OPUS_INLINE void silk_LS_SolveLast_FIX(
    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix  */
    const opus_int      M,          /* I    Dim of Matrix equation              */
    const opus_int32    *b,         /* I    b Vector                            */
    opus_int32          *x_Q16      /* O    x Vector                            */
)
{
    opus_int         row, col;
    const opus_int32 *column_base;
    opus_int32       acc;

    /* Walk the unknowns from last to first; the body sees row = M - 1 ... 0 */
    row = M;
    while( row-- > 0 ) {
        /* Address of element ( 0, row ); stepping by M walks down that column of L */
        column_base = matrix_adr( L_Q16, 0, row, M );

        /* Accumulate the contribution of the already-solved unknowns */
        acc = 0;
        for( col = M - 1; col > row; col-- ) {
            acc = silk_SMLAWW( acc, column_base[ silk_SMULBB( col, M ) ], x_Q16[ col ] );
        }

        x_Q16[ row ] = silk_SUB32( b[ row ], acc );
    }
}
/* Solve Lx = b, when L is lower triangular and has ones on the diagonal (forward substitution) */
static OPUS_INLINE void silk_LS_SolveFirst_FIX(
    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix  */
    opus_int            M,          /* I    Dim of Matrix equation              */
    const opus_int32    *b,         /* I    b Vector                            */
    opus_int32          *x_Q16      /* O    x Vector                            */
)
{
    opus_int         row, col;
    const opus_int32 *row_base;
    opus_int32       acc;

    row = 0;
    while( row < M ) {
        /* Address of the first element of the current row of L */
        row_base = matrix_adr( L_Q16, row, 0, M );

        /* Accumulate the contribution of the unknowns solved so far */
        acc = 0;
        for( col = 0; col < row; col++ ) {
            acc = silk_SMLAWW( acc, row_base[ col ], x_Q16[ col ] );
        }

        x_Q16[ row ] = silk_SUB32( b[ row ], acc );
        row++;
    }
}
/*
 * Fixed-point LDL factorization of the M x M matrix A.
 *
 * For each column j the routine stores the diagonal element in the local D_Q0[ j ], stores its
 * inverse in inv_D[ j ] as two parts (Q36_part and Q48_part, obtained with a two-step division),
 * sets L_Q16( j, j ) to 65536 (1.0 in Q16) and fills L_Q16( i, j ) for i > j.
 *
 * If a diagonal element comes out smaller than diag_min_value, a correction is added to every
 * diagonal element of A (A is modified in place) and the factorization is restarted; at most M
 * passes are made. silk_assert( status == 0 ) checks that the last pass finished without needing
 * a correction.
 */
static OPUS_INLINE void silk_LDL_factorize_FIX( opus_int32 *A, /* I/O Pointer to Symmetric Square Matrix */ opus_int M, /* I Size of Matrix */ opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ ) { opus_int i, j, k, status, loop_count; const opus_int32 *ptr1, *ptr2; opus_int32 diag_min_value, tmp_32, err; opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ]; opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48; silk_assert( M <= MAX_MATRIX_SIZE ); status = 1; /* Smallest accepted diagonal value: derived from the first and last diagonal elements of A and FIND_LTP_COND_FAC, but never below 1 << 9 */ diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 ); /* Repeat the factorization while the previous pass had to correct the diagonal, at most M passes */ for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) { status = 0; for( j = 0; j < M; j++ ) { ptr1 = matrix_adr( L_Q16, j, 0, M ); tmp_32 = 0; for( i = 0; i < j; i++ ) { v_Q0[ i ] = silk_SMULWW( D_Q0[ i ], ptr1[ i ] ); /* Q0 */ tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */ } tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 ); if( tmp_32 < diag_min_value ) { tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 ); /* Matrix not positive semi-definite, or ill conditioned */ /* Add the correction to every diagonal element of A, then abandon this pass */ for( i = 0; i < M; i++ ) { matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 ); } status = 1; break; } D_Q0[ j ] = tmp_32; /* always < max(Correlation) */ /* two-step division */ one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 ); /* Q36 */ one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 ); /* Q40 */ err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) ); /* Q24 */ one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 ); /* Q48 */ /* Save 1/Ds */ inv_D[ j ].Q36_part = one_div_diag_Q36; inv_D[ j ].Q48_part = one_div_diag_Q48; matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */ ptr1 = matrix_adr( A, j, 0, M ); ptr2 = matrix_adr( L_Q16, j + 1, 0, M ); /* Fill column j of L below the diagonal */ for( i = j + 1; 
i < M; i++ ) { tmp_32 = 0; for( k = 0; k < j; k++ ) { tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */ } tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */ /* tmp_32 / D_Q0[j] : Divide to Q16, using both parts of the stored inverse */ matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); /* go to next column */ ptr2 += M; } } } /* The last pass must have completed without a diagonal correction */ silk_assert( status == 0 ); }
/*
 * Compute reflection coefficients from input signal.
 *
 * Outline of what the body does:
 *  - silk_sum_sqr_shift() yields the signal energy C0 and a right-shift count; the shift is then
 *    adjusted using N_BITS_HEAD_ROOM and limited with MAX_RSHIFTS / MIN_RSHIFTS.
 *  - The lag 1..D correlations are accumulated over all subframes into C_first_row and copied to
 *    C_last_row.
 *  - For each order n: the correlation rows and CAf / CAb are updated per subframe, a reflection
 *    coefficient rc_Q31 = num / nrg is computed, and Af_QA, CAf and CAb are updated with it.
 *  - If |num| >= nrg the remaining Af_QA entries are zeroed, silk_assert( 0 ) fires and the loop
 *    is left early.
 *  - Outputs: A_Q16[ k ] receives the negated, rounded Af_QA[ k ]; *res_nrg receives the residual
 *    energy and *res_nrg_Q is set to -rshifts.
 */
void silk_burg_modified( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ const opus_int nb_subfr, /* I Number of subframes stacked in x */ const opus_int32 WhiteNoiseFrac_Q32, /* I Fraction added to zero-lag autocorrelation */ const opus_int D /* I Order */ ) { opus_int k, n, s, lz, rshifts, rshifts_extra; opus_int32 C0, num, nrg, rc_Q31, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; const opus_int16 *x_ptr; opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); silk_assert( nb_subfr <= MAX_NB_SUBFR ); /* Compute autocorrelations, added over subframes */ silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); if( rshifts > MAX_RSHIFTS ) { C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); silk_assert( C0 > 0 ); rshifts = MAX_RSHIFTS; } else { lz = silk_CLZ32( C0 ) - 1; rshifts_extra = N_BITS_HEAD_ROOM - lz; if( rshifts_extra > 0 ) { rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); C0 = silk_RSHIFT32( C0, rshifts_extra ); } else { rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); C0 = silk_LSHIFT32( C0, -rshifts_extra ); } rshifts += rshifts_extra; } silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); /* Positive shift: 64-bit inner products shifted right; otherwise 32-bit inner products shifted left */ if( rshifts > 0 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts ); } } } else { 
for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr + n, subfr_length - n ), -rshifts ); } } } silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); /* Initialize */ CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( WhiteNoiseFrac_Q32, C0 ) + 1; /* Q(-rshifts)*/ for( n = 0; n < D; n++ ) { /* Update first row of correlation matrix (without first element) */ /* Update last row of correlation matrix (without last element, stored in reversed order) */ /* Update C * Af */ /* Update C * flipud(Af) (stored in reversed order) */ if( rshifts > -2 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts)*/ x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts)*/ tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16)*/ tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16)*/ for( k = 0; k < n; k++ ) { C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts )*/ C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts )*/ Atmp_QA = Af_QA[ k ]; tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16)*/ tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16)*/ } tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts)*/ tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts)*/ for( k = 0; k <= n; k++ ) { CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift )*/ CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift )*/ } } } else { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts )*/ x2 = 
-silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts )*/ tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17*/ tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17*/ for( k = 0; k < n; k++ ) { C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts )*/ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts )*/ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17*/ tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17*/ tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17*/ } tmp1 = -tmp1; /* Q17*/ tmp2 = -tmp2; /* Q17*/ for( k = 0; k <= n; k++ ) { CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift )*/ CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift )*/ } } } /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */ tmp1 = C_first_row[ n ]; /* Q( -rshifts )*/ tmp2 = C_last_row[ n ]; /* Q( -rshifts )*/ num = 0; /* Q( -rshifts )*/ nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts )*/ for( k = 0; k < n; k++ ) { Atmp_QA = Af_QA[ k ]; lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; lz = silk_min( 32 - QA, lz ); Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz )*/ tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts )*/ tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts )*/ num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts )*/ nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts )*/ } CAf[ n + 1 ] = tmp1; /* Q( -rshifts )*/ CAb[ n + 1 ] = tmp2; /* Q( -rshifts )*/ num = silk_ADD32( num, tmp2 ); 
/* Q( -rshifts )*/ num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts )*/ /* Calculate the next order reflection (parcor) coefficient */ if( silk_abs( num ) < nrg ) { rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); } else { /* Negative energy or ratio too high; set remaining coefficients to zero and exit loop */ silk_memset( &Af_QA[ n ], 0, ( D - n ) * sizeof( opus_int32 ) ); silk_assert( 0 ); break; } /* Update the AR coefficients (entries k and n - k - 1 are updated as a pair) */ for( k = 0; k < (n + 1) >> 1; k++ ) { tmp1 = Af_QA[ k ]; /* QA*/ tmp2 = Af_QA[ n - k - 1 ]; /* QA*/ Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA*/ Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA*/ } Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA*/ /* Update C * Af and C * Ab */ for( k = 0; k <= n + 1; k++ ) { tmp1 = CAf[ k ]; /* Q( -rshifts )*/ tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts )*/ CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts )*/ CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts )*/ } } /* Return residual energy */ nrg = CAf[ 0 ]; /* Q( -rshifts )*/ tmp1 = 1 << 16; /* Q16*/ for( k = 0; k < D; k++ ) { Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16*/ nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts )*/ tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16*/ A_Q16[ k ] = -Atmp1; } *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( WhiteNoiseFrac_Q32, C0 ), -tmp1 ); /* Q( -rshifts )*/ *res_nrg_Q = -rshifts; }
/*
 * Compute reflection coefficients from input signal (variant using SSE4.1 intrinsics).
 *
 * Outline of what the body does:
 *  - silk_sum_sqr_shift() yields the signal energy C0 and a right-shift count; the shift is then
 *    adjusted using N_BITS_HEAD_ROOM and limited with MAX_RSHIFTS / MIN_RSHIFTS.
 *  - The lag 1..D correlations are accumulated over all subframes into C_first_row (64-bit inner
 *    products when rshifts > 0, otherwise celt_pitch_xcorr() plus a scalar correction loop) and
 *    copied to C_last_row.
 *  - For each order n the correlation rows and CAf / CAb are updated per subframe. When
 *    rshifts > -2 this is scalar code; otherwise four elements are processed per iteration with
 *    __m128i operations and a scalar loop handles the leftover elements.
 *  - A reflection coefficient rc_Q31 is computed as num / nrg, or saturated to
 *    silk_int32_MAX / silk_int32_MIN when |num| >= nrg.
 *  - invGain_Q30 tracks the inverse prediction gain. When the updated value would be
 *    <= minInvGain_Q30, rc_Q31 is recomputed from a square-root approximation refined by one
 *    Newton-Raphson step, the remaining Af_QA entries are zeroed and the loop is left.
 *  - Outputs: A_Q16[ k ] receives the negated, rounded Af_QA[ k ]; *res_nrg_Q is set to -rshifts;
 *    *res_nrg is either an approximation based on invGain_Q30 and C0 (max gain reached) or the
 *    value accumulated from CAf and the coefficients.
 */
void silk_burg_modified_sse4_1( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ const opus_int16 x[], /* I Input signal, length: nb_subfr * (D + subfr_length) */ const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ const opus_int nb_subfr, /* I Number of subframes stacked in x */ const opus_int D, /* I Order */ int arch /* I Run-time architecture */ ) { opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; const opus_int16 *x_ptr; opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210; __m128i CONST1 = _mm_set1_epi32(1); silk_assert(subfr_length * nb_subfr <= MAX_FRAME_SIZE); /* Compute autocorrelations, added over subframes */ silk_sum_sqr_shift(&C0, &rshifts, x, nb_subfr * subfr_length); if(rshifts > MAX_RSHIFTS) { C0 = silk_LSHIFT32(C0, rshifts - MAX_RSHIFTS); silk_assert(C0 > 0); rshifts = MAX_RSHIFTS; } else { lz = silk_CLZ32(C0) - 1; rshifts_extra = N_BITS_HEAD_ROOM - lz; if(rshifts_extra > 0) { rshifts_extra = silk_min(rshifts_extra, MAX_RSHIFTS - rshifts); C0 = silk_RSHIFT32(C0, rshifts_extra); } else { rshifts_extra = silk_max(rshifts_extra, MIN_RSHIFTS - rshifts); C0 = silk_LSHIFT32(C0, -rshifts_extra); } rshifts += rshifts_extra; } CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL(SILK_FIX_CONST(FIND_LPC_COND_FAC, 32), C0) + 1; /* Q(-rshifts) */ silk_memset(C_first_row, 0, 
SILK_MAX_ORDER_LPC * sizeof(opus_int32)); if(rshifts > 0) { for(s = 0; s < nb_subfr; s++) { x_ptr = x + s * subfr_length; for(n = 1; n < D + 1; n++) { C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64(x_ptr, x_ptr + n, subfr_length - n, arch), rshifts); } } } else { for(s = 0; s < nb_subfr; s++) { int i; opus_int32 d; x_ptr = x + s * subfr_length; celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch); /* Add, per lag, the products for the last samples of the subframe that the call above was not asked to cover */ for(n = 1; n < D + 1; n++) { for (i = n + subfr_length - D, d = 0; i < subfr_length; i++) d = MAC16_16(d, x_ptr[ i ], x_ptr[ i - n ]); xcorr[ n - 1 ] += d; } for(n = 1; n < D + 1; n++) { C_first_row[ n - 1 ] += silk_LSHIFT32(xcorr[ n - 1 ], -rshifts); } } } silk_memcpy(C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof(opus_int32)); /* Initialize (this repeats the CAb[ 0 ] / CAf[ 0 ] assignment made before the correlation pass; C0 is not modified in between) */ CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL(SILK_FIX_CONST(FIND_LPC_COND_FAC, 32), C0) + 1; /* Q(-rshifts) */ invGain_Q30 = (opus_int32)1 << 30; reached_max_gain = 0; for(n = 0; n < D; n++) { /* Update first row of correlation matrix (without first element) */ /* Update last row of correlation matrix (without last element, stored in reversed order) */ /* Update C * Af */ /* Update C * flipud(Af) (stored in reversed order) */ if(rshifts > -2) { for(s = 0; s < nb_subfr; s++) { x_ptr = x + s * subfr_length; x1 = -silk_LSHIFT32((opus_int32)x_ptr[ n ], 16 - rshifts); /* Q(16-rshifts) */ x2 = -silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts); /* Q(16-rshifts) */ tmp1 = silk_LSHIFT32((opus_int32)x_ptr[ n ], QA - 16); /* Q(QA-16) */ tmp2 = silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16); /* Q(QA-16) */ for(k = 0; k < n; k++) { C_first_row[ k ] = silk_SMLAWB(C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q(-rshifts) */ C_last_row[ k ] = silk_SMLAWB(C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ]); /* Q(-rshifts) */ Atmp_QA = Af_QA[ k ]; tmp1 = silk_SMLAWB(tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ tmp2 = silk_SMLAWB(tmp2, Atmp_QA, x_ptr[ 
subfr_length - n + k ]); /* Q(QA-16) */ } tmp1 = silk_LSHIFT32(-tmp1, 32 - QA - rshifts); /* Q(16-rshifts) */ tmp2 = silk_LSHIFT32(-tmp2, 32 - QA - rshifts); /* Q(16-rshifts) */ for(k = 0; k <= n; k++) { CAf[ k ] = silk_SMLAWB(CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q(-rshift) */ CAb[ k ] = silk_SMLAWB(CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ]); /* Q(-rshift) */ } } } else { for(s = 0; s < nb_subfr; s++) { x_ptr = x + s * subfr_length; x1 = -silk_LSHIFT32((opus_int32)x_ptr[ n ], -rshifts); /* Q(-rshifts) */ x2 = -silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts); /* Q(-rshifts) */ tmp1 = silk_LSHIFT32((opus_int32)x_ptr[ n ], 17); /* Q17 */ tmp2 = silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], 17); /* Q17 */ X1_3210 = _mm_set1_epi32(x1); X2_3210 = _mm_set1_epi32(x2); TMP1_3210 = _mm_setzero_si128(); TMP2_3210 = _mm_setzero_si128(); /* Vector part: four values of k per iteration; leftover k are handled by the scalar loop further down */ for(k = 0; k < n - 3; k += 4) { PTR_3210 = OP_CVTEPI16_EPI32_M64(&x_ptr[ n - k - 1 - 3 ]); SUBFR_3210 = OP_CVTEPI16_EPI32_M64(&x_ptr[ subfr_length - n + k ]); FIRST_3210 = _mm_loadu_si128((__m128i *)&C_first_row[ k ]); PTR_3210 = _mm_shuffle_epi32(PTR_3210, _MM_SHUFFLE(0, 1, 2, 3)); LAST_3210 = _mm_loadu_si128((__m128i *)&C_last_row[ k ]); ATMP_3210 = _mm_loadu_si128((__m128i *)&Af_QA[ k ]); T1_3210 = _mm_mullo_epi32(PTR_3210, X1_3210); T2_3210 = _mm_mullo_epi32(SUBFR_3210, X2_3210); /* Shift right by 7, add 1, shift right by 1: vector counterpart of the scalar silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ) used in the leftover loop (assumes QA - 17 == 8 -- TODO confirm against the QA definition) */ ATMP_3210 = _mm_srai_epi32(ATMP_3210, 7); ATMP_3210 = _mm_add_epi32(ATMP_3210, CONST1); ATMP_3210 = _mm_srai_epi32(ATMP_3210, 1); FIRST_3210 = _mm_add_epi32(FIRST_3210, T1_3210); LAST_3210 = _mm_add_epi32(LAST_3210, T2_3210); PTR_3210 = _mm_mullo_epi32(ATMP_3210, PTR_3210); SUBFR_3210 = _mm_mullo_epi32(ATMP_3210, SUBFR_3210); _mm_storeu_si128((__m128i *)&C_first_row[ k ], FIRST_3210); _mm_storeu_si128((__m128i *)&C_last_row[ k ], LAST_3210); TMP1_3210 = _mm_add_epi32(TMP1_3210, PTR_3210); TMP2_3210 = _mm_add_epi32(TMP2_3210, SUBFR_3210); } /* Horizontal add of the four 32-bit lanes, then fold the sums into the scalar accumulators */ TMP1_3210 = _mm_add_epi32(TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210)); TMP2_3210 
= _mm_add_epi32(TMP2_3210, _mm_unpackhi_epi64(TMP2_3210, TMP2_3210)); TMP1_3210 = _mm_add_epi32(TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E)); TMP2_3210 = _mm_add_epi32(TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E)); tmp1 += _mm_cvtsi128_si32(TMP1_3210); tmp2 += _mm_cvtsi128_si32(TMP2_3210); /* Scalar loop for the k values not covered by the vector loop */ for(; k < n; k++) { C_first_row[ k ] = silk_MLA(C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q(-rshifts) */ C_last_row[ k ] = silk_MLA(C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ]); /* Q(-rshifts) */ Atmp1 = silk_RSHIFT_ROUND(Af_QA[ k ], QA - 17); /* Q17 */ tmp1 = silk_MLA(tmp1, x_ptr[ n - k - 1 ], Atmp1); /* Q17 */ tmp2 = silk_MLA(tmp2, x_ptr[ subfr_length - n + k ], Atmp1); /* Q17 */ } tmp1 = -tmp1; /* Q17 */ tmp2 = -tmp2; /* Q17 */ { __m128i xmm_tmp1, xmm_tmp2; __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1; __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1; xmm_tmp1 = _mm_set1_epi32(tmp1); xmm_tmp2 = _mm_set1_epi32(tmp2); /* Vector update of CAf / CAb, four entries per iteration; the scalar loop after it uses silk_SMLAWW for the remaining entries */ for(k = 0; k <= n - 3; k += 4) { xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64(&x_ptr[ n - k - 3 ]); xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64(&x_ptr[ subfr_length - n + k - 1 ]); xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32(xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE(0, 1, 2, 3)); xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32(xmm_x_ptr_n_k_x2x0, -rshifts - 1); xmm_x_ptr_sub_x2x0 = _mm_slli_epi32(xmm_x_ptr_sub_x2x0, -rshifts - 1); /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4)*/ xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32(xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE(0, 3, 2, 1)); xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32(xmm_x_ptr_sub_x2x0, _MM_SHUFFLE(0, 3, 2, 1)); xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32(xmm_x_ptr_n_k_x2x0, xmm_tmp1); xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32(xmm_x_ptr_n_k_x3x1, xmm_tmp1); xmm_x_ptr_sub_x2x0 = _mm_mul_epi32(xmm_x_ptr_sub_x2x0, xmm_tmp2); xmm_x_ptr_sub_x3x1 = _mm_mul_epi32(xmm_x_ptr_sub_x3x1, xmm_tmp2); xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64(xmm_x_ptr_n_k_x2x0, 16); xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64(xmm_x_ptr_n_k_x3x1, 16); 
xmm_x_ptr_sub_x2x0 = _mm_srli_epi64(xmm_x_ptr_sub_x2x0, 16); xmm_x_ptr_sub_x3x1 = _mm_slli_epi64(xmm_x_ptr_sub_x3x1, 16); xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16(xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC); xmm_x_ptr_sub_x2x0 = _mm_blend_epi16(xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC); X1_3210 = _mm_loadu_si128((__m128i *)&CAf[ k ]); PTR_3210 = _mm_loadu_si128((__m128i *)&CAb[ k ]); X1_3210 = _mm_add_epi32(X1_3210, xmm_x_ptr_n_k_x2x0); PTR_3210 = _mm_add_epi32(PTR_3210, xmm_x_ptr_sub_x2x0); _mm_storeu_si128((__m128i *)&CAf[ k ], X1_3210); _mm_storeu_si128((__m128i *)&CAb[ k ], PTR_3210); } for(; k <= n; k++) { CAf[ k ] = silk_SMLAWW(CAf[ k ], tmp1, silk_LSHIFT32((opus_int32)x_ptr[ n - k ], -rshifts - 1)); /* Q(-rshift) */ CAb[ k ] = silk_SMLAWW(CAb[ k ], tmp2, silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1)); /* Q(-rshift) */ } } } } /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */ tmp1 = C_first_row[ n ]; /* Q(-rshifts) */ tmp2 = C_last_row[ n ]; /* Q(-rshifts) */ num = 0; /* Q(-rshifts) */ nrg = silk_ADD32(CAb[ 0 ], CAf[ 0 ]); /* Q(1-rshifts) */ for(k = 0; k < n; k++) { Atmp_QA = Af_QA[ k ]; lz = silk_CLZ32(silk_abs(Atmp_QA)) - 1; lz = silk_min(32 - QA, lz); Atmp1 = silk_LSHIFT32(Atmp_QA, lz); /* Q(QA + lz) */ tmp1 = silk_ADD_LSHIFT32(tmp1, silk_SMMUL(C_last_row[ n - k - 1 ], Atmp1), 32 - QA - lz); /* Q(-rshifts) */ tmp2 = silk_ADD_LSHIFT32(tmp2, silk_SMMUL(C_first_row[ n - k - 1 ], Atmp1), 32 - QA - lz); /* Q(-rshifts) */ num = silk_ADD_LSHIFT32(num, silk_SMMUL(CAb[ n - k ], Atmp1), 32 - QA - lz); /* Q(-rshifts) */ nrg = silk_ADD_LSHIFT32(nrg, silk_SMMUL(silk_ADD32(CAb[ k + 1 ], CAf[ k + 1 ]), Atmp1), 32 - QA - lz); /* Q(1-rshifts) */ } CAf[ n + 1 ] = tmp1; /* Q(-rshifts) */ CAb[ n + 1 ] = tmp2; /* Q(-rshifts) */ num = silk_ADD32(num, tmp2); /* Q(-rshifts) */ num = silk_LSHIFT32(-num, 1); /* Q(1-rshifts) */ /* Calculate the next order reflection (parcor) coefficient */ if(silk_abs(num) < 
nrg) { rc_Q31 = silk_DIV32_varQ(num, nrg, 31); } else { /* Ratio would not fit: saturate with the sign of num */ rc_Q31 = (num > 0) ? silk_int32_MAX : silk_int32_MIN; } /* Update inverse prediction gain */ tmp1 = ((opus_int32)1 << 30) - silk_SMMUL(rc_Q31, rc_Q31); tmp1 = silk_LSHIFT(silk_SMMUL(invGain_Q30, tmp1), 2); if(tmp1 <= minInvGain_Q30) { /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ tmp2 = ((opus_int32)1 << 30) - silk_DIV32_varQ(minInvGain_Q30, invGain_Q30, 30); /* Q30 */ rc_Q31 = silk_SQRT_APPROX(tmp2); /* Q15 */ /* Newton-Raphson iteration */ rc_Q31 = silk_RSHIFT32(rc_Q31 + silk_DIV32(tmp2, rc_Q31), 1); /* Q15 */ rc_Q31 = silk_LSHIFT32(rc_Q31, 16); /* Q31 */ if(num < 0) { /* Ensure adjusted reflection coefficients has the original sign */ rc_Q31 = -rc_Q31; } invGain_Q30 = minInvGain_Q30; reached_max_gain = 1; } else { invGain_Q30 = tmp1; } /* Update the AR coefficients (entries k and n - k - 1 are updated as a pair) */ for(k = 0; k < (n + 1) >> 1; k++) { tmp1 = Af_QA[ k ]; /* QA */ tmp2 = Af_QA[ n - k - 1 ]; /* QA */ Af_QA[ k ] = silk_ADD_LSHIFT32(tmp1, silk_SMMUL(tmp2, rc_Q31), 1); /* QA */ Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32(tmp2, silk_SMMUL(tmp1, rc_Q31), 1); /* QA */ } Af_QA[ n ] = silk_RSHIFT32(rc_Q31, 31 - QA); /* QA */ if(reached_max_gain) { /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ for(k = n + 1; k < D; k++) { Af_QA[ k ] = 0; } break; } /* Update C * Af and C * Ab */ for(k = 0; k <= n + 1; k++) { tmp1 = CAf[ k ]; /* Q(-rshifts) */ tmp2 = CAb[ n - k + 1 ]; /* Q(-rshifts) */ CAf[ k ] = silk_ADD_LSHIFT32(tmp1, silk_SMMUL(tmp2, rc_Q31), 1); /* Q(-rshifts) */ CAb[ n - k + 1 ] = silk_ADD_LSHIFT32(tmp2, silk_SMMUL(tmp1, rc_Q31), 1); /* Q(-rshifts) */ } } if(reached_max_gain) { for(k = 0; k < D; k++) { /* Scale coefficients */ A_Q16[ k ] = -silk_RSHIFT_ROUND(Af_QA[ k ], QA - 16); } /* Subtract energy of preceding samples from C0 */ if(rshifts > 0) { for(s = 0; s < nb_subfr; s++) { x_ptr = x + s * subfr_length; C0 -= 
(opus_int32)silk_RSHIFT64(silk_inner_prod16_aligned_64(x_ptr, x_ptr, D, arch), rshifts); } } else { for(s = 0; s < nb_subfr; s++) { x_ptr = x + s * subfr_length; C0 -= silk_LSHIFT32(silk_inner_prod_aligned(x_ptr, x_ptr, D, arch), -rshifts); } } /* Approximate residual energy */ *res_nrg = silk_LSHIFT(silk_SMMUL(invGain_Q30, C0), 2); *res_nrg_Q = -rshifts; } else { /* Return residual energy */ nrg = CAf[ 0 ]; /* Q(-rshifts) */ tmp1 = (opus_int32)1 << 16; /* Q16 */ for(k = 0; k < D; k++) { Atmp1 = silk_RSHIFT_ROUND(Af_QA[ k ], QA - 16); /* Q16 */ nrg = silk_SMLAWW(nrg, CAf[ k + 1 ], Atmp1); /* Q(-rshifts) */ tmp1 = silk_SMLAWW(tmp1, Atmp1, Atmp1); /* Q16 */ A_Q16[ k ] = -Atmp1; } *res_nrg = silk_SMLAWW(nrg, silk_SMMUL(SILK_FIX_CONST(FIND_LPC_COND_FAC, 32), C0), -tmp1);/* Q(-rshifts) */ *res_nrg_Q = -rshifts; } }
/* Compute reflection coefficients from input signal */ void silk_burg_modified( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ const opus_int nb_subfr, /* I Number of subframes stacked in x */ const opus_int D /* I Order */ ) { opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; const opus_int16 *x_ptr; opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); /* Compute autocorrelations, added over subframes */ silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); if( rshifts > MAX_RSHIFTS ) { C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); silk_assert( C0 > 0 ); rshifts = MAX_RSHIFTS; } else { lz = silk_CLZ32( C0 ) - 1; rshifts_extra = N_BITS_HEAD_ROOM - lz; if( rshifts_extra > 0 ) { rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts ); C0 = silk_RSHIFT32( C0, rshifts_extra ); } else { rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); C0 = silk_LSHIFT32( C0, -rshifts_extra ); } rshifts += rshifts_extra; } CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); if( rshifts > 0 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( 
silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts ); } } } else { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr + n, subfr_length - n ), -rshifts ); } } } silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); /* Initialize */ CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ invGain_Q30 = (opus_int32)1 << 30; reached_max_gain = 0; for( n = 0; n < D; n++ ) { /* Update first row of correlation matrix (without first element) */ /* Update last row of correlation matrix (without last element, stored in reversed order) */ /* Update C * Af */ /* Update C * flipud(Af) (stored in reversed order) */ if( rshifts > -2 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */ x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */ tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */ tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */ for( k = 0; k < n; k++ ) { C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ Atmp_QA = Af_QA[ k ]; tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */ } tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */ tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */ for( k = 0; k <= n; k++ ) { CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift ) */ CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( 
-rshift ) */ } } } else { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */ x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */ tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */ tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */ for( k = 0; k < n; k++ ) { C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ } tmp1 = -tmp1; /* Q17 */ tmp2 = -tmp2; /* Q17 */ for( k = 0; k <= n; k++ ) { CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift ) */ CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */ } } } /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */ tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */ num = 0; /* Q( -rshifts ) */ nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */ for( k = 0; k < n; k++ ) { Atmp_QA = Af_QA[ k ]; lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; lz = silk_min( 32 - QA, lz ); Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */ tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( 
silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */ } CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */ CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */ num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */ num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */ /* Calculate the next order reflection (parcor) coefficient */ if( silk_abs( num ) < nrg ) { rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); } else { rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN; } /* Update inverse prediction gain */ tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 ); if( tmp1 <= minInvGain_Q30 ) { /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ /* Newton-Raphson iteration */ rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ if( num < 0 ) { /* Ensure adjusted reflection coefficients has the original sign */ rc_Q31 = -rc_Q31; } invGain_Q30 = minInvGain_Q30; reached_max_gain = 1; } else { invGain_Q30 = tmp1; } /* Update the AR coefficients */ for( k = 0; k < (n + 1) >> 1; k++ ) { tmp1 = Af_QA[ k ]; /* QA */ tmp2 = Af_QA[ n - k - 1 ]; /* QA */ Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */ Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */ } Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */ if( reached_max_gain ) { /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ for( k = n + 1; k < D; k++ ) { Af_QA[ k ] = 0; } break; } /* Update C * Af and C * Ab */ for( k = 0; k <= n + 1; k++ ) { tmp1 = CAf[ k ]; /* Q( -rshifts ) */ tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */ CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); 
/* Q( -rshifts ) */ CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */ } } if( reached_max_gain ) { for( k = 0; k < D; k++ ) { /* Scale coefficients */ A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); } /* Subtract energy of preceding samples from C0 */ if( rshifts > 0 ) { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts ); } } else { for( s = 0; s < nb_subfr; s++ ) { x_ptr = x + s * subfr_length; C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts ); } } /* Approximate residual energy */ *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 ); *res_nrg_Q = -rshifts; } else { /* Return residual energy */ nrg = CAf[ 0 ]; /* Q( -rshifts ) */ tmp1 = (opus_int32)1 << 16; /* Q16 */ for( k = 0; k < D; k++ ) { Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */ nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */ tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */ A_Q16[ k ] = -Atmp1; } *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( FIND_LPC_COND_FAC, C0 ), -tmp1 ); /* Q( -rshifts ) */ *res_nrg_Q = -rshifts; } }