void silk_find_LTP_FIX(
    opus_int16          b_Q14[ MAX_NB_SUBFR * LTP_ORDER ],              /* O    LTP coefs                                                   */
    opus_int32          WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ],   /* O    Weight for LTP quantization                                 */
    opus_int            *LTPredCodGain_Q7,                              /* O    LTP coding gain                                             */
    const opus_int16    r_lpc[],                                        /* I    residual signal after LPC signal + state for first 10 ms   */
    const opus_int      lag[ MAX_NB_SUBFR ],                            /* I    LTP lags                                                    */
    const opus_int32    Wght_Q15[ MAX_NB_SUBFR ],                       /* I    weights                                                     */
    const opus_int      subfr_length,                                   /* I    subframe length                                             */
    const opus_int      nb_subfr,                                       /* I    number of subframes                                         */
    const opus_int      mem_offset,                                     /* I    number of samples in LTP memory                             */
    opus_int            corr_rshifts[ MAX_NB_SUBFR ]                    /* O    right shifts applied to correlations                        */
)
{
    opus_int    i, k, lshift;
    const opus_int16 *r_ptr, *lag_ptr;
    opus_int16  *b_Q14_ptr;

    opus_int32  regu;
    opus_int32  *WLTP_ptr;
    opus_int32  b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26;
    opus_int32  w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits;

    opus_int32  temp32, denom32;
    opus_int    extra_shifts;
    opus_int    rr_shifts, maxRshifts, maxRshifts_wxtra, LZs;
    opus_int32  LPC_res_nrg, LPC_LTP_res_nrg, div_Q16;
    opus_int32  Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ];
    opus_int32  wd, m_Q12;

    b_Q14_ptr = b_Q14;
    WLTP_ptr  = WLTP;
    r_ptr     = &r_lpc[ mem_offset ];
    for( k = 0; k < nb_subfr; k++ ) {
        lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 );

        silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length );        /* rr[ k ] in Q( -rr_shifts ) */

        /* Assure headroom */
        LZs = silk_CLZ32( rr[ k ] );
        if( LZs < LTP_CORRS_HEAD_ROOM ) {
            rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs );
            rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs );
        }
        corr_rshifts[ k ] = rr_shifts;
        silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ] );   /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */

        /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */
        silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ] );                        /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */
        if( corr_rshifts[ k ] > rr_shifts ) {
            rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts );    /* rr[ k ] in Q( -corr_rshifts[ k ] ) */
        }
        silk_assert( rr[ k ] >= 0 );

        regu = 1;
        regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
        regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0,             0,             LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
        regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER - 1, LTP_ORDER - 1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) );
        silk_regularize_correlations_FIX( WLTP_ptr, &rr[ k ], regu, LTP_ORDER );

        silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 );                   /* WLTP_fix_ptr and Rr_fix_ptr both in Q( -corr_rshifts[ k ] ) */

        /* Limit and store in Q14 */
        silk_fit_LTP( b_Q16, b_Q14_ptr );

        /* Calculate residual energy */
        nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 );   /* nrg_fix in Q( -corr_rshifts[ k ] ) */

        /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */
        extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM );
        denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) +         /* Q( -corr_rshifts[ k ] + extra_shifts ) */
            silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts );/* Q( -corr_rshifts[ k ] + extra_shifts ) */
        denom32 = silk_max( denom32, 1 );
        silk_assert( ( (opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX );                              /* Wght always < 0.5 in Q0 */
        temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 );                     /* Q( 15 + 16 + corr_rshifts[ k ] - extra_shifts ) */
        temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 );                       /* Q26 */

        /* Limit temp such that the below scaling never wraps around */
        WLTP_max = 0;
        for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) {
            WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max );
        }
        lshift = silk_CLZ32( WLTP_max ) - 1 - 3;    /* keep 3 bits free for vq_nearest_neighbor_fix */
        silk_assert( 26 - 18 + lshift >= 0 );
        if( 26 - 18 + lshift < 31 ) {
            temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) );
        }

        silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER );   /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */

        w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER );           /* w in Q( 18 - corr_rshifts[ k ] ) */
        silk_assert( w[ k ] >= 0 );

        r_ptr     += subfr_length;
        b_Q14_ptr += LTP_ORDER;
        WLTP_ptr  += LTP_ORDER * LTP_ORDER;
    }

    maxRshifts = 0;
    for( k = 0; k < nb_subfr; k++ ) {
        maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts );
    }

    /* Compute LTP coding gain */
    if( LTPredCodGain_Q7 != NULL ) {
        LPC_LTP_res_nrg = 0;
        LPC_res_nrg     = 0;
        silk_assert( LTP_CORRS_HEAD_ROOM >= 2 );    /* Check that no overflow will happen when adding */
        for( k = 0; k < nb_subfr; k++ ) {
            LPC_res_nrg     = silk_ADD32( LPC_res_nrg,     silk_RSHIFT( silk_ADD32( silk_SMULWB(  rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) );  /* Q( -maxRshifts ) */
            LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) );  /* Q( -maxRshifts ) */
        }
        LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 );   /* avoid division by zero */

        div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 );
        *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) );

        silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) );
    }

    /* smoothing */
    /* d = sum( B, 1 ); */
    b_Q14_ptr = b_Q14;
    for( k = 0; k < nb_subfr; k++ ) {
        d_Q14[ k ] = 0;
        for( i = 0; i < LTP_ORDER; i++ ) {
            d_Q14[ k ] += b_Q14_ptr[ i ];
        }
        b_Q14_ptr += LTP_ORDER;
    }

    /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */

    /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */
    max_abs_d_Q14 = 0;
    max_w_bits    = 0;
    for( k = 0; k < nb_subfr; k++ ) {
        max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) );
        /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */
        /* Find bits needed in Q( 18 - maxRshifts ) */
        max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts );
    }

    /* max_abs_d_Q14 = (5 << 15); worst case, i.e. LTP_ORDER * -silk_int16_MIN */
    silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) );

    /* How many bits are needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */
    extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14;

    /* Subtract what we got available; bits in output var plus maxRshifts */
    extra_shifts -= ( 32 - 1 - 2 + maxRshifts );    /* Keep sign bit free as well as 2 bits for accumulation */
    extra_shifts = silk_max_int( extra_shifts, 0 );

    maxRshifts_wxtra = maxRshifts + extra_shifts;

    temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1;     /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */
    wd = 0;
    for( k = 0; k < nb_subfr; k++ ) {
        /* w has at least 2 bits of headroom so no overflow should happen */
        temp32 = silk_ADD32( temp32,                       silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) );                        /* Q( 18 - maxRshifts_wxtra ) */
        wd     = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) );     /* Q( 18 - maxRshifts_wxtra ) */
    }
    m_Q12 = silk_DIV32_varQ( wd, temp32, 12 );

    b_Q14_ptr = b_Q14;
    for( k = 0; k < nb_subfr; k++ ) {
        /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */
        if( 2 - corr_rshifts[ k ] > 0 ) {
            temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] );
        } else {
            temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 );
        }

        g_Q26 = silk_MUL(
            silk_DIV32(
                SILK_FIX_CONST( LTP_SMOOTHING, 26 ),
                silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ),                          /* Q10 */
            silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) );    /* Q16 */

        temp32 = 0;
        for( i = 0; i < LTP_ORDER; i++ ) {
            delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 );     /* 1638_Q14 = 0.1_Q0 */
            temp32 += delta_b_Q14[ i ];                                 /* Q14 */
        }
        temp32 = silk_DIV32( g_Q26, temp32 );                           /* Q14 -> Q12 */
        for( i = 0; i < LTP_ORDER; i++ ) {
            b_Q14_ptr[ i ] = silk_LIMIT_32( (opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 );
        }
        b_Q14_ptr += LTP_ORDER;
    }
}
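/* For reference, a minimal floating-point sketch of the smoothing pass above,
   following the equations quoted in its comments ( d = sum( B, 1 ) and
   m = ( w * d' ) / ( sum( w ) + 1e-3 ) ) without the Q-format shift
   bookkeeping. The helper name is an assumption for illustration, not part of
   the codec; it assumes LTP_ORDER, MAX_NB_SUBFR and the float tuning constant
   LTP_SMOOTHING are available from the SILK headers. The FIX code additionally
   limits each tap to [ -16000, 28000 ] in Q14. */
static void silk_smooth_LTP_taps_sketch(
    float           b[],                    /* I/O  nb_subfr * LTP_ORDER LTP taps   */
    const float     w[ MAX_NB_SUBFR ],      /* I    per-subframe weights            */
    const int       nb_subfr                /* I    number of subframes             */
)
{
    int   i, k;
    float d[ MAX_NB_SUBFR ], delta_b[ LTP_ORDER ];
    float m, g, w_sum, temp;
    float *b_ptr;

    /* d = sum( B, 1 ); */
    b_ptr = b;
    for( k = 0; k < nb_subfr; k++ ) {
        d[ k ] = 0.0f;
        for( i = 0; i < LTP_ORDER; i++ ) {
            d[ k ] += b_ptr[ i ];
        }
        b_ptr += LTP_ORDER;
    }

    /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */
    w_sum = 1e-3f;
    m     = 0.0f;
    for( k = 0; k < nb_subfr; k++ ) {
        w_sum += w[ k ];
        m     += w[ k ] * d[ k ];
    }
    m = m / w_sum;

    /* Pull each subframe's tap sum towards m; the correction is spread over
       the taps in proportion to delta_b, with each tap floored at 0.1 */
    b_ptr = b;
    for( k = 0; k < nb_subfr; k++ ) {
        g    = LTP_SMOOTHING / ( LTP_SMOOTHING + w[ k ] ) * ( m - d[ k ] );
        temp = 0.0f;
        for( i = 0; i < LTP_ORDER; i++ ) {
            delta_b[ i ] = b_ptr[ i ] > 0.1f ? b_ptr[ i ] : 0.1f;
            temp += delta_b[ i ];
        }
        temp = g / temp;
        for( i = 0; i < LTP_ORDER; i++ ) {
            b_ptr[ i ] += delta_b[ i ] * temp;
        }
        b_ptr += LTP_ORDER;
    }
}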
SKP_INLINE void SKP_Silk_LDL_factorize_FIX(
    SKP_int32           *A,         /* I    Pointer to Symmetric Square Matrix                          */
    SKP_int             M,          /* I    Size of Matrix                                              */
    SKP_int32           *L_Q16,     /* I/O  Pointer to Square Upper triangular Matrix                   */
    inv_D_t             *inv_D      /* I/O  Pointer to vector holding inverted diagonal elements of D   */
)
{
    SKP_int   i, j, k, status, loop_count;
    const SKP_int32 *ptr1, *ptr2;
    SKP_int32 diag_min_value, tmp_32, err;
    SKP_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ];
    SKP_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48;

    SKP_assert( M <= MAX_MATRIX_SIZE );

    status = 1;
    diag_min_value = SKP_max_32( SKP_SMMUL( SKP_ADD_SAT32( A[ 0 ], A[ SKP_SMULBB( M, M ) - 1 ] ), FIND_LTP_COND_FAC_Q31 ), 1 << 9 );
    for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) {
        status = 0;
        for( j = 0; j < M; j++ ) {
            ptr1 = matrix_adr( L_Q16, j, 0, M );
            tmp_32 = 0;
            for( i = 0; i < j; i++ ) {
                v_Q0[ i ] = SKP_SMULWW( D_Q0[ i ], ptr1[ i ] );         /* Q0 */
                tmp_32    = SKP_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */
            }
            tmp_32 = SKP_SUB32( matrix_ptr( A, j, j, M ), tmp_32 );

            if( tmp_32 < diag_min_value ) {
                tmp_32 = SKP_SUB32( SKP_SMULBB( loop_count + 1, diag_min_value ), tmp_32 );
                /* Matrix not positive semi-definite, or ill conditioned */
                for( i = 0; i < M; i++ ) {
                    matrix_ptr( A, i, i, M ) = SKP_ADD32( matrix_ptr( A, i, i, M ), tmp_32 );
                }
                status = 1;
                break;
            }
            D_Q0[ j ] = tmp_32;     /* always < max(Correlation) */

            /* two-step division */
            one_div_diag_Q36 = SKP_INVERSE32_varQ( tmp_32, 36 );                    /* Q36 */
            one_div_diag_Q40 = SKP_LSHIFT( one_div_diag_Q36, 4 );                   /* Q40 */
            err = SKP_SUB32( 1 << 24, SKP_SMULWW( tmp_32, one_div_diag_Q40 ) );     /* Q24 */
            one_div_diag_Q48 = SKP_SMULWW( err, one_div_diag_Q40 );                 /* Q48 */

            /* Save 1/Ds */
            inv_D[ j ].Q36_part = one_div_diag_Q36;
            inv_D[ j ].Q48_part = one_div_diag_Q48;

            matrix_ptr( L_Q16, j, j, M ) = 65536;   /* 1.0 in Q16 */
            ptr1 = matrix_adr( A, j, 0, M );
            ptr2 = matrix_adr( L_Q16, j + 1, 0, M );
            for( i = j + 1; i < M; i++ ) {
                tmp_32 = 0;
                for( k = 0; k < j; k++ ) {
                    tmp_32 = SKP_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] );    /* Q0 */
                }
                tmp_32 = SKP_SUB32( ptr1[ i ], tmp_32 );    /* always < max(Correlation) */

                /* tmp_32 / D_Q0[ j ] : Divide to Q16 */
                matrix_ptr( L_Q16, i, j, M ) = SKP_ADD32( SKP_SMMUL( tmp_32, one_div_diag_Q48 ),
                    SKP_RSHIFT( SKP_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );

                /* go to next column */
                ptr2 += M;
            }
        }
    }
    SKP_assert( status == 0 );
}
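/* A minimal floating-point sketch of the factorization computed above:
   A = L * D * L' with unit-diagonal lower-triangular L. The fixed-point
   routine stores L in Q16 and 1/D[ j ] split into Q36 and Q48 parts; its
   "two-step division" computes the Q24 residual err = 1 - D[ j ] * (1/D[ j ])
   of the approximate reciprocal and adds err * (1/D[ j ]), i.e. one
   Newton-Raphson refinement step. The helper below is an assumption for
   illustration, not part of the codec. */
static void LDL_factorize_sketch(
    const float *A,     /* I  M-by-M symmetric matrix, row-major        */
    int         M,      /* I  matrix size                               */
    float       *L,     /* O  M-by-M unit lower triangular, row-major   */
    float       *D      /* O  M diagonal entries                        */
)
{
    int   i, j, k;
    float sum;

    for( j = 0; j < M; j++ ) {
        /* D[ j ] = A[ j ][ j ] - sum_{k<j} L[ j ][ k ]^2 * D[ k ] */
        sum = A[ j * M + j ];
        for( k = 0; k < j; k++ ) {
            sum -= L[ j * M + k ] * L[ j * M + k ] * D[ k ];
        }
        D[ j ] = sum;           /* the FIX code regularizes A and retries when
                                   this pivot falls below diag_min_value */
        L[ j * M + j ] = 1.0f;  /* 65536 in Q16 above */

        /* Column j of L below the diagonal */
        for( i = j + 1; i < M; i++ ) {
            sum = A[ i * M + j ];
            for( k = 0; k < j; k++ ) {
                sum -= L[ i * M + k ] * L[ j * M + k ] * D[ k ];
            }
            L[ i * M + j ] = sum / D[ j ];
        }
    }
}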
/* Calculates correlation matrix X'*X */
void silk_corrMatrix_FIX(
    const opus_int16    *x,         /* I    x vector [L + order - 1] used to form data matrix X    */
    const opus_int      L,          /* I    Length of vectors                                       */
    const opus_int      order,      /* I    Max lag for correlation                                 */
    const opus_int      head_room,  /* I    Desired headroom                                        */
    opus_int32          *XX,        /* O    Pointer to X'*X correlation matrix [ order x order ]    */
    opus_int            *rshifts    /* I/O  Right shifts of correlations                            */
)
{
    opus_int i, j, lag, rshifts_local, head_room_rshifts;
    opus_int32 energy;
    const opus_int16 *ptr1, *ptr2;

    /* Calculate energy to find shift used to fit in 32 bits */
    silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 );
    /* Add shifts to get the desired head room */
    head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 );

    energy = silk_RSHIFT32( energy, head_room_rshifts );
    rshifts_local += head_room_rshifts;

    /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */
    /* Remove contribution of first order - 1 samples */
    for( i = 0; i < order - 1; i++ ) {
        energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local );
    }
    if( rshifts_local < *rshifts ) {
        /* Adjust energy */
        energy = silk_RSHIFT32( energy, *rshifts - rshifts_local );
        rshifts_local = *rshifts;
    }

    /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */
    /* Fill out the diagonal of the correlation matrix */
    matrix_ptr( XX, 0, 0, order ) = energy;
    ptr1 = &x[ order - 1 ];     /* First sample of column 0 of X */
    for( j = 1; j < order; j++ ) {
        energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) );
        energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) );
        matrix_ptr( XX, j, j, order ) = energy;
    }

    ptr2 = &x[ order - 2 ];     /* First sample of column 1 of X */
    /* Calculate the remaining elements of the correlation matrix */
    if( rshifts_local > 0 ) {
        /* Right shifting used */
        for( lag = 1; lag < order; lag++ ) {
            /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
            energy = 0;
            for( i = 0; i < L; i++ ) {
                energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[ i ] ), rshifts_local );
            }
            /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
            matrix_ptr( XX, lag, 0, order ) = energy;
            matrix_ptr( XX, 0, lag, order ) = energy;
            for( j = 1; j < ( order - lag ); j++ ) {
                energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) );
                energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) );
                matrix_ptr( XX, lag + j, j, order ) = energy;
                matrix_ptr( XX, j, lag + j, order ) = energy;
            }
            ptr2--;     /* Update pointer to first sample of next column (lag) in X */
        }
    } else {
        for( lag = 1; lag < order; lag++ ) {
            /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
            energy = silk_inner_prod_aligned( ptr1, ptr2, L );
            matrix_ptr( XX, lag, 0, order ) = energy;
            matrix_ptr( XX, 0, lag, order ) = energy;
            /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
            for( j = 1; j < ( order - lag ); j++ ) {
                energy = silk_SUB32( energy, silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ) );
                energy = silk_SMLABB( energy, ptr1[ -j ], ptr2[ -j ] );
                matrix_ptr( XX, lag + j, j, order ) = energy;
                matrix_ptr( XX, j, lag + j, order ) = energy;
            }
            ptr2--;     /* Update pointer to first sample of next column (lag) in X */
        }
    }
    *rshifts = rshifts_local;
}
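/* For reference, a brute-force sketch of the quantity computed above, without
   the shift management or the incremental column updates: column j of the
   data matrix X starts at x[ order - 1 - j ], so
   XX[ i ][ j ] = sum_{n=0..L-1} x[ order - 1 - i + n ] * x[ order - 1 - j + n ].
   The float accumulator and helper name are assumptions for illustration, not
   part of the codec. */
static void silk_corrMatrix_sketch(
    const opus_int16    *x,         /* I  x vector [ L + order - 1 ]        */
    const int           L,          /* I  Length of vectors                 */
    const int           order,      /* I  Max lag for correlation           */
    float               *XX         /* O  order-by-order matrix, row-major  */
)
{
    int   i, j, n;
    float sum;

    for( i = 0; i < order; i++ ) {
        for( j = 0; j <= i; j++ ) {
            sum = 0.0f;
            for( n = 0; n < L; n++ ) {
                sum += (float)x[ order - 1 - i + n ] * (float)x[ order - 1 - j + n ];
            }
            /* X'*X is symmetric, so mirror each inner product */
            XX[ i * order + j ] = sum;
            XX[ j * order + i ] = sum;
        }
    }
}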