/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
static OPUS_INLINE void silk_LS_SolveLast_FIX(
    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
    const opus_int      M,          /* I    Dim of Matrix equation                                      */
    const opus_int32    *b,         /* I    b Vector                                                    */
    opus_int32          *x_Q16      /* O    x Vector                                                    */
)
{
    opus_int i, j;
    const opus_int32 *ptr32;
    opus_int32 tmp_32;

    for( i = M - 1; i >= 0; i-- ) {
        /* Column i of L, stepped row-wise: ptr32[ j * M ] is L( j, i ) = L^t( i, j ) */
        ptr32 = matrix_adr( L_Q16, 0, i, M );
        tmp_32 = 0;
        for( j = M - 1; j > i; j-- ) {
            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] );
        }
        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
    }
}
/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
static OPUS_INLINE void silk_LS_SolveFirst_FIX(
    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
    opus_int            M,          /* I    Dim of Matrix equation                                      */
    const opus_int32    *b,         /* I    b Vector                                                    */
    opus_int32          *x_Q16      /* O    x Vector                                                    */
)
{
    opus_int i, j;
    const opus_int32 *ptr32;
    opus_int32 tmp_32;

    for( i = 0; i < M; i++ ) {
        ptr32 = matrix_adr( L_Q16, i, 0, M );
        tmp_32 = 0;
        for( j = 0; j < i; j++ ) {
            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] );
        }
        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
    }
}
/* LDL factorization: find lower triangular L (unit diagonal) and diagonal D such that A = L*D*L' */
static OPUS_INLINE void silk_LDL_factorize_FIX(
    opus_int32          *A,         /* I/O  Pointer to Symmetric Square Matrix                          */
    opus_int            M,          /* I    Size of Matrix                                              */
    opus_int32          *L_Q16,     /* I/O  Pointer to Square Lower triangular Matrix                   */
    inv_D_t             *inv_D      /* I/O  Pointer to vector holding inverted diagonal elements of D   */
)
{
    opus_int   i, j, k, status, loop_count;
    const opus_int32 *ptr1, *ptr2;
    opus_int32 diag_min_value, tmp_32, err;
    opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ];
    opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48;

    silk_assert( M <= MAX_MATRIX_SIZE );

    status = 1;
    diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 );
    for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) {
        status = 0;
        for( j = 0; j < M; j++ ) {
            ptr1 = matrix_adr( L_Q16, j, 0, M );
            tmp_32 = 0;
            for( i = 0; i < j; i++ ) {
                v_Q0[ i ] = silk_SMULWW( D_Q0[ i ], ptr1[ i ] );     /* Q0 */
                tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */
            }
            tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 );

            if( tmp_32 < diag_min_value ) {
                tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 );
                /* Matrix not positive semi-definite, or ill conditioned */
                for( i = 0; i < M; i++ ) {
                    matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 );
                }
                status = 1;
                break;
            }
            D_Q0[ j ] = tmp_32; /* always < max(Correlation) */

            /* two-step division */
            one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 );                                    /* Q36 */
            one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 );                                   /* Q40 */
            err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) );        /* Q24 */
            one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 );                                 /* Q48 */

            /* Save 1/Ds */
            inv_D[ j ].Q36_part = one_div_diag_Q36;
            inv_D[ j ].Q48_part = one_div_diag_Q48;

            matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */
            ptr1 = matrix_adr( A, j, 0, M );
            ptr2 = matrix_adr( L_Q16, j + 1, 0, M );
            for( i = j + 1; i < M; i++ ) {
                tmp_32 = 0;
                for( k = 0; k < j; k++ ) {
                    tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */
                }
                tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */

                /* tmp_32 / D_Q0[j] : Divide to Q16 */
                matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ),
                    silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );

                /* go to next column */
                ptr2 += M;
            }
        }
    }

    silk_assert( status == 0 );
}
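/* A minimal sketch of how the three helpers above combine to solve A*x = b via
 * the factorization A = L*D*L', mirroring silk_solve_LDL_FIX from the same
 * file. The divide-by-D step assumes silk_LS_divide_Q16_FIX (same file, not
 * shown above); the stack-allocation macros (VARDECL/ALLOC) are omitted. */
static void solve_LDL_sketch(
    opus_int32          *A,         /* I/O  Symmetric square matrix, regularized if ill conditioned     */
    opus_int            M,          /* I    Size of matrix                                               */
    const opus_int32    *b,         /* I    b vector                                                     */
    opus_int32          *x_Q16      /* O    Solution vector, Q16                                         */
)
{
    opus_int32 L_Q16[ MAX_MATRIX_SIZE * MAX_MATRIX_SIZE ];
    opus_int32 Y[ MAX_MATRIX_SIZE ];
    inv_D_t    inv_D[ MAX_MATRIX_SIZE ];

    silk_assert( M <= MAX_MATRIX_SIZE );

    /* Factorize A = L*D*L', L lower triangular with unit diagonal */
    silk_LDL_factorize_FIX( A, M, L_Q16, inv_D );

    /* Forward substitution: L*Y = b */
    silk_LS_SolveFirst_FIX( L_Q16, M, b, Y );

    /* Scale by inv(D) in place, using the saved Q36/Q48 reciprocal parts */
    silk_LS_divide_Q16_FIX( Y, inv_D, M );

    /* Back substitution: L'*x = inv(D)*Y */
    silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 );
}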
/* Upsample by a factor 2, high quality */
/* Uses 2nd order allpass filters for the 2x upsampling, followed by a      */
/* notch filter just above Nyquist.                                         */
void silk_resampler_private_up2_HQ(
    opus_int32          *S,         /* I/O  Resampler state [ 6 ]                                       */
    opus_int16          *out,       /* O    Output signal [ 2 * len ]                                   */
    const opus_int16    *in,        /* I    Input signal [ len ]                                        */
    opus_int32          len         /* I    Number of input samples                                     */
)
{
    opus_int32 k;
    opus_int32 in32, out32_1, out32_2, Y, X;

    silk_assert( silk_resampler_up2_hq_0[ 0 ] > 0 );
    silk_assert( silk_resampler_up2_hq_0[ 1 ] > 0 );
    silk_assert( silk_resampler_up2_hq_0[ 2 ] < 0 );
    silk_assert( silk_resampler_up2_hq_1[ 0 ] > 0 );
    silk_assert( silk_resampler_up2_hq_1[ 1 ] > 0 );
    silk_assert( silk_resampler_up2_hq_1[ 2 ] < 0 );

    /* Internal variables and state are in Q10 format */
    for( k = 0; k < len; k++ ) {
        /* Convert to Q10 */
        in32 = silk_LSHIFT( (opus_int32)in[ k ], 10 );

        /* First all-pass section for even output sample */
        Y       = silk_SUB32( in32, S[ 0 ] );
        X       = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 0 ] );
        out32_1 = silk_ADD32( S[ 0 ], X );
        S[ 0 ]  = silk_ADD32( in32, X );

        /* Second all-pass section for even output sample */
        Y       = silk_SUB32( out32_1, S[ 1 ] );
        X       = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 1 ] );
        out32_2 = silk_ADD32( S[ 1 ], X );
        S[ 1 ]  = silk_ADD32( out32_1, X );

        /* Third all-pass section for even output sample */
        Y       = silk_SUB32( out32_2, S[ 2 ] );
        X       = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_0[ 2 ] );
        out32_1 = silk_ADD32( S[ 2 ], X );
        S[ 2 ]  = silk_ADD32( out32_2, X );

        /* Convert back to int16 and store to output */
        out[ 2 * k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) );

        /* First all-pass section for odd output sample */
        Y       = silk_SUB32( in32, S[ 3 ] );
        X       = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 0 ] );
        out32_1 = silk_ADD32( S[ 3 ], X );
        S[ 3 ]  = silk_ADD32( in32, X );

        /* Second all-pass section for odd output sample */
        Y       = silk_SUB32( out32_1, S[ 4 ] );
        X       = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 1 ] );
        out32_2 = silk_ADD32( S[ 4 ], X );
        S[ 4 ]  = silk_ADD32( out32_1, X );

        /* Third all-pass section for odd output sample */
        Y       = silk_SUB32( out32_2, S[ 5 ] );
        X       = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_1[ 2 ] );
        out32_1 = silk_ADD32( S[ 5 ], X );
        S[ 5 ]  = silk_ADD32( out32_2, X );

        /* Convert back to int16 and store to output */
        out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) );
    }
}
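/* A minimal usage sketch (the wrapper name is hypothetical): 2x-upsample one
 * block of 16-bit samples. The 6-element state must be zeroed before the
 * first call and carried unchanged across consecutive blocks of a stream. */
static void up2_hq_example( const opus_int16 *in, opus_int16 *out, opus_int32 len )
{
    opus_int32 S[ 6 ] = { 0 };                        /* allpass state, Q10 */
    silk_resampler_private_up2_HQ( S, out, in, len ); /* writes 2 * len samples */
}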
/* Compute inverse of LPC prediction gain, and                           */
/* test if LPC coefficients are stable (all poles within unit circle)    */
static opus_int32 LPC_inverse_pred_gain_QA_c(       /* O    Returns inverse prediction gain in energy domain, Q30       */
    opus_int32          A_QA[ SILK_MAX_ORDER_LPC ], /* I    Prediction coefficients                                     */
    const opus_int      order                       /* I    Prediction order                                            */
)
{
    opus_int   k, n, mult2Q;
    opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp1, tmp2;

    invGain_Q30 = SILK_FIX_CONST( 1, 30 );
    for( k = order - 1; k > 0; k-- ) {
        /* Check for stability */
        if( ( A_QA[ k ] > A_LIMIT ) || ( A_QA[ k ] < -A_LIMIT ) ) {
            return 0;
        }

        /* Set RC equal to negated AR coef */
        rc_Q31 = -silk_LSHIFT( A_QA[ k ], 31 - QA );

        /* rc_mult1_Q30 range: [ 1 : 2^30 ] */
        rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );
        silk_assert( rc_mult1_Q30 > ( 1 << 15 ) );   /* reduce A_LIMIT if fails */
        silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );

        /* Update inverse gain */
        /* invGain_Q30 range: [ 0 : 2^30 ] */
        invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
        silk_assert( invGain_Q30 >= 0 );
        silk_assert( invGain_Q30 <= ( 1 << 30 ) );
        if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
            return 0;
        }

        /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
        mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
        rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );

        /* Update AR coefficient */
        for( n = 0; n < (k + 1) >> 1; n++ ) {
            opus_int64 tmp64;
            tmp1 = A_QA[ n ];
            tmp2 = A_QA[ k - n - 1 ];
            tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32( tmp1,
                  MUL32_FRAC_Q( tmp2, rc_Q31, 31 ) ), rc_mult2 ), mult2Q );
            if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
                return 0;
            }
            A_QA[ n ] = ( opus_int32 )tmp64;
            tmp64 = silk_RSHIFT_ROUND64( silk_SMULL( silk_SUB_SAT32( tmp2,
                  MUL32_FRAC_Q( tmp1, rc_Q31, 31 ) ), rc_mult2 ), mult2Q );
            if( tmp64 > silk_int32_MAX || tmp64 < silk_int32_MIN ) {
                return 0;
            }
            A_QA[ k - n - 1 ] = ( opus_int32 )tmp64;
        }
    }

    /* Check for stability */
    if( ( A_QA[ 0 ] > A_LIMIT ) || ( A_QA[ 0 ] < -A_LIMIT ) ) {
        return 0;
    }

    /* Set RC equal to negated AR coef */
    rc_Q31 = -silk_LSHIFT( A_QA[ 0 ], 31 - QA );

    /* Range: [ 1 : 2^30 ] */
    rc_mult1_Q30 = silk_SUB32( SILK_FIX_CONST( 1, 30 ), silk_SMMUL( rc_Q31, rc_Q31 ) );

    /* Update inverse gain */
    /* Range: [ 0 : 2^30 ] */
    invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
    silk_assert( invGain_Q30 >= 0 );
    silk_assert( invGain_Q30 <= ( 1 << 30 ) );
    if( invGain_Q30 < SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN, 30 ) ) {
        return 0;
    }

    return invGain_Q30;
}
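/* Sketch of a typical caller (the function name here is illustrative): lift
 * Q12 LPC coefficients into the QA domain and treat a zero return value as
 * "unstable filter". The early DC-response short-circuit mirrors the public
 * silk_LPC_inverse_pred_gain() wrapper. Assumes QA >= 12. */
static opus_int32 lpc_stability_check_sketch( const opus_int16 *A_Q12, opus_int order )
{
    opus_int   k;
    opus_int32 DC_resp = 0;
    opus_int32 Atmp_QA[ SILK_MAX_ORDER_LPC ];

    /* Increase Q domain of the AR coefficients */
    for( k = 0; k < order; k++ ) {
        DC_resp += (opus_int32)A_Q12[ k ];
        Atmp_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 );
    }
    /* DC gain of 1 or larger: the filter cannot be stable, skip the recursion */
    if( DC_resp >= 4096 ) {
        return 0;
    }
    return LPC_inverse_pred_gain_QA_c( Atmp_QA, order );
}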
/* Calculates correlation matrix X'*X */
void silk_corrMatrix_FIX(
    const opus_int16    *x,         /* I    x vector [L + order - 1] used to form data matrix X        */
    const opus_int      L,          /* I    Length of vectors                                          */
    const opus_int      order,      /* I    Max lag for correlation                                    */
    const opus_int      head_room,  /* I    Desired headroom                                           */
    opus_int32          *XX,        /* O    Pointer to X'*X correlation matrix [ order x order ]       */
    opus_int            *rshifts    /* I/O  Right shifts of correlations                               */
)
{
    opus_int i, j, lag, rshifts_local, head_room_rshifts;
    opus_int32 energy;
    const opus_int16 *ptr1, *ptr2;

    /* Calculate energy to find shift used to fit in 32 bits */
    silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 );
    /* Add shifts to get the desired head room */
    head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 );

    energy = silk_RSHIFT32( energy, head_room_rshifts );
    rshifts_local += head_room_rshifts;

    /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */
    /* Remove contribution of first order - 1 samples */
    for( i = 0; i < order - 1; i++ ) {
        energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local );
    }
    if( rshifts_local < *rshifts ) {
        /* Adjust energy */
        energy = silk_RSHIFT32( energy, *rshifts - rshifts_local );
        rshifts_local = *rshifts;
    }

    /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */
    /* Fill out the diagonal of the correlation matrix */
    matrix_ptr( XX, 0, 0, order ) = energy;
    ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */
    for( j = 1; j < order; j++ ) {
        energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) );
        energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) );
        matrix_ptr( XX, j, j, order ) = energy;
    }

    ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */
    /* Calculate the remaining elements of the correlation matrix */
    if( rshifts_local > 0 ) {
        /* Right shifting used */
        for( lag = 1; lag < order; lag++ ) {
            /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
            energy = 0;
            for( i = 0; i < L; i++ ) {
                energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[ i ] ), rshifts_local );
            }
            /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
            matrix_ptr( XX, lag, 0, order ) = energy;
            matrix_ptr( XX, 0, lag, order ) = energy;
            for( j = 1; j < ( order - lag ); j++ ) {
                energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) );
                energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) );
                matrix_ptr( XX, lag + j, j, order ) = energy;
                matrix_ptr( XX, j, lag + j, order ) = energy;
            }
            ptr2--; /* Update pointer to first sample of next column (lag) in X */
        }
    } else {
        for( lag = 1; lag < order; lag++ ) {
            /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */
            energy = silk_inner_prod_aligned( ptr1, ptr2, L );
            matrix_ptr( XX, lag, 0, order ) = energy;
            matrix_ptr( XX, 0, lag, order ) = energy;
            /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */
            for( j = 1; j < ( order - lag ); j++ ) {
                energy = silk_SUB32( energy, silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ) );
                energy = silk_SMLABB( energy, ptr1[ -j ], ptr2[ -j ] );
                matrix_ptr( XX, lag + j, j, order ) = energy;
                matrix_ptr( XX, j, lag + j, order ) = energy;
            }
            ptr2--; /* Update pointer to first sample of next column (lag) in X */
        }
    }
    *rshifts = rshifts_local;
}
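/* Sketch of the typical normal-equations use: X'*X from silk_corrMatrix_FIX,
 * together with the correlation vector X'*t (silk_corrVector_FIX, same file
 * but not shown here), feeds the LDL solver above to obtain predictor
 * coefficients. The function name, buffer sizes, and the simple rshifts
 * handling are illustrative; LTP_CORRS_HEAD_ROOM is the head room constant
 * used elsewhere in the SILK fixed-point code. */
static void normal_equations_sketch( const opus_int16 *x, const opus_int16 *t,
    opus_int L, opus_int order, opus_int32 *coefs_Q16 )
{
    opus_int   rshifts = 0;
    opus_int32 XX[ MAX_MATRIX_SIZE * MAX_MATRIX_SIZE ];
    opus_int32 Xt[ MAX_MATRIX_SIZE ];

    /* Build X'*X and X'*t with a common right-shift */
    silk_corrMatrix_FIX( x, L, order, LTP_CORRS_HEAD_ROOM, XX, &rshifts );
    silk_corrVector_FIX( x, t, L, order, Xt, rshifts );

    /* Solve ( X'*X ) * c = X'*t for the predictor c, in Q16 */
    solve_LDL_sketch( XX, order, Xt, coefs_Q16 );
}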
/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
void silk_VQ_WMat_EC(
    opus_int8           *ind,           /* O    index of best codebook vector                           */
    opus_int32          *rate_dist_Q14, /* O    best weighted quant error + mu * rate                   */
    opus_int            *gain_Q7,       /* O    sum of absolute LTP coefficients                        */
    const opus_int16    *in_Q14,        /* I    input vector to be quantized                            */
    const opus_int32    *W_Q18,         /* I    weighting matrix                                        */
    const opus_int8     *cb_Q7,         /* I    codebook                                                */
    const opus_uint8    *cb_gain_Q7,    /* I    codebook effective gain                                 */
    const opus_uint8    *cl_Q5,         /* I    code length for each codebook vector                    */
    const opus_int      mu_Q9,          /* I    tradeoff betw. weighted error and rate                  */
    const opus_int32    max_gain_Q7,    /* I    maximum sum of absolute LTP coefficients                */
    opus_int            L               /* I    number of vectors in codebook                           */
)
{
    opus_int   k, gain_tmp_Q7;
    const opus_int8 *cb_row_Q7;
    opus_int16 diff_Q14[ 5 ];
    opus_int32 sum1_Q14, sum2_Q16;

    /* Loop over codebook */
    *rate_dist_Q14 = silk_int32_MAX;
    cb_row_Q7 = cb_Q7;
    for( k = 0; k < L; k++ ) {
        gain_tmp_Q7 = cb_gain_Q7[ k ];

        diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );
        diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 );
        diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 );
        diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 );
        diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 );

        /* Weighted rate */
        sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );

        /* Penalty for too large gain */
        sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );

        silk_assert( sum1_Q14 >= 0 );

        /* Weighted quantization error: diff' * W * diff. W is symmetric, so  */
        /* each off-diagonal product is computed once and doubled (LSHIFT 1). */
        /* first row of W_Q18 */
        sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] );
        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] );
        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,   diff_Q14[ 0 ] );

        /* second row of W_Q18 */
        sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] );
        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] );
        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,   diff_Q14[ 1 ] );

        /* third row of W_Q18 */
        sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] );
        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] );
        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 2 ] );

        /* fourth row of W_Q18 */
        sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] );
        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] );
        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 3 ] );

        /* last row of W_Q18 */
        sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] );
        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] );

        silk_assert( sum1_Q14 >= 0 );

        /* find best */
        if( sum1_Q14 < *rate_dist_Q14 ) {
            *rate_dist_Q14 = sum1_Q14;
            *ind = (opus_int8)k;
            *gain_Q7 = gain_tmp_Q7;
        }

        /* Go to next cbk vector */
        cb_row_Q7 += LTP_ORDER;
    }
}
/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
void silk_VQ_WMat_EC_c(
    opus_int8           *ind,           /* O    index of best codebook vector                           */
    opus_int32          *res_nrg_Q15,   /* O    best residual energy                                    */
    opus_int32          *rate_dist_Q8,  /* O    best total bitrate                                      */
    opus_int            *gain_Q7,       /* O    sum of absolute LTP coefficients                        */
    const opus_int32    *XX_Q17,        /* I    correlation matrix                                      */
    const opus_int32    *xX_Q17,        /* I    correlation vector                                      */
    const opus_int8     *cb_Q7,         /* I    codebook                                                */
    const opus_uint8    *cb_gain_Q7,    /* I    codebook effective gain                                 */
    const opus_uint8    *cl_Q5,         /* I    code length for each codebook vector                    */
    const opus_int      subfr_len,      /* I    number of samples per subframe                          */
    const opus_int32    max_gain_Q7,    /* I    maximum sum of absolute LTP coefficients                */
    const opus_int      L               /* I    number of vectors in codebook                           */
)
{
    opus_int   k, gain_tmp_Q7;
    const opus_int8 *cb_row_Q7;
    opus_int32 neg_xX_Q24[ 5 ];
    opus_int32 sum1_Q15, sum2_Q24;
    opus_int32 bits_res_Q8, bits_tot_Q8;

    /* Negate and convert to new Q domain */
    neg_xX_Q24[ 0 ] = -silk_LSHIFT32( xX_Q17[ 0 ], 7 );
    neg_xX_Q24[ 1 ] = -silk_LSHIFT32( xX_Q17[ 1 ], 7 );
    neg_xX_Q24[ 2 ] = -silk_LSHIFT32( xX_Q17[ 2 ], 7 );
    neg_xX_Q24[ 3 ] = -silk_LSHIFT32( xX_Q17[ 3 ], 7 );
    neg_xX_Q24[ 4 ] = -silk_LSHIFT32( xX_Q17[ 4 ], 7 );

    /* Loop over codebook */
    *rate_dist_Q8 = silk_int32_MAX;
    *res_nrg_Q15 = silk_int32_MAX;
    cb_row_Q7 = cb_Q7;
    /* If things go really bad, at least *ind is set to something safe. */
    *ind = 0;
    for( k = 0; k < L; k++ ) {
        opus_int32 penalty;
        gain_tmp_Q7 = cb_gain_Q7[ k ];
        /* Weighted rate */
        /* Quantization error: 1 - 2 * xX * cb + cb' * XX * cb */
        sum1_Q15 = SILK_FIX_CONST( 1.001, 15 );

        /* Penalty for too large gain */
        penalty = silk_LSHIFT32( silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 11 );

        /* first row of XX_Q17 */
        sum2_Q24 = silk_MLA( neg_xX_Q24[ 0 ], XX_Q17[ 1 ], cb_row_Q7[ 1 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 2 ], cb_row_Q7[ 2 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 3 ], cb_row_Q7[ 3 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 4 ], cb_row_Q7[ 4 ] );
        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 0 ], cb_row_Q7[ 0 ] );
        sum1_Q15 = silk_SMLAWB( sum1_Q15,     sum2_Q24,    cb_row_Q7[ 0 ] );

        /* second row of XX_Q17 */
        sum2_Q24 = silk_MLA( neg_xX_Q24[ 1 ], XX_Q17[ 7 ], cb_row_Q7[ 2 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 8 ], cb_row_Q7[ 3 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 9 ], cb_row_Q7[ 4 ] );
        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 6 ], cb_row_Q7[ 1 ] );
        sum1_Q15 = silk_SMLAWB( sum1_Q15,     sum2_Q24,    cb_row_Q7[ 1 ] );

        /* third row of XX_Q17 */
        sum2_Q24 = silk_MLA( neg_xX_Q24[ 2 ], XX_Q17[ 13 ], cb_row_Q7[ 3 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 14 ], cb_row_Q7[ 4 ] );
        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 12 ], cb_row_Q7[ 2 ] );
        sum1_Q15 = silk_SMLAWB( sum1_Q15,     sum2_Q24,     cb_row_Q7[ 2 ] );

        /* fourth row of XX_Q17 */
        sum2_Q24 = silk_MLA( neg_xX_Q24[ 3 ], XX_Q17[ 19 ], cb_row_Q7[ 4 ] );
        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 18 ], cb_row_Q7[ 3 ] );
        sum1_Q15 = silk_SMLAWB( sum1_Q15,     sum2_Q24,     cb_row_Q7[ 3 ] );

        /* last row of XX_Q17 */
        sum2_Q24 = silk_LSHIFT32( neg_xX_Q24[ 4 ], 1 );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 24 ], cb_row_Q7[ 4 ] );
        sum1_Q15 = silk_SMLAWB( sum1_Q15,     sum2_Q24,     cb_row_Q7[ 4 ] );

        /* find best */
        if( sum1_Q15 >= 0 ) {
            /* Translate residual energy to bits using high-rate assumption (6 dB ==> 1 bit/sample) */
            bits_res_Q8 = silk_SMULBB( subfr_len, silk_lin2log( sum1_Q15 + penalty ) - ( 15 << 7 ) );
            /* In the following line we reduce the codelength component by half ("-1"); seems to slightly improve quality */
            bits_tot_Q8 = silk_ADD_LSHIFT32( bits_res_Q8, cl_Q5[ k ], 3 - 1 );
            if( bits_tot_Q8 <= *rate_dist_Q8 ) {
                *rate_dist_Q8 = bits_tot_Q8;
                *res_nrg_Q15 = sum1_Q15 + penalty;
                *ind = (opus_int8)k;
                *gain_Q7 = gain_tmp_Q7;
            }
        }

        /* Go to next cbk vector */
        cb_row_Q7 += LTP_ORDER;
    }
}
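/* Sketch of a call site (the wrapper name and codebook-table arguments are
 * illustrative): search one codebook of L rows for the LTP gain vector of a
 * single subframe, given the 5x5 correlation matrix and 5-tap correlation
 * vector, returning the winning index. */
static opus_int8 vq_search_sketch( const opus_int32 XX_Q17[ 25 ], const opus_int32 xX_Q17[ 5 ],
    const opus_int8 *cb_Q7, const opus_uint8 *cb_gain_Q7, const opus_uint8 *cl_Q5,
    opus_int subfr_len, opus_int32 max_gain_Q7, opus_int L )
{
    opus_int8  ind;
    opus_int   gain_Q7;
    opus_int32 res_nrg_Q15, rate_dist_Q8;

    silk_VQ_WMat_EC_c( &ind, &res_nrg_Q15, &rate_dist_Q8, &gain_Q7,
        XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, subfr_len, max_gain_Q7, L );
    return ind;
}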
/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
void silk_VQ_WMat_EC_sse4_1(
    opus_int8           *ind,           /* O    index of best codebook vector                           */
    opus_int32          *rate_dist_Q14, /* O    best weighted quant error + mu * rate                   */
    opus_int            *gain_Q7,       /* O    sum of absolute LTP coefficients                        */
    const opus_int16    *in_Q14,        /* I    input vector to be quantized                            */
    const opus_int32    *W_Q18,         /* I    weighting matrix                                        */
    const opus_int8     *cb_Q7,         /* I    codebook                                                */
    const opus_uint8    *cb_gain_Q7,    /* I    codebook effective gain                                 */
    const opus_uint8    *cl_Q5,         /* I    code length for each codebook vector                    */
    const opus_int      mu_Q9,          /* I    tradeoff betw. weighted error and rate                  */
    const opus_int32    max_gain_Q7,    /* I    maximum sum of absolute LTP coefficients                */
    opus_int            L               /* I    number of vectors in codebook                           */
)
{
    opus_int   k, gain_tmp_Q7;
    const opus_int8 *cb_row_Q7;
    opus_int16 diff_Q14[ 5 ];
    opus_int32 sum1_Q14, sum2_Q16;
    __m128i C_tmp1, C_tmp2, C_tmp3, C_tmp4, C_tmp5;

    /* Loop over codebook */
    *rate_dist_Q14 = silk_int32_MAX;
    cb_row_Q7 = cb_Q7;
    for( k = 0; k < L; k++ ) {
        gain_tmp_Q7 = cb_gain_Q7[ k ];

        diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 );

        C_tmp1 = OP_CVTEPI16_EPI32_M64( &in_Q14[ 1 ] );
        C_tmp2 = OP_CVTEPI8_EPI32_M32( &cb_row_Q7[ 1 ] );
        C_tmp2 = _mm_slli_epi32( C_tmp2, 7 );
        C_tmp1 = _mm_sub_epi32( C_tmp1, C_tmp2 );

        diff_Q14[ 1 ] = _mm_extract_epi16( C_tmp1, 0 );
        diff_Q14[ 2 ] = _mm_extract_epi16( C_tmp1, 2 );
        diff_Q14[ 3 ] = _mm_extract_epi16( C_tmp1, 4 );
        diff_Q14[ 4 ] = _mm_extract_epi16( C_tmp1, 6 );

        /* Weighted rate */
        sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] );

        /* Penalty for too large gain */
        sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 );

        silk_assert( sum1_Q14 >= 0 );

        /* first row of W_Q18 */
        C_tmp3 = _mm_loadu_si128( (__m128i *)(&W_Q18[ 1 ] ) );
        C_tmp4 = _mm_mul_epi32( C_tmp3, C_tmp1 );
        C_tmp4 = _mm_srli_si128( C_tmp4, 2 );

        C_tmp1 = _mm_shuffle_epi32( C_tmp1, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */
        C_tmp3 = _mm_shuffle_epi32( C_tmp3, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* shift right 4 bytes */

        C_tmp5 = _mm_mul_epi32( C_tmp3, C_tmp1 );
        C_tmp5 = _mm_srli_si128( C_tmp5, 2 );

        C_tmp5 = _mm_add_epi32( C_tmp4, C_tmp5 );
        C_tmp5 = _mm_slli_epi32( C_tmp5, 1 );

        C_tmp5 = _mm_add_epi32( C_tmp5, _mm_shuffle_epi32( C_tmp5, _MM_SHUFFLE( 0, 0, 0, 2 ) ) );
        sum2_Q16 = _mm_cvtsi128_si32( C_tmp5 );

        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] );
        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,   diff_Q14[ 0 ] );

        /* second row of W_Q18 */
        sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] );
        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] );
        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,   diff_Q14[ 1 ] );

        /* third row of W_Q18 */
        sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] );
        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] );
        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 2 ] );

        /* fourth row of W_Q18 */
        sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] );
        sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 );
        sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] );
        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16,    diff_Q14[ 3 ] );

        /* last row of W_Q18 */
        sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] );
        sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] );

        silk_assert( sum1_Q14 >= 0 );

        /* find best */
        if( sum1_Q14 < *rate_dist_Q14 ) {
            *rate_dist_Q14 = sum1_Q14;
            *ind = (opus_int8)k;
            *gain_Q7 = gain_tmp_Q7;
        }

        /* Go to next cbk vector */
        cb_row_Q7 += LTP_ORDER;
    }
}
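/* Hedged sketch of how a caller might pick between the scalar and SSE4.1
 * variants, which must produce bit-identical results. Real builds route this
 * through Opus's runtime CPU-detection function-pointer tables rather than a
 * branch; the have_sse4_1() predicate here is hypothetical. */
static void vq_wmat_ec_auto_sketch( opus_int8 *ind, opus_int32 *rate_dist_Q14,
    opus_int *gain_Q7, const opus_int16 *in_Q14, const opus_int32 *W_Q18,
    const opus_int8 *cb_Q7, const opus_uint8 *cb_gain_Q7, const opus_uint8 *cl_Q5,
    opus_int mu_Q9, opus_int32 max_gain_Q7, opus_int L )
{
    if( have_sse4_1() ) { /* hypothetical CPU-feature check */
        silk_VQ_WMat_EC_sse4_1( ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18,
            cb_Q7, cb_gain_Q7, cl_Q5, mu_Q9, max_gain_Q7, L );
    } else {
        silk_VQ_WMat_EC( ind, rate_dist_Q14, gain_Q7, in_Q14, W_Q18,
            cb_Q7, cb_gain_Q7, cl_Q5, mu_Q9, max_gain_Q7, L );
    }
}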