示例#1
0
/* Convert input to a linear scale    */
opus_int32 silk_log2lin( 
    const opus_int32            inLog_Q7            /* I  input on log scale                                            */
)
{
    opus_int32 out, frac_Q7;

    if( inLog_Q7 < 0 ) {
        return 0;
    }

    out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) );
    frac_Q7 = inLog_Q7 & 0x7F;
    if( inLog_Q7 < 2048 ) {
        /* Piece-wise parabolic approximation */
        out = silk_ADD_RSHIFT32( out, silk_MUL( out, silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ), 7 );
    } else {
        /* Piece-wise parabolic approximation */
        out = silk_MLA( out, silk_RSHIFT( out, 7 ), silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) );
    }
    return out;
}
示例#2
0
void silk_prefilter_FIX(
    silk_encoder_state_FIX          *psEnc,                                 /* I/O  Encoder state                                                               */
    const silk_encoder_control_FIX  *psEncCtrl,                             /* I    Encoder control                                                             */
    opus_int32                      xw_Q3[],                                /* O    Weighted signal                                                             */
    const opus_int16                x[]                                     /* I    Speech signal                                                               */
)
{
    silk_prefilter_state_FIX *P = &psEnc->sPrefilt;
    opus_int   j, k, lag;
    opus_int32 tmp_32;
    const opus_int16 *AR1_shp_Q13;
    const opus_int16 *px;
    opus_int32 *pxw_Q3;
    opus_int   HarmShapeGain_Q12, Tilt_Q14;
    opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14;
    VARDECL( opus_int32, x_filt_Q12 );
    VARDECL( opus_int32, st_res_Q2 );
    opus_int16 B_Q10[ 2 ];
    SAVE_STACK;

    /* Set up pointers */
    px  = x;
    pxw_Q3 = xw_Q3;
    lag = P->lagPrev;
    ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 );
    ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 );
    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
        /* Update Variables that change per sub frame */
        if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
            lag = psEncCtrl->pitchL[ k ];
        }

        /* Noise shape parameters */
        HarmShapeGain_Q12 = silk_SMULWB( (opus_int32)psEncCtrl->HarmShapeGain_Q14[ k ], 16384 - psEncCtrl->HarmBoost_Q14[ k ] );
        silk_assert( HarmShapeGain_Q12 >= 0 );
        HarmShapeFIRPacked_Q12  =                          silk_RSHIFT( HarmShapeGain_Q12, 2 );
        HarmShapeFIRPacked_Q12 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q12, 1 ), 16 );
        Tilt_Q14    = psEncCtrl->Tilt_Q14[   k ];
        LF_shp_Q14  = psEncCtrl->LF_shp_Q14[ k ];
        AR1_shp_Q13 = &psEncCtrl->AR1_Q13[   k * MAX_SHAPE_LPC_ORDER ];

        /* Short term FIR filtering*/
        silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px,
            psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder );

        /* Reduce (mainly) low frequencies during harmonic emphasis */
        B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 );
        tmp_32 = silk_SMLABB( SILK_FIX_CONST( INPUT_TILT, 26 ), psEncCtrl->HarmBoost_Q14[ k ], HarmShapeGain_Q12 );   /* Q26 */
        tmp_32 = silk_SMLABB( tmp_32, psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) );    /* Q26 */
        tmp_32 = silk_SMULWB( tmp_32, -psEncCtrl->GainsPre_Q14[ k ] );                                                /* Q24 */
        tmp_32 = silk_RSHIFT_ROUND( tmp_32, 14 );                                                                     /* Q10 */
        B_Q10[ 1 ]= silk_SAT16( tmp_32 );
        x_filt_Q12[ 0 ] = silk_MLA( silk_MUL( st_res_Q2[ 0 ], B_Q10[ 0 ] ), P->sHarmHP_Q2, B_Q10[ 1 ] );
        for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) {
            x_filt_Q12[ j ] = silk_MLA( silk_MUL( st_res_Q2[ j ], B_Q10[ 0 ] ), st_res_Q2[ j - 1 ], B_Q10[ 1 ] );
        }
        P->sHarmHP_Q2 = st_res_Q2[ psEnc->sCmn.subfr_length - 1 ];

        silk_prefilt_FIX( P, x_filt_Q12, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc->sCmn.subfr_length );

        px  += psEnc->sCmn.subfr_length;
        pxw_Q3 += psEnc->sCmn.subfr_length;
    }

    P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];
    RESTORE_STACK;
}
示例#3
0
/* Compute reflection coefficients from input signal */
void silk_burg_modified(
    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceeding samples)   */
    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
    const opus_int32            WhiteNoiseFrac_Q32, /* I    Fraction added to zero-lag autocorrelation                  */
    const opus_int              D                   /* I    Order                                                       */
)
{
    opus_int         k, n, s, lz, rshifts, rshifts_extra;
    opus_int32       C0, num, nrg, rc_Q31, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
    const opus_int16 *x_ptr;

    opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
    opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
    opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];

    opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
    opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];

    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
    silk_assert( nb_subfr <= MAX_NB_SUBFR );


    /* Compute autocorrelations, added over subframes */
    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
    if( rshifts > MAX_RSHIFTS ) {
        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
        silk_assert( C0 > 0 );
        rshifts = MAX_RSHIFTS;
    } else {
        lz = silk_CLZ32( C0 ) - 1;
        rshifts_extra = N_BITS_HEAD_ROOM - lz;
        if( rshifts_extra > 0 ) {
            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
            C0 = silk_RSHIFT32( C0, rshifts_extra );
        } else {
            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
            C0 = silk_LSHIFT32( C0, -rshifts_extra );
        }
        rshifts += rshifts_extra;
    }
    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
    if( rshifts > 0 ) {
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            for( n = 1; n < D + 1; n++ ) {
                C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
            }
        }
    } else {
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            for( n = 1; n < D + 1; n++ ) {
                C_first_row[ n - 1 ] += silk_LSHIFT32(
                    silk_inner_prod_aligned( x_ptr, x_ptr + n, subfr_length - n ), -rshifts );
            }
        }
    }
    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );

    /* Initialize */
    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( WhiteNoiseFrac_Q32, C0 ) + 1;                                /* Q(-rshifts)*/

    for( n = 0; n < D; n++ ) {
        /* Update first row of correlation matrix (without first element) */
        /* Update last row of correlation matrix (without last element, stored in reversed order) */
        /* Update C * Af */
        /* Update C * flipud(Af) (stored in reversed order) */
        if( rshifts > -2 ) {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts)*/
                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts)*/
                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16)*/
                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16)*/
                for( k = 0; k < n; k++ ) {
                    C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts )*/
                    C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts )*/
                    Atmp_QA = Af_QA[ k ];
                    tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16)*/
                    tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16)*/
                }
                tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts)*/
                tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts)*/
                for( k = 0; k <= n; k++ ) {
                    CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift )*/
                    CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift )*/
                }
            }
        } else {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts )*/
                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts )*/
                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17*/
                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17*/
                for( k = 0; k < n; k++ ) {
                    C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts )*/
                    C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts )*/
                    Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17*/
                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17*/
                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17*/
                }
                tmp1 = -tmp1;                                                                           /* Q17*/
                tmp2 = -tmp2;                                                                           /* Q17*/
                for( k = 0; k <= n; k++ ) {
                    CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
                        silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift )*/
                    CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
                        silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift )*/
                }
            }
        }

        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
        tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts )*/
        tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts )*/
        num  = 0;                                                                                       /* Q( -rshifts )*/
        nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts )*/
        for( k = 0; k < n; k++ ) {
            Atmp_QA = Af_QA[ k ];
            lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
            lz = silk_min( 32 - QA, lz );
            Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz )*/

            tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts )*/
            tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts )*/
            num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts )*/
            nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
                                                                                Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts )*/
        }
        CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts )*/
        CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts )*/
        num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts )*/
        num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts )*/

        /* Calculate the next order reflection (parcor) coefficient */
        if( silk_abs( num ) < nrg ) {
            rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
        } else {
            /* Negative energy or ratio too high; set remaining coefficients to zero and exit loop */
            silk_memset( &Af_QA[ n ], 0, ( D - n ) * sizeof( opus_int32 ) );
            silk_assert( 0 );
            break;
        }

        /* Update the AR coefficients */
        for( k = 0; k < (n + 1) >> 1; k++ ) {
            tmp1 = Af_QA[ k ];                                                                  /* QA*/
            tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA*/
            Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA*/
            Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA*/
        }
        Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA*/

        /* Update C * Af and C * Ab */
        for( k = 0; k <= n + 1; k++ ) {
            tmp1 = CAf[ k ];                                                                    /* Q( -rshifts )*/
            tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts )*/
            CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts )*/
            CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts )*/
        }
    }

    /* Return residual energy */
    nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts )*/
    tmp1 = 1 << 16;                                                                             /* Q16*/
    for( k = 0; k < D; k++ ) {
        Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16*/
        nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts )*/
        tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16*/
        A_Q16[ k ] = -Atmp1;
    }
    *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( WhiteNoiseFrac_Q32, C0 ), -tmp1 );                 /* Q( -rshifts )*/
    *res_nrg_Q = -rshifts;
}
/* Delayed-decision quantizer for NLSF residuals */
opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns RD value in Q25                     */
    opus_int8                   indices[],                      /* O    Quantization indices [ order ]              */
    const opus_int16            x_Q10[],                        /* I    Input [ order ]                             */
    const opus_int16            w_Q5[],                         /* I    Weights [ order ]                           */
    const opus_uint8            pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
    const opus_int16            ec_ix[],                        /* I    Indices to entropy coding tables [ order ]  */
    const opus_uint8            ec_rates_Q5[],                  /* I    Rates []                                    */
    const opus_int              quant_step_size_Q16,            /* I    Quantization step size                      */
    const opus_int16            inv_quant_step_size_Q6,         /* I    Inverse quantization step size              */
    const opus_int32            mu_Q20,                         /* I    R/D tradeoff                                */
    const opus_int16            order                           /* I    Number of input values                      */
)
{
    opus_int         i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
    opus_int         pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5;
    opus_int32       RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16;
    opus_int         ind_sort[         NLSF_QUANT_DEL_DEC_STATES ];
    opus_int8        ind[              NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
    opus_int16       prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
    opus_int32       RD_Q25[       2 * NLSF_QUANT_DEL_DEC_STATES ];
    opus_int32       RD_min_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
    opus_int32       RD_max_Q25[       NLSF_QUANT_DEL_DEC_STATES ];
    const opus_uint8 *rates_Q5;

    silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 );     /* must be power of two */

    nStates = 1;
    RD_Q25[ 0 ] = 0;
    prev_out_Q10[ 0 ] = 0;
    for( i = order - 1; ; i-- ) {
        rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
        pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
        in_Q10 = x_Q10[ i ];
        for( j = 0; j < nStates; j++ ) {
            pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] );
            res_Q10  = silk_SUB16( in_Q10, pred_Q10 );
            ind_tmp  = silk_SMULWB( inv_quant_step_size_Q6, res_Q10 );
            ind_tmp  = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
            ind[ j ][ i ] = (opus_int8)ind_tmp;

            /* compute outputs for ind_tmp and ind_tmp + 1 */
            out0_Q10 = silk_LSHIFT( ind_tmp, 10 );
            out1_Q10 = silk_ADD16( out0_Q10, 1024 );
            if( ind_tmp > 0 ) {
                out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
                out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
            } else if( ind_tmp == 0 ) {
                out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
            } else if( ind_tmp == -1 ) {
                out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
            } else {
                out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
                out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
            }
            out0_Q10  = silk_SMULWB( out0_Q10, quant_step_size_Q16 );
            out1_Q10  = silk_SMULWB( out1_Q10, quant_step_size_Q16 );
            out0_Q10  = silk_ADD16( out0_Q10, pred_Q10 );
            out1_Q10  = silk_ADD16( out1_Q10, pred_Q10 );
            prev_out_Q10[ j           ] = out0_Q10;
            prev_out_Q10[ j + nStates ] = out1_Q10;

            /* compute RD for ind_tmp and ind_tmp + 1 */
            if( ind_tmp + 1 >= NLSF_QUANT_MAX_AMPLITUDE ) {
                if( ind_tmp + 1 == NLSF_QUANT_MAX_AMPLITUDE ) {
                    rate0_Q5 = rates_Q5[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE ];
                    rate1_Q5 = 280;
                } else {
                    rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, 43, ind_tmp );
                    rate1_Q5 = silk_ADD16( rate0_Q5, 43 );
                }
            } else if( ind_tmp <= -NLSF_QUANT_MAX_AMPLITUDE ) {
                if( ind_tmp == -NLSF_QUANT_MAX_AMPLITUDE ) {
                    rate0_Q5 = 280;
                    rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ];
                } else {
                    rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, -43, ind_tmp );
                    rate1_Q5 = silk_SUB16( rate0_Q5, 43 );
                }
            } else {
                rate0_Q5 = rates_Q5[ ind_tmp +     NLSF_QUANT_MAX_AMPLITUDE ];
                rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ];
            }
            RD_tmp_Q25            = RD_Q25[ j ];
            diff_Q10              = silk_SUB16( in_Q10, out0_Q10 );
            RD_Q25[ j ]           = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate0_Q5 );
            diff_Q10              = silk_SUB16( in_Q10, out1_Q10 );
            RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 );
        }

        if( nStates < NLSF_QUANT_DEL_DEC_STATES ) {
            /* double number of states and copy */
            for( j = 0; j < nStates; j++ ) {
                ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1;
            }
            nStates = silk_LSHIFT( nStates, 1 );
            for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
                ind[ j ][ i ] = ind[ j - nStates ][ i ];
            }
        } else if( i > 0 ) {
            /* sort lower and upper half of RD_Q25, pairwise */
            for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
                if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) {
                    RD_max_Q25[ j ]                         = RD_Q25[ j ];
                    RD_min_Q25[ j ]                         = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ];
                    RD_Q25[ j ]                             = RD_min_Q25[ j ];
                    RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] = RD_max_Q25[ j ];
                    /* swap prev_out values */
                    out0_Q10 = prev_out_Q10[ j ];
                    prev_out_Q10[ j ] = prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ];
                    prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ] = out0_Q10;
                    ind_sort[ j ] = j + NLSF_QUANT_DEL_DEC_STATES;
                } else {
                    RD_min_Q25[ j ] = RD_Q25[ j ];
                    RD_max_Q25[ j ] = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ];
                    ind_sort[ j ] = j;
                }
            }
            /* compare the highest RD values of the winning half with the lowest one in the losing half, and copy if necessary */
            /* afterwards ind_sort[] will contain the indices of the NLSF_QUANT_DEL_DEC_STATES winning RD values */
            while( 1 ) {
                min_max_Q25 = silk_int32_MAX;
                max_min_Q25 = 0;
                ind_min_max = 0;
                ind_max_min = 0;
                for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
                    if( min_max_Q25 > RD_max_Q25[ j ] ) {
                        min_max_Q25 = RD_max_Q25[ j ];
                        ind_min_max = j;
                    }
                    if( max_min_Q25 < RD_min_Q25[ j ] ) {
                        max_min_Q25 = RD_min_Q25[ j ];
                        ind_max_min = j;
                    }
                }
                if( min_max_Q25 >= max_min_Q25 ) {
                    break;
                }
                /* copy ind_min_max to ind_max_min */
                ind_sort[     ind_max_min ] = ind_sort[     ind_min_max ] ^ NLSF_QUANT_DEL_DEC_STATES;
                RD_Q25[       ind_max_min ] = RD_Q25[       ind_min_max + NLSF_QUANT_DEL_DEC_STATES ];
                prev_out_Q10[ ind_max_min ] = prev_out_Q10[ ind_min_max + NLSF_QUANT_DEL_DEC_STATES ];
                RD_min_Q25[   ind_max_min ] = 0;
                RD_max_Q25[   ind_min_max ] = silk_int32_MAX;
                silk_memcpy( ind[ ind_max_min ], ind[ ind_min_max ], MAX_LPC_ORDER * sizeof( opus_int8 ) );
            }
            /* increment index if it comes from the upper half */
            for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
                ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 );
            }
        } else {  /* i == 0 */
            break;
        }
    }

    /* last sample: find winner, copy indices and return RD value */
    ind_tmp = 0;
    min_Q25 = silk_int32_MAX;
    for( j = 0; j < 2 * NLSF_QUANT_DEL_DEC_STATES; j++ ) {
        if( min_Q25 > RD_Q25[ j ] ) {
            min_Q25 = RD_Q25[ j ];
            ind_tmp = j;
        }
    }
    for( j = 0; j < order; j++ ) {
        indices[ j ] = ind[ ind_tmp & ( NLSF_QUANT_DEL_DEC_STATES - 1 ) ][ j ];
        silk_assert( indices[ j ] >= -NLSF_QUANT_MAX_AMPLITUDE_EXT );
        silk_assert( indices[ j ] <=  NLSF_QUANT_MAX_AMPLITUDE_EXT );
    }
    indices[ 0 ] += silk_RSHIFT( ind_tmp, NLSF_QUANT_DEL_DEC_STATES_LOG2 );
    silk_assert( indices[ 0 ] <= NLSF_QUANT_MAX_AMPLITUDE_EXT );
    silk_assert( min_Q25 >= 0 );
    return min_Q25;
}
/* Compute reflection coefficients from input signal */
void silk_burg_modified_sse4_1(
    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * (D + subfr_length)       */
    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
    const opus_int              D,                  /* I    Order                                                       */
    int                         arch                /* I    Run-time architecture                                       */
)
{
    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
    opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
    const opus_int16 *x_ptr;
    opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
    opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
    opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
    opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
    opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
    opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];

    __m128i FIRST_3210, LAST_3210, ATMP_3210, TMP1_3210, TMP2_3210, T1_3210, T2_3210, PTR_3210, SUBFR_3210, X1_3210, X2_3210;
    __m128i CONST1 = _mm_set1_epi32(1);

    silk_assert(subfr_length * nb_subfr <= MAX_FRAME_SIZE);

    /* Compute autocorrelations, added over subframes */
    silk_sum_sqr_shift(&C0, &rshifts, x, nb_subfr * subfr_length);
    if(rshifts > MAX_RSHIFTS) {
        C0 = silk_LSHIFT32(C0, rshifts - MAX_RSHIFTS);
        silk_assert(C0 > 0);
        rshifts = MAX_RSHIFTS;
    } else {
        lz = silk_CLZ32(C0) - 1;
        rshifts_extra = N_BITS_HEAD_ROOM - lz;
        if(rshifts_extra > 0) {
            rshifts_extra = silk_min(rshifts_extra, MAX_RSHIFTS - rshifts);
            C0 = silk_RSHIFT32(C0, rshifts_extra);
        } else {
            rshifts_extra = silk_max(rshifts_extra, MIN_RSHIFTS - rshifts);
            C0 = silk_LSHIFT32(C0, -rshifts_extra);
        }
        rshifts += rshifts_extra;
    }
    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL(SILK_FIX_CONST(FIND_LPC_COND_FAC, 32), C0) + 1;                                /* Q(-rshifts) */
    silk_memset(C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof(opus_int32));
    if(rshifts > 0) {
        for(s = 0; s < nb_subfr; s++) {
            x_ptr = x + s * subfr_length;
            for(n = 1; n < D + 1; n++) {
                C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
                    silk_inner_prod16_aligned_64(x_ptr, x_ptr + n, subfr_length - n, arch), rshifts);
            }
        }
    } else {
        for(s = 0; s < nb_subfr; s++) {
            int i;
            opus_int32 d;
            x_ptr = x + s * subfr_length;
            celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch);
            for(n = 1; n < D + 1; n++) {
               for (i = n + subfr_length - D, d = 0; i < subfr_length; i++)
                  d = MAC16_16(d, x_ptr[ i ], x_ptr[ i - n ]);
               xcorr[ n - 1 ] += d;
            }
            for(n = 1; n < D + 1; n++) {
                C_first_row[ n - 1 ] += silk_LSHIFT32(xcorr[ n - 1 ], -rshifts);
            }
        }
    }
    silk_memcpy(C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof(opus_int32));

    /* Initialize */
    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL(SILK_FIX_CONST(FIND_LPC_COND_FAC, 32), C0) + 1;                                /* Q(-rshifts) */

    invGain_Q30 = (opus_int32)1 << 30;
    reached_max_gain = 0;
    for(n = 0; n < D; n++) {
        /* Update first row of correlation matrix (without first element) */
        /* Update last row of correlation matrix (without last element, stored in reversed order) */
        /* Update C * Af */
        /* Update C * flipud(Af) (stored in reversed order) */
        if(rshifts > -2) {
            for(s = 0; s < nb_subfr; s++) {
                x_ptr = x + s * subfr_length;
                x1  = -silk_LSHIFT32((opus_int32)x_ptr[ n ],                    16 - rshifts);        /* Q(16-rshifts) */
                x2  = -silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts);        /* Q(16-rshifts) */
                tmp1 = silk_LSHIFT32((opus_int32)x_ptr[ n ],                    QA - 16);             /* Q(QA-16) */
                tmp2 = silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16);             /* Q(QA-16) */
                for(k = 0; k < n; k++) {
                    C_first_row[ k ] = silk_SMLAWB(C_first_row[ k ], x1, x_ptr[ n - k - 1 ]           ); /* Q(-rshifts) */
                    C_last_row[ k ]  = silk_SMLAWB(C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ]); /* Q(-rshifts) */
                    Atmp_QA = Af_QA[ k ];
                    tmp1 = silk_SMLAWB(tmp1, Atmp_QA, x_ptr[ n - k - 1 ]           );                 /* Q(QA-16) */
                    tmp2 = silk_SMLAWB(tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ]);                 /* Q(QA-16) */
                }
                tmp1 = silk_LSHIFT32(-tmp1, 32 - QA - rshifts);                                       /* Q(16-rshifts) */
                tmp2 = silk_LSHIFT32(-tmp2, 32 - QA - rshifts);                                       /* Q(16-rshifts) */
                for(k = 0; k <= n; k++) {
                    CAf[ k ] = silk_SMLAWB(CAf[ k ], tmp1, x_ptr[ n - k ]                   );        /* Q(-rshift) */
                    CAb[ k ] = silk_SMLAWB(CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ]);        /* Q(-rshift) */
                }
            }
        } else {
            for(s = 0; s < nb_subfr; s++) {
                x_ptr = x + s * subfr_length;
                x1  = -silk_LSHIFT32((opus_int32)x_ptr[ n ],                    -rshifts);            /* Q(-rshifts) */
                x2  = -silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts);            /* Q(-rshifts) */
                tmp1 = silk_LSHIFT32((opus_int32)x_ptr[ n ],                    17);                  /* Q17 */
                tmp2 = silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n - 1 ], 17);                  /* Q17 */

                X1_3210 = _mm_set1_epi32(x1);
                X2_3210 = _mm_set1_epi32(x2);
                TMP1_3210 = _mm_setzero_si128();
                TMP2_3210 = _mm_setzero_si128();
                for(k = 0; k < n - 3; k += 4) {
                    PTR_3210   = OP_CVTEPI16_EPI32_M64(&x_ptr[ n - k - 1 - 3 ]);
                    SUBFR_3210 = OP_CVTEPI16_EPI32_M64(&x_ptr[ subfr_length - n + k ]);
                    FIRST_3210 = _mm_loadu_si128((__m128i *)&C_first_row[ k ]);
                    PTR_3210   = _mm_shuffle_epi32(PTR_3210,  _MM_SHUFFLE(0, 1, 2, 3));
                    LAST_3210  = _mm_loadu_si128((__m128i *)&C_last_row[ k ]);
                    ATMP_3210  = _mm_loadu_si128((__m128i *)&Af_QA[ k ]);

                    T1_3210 = _mm_mullo_epi32(PTR_3210, X1_3210);
                    T2_3210 = _mm_mullo_epi32(SUBFR_3210, X2_3210);

                    ATMP_3210 = _mm_srai_epi32(ATMP_3210, 7);
                    ATMP_3210 = _mm_add_epi32(ATMP_3210, CONST1);
                    ATMP_3210 = _mm_srai_epi32(ATMP_3210, 1);

                    FIRST_3210 = _mm_add_epi32(FIRST_3210, T1_3210);
                    LAST_3210 = _mm_add_epi32(LAST_3210, T2_3210);

                    PTR_3210   = _mm_mullo_epi32(ATMP_3210, PTR_3210);
                    SUBFR_3210   = _mm_mullo_epi32(ATMP_3210, SUBFR_3210);

                    _mm_storeu_si128((__m128i *)&C_first_row[ k ], FIRST_3210);
                    _mm_storeu_si128((__m128i *)&C_last_row[ k ], LAST_3210);

                    TMP1_3210 = _mm_add_epi32(TMP1_3210, PTR_3210);
                    TMP2_3210 = _mm_add_epi32(TMP2_3210, SUBFR_3210);
                }

                TMP1_3210 = _mm_add_epi32(TMP1_3210, _mm_unpackhi_epi64(TMP1_3210, TMP1_3210));
                TMP2_3210 = _mm_add_epi32(TMP2_3210, _mm_unpackhi_epi64(TMP2_3210, TMP2_3210));
                TMP1_3210 = _mm_add_epi32(TMP1_3210, _mm_shufflelo_epi16(TMP1_3210, 0x0E));
                TMP2_3210 = _mm_add_epi32(TMP2_3210, _mm_shufflelo_epi16(TMP2_3210, 0x0E));

                tmp1 += _mm_cvtsi128_si32(TMP1_3210);
                tmp2 += _mm_cvtsi128_si32(TMP2_3210);

                for(; k < n; k++) {
                    C_first_row[ k ] = silk_MLA(C_first_row[ k ], x1, x_ptr[ n - k - 1 ]           ); /* Q(-rshifts) */
                    C_last_row[ k ]  = silk_MLA(C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ]); /* Q(-rshifts) */
                    Atmp1 = silk_RSHIFT_ROUND(Af_QA[ k ], QA - 17);                                   /* Q17 */
                    tmp1 = silk_MLA(tmp1, x_ptr[ n - k - 1 ],            Atmp1);                      /* Q17 */
                    tmp2 = silk_MLA(tmp2, x_ptr[ subfr_length - n + k ], Atmp1);                      /* Q17 */
                }

                tmp1 = -tmp1;                /* Q17 */
                tmp2 = -tmp2;                /* Q17 */

                {
                    __m128i xmm_tmp1, xmm_tmp2;
                    __m128i xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1;
                    __m128i xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1;

                    xmm_tmp1 = _mm_set1_epi32(tmp1);
                    xmm_tmp2 = _mm_set1_epi32(tmp2);

                    for(k = 0; k <= n - 3; k += 4) {
                        xmm_x_ptr_n_k_x2x0 = OP_CVTEPI16_EPI32_M64(&x_ptr[ n - k - 3 ]);
                        xmm_x_ptr_sub_x2x0 = OP_CVTEPI16_EPI32_M64(&x_ptr[ subfr_length - n + k - 1 ]);

                        xmm_x_ptr_n_k_x2x0 = _mm_shuffle_epi32(xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE(0, 1, 2, 3));

                        xmm_x_ptr_n_k_x2x0 = _mm_slli_epi32(xmm_x_ptr_n_k_x2x0, -rshifts - 1);
                        xmm_x_ptr_sub_x2x0 = _mm_slli_epi32(xmm_x_ptr_sub_x2x0, -rshifts - 1);

                        /* equal shift right 4 bytes, xmm_x_ptr_n_k_x3x1 = _mm_srli_si128(xmm_x_ptr_n_k_x2x0, 4)*/
                        xmm_x_ptr_n_k_x3x1 = _mm_shuffle_epi32(xmm_x_ptr_n_k_x2x0, _MM_SHUFFLE(0, 3, 2, 1));
                        xmm_x_ptr_sub_x3x1 = _mm_shuffle_epi32(xmm_x_ptr_sub_x2x0, _MM_SHUFFLE(0, 3, 2, 1));

                        xmm_x_ptr_n_k_x2x0 = _mm_mul_epi32(xmm_x_ptr_n_k_x2x0, xmm_tmp1);
                        xmm_x_ptr_n_k_x3x1 = _mm_mul_epi32(xmm_x_ptr_n_k_x3x1, xmm_tmp1);
                        xmm_x_ptr_sub_x2x0 = _mm_mul_epi32(xmm_x_ptr_sub_x2x0, xmm_tmp2);
                        xmm_x_ptr_sub_x3x1 = _mm_mul_epi32(xmm_x_ptr_sub_x3x1, xmm_tmp2);

                        xmm_x_ptr_n_k_x2x0 = _mm_srli_epi64(xmm_x_ptr_n_k_x2x0, 16);
                        xmm_x_ptr_n_k_x3x1 = _mm_slli_epi64(xmm_x_ptr_n_k_x3x1, 16);
                        xmm_x_ptr_sub_x2x0 = _mm_srli_epi64(xmm_x_ptr_sub_x2x0, 16);
                        xmm_x_ptr_sub_x3x1 = _mm_slli_epi64(xmm_x_ptr_sub_x3x1, 16);

                        xmm_x_ptr_n_k_x2x0 = _mm_blend_epi16(xmm_x_ptr_n_k_x2x0, xmm_x_ptr_n_k_x3x1, 0xCC);
                        xmm_x_ptr_sub_x2x0 = _mm_blend_epi16(xmm_x_ptr_sub_x2x0, xmm_x_ptr_sub_x3x1, 0xCC);

                        X1_3210  = _mm_loadu_si128((__m128i *)&CAf[ k ]);
                        PTR_3210 = _mm_loadu_si128((__m128i *)&CAb[ k ]);

                        X1_3210  = _mm_add_epi32(X1_3210, xmm_x_ptr_n_k_x2x0);
                        PTR_3210 = _mm_add_epi32(PTR_3210, xmm_x_ptr_sub_x2x0);

                        _mm_storeu_si128((__m128i *)&CAf[ k ], X1_3210);
                        _mm_storeu_si128((__m128i *)&CAb[ k ], PTR_3210);
                    }

                    for(; k <= n; k++) {
                        CAf[ k ] = silk_SMLAWW(CAf[ k ], tmp1,
                            silk_LSHIFT32((opus_int32)x_ptr[ n - k ], -rshifts - 1));                    /* Q(-rshift) */
                        CAb[ k ] = silk_SMLAWW(CAb[ k ], tmp2,
                            silk_LSHIFT32((opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1)); /* Q(-rshift) */
                    }
                }
            }
        }

        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
        tmp1 = C_first_row[ n ];                                                                        /* Q(-rshifts) */
        tmp2 = C_last_row[ n ];                                                                         /* Q(-rshifts) */
        num  = 0;                                                                                       /* Q(-rshifts) */
        nrg  = silk_ADD32(CAb[ 0 ], CAf[ 0 ]);                                                        /* Q(1-rshifts) */
        for(k = 0; k < n; k++) {
            Atmp_QA = Af_QA[ k ];
            lz = silk_CLZ32(silk_abs(Atmp_QA)) - 1;
            lz = silk_min(32 - QA, lz);
            Atmp1 = silk_LSHIFT32(Atmp_QA, lz);                                                       /* Q(QA + lz) */

            tmp1 = silk_ADD_LSHIFT32(tmp1, silk_SMMUL(C_last_row[  n - k - 1 ], Atmp1), 32 - QA - lz);  /* Q(-rshifts) */
            tmp2 = silk_ADD_LSHIFT32(tmp2, silk_SMMUL(C_first_row[ n - k - 1 ], Atmp1), 32 - QA - lz);  /* Q(-rshifts) */
            num  = silk_ADD_LSHIFT32(num,  silk_SMMUL(CAb[ n - k ],             Atmp1), 32 - QA - lz);  /* Q(-rshifts) */
            nrg  = silk_ADD_LSHIFT32(nrg,  silk_SMMUL(silk_ADD32(CAb[ k + 1 ], CAf[ k + 1 ]),
                                                                                Atmp1), 32 - QA - lz);    /* Q(1-rshifts) */
        }
        CAf[ n + 1 ] = tmp1;                                                                            /* Q(-rshifts) */
        CAb[ n + 1 ] = tmp2;                                                                            /* Q(-rshifts) */
        num = silk_ADD32(num, tmp2);                                                                  /* Q(-rshifts) */
        num = silk_LSHIFT32(-num, 1);                                                                 /* Q(1-rshifts) */

        /* Calculate the next order reflection (parcor) coefficient */
        if(silk_abs(num) < nrg) {
            rc_Q31 = silk_DIV32_varQ(num, nrg, 31);
        } else {
            rc_Q31 = (num > 0) ? silk_int32_MAX : silk_int32_MIN;
        }

        /* Update inverse prediction gain */
        tmp1 = ((opus_int32)1 << 30) - silk_SMMUL(rc_Q31, rc_Q31);
        tmp1 = silk_LSHIFT(silk_SMMUL(invGain_Q30, tmp1), 2);
        if(tmp1 <= minInvGain_Q30) {
            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
            tmp2 = ((opus_int32)1 << 30) - silk_DIV32_varQ(minInvGain_Q30, invGain_Q30, 30);            /* Q30 */
            rc_Q31 = silk_SQRT_APPROX(tmp2);                                                  /* Q15 */
            /* Newton-Raphson iteration */
            rc_Q31 = silk_RSHIFT32(rc_Q31 + silk_DIV32(tmp2, rc_Q31), 1);                   /* Q15 */
            rc_Q31 = silk_LSHIFT32(rc_Q31, 16);                                               /* Q31 */
            if(num < 0) {
                /* Ensure adjusted reflection coefficients has the original sign */
                rc_Q31 = -rc_Q31;
            }
            invGain_Q30 = minInvGain_Q30;
            reached_max_gain = 1;
        } else {
            invGain_Q30 = tmp1;
        }

        /* Update the AR coefficients */
        for(k = 0; k < (n + 1) >> 1; k++) {
            tmp1 = Af_QA[ k ];                                                                  /* QA */
            tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
            Af_QA[ k ]         = silk_ADD_LSHIFT32(tmp1, silk_SMMUL(tmp2, rc_Q31), 1);      /* QA */
            Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32(tmp2, silk_SMMUL(tmp1, rc_Q31), 1);      /* QA */
        }
        Af_QA[ n ] = silk_RSHIFT32(rc_Q31, 31 - QA);                                          /* QA */

        if(reached_max_gain) {
            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
            for(k = n + 1; k < D; k++) {
                Af_QA[ k ] = 0;
            }
            break;
        }

        /* Update C * Af and C * Ab */
        for(k = 0; k <= n + 1; k++) {
            tmp1 = CAf[ k ];                                                                    /* Q(-rshifts) */
            tmp2 = CAb[ n - k + 1 ];                                                            /* Q(-rshifts) */
            CAf[ k ]         = silk_ADD_LSHIFT32(tmp1, silk_SMMUL(tmp2, rc_Q31), 1);        /* Q(-rshifts) */
            CAb[ n - k + 1 ] = silk_ADD_LSHIFT32(tmp2, silk_SMMUL(tmp1, rc_Q31), 1);        /* Q(-rshifts) */
        }
    }

    if(reached_max_gain) {
        for(k = 0; k < D; k++) {
            /* Scale coefficients */
            A_Q16[ k ] = -silk_RSHIFT_ROUND(Af_QA[ k ], QA - 16);
        }
        /* Subtract energy of preceding samples from C0 */
        if(rshifts > 0) {
            for(s = 0; s < nb_subfr; s++) {
                x_ptr = x + s * subfr_length;
                C0 -= (opus_int32)silk_RSHIFT64(silk_inner_prod16_aligned_64(x_ptr, x_ptr, D, arch), rshifts);
            }
        } else {
            for(s = 0; s < nb_subfr; s++) {
                x_ptr = x + s * subfr_length;
                C0 -= silk_LSHIFT32(silk_inner_prod_aligned(x_ptr, x_ptr, D, arch), -rshifts);
            }
        }
        /* Approximate residual energy */
        *res_nrg = silk_LSHIFT(silk_SMMUL(invGain_Q30, C0), 2);
        *res_nrg_Q = -rshifts;
    } else {
        /* Return residual energy */
        nrg  = CAf[ 0 ];                                                                            /* Q(-rshifts) */
        tmp1 = (opus_int32)1 << 16;                                                                             /* Q16 */
        for(k = 0; k < D; k++) {
            Atmp1 = silk_RSHIFT_ROUND(Af_QA[ k ], QA - 16);                                       /* Q16 */
            nrg  = silk_SMLAWW(nrg, CAf[ k + 1 ], Atmp1);                                         /* Q(-rshifts) */
            tmp1 = silk_SMLAWW(tmp1, Atmp1, Atmp1);                                               /* Q16 */
            A_Q16[ k ] = -Atmp1;
        }
        *res_nrg = silk_SMLAWW(nrg, silk_SMMUL(SILK_FIX_CONST(FIND_LPC_COND_FAC, 32), C0), -tmp1);/* Q(-rshifts) */
        *res_nrg_Q = -rshifts;
    }
}
/* Compute reflection coefficients from input signal */
void silk_burg_modified(
    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
    const opus_int              D                   /* I    Order                                                       */
)
{
    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
    opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
    const opus_int16 *x_ptr;
    opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
    opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
    opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
    opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
    opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];

    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );

    /* Compute autocorrelations, added over subframes */
    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
    if( rshifts > MAX_RSHIFTS ) {
        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
        silk_assert( C0 > 0 );
        rshifts = MAX_RSHIFTS;
    } else {
        lz = silk_CLZ32( C0 ) - 1;
        rshifts_extra = N_BITS_HEAD_ROOM - lz;
        if( rshifts_extra > 0 ) {
            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
            C0 = silk_RSHIFT32( C0, rshifts_extra );
        } else {
            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
            C0 = silk_LSHIFT32( C0, -rshifts_extra );
        }
        rshifts += rshifts_extra;
    }
    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
    if( rshifts > 0 ) {
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            for( n = 1; n < D + 1; n++ ) {
                C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
            }
        }
    } else {
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            for( n = 1; n < D + 1; n++ ) {
                C_first_row[ n - 1 ] += silk_LSHIFT32(
                    silk_inner_prod_aligned( x_ptr, x_ptr + n, subfr_length - n ), -rshifts );
            }
        }
    }
    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );

    /* Initialize */
    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */

    invGain_Q30 = (opus_int32)1 << 30;
    reached_max_gain = 0;
    for( n = 0; n < D; n++ ) {
        /* Update first row of correlation matrix (without first element) */
        /* Update last row of correlation matrix (without last element, stored in reversed order) */
        /* Update C * Af */
        /* Update C * flipud(Af) (stored in reversed order) */
        if( rshifts > -2 ) {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts) */
                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts) */
                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16) */
                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16) */
                for( k = 0; k < n; k++ ) {
                    C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
                    C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                    Atmp_QA = Af_QA[ k ];
                    tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16) */
                    tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16) */
                }
                tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
                tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
                for( k = 0; k <= n; k++ ) {
                    CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift ) */
                    CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift ) */
                }
            }
        } else {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
                for( k = 0; k < n; k++ ) {
                    C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
                    C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                    Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
                }
                tmp1 = -tmp1;                                                                           /* Q17 */
                tmp2 = -tmp2;                                                                           /* Q17 */
                for( k = 0; k <= n; k++ ) {
                    CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
                        silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
                    CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
                        silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
                }
            }
        }

        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
        tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts ) */
        tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts ) */
        num  = 0;                                                                                       /* Q( -rshifts ) */
        nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts ) */
        for( k = 0; k < n; k++ ) {
            Atmp_QA = Af_QA[ k ];
            lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
            lz = silk_min( 32 - QA, lz );
            Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz ) */

            tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
            tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
            num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
            nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
                                                                                Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts ) */
        }
        CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts ) */
        CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts ) */
        num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts ) */
        num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts ) */

        /* Calculate the next order reflection (parcor) coefficient */
        if( silk_abs( num ) < nrg ) {
            rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
        } else {
            rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
        }

        /* Update inverse prediction gain */
        tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
        tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
        if( tmp1 <= minInvGain_Q30 ) {
            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
            tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
            rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
            /* Newton-Raphson iteration */
            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
            if( num < 0 ) {
                /* Ensure adjusted reflection coefficients has the original sign */
                rc_Q31 = -rc_Q31;
            }
            invGain_Q30 = minInvGain_Q30;
            reached_max_gain = 1;
        } else {
            invGain_Q30 = tmp1;
        }

        /* Update the AR coefficients */
        for( k = 0; k < (n + 1) >> 1; k++ ) {
            tmp1 = Af_QA[ k ];                                                                  /* QA */
            tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
            Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA */
            Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA */
        }
        Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA */

        if( reached_max_gain ) {
            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
            for( k = n + 1; k < D; k++ ) {
                Af_QA[ k ] = 0;
            }
            break;
        }

        /* Update C * Af and C * Ab */
        for( k = 0; k <= n + 1; k++ ) {
            tmp1 = CAf[ k ];                                                                    /* Q( -rshifts ) */
            tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts ) */
            CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts ) */
            CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts ) */
        }
    }

    if( reached_max_gain ) {
        for( k = 0; k < D; k++ ) {
            /* Scale coefficients */
            A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
        }
        /* Subtract energy of preceding samples from C0 */
        if( rshifts > 0 ) {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
            }
        } else {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
            }
        }
        /* Approximate residual energy */
        *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
        *res_nrg_Q = -rshifts;
    } else {
        /* Return residual energy */
        nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts ) */
        tmp1 = (opus_int32)1 << 16;                                                                             /* Q16 */
        for( k = 0; k < D; k++ ) {
            Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16 */
            nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts ) */
            tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16 */
            A_Q16[ k ] = -Atmp1;
        }
        *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( FIND_LPC_COND_FAC, C0 ), -tmp1 );                  /* Q( -rshifts ) */
        *res_nrg_Q = -rshifts;
    }   
}
示例#7
0
/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
void silk_VQ_WMat_EC_c(
    opus_int8                   *ind,                           /* O    index of best codebook vector               */
    opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
    opus_int32                  *rate_dist_Q8,                  /* O    best total bitrate                          */
    opus_int                    *gain_Q7,                       /* O    sum of absolute LTP coefficients            */
    const opus_int32            *XX_Q17,                        /* I    correlation matrix                          */
    const opus_int32            *xX_Q17,                        /* I    correlation vector                          */
    const opus_int8             *cb_Q7,                         /* I    codebook                                    */
    const opus_uint8            *cb_gain_Q7,                    /* I    codebook effective gain                     */
    const opus_uint8            *cl_Q5,                         /* I    code length for each codebook vector        */
    const opus_int              subfr_len,                      /* I    number of samples per subframe              */
    const opus_int32            max_gain_Q7,                    /* I    maximum sum of absolute LTP coefficients    */
    const opus_int              L                               /* I    number of vectors in codebook               */
)
{
    opus_int   k, gain_tmp_Q7;
    const opus_int8 *cb_row_Q7;
    opus_int32 neg_xX_Q24[ 5 ];
    opus_int32 sum1_Q15, sum2_Q24;
    opus_int32 bits_res_Q8, bits_tot_Q8;

    /* Negate and convert to new Q domain */
    neg_xX_Q24[ 0 ] = -silk_LSHIFT32( xX_Q17[ 0 ], 7 );
    neg_xX_Q24[ 1 ] = -silk_LSHIFT32( xX_Q17[ 1 ], 7 );
    neg_xX_Q24[ 2 ] = -silk_LSHIFT32( xX_Q17[ 2 ], 7 );
    neg_xX_Q24[ 3 ] = -silk_LSHIFT32( xX_Q17[ 3 ], 7 );
    neg_xX_Q24[ 4 ] = -silk_LSHIFT32( xX_Q17[ 4 ], 7 );

    /* Loop over codebook */
    *rate_dist_Q8 = silk_int32_MAX;
    *res_nrg_Q15 = silk_int32_MAX;
    cb_row_Q7 = cb_Q7;
    /* In things go really bad, at least *ind is set to something safe. */
    *ind = 0;
    for( k = 0; k < L; k++ ) {
        opus_int32 penalty;
        gain_tmp_Q7 = cb_gain_Q7[k];
        /* Weighted rate */
        /* Quantization error: 1 - 2 * xX * cb + cb' * XX * cb */
        sum1_Q15 = SILK_FIX_CONST( 1.001, 15 );

        /* Penalty for too large gain */
        penalty = silk_LSHIFT32( silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 11 );

        /* first row of XX_Q17 */
        sum2_Q24 = silk_MLA( neg_xX_Q24[ 0 ], XX_Q17[  1 ], cb_row_Q7[ 1 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  2 ], cb_row_Q7[ 2 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  3 ], cb_row_Q7[ 3 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  4 ], cb_row_Q7[ 4 ] );
        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  0 ], cb_row_Q7[ 0 ] );
        sum1_Q15 = silk_SMLAWB( sum1_Q15,        sum2_Q24,  cb_row_Q7[ 0 ] );

        /* second row of XX_Q17 */
        sum2_Q24 = silk_MLA( neg_xX_Q24[ 1 ], XX_Q17[  7 ], cb_row_Q7[ 2 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  8 ], cb_row_Q7[ 3 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  9 ], cb_row_Q7[ 4 ] );
        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[  6 ], cb_row_Q7[ 1 ] );
        sum1_Q15 = silk_SMLAWB( sum1_Q15,        sum2_Q24,  cb_row_Q7[ 1 ] );

        /* third row of XX_Q17 */
        sum2_Q24 = silk_MLA( neg_xX_Q24[ 2 ], XX_Q17[ 13 ], cb_row_Q7[ 3 ] );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 14 ], cb_row_Q7[ 4 ] );
        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 12 ], cb_row_Q7[ 2 ] );
        sum1_Q15 = silk_SMLAWB( sum1_Q15,        sum2_Q24,  cb_row_Q7[ 2 ] );

        /* fourth row of XX_Q17 */
        sum2_Q24 = silk_MLA( neg_xX_Q24[ 3 ], XX_Q17[ 19 ], cb_row_Q7[ 4 ] );
        sum2_Q24 = silk_LSHIFT32( sum2_Q24, 1 );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 18 ], cb_row_Q7[ 3 ] );
        sum1_Q15 = silk_SMLAWB( sum1_Q15,        sum2_Q24,  cb_row_Q7[ 3 ] );

        /* last row of XX_Q17 */
        sum2_Q24 = silk_LSHIFT32( neg_xX_Q24[ 4 ], 1 );
        sum2_Q24 = silk_MLA( sum2_Q24,        XX_Q17[ 24 ], cb_row_Q7[ 4 ] );
        sum1_Q15 = silk_SMLAWB( sum1_Q15,        sum2_Q24,  cb_row_Q7[ 4 ] );

        /* find best */
        if( sum1_Q15 >= 0 ) {
            /* Translate residual energy to bits using high-rate assumption (6 dB ==> 1 bit/sample) */
            bits_res_Q8 = silk_SMULBB( subfr_len, silk_lin2log( sum1_Q15 + penalty) - (15 << 7) );
            /* In the following line we reduce the codelength component by half ("-1"); seems to slghtly improve quality */
            bits_tot_Q8 = silk_ADD_LSHIFT32( bits_res_Q8, cl_Q5[ k ], 3-1 );
            if( bits_tot_Q8 <= *rate_dist_Q8 ) {
                *rate_dist_Q8 = bits_tot_Q8;
                *res_nrg_Q15 = sum1_Q15 + penalty;
                *ind = (opus_int8)k;
                *gain_Q7 = gain_tmp_Q7;
            }
        }

        /* Go to next cbk vector */
        cb_row_Q7 += LTP_ORDER;
    }
}