/* 16th order AR filter */
void SKP_Silk_LPC_synthesis_order16(const SKP_int16 *in,          /* I:   excitation signal */
                                      const SKP_int16 *A_Q12,       /* I:   AR coefficients [16], between -8_Q0 and 8_Q0 */
                                      const SKP_int32 Gain_Q26,     /* I:   gain */
                                      SKP_int32 *S,                 /* I/O: state vector [16] */
                                      SKP_int16 *out,               /* O:   output signal */
                                      const SKP_int32 len           /* I:   signal length, must be multiple of 16 */
)
{
    SKP_int   k;
    SKP_int32 SA, SB, out32_Q10, out32;
    for( k = 0; k < len; k++ ) {
        /* unrolled loop: prolog */
        /* multiply-add two prediction coefficients per iteration */
        SA = S[ 15 ];
        SB = S[ 14 ];
        S[ 14 ] = SA;
        out32_Q10 = SKP_SMULWB(                  SA, A_Q12[ 0 ] );
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SB, A_Q12[ 1 ] );
        SA = S[ 13 ];
        S[ 13 ] = SB;

        /* unrolled loop: main loop */
        SB = S[ 12 ];
        S[ 12 ] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, A_Q12[ 2 ] );
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SB, A_Q12[ 3 ] );
        SA = S[ 11 ];
        S[ 11 ] = SB;

        SB = S[ 10 ];
        S[ 10 ] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, A_Q12[ 4 ] );
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SB, A_Q12[ 5 ] );
        SA = S[ 9 ];
        S[ 9 ] = SB;

        SB = S[ 8 ];
        S[ 8 ] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, A_Q12[ 6 ] );
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SB, A_Q12[ 7 ] );
        SA = S[ 7 ];
        S[ 7 ] = SB;

        SB = S[ 6 ];
        S[ 6 ] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, A_Q12[ 8 ] );
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SB, A_Q12[ 9 ] );
        SA = S[ 5 ];
        S[ 5 ] = SB;

        SB = S[ 4 ];
        S[ 4 ] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, A_Q12[ 10 ] );
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SB, A_Q12[ 11 ] );
        SA = S[ 3 ];
        S[ 3 ] = SB;

        SB = S[ 2 ];
        S[ 2 ] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, A_Q12[ 12 ] );
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SB, A_Q12[ 13 ] );
        SA = S[ 1 ];
        S[ 1 ] = SB;

        /* unrolled loop: epilog */
        SB = S[ 0 ];
        S[ 0 ] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, A_Q12[ 14 ] );
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SB, A_Q12[ 15 ] );

        /* unrolled loop: end */
        /* apply gain to excitation signal and add to prediction */
        out32_Q10 = SKP_ADD_SAT32( out32_Q10, SKP_SMULWB( Gain_Q26, in[ k ] ) );

        /* scale to Q0 */
        out32 = SKP_RSHIFT_ROUND( out32_Q10, 10 );

        /* saturate output */
        out[ k ] = ( SKP_int16 )SKP_SAT16( out32 );

        /* move result into delay line */
        S[ 15 ] = SKP_LSHIFT_SAT32( out32_Q10, 4 );
    }
}
/* even order AR filter */
void SKP_Silk_LPC_synthesis_filter(
    const SKP_int16 *in,        /* I:   excitation signal */
    const SKP_int16 *A_Q12,     /* I:   AR coefficients [Order], between -8_Q0 and 8_Q0 */
    const SKP_int32 Gain_Q26,   /* I:   gain */
    SKP_int32 *S,               /* I/O: state vector [Order] */
    SKP_int16 *out,             /* O:   output signal */
    const SKP_int32 len,        /* I:   signal length */
    const SKP_int Order         /* I:   filter order, must be even */
)
{
    SKP_int   k, j, idx, Order_half = SKP_RSHIFT( Order, 1 );
    SKP_int32 SA, SB, out32_Q10, out32;
#if !defined(_SYSTEM_IS_BIG_ENDIAN)
    SKP_int32 Atmp, A_align_Q12[ SKP_Silk_MAX_ORDER_LPC >> 1 ];

    /* combine two A_Q12 values and ensure 32-bit alignment */
    for( k = 0; k < Order_half; k++ ) {
        idx = SKP_SMULBB( 2, k );
        A_align_Q12[ k ] = ( ( ( SKP_int32 )A_Q12[ idx ] ) & 0x0000ffff ) | SKP_LSHIFT( ( SKP_int32 )A_Q12[ idx + 1 ], 16 );
    }
#endif

    /* Order must be even */
    SKP_assert( 2 * Order_half == Order );

    /* S[] values are in Q14 */
    for( k = 0; k < len; k++ ) {
        SA = S[ Order - 1 ];
        out32_Q10 = 0;
        for( j = 0; j < ( Order_half - 1 ); j++ ) {
            idx = SKP_SMULBB( 2, j ) + 1;
#if !defined(_SYSTEM_IS_BIG_ENDIAN)
            /* multiply-add two prediction coefficients for each loop */
            /* NOTE: the code below loads two int16 values in an int32, and multiplies each using the   */
            /* SMLAWB and SMLAWT instructions. On a big-endian CPU the two int16 variables would be     */
            /* loaded in reverse order and the code will give the wrong result. In that case swapping   */
            /* the SMLAWB and SMLAWT instructions should solve the problem.                             */
            Atmp = A_align_Q12[ j ];
            SB = S[ Order - 1 - idx ];
            S[ Order - 1 - idx ] = SA;
            out32_Q10 = SKP_SMLAWB( out32_Q10, SA, Atmp );
            out32_Q10 = SKP_SMLAWT( out32_Q10, SB, Atmp );
            SA = S[ Order - 2 - idx ];
            S[ Order - 2 - idx ] = SB;
#else
            SB = S[ Order - 1 - idx ];
            S[ Order - 1 - idx ] = SA;
            out32_Q10 = SKP_SMLAWB( out32_Q10, SA, A_Q12[ ( j << 1 ) ] );
            out32_Q10 = SKP_SMLAWB( out32_Q10, SB, A_Q12[ ( j << 1 ) + 1 ] );
            SA = S[ Order - 2 - idx ];
            S[ Order - 2 - idx ] = SB;
#endif
        }

#if !defined(_SYSTEM_IS_BIG_ENDIAN)
        /* unrolled loop: epilog */
        Atmp = A_align_Q12[ Order_half - 1 ];
        SB = S[ 0 ];
        S[ 0 ] = SA;
        out32_Q10 = SKP_SMLAWB( out32_Q10, SA, Atmp );
        out32_Q10 = SKP_SMLAWT( out32_Q10, SB, Atmp );
#else
        /* unrolled loop: epilog */
        SB = S[ 0 ];
        S[ 0 ] = SA;
        out32_Q10 = SKP_SMLAWB( out32_Q10, SA, A_Q12[ Order - 2 ] );
        out32_Q10 = SKP_SMLAWB( out32_Q10, SB, A_Q12[ Order - 1 ] );
#endif
        /* apply gain to excitation signal and add to prediction */
        out32_Q10 = SKP_ADD_SAT32( out32_Q10, SKP_SMULWB( Gain_Q26, in[ k ] ) );

        /* scale to Q0 */
        out32 = SKP_RSHIFT_ROUND( out32_Q10, 10 );

        /* saturate output */
        out[ k ] = ( SKP_int16 )SKP_SAT16( out32 );

        /* move result into delay line */
        S[ Order - 1 ] = SKP_LSHIFT_SAT32( out32_Q10, 4 );
    }
}
void SKP_Silk_noise_shape_analysis_FIX(
    SKP_Silk_encoder_state_FIX      *psEnc,         /* I/O  Encoder state FIX                           */
    SKP_Silk_encoder_control_FIX    *psEncCtrl,     /* I/O  Encoder control FIX                         */
    const SKP_int16                 *pitch_res,     /* I    LPC residual from pitch analysis            */
    const SKP_int16                 *x              /* I    Input signal [ frame_length + la_shape ]    */
)
{
    SKP_Silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
    SKP_int     k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
    SKP_int32   SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
    SKP_int32   nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
    SKP_int32   delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
    SKP_int32   auto_corr[     MAX_SHAPE_LPC_ORDER + 1 ];
    SKP_int32   refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];
    SKP_int32   AR1_Q24[       MAX_SHAPE_LPC_ORDER ];
    SKP_int32   AR2_Q24[       MAX_SHAPE_LPC_ORDER ];
    SKP_int16   x_windowed[    SHAPE_LPC_WIN_MAX ];
    const SKP_int16 *x_ptr, *pitch_res_ptr;

    SKP_int32   sqrt_nrg[ NB_SUBFR ], Qnrg_vec[ NB_SUBFR ];

    /* Point to start of first LPC analysis block */
    x_ptr = x - psEnc->sCmn.la_shape;

    /****************/
    /* CONTROL SNR  */
    /****************/
    /* Reduce SNR_dB values if recent bitstream has exceeded TargetRate */
    psEncCtrl->current_SNR_dB_Q7 = psEnc->SNR_dB_Q7 - SKP_SMULWB( SKP_LSHIFT( ( SKP_int32 )psEnc->BufferedInChannel_ms, 7 ),
        SKP_FIX_CONST( 0.05, 16 ) );

    /* Reduce SNR_dB if inband FEC used */
    if( psEnc->speech_activity_Q8 > SKP_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
        psEncCtrl->current_SNR_dB_Q7 -= SKP_RSHIFT( psEnc->inBandFEC_SNR_comp_Q8, 1 );
    }

    /****************/
    /* GAIN CONTROL */
    /****************/
    /* Input quality is the average of the quality in the lowest two VAD bands */
    psEncCtrl->input_quality_Q14 = ( SKP_int )SKP_RSHIFT( ( SKP_int32 )psEncCtrl->input_quality_bands_Q15[ 0 ]
        + psEncCtrl->input_quality_bands_Q15[ 1 ], 2 );

    /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */
    psEncCtrl->coding_quality_Q14 = SKP_RSHIFT( SKP_Silk_sigm_Q15( SKP_RSHIFT_ROUND( psEncCtrl->current_SNR_dB_Q7 -
        SKP_FIX_CONST( 18.0, 7 ), 4 ) ), 1 );

    /* Reduce coding SNR during low speech activity */
    b_Q8 = SKP_FIX_CONST( 1.0, 8 ) - psEnc->speech_activity_Q8;
    b_Q8 = SKP_SMULWB( SKP_LSHIFT( b_Q8, 8 ), b_Q8 );
    SNR_adj_dB_Q7 = SKP_SMLAWB( psEncCtrl->current_SNR_dB_Q7,
        SKP_SMULBB( SKP_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ),                                       // Q11
        SKP_SMULWB( SKP_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) );     // Q12

    if( psEncCtrl->sCmn.sigtype == SIG_TYPE_VOICED ) {
        /* Reduce gains for periodic signals */
        SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, SKP_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 );
    } else {
        /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
        SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7,
            SKP_SMLAWB( SKP_FIX_CONST( 6.0, 9 ), -SKP_FIX_CONST( 0.4, 18 ), psEncCtrl->current_SNR_dB_Q7 ),
            SKP_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 );
    }

    /*************************/
    /* SPARSENESS PROCESSING */
    /*************************/
    /* Set quantizer offset */
    if( psEncCtrl->sCmn.sigtype == SIG_TYPE_VOICED ) {
        /* Initally set to 0; may be overruled in process_gains(..) */
        psEncCtrl->sCmn.QuantOffsetType = 0;
        psEncCtrl->sparseness_Q8 = 0;
    } else {
        /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
        nSamples = SKP_LSHIFT( psEnc->sCmn.fs_kHz, 1 );
        energy_variation_Q7 = 0;
        log_energy_prev_Q7  = 0;
        pitch_res_ptr = pitch_res;
        for( k = 0; k < FRAME_LENGTH_MS / 2; k++ ) {
            SKP_Silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples );
            nrg += SKP_RSHIFT( nSamples, scale );           // Q(-scale)

            log_energy_Q7 = SKP_Silk_lin2log( nrg );
            if( k > 0 ) {
                energy_variation_Q7 += SKP_abs( log_energy_Q7 - log_energy_prev_Q7 );
            }
            log_energy_prev_Q7 = log_energy_Q7;
            pitch_res_ptr += nSamples;
        }

        psEncCtrl->sparseness_Q8 = SKP_RSHIFT( SKP_Silk_sigm_Q15( SKP_SMULWB( energy_variation_Q7 -
            SKP_FIX_CONST( 5.0, 7 ), SKP_FIX_CONST( 0.1, 16 ) ) ), 7 );

        /* Set quantization offset depending on sparseness measure */
        if( psEncCtrl->sparseness_Q8 > SKP_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) {
            psEncCtrl->sCmn.QuantOffsetType = 0;
        } else {
            psEncCtrl->sCmn.QuantOffsetType = 1;
        }

        /* Increase coding SNR for sparse signals */
        SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, SKP_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SKP_FIX_CONST( 0.5, 8 ) );
    }

    /*******************************/
    /* Control bandwidth expansion */
    /*******************************/
    /* More BWE for signals with high prediction gain */
    strength_Q16 = SKP_SMULWB( psEncCtrl->predGain_Q16, SKP_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) );
    BWExp1_Q16 = BWExp2_Q16 = SKP_DIV32_varQ( SKP_FIX_CONST( BANDWIDTH_EXPANSION, 16 ),
        SKP_SMLAWW( SKP_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 );
    delta_Q16  = SKP_SMULWB( SKP_FIX_CONST( 1.0, 16 ) - SKP_SMULBB( 3, psEncCtrl->coding_quality_Q14 ),
        SKP_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) );
    BWExp1_Q16 = SKP_SUB32( BWExp1_Q16, delta_Q16 );
    BWExp2_Q16 = SKP_ADD32( BWExp2_Q16, delta_Q16 );
    /* BWExp1 will be applied after BWExp2, so make it relative */
    BWExp1_Q16 = SKP_DIV32_16( SKP_LSHIFT( BWExp1_Q16, 14 ), SKP_RSHIFT( BWExp2_Q16, 2 ) );

    if( psEnc->sCmn.warping_Q16 > 0 ) {
        /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
        warping_Q16 = SKP_SMLAWB( psEnc->sCmn.warping_Q16, psEncCtrl->coding_quality_Q14, SKP_FIX_CONST( 0.01, 18 ) );
    } else {
        warping_Q16 = 0;
    }

    /********************************************/
    /* Compute noise shaping AR coefs and gains */
    /********************************************/
    for( k = 0; k < NB_SUBFR; k++ ) {
        /* Apply window: sine slope followed by flat part followed by cosine slope */
        SKP_int shift, slope_part, flat_part;
        flat_part = psEnc->sCmn.fs_kHz * 5;
        slope_part = SKP_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 );

        SKP_Silk_apply_sine_window_new( x_windowed, x_ptr, 1, slope_part );
        shift = slope_part;
        SKP_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(SKP_int16) );
        shift += flat_part;
        SKP_Silk_apply_sine_window_new( x_windowed + shift, x_ptr + shift, 2, slope_part );

        /* Update pointer: next LPC analysis block */
        x_ptr += psEnc->sCmn.subfr_length;

        if( psEnc->sCmn.warping_Q16 > 0 ) {
            /* Calculate warped auto correlation */
            SKP_Silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
        } else {
            /* Calculate regular auto correlation */
            SKP_Silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 );
        }

        /* Add white noise, as a fraction of energy */
        auto_corr[0] = SKP_ADD32( auto_corr[0], SKP_max_32( SKP_SMULWB( SKP_RSHIFT( auto_corr[ 0 ], 4 ),
            SKP_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) );

        /* Calculate the reflection coefficients using schur */
        nrg = SKP_Silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );
        SKP_assert( nrg >= 0 );

        /* Convert reflection coefficients to prediction coefficients */
        SKP_Silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );

        Qnrg = -scale;          // range: -12...30
        SKP_assert( Qnrg >= -12 );
        SKP_assert( Qnrg <=  30 );

        /* Make sure that Qnrg is an even number */
        if( Qnrg & 1 ) {
            Qnrg -= 1;
            nrg >>= 1;
        }

        tmp32 = SKP_Silk_SQRT_APPROX( nrg );
        Qnrg >>= 1;             // range: -6...15

        sqrt_nrg[ k ] = tmp32;
        Qnrg_vec[ k ] = Qnrg;

        psEncCtrl->Gains_Q16[ k ] = SKP_LSHIFT_SAT32( tmp32, 16 - Qnrg );

        if( psEnc->sCmn.warping_Q16 > 0 ) {
            /* Adjust gain for warping */
            gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder );
            SKP_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
            psEncCtrl->Gains_Q16[ k ] = SKP_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
            if( psEncCtrl->Gains_Q16[ k ] < 0 ) {
                psEncCtrl->Gains_Q16[ k ] = SKP_int32_MAX;
            }
        }

        /* Bandwidth expansion for synthesis filter shaping */
        SKP_Silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 );

        /* Compute noise shaping filter coefficients */
        SKP_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( SKP_int32 ) );

        /* Bandwidth expansion for analysis filter shaping */
        SKP_assert( BWExp1_Q16 <= SKP_FIX_CONST( 1.0, 16 ) );
        SKP_Silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 );

        /* Ratio of prediction gains, in energy domain */
        SKP_Silk_LPC_inverse_pred_gain_Q24( &pre_nrg_Q30, AR2_Q24, psEnc->sCmn.shapingLPCOrder );
        SKP_Silk_LPC_inverse_pred_gain_Q24( &nrg,         AR1_Q24, psEnc->sCmn.shapingLPCOrder );

        //psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;
        pre_nrg_Q30 = SKP_LSHIFT32( SKP_SMULWB( pre_nrg_Q30, SKP_FIX_CONST( 0.7, 15 ) ), 1 );
        psEncCtrl->GainsPre_Q14[ k ] = ( SKP_int ) SKP_FIX_CONST( 0.3, 14 ) + SKP_DIV32_varQ( pre_nrg_Q30, nrg, 14 );

        /* Convert to monic warped prediction coefficients and limit absolute values */
        limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SKP_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder );

        /* Convert from Q24 to Q13 and store in int16 */
        for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) {
            psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) );
            psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) );
        }
    }
void SKP_Silk_noise_shape_analysis_FIX(
    SKP_Silk_encoder_state_FIX      *psEnc,         /* I/O  Encoder state FIX                           */
    SKP_Silk_encoder_control_FIX    *psEncCtrl,     /* I/O  Encoder control FIX                         */
    const SKP_int16                 *pitch_res,     /* I    LPC residual from pitch analysis            */
    const SKP_int16                 *x              /* I    Input signal [ frame_length + la_shape ]    */
)
{
    SKP_Silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
    SKP_int     k, nSamples, lz, Qnrg, b_Q14, scale = 0, sz;
    SKP_int32   SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
    SKP_int32   nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
    SKP_int32   delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
    SKP_int32   auto_corr[     SHAPE_LPC_ORDER_MAX + 1 ];
    SKP_int32   refl_coef_Q16[ SHAPE_LPC_ORDER_MAX ];
    SKP_int32   AR_Q24[        SHAPE_LPC_ORDER_MAX ];
    SKP_int16   x_windowed[    SHAPE_LPC_WIN_MAX ];
    const SKP_int16 *x_ptr, *pitch_res_ptr;

    SKP_int32   sqrt_nrg[ NB_SUBFR ], Qnrg_vec[ NB_SUBFR ];

    /* Point to start of first LPC analysis block */
    x_ptr = x + psEnc->sCmn.la_shape - SKP_SMULBB( SHAPE_LPC_WIN_MS, psEnc->sCmn.fs_kHz ) + psEnc->sCmn.subfr_length;

    /****************/
    /* CONTROL SNR  */
    /****************/
    /* Reduce SNR_dB values if recent bitstream has exceeded TargetRate */
    psEncCtrl->current_SNR_dB_Q7 = psEnc->SNR_dB_Q7 - SKP_SMULWB( SKP_LSHIFT( ( SKP_int32 )psEnc->BufferedInChannel_ms, 7 ), 3277 );

    /* Reduce SNR_dB if inband FEC used */
    if( psEnc->speech_activity_Q8 > LBRR_SPEECH_ACTIVITY_THRES_Q8 ) {
        psEncCtrl->current_SNR_dB_Q7 -= SKP_RSHIFT( psEnc->inBandFEC_SNR_comp_Q8, 1 );
    }

    /****************/
    /* GAIN CONTROL */
    /****************/
    /* Input quality is the average of the quality in the lowest two VAD bands */
    psEncCtrl->input_quality_Q14 = ( SKP_int )SKP_RSHIFT( ( SKP_int32 )psEncCtrl->input_quality_bands_Q15[ 0 ] 
        + psEncCtrl->input_quality_bands_Q15[ 1 ], 2 );
    /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */
    psEncCtrl->coding_quality_Q14 = SKP_RSHIFT( SKP_Silk_sigm_Q15( SKP_RSHIFT_ROUND( psEncCtrl->current_SNR_dB_Q7 - ( 18 << 7 ), 4 ) ), 1 );

    /* Reduce coding SNR during low speech activity */
    b_Q8 = ( 1 << 8 ) - psEnc->speech_activity_Q8;
    b_Q8 = SKP_SMULWB( SKP_LSHIFT( b_Q8, 8 ), b_Q8 );
    SNR_adj_dB_Q7 = SKP_SMLAWB( psEncCtrl->current_SNR_dB_Q7,
        SKP_SMULBB( -BG_SNR_DECR_dB_Q7 >> ( 4 + 1 ), b_Q8 ),                                            // Q11
        SKP_SMULWB( ( 1 << 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) );      // Q12

    if( psEncCtrl->sCmn.sigtype == SIG_TYPE_VOICED ) {
        /* Reduce gains for periodic signals */
        SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, HARM_SNR_INCR_dB_Q7 << 1, psEnc->LTPCorr_Q15 );
    } else { 
        /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
        SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, 
            SKP_SMLAWB( 6 << ( 7 + 2 ), -104856, psEncCtrl->current_SNR_dB_Q7 ),    //-104856_Q18 = -0.4_Q0, Q9
            ( 1 << 14 ) - psEncCtrl->input_quality_Q14 );                           // Q14
    }

    /*************************/
    /* SPARSENESS PROCESSING */
    /*************************/
    /* Set quantizer offset */
    if( psEncCtrl->sCmn.sigtype == SIG_TYPE_VOICED ) {
        /* Initally set to 0; may be overruled in process_gains(..) */
        psEncCtrl->sCmn.QuantOffsetType = 0;
        psEncCtrl->sparseness_Q8 = 0;
    } else {
        /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
        nSamples = SKP_LSHIFT( psEnc->sCmn.fs_kHz, 1 );
        energy_variation_Q7 = 0;
        log_energy_prev_Q7  = 0;
        pitch_res_ptr = pitch_res;
        for( k = 0; k < FRAME_LENGTH_MS / 2; k++ ) {    
            SKP_Silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples );
            nrg += SKP_RSHIFT( nSamples, scale );           // Q(-scale)
            
            log_energy_Q7 = SKP_Silk_lin2log( nrg );
            if( k > 0 ) {
                energy_variation_Q7 += SKP_abs( log_energy_Q7 - log_energy_prev_Q7 );
            }
            log_energy_prev_Q7 = log_energy_Q7;
            pitch_res_ptr += nSamples;
        }

        psEncCtrl->sparseness_Q8 = SKP_RSHIFT( SKP_Silk_sigm_Q15( SKP_SMULWB( energy_variation_Q7 - ( 5 << 7 ), 6554 ) ), 7 );    // 6554_Q16 = 0.1_Q0

        /* Set quantization offset depending on sparseness measure */
        if( psEncCtrl->sparseness_Q8 > SPARSENESS_THRESHOLD_QNT_OFFSET_Q8 ) {
            psEncCtrl->sCmn.QuantOffsetType = 0;
        } else {
            psEncCtrl->sCmn.QuantOffsetType = 1;
        }
        
        /* Increase coding SNR for sparse signals */
        SNR_adj_dB_Q7 = SKP_SMLAWB( SNR_adj_dB_Q7, SPARSE_SNR_INCR_dB_Q7 << 8, psEncCtrl->sparseness_Q8 - ( 1 << 7 ) );
    }

    /*******************************/
    /* Control bandwidth expansion */
    /*******************************/
    delta_Q16  = SKP_SMULWB( ( 1 << 16 ) - SKP_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), LOW_RATE_BANDWIDTH_EXPANSION_DELTA_Q16 );
    BWExp1_Q16 = BANDWIDTH_EXPANSION_Q16 - delta_Q16;
    BWExp2_Q16 = BANDWIDTH_EXPANSION_Q16 + delta_Q16;
    if( psEnc->sCmn.fs_kHz == 24 ) {
        /* Less bandwidth expansion for super wideband */
        BWExp1_Q16 = ( 1 << 16 ) - SKP_SMULWB( SWB_BANDWIDTH_EXPANSION_REDUCTION_Q16, ( 1 << 16 ) - BWExp1_Q16 );
        BWExp2_Q16 = ( 1 << 16 ) - SKP_SMULWB( SWB_BANDWIDTH_EXPANSION_REDUCTION_Q16, ( 1 << 16 ) - BWExp2_Q16 );
    }
    /* BWExp1 will be applied after BWExp2, so make it relative */
    BWExp1_Q16 = SKP_DIV32_16( SKP_LSHIFT( BWExp1_Q16, 14 ), SKP_RSHIFT( BWExp2_Q16, 2 ) );

    /********************************************/
    /* Compute noise shaping AR coefs and gains */
    /********************************************/
    sz = ( SKP_int )SKP_SMULBB( SHAPE_LPC_WIN_MS, psEnc->sCmn.fs_kHz );
    for( k = 0; k < NB_SUBFR; k++ ) {
        /* Apply window */
        SKP_Silk_apply_sine_window( x_windowed, x_ptr, 0, SHAPE_LPC_WIN_MS * psEnc->sCmn.fs_kHz );

        /* Update pointer: next LPC analysis block */
        x_ptr += psEnc->sCmn.subfr_length;

        /* Calculate auto correlation */
        SKP_Silk_autocorr( auto_corr, &scale, x_windowed, sz, psEnc->sCmn.shapingLPCOrder + 1 );

        /* Add white noise, as a fraction of energy */
        auto_corr[0] = SKP_ADD32( auto_corr[0], SKP_max_32( SKP_SMULWB( SKP_RSHIFT( auto_corr[ 0 ], 4 ), SHAPE_WHITE_NOISE_FRACTION_Q20 ), 1 ) ); 

        /* Calculate the reflection coefficients using schur */
        nrg = SKP_Silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );

        /* Convert reflection coefficients to prediction coefficients */
        SKP_Silk_k2a_Q16( AR_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );

        /* Bandwidth expansion for synthesis filter shaping */
        SKP_Silk_bwexpander_32( AR_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 );

        /* Make sure to fit in Q13 SKP_int16 */
        SKP_Silk_LPC_fit( &psEncCtrl->AR2_Q13[ k * SHAPE_LPC_ORDER_MAX ], AR_Q24, 13, psEnc->sCmn.shapingLPCOrder );

        /* Compute noise shaping filter coefficients */
        SKP_memcpy(
            &psEncCtrl->AR1_Q13[ k * SHAPE_LPC_ORDER_MAX ], 
            &psEncCtrl->AR2_Q13[ k * SHAPE_LPC_ORDER_MAX ], 
            psEnc->sCmn.shapingLPCOrder * sizeof( SKP_int16 ) );

        /* Bandwidth expansion for analysis filter shaping */
        SKP_assert( BWExp1_Q16 <= ( 1 << 16 ) ); // If ever breaking, use LPC_stabilize() in these cases to stay within range
        SKP_Silk_bwexpander( &psEncCtrl->AR1_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 );

        /* Increase residual energy */
        nrg = SKP_SMLAWB( nrg, SKP_RSHIFT( auto_corr[ 0 ], 8 ), SHAPE_MIN_ENERGY_RATIO_Q24 );

        Qnrg = -scale;          // range: -12...30
        SKP_assert( Qnrg >= -12 );
        SKP_assert( Qnrg <=  30 );

        /* Make sure that Qnrg is an even number */
        if( Qnrg & 1 ) {
            Qnrg -= 1;
            nrg >>= 1;
        }

        tmp32 = SKP_Silk_SQRT_APPROX( nrg );
        Qnrg >>= 1;             // range: -6...15

        sqrt_nrg[ k ] = tmp32;
        Qnrg_vec[ k ] = Qnrg;

        psEncCtrl->Gains_Q16[ k ] = SKP_LSHIFT_SAT32( tmp32, 16 - Qnrg );

        /* Ratio of prediction gains, in energy domain */
        SKP_Silk_LPC_inverse_pred_gain_Q13( &pre_nrg_Q30, &psEncCtrl->AR2_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder );
        SKP_Silk_LPC_inverse_pred_gain_Q13( &nrg,         &psEncCtrl->AR1_Q13[ k * SHAPE_LPC_ORDER_MAX ], psEnc->sCmn.shapingLPCOrder );

        lz = SKP_min_32( SKP_Silk_CLZ32( pre_nrg_Q30 ) - 1, 19 );
        pre_nrg_Q30 = SKP_DIV32( SKP_LSHIFT( pre_nrg_Q30, lz ), SKP_RSHIFT( nrg, 20 - lz ) + 1 ); // Q20
        pre_nrg_Q30 = SKP_RSHIFT( SKP_LSHIFT_SAT32( pre_nrg_Q30, 9 ), 1 );  /* Q28 */
        psEncCtrl->GainsPre_Q14[ k ] = ( SKP_int )SKP_Silk_SQRT_APPROX( pre_nrg_Q30 );
    }
/* 16th order AR filter */
void SKP_Silk_LPC_synthesis_order16(const SKP_int16 *in,          /* I:   excitation signal */
                                      const SKP_int16 *A_Q12,       /* I:   AR coefficients [16], between -8_Q0 and 8_Q0 */
                                      const SKP_int32 Gain_Q26,     /* I:   gain */
                                      SKP_int32 *S,                 /* I/O: state vector [16] */
                                      SKP_int16 *out,               /* O:   output signal */
                                      const SKP_int32 len           /* I:   signal length, must be multiple of 16 */
)
{
    SKP_int   k;
    SKP_int32 SA, SB, Atmp, A_align_Q12[8], out32_Q10, out32;

    /* combine two A_Q12 values and ensure 32-bit alignment */
    for( k = 0; k < 8; k++ ) {
        A_align_Q12[k] = (((SKP_int32)A_Q12[ 2*k ]) & 0x0000ffff) | SKP_LSHIFT( (SKP_int32)A_Q12[ 2*k + 1 ], 16 );
    }

    /* S[] values are in Q14 */
    /* NOTE: the code below loads two int16 values in an int32, and multiplies each using the   */
    /* SMLAWB and SMLAWT instructions. On a big-endian CPU the two int16 variables would be     */
    /* loaded in reverse order and the code will give the wrong result. In that case swapping   */
    /* the SMLAWB and SMLAWT instructions should solve the problem.                             */
    for( k = 0; k < len; k++ ) {
        /* unrolled loop: prolog */
        /* multiply-add two prediction coefficients per iteration */
        SA = S[15];
        Atmp = A_align_Q12[0];
        SB = S[14];
        S[14] = SA;
        out32_Q10 = SKP_SMULWB(                  SA, Atmp );
        out32_Q10 = SKP_SMLAWT_ovflw( out32_Q10, SB, Atmp );
        SA = S[13];
        S[13] = SB;

        /* unrolled loop: main loop */
        Atmp = A_align_Q12[1];
        SB = S[12];
        S[12] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, Atmp );
        out32_Q10 = SKP_SMLAWT_ovflw( out32_Q10, SB, Atmp );
        SA = S[11];
        S[11] = SB;

        Atmp = A_align_Q12[2];
        SB = S[10];
        S[10] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, Atmp );
        out32_Q10 = SKP_SMLAWT_ovflw( out32_Q10, SB, Atmp );
        SA = S[9];
        S[9] = SB;

        Atmp = A_align_Q12[3];
        SB = S[8];
        S[8] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, Atmp );
        out32_Q10 = SKP_SMLAWT_ovflw( out32_Q10, SB, Atmp );
        SA = S[7];
        S[7] = SB;

        Atmp = A_align_Q12[4];
        SB = S[6];
        S[6] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, Atmp );
        out32_Q10 = SKP_SMLAWT_ovflw( out32_Q10, SB, Atmp );
        SA = S[5];
        S[5] = SB;

        Atmp = A_align_Q12[5];
        SB = S[4];
        S[4] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, Atmp );
        out32_Q10 = SKP_SMLAWT_ovflw( out32_Q10, SB, Atmp );
        SA = S[3];
        S[3] = SB;

        Atmp = A_align_Q12[6];
        SB = S[2];
        S[2] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, Atmp );
        out32_Q10 = SKP_SMLAWT_ovflw( out32_Q10, SB, Atmp );
        SA = S[1];
        S[1] = SB;

        /* unrolled loop: epilog */
        Atmp = A_align_Q12[7];
        SB = S[0];
        S[0] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw( out32_Q10, SA, Atmp );
        out32_Q10 = SKP_SMLAWT_ovflw( out32_Q10, SB, Atmp );

        /* unrolled loop: end */
        /* apply gain to excitation signal and add to prediction */
        out32_Q10 = SKP_ADD_SAT32( out32_Q10, SKP_SMULWB( Gain_Q26, in[k] ) );

        /* scale to Q0 */
        out32 = SKP_RSHIFT_ROUND( out32_Q10, 10 );

        /* saturate output */
        out[k] = (SKP_int16)SKP_SAT16( out32 );

        /* move result into delay line */
        S[15] = SKP_LSHIFT_SAT32( out32_Q10, 4 );
    }
}
                ResNrgPart = SKP_LSHIFT( ResNrgPart, -psEncCtrl->ResNrgQ[ k ] );
            }
        }
        gain = psEncCtrl->Gains_Q16[ k ];
        gain_squared = SKP_ADD_SAT32( ResNrgPart, SKP_SMMUL( gain, gain ) );
        if( gain_squared < SKP_int16_MAX ) {
            /* recalculate with higher precision */
            gain_squared = SKP_SMLAWW( SKP_LSHIFT( ResNrgPart, 16 ), gain, gain );
            SKP_assert( gain_squared > 0 );
            gain = silk_SQRT_APPROX( gain_squared );                    /* Q8   */
            gain = SKP_min( gain, SKP_int32_MAX >> 8 );
            psEncCtrl->Gains_Q16[ k ] = SKP_LSHIFT_SAT32( gain, 8 );        /* Q16  */
        } else {
            gain = silk_SQRT_APPROX( gain_squared );                    /* Q0   */
            gain = SKP_min( gain, SKP_int32_MAX >> 16 );
            psEncCtrl->Gains_Q16[ k ] = SKP_LSHIFT_SAT32( gain, 16 );       /* Q16  */
        }
    }

    /* Noise shaping quantization */
    silk_gains_quant( psEnc->sCmn.indices.GainsIndices, psEncCtrl->Gains_Q16,
                      &psShapeSt->LastGainIndex, psEnc->sCmn.nFramesEncoded, psEnc->sCmn.nb_subfr );

    /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */
    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
        if( psEncCtrl->LTPredCodGain_Q7 + SKP_RSHIFT( psEnc->sCmn.input_tilt_Q15, 8 ) > SILK_FIX_CONST( 1.0, 7 ) ) {
            psEnc->sCmn.indices.quantOffsetType = 0;
        } else {
            psEnc->sCmn.indices.quantOffsetType = 1;
        }
    }