/* Autocorrelations for a warped frequency axis */
void silk_warped_autocorrelation_FIX(
          opus_int32                 *corr,              /* O    Result [order + 1]                      */
          opus_int                   *scale,             /* O    Scaling of the correlation vector       */
    const opus_int16                 *input,             /* I    Input data to correlate                 */
    const opus_int                   warping_Q16,        /* I    Warping coefficient                     */
    const opus_int                   length,             /* I    Length of input                         */
    const opus_int                   order               /* I    Correlation order (even)                */
)
{
    opus_int   n, i, lsh;
    opus_int32 tmp1_QS, tmp2_QS;
    opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
    opus_int64 corr_QC[  MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };

    /* Order must be even */
    SKP_assert( ( order & 1 ) == 0 );
    SKP_assert( 2 * QS - QC >= 0 );

    /* Loop over samples */
    for( n = 0; n < length; n++ ) {
        tmp1_QS = SKP_LSHIFT32( ( opus_int32 )input[ n ], QS );
        /* Loop over allpass sections */
        for( i = 0; i < order; i += 2 ) {
            /* Output of allpass section */
            tmp2_QS = SKP_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
            state_QS[ i ]  = tmp1_QS;
            corr_QC[  i ] += SKP_RSHIFT64( SKP_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC );
            /* Output of allpass section */
            tmp1_QS = SKP_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
            state_QS[ i + 1 ]  = tmp2_QS;
            corr_QC[  i + 1 ] += SKP_RSHIFT64( SKP_SMULL( tmp2_QS, state_QS[ 0 ] ), 2 * QS - QC );
        }
        state_QS[ order ] = tmp1_QS;
        corr_QC[  order ] += SKP_RSHIFT64( SKP_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC );
    }

    lsh = silk_CLZ64( corr_QC[ 0 ] ) - 35;
    lsh = SKP_LIMIT( lsh, -12 - QC, 30 - QC );
    *scale = -( QC + lsh ); 
    SKP_assert( *scale >= -30 && *scale <= 12 );
    if( lsh >= 0 ) {
        for( i = 0; i < order + 1; i++ ) {
            corr[ i ] = ( opus_int32 )SKP_CHECK_FIT32( SKP_LSHIFT64( corr_QC[ i ], lsh ) );
        }
    } else {
        for( i = 0; i < order + 1; i++ ) {
            corr[ i ] = ( opus_int32 )SKP_CHECK_FIT32( SKP_RSHIFT64( corr_QC[ i ], -lsh ) );
        }    
    }
    SKP_assert( corr_QC[ 0 ] >= 0 ); // If breaking, decrease QC
}
void SKP_Silk_LTP_analysis_filter_FIX(
    SKP_int16       *LTP_res,                           /* O:   LTP residual signal of length NB_SUBFR * ( pre_length + subfr_length )  */
    const SKP_int16 *x,                                 /* I:   Pointer to input signal with at least max( pitchL ) preceeding samples  */
    const SKP_int16 LTPCoef_Q14[ LTP_ORDER * NB_SUBFR ],/* I:   LTP_ORDER LTP coefficients for each NB_SUBFR subframe                   */
    const SKP_int   pitchL[ NB_SUBFR ],                 /* I:   Pitch lag, one for each subframe                                        */
    const SKP_int32 invGains_Qxx[ NB_SUBFR ],           /* I:   Inverse quantization gains, one for each subframe                       */
    const SKP_int   Qxx,                                /* I:   Inverse quantization gains Q domain                                     */
    const SKP_int   subfr_length,                       /* I:   Length of each subframe                                                 */
    const SKP_int   pre_length                          /* I:   Length of the preceeding samples starting at &x[0] for each subframe    */
)
{
    const SKP_int16 *x_ptr, *x_lag_ptr;
    SKP_int16   Btmp_Q14[ LTP_ORDER ];
    SKP_int16   *LTP_res_ptr;
    SKP_int     k, i, j;
    SKP_int32   LTP_est;

    x_ptr = x;
    LTP_res_ptr = LTP_res;
    for( k = 0; k < NB_SUBFR; k++ ) {

        x_lag_ptr = x_ptr - pitchL[ k ];
        for( i = 0; i < LTP_ORDER; i++ ) {
            Btmp_Q14[ i ] = LTPCoef_Q14[ k * LTP_ORDER + i ];
        }

        /* LTP analysis FIR filter */
        for( i = 0; i < subfr_length + pre_length; i++ ) {
            LTP_res_ptr[ i ] = x_ptr[ i ];
            
            /* Long-term prediction */
            LTP_est = SKP_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] );
            for( j = 1; j < LTP_ORDER; j++ ) {
                LTP_est = SKP_SMLABB_ovflw( LTP_est, x_lag_ptr[ LTP_ORDER / 2 - j ], Btmp_Q14[ j ] );
            }
            LTP_est = SKP_RSHIFT_ROUND( LTP_est, 14 ); // round and -> Q0

            /* Subtract long-term prediction */
            LTP_res_ptr[ i ] = ( SKP_int16 )SKP_SAT16( ( SKP_int32 )x_ptr[ i ] - LTP_est );

            /* Scale residual */
            if( Qxx == 16 ) {
                LTP_res_ptr[ i ] = SKP_SMULWB( invGains_Qxx[ k ], LTP_res_ptr[ i ] );
            } else {
                LTP_res_ptr[ i ] = ( SKP_int16 )SKP_CHECK_FIT16( SKP_RSHIFT64( SKP_SMULL( invGains_Qxx[ k ], LTP_res_ptr[ i ] ), Qxx ) );
            }

            x_lag_ptr++;
        }

        /* Update pointers */
        LTP_res_ptr += subfr_length + pre_length; 
        x_ptr       += subfr_length;
    }
}
/* Compute autocorrelation */
void SKP_Silk_autocorr( 
    SKP_int32        *results,                   /* O    Result (length correlationCount)            */
    SKP_int          *scale,                     /* O    Scaling of the correlation vector           */
    const SKP_int16  *inputData,                 /* I    Input data to correlate                     */
    const SKP_int    inputDataSize,              /* I    Length of input                             */
    const SKP_int    correlationCount            /* I    Number of correlation taps to compute       */
)
{
    SKP_int   i, lz, nRightShifts, corrCount;
    SKP_int64 corr64;

    corrCount = SKP_min_int( inputDataSize, correlationCount );

    /* compute energy (zero-lag correlation) */
    corr64 = SKP_Silk_inner_prod16_aligned_64( inputData, inputData, inputDataSize );

    /* deal with all-zero input data */
    corr64 += 1;

    /* number of leading zeros */
    lz = SKP_Silk_CLZ64( corr64 );

    /* scaling: number of right shifts applied to correlations */
    nRightShifts = 35 - lz;
    *scale = nRightShifts;

    if( nRightShifts <= 0 ) {
        results[ 0 ] = SKP_LSHIFT( (SKP_int32)SKP_CHECK_FIT32( corr64 ), -nRightShifts );

        /* compute remaining correlations based on int32 inner product */
          for( i = 1; i < corrCount; i++ ) {
            results[ i ] = SKP_LSHIFT( SKP_Silk_inner_prod_aligned( inputData, inputData + i, inputDataSize - i ), -nRightShifts );
        }
    } else {
        results[ 0 ] = (SKP_int32)SKP_CHECK_FIT32( SKP_RSHIFT64( corr64, nRightShifts ) );

        /* compute remaining correlations based on int64 inner product */
          for( i = 1; i < corrCount; i++ ) {
            results[ i ] =  (SKP_int32)SKP_CHECK_FIT32( SKP_RSHIFT64( SKP_Silk_inner_prod16_aligned_64( inputData, inputData + i, inputDataSize - i ), nRightShifts ) );
        }
    }
}
/* Multiply a vector by a constant */
void SKP_Silk_scale_vector32_Q26_lshift_18( 
    SKP_int32           *data1,                     /* (I/O): Q0/Q18        */
    SKP_int32           gain_Q26,                   /* (I):   Q26           */
    SKP_int             dataSize                    /* (I):   length        */
)
{
    SKP_int  i;

    for( i = 0; i < dataSize; i++ ) {
        data1[ i ] = (SKP_int32)SKP_CHECK_FIT32( SKP_RSHIFT64( SKP_SMULL( data1[ i ], gain_Q26 ), 8 ) );// OUTPUT: Q18
    }
}
/* Compute reflection coefficients from input signal */
void SKP_Silk_burg_modified(
    SKP_int32       *res_nrg,           /* O    residual energy                                                 */
    SKP_int         *res_nrg_Q,         /* O    residual energy Q value                                         */
    SKP_int32       A_Q16[],            /* O    prediction coefficients (length order)                          */
    const SKP_int16 x[],                /* I    input signal, length: nb_subfr * ( D + subfr_length )           */
    const SKP_int   subfr_length,       /* I    input signal subframe length (including D preceeding samples)   */
    const SKP_int   nb_subfr,           /* I    number of subframes stacked in x                                */
    const SKP_int32 WhiteNoiseFrac_Q32, /* I    fraction added to zero-lag autocorrelation                      */
    const SKP_int   D                   /* I    order                                                           */
)
{
    SKP_int         k, n, s, lz, rshifts, rshifts_extra;
    SKP_int32       C0, num, nrg, rc_Q31, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
    const SKP_int16 *x_ptr;

    SKP_int32       C_first_row[ SKP_Silk_MAX_ORDER_LPC ];
    SKP_int32       C_last_row[  SKP_Silk_MAX_ORDER_LPC ];
    SKP_int32       Af_QA[       SKP_Silk_MAX_ORDER_LPC ];

    SKP_int32       CAf[ SKP_Silk_MAX_ORDER_LPC + 1 ];
    SKP_int32       CAb[ SKP_Silk_MAX_ORDER_LPC + 1 ];

    SKP_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
    SKP_assert( nb_subfr <= MAX_NB_SUBFR );


    /* Compute autocorrelations, added over subframes */
    SKP_Silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
    if( rshifts > MAX_RSHIFTS ) {
        C0 = SKP_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
        SKP_assert( C0 > 0 );
        rshifts = MAX_RSHIFTS;
    } else {
        lz = SKP_Silk_CLZ32( C0 ) - 1;
        rshifts_extra = N_BITS_HEAD_ROOM - lz;
        if( rshifts_extra > 0 ) {
            rshifts_extra = SKP_min( rshifts_extra, MAX_RSHIFTS - rshifts );
            C0 = SKP_RSHIFT32( C0, rshifts_extra );
        } else {
            rshifts_extra = SKP_max( rshifts_extra, MIN_RSHIFTS - rshifts );
            C0 = SKP_LSHIFT32( C0, -rshifts_extra );
        }
        rshifts += rshifts_extra;
    }
    SKP_memset( C_first_row, 0, SKP_Silk_MAX_ORDER_LPC * sizeof( SKP_int32 ) );
    if( rshifts > 0 ) {
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            for( n = 1; n < D + 1; n++ ) {
                C_first_row[ n - 1 ] += (SKP_int32)SKP_RSHIFT64( 
                    SKP_Silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
            }
        }
    } else {
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            for( n = 1; n < D + 1; n++ ) {
                C_first_row[ n - 1 ] += SKP_LSHIFT32( 
                    SKP_Silk_inner_prod_aligned( x_ptr, x_ptr + n, subfr_length - n ), -rshifts );
            }
        }
    }
    SKP_memcpy( C_last_row, C_first_row, SKP_Silk_MAX_ORDER_LPC * sizeof( SKP_int32 ) );
    
    /* Initialize */
    CAb[ 0 ] = CAf[ 0 ] = C0 + SKP_SMMUL( WhiteNoiseFrac_Q32, C0 ) + 1;         // Q(-rshifts)

    for( n = 0; n < D; n++ ) {
        /* Update first row of correlation matrix (without first element) */
        /* Update last row of correlation matrix (without last element, stored in reversed order) */
        /* Update C * Af */
        /* Update C * flipud(Af) (stored in reversed order) */
        if( rshifts > -2 ) {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                x1  = -SKP_LSHIFT32( (SKP_int32)x_ptr[ n ],                    16 - rshifts );      // Q(16-rshifts)
                x2  = -SKP_LSHIFT32( (SKP_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );      // Q(16-rshifts)
                tmp1 = SKP_LSHIFT32( (SKP_int32)x_ptr[ n ],                    QA - 16 );           // Q(QA-16)
                tmp2 = SKP_LSHIFT32( (SKP_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );           // Q(QA-16)
                for( k = 0; k < n; k++ ) {
                    C_first_row[ k ] = SKP_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); // Q( -rshifts )
                    C_last_row[ k ]  = SKP_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); // Q( -rshifts )
                    Atmp_QA = Af_QA[ k ];
                    tmp1 = SKP_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );              // Q(QA-16)
                    tmp2 = SKP_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );              // Q(QA-16)
                }
                tmp1 = SKP_LSHIFT32( -tmp1, 32 - QA - rshifts );                                    // Q(16-rshifts)
                tmp2 = SKP_LSHIFT32( -tmp2, 32 - QA - rshifts );                                    // Q(16-rshifts)
                for( k = 0; k <= n; k++ ) {
                    CAf[ k ] = SKP_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );     // Q( -rshift )
                    CAb[ k ] = SKP_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );     // Q( -rshift )
                }
            }
        } else {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                x1  = -SKP_LSHIFT32( (SKP_int32)x_ptr[ n ],                    -rshifts );          // Q( -rshifts )
                x2  = -SKP_LSHIFT32( (SKP_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );          // Q( -rshifts )
                tmp1 = SKP_LSHIFT32( (SKP_int32)x_ptr[ n ],                    17 );                // Q17
                tmp2 = SKP_LSHIFT32( (SKP_int32)x_ptr[ subfr_length - n - 1 ], 17 );                // Q17
                for( k = 0; k < n; k++ ) {
                    C_first_row[ k ] = SKP_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); // Q( -rshifts )
                    C_last_row[ k ]  = SKP_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); // Q( -rshifts )
                    Atmp1 = SKP_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                // Q17
                    tmp1 = SKP_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                   // Q17
                    tmp2 = SKP_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                   // Q17
                }
                tmp1 = -tmp1;                                                                       // Q17
                tmp2 = -tmp2;                                                                       // Q17
                for( k = 0; k <= n; k++ ) {
                    CAf[ k ] = SKP_SMLAWW( CAf[ k ], tmp1, 
                        SKP_LSHIFT32( (SKP_int32)x_ptr[ n - k ], -rshifts - 1 ) );                  // Q( -rshift )
                    CAb[ k ] = SKP_SMLAWW( CAb[ k ], tmp2, 
                        SKP_LSHIFT32( (SKP_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) );// Q( -rshift )
                }
            }
        }

        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
        tmp1 = C_first_row[ n ];                                                            // Q( -rshifts )
        tmp2 = C_last_row[ n ];                                                             // Q( -rshifts )
        num  = 0;                                                                           // Q( -rshifts )
        nrg  = SKP_ADD32( CAb[ 0 ], CAf[ 0 ] );                                             // Q( 1-rshifts )
        for( k = 0; k < n; k++ ) {
            Atmp_QA = Af_QA[ k ];
            lz = SKP_Silk_CLZ32( SKP_abs( Atmp_QA ) ) - 1;
            lz = SKP_min( 32 - QA, lz );
            Atmp1 = SKP_LSHIFT32( Atmp_QA, lz );                                            // Q( QA + lz )

            tmp1 = SKP_ADD_LSHIFT32( tmp1, SKP_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );    // Q( -rshifts )
            tmp2 = SKP_ADD_LSHIFT32( tmp2, SKP_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );    // Q( -rshifts )
            num  = SKP_ADD_LSHIFT32( num,  SKP_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );    // Q( -rshifts )
            nrg  = SKP_ADD_LSHIFT32( nrg,  SKP_SMMUL( SKP_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), 
                                                                                Atmp1 ), 32 - QA - lz );    // Q( 1-rshifts )
        }
        CAf[ n + 1 ] = tmp1;                                                                // Q( -rshifts )
        CAb[ n + 1 ] = tmp2;                                                                // Q( -rshifts )
        num = SKP_ADD32( num, tmp2 );                                                       // Q( -rshifts )
        num = SKP_LSHIFT32( -num, 1 );                                                      // Q( 1-rshifts )

        /* Calculate the next order reflection (parcor) coefficient */
        if( SKP_abs( num ) < nrg ) {
            rc_Q31 = SKP_DIV32_varQ( num, nrg, 31 );
        } else {
            /* Negative energy or ratio too high; set remaining coefficients to zero and exit loop */
            SKP_memset( &Af_QA[ n ], 0, ( D - n ) * sizeof( SKP_int32 ) );
            SKP_assert( 0 );
            break;
        }

        /* Update the AR coefficients */
        for( k = 0; k < (n + 1) >> 1; k++ ) {
            tmp1 = Af_QA[ k ];                                                              // QA
            tmp2 = Af_QA[ n - k - 1 ];                                                      // QA
            Af_QA[ k ]         = SKP_ADD_LSHIFT32( tmp1, SKP_SMMUL( tmp2, rc_Q31 ), 1 );    // QA
            Af_QA[ n - k - 1 ] = SKP_ADD_LSHIFT32( tmp2, SKP_SMMUL( tmp1, rc_Q31 ), 1 );    // QA
        }
        Af_QA[ n ] = SKP_RSHIFT32( rc_Q31, 31 - QA );                                       // QA

        /* Update C * Af and C * Ab */
        for( k = 0; k <= n + 1; k++ ) {
            tmp1 = CAf[ k ];                                                                // Q( -rshifts )
            tmp2 = CAb[ n - k + 1 ];                                                        // Q( -rshifts )
            CAf[ k ]         = SKP_ADD_LSHIFT32( tmp1, SKP_SMMUL( tmp2, rc_Q31 ), 1 );      // Q( -rshifts )
            CAb[ n - k + 1 ] = SKP_ADD_LSHIFT32( tmp2, SKP_SMMUL( tmp1, rc_Q31 ), 1 );      // Q( -rshifts )
        }
    }

    /* Return residual energy */
    nrg  = CAf[ 0 ];                                                                        // Q( -rshifts )
    tmp1 = 1 << 16;                                                                         // Q16
    for( k = 0; k < D; k++ ) {
        Atmp1 = SKP_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                    // Q16
        nrg  = SKP_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                      // Q( -rshifts )
        tmp1 = SKP_SMLAWW( tmp1, Atmp1, Atmp1 );                                            // Q16
        A_Q16[ k ] = -Atmp1;
    }
    *res_nrg = SKP_SMLAWW( nrg, SKP_SMMUL( WhiteNoiseFrac_Q32, C0 ), -tmp1 );               // Q( -rshifts )
    *res_nrg_Q = -rshifts;
}