/* Convert adaptive Mid/Side representation to Left/Right stereo signal */
void silk_stereo_MS_to_LR( 
    stereo_dec_state    *state,                         /* I/O  State                                       */
    opus_int16           x1[],                           /* I/O  Left input signal, becomes mid signal       */
    opus_int16           x2[],                           /* I/O  Right input signal, becomes side signal     */
    const opus_int32     pred_Q13[],                     /* I    Predictors                                  */
    opus_int             fs_kHz,                         /* I    Samples rate (kHz)                          */
    opus_int             frame_length                    /* I    Number of samples                           */
)
{
    opus_int   n, denom_Q16, delta0_Q13, delta1_Q13;
    opus_int32 sum, diff, pred0_Q13, pred1_Q13;

    /* Buffering */
    SKP_memcpy( x1, state->sMid,  2 * sizeof( opus_int16 ) );
    SKP_memcpy( x2, state->sSide, 2 * sizeof( opus_int16 ) );
    SKP_memcpy( state->sMid,  &x1[ frame_length ], 2 * sizeof( opus_int16 ) );
    SKP_memcpy( state->sSide, &x2[ frame_length ], 2 * sizeof( opus_int16 ) );

    /* Interpolate predictors and add prediction to side channel */
    pred0_Q13  = state->pred_prev_Q13[ 0 ];
    pred1_Q13  = state->pred_prev_Q13[ 1 ];
    denom_Q16  = SKP_DIV32_16( 1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
    delta0_Q13 = SKP_RSHIFT_ROUND( SKP_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
    delta1_Q13 = SKP_RSHIFT_ROUND( SKP_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
    for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
        pred0_Q13 += delta0_Q13;
        pred1_Q13 += delta1_Q13;
        sum = SKP_LSHIFT( SKP_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 );         /* Q11 */ 
        sum = SKP_SMLAWB( SKP_LSHIFT( ( opus_int32 )x2[ n + 1 ], 8 ), sum, pred0_Q13 );          /* Q8  */
        sum = SKP_SMLAWB( sum, SKP_LSHIFT( ( opus_int32 )x1[ n + 1 ], 11 ), pred1_Q13 );         /* Q8  */
        x2[ n + 1 ] = (opus_int16)SKP_SAT16( SKP_RSHIFT_ROUND( sum, 8 ) );
    }
    pred0_Q13 = pred_Q13[ 0 ];
    pred1_Q13 = pred_Q13[ 1 ];
    for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
        sum = SKP_LSHIFT( SKP_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 );         /* Q11 */ 
        sum = SKP_SMLAWB( SKP_LSHIFT( ( opus_int32 )x2[ n + 1 ], 8 ), sum, pred0_Q13 );          /* Q8  */
        sum = SKP_SMLAWB( sum, SKP_LSHIFT( ( opus_int32 )x1[ n + 1 ], 11 ), pred1_Q13 );         /* Q8  */
        x2[ n + 1 ] = (opus_int16)SKP_SAT16( SKP_RSHIFT_ROUND( sum, 8 ) );
    }
    state->pred_prev_Q13[ 0 ] = pred_Q13[ 0 ];
    state->pred_prev_Q13[ 1 ] = pred_Q13[ 1 ];

    /* Convert to left/right signals */
    for( n = 0; n < frame_length; n++ ) {
        sum  = x1[ n + 1 ] + (opus_int32)x2[ n + 1 ];
        diff = x1[ n + 1 ] - (opus_int32)x2[ n + 1 ];
        x1[ n + 1 ] = (opus_int16)SKP_SAT16( sum );
        x2[ n + 1 ] = (opus_int16)SKP_SAT16( diff );
    }
}
/* SKP_Silk_prefilter. Prefilter for finding Quantizer input signal                           */
SKP_INLINE void SKP_Silk_prefilt_FIX(
    SKP_Silk_prefilter_state_FIX *P,                    /* I/O state                          */
    SKP_int32   st_res_Q12[],                           /* I short term residual signal       */
    SKP_int16   xw[],                                   /* O prefiltered signal               */
    SKP_int32   HarmShapeFIRPacked_Q12,                 /* I Harmonic shaping coeficients     */
    SKP_int     Tilt_Q14,                               /* I Tilt shaping coeficient          */
    SKP_int32   LF_shp_Q14,                             /* I Low-frequancy shaping coeficients*/
    SKP_int     lag,                                    /* I Lag for harmonic shaping         */
    SKP_int     length                                  /* I Length of signals                */
)
{
    SKP_int   i, idx, LTP_shp_buf_idx;
    SKP_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10;
    SKP_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12;
    SKP_int16 *LTP_shp_buf;

    /* To speed up use temp variables instead of using the struct */
    LTP_shp_buf     = P->sLTP_shp;
    LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
    sLF_AR_shp_Q12  = P->sLF_AR_shp_Q12;
    sLF_MA_shp_Q12  = P->sLF_MA_shp_Q12;

    for( i = 0; i < length; i++ ) {
        if( lag > 0 ) {
            /* unrolled loop */
            SKP_assert( HARM_SHAPE_FIR_TAPS == 3 );
            idx = lag + LTP_shp_buf_idx;
            n_LTP_Q12 = SKP_SMULBB(            LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
            n_LTP_Q12 = SKP_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2    ) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
            n_LTP_Q12 = SKP_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
        } else {
            n_LTP_Q12 = 0;
        }

        n_Tilt_Q10 = SKP_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
        n_LF_Q10   = SKP_SMLAWB( SKP_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );

        sLF_AR_shp_Q12 = SKP_SUB32( st_res_Q12[ i ], SKP_LSHIFT( n_Tilt_Q10, 2 ) );
        sLF_MA_shp_Q12 = SKP_SUB32( sLF_AR_shp_Q12,  SKP_LSHIFT( n_LF_Q10,   2 ) );

        LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
        LTP_shp_buf[ LTP_shp_buf_idx ] = ( SKP_int16 )SKP_SAT16( SKP_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );

        xw[i] = ( SKP_int16 )SKP_SAT16( SKP_RSHIFT_ROUND( SKP_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 12 ) );
    }

    /* Copy temp variable back to state */
    P->sLF_AR_shp_Q12   = sLF_AR_shp_Q12;
    P->sLF_MA_shp_Q12   = sLF_MA_shp_Q12;
    P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
}
Exemple #3
0
/* Generates excitation for CNG LPC synthesis */
SKP_INLINE void SKP_Silk_CNG_exc(
    SKP_int16                       residual[],         /* O    CNG residual signal Q0                      */
    SKP_int32                       exc_buf_Q10[],      /* I    Random samples buffer Q10                   */
    SKP_int32                       Gain_Q16,           /* I    Gain to apply                               */
    SKP_int                         length,             /* I    Length                                      */
    SKP_int32                       *rand_seed          /* I/O  Seed to random index generator              */
)
{
    SKP_int32 seed;
    SKP_int   i, idx, exc_mask;

    exc_mask = CNG_BUF_MASK_MAX;
    while( exc_mask > length ) {
        exc_mask = SKP_RSHIFT( exc_mask, 1 );
    }

    seed = *rand_seed;
    for( i = 0; i < length; i++ ) {
        seed = SKP_RAND( seed );
        idx = ( SKP_int )( SKP_RSHIFT( seed, 24 ) & exc_mask );
        SKP_assert( idx >= 0 );
        SKP_assert( idx <= CNG_BUF_MASK_MAX );
        residual[ i ] = ( SKP_int16 )SKP_SAT16( SKP_RSHIFT_ROUND( SKP_SMULWW( exc_buf_Q10[ idx ], Gain_Q16 ), 10 ) );
    }
    *rand_seed = seed;
}
/* Resamples by a factor 3/1 */
void SKP_Silk_resample_3_1(
    SKP_int16           *out,       /* O:   Fs_high signal [inLen*3]        */
    SKP_int32           *S,         /* I/O: State vector   [7]              */
    const SKP_int16     *in,        /* I:   Fs_low signal  [inLen]          */
    const SKP_int32     inLen       /* I:   Input length                    */
)
{
    SKP_int     k, LSubFrameIn, LSubFrameOut;
    SKP_int32   out_tmp, idx, inLenTmp = inLen;
    SKP_int32   scratch00[    IN_SUBFR_LEN_RESAMPLE_3_1 ];
    SKP_int32   scratch0[ 3 * IN_SUBFR_LEN_RESAMPLE_3_1 ];
    SKP_int32   scratch1[ 3 * IN_SUBFR_LEN_RESAMPLE_3_1 ];
    
    /* Coefficients for 3-fold resampling */
    const SKP_int16 A30[ 2 ] = {  1773, 17818 };
    const SKP_int16 A31[ 2 ] = {  4942, 25677 };
    const SKP_int16 A32[ 2 ] = { 11786, 29304 };

    while( inLenTmp > 0 ) {
        LSubFrameIn  = SKP_min_int( IN_SUBFR_LEN_RESAMPLE_3_1, inLenTmp );
        LSubFrameOut = SKP_SMULBB( 3, LSubFrameIn );

        /* Convert Q15 -> Q25 */
        for( k = 0; k < LSubFrameIn; k++ ) {
            scratch00[k] = SKP_LSHIFT( (SKP_int32)in[ k ], 10 );
        }

        /* Allpass filtering */
        /* Scratch size: 2 * 3* LSubFrame * sizeof(SKP_int32) */
        SKP_Silk_allpass_int( scratch00, S + 1, A30[ 0 ], scratch1, LSubFrameIn );
        SKP_Silk_allpass_int( scratch1,  S + 2, A30[ 1 ], scratch0, LSubFrameIn );

        SKP_Silk_allpass_int( scratch00, S + 3, A31[ 0 ], scratch1, LSubFrameIn );
        SKP_Silk_allpass_int( scratch1,  S + 4, A31[ 1 ], scratch0 +     IN_SUBFR_LEN_RESAMPLE_3_1, LSubFrameIn );

        SKP_Silk_allpass_int( scratch00, S + 5, A32[ 0 ], scratch1, LSubFrameIn );
        SKP_Silk_allpass_int( scratch1,  S + 6, A32[ 1 ], scratch0 + 2 * IN_SUBFR_LEN_RESAMPLE_3_1, LSubFrameIn );

        /* Interleave three allpass outputs */
        for( k = 0; k < LSubFrameIn; k++ ) {
            idx = SKP_SMULBB( 3, k );
            scratch1[ idx     ] = scratch0[ k ];
            scratch1[ idx + 1 ] = scratch0[ k +     IN_SUBFR_LEN_RESAMPLE_3_1 ];
            scratch1[ idx + 2 ] = scratch0[ k + 2 * IN_SUBFR_LEN_RESAMPLE_3_1 ];
        }

        /* Low-pass filtering */
        SKP_Silk_lowpass_int( scratch1, S, scratch0, LSubFrameOut );

        /* Saturate and convert to SKP_int16 */
        for( k = 0; k < LSubFrameOut; k++ ) {
            out_tmp  = scratch0[ k ];
            out[ k ] = (SKP_int16) SKP_SAT16( SKP_RSHIFT_ROUND( out_tmp, 10 ) );
        }

        in       += LSubFrameIn;
        inLenTmp -= LSubFrameIn;
        out      += LSubFrameOut;
    }
}
Exemple #5
0
/* Can handle slowly varying filter coefficients */
void SKP_Silk_biquad(
    const SKP_int16      *in,        /* I:    input signal               */
    const SKP_int16      *B,         /* I:    MA coefficients, Q13 [3]   */
    const SKP_int16      *A,         /* I:    AR coefficients, Q13 [2]   */
    SKP_int32            *S,         /* I/O:  state vector [2]           */
    SKP_int16            *out,       /* O:    output signal              */
    const SKP_int32      len         /* I:    signal length              */
)
{
    SKP_int   k, in16;
    SKP_int32 A0_neg, A1_neg, S0, S1, out32, tmp32;

    S0 = S[ 0 ];
    S1 = S[ 1 ];
    A0_neg = -A[ 0 ];
    A1_neg = -A[ 1 ];
    for( k = 0; k < len; k++ ) {
        /* S[ 0 ], S[ 1 ]: Q13 */
        in16  = in[ k ];
        out32 = SKP_SMLABB( S0, in16, B[ 0 ] );

        S0 = SKP_SMLABB( S1, in16, B[ 1 ] );
        S0 += SKP_LSHIFT( SKP_SMULWB( out32, A0_neg ), 3 );

        S1 = SKP_LSHIFT( SKP_SMULWB( out32, A1_neg ), 3 );
        S1 = SKP_SMLABB( S1, in16, B[ 2 ] );
        tmp32    = SKP_RSHIFT_ROUND( out32, 13 ) + 1;
        out[ k ] = (SKP_int16)SKP_SAT16( tmp32 );
    }
    S[ 0 ] = S0;
    S[ 1 ] = S1;
}
Exemple #6
0
/* Variable order MA filter */
void SKP_Silk_MA(
    const SKP_int16      *in,            /* I:   input signal                                */
    const SKP_int16      *B,             /* I:   MA coefficients, Q13 [order+1]              */
    SKP_int32            *S,             /* I/O: state vector [order]                        */
    SKP_int16            *out,           /* O:   output signal                               */
    const SKP_int32      len,            /* I:   signal length                               */
    const SKP_int32      order           /* I:   filter order                                */
)
{
    SKP_int   k, d, in16;
    SKP_int32 out32;
    
    for( k = 0; k < len; k++ ) {
        in16 = in[ k ];
        out32 = SKP_SMLABB( S[ 0 ], in16, B[ 0 ] );
        out32 = SKP_RSHIFT_ROUND( out32, 13 );
        
        for( d = 1; d < order; d++ ) {
            S[ d - 1 ] = SKP_SMLABB( S[ d ], in16, B[ d ] );
        }
        S[ order - 1 ] = SKP_SMULBB( in16, B[ order ] );

        /* Limit */
        out[ k ] = (SKP_int16)SKP_SAT16( out32 );
    }
}
/*
  Variable order MA prediction error filter. Inverse filter of
  SKP_Silk_LPC_synthesis_filter.

  The inlined assembly makes it over 30% faster than the generic C.
*/
void SKP_Silk_LPC_analysis_filter(
    const SKP_int16      *in,            /* I:   Input signal                                */
    const SKP_int16      *B,             /* I:   MA prediction coefficients, Q12 [order]     */
    SKP_int16            *S,             /* I/O: State vector [order]                        */
    SKP_int16            *out,           /* O:   Output signal                               */
    const SKP_int32      len,            /* I:   Signal length                               */
    const SKP_int32      Order           /* I:   Filter order                                */
)
{
	SKP_int  k;
	SKP_int32 out32_Q12, out32;
	/* Order must be even */
	SKP_assert( 2 * Order_half == Order );

	/* S[] values are in Q0 */
	for( k = 0; k < len; k++ ) {
		LPC_ANALYSYS(out32_Q12, S, B, Order);
		/* Subtract prediction */
		out32_Q12 = SKP_SUB_SAT32( SKP_LSHIFT(
			(SKP_int32)in[ k ], 12 ), out32_Q12 );
		/* Scale to Q0 */
		out32 = SKP_RSHIFT_ROUND( out32_Q12, 12 );
		out[ k ] = (SKP_int16)SKP_SAT16( out32 );
		/* Move input line */
		S[ 0 ] = in[ k ];
	}
}
Exemple #8
0
/* Variable order MA prediction error filter */
void SKP_Silk_MA_Prediction(
    const SKP_int16      *in,            /* I:   Input signal                                */
    const SKP_int16      *B,             /* I:   MA prediction coefficients, Q12 [order]     */
    SKP_int32            *S,             /* I/O: State vector [order]                        */
    SKP_int16            *out,           /* O:   Output signal                               */
    const SKP_int32      len,            /* I:   Signal length                               */
    const SKP_int32      order           /* I:   Filter order                                */
)
{
    SKP_int   k, d, in16;
    SKP_int32 out32;

    for( k = 0; k < len; k++ ) {
        in16 = in[ k ];
        out32 = SKP_LSHIFT( in16, 12 ) - S[ 0 ];
        out32 = SKP_RSHIFT_ROUND( out32, 12 );
        
        for( d = 0; d < order - 1; d++ ) {
            S[ d ] = SKP_SMLABB_ovflw( S[ d + 1 ], in16, B[ d ] );
        }
        S[ order - 1 ] = SKP_SMULBB( in16, B[ order - 1 ] );

        /* Limit */
        out[ k ] = (SKP_int16)SKP_SAT16( out32 );
    }
}
SKP_INLINE SKP_int16 *SKP_Silk_resampler_private_down_FIR_INTERPOL1(
	SKP_int16 *out, SKP_int32 *buf2, const SKP_int16 *FIR_Coefs, SKP_int32 max_index_Q16, SKP_int32 index_increment_Q16, SKP_int32 FIR_Fracs){
	
	SKP_int32 index_Q16, res_Q6;
	SKP_int32 *buf_ptr;
	SKP_int32 interpol_ind;
	const SKP_int16 *interpol_ptr;
	for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
		/* Integer part gives pointer to buffered input */
		buf_ptr = buf2 + SKP_RSHIFT( index_Q16, 16 );

		/* Fractional part gives interpolation coefficients */
		interpol_ind = SKP_SMULWB( index_Q16 & 0xFFFF, FIR_Fracs );

		/* Inner product */
		interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR / 2 * interpol_ind ];
		res_Q6 = SKP_SMULWB(         buf_ptr[ 0 ], interpol_ptr[ 0 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[ 1 ], interpol_ptr[ 1 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[ 2 ], interpol_ptr[ 2 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[ 3 ], interpol_ptr[ 3 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[ 4 ], interpol_ptr[ 4 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[ 5 ], interpol_ptr[ 5 ] );
		interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR / 2 * ( FIR_Fracs - 1 - interpol_ind ) ];
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[ 11 ], interpol_ptr[ 0 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[ 10 ], interpol_ptr[ 1 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[  9 ], interpol_ptr[ 2 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[  8 ], interpol_ptr[ 3 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[  7 ], interpol_ptr[ 4 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, buf_ptr[  6 ], interpol_ptr[ 5 ] );

		/* Scale down, saturate and store in output array */
		*out++ = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( res_Q6, 6 ) );
	}
	return out;
}
/* Downsample by a factor 2, coarsest */
void SKP_Silk_resample_1_2_coarsest(
    const SKP_int16     *in,                /* I:   16 kHz signal [2*len]   */
    SKP_int32           *S,                 /* I/O: State vector [2]        */
    SKP_int16           *out,               /* O:   8 kHz signal [len]      */
    SKP_int32           *scratch,           /* I:   Scratch memory [3*len]  */
    const SKP_int32     len                 /* I:   Number of OUTPUT samples*/
)
{
    SKP_int32 k, idx;

    /* De-interleave allpass inputs, and convert Q15 -> Q25 */
    for( k = 0; k < len; k++ ) {
        idx = SKP_LSHIFT( k, 1 );
        scratch[ k ]       = SKP_LSHIFT( (SKP_int32)in[ idx     ], 10 );
        scratch[ k + len ] = SKP_LSHIFT( (SKP_int32)in[ idx + 1 ], 10 );
    }

    idx = SKP_LSHIFT( len, 1 );
    /* Allpass filters */
    SKP_Silk_allpass_int( scratch,       S,     A21cst[ 0 ], scratch + idx, len );
    SKP_Silk_allpass_int( scratch + len, S + 1, A20cst[ 0 ], scratch,       len );

    /* Add two allpass outputs */
    for( k = 0; k < len; k++ ) {
        out[ k ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( scratch[ k ] + scratch[ k + idx ], 11 ) );
    }
}
void SKP_Silk_resampler_private_ARMA4(
	SKP_int32					    S[],		    /* I/O: State vector [ 4 ]			    	    */
	SKP_int16					    out[],		    /* O:	Output signal				    	    */
	const SKP_int16				    in[],			/* I:	Input signal				    	    */
	const SKP_int16				    Coef[],		    /* I:	ARMA coefficients [ 7 ]                 */
	SKP_int32				        len				/* I:	Signal length				        	*/
)
{
	SKP_int32 k;
	SKP_int32 in_Q8, out1_Q8, out2_Q8, X;

	for( k = 0; k < len; k++ ) {
        in_Q8  = SKP_LSHIFT32( (SKP_int32)in[ k ], 8 );

        /* Outputs of first and second biquad */
        out1_Q8 = SKP_ADD_LSHIFT32( in_Q8,   S[ 0 ], 2 );
        out2_Q8 = SKP_ADD_LSHIFT32( out1_Q8, S[ 2 ], 2 );

        /* Update states, which are stored in Q6. Coefficients are in Q14 here */
        X      = SKP_SMLAWB( S[ 1 ], in_Q8,   Coef[ 0 ] );
        S[ 0 ] = SKP_SMLAWB( X,      out1_Q8, Coef[ 2 ] );

        X      = SKP_SMLAWB( S[ 3 ], out1_Q8, Coef[ 1 ] );
        S[ 2 ] = SKP_SMLAWB( X,      out2_Q8, Coef[ 4 ] );

        S[ 1 ] = SKP_SMLAWB( SKP_RSHIFT32( in_Q8,   2 ), out1_Q8, Coef[ 3 ] );
        S[ 3 ] = SKP_SMLAWB( SKP_RSHIFT32( out1_Q8, 2 ), out2_Q8, Coef[ 5 ] );

        /* Apply gain and store to output. The coefficient is in Q16 */
        out[ k ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT32( SKP_SMLAWB( 128, out2_Q8, Coef[ 6 ] ), 8 ) );
	}
}
/* Upsample using a combination of allpass-based 2x upsampling and FIR interpolation */
void SKP_Silk_resampler_private_IIR_FIR(
	void	                        *SS,		    /* I/O: Resampler state 						*/
	SKP_int16						out[],		    /* O:	Output signal 							*/
	const SKP_int16					in[],		    /* I:	Input signal							*/
	SKP_int32					    inLen		    /* I:	Number of input samples					*/
)
{
    SKP_Silk_resampler_state_struct *S = (SKP_Silk_resampler_state_struct *)SS;
	SKP_int32 nSamplesIn, table_index;
	SKP_int32 max_index_Q16, index_Q16, index_increment_Q16, res_Q15;
	SKP_int16 buf[ 2 * RESAMPLER_MAX_BATCH_SIZE_IN + RESAMPLER_ORDER_FIR_144 ];
    SKP_int16 *buf_ptr;

	/* Copy buffered samples to start of buffer */	
	SKP_memcpy( buf, S->sFIR, RESAMPLER_ORDER_FIR_144 * sizeof( SKP_int32 ) );

	/* Iterate over blocks of frameSizeIn input samples */
    index_increment_Q16 = S->invRatio_Q16;
	while( 1 ) {
		nSamplesIn = SKP_min( inLen, S->batchSize );

        if( S->input2x == 1 ) {
		    /* Upsample 2x */
            S->up2_function( S->sIIR, &buf[ RESAMPLER_ORDER_FIR_144 ], in, nSamplesIn );
        } else {
		    /* Fourth-order ARMA filter */
            SKP_Silk_resampler_private_ARMA4( S->sIIR, &buf[ RESAMPLER_ORDER_FIR_144 ], in, S->Coefs, nSamplesIn );
        }

        max_index_Q16 = SKP_LSHIFT32( nSamplesIn, 16 + S->input2x );         /* +1 if 2x upsampling */

		/* Interpolate upsampled signal and store in output array */
	    for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
            table_index = SKP_SMULWB( index_Q16 & 0xFFFF, 144 );
            buf_ptr = &buf[ index_Q16 >> 16 ];
            res_Q15 = SKP_SMULBB(          buf_ptr[ 0 ], SKP_Silk_resampler_frac_FIR_144[       table_index ][ 0 ] );
            res_Q15 = SKP_SMLABB( res_Q15, buf_ptr[ 1 ], SKP_Silk_resampler_frac_FIR_144[       table_index ][ 1 ] );
            res_Q15 = SKP_SMLABB( res_Q15, buf_ptr[ 2 ], SKP_Silk_resampler_frac_FIR_144[       table_index ][ 2 ] );
            res_Q15 = SKP_SMLABB( res_Q15, buf_ptr[ 3 ], SKP_Silk_resampler_frac_FIR_144[ 143 - table_index ][ 2 ] );
            res_Q15 = SKP_SMLABB( res_Q15, buf_ptr[ 4 ], SKP_Silk_resampler_frac_FIR_144[ 143 - table_index ][ 1 ] );
            res_Q15 = SKP_SMLABB( res_Q15, buf_ptr[ 5 ], SKP_Silk_resampler_frac_FIR_144[ 143 - table_index ][ 0 ] );
			*out++ = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( res_Q15, 15 ) );
	    }
		in += nSamplesIn;
		inLen -= nSamplesIn;

		if( inLen > 0 ) {
			/* More iterations to do; copy last part of filtered signal to beginning of buffer */
			SKP_memcpy( buf, &buf[ nSamplesIn << S->input2x ], RESAMPLER_ORDER_FIR_144 * sizeof( SKP_int32 ) );
		} else {
			break;
		}
	}

	/* Copy last part of filtered signal to the state for the next call */
	SKP_memcpy( S->sFIR, &buf[nSamplesIn << S->input2x ], RESAMPLER_ORDER_FIR_144 * sizeof( SKP_int32 ) );
}
Exemple #13
0
/* Inverse filter of SKP_Silk_LPC_synthesis_filter */
void SKP_Silk_LPC_analysis_filter(
    const SKP_int16      *in,            /* I:   Input signal                                */
    const SKP_int16      *B,             /* I:   MA prediction coefficients, Q12 [order]     */
    SKP_int16            *S,             /* I/O: State vector [order]                        */
    SKP_int16            *out,           /* O:   Output signal                               */
    const SKP_int32      len,            /* I:   Signal length                               */
    const SKP_int32      Order           /* I:   Filter order                                */
)
{
    SKP_int   k, j, idx, Order_half = SKP_RSHIFT( Order, 1 );
    SKP_int32 Btmp, B_align_Q12[ SigProc_MAX_ORDER_LPC >> 1 ], out32_Q12, out32;
    SKP_int16 SA, SB;
    /* Order must be even */
    SKP_assert( 2 * Order_half == Order );

    /* Combine two A_Q12 values and ensure 32-bit alignment */
    for( k = 0; k < Order_half; k++ ) {
        idx = SKP_SMULBB( 2, k );
        B_align_Q12[ k ] = ( ( (SKP_int32)B[ idx ] ) & 0x0000ffff ) | SKP_LSHIFT( (SKP_int32)B[ idx + 1 ], 16 );
    }

    /* S[] values are in Q0 */
    for( k = 0; k < len; k++ ) {
        SA = S[ 0 ];
        out32_Q12 = 0;
        for( j = 0; j < ( Order_half - 1 ); j++ ) {
            idx = SKP_SMULBB( 2, j ) + 1;
            /* Multiply-add two prediction coefficients for each loop */
            Btmp = B_align_Q12[ j ];
            SB = S[ idx ];
            S[ idx ] = SA;
            out32_Q12 = SKP_SMLABB( out32_Q12, SA, Btmp );
            out32_Q12 = SKP_SMLABT( out32_Q12, SB, Btmp );
            SA = S[ idx + 1 ];
            S[ idx + 1 ] = SB;
        }

        /* Unrolled loop: epilog */
        Btmp = B_align_Q12[ Order_half - 1 ];
        SB = S[ Order - 1 ];
        S[ Order - 1 ] = SA;
        out32_Q12 = SKP_SMLABB( out32_Q12, SA, Btmp );
        out32_Q12 = SKP_SMLABT( out32_Q12, SB, Btmp );

        /* Subtract prediction */
        out32_Q12 = SKP_SUB_SAT32( SKP_LSHIFT( (SKP_int32)in[ k ], 12 ), out32_Q12 );

        /* Scale to Q0 */
        out32 = SKP_RSHIFT_ROUND( out32_Q12, 12 );

        /* Saturate output */
        out[ k ] = (SKP_int16)SKP_SAT16( out32 );

        /* Move input line */
        S[ 0 ] = in[ k ];
    }
}
void SKP_Silk_LTP_analysis_filter_FIX(
    SKP_int16       *LTP_res,                           /* O:   LTP residual signal of length NB_SUBFR * ( pre_length + subfr_length )  */
    const SKP_int16 *x,                                 /* I:   Pointer to input signal with at least max( pitchL ) preceeding samples  */
    const SKP_int16 LTPCoef_Q14[ LTP_ORDER * NB_SUBFR ],/* I:   LTP_ORDER LTP coefficients for each NB_SUBFR subframe                   */
    const SKP_int   pitchL[ NB_SUBFR ],                 /* I:   Pitch lag, one for each subframe                                        */
    const SKP_int32 invGains_Qxx[ NB_SUBFR ],           /* I:   Inverse quantization gains, one for each subframe                       */
    const SKP_int   Qxx,                                /* I:   Inverse quantization gains Q domain                                     */
    const SKP_int   subfr_length,                       /* I:   Length of each subframe                                                 */
    const SKP_int   pre_length                          /* I:   Length of the preceeding samples starting at &x[0] for each subframe    */
)
{
    const SKP_int16 *x_ptr, *x_lag_ptr;
    SKP_int16   Btmp_Q14[ LTP_ORDER ];
    SKP_int16   *LTP_res_ptr;
    SKP_int     k, i, j;
    SKP_int32   LTP_est;

    x_ptr = x;
    LTP_res_ptr = LTP_res;
    for( k = 0; k < NB_SUBFR; k++ ) {

        x_lag_ptr = x_ptr - pitchL[ k ];
        for( i = 0; i < LTP_ORDER; i++ ) {
            Btmp_Q14[ i ] = LTPCoef_Q14[ k * LTP_ORDER + i ];
        }

        /* LTP analysis FIR filter */
        for( i = 0; i < subfr_length + pre_length; i++ ) {
            LTP_res_ptr[ i ] = x_ptr[ i ];
            
            /* Long-term prediction */
            LTP_est = SKP_SMULBB( x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] );
            for( j = 1; j < LTP_ORDER; j++ ) {
                LTP_est = SKP_SMLABB_ovflw( LTP_est, x_lag_ptr[ LTP_ORDER / 2 - j ], Btmp_Q14[ j ] );
            }
            LTP_est = SKP_RSHIFT_ROUND( LTP_est, 14 ); // round and -> Q0

            /* Subtract long-term prediction */
            LTP_res_ptr[ i ] = ( SKP_int16 )SKP_SAT16( ( SKP_int32 )x_ptr[ i ] - LTP_est );

            /* Scale residual */
            if( Qxx == 16 ) {
                LTP_res_ptr[ i ] = SKP_SMULWB( invGains_Qxx[ k ], LTP_res_ptr[ i ] );
            } else {
                LTP_res_ptr[ i ] = ( SKP_int16 )SKP_CHECK_FIT16( SKP_RSHIFT64( SKP_SMULL( invGains_Qxx[ k ], LTP_res_ptr[ i ] ), Qxx ) );
            }

            x_lag_ptr++;
        }

        /* Update pointers */
        LTP_res_ptr += subfr_length + pre_length; 
        x_ptr       += subfr_length;
    }
}
Exemple #15
0
/* uses SMLAWB(), requiring armv5E and higher.                          */ 
SKP_int32 SKP_Silk_schur(                     /* O:    Returns residual energy                     */
    SKP_int16            *rc_Q15,               /* O:    reflection coefficients [order] Q15         */
    const SKP_int32      *c,                    /* I:    correlations [order+1]                      */
    const SKP_int32      order                  /* I:    prediction order                            */
)
{
    SKP_int        k, n, lz;
    SKP_int32    C[ SKP_Silk_MAX_ORDER_LPC + 1 ][ 2 ];
    SKP_int32    Ctmp1, Ctmp2, rc_tmp_Q15;

    /* Get number of leading zeros */
    lz = SKP_Silk_CLZ32( c[ 0 ] );

    /* Copy correlations and adjust level to Q30 */
    if( lz < 2 ) {
        /* lz must be 1, so shift one to the right */
        for( k = 0; k < order + 1; k++ ) {
            C[ k ][ 0 ] = C[ k ][ 1 ] = SKP_RSHIFT( c[ k ], 1 );
        }
    } else if( lz > 2 ) {
        /* Shift to the left */
        lz -= 2; 
        for( k = 0; k < order + 1; k++ ) {
            C[ k ][ 0 ] = C[ k ][ 1 ] = SKP_LSHIFT( c[k], lz );
        }
    } else {
        /* No need to shift */
        for( k = 0; k < order + 1; k++ ) {
            C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ];
        }
    }

    for( k = 0; k < order; k++ ) {
        
        /* Get reflection coefficient */
        rc_tmp_Q15 = -SKP_DIV32_16( C[ k + 1 ][ 0 ], SKP_max_32( SKP_RSHIFT( C[ 0 ][ 1 ], 15 ), 1 ) );

        /* Clip (shouldn't happen for properly conditioned inputs) */
        rc_tmp_Q15 = SKP_SAT16( rc_tmp_Q15 );

        /* Store */
        rc_Q15[ k ] = (SKP_int16)rc_tmp_Q15;

        /* Update correlations */
        for( n = 0; n < order - k; n++ ) {
            Ctmp1 = C[ n + k + 1 ][ 0 ];
            Ctmp2 = C[ n ][ 1 ];
            C[ n + k + 1 ][ 0 ] = SKP_SMLAWB( Ctmp1, SKP_LSHIFT( Ctmp2, 1 ), rc_tmp_Q15 );
            C[ n ][ 1 ]         = SKP_SMLAWB( Ctmp2, SKP_LSHIFT( Ctmp1, 1 ), rc_tmp_Q15 );
        }
    }

    /* return residual energy */
    return C[0][1];
}
/* Upsample by a factor 4, Note: very low quality, only use with output sampling rates above 96 kHz. */
void SKP_Silk_resampler_private_up4(
    SKP_int32                       *S,             /* I/O: State vector [ 2 ]                      */
    SKP_int16                       *out,           /* O:   Output signal [ 4 * len ]               */
    const SKP_int16                 *in,            /* I:   Input signal [ len ]                    */
    SKP_int32                       len             /* I:   Number of INPUT samples                 */
)
{
    SKP_int32 k;
    SKP_int32 in32, out32, Y, X;
    SKP_int16 out16;

    SKP_assert( SKP_Silk_resampler_up2_lq_0 > 0 );
    SKP_assert( SKP_Silk_resampler_up2_lq_1 < 0 );

    /* Internal variables and state are in Q10 format */
    for( k = 0; k < len; k++ ) {
        /* Convert to Q10 */
        in32 = SKP_LSHIFT( (SKP_int32)in[ k ], 10 );

        /* All-pass section for even output sample */
        Y      = SKP_SUB32( in32, S[ 0 ] );
        X      = SKP_SMULWB( Y, SKP_Silk_resampler_up2_lq_0 );
        out32  = SKP_ADD32( S[ 0 ], X );
        S[ 0 ] = SKP_ADD32( in32, X );

        /* Convert back to int16 and store to output */
        out16 = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( out32, 10 ) );
        out[ 4 * k ]     = out16;
        out[ 4 * k + 1 ] = out16;

        /* All-pass section for odd output sample */
        Y      = SKP_SUB32( in32, S[ 1 ] );
        X      = SKP_SMLAWB( Y, Y, SKP_Silk_resampler_up2_lq_1 );
        out32  = SKP_ADD32( S[ 1 ], X );
        S[ 1 ] = SKP_ADD32( in32, X );

        /* Convert back to int16 and store to output */
        out16 = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( out32, 10 ) );
        out[ 4 * k + 2 ] = out16;
        out[ 4 * k + 3 ] = out16;
    }
}
/* Helper function, interpolates the filter taps */
SKP_INLINE void silk_LP_interpolate_filter_taps( 
    SKP_int32           B_Q28[ TRANSITION_NB ], 
    SKP_int32           A_Q28[ TRANSITION_NA ],
    const SKP_int       ind,
    const SKP_int32     fac_Q16
)
{
    SKP_int nb, na;

    if( ind < TRANSITION_INT_NUM - 1 ) {
        if( fac_Q16 > 0 ) {
            if( fac_Q16 < 32768 ) { /* fac_Q16 is in range of a 16-bit int */
                /* Piece-wise linear interpolation of B and A */
                for( nb = 0; nb < TRANSITION_NB; nb++ ) {
                    B_Q28[ nb ] = SKP_SMLAWB(
                        silk_Transition_LP_B_Q28[ ind     ][ nb ],
                        silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] -
                        silk_Transition_LP_B_Q28[ ind     ][ nb ],
                        fac_Q16 );
                }
                for( na = 0; na < TRANSITION_NA; na++ ) {
                    A_Q28[ na ] = SKP_SMLAWB(
                        silk_Transition_LP_A_Q28[ ind     ][ na ],
                        silk_Transition_LP_A_Q28[ ind + 1 ][ na ] -
                        silk_Transition_LP_A_Q28[ ind     ][ na ],
                        fac_Q16 );
                }
            } else { /* ( fac_Q16 - ( 1 << 16 ) ) is in range of a 16-bit int */
                SKP_assert( fac_Q16 - ( 1 << 16 ) == SKP_SAT16( fac_Q16 - ( 1 << 16 ) ) );
                /* Piece-wise linear interpolation of B and A */
                for( nb = 0; nb < TRANSITION_NB; nb++ ) {
                    B_Q28[ nb ] = SKP_SMLAWB(
                        silk_Transition_LP_B_Q28[ ind + 1 ][ nb ],
                        silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] -
                        silk_Transition_LP_B_Q28[ ind     ][ nb ],
                        fac_Q16 - ( 1 << 16 ) );
                }
                for( na = 0; na < TRANSITION_NA; na++ ) {
                    A_Q28[ na ] = SKP_SMLAWB(
                        silk_Transition_LP_A_Q28[ ind + 1 ][ na ],
                        silk_Transition_LP_A_Q28[ ind + 1 ][ na ] -
                        silk_Transition_LP_A_Q28[ ind     ][ na ],
                        fac_Q16 - ( 1 << 16 ) );
                }
            }
        } else {
            SKP_memcpy( B_Q28, silk_Transition_LP_B_Q28[ ind ], TRANSITION_NB * sizeof( SKP_int32 ) );
            SKP_memcpy( A_Q28, silk_Transition_LP_A_Q28[ ind ], TRANSITION_NA * sizeof( SKP_int32 ) );
        }
    } else {
        SKP_memcpy( B_Q28, silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NB * sizeof( SKP_int32 ) );
        SKP_memcpy( A_Q28, silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NA * sizeof( SKP_int32 ) );
    }
}
/* Downsample by a factor 3, low quality */
void SKP_Silk_resampler_down3(
    SKP_int32                           *S,         /* I/O: State vector [ 8 ]                  */
    SKP_int16                           *out,       /* O:   Output signal [ floor(inLen/3) ]    */
    const SKP_int16                     *in,        /* I:   Input signal [ inLen ]              */
    SKP_int32                           inLen       /* I:   Number of input samples             */
)
{
	SKP_int32 nSamplesIn, counter, res_Q6;
	SKP_int32 buf[ RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR ];
	SKP_int32 *buf_ptr;

	/* Copy buffered samples to start of buffer */
	SKP_memcpy( buf, S, ORDER_FIR * sizeof( SKP_int32 ) );

	/* Iterate over blocks of frameSizeIn input samples */
	while( 1 ) {
		nSamplesIn = SKP_min( inLen, RESAMPLER_MAX_BATCH_SIZE_IN );

	    /* Second-order AR filter (output in Q8) */
	    SKP_Silk_resampler_private_AR2( &S[ ORDER_FIR ], &buf[ ORDER_FIR ], in,
            SKP_Silk_Resampler_1_3_COEFS_LQ, nSamplesIn );

		/* Interpolate filtered signal */
        buf_ptr = buf;
        counter = nSamplesIn;
        while( counter > 2 ) {
            /* Inner product */
            res_Q6 = SKP_SMULWB(         SKP_ADD32( buf_ptr[ 0 ], buf_ptr[ 5 ] ), SKP_Silk_Resampler_1_3_COEFS_LQ[ 2 ] );
            res_Q6 = SKP_SMLAWB( res_Q6, SKP_ADD32( buf_ptr[ 1 ], buf_ptr[ 4 ] ), SKP_Silk_Resampler_1_3_COEFS_LQ[ 3 ] );
            res_Q6 = SKP_SMLAWB( res_Q6, SKP_ADD32( buf_ptr[ 2 ], buf_ptr[ 3 ] ), SKP_Silk_Resampler_1_3_COEFS_LQ[ 4 ] );

            /* Scale down, saturate and store in output array */
            *out++ = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( res_Q6, 6 ) );

            buf_ptr += 3;
            counter -= 3;
        }

		in += nSamplesIn;
		inLen -= nSamplesIn;

		if( inLen > 0 ) {
			/* More iterations to do; copy last part of filtered signal to beginning of buffer */
			SKP_memcpy( buf, &buf[ nSamplesIn ], ORDER_FIR * sizeof( SKP_int32 ) );
		} else {
			break;
		}
	}

	/* Copy last part of filtered signal to the state for the next call */
	SKP_memcpy( S, &buf[ nSamplesIn ], ORDER_FIR * sizeof( SKP_int32 ) );
}
/* Split signal into two decimated bands using first-order allpass filters */
void SKP_Silk_ana_filt_bank_1(
    const SKP_int16      *in,        /* I:   Input signal [N]        */
    SKP_int32            *S,         /* I/O: State vector [2]        */
    SKP_int16            *outL,      /* O:   Low band [N/2]          */
    SKP_int16            *outH,      /* O:   High band [N/2]         */
    SKP_int32            *scratch,   /* I:   Scratch memory [3*N/2]  */   // todo: remove - no longer used
    const SKP_int32      N           /* I:   Number of input samples */
)
{
    SKP_int      k, N2 = SKP_RSHIFT( N, 1 );
    SKP_int32    in32, X, Y, out_1, out_2;

    /* Internal variables and state are in Q10 format */
    for( k = 0; k < N2; k++ ) {
        /* Convert to Q10 */
        in32 = SKP_LSHIFT( (SKP_int32)in[ 2 * k ], 10 );

        /* All-pass section for even input sample */
        Y      = SKP_SUB32( in32, S[ 0 ] );
        X      = SKP_SMLAWB( Y, Y, A_fb1_21[ 0 ] );
        out_1  = SKP_ADD32( S[ 0 ], X );
        S[ 0 ] = SKP_ADD32( in32, X );

        /* Convert to Q10 */
        in32 = SKP_LSHIFT( (SKP_int32)in[ 2 * k + 1 ], 10 );

        /* All-pass section for odd input sample */
        Y      = SKP_SUB32( in32, S[ 1 ] );
        X      = SKP_SMULWB( Y, A_fb1_20[ 0 ] );
        out_2  = SKP_ADD32( S[ 1 ], X );
        S[ 1 ] = SKP_ADD32( in32, X );

        /* Add/subtract, convert back to int16 and store to output */
        outL[ k ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( SKP_ADD32( out_2, out_1 ), 11 ) );
        outH[ k ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( SKP_SUB32( out_2, out_1 ), 11 ) );
    }
}
/* Upsample by a factor 2, coarser */
void SKP_Silk_resample_2_1_coarse(const int16_t * in,	/* I:   8 kHz signal [len]      */
				  int32_t * S,	/* I/O: State vector [4]        */
				  int16_t * out,	/* O:   16 kHz signal [2*len]   */
				  int32_t * scratch,	/* I:   Scratch memory [3*len]  */
				  const int32_t len	/* I:   Number of INPUT samples */
    )
{
	int32_t k, idx;

	/* Coefficients for coarser 2-fold resampling */
	const int16_t A20c[2] = { 2119, 16663 };
	const int16_t A21c[2] = { 8050, 26861 };

	/* Convert Q15 -> Q25 */
	for (k = 0; k < len; k++) {
		scratch[k] = SKP_LSHIFT((int32_t) in[k], 10);
	}

	idx = SKP_LSHIFT(len, 1);

	/* Allpass filters */
	SKP_Silk_allpass_int(scratch, S, A20c[0], scratch + idx, len);
	SKP_Silk_allpass_int(scratch + idx, S + 1, A20c[1], scratch + len, len);

	SKP_Silk_allpass_int(scratch, S + 2, A21c[0], scratch + idx, len);
	SKP_Silk_allpass_int(scratch + idx, S + 3, A21c[1], scratch, len);

	/* Interleave two allpass outputs */
	for (k = 0; k < len; k++) {
		idx = SKP_LSHIFT(k, 1);
		out[idx] =
		    (int16_t) SKP_SAT16(SKP_RSHIFT_ROUND(scratch[k + len], 10));
		out[idx + 1] =
		    (int16_t) SKP_SAT16(SKP_RSHIFT_ROUND(scratch[k], 10));
	}
}
/* even order AR filter */
void SKP_Silk_LPC_synthesis_filter(
    const SKP_int16 *in,        /* I:   excitation signal */
    const SKP_int16 *A_Q12,     /* I:   AR coefficients [Order], between -8_Q0 and 8_Q0 */
    const SKP_int32 Gain_Q26,   /* I:   gain */
    SKP_int32 *S,               /* I/O: state vector [Order] */
    SKP_int16 *out,             /* O:   output signal */
    const SKP_int32 len,        /* I:   signal length */
    const SKP_int Order         /* I:   filter order, must be even */
)
{
    SKP_int   k, j, idx, Order_half = SKP_RSHIFT( Order, 1 );
    SKP_int32 SA, SB, out32_Q10, out32;

    /* Order must be even */
    SKP_assert( 2 * Order_half == Order );

    /* S[] values are in Q14 */
    for( k = 0; k < len; k++ ) {
        SA = S[ Order - 1 ];
        out32_Q10 = 0;
        for( j = 0; j < ( Order_half - 1 ); j++ ) {
            idx = SKP_SMULBB( 2, j ) + 1;
            SB = S[ Order - 1 - idx ];
            S[ Order - 1 - idx ] = SA;
            out32_Q10 = SKP_SMLAWB( out32_Q10, SA, A_Q12[ ( j << 1 ) ] );
            out32_Q10 = SKP_SMLAWB( out32_Q10, SB, A_Q12[ ( j << 1 ) + 1 ] );
            SA = S[ Order - 2 - idx ];
            S[ Order - 2 - idx ] = SB;
        }

        /* unrolled loop: epilog */
        SB = S[ 0 ];
        S[ 0 ] = SA;
        out32_Q10 = SKP_SMLAWB( out32_Q10, SA, A_Q12[ Order - 2 ] );
        out32_Q10 = SKP_SMLAWB( out32_Q10, SB, A_Q12[ Order - 1 ] );
        /* apply gain to excitation signal and add to prediction */
        out32_Q10 = SKP_ADD_SAT32( out32_Q10, SKP_SMULWB( Gain_Q26, in[ k ] ) );

        /* scale to Q0 */
        out32 = SKP_RSHIFT_ROUND( out32_Q10, 10 );

        /* saturate output */
        out[ k ] = ( SKP_int16 )SKP_SAT16( out32 );

        /* move result into delay line */
        S[ Order - 1 ] = SKP_LSHIFT_SAT32( out32_Q10, 4 );
    }
}
SKP_INLINE SKP_int16 *SKP_Silk_resampler_private_IIR_FIR_INTERPOL( 
			SKP_int16 * out, SKP_int16 * buf, SKP_int32 max_index_Q16 , SKP_int32 index_increment_Q16 ){
	SKP_int32 index_Q16, res_Q15;
	SKP_int16 *buf_ptr;
	SKP_int32 table_index;
	/* Interpolate upsampled signal and store in output array */
	for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
        table_index = SKP_SMULWB( index_Q16 & 0xFFFF, 144 );
        buf_ptr = &buf[ index_Q16 >> 16 ];
            
        res_Q15 = SKP_SMULBB(          buf_ptr[ 0 ], SKP_Silk_resampler_frac_FIR_144[       table_index ][ 0 ] );
        res_Q15 = SKP_SMLABB( res_Q15, buf_ptr[ 1 ], SKP_Silk_resampler_frac_FIR_144[       table_index ][ 1 ] );
        res_Q15 = SKP_SMLABB( res_Q15, buf_ptr[ 2 ], SKP_Silk_resampler_frac_FIR_144[       table_index ][ 2 ] );
        res_Q15 = SKP_SMLABB( res_Q15, buf_ptr[ 3 ], SKP_Silk_resampler_frac_FIR_144[ 143 - table_index ][ 2 ] );
        res_Q15 = SKP_SMLABB( res_Q15, buf_ptr[ 4 ], SKP_Silk_resampler_frac_FIR_144[ 143 - table_index ][ 1 ] );
        res_Q15 = SKP_SMLABB( res_Q15, buf_ptr[ 5 ], SKP_Silk_resampler_frac_FIR_144[ 143 - table_index ][ 0 ] );          
		*out++ = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( res_Q15, 15 ) );
	}
	return out;	
}
void SKP_Silk_warped_LPC_analysis_filter_FIX(
          SKP_int32                 state[],            /* I/O  State [order + 1]                       */
          SKP_int16                 res[],              /* O    Residual signal [length]                */
    const SKP_int16                 coef_Q13[],         /* I    Coefficients [order]                    */
    const SKP_int16                 input[],            /* I    Input signal [length]                   */
    const SKP_int16                 lambda_Q16,         /* I    Warping factor                          */
    const SKP_int                   length,             /* I    Length of input signal                  */
    const SKP_int                   order               /* I    Filter order (even)                     */
)
{
    SKP_int     n, i;
    SKP_int32   acc_Q11, tmp1, tmp2;

    /* Order must be even */
    SKP_assert( ( order & 1 ) == 0 );

    for( n = 0; n < length; n++ ) {
        /* Output of lowpass section */  
        tmp2 = SKP_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
        state[ 0 ] = SKP_LSHIFT( input[ n ], 14 );
        /* Output of allpass section */
        tmp1 = SKP_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
        state[ 1 ] = tmp2;
        acc_Q11 = SKP_SMULWB( tmp2, coef_Q13[ 0 ] );
        /* Loop over allpass sections */
        for( i = 2; i < order; i += 2 ) {
            /* Output of allpass section */
            tmp2 = SKP_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
            state[ i ] = tmp1;
            acc_Q11 = SKP_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
            /* Output of allpass section */
            tmp1 = SKP_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
            state[ i + 1 ] = tmp2;
            acc_Q11 = SKP_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
        }
        state[ order ] = tmp1;
        acc_Q11 = SKP_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
        res[ n ] = ( SKP_int16 )SKP_SAT16( ( SKP_int32 )input[ n ] - SKP_RSHIFT_ROUND( acc_Q11, 11 ) );
    }
}
SKP_INLINE SKP_int16 *SKP_Silk_resampler_private_down_FIR_INTERPOL0(
	SKP_int16 *out, SKP_int32 *buf2, const SKP_int16 *FIR_Coefs, SKP_int32 max_index_Q16, SKP_int32 index_increment_Q16){
	
	SKP_int32 index_Q16, res_Q6;
	SKP_int32 *buf_ptr;
	for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) {
		/* Integer part gives pointer to buffered input */
		buf_ptr = buf2 + SKP_RSHIFT( index_Q16, 16 );

		/* Inner product */
		res_Q6 = SKP_SMULWB(         SKP_ADD32( buf_ptr[ 0 ], buf_ptr[ 11 ] ), FIR_Coefs[ 0 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, SKP_ADD32( buf_ptr[ 1 ], buf_ptr[ 10 ] ), FIR_Coefs[ 1 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, SKP_ADD32( buf_ptr[ 2 ], buf_ptr[  9 ] ), FIR_Coefs[ 2 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, SKP_ADD32( buf_ptr[ 3 ], buf_ptr[  8 ] ), FIR_Coefs[ 3 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, SKP_ADD32( buf_ptr[ 4 ], buf_ptr[  7 ] ), FIR_Coefs[ 4 ] );
		res_Q6 = SKP_SMLAWB( res_Q6, SKP_ADD32( buf_ptr[ 5 ], buf_ptr[  6 ] ), FIR_Coefs[ 5 ] );

			    /* Scale down, saturate and store in output array */
		*out++ = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( res_Q6, 6 ) );
	}
	return out;
}
/* Downsample by a factor 2, mediocre quality */
void SKP_Silk_resampler_down2(
    SKP_int32                           *S,         /* I/O: State vector [ 2 ]                  */
    SKP_int16                           *out,       /* O:   Output signal [ len ]               */
    const SKP_int16                     *in,        /* I:   Input signal [ floor(len/2) ]       */
    SKP_int32                           inLen       /* I:   Number of input samples             */
)
{
    SKP_int32 k, len2 = SKP_RSHIFT32( inLen, 1 );
    SKP_int32 in32, out32, Y, X;

    SKP_assert( SKP_Silk_resampler_down2_0 > 0 );
    SKP_assert( SKP_Silk_resampler_down2_1 < 0 );

    /* Internal variables and state are in Q10 format */
    for( k = 0; k < len2; k++ ) {
        /* Convert to Q10 */
        in32 = SKP_LSHIFT( (SKP_int32)in[ 2 * k ], 10 );

        /* All-pass section for even input sample */
        Y      = SKP_SUB32( in32, S[ 0 ] );
        X      = SKP_SMLAWB( Y, Y, SKP_Silk_resampler_down2_1 );
        out32  = SKP_ADD32( S[ 0 ], X );
        S[ 0 ] = SKP_ADD32( in32, X );

        /* Convert to Q10 */
        in32 = SKP_LSHIFT( (SKP_int32)in[ 2 * k + 1 ], 10 );

        /* All-pass section for odd input sample, and add to output of previous section */
        Y      = SKP_SUB32( in32, S[ 1 ] );
        X      = SKP_SMULWB( Y, SKP_Silk_resampler_down2_0 );
        out32  = SKP_ADD32( out32, S[ 1 ] );
        out32  = SKP_ADD32( out32, X );
        S[ 1 ] = SKP_ADD32( in32, X );

        /* Add, convert back to int16 and store to output */
        out[ k ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT_ROUND( out32, 11 ) );
    }
}
/* Predict number of bytes used to encode q */
int SKP_Silk_pulses_to_bytes(	/* O  Return value, predicted number of bytes used to encode q */
				    SKP_Silk_encoder_state * psEncC,	/* I/O  Encoder State */
				    int q[]	/* I    Pulse signal  */
    )
{
	int i, j, iter, *q_ptr;
	int32_t sum_abs_val, nBytes, acc_nBytes;
	/* Take the absolute value of the pulses */
	iter = psEncC->frame_length / SHELL_CODEC_FRAME_LENGTH;

	/* Calculate rate as a nonlinaer mapping of sum abs value of each Shell block */
	q_ptr = q;
	acc_nBytes = 0;
	for (j = 0; j < iter; j++) {
		sum_abs_val = 0;
		for (i = 0; i < SHELL_CODEC_FRAME_LENGTH; i += 4) {
			sum_abs_val += SKP_abs(q_ptr[i + 0]);
			sum_abs_val += SKP_abs(q_ptr[i + 1]);
			sum_abs_val += SKP_abs(q_ptr[i + 2]);
			sum_abs_val += SKP_abs(q_ptr[i + 3]);
		}
		/* Calculate nBytes used for thi sshell frame */
		nBytes = SKP_SMULWB(SKP_SMULBB(sum_abs_val, sum_abs_val), POLY_FIT_2_Q20);	// Q4
		nBytes = SKP_LSHIFT_SAT32(nBytes, 11);	// Q15
		nBytes += SKP_SMULBB(sum_abs_val, POLY_FIT_1_Q15);	// Q15
		nBytes += POLY_FIT_0_Q15;	// Q15

		acc_nBytes += nBytes;

		q_ptr += SHELL_CODEC_FRAME_LENGTH;	/* update pointer */
	}

	acc_nBytes = SKP_RSHIFT_ROUND(acc_nBytes, 15);	// Q0
	acc_nBytes = SKP_SAT16(acc_nBytes);	// just to be sure                            // Q0

	return ((int)acc_nBytes);
}
/* Second order ARMA filter, alternative implementation */
void SKP_Silk_biquad_alt(
    const SKP_int16      *in,            /* I:    Input signal                   */
    const SKP_int32      *B_Q28,         /* I:    MA coefficients [3]            */
    const SKP_int32      *A_Q28,         /* I:    AR coefficients [2]            */
    SKP_int32            *S,             /* I/O: State vector [2]                */
    SKP_int16            *out,           /* O:    Output signal                  */
    const SKP_int32      len             /* I:    Signal length (must be even)   */
)
{
    /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
    SKP_int   k;
    SKP_int32 inval, A0_U_Q28, A0_L_Q28, A1_U_Q28, A1_L_Q28, out32_Q14;

    /* Negate A_Q28 values and split in two parts */
    A0_L_Q28 = ( -A_Q28[ 0 ] ) & 0x00003FFF;        /* lower part */
    A0_U_Q28 = SKP_RSHIFT( -A_Q28[ 0 ], 14 );       /* upper part */
    A1_L_Q28 = ( -A_Q28[ 1 ] ) & 0x00003FFF;        /* lower part */
    A1_U_Q28 = SKP_RSHIFT( -A_Q28[ 1 ], 14 );       /* upper part */
    
    for( k = 0; k < len; k++ ) {
        /* S[ 0 ], S[ 1 ]: Q12 */
        inval = in[ k ];
        out32_Q14 = SKP_LSHIFT( SKP_SMLAWB( S[ 0 ], B_Q28[ 0 ], inval ), 2 );

        S[ 0 ] = S[1] + SKP_RSHIFT_ROUND( SKP_SMULWB( out32_Q14, A0_L_Q28 ), 14 );
        S[ 0 ] = SKP_SMLAWB( S[ 0 ], out32_Q14, A0_U_Q28 );
        S[ 0 ] = SKP_SMLAWB( S[ 0 ], B_Q28[ 1 ], inval);

        S[ 1 ] = SKP_RSHIFT_ROUND( SKP_SMULWB( out32_Q14, A1_L_Q28 ), 14 );
        S[ 1 ] = SKP_SMLAWB( S[ 1 ], out32_Q14, A1_U_Q28 );
        S[ 1 ] = SKP_SMLAWB( S[ 1 ], B_Q28[ 2 ], inval );

        /* Scale back to Q0 and saturate */
        out[ k ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) );
    }
}
void SKP_Silk_find_pred_coefs_FIX(
    SKP_Silk_encoder_state_FIX      *psEnc,         /* I/O  encoder state                               */
    SKP_Silk_encoder_control_FIX    *psEncCtrl,     /* I/O  encoder control                             */
    const SKP_int16                 res_pitch[],    /* I    Residual from pitch analysis                */
    const SKP_int16                 x[]             /* I    Speech signal                               */
)
{
    SKP_int         i;
    SKP_int32       WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ];
    SKP_int32       invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ];
    SKP_int16       NLSF_Q15[ MAX_LPC_ORDER ];
    const SKP_int16 *x_ptr;
    SKP_int16       *x_pre_ptr, LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ];
    SKP_int32       tmp, min_gain_Q16;
    SKP_int         LTP_corrs_rshift[ MAX_NB_SUBFR ];

    /* weighting for weighted least squares */
    min_gain_Q16 = SKP_int32_MAX >> 6;
    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
        min_gain_Q16 = SKP_min( min_gain_Q16, psEncCtrl->Gains_Q16[ i ] );
    }
    for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
        /* Divide to Q16 */
        SKP_assert( psEncCtrl->Gains_Q16[ i ] > 0 );
        /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */
        invGains_Q16[ i ] = SKP_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 );

        /* Ensure Wght_Q15 a minimum value 1 */
        invGains_Q16[ i ] = SKP_max( invGains_Q16[ i ], 363 ); 
        
        /* Square the inverted gains */
        SKP_assert( invGains_Q16[ i ] == SKP_SAT16( invGains_Q16[ i ] ) );
        tmp = SKP_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] );
        Wght_Q15[ i ] = SKP_RSHIFT( tmp, 1 );

        /* Invert the inverted and normalized gains */
        local_gains[ i ] = SKP_DIV32( ( 1 << 16 ), invGains_Q16[ i ] );
    }

    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
        /**********/
        /* VOICED */
        /**********/
        SKP_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );

        /* LTP analysis */
        SKP_Silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7, 
            res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length, 
            psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift );

        /* Quantize LTP gain parameters */
        SKP_Silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, 
            WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr);

        /* Control LTP scaling */
        SKP_Silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl );

        /* Create LTP residual */
        SKP_Silk_LTP_analysis_filter_FIX( LPC_in_pre, psEnc->x_buf + psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder, 
            psEncCtrl->LTPCoef_Q14, psEncCtrl->pitchL, invGains_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );

    } else {
        /************/
        /* UNVOICED */
        /************/
        /* Create signal with prepended subframes, scaled by inverse gains */
        x_ptr     = x - psEnc->sCmn.predictLPCOrder;
        x_pre_ptr = LPC_in_pre;
        for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
            SKP_Silk_scale_copy_vector16( x_pre_ptr, x_ptr, invGains_Q16[ i ], 
                psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder );
            x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder;
            x_ptr     += psEnc->sCmn.subfr_length;
        }

        SKP_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( SKP_int16 ) );
        psEncCtrl->LTPredCodGain_Q7 = 0;
    }

    /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
    TIC(FIND_LPC)
    SKP_Silk_find_LPC_FIX( NLSF_Q15, &psEnc->sCmn.indices.NLSFInterpCoef_Q2, psEnc->sCmn.prev_NLSFq_Q15, 
        psEnc->sCmn.useInterpolatedNLSFs, psEnc->sCmn.first_frame_after_reset, psEnc->sCmn.predictLPCOrder, 
        LPC_in_pre, psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder, psEnc->sCmn.nb_subfr );
    TOC(FIND_LPC)

    /* Quantize LSFs */
    TIC(PROCESS_LSFS)
    SKP_Silk_process_NLSFs( &psEnc->sCmn, psEncCtrl->PredCoef_Q12, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 );
    TOC(PROCESS_LSFS)

    /* Calculate residual energy using quantized LPC coefficients */
    SKP_Silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains,
        psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder );

    /* Copy to prediction struct for use in next frame for fluctuation reduction */
    SKP_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
}
void SKP_Silk_resampler_private_up2_HQ(
	SKP_int32	                    *S,			    /* I/O: Resampler state [ 6 ]					*/
    SKP_int16                       *out,           /* O:   Output signal [ 2 * len ]               */
    const SKP_int16                 *in,            /* I:   Input signal [ len ]                    */
    SKP_int32                       len             /* I:   Number of INPUT samples                 */
)
{
    SKP_int32 k;
    SKP_int32 in32, out32_1, out32_2, Y, X;

    SKP_assert( SKP_Silk_resampler_up2_hq_0[ 0 ] > 0 );
    SKP_assert( SKP_Silk_resampler_up2_hq_0[ 1 ] < 0 );
    SKP_assert( SKP_Silk_resampler_up2_hq_1[ 0 ] > 0 );
    SKP_assert( SKP_Silk_resampler_up2_hq_1[ 1 ] < 0 );
    
    /* Internal variables and state are in Q10 format */
    for( k = 0; k < len; k++ ) {
        /* Convert to Q10 */
        in32 = SKP_LSHIFT( (SKP_int32)in[ k ], 10 );

        /* First all-pass section for even output sample */
        Y       = SKP_SUB32( in32, S[ 0 ] );
        X       = SKP_SMULWB( Y, SKP_Silk_resampler_up2_hq_0[ 0 ] );
        out32_1 = SKP_ADD32( S[ 0 ], X );
        S[ 0 ]  = SKP_ADD32( in32, X );

        /* Second all-pass section for even output sample */
        Y       = SKP_SUB32( out32_1, S[ 1 ] );
        X       = SKP_SMLAWB( Y, Y, SKP_Silk_resampler_up2_hq_0[ 1 ] );
        out32_2 = SKP_ADD32( S[ 1 ], X );
        S[ 1 ]  = SKP_ADD32( out32_1, X );

        /* Biquad notch filter */
        out32_2 = SKP_SMLAWB( out32_2, S[ 5 ], SKP_Silk_resampler_up2_hq_notch[ 2 ] );
        out32_2 = SKP_SMLAWB( out32_2, S[ 4 ], SKP_Silk_resampler_up2_hq_notch[ 1 ] );
        out32_1 = SKP_SMLAWB( out32_2, S[ 4 ], SKP_Silk_resampler_up2_hq_notch[ 0 ] );
        S[ 5 ]  = SKP_SUB32(  out32_2, S[ 5 ] );
        
        /* Apply gain in Q15, convert back to int16 and store to output */
        out[ 2 * k ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT32( 
            SKP_SMLAWB( 256, out32_1, SKP_Silk_resampler_up2_hq_notch[ 3 ] ), 9 ) );

        /* First all-pass section for odd output sample */
        Y       = SKP_SUB32( in32, S[ 2 ] );
        X       = SKP_SMULWB( Y, SKP_Silk_resampler_up2_hq_1[ 0 ] );
        out32_1 = SKP_ADD32( S[ 2 ], X );
        S[ 2 ]  = SKP_ADD32( in32, X );

        /* Second all-pass section for odd output sample */
        Y       = SKP_SUB32( out32_1, S[ 3 ] );
        X       = SKP_SMLAWB( Y, Y, SKP_Silk_resampler_up2_hq_1[ 1 ] );
        out32_2 = SKP_ADD32( S[ 3 ], X );
        S[ 3 ]  = SKP_ADD32( out32_1, X );

        /* Biquad notch filter */
        out32_2 = SKP_SMLAWB( out32_2, S[ 4 ], SKP_Silk_resampler_up2_hq_notch[ 2 ] );
        out32_2 = SKP_SMLAWB( out32_2, S[ 5 ], SKP_Silk_resampler_up2_hq_notch[ 1 ] );
        out32_1 = SKP_SMLAWB( out32_2, S[ 5 ], SKP_Silk_resampler_up2_hq_notch[ 0 ] );
        S[ 4 ]  = SKP_SUB32(  out32_2, S[ 4 ] );
        
        /* Apply gain in Q15, convert back to int16 and store to output */
        out[ 2 * k + 1 ] = (SKP_int16)SKP_SAT16( SKP_RSHIFT32( 
            SKP_SMLAWB( 256, out32_1, SKP_Silk_resampler_up2_hq_notch[ 3 ] ), 9 ) );
    }
}
/* 16th order AR filter */
void SKP_Silk_LPC_synthesis_order16(const int16_t * in,	/* I:   excitation signal */
				    const int16_t * A_Q12,	/* I:   AR coefficients [16], between -8_Q0 and 8_Q0 */
				    const int32_t Gain_Q26,	/* I:   gain */
				    int32_t * S,	/* I/O: state vector [16] */
				    int16_t * out,	/* O:   output signal */
				    const int32_t len	/* I:   signal length, must be multiple of 16 */
    )
{
	int k;
	int32_t SA, SB, Atmp, A_align_Q12[8], out32_Q10, out32;

	/* combine two A_Q12 values and ensure 32-bit alignment */
	for (k = 0; k < 8; k++) {
		A_align_Q12[k] =
		    (((int32_t) A_Q12[2 * k]) & 0x0000ffff) |
		    SKP_LSHIFT((int32_t) A_Q12[2 * k + 1], 16);
	}

	/* S[] values are in Q14 */
	/* NOTE: the code below loads two int16 values in an int32, and multiplies each using the   */
	/* SMLAWB and SMLAWT instructions. On a big-endian CPU the two int16 variables would be     */
	/* loaded in reverse order and the code will give the wrong result. In that case swapping   */
	/* the SMLAWB and SMLAWT instructions should solve the problem.                             */
	for (k = 0; k < len; k++) {
		/* unrolled loop: prolog */
		/* multiply-add two prediction coefficients per iteration */
		SA = S[15];
		Atmp = A_align_Q12[0];
		SB = S[14];
		S[14] = SA;
		out32_Q10 = SKP_SMULWB(SA, Atmp);
		out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
		SA = S[13];
		S[13] = SB;

		/* unrolled loop: main loop */
		Atmp = A_align_Q12[1];
		SB = S[12];
		S[12] = SA;
		out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
		out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
		SA = S[11];
		S[11] = SB;

		Atmp = A_align_Q12[2];
		SB = S[10];
		S[10] = SA;
		out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
		out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
		SA = S[9];
		S[9] = SB;

		Atmp = A_align_Q12[3];
		SB = S[8];
		S[8] = SA;
		out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
		out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
		SA = S[7];
		S[7] = SB;

		Atmp = A_align_Q12[4];
		SB = S[6];
		S[6] = SA;
		out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
		out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
		SA = S[5];
		S[5] = SB;

		Atmp = A_align_Q12[5];
		SB = S[4];
		S[4] = SA;
		out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
		out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
		SA = S[3];
		S[3] = SB;

		Atmp = A_align_Q12[6];
		SB = S[2];
		S[2] = SA;
		out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
		out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
		SA = S[1];
		S[1] = SB;

		/* unrolled loop: epilog */
		Atmp = A_align_Q12[7];
		SB = S[0];
		S[0] = SA;
		out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
		out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);

		/* unrolled loop: end */
		/* apply gain to excitation signal and add to prediction */
		out32_Q10 =
		    SKP_ADD_SAT32(out32_Q10, SKP_SMULWB(Gain_Q26, in[k]));

		/* scale to Q0 */
		out32 = SKP_RSHIFT_ROUND(out32_Q10, 10);

		/* saturate output */
		out[k] = (int16_t) SKP_SAT16(out32);

		/* move result into delay line */
		S[15] = SKP_LSHIFT_SAT32(out32_Q10, 4);
	}
}