/*
 * 16th order AR filter.
 *
 * For every input sample the 16 state values in S[] are multiplied with the
 * 16 coefficients in A_Q12[] and accumulated, the gain-scaled excitation
 * sample in[k] is added, and the result is written to out[k] (rounded and
 * saturated to 16 bits) and fed back into the state as the new S[15].
 * While accumulating, the state is shifted down by one position
 * (S[i] <- S[i + 1] for i = 0..14).
 *
 * Two coefficients are packed into each 32-bit word so that a single word
 * can feed one "bottom half" and one "top half" multiply-accumulate.
 */
void SKP_Silk_LPC_synthesis_order16(
    const int16_t *in,       /* I:   excitation signal, in[0..len-1] is read           */
    const int16_t *A_Q12,    /* I:   AR coefficients [16], between -8_Q0 and 8_Q0      */
    const int32_t Gain_Q26,  /* I:   gain                                              */
    int32_t *S,              /* I/O: state vector [16]                                 */
    int16_t *out,            /* O:   output signal, out[0..len-1] is written           */
    const int32_t len        /* I:   signal length, must be multiple of 16             */
)
{
    int k;
    int32_t SA, SB, Atmp, A_align_Q12[8], out32_Q10, out32;

    /*
     * Combine two A_Q12 values per 32-bit word: A_Q12[2k] goes into bits
     * 0..15 and A_Q12[2k + 1] into bits 16..31.
     *
     * The packing is done on unsigned values: left-shifting a negative signed
     * integer is undefined behaviour in C, and the coefficients may be
     * negative. The resulting bit pattern is identical to the signed
     * mask-and-shift formulation. The final conversion back to int32_t is
     * implementation-defined for values above INT32_MAX.
     */
    for (k = 0; k < 8; k++) {
        const uint32_t lo = (uint16_t) A_Q12[2 * k];
        const uint32_t hi = (uint16_t) A_Q12[2 * k + 1];

        A_align_Q12[k] = (int32_t) (lo | (hi << 16));
    }

    /* S[] values are in Q14 */
    /*
     * NOTE: every Atmp word below carries two coefficients (low half-word =
     * even index, high half-word = odd index). Because the words are built
     * arithmetically above rather than loaded from memory as int32, this
     * layout does not depend on the byte order of the CPU, so no swapping of
     * the B/T variants is needed on big-endian targets.
     * Presumably the *WB macros use the low 16 bits and the *WT macros the
     * high 16 bits of their last argument, as their names suggest; confirm
     * against the macro definitions.
     */
    for (k = 0; k < len; k++) {
        /* unrolled loop: prolog */
        /* multiply-add two prediction coefficients per iteration */
        SA = S[15];
        Atmp = A_align_Q12[0];
        SB = S[14];
        S[14] = SA;
        out32_Q10 = SKP_SMULWB(SA, Atmp);
        out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
        SA = S[13];
        S[13] = SB;

        /* unrolled loop: main loop */
        Atmp = A_align_Q12[1];
        SB = S[12];
        S[12] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
        out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
        SA = S[11];
        S[11] = SB;

        Atmp = A_align_Q12[2];
        SB = S[10];
        S[10] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
        out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
        SA = S[9];
        S[9] = SB;

        Atmp = A_align_Q12[3];
        SB = S[8];
        S[8] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
        out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
        SA = S[7];
        S[7] = SB;

        Atmp = A_align_Q12[4];
        SB = S[6];
        S[6] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
        out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
        SA = S[5];
        S[5] = SB;

        Atmp = A_align_Q12[5];
        SB = S[4];
        S[4] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
        out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
        SA = S[3];
        S[3] = SB;

        Atmp = A_align_Q12[6];
        SB = S[2];
        S[2] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
        out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);
        SA = S[1];
        S[1] = SB;

        /* unrolled loop: epilog */
        Atmp = A_align_Q12[7];
        SB = S[0];
        S[0] = SA;
        out32_Q10 = SKP_SMLAWB_ovflw(out32_Q10, SA, Atmp);
        out32_Q10 = SKP_SMLAWT_ovflw(out32_Q10, SB, Atmp);

        /* unrolled loop: end */
        /* apply gain to excitation signal and add to prediction */
        out32_Q10 = SKP_ADD_SAT32(out32_Q10, SKP_SMULWB(Gain_Q26, in[k]));

        /* scale to Q0 */
        out32 = SKP_RSHIFT_ROUND(out32_Q10, 10);

        /* saturate output */
        out[k] = (int16_t) SKP_SAT16(out32);

        /* move result into delay line (Q10 -> Q14) */
        S[15] = SKP_LSHIFT_SAT32(out32_Q10, 4);
    }
}
/*
 * 16th order AR filter.
 *
 * Per sample: accumulate the products of the 16 state values S[15]..S[0]
 * with the coefficients A_Q12[0]..A_Q12[15], add the gain-scaled excitation
 * sample, emit the rounded and saturated result, and push the new value into
 * the top of the delay line. The state is shifted down by one slot
 * (S[i] <- S[i + 1]) interleaved with the accumulation, two taps at a time.
 */
void SKP_Silk_LPC_synthesis_order16(
    const SKP_int16 *in,         /* I:   excitation signal                             */
    const SKP_int16 *A_Q12,      /* I:   AR coefficients [16], between -8_Q0 and 8_Q0  */
    const SKP_int32  Gain_Q26,   /* I:   gain                                          */
    SKP_int32       *S,          /* I/O: state vector [16]                             */
    SKP_int16       *out,        /* O:   output signal                                 */
    const SKP_int32  len         /* I:   signal length, must be multiple of 16         */
)
{
    SKP_int   n;
    SKP_int32 s_odd;      /* state value read from an odd slot  (S[15], S[13], ...) */
    SKP_int32 s_even;     /* state value read from an even slot (S[14], S[12], ...) */
    SKP_int32 acc_Q10;    /* running filter sum                                     */
    SKP_int32 sample_Q0;  /* acc_Q10 after the rounding shift                       */

    for( n = 0; n < len; n++ ) {
        /* --- taps 0 and 1: start the accumulator ------------------------- */
        s_odd      = S[ 15 ];
        s_even     = S[ 14 ];
        S[ 14 ]    = s_odd;
        acc_Q10    = SKP_SMULWB( s_odd, A_Q12[ 0 ] );
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_even, A_Q12[ 1 ] );
        s_odd      = S[ 13 ];
        S[ 13 ]    = s_even;

        /* --- taps 2 and 3 ------------------------------------------------ */
        s_even     = S[ 12 ];
        S[ 12 ]    = s_odd;
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_odd,  A_Q12[ 2 ] );
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_even, A_Q12[ 3 ] );
        s_odd      = S[ 11 ];
        S[ 11 ]    = s_even;

        /* --- taps 4 and 5 ------------------------------------------------ */
        s_even     = S[ 10 ];
        S[ 10 ]    = s_odd;
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_odd,  A_Q12[ 4 ] );
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_even, A_Q12[ 5 ] );
        s_odd      = S[ 9 ];
        S[ 9 ]     = s_even;

        /* --- taps 6 and 7 ------------------------------------------------ */
        s_even     = S[ 8 ];
        S[ 8 ]     = s_odd;
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_odd,  A_Q12[ 6 ] );
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_even, A_Q12[ 7 ] );
        s_odd      = S[ 7 ];
        S[ 7 ]     = s_even;

        /* --- taps 8 and 9 ------------------------------------------------ */
        s_even     = S[ 6 ];
        S[ 6 ]     = s_odd;
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_odd,  A_Q12[ 8 ] );
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_even, A_Q12[ 9 ] );
        s_odd      = S[ 5 ];
        S[ 5 ]     = s_even;

        /* --- taps 10 and 11 ---------------------------------------------- */
        s_even     = S[ 4 ];
        S[ 4 ]     = s_odd;
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_odd,  A_Q12[ 10 ] );
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_even, A_Q12[ 11 ] );
        s_odd      = S[ 3 ];
        S[ 3 ]     = s_even;

        /* --- taps 12 and 13 ---------------------------------------------- */
        s_even     = S[ 2 ];
        S[ 2 ]     = s_odd;
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_odd,  A_Q12[ 12 ] );
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_even, A_Q12[ 13 ] );
        s_odd      = S[ 1 ];
        S[ 1 ]     = s_even;

        /* --- taps 14 and 15: last pair, S[ 0 ] receives the old S[ 1 ] ---- */
        s_even     = S[ 0 ];
        S[ 0 ]     = s_odd;
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_odd,  A_Q12[ 14 ] );
        acc_Q10    = SKP_SMLAWB_ovflw( acc_Q10, s_even, A_Q12[ 15 ] );

        /* Add the gain-scaled excitation sample with a saturating add. */
        acc_Q10    = SKP_ADD_SAT32( acc_Q10, SKP_SMULWB( Gain_Q26, in[ n ] ) );

        /* Drop the 10 fractional bits, with rounding. */
        sample_Q0  = SKP_RSHIFT_ROUND( acc_Q10, 10 );

        /* Clamp to the 16-bit output range. */
        out[ n ]   = ( SKP_int16 )SKP_SAT16( sample_Q0 );

        /* Feed the unrounded sum back as the newest state value, shifted up by 4 bits with saturation. */
        S[ 15 ]    = SKP_LSHIFT_SAT32( acc_Q10, 4 );
    }
}