C++ (Cpp) MULT16_32_Q14 Beispiele

Beispiel #1

0

Datei anzeigen

Datei: filters.c Projekt: BackupTheBerlios/semsivr

/* FIXME: These functions are ugly and probably introduce too much error */
/* Fills y[0..len-1]: every x[k] is shifted right by 7, combined with scale
   through MULT16_32_Q14, and the result is shifted left by 7 again. */
void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
{
   int k = 0;

   /* The operands are kept free of side effects in case the arithmetic
      helpers are macros that evaluate an argument more than once. */
   while (k < len)
   {
      y[k] = SHL(MULT16_32_Q14(SHR(x[k],7),scale),7);
      k++;
   }
}

Beispiel #2

0

Datei anzeigen

Datei: lsp.c Projekt: BackupTheBerlios/semsivr

/*
 * Convert LSP values to LPC coefficients (fixed-point, cascade form).
 *
 *  freq    array of lpcrdr LSP values; each one is mapped through ANGLE2X
 *  ak      output array; ak[0..lpcrdr] (lpcrdr+1 entries) are written
 *  lpcrdr  order of the LPC filter
 *  stack   scratch area from which PUSH carves the temporary buffers
 */
void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
{
    int i,j;
    spx_word32_t xout1,xout2,xin1,xin2;
    spx_word32_t *Wp;
    spx_word32_t *n1,*n2,*n3,*n4;
    spx_word32_t *tail;
    spx_word16_t *freqn;
    int m = lpcrdr>>1;

    freqn = PUSH(stack, lpcrdr, spx_word16_t);
    for (i=0;i<lpcrdr;i++)
       freqn[i] = ANGLE2X(freq[i]);

    /* Filter memory: four cells per second-order section plus two
       trailing cells used by the final combining stage. */
    Wp = PUSH(stack, 4*m+2, spx_word32_t);

    /* set contents of buffer to 0 */
    for(i=0;i<=4*m+1;i++)
       Wp[i] = 0;

    /* The two trailing cells are addressed directly.  The previous code
       reached them through n4, which is never assigned when m == 0
       (lpcrdr < 2) and therefore dereferenced a NULL-based pointer. */
    tail = Wp + 4*m;

    /* impulse fed into both cascades on the first pass (1<<20) */
    xin1 = 1048576;
    xin2 = 1048576;

    /* reconstruct P(z) and Q(z) by cascading second order
       polynomials in form 1 - 2xz(-1) + z(-2), where x is the
       LSP coefficient */

    for(j=0;j<=lpcrdr;j++){
       spx_word16_t *fr=freqn;
       for(i=0;i<m;i++){
          n1 = Wp+(i<<2);
          n2 = n1 + 1;
          n3 = n2 + 1;
          n4 = n3 + 1;
          xout1 = ADD32(SUB32(xin1, MULT16_32_Q14(*fr,*n1)), *n2);
          fr++;
          xout2 = ADD32(SUB32(xin2, MULT16_32_Q14(*fr,*n3)), *n4);
          fr++;
          /* shift the two delay lines of this section */
          *n2 = *n1;
          *n4 = *n3;
          *n1 = xin1;
          *n3 = xin2;
          /* output of this section feeds the next one */
          xin1 = xout1;
          xin2 = xout2;
       }
       xout1 = xin1 + tail[0];
       xout2 = xin2 - tail[1];
       /* FIXME: perhaps apply bandwidth expansion in case of overflow? */
       /* saturate before narrowing to the coefficient type */
       if (xout1 + xout2>256*32766)
          ak[j] = 32767;
       else if (xout1 + xout2 < -256*32767)
          ak[j] = -32768;
       else
          ak[j] = PSHR(ADD32(xout1,xout2),8);
       tail[0] = xin1;
       tail[1] = xin2;

       /* after the first pass the input to the cascades is zero */
       xin1 = 0;
       xin2 = 0;
    }
}

Beispiel #3

0

Datei anzeigen

Datei: lsp.c Projekt: bahadir89/freertos-networked-arm-cortex-m3

/*
 * Convert LSPs to LPC coefficients (fixed-point path, intermediate values
 * kept in QIMP format).
 *
 *  freq    array of LSP frequencies; each entry is mapped through ANGLE2X
 *  ak      output array of LPC coefficients; ak[0..lpcrdr-1] are written
 *          (ak[0..FIXED_LPC_SIZE-1] when FIXED_LPC_SIZE is defined)
 *  lpcrdr  order of LPC coefficients
 *  stack   not referenced directly in this body -- presumably consumed by
 *          the ALLOC macro in some build configurations (TODO confirm)
 *
 * NOTE(review): xp[1][3] / xq[1][3] are written unconditionally, so the
 * code appears to assume m >= 1, i.e. lpcrdr >= 2 -- confirm with callers.
 */
void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
{
    int i,j;
    spx_word32_t xout1,xout2,xin;
    spx_word32_t mult, a;
    VARDECL(spx_word32_t *xpmem);
    VARDECL(spx_word32_t *xqmem);
#ifndef FIXED_LPC_SIZE
    VARDECL(spx_word16_t *freqn);
    VARDECL(spx_word32_t **xp);
    VARDECL(spx_word32_t **xq);
#else
    /* With a compile-time order the small tables are plain arrays. */
    spx_word16_t freqn[FIXED_LPC_SIZE];
    spx_word32_t *xp[(FIXED_LPC_SIZE/2)+1];
    spx_word32_t *xq[(FIXED_LPC_SIZE/2)+1];
#endif

    /* number of second-order sections per polynomial */
    int m = lpcrdr>>1;

    /*

       Reconstruct P(z) and Q(z) by cascading second order polynomials
       in form 1 - 2cos(w)z(-1) + z(-2), where w is the LSP frequency.
       In the time domain this is:

       y(n) = x(n) - 2cos(w)x(n-1) + x(n-2)

       This is what the ALLOCS below are trying to do:

         int xp[m+1][lpcrdr+1+2]; // P matrix in QIMP
         int xq[m+1][lpcrdr+1+2]; // Q matrix in QIMP

       These matrices store the output of each stage on each row.  The
       final (m-th) row has the output of the final (m-th) cascaded
       2nd order filter.  The first row is the impulse input to the
       system (not written as it is known).

       The version below takes advantage of the fact that a lot of the
       outputs are zero or known, for example if we put an impulse
       into the first section then "clock" it 10 times only the first 3
       output samples are non-zero (it's an FIR filter).
    */

#ifndef FIXED_LPC_SIZE
    ALLOC(xp, (m+1), spx_word32_t*);
#endif
    ALLOC(xpmem, (m+1)*(lpcrdr+1+2), spx_word32_t);

#ifndef FIXED_LPC_SIZE
    ALLOC(xq, (m+1), spx_word32_t*);
#endif
    ALLOC(xqmem, (m+1)*(lpcrdr+1+2), spx_word32_t);

    /* Point each row of xp/xq at its slice of the flat backing buffers. */
#ifndef FIXED_LPC_SIZE
    for(i=0; i<=m; i++) {
      xp[i] = xpmem + i*(lpcrdr+1+2);
      xq[i] = xqmem + i*(lpcrdr+1+2);
    }
#else
    /* NOTE(review): the row stride uses FIXED_LPC_SIZE while the buffers
       above are sized from lpcrdr; this looks like it relies on
       lpcrdr == FIXED_LPC_SIZE -- confirm. */
    for(i=0; i<=m; i++) {
      xp[i] = xpmem + i*(FIXED_LPC_SIZE+1+2);
      xq[i] = xqmem + i*(FIXED_LPC_SIZE+1+2);
    }
#endif
    /* work out 2cos terms in Q14 */

#ifndef FIXED_LPC_SIZE
    ALLOC(freqn, lpcrdr, spx_word16_t);
    for (i=0;i<lpcrdr;i++)
       freqn[i] = ANGLE2X(freq[i]);
#else
    for (i=0;i<FIXED_LPC_SIZE;i++)
       freqn[i] = ANGLE2X(freq[i]);
#endif

    #define QIMP  21   /* scaling for impulse */

    xin = SHL32(EXTEND32(1), (QIMP-1)); /* 0.5 in QIMP format */

    /* first col and last non-zero values of each row are trivial */

    for(i=0;i<=m;i++) {
     xp[i][1] = 0;
     xp[i][2] = xin;
     xp[i][2+2*i] = xin;
     xq[i][1] = 0;
     xq[i][2] = xin;
     xq[i][2+2*i] = xin;
    }

    /* 2nd row (first output row) is trivial */

    xp[1][3] = -MULT16_32_Q14(freqn[0],xp[0][2]);
    xq[1][3] = -MULT16_32_Q14(freqn[1],xq[0][2]);

    /* previous-sample memory for the final combining loop */
    xout1 = xout2 = 0;

    /* now generate remaining rows */

    for(i=1;i<m;i++) {

      /* even-indexed freqn entries drive the P rows, odd-indexed ones
         the Q rows */
      for(j=1;j<2*(i+1)-1;j++) {
    mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]);
    xp[i+1][j+2] = ADD32(SUB32(xp[i][j+2], mult), xp[i][j]);
    mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]);
    xq[i+1][j+2] = ADD32(SUB32(xq[i][j+2], mult), xq[i][j]);
      }

      /* for last col xp[i][j+2] = xq[i][j+2] = 0 */
      /* j still holds the loop's exit value (2*i+1) here */

      mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]);
      xp[i+1][j+2] = SUB32(xp[i][j], mult);
      mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]);
      xq[i+1][j+2] = SUB32(xq[i][j], mult);
    }

    /* process last row to extract a{k} */

#ifndef FIXED_LPC_SIZE
    for(j=1;j<=lpcrdr;j++) {
#else
    for(j=1;j<=FIXED_LPC_SIZE;j++) {
#endif
      /* right shift applied by PSHR32 to the QIMP-format sum */
      int shift = QIMP-13;

      /* final filter sections */
      /* P contributes current + previous sample, Q current - previous */
      a = PSHR32(xp[m][j+2] + xout1 + xq[m][j+2] - xout2, shift);
      xout1 = xp[m][j+2];
      xout2 = xq[m][j+2];

      /* hard limit ak's to +/- 32767 */

      if (a < -32767) a = -32767;
      if (a > 32767) a = 32767;
      ak[j-1] = (short)a;

    }

}

#else

/*
 * Convert LSPs to LPC coefficients (floating-point, cascade form).
 *
 *  freq    array of lpcrdr LSP frequencies; each is mapped through ANGLE2X
 *  ak      output array of LPC coefficients; ak[0..lpcrdr-1] are written
 *  lpcrdr  order of LPC coefficients
 *  stack   not referenced directly in this body -- presumably consumed by
 *          the ALLOC macro in some build configurations (TODO confirm)
 */
void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
{
    int i,j;
    float xout1,xout2,xin1,xin2;
    VARDECL(float *Wp);
    float *n1,*n2,*n3,*n4;
    float *tail;
    VARDECL(float *x_freq);
    int m = lpcrdr>>1;

    /* Filter memory: four cells per second-order section plus two
       trailing cells used by the final combining stage. */
    ALLOC(Wp, 4*m+2, float);

    /* set contents of buffer to 0 */
    for(i=0;i<=4*m+1;i++)
        Wp[i] = 0.0;

    /* The two trailing cells are addressed directly.  The previous code
       reached them through n4, which is never assigned when m == 0
       (lpcrdr < 2) and therefore dereferenced a NULL-based pointer. */
    tail = Wp + 4*m;

    /* unit impulse fed into both cascades on the first pass */
    xin1 = 1.0;
    xin2 = 1.0;

    ALLOC(x_freq, lpcrdr, float);
    for (i=0;i<lpcrdr;i++)
       x_freq[i] = ANGLE2X(freq[i]);

    /* reconstruct P(z) and Q(z) by cascading second order
       polynomials in form 1 - 2xz(-1) + z(-2), where x is the
       LSP coefficient */

    for(j=0;j<=lpcrdr;j++){
        int i2=0;
        for(i=0;i<m;i++,i2+=2){
            n1 = Wp+(i*4);
            n2 = n1 + 1;
            n3 = n2 + 1;
            n4 = n3 + 1;
            xout1 = xin1 - 2.f*x_freq[i2] * *n1 + *n2;
            xout2 = xin2 - 2.f*x_freq[i2+1] * *n3 + *n4;
            /* shift the two delay lines of this section */
            *n2 = *n1;
            *n4 = *n3;
            *n1 = xin1;
            *n3 = xin2;
            /* output of this section feeds the next one */
            xin1 = xout1;
            xin2 = xout2;
        }
        xout1 = xin1 + tail[0];
        xout2 = xin2 - tail[1];
        /* the j == 0 output is not stored; ak starts at the next one */
        if (j>0)
           ak[j-1] = (xout1 + xout2)*0.5f;
        tail[0] = xin1;
        tail[1] = xin2;

        /* after the first pass the input to the cascades is zero */
        xin1 = 0.0;
        xin2 = 0.0;
    }

}

Beispiel #4

0

Datei anzeigen

Datei: lsp.c Projekt: TomDataworks/whisper_client

/*
 * Rebuild LPC coefficients from LSP frequencies (fixed-point).
 *
 *  freq    array of lpcrdr LSP frequencies; each is mapped through ANGLE2X
 *  ak      output array of LPC coefficients; ak[0..lpcrdr-1] are written
 *  lpcrdr  order of LPC coefficients
 *  stack   not referenced directly here -- presumably used by ALLOC in
 *          some build configurations (TODO confirm)
 *
 * P(z) and Q(z) are reconstructed by cascading second order sections
 *
 *     y(n) = x(n) - 2cos(w)x(n-1) + x(n-2)
 *
 * where w is an LSP frequency.  Conceptually two matrices
 *
 *     int xp[m+1][lpcrdr+1+2];   P matrix in QIMP
 *     int xq[m+1][lpcrdr+1+2];   Q matrix in QIMP
 *
 * hold the output of each stage, one row per stage; the last (m-th) row is
 * the output of the complete cascade.  Row 0 is the impulse input.  Only
 * the entries that are not known to be zero are ever computed, since every
 * section is an FIR filter fed with an impulse.
 */
void lsp_to_lpc(const spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
{
    int row, col, k;
    spx_word32_t prev_p, prev_q, half;
    spx_word32_t prod, coef;
    VARDECL(spx_word16_t *freqn);
    VARDECL(spx_word32_t **xp);
    VARDECL(spx_word32_t *xpmem);
    VARDECL(spx_word32_t **xq);
    VARDECL(spx_word32_t *xqmem);
    int m = lpcrdr>>1;

    ALLOC(xp, (m+1), spx_word32_t*);
    ALLOC(xpmem, (m+1)*(lpcrdr+1+2), spx_word32_t);

    ALLOC(xq, (m+1), spx_word32_t*);
    ALLOC(xqmem, (m+1)*(lpcrdr+1+2), spx_word32_t);

    /* hook every row pointer onto its slice of the flat buffers */
    for (row=0; row<m+1; row++) {
      int offset = row*(lpcrdr+1+2);
      xp[row] = xpmem + offset;
      xq[row] = xqmem + offset;
    }

    /* 2cos terms in Q14 */
    ALLOC(freqn, lpcrdr, spx_word16_t);
    for (k=0; k<lpcrdr; k++)
       freqn[k] = ANGLE2X(freq[k]);

    #define QIMP  21   /* scaling for impulse */

    /* 0.5 in QIMP format */
    half = SHL32(EXTEND32(1), (QIMP-1));

    /* the first column and the last non-zero entry of each row are known */
    for (row=0; row<=m; row++) {
      spx_word32_t *p = xp[row];
      spx_word32_t *q = xq[row];

      p[1] = 0;
      q[1] = 0;
      p[2] = half;
      q[2] = half;
      p[2*row+2] = half;
      q[2*row+2] = half;
    }

    /* the first output row needs a single product per polynomial */
    xp[1][3] = -MULT16_32_Q14(freqn[0],xp[0][2]);
    xq[1][3] = -MULT16_32_Q14(freqn[1],xq[0][2]);

    /* every further row is derived from the one above it */
    for (row=1; row<m; row++) {
      const spx_word32_t *p_in = xp[row];
      const spx_word32_t *q_in = xq[row];
      spx_word32_t *p_out = xp[row+1];
      spx_word32_t *q_out = xq[row+1];
      spx_word16_t cos_p = freqn[2*row];
      spx_word16_t cos_q = freqn[2*row+1];
      int last = 2*row+1;

      for (col=1; col<last; col++) {
        prod = MULT16_32_Q14(cos_p,p_in[col+1]);
        p_out[col+2] = ADD32(SUB32(p_in[col+2], prod), p_in[col]);
        prod = MULT16_32_Q14(cos_q,q_in[col+1]);
        q_out[col+2] = ADD32(SUB32(q_in[col+2], prod), q_in[col]);
      }

      /* in the last column the x(n) term of the row above is zero */
      prod = MULT16_32_Q14(cos_p,p_in[last+1]);
      p_out[last+2] = SUB32(p_in[last], prod);
      prod = MULT16_32_Q14(cos_q,q_in[last+1]);
      q_out[last+2] = SUB32(q_in[last], prod);
    }

    /* final filter sections: combine the last rows into a{k} */
    prev_p = 0;
    prev_q = 0;
    for (k=0; k<lpcrdr; k++) {
      spx_word32_t cur_p = xp[m][k+3];
      spx_word32_t cur_q = xq[m][k+3];

      coef = PSHR32(cur_p + prev_p + cur_q - prev_q, QIMP-13);
      prev_p = cur_p;
      prev_q = cur_q;

      /* hard limit ak's to +/- 32767 */
      if (coef < -32767)
        coef = -32767;
      else if (coef > 32767)
        coef = 32767;
      ak[k] = (short)coef;
    }

}

Beispiel #5

0

Datei anzeigen

Datei: ltp.c Projekt: Affix/fgcom

/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis.
 *
 * For the given pitch lag, builds three candidate excitation vectors from
 * exc2 (one per tap), obtains their filtered responses, and searches the
 * gain codebook for the entry with the largest score.
 *
 * Outputs written through pointers:
 *   exc[0..nsf-1]         excitation produced with the selected gains
 *   new_target[0..nsf-1]  target minus the filtered contribution
 *   *cdbk_index           index of the selected codebook entry
 *
 * Returns the accumulated squared error.  In the FIXED_POINT build each
 * residual is first reduced with PSHR32(perr,15) before squaring; in the
 * float build the squares of new_target[] are summed directly.
 *
 * `bits` is not referenced anywhere in this function body.
 */
static spx_word64_t pitch_gain_search_3tap(
    const spx_sig_t target[],       /* Target vector */
    const spx_coef_t ak[],          /* LPCs for this subframe */
    const spx_coef_t awk1[],        /* Weighted LPCs #1 for this subframe */
    const spx_coef_t awk2[],        /* Weighted LPCs #2 for this subframe */
    spx_sig_t exc[],                /* Excitation */
    const void *par,                /* Cast to const ltp_params* below */
    int   pitch,                    /* Pitch value */
    int   p,                        /* Number of LPC coeffs */
    int   nsf,                      /* Number of samples in subframe */
    SpeexBits *bits,
    char *stack,                    /* Passed on to syn_percep_zero */
    const spx_sig_t *exc2,          /* Source for e[]; read at negative indices */
    const spx_word16_t *r,          /* Used to derive x[1], x[0] from x[2] */
    spx_sig_t *new_target,          /* Output: target minus contribution */
    int  *cdbk_index,               /* Output: selected codebook index */
    int cdbk_offset,                /* Selects a sub-table of gain_cdbk */
    int plc_tuning                  /* Clamped to >= 2 before use */
)
{
    int i,j;
    VARDECL(spx_sig_t *tmp1);
    VARDECL(spx_sig_t *tmp2);
    spx_sig_t *x[3];      /* filtered responses, one per tap */
    spx_sig_t *e[3];      /* candidate excitations, one per tap */
    spx_word32_t corr[3]; /* correlation of each response with the target */
    spx_word32_t A[3][3]; /* cross-correlations between the responses */
    int   gain_cdbk_size;
    const signed char *gain_cdbk;
    spx_word16_t gain[3];
    spx_word64_t err;

    const ltp_params *params;
    params = (const ltp_params*) par;
    gain_cdbk_size = 1<<params->gain_bits;
    /* each entry holds 3 values; cdbk_offset skips whole tables */
    gain_cdbk = params->gain_cdbk + 3*gain_cdbk_size*cdbk_offset;
    ALLOC(tmp1, 3*nsf, spx_sig_t);
    ALLOC(tmp2, 3*nsf, spx_sig_t);

    /* carve the two scratch buffers into three nsf-sized rows each */
    x[0]=tmp1;
    x[1]=tmp1+nsf;
    x[2]=tmp1+2*nsf;

    e[0]=tmp2;
    e[1]=tmp2+nsf;
    e[2]=tmp2+2*nsf;
    /* Processed from i=2 down to 0 because x[i] for i<2 is derived from
       x[i+1]. */
    for (i=2; i>=0; i--)
    {
        int pp=pitch+1-i;
        /* Copy exc2 delayed by pp; past that, delayed by pp+pitch;
           anything beyond is zero.  Both reads use negative indices. */
        for (j=0; j<nsf; j++)
        {
            if (j-pp<0)
                e[i][j]=exc2[j-pp];
            else if (j-pp-pitch<0)
                e[i][j]=exc2[j-pp-pitch];
            else
                e[i][j]=0;
        }

        if (i==2)
            syn_percep_zero(e[i], ak, awk1, awk2, x[i], nsf, p, stack);
        else {
            /* x[i] = x[i+1] delayed by one sample, plus r[] scaled by
               the first sample of e[i] */
            for (j=0; j<nsf-1; j++)
                x[i][j+1]=x[i+1][j];
            x[i][0]=0;
            for (j=0; j<nsf; j++)
            {
                x[i][j]=ADD32(x[i][j],SHL32(MULT16_32_Q15(r[j], e[i][0]),1));
            }
        }
    }

#ifdef FIXED_POINT
    {
        /* If using fixed-point, we need to normalize the signals first */
        spx_word16_t *y[3];
        VARDECL(spx_word16_t *ytmp);
        VARDECL(spx_word16_t *t);

        spx_sig_t max_val=1;
        int sig_shift;

        ALLOC(ytmp, 3*nsf, spx_word16_t);
#if 0
        ALLOC(y[0], nsf, spx_word16_t);
        ALLOC(y[1], nsf, spx_word16_t);
        ALLOC(y[2], nsf, spx_word16_t);
#else
        y[0] = ytmp;
        y[1] = ytmp+nsf;
        y[2] = ytmp+2*nsf;
#endif
        ALLOC(t, nsf, spx_word16_t);
        /* largest magnitude over the three responses ... */
        for (j=0; j<3; j++)
        {
            for (i=0; i<nsf; i++)
            {
                spx_sig_t tmp = x[j][i];
                if (tmp<0)
                    tmp = -tmp;
                if (tmp > max_val)
                    max_val = tmp;
            }
        }
        /* ... and the target */
        for (i=0; i<nsf; i++)
        {
            spx_sig_t tmp = target[i];
            if (tmp<0)
                tmp = -tmp;
            if (tmp > max_val)
                max_val = tmp;
        }

        /* smallest right shift that brings the peak down to <= 16384 */
        sig_shift=0;
        while (max_val>16384)
        {
            sig_shift++;
            max_val >>= 1;
        }

        /* apply the common shift and narrow to 16 bits */
        for (j=0; j<3; j++)
        {
            for (i=0; i<nsf; i++)
            {
                y[j][i] = EXTRACT16(SHR32(x[j][i],sig_shift));
            }
        }
        for (i=0; i<nsf; i++)
        {
            t[i] = EXTRACT16(SHR32(target[i],sig_shift));
        }

        for (i=0; i<3; i++)
            corr[i]=inner_prod(y[i],t,nsf);

        /* symmetric: compute the lower triangle and mirror it */
        for (i=0; i<3; i++)
            for (j=0; j<=i; j++)
                A[i][j]=A[j][i]=inner_prod(y[i],y[j],nsf);
    }
#else
    {
        for (i=0; i<3; i++)
            corr[i]=inner_prod(x[i],target,nsf);

        /* symmetric: compute the lower triangle and mirror it */
        for (i=0; i<3; i++)
            for (j=0; j<=i; j++)
                A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf);
    }
#endif

    {
        /* C[0..2]: correlations with the target (note reversed order),
           C[3..5]: cross terms, C[6..8]: energies (reversed order) */
        spx_word32_t C[9];
        const signed char *ptr=gain_cdbk;
        int best_cdbk=0;
        spx_word32_t best_sum=0;
        C[0]=corr[2];
        C[1]=corr[1];
        C[2]=corr[0];
        C[3]=A[1][2];
        C[4]=A[0][1];
        C[5]=A[0][2];
        C[6]=A[2][2];
        C[7]=A[1][1];
        C[8]=A[0][0];

        /*plc_tuning *= 2;*/
        if (plc_tuning<2)
            plc_tuning=2;
        /* NOTE(review): the float branch also rescales C[6..8]; the
           fixed-point branch only touches C[0..2] -- confirm intended. */
#ifdef FIXED_POINT
        C[0] = MAC16_32_Q15(C[0],MULT16_16_16(plc_tuning,-327),C[0]);
        C[1] = MAC16_32_Q15(C[1],MULT16_16_16(plc_tuning,-327),C[1]);
        C[2] = MAC16_32_Q15(C[2],MULT16_16_16(plc_tuning,-327),C[2]);
#else
        C[0]*=1-.01*plc_tuning;
        C[1]*=1-.01*plc_tuning;
        C[2]*=1-.01*plc_tuning;
        C[6]*=.5*(1+.01*plc_tuning);
        C[7]*=.5*(1+.01*plc_tuning);
        C[8]*=.5*(1+.01*plc_tuning);
#endif
        /* exhaustive search over the codebook entries */
        for (i=0; i<gain_cdbk_size; i++)
        {
            spx_word32_t sum=0;
            spx_word16_t g0,g1,g2;
            spx_word16_t pitch_control=64;
            spx_word16_t gain_sum;

            ptr = gain_cdbk+3*i;
            /* stored values are offset by 32 */
            g0=ADD16((spx_word16_t)ptr[0],32);
            g1=ADD16((spx_word16_t)ptr[1],32);
            g2=ADD16((spx_word16_t)ptr[2],32);

            /* g1 always counts; g0 and g2 only when positive */
            gain_sum = g1;
            if (g0>0)
                gain_sum += g0;
            if (g2>0)
                gain_sum += g2;
            /* Above 64, pitch_control is reduced from its default of 64
               in proportion to the excess (capped at 127) and to
               plc_tuning, and floored at 0. */
            if (gain_sum > 64)
            {
                gain_sum = SUB16(gain_sum, 64);
                if (gain_sum > 127)
                    gain_sum = 127;
#ifdef FIXED_POINT
                pitch_control =  SUB16(64,EXTRACT16(PSHR32(MULT16_16(64,MULT16_16_16(plc_tuning, gain_sum)),10)));
#else
                pitch_control = 64*(1.-.001*plc_tuning*gain_sum);
#endif
                if (pitch_control < 0)
                    pitch_control = 0;
            }

            /* score: correlation terms added, cross and energy terms
               subtracted */
            sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g0,pitch_control),C[0]));
            sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g1,pitch_control),C[1]));
            sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g2,pitch_control),C[2]));
            sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g0,g1),C[3]));
            sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g2,g1),C[4]));
            sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g2,g0),C[5]));
            sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g0,g0),C[6]));
            sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g1,g1),C[7]));
            sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g2,g2),C[8]));
            /* We could force "safe" pitch values to handle packet loss better */

            /* i==0 seeds the search so a negative best score is possible */
            if (sum>best_sum || i==0)
            {
                best_sum=sum;
                best_cdbk=i;
            }
        }
        /* decode the three gains of the winning entry */
#ifdef FIXED_POINT
        gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3]);
        gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+1]);
        gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+2]);
        /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/
#else
        gain[0] = 0.015625*gain_cdbk[best_cdbk*3]  + .5;
        gain[1] = 0.015625*gain_cdbk[best_cdbk*3+1]+ .5;
        gain[2] = 0.015625*gain_cdbk[best_cdbk*3+2]+ .5;
#endif
        *cdbk_index=best_cdbk;
    }

    /* Build the output excitation and the residual target.  gain[0] pairs
       with row 2, gain[1] with row 1, gain[2] with row 0. */
#ifdef FIXED_POINT
    for (i=0; i<nsf; i++)
        exc[i]=SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),e[2][i]), MULT16_32_Q15(SHL16(gain[1],7),e[1][i])),
                           MULT16_32_Q15(SHL16(gain[2],7),e[0][i])), 2);

    err=0;
    for (i=0; i<nsf; i++)
    {
        spx_word16_t perr2;
        spx_sig_t tmp = SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),x[2][i]),MULT16_32_Q15(SHL16(gain[1],7),x[1][i])),
                                    MULT16_32_Q15(SHL16(gain[2],7),x[0][i])),2);
        spx_sig_t perr=SUB32(target[i],tmp);
        new_target[i] = SUB32(target[i], tmp);
        /* reduce the residual to 16 bits before squaring */
        perr2 = EXTRACT16(PSHR32(perr,15));
        err = ADD64(err,MULT16_16(perr2,perr2));

    }
#else
    for (i=0; i<nsf; i++)
        exc[i]=gain[0]*e[2][i]+gain[1]*e[1][i]+gain[2]*e[0][i];

    err=0;
    for (i=0; i<nsf; i++)
    {
        spx_sig_t tmp = gain[2]*x[0][i]+gain[1]*x[1][i]+gain[0]*x[2][i];
        new_target[i] = target[i] - tmp;
        err+=new_target[i]*new_target[i];
    }
#endif

    return err;
}