Пример #1
1
word32_t ChebyshevPolynomial(word16_t x, word32_t f[])
{
    /* bk in Q15*/
    word32_t bk;
    word32_t bk1 = ADD32(SHL(x,1), f[1]); /* init: b4=2x+f1 */
    word32_t bk2 = ONE_IN_Q15; /* init: b5=1 */
    uint8_t k;
    for (k=3; k>0; k--) { /* at the end of loop execution we have b1 in bk1 and b2 in bk2 */
        bk = SUB32(ADD32(SHL(MULT16_32_Q15(x,bk1), 1), f[5-k]), bk2); /* bk = 2*x*bk1 − bk2 + f(5-k) all in Q15*/
        bk2 = bk1;
        bk1 = bk;
    }

    return SUB32(ADD32(MULT16_32_Q15(x,bk1), SHR(f[5],1)), bk2); /* C(x) = x*b1 - b2 + f(5)/2 */
}
Пример #2
0
/* Special case for stereo with no downsampling and no accumulation. This is
   quite common and we can make it faster by processing both channels in the
   same loop, reducing overhead due to the dependency loop in the IIR filter. */
static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
      celt_sig *mem)
{
   celt_sig * OPUS_RESTRICT x0;
   celt_sig * OPUS_RESTRICT x1;
   celt_sig m0, m1;
   int j;
   x0=in[0];
   x1=in[1];
   m0 = mem[0];
   m1 = mem[1];
   for (j=0;j<N;j++)
   {
      celt_sig tmp0, tmp1;
      /* Add VERY_SMALL to x[] first to reduce dependency chain. */
      tmp0 = x0[j] + VERY_SMALL + m0;
      tmp1 = x1[j] + VERY_SMALL + m1;
      m0 = MULT16_32_Q15(coef0, tmp0);
      m1 = MULT16_32_Q15(coef0, tmp1);
      pcm[2*j  ] = SCALEOUT(SIG2WORD16(tmp0));
      pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
   }
   mem[0] = m0;
   mem[1] = m1;
}
Пример #3
0
Файл: pitch.c Проект: 93i/godot
static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
                            int max_pitch, int *best_pitch
#ifdef FIXED_POINT
                            , int yshift, opus_val32 maxcorr
#endif
                            )
{
   int i, j;
   opus_val32 Syy=1;
   opus_val16 best_num[2];
   opus_val32 best_den[2];
#ifdef FIXED_POINT
   int xshift;

   xshift = celt_ilog2(maxcorr)-14;
#endif

   best_num[0] = -1;
   best_num[1] = -1;
   best_den[0] = 0;
   best_den[1] = 0;
   best_pitch[0] = 0;
   best_pitch[1] = 1;
   for (j=0;j<len;j++)
      Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
   for (i=0;i<max_pitch;i++)
   {
      if (xcorr[i]>0)
      {
         opus_val16 num;
         opus_val32 xcorr16;
         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
#ifndef FIXED_POINT
         /* Considering the range of xcorr16, this should avoid both underflows
            and overflows (inf) when squaring xcorr16 */
         xcorr16 *= 1e-12f;
#endif
         num = MULT16_16_Q15(xcorr16,xcorr16);
         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
         {
            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
            {
               best_num[1] = best_num[0];
               best_den[1] = best_den[0];
               best_pitch[1] = best_pitch[0];
               best_num[0] = num;
               best_den[0] = Syy;
               best_pitch[0] = i;
            } else {
               best_num[1] = num;
               best_den[1] = Syy;
               best_pitch[1] = i;
            }
         }
      }
      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
      Syy = MAX32(1, Syy);
   }
}
Пример #4
0
static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
{
   const int N = st->filt_len;
   int out_sample = 0;
   int last_sample = st->last_sample[channel_index];
   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
   const int out_stride = st->out_stride;
   const int int_advance = st->int_advance;
   const int frac_advance = st->frac_advance;
   const spx_uint32_t den_rate = st->den_rate;
   spx_word32_t sum;

   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
   {
      const spx_word16_t *iptr = & in[last_sample];

      const int offset = samp_frac_num*st->oversample/st->den_rate;
#ifdef FIXED_POINT
      const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
#else
      const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
#endif
      spx_word16_t interp[4];


#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
      int j;
      spx_word32_t accum[4] = {0,0,0,0};

      for(j=0;j<N;j++) {
        const spx_word16_t curr_in=iptr[j];
        accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
        accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
        accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
        accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
      }

      cubic_coef(frac, interp);
      sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
      sum = SATURATE32PSHR(sum, 15, 32767);
#else
      cubic_coef(frac, interp);
      sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
#endif

      out[out_stride * out_sample++] = sum;
      last_sample += int_advance;
      samp_frac_num += frac_advance;
      if (samp_frac_num >= den_rate)
      {
         samp_frac_num -= den_rate;
         last_sample++;
      }
   }

   st->last_sample[channel_index] = last_sample;
   st->samp_frac_num[channel_index] = samp_frac_num;
   return out_sample;
}
Пример #5
0
static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
                            int max_pitch, int *best_pitch
#ifdef FIXED_POINT
                            , int yshift, opus_val32 maxcorr
#endif
                            )
{
   int i, j;
   opus_val32 Syy=1;
   opus_val16 best_num[2];
   opus_val32 best_den[2];
#ifdef FIXED_POINT
   int xshift;

   xshift = celt_ilog2(maxcorr)-14;
#endif

   best_num[0] = -1;
   best_num[1] = -1;
   best_den[0] = 0;
   best_den[1] = 0;
   best_pitch[0] = 0;
   best_pitch[1] = 1;
   for (j=0;j<len;j++)
      Syy = MAC16_16(Syy, y[j],y[j]);
   for (i=0;i<max_pitch;i++)
   {
      if (xcorr[i]>0)
      {
         opus_val16 num;
         opus_val32 xcorr16;
         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
         num = MULT16_16_Q15(xcorr16,xcorr16);
         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
         {
            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
            {
               best_num[1] = best_num[0];
               best_den[1] = best_den[0];
               best_pitch[1] = best_pitch[0];
               best_num[0] = num;
               best_den[0] = Syy;
               best_pitch[0] = i;
            } else {
               best_num[1] = num;
               best_den[1] = Syy;
               best_pitch[1] = i;
            }
         }
      }
      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
      Syy = MAX32(1, Syy);
   }
}
Пример #6
0
void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N,
      opus_val16 g10, opus_val16 g11, opus_val16 g12)
{
   int i;
   __m128 x0v;
   __m128 g10v, g11v, g12v;
   g10v = _mm_load1_ps(&g10);
   g11v = _mm_load1_ps(&g11);
   g12v = _mm_load1_ps(&g12);
   x0v = _mm_loadu_ps(&x[-T-2]);
   for (i=0;i<N-3;i+=4)
   {
      __m128 yi, yi2, x1v, x2v, x3v, x4v;
      const opus_val32 *xp = &x[i-T-2];
      yi = _mm_loadu_ps(x+i);
      x4v = _mm_loadu_ps(xp+4);
#if 0
      /* Slower version with all loads */
      x1v = _mm_loadu_ps(xp+1);
      x2v = _mm_loadu_ps(xp+2);
      x3v = _mm_loadu_ps(xp+3);
#else
      x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
      x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
      x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
#endif

      yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
      yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
      yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
#else
      /* Use partial sums */
      yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
                       _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
      yi = _mm_add_ps(yi, yi2);
#endif
      x0v=x4v;
      _mm_storeu_ps(y+i, yi);
   }
#ifdef CUSTOM_MODES
   for (;i<N;i++)
   {
      y[i] = x[i]
               + MULT16_32_Q15(g10,x[i-T])
               + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
               + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
   }
#endif
}
Пример #7
0
void speex_decode_stereo_int(spx_int16_t *data, int frame_size, SpeexStereoState *_stereo)
{
   int i;
   spx_word32_t balance;
   spx_word16_t e_left, e_right, e_ratio;
   RealSpeexStereoState *stereo = (RealSpeexStereoState*)_stereo;

   /* COMPATIBILITY_HACK(stereo); */
   
   balance=stereo->balance;
   e_ratio=stereo->e_ratio;
   
   /* These two are Q14, with max value just below 2. */
   e_right = DIV32(QCONST32(1., 22), spx_sqrt(MULT16_32_Q15(e_ratio, ADD32(QCONST32(1., 16), balance))));
   e_left = SHR32(MULT16_16(spx_sqrt(balance), e_right), 8);

   for (i=frame_size-1;i>=0;i--)
   {
      spx_int16_t tmp=data[i];
      stereo->smooth_left = EXTRACT16(PSHR32(MAC16_16(MULT16_16(stereo->smooth_left, QCONST16(0.98, 15)), e_left, QCONST16(0.02, 15)), 15));
      stereo->smooth_right = EXTRACT16(PSHR32(MAC16_16(MULT16_16(stereo->smooth_right, QCONST16(0.98, 15)), e_right, QCONST16(0.02, 15)), 15));
      data[2*i] = (spx_int16_t)MULT16_16_P14(stereo->smooth_left, tmp);
      data[2*i+1] = (spx_int16_t)MULT16_16_P14(stereo->smooth_right, tmp);
   }
}
Пример #8
0
/* Compute spectrum of estimated echo for use in an echo post-filter */
void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
{
   int i;
   spx_word16_t leak2;
   int N;
   
   N = st->window_size;

   /* Apply hanning window (should pre-compute it)*/
   for (i=0;i<N;i++)
      st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
      
   /* Compute power spectrum of the echo */
   spx_fft(st->fft_table, st->y, st->Y);
   power_spectrum(st->Y, residual_echo, N);
      
#ifdef FIXED_POINT
   if (st->leak_estimate > 16383)
      leak2 = 32767;
   else
      leak2 = SHL16(st->leak_estimate, 1);
#else
   if (st->leak_estimate>.5)
      leak2 = 1;
   else
      leak2 = 2*st->leak_estimate;
#endif
   /* Estimate residual echo */
   for (i=0;i<=st->frame_size;i++)
      residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
   
}
Пример #9
0
static opus_val32 silk_resampler_down2_hp(
    opus_val32                  *S,                 /* I/O  State vector [ 2 ]                                          */
    opus_val32                  *out,               /* O    Output signal [ floor(len/2) ]                              */
    const opus_val32            *in,                /* I    Input signal [ len ]                                        */
    int                         inLen               /* I    Number of input samples                                     */
)
{
    int k, len2 = inLen/2;
    opus_val32 in32, out32, out32_hp, Y, X;
    opus_val64 hp_ener = 0;
    /* Internal variables and state are in Q10 format */
    for( k = 0; k < len2; k++ ) {
        /* Convert to Q10 */
        in32 = in[ 2 * k ];

        /* All-pass section for even input sample */
        Y      = SUB32( in32, S[ 0 ] );
        X      = MULT16_32_Q15(QCONST16(0.6074371f, 15), Y);
        out32  = ADD32( S[ 0 ], X );
        S[ 0 ] = ADD32( in32, X );
        out32_hp = out32;
        /* Convert to Q10 */
        in32 = in[ 2 * k + 1 ];

        /* All-pass section for odd input sample, and add to output of previous section */
        Y      = SUB32( in32, S[ 1 ] );
        X      = MULT16_32_Q15(QCONST16(0.15063f, 15), Y);
        out32  = ADD32( out32, S[ 1 ] );
        out32  = ADD32( out32, X );
        S[ 1 ] = ADD32( in32, X );

        Y      = SUB32( -in32, S[ 2 ] );
        X      = MULT16_32_Q15(QCONST16(0.15063f, 15), Y);
        out32_hp  = ADD32( out32_hp, S[ 2 ] );
        out32_hp  = ADD32( out32_hp, X );
        S[ 2 ] = ADD32( -in32, X );

        hp_ener += out32_hp*(opus_val64)out32_hp;
        /* Add, convert back to int16 and store to output */
        out[ k ] = HALF32(out32);
    }
#ifdef FIXED_POINT
    /* len2 can be up to 480, so we shift by 8 more to make it fit. */
    hp_ener = hp_ener >> (2*SIG_SHIFT + 8);
#endif
    return (opus_val32)hp_ener;
}
Пример #10
0
void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
   int i,j;
   spx_sig_t xi,yi,nyi;

   for (i=0;i<N;i++)
   {
      int xh,xl,yh,yl;
      xi=SATURATE(x[i],805306368);
      yi = SATURATE(ADD32(xi, SHL(mem[0],2)),805306368);
      nyi = -yi;
      xh = xi>>15; xl=xi&0x00007fff; yh = yi>>15; yl=yi&0x00007fff; 
      for (j=0;j<ord-1;j++)
      {
         mem[j] = MAC16_32_Q15(MAC16_32_Q15(mem[j+1], num[j+1],xi), den[j+1],nyi);
      }
      mem[ord-1] = SUB32(MULT16_32_Q15(num[ord],xi), MULT16_32_Q15(den[ord],yi));
      y[i] = yi;
   }
}
Пример #11
0
/** Forced pitch delay and gain */
int forced_pitch_quant(spx_word16_t target[],	/* Target vector */
		       spx_word16_t * sw, spx_coef_t ak[],	/* LPCs for this subframe */
		       spx_coef_t awk1[],	/* Weighted LPCs #1 for this subframe */
		       spx_coef_t awk2[],	/* Weighted LPCs #2 for this subframe */
		       spx_sig_t exc[],	/* Excitation */
		       const void *par, int start,	/* Smallest pitch value allowed */
		       int end,	/* Largest pitch value allowed */
		       spx_word16_t pitch_coef,	/* Voicing (pitch) coefficient */
		       int p,	/* Number of LPC coeffs */
		       int nsf,	/* Number of samples in subframe */
		       SpeexBits * bits,
		       char *stack,
		       spx_word16_t * exc2,
		       spx_word16_t * r,
		       int complexity,
		       int cdbk_offset,
		       int plc_tuning, spx_word32_t * cumul_gain)
{
	(void)sw;
	(void)par;
	(void)end;
	(void)bits;
	(void)r;
	(void)complexity;
	(void)cdbk_offset;
	(void)plc_tuning;
	(void)cumul_gain;
	int i;
	spx_word16_t res[nsf];
#ifdef FIXED_POINT
	if (pitch_coef > 63)
		pitch_coef = 63;
#else
	if (pitch_coef > .99)
		pitch_coef = .99;
#endif
	for (i = 0; i < nsf && i < start; i++) {
		exc[i] = MULT16_16(SHL16(pitch_coef, 7), exc2[i - start]);
	}
	for (; i < nsf; i++) {
		exc[i] = MULT16_32_Q15(SHL16(pitch_coef, 9), exc[i - start]);
	}
	for (i = 0; i < nsf; i++)
		res[i] = EXTRACT16(PSHR32(exc[i], SIG_SHIFT - 1));
	syn_percep_zero16(res, ak, awk1, awk2, res, nsf, p, stack);
	for (i = 0; i < nsf; i++)
		target[i] =
		    EXTRACT16(SATURATE
			      (SUB32(EXTEND32(target[i]), EXTEND32(res[i])),
			       32700));
	return start;
}
Пример #12
0
static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
{
   int i;
   spx_word16_t den2;
#ifdef FIXED_POINT
   den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
#else
   den2 = radius*radius + .7*(1-radius)*(1-radius);
#endif   
   /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
   for (i=0;i<len;i++)
   {
      spx_word16_t vin = in[i*stride];
      spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
#ifdef FIXED_POINT
      mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
#else
      mem[0] = mem[1] + 2*(-vin + radius*vout);
#endif
      mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
      out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
   }
}
Пример #13
0
void signal_div(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
{
   int i;
   spx_word16_t scale_1;

   scale = PSHR(scale, SIG_SHIFT);
   if (scale<2)
      scale_1 = 32767;
   else
      scale_1 = 32767/scale;
   for (i=0;i<len;i++)
   {
      y[i] = MULT16_32_Q15(scale_1,x[i]);
   }
}
Пример #14
0
static int transient_analysis(celt_word32_t *in, int len, int C, int *transient_time, int *transient_shift)
{
   int c, i, n;
   celt_word32_t ratio;
   VARDECL(celt_word32_t, begin);
   SAVE_STACK;
   ALLOC(begin, len, celt_word32_t);
   for (i=0;i<len;i++)
      begin[i] = ABS32(SHR32(in[C*i],SIG_SHIFT));
   for (c=1;c<C;c++)
   {
      for (i=0;i<len;i++)
         begin[i] = MAX32(begin[i], ABS32(SHR32(in[C*i+c],SIG_SHIFT)));
   }
   for (i=1;i<len;i++)
      begin[i] = MAX32(begin[i-1],begin[i]);
   n = -1;
   for (i=8;i<len-8;i++)
   {
      if (begin[i] < MULT16_32_Q15(QCONST16(.2f,15),begin[len-1]))
         n=i;
   }
   if (n<32)
   {
      n = -1;
      ratio = 0;
   } else {
      ratio = DIV32(begin[len-1],1+begin[n-16]);
   }
   if (ratio < 0)
      ratio = 0;
   if (ratio > 1000)
      ratio = 1000;
   ratio *= ratio;
   
   if (ratio > 2048)
      *transient_shift = 3;
   else
      *transient_shift = 0;
   
   *transient_time = n;
   
   RESTORE_STACK;
   return ratio > 20;
}
Пример #15
0
void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
   int i,j;
   spx_word32_t xi,yi;

   for (i=0;i<N;i++)
   {
      int xh,xl;
      xi=SATURATE(x[i],805306368);
      yi = xi + (mem[0]<<2);
      xh = xi>>15; xl=xi&0x00007fff;
      for (j=0;j<ord-1;j++)
      {
         mem[j] = MAC16_32_Q15(mem[j+1], num[j+1],xi);
      }
      mem[ord-1] = MULT16_32_Q15(num[ord],xi);
      y[i] = SATURATE(yi,805306368);
   }

}
Пример #16
0
void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
   int i,j;
   spx_word32_t xi,yi,nyi;

   for (i=0;i<N;i++)
   {
      int yh,yl;
      xi=SATURATE(x[i],805306368);
      yi = SATURATE(xi + (mem[0]<<2),805306368);
      nyi = -yi;
      yh = yi>>15; yl=yi&0x00007fff; 
      for (j=0;j<ord-1;j++)
      {
         mem[j] = MAC16_32_Q15(mem[j+1],den[j+1],nyi);
      }
      mem[ord-1] = - MULT16_32_Q15(den[ord],yi);
      y[i] = yi;
   }
}
Пример #17
0
void postFilter(bcg729DecoderChannelContextStruct *decoderChannelContext, word16_t *LPCoefficients, word16_t *reconstructedSpeech, int16_t intPitchDelay, int subframeIndex,
		word16_t *postFilteredSignal)
{
	int i,j;

	/********************************************************************/
	/* Long Term Post Filter                                            */
	/********************************************************************/
	/*** Compute LPGammaN and LPGammaD coefficients : LPGamma[0] = LP[0]*Gamma^(i+1) (i=0..9) ***/
	word16_t LPGammaNCoefficients[NB_LSP_COEFF]; /* in Q12 */
	/* GAMMA_XX constants are in Q15 */
	LPGammaNCoefficients[0] = MULT16_16_P15(LPCoefficients[0], GAMMA_N1);
	LPGammaNCoefficients[1] = MULT16_16_P15(LPCoefficients[1], GAMMA_N2);
	LPGammaNCoefficients[2] = MULT16_16_P15(LPCoefficients[2], GAMMA_N3);
	LPGammaNCoefficients[3] = MULT16_16_P15(LPCoefficients[3], GAMMA_N4);
	LPGammaNCoefficients[4] = MULT16_16_P15(LPCoefficients[4], GAMMA_N5);
	LPGammaNCoefficients[5] = MULT16_16_P15(LPCoefficients[5], GAMMA_N6);
	LPGammaNCoefficients[6] = MULT16_16_P15(LPCoefficients[6], GAMMA_N7);
	LPGammaNCoefficients[7] = MULT16_16_P15(LPCoefficients[7], GAMMA_N8);
	LPGammaNCoefficients[8] = MULT16_16_P15(LPCoefficients[8], GAMMA_N9);
	LPGammaNCoefficients[9] = MULT16_16_P15(LPCoefficients[9], GAMMA_N10);

	/*** Compute the residual signal as described in spec 4.2.1 eq79 ***/
	/* Compute also a scaled residual signal: shift right by 2 to avoid overflows on 32 bits when computing correlation and energy */
	
	/* pointers to current subframe beginning */
	word16_t *residualSignal = &(decoderChannelContext->residualSignalBuffer[MAXIMUM_INT_PITCH_DELAY+subframeIndex]);
	word16_t *scaledResidualSignal = &(decoderChannelContext->scaledResidualSignalBuffer[MAXIMUM_INT_PITCH_DELAY+subframeIndex]);

	for (i=0; i<L_SUBFRAME; i++) {
		word32_t acc = SHL((word32_t)reconstructedSpeech[i], 12); /* reconstructedSpeech in Q0 shifted to set acc in Q12 */
		for (j=0; j<NB_LSP_COEFF; j++) {
			acc = MAC16_16(acc, LPGammaNCoefficients[j],reconstructedSpeech[i-j-1]); /* LPGammaNCoefficients in Q12, reconstructedSpeech in Q0 -> acc in Q12 */
		}
		residualSignal[i] = (word16_t)SATURATE(PSHR(acc, 12), MAXINT16); /* shift back acc to Q0 and saturate it to avoid overflow when going back to 16 bits */
		scaledResidualSignal[i] = PSHR(residualSignal[i], 2); /* shift acc to Q-2 and saturate it to get the scaled version of the signal */
	}

	/*** Compute the maximum correlation on scaledResidualSignal delayed by intPitchDelay +/- 3 to get the best delay. Spec 4.2.1 eq80 ***/
	/* using a scaled(Q-2) signals gives correlation in Q-4. */
	word32_t correlationMax = (word32_t)MININT32;
	int16_t intPitchDelayMax = intPitchDelay+3; /* intPitchDelayMax shall be < MAXIMUM_INT_PITCH_DELAY(143) */
	int16_t bestIntPitchDelay = 0;
	word16_t *delayedResidualSignal;
	if (intPitchDelayMax>MAXIMUM_INT_PITCH_DELAY) {
		intPitchDelayMax = MAXIMUM_INT_PITCH_DELAY;
	}

	for (i=intPitchDelay-3; i<=intPitchDelayMax; i++) {
		word32_t correlation = 0;
		delayedResidualSignal = &(scaledResidualSignal[-i]); /* delayedResidualSignal points to scaledResidualSignal[-i] */
		
		/* compute correlation: ∑r(n)*rk(n) */
		for (j=0; j<L_SUBFRAME; j++) {
			correlation = MAC16_16(correlation, delayedResidualSignal[j], scaledResidualSignal[j]);
		}
		/* if we have a maximum correlation */
		if (correlation>correlationMax) {
			correlationMax = correlation;
			bestIntPitchDelay = i; /* get the intPitchDelay */
		}
	}

	/* saturate correlation to a positive integer */
	if (correlationMax<0) {
		correlationMax = 0;
	}

	/*** Compute the signal energy ∑r(n)*r(n) and delayed signal energy ∑rk(n)*rk(n) which shall be used to compute gl spec 4.2.1 eq81, eq 82 and eq83 ***/
	word32_t residualSignalEnergy = 0; /* in Q-4 */
	word32_t delayedResidualSignalEnergy = 0; /* in Q-4 */
	delayedResidualSignal = &(scaledResidualSignal[-bestIntPitchDelay]); /* in Q-2, points to the residual signal delayed to give the higher correlation: rk(n) */ 
	for (i=0; i<L_SUBFRAME; i++) {
		residualSignalEnergy = MAC16_16(residualSignalEnergy, scaledResidualSignal[i], scaledResidualSignal[i]);
		delayedResidualSignalEnergy = MAC16_16(delayedResidualSignalEnergy, delayedResidualSignal[i], delayedResidualSignal[i]);
	}

	/*** Scale correlationMax, residualSignalEnergy and delayedResidualSignalEnergy to the best fit on 16 bits ***/
	/* these variables must fit on 16bits for the following computation, to avoid loosing information, scale them */
	/* at best fit: scale the higher of three to get the value over 2^14 and shift the other two from the same amount */
	/* Note: all three value are >= 0 */
	word32_t maximumThree = correlationMax;
	if (maximumThree<residualSignalEnergy) {
		maximumThree = residualSignalEnergy;
	}
	if (maximumThree<delayedResidualSignalEnergy) {
		maximumThree = delayedResidualSignalEnergy;
	}

	int16_t leadingZeros = 0;
	word16_t correlationMaxWord16 = 0;
	word16_t residualSignalEnergyWord16 = 0;
	word16_t delayedResidualSignalEnergyWord16 = 0;

	if (maximumThree>0) { /* if all of them a null, just do nothing otherwise shift right to get the max number in range [0x4000,0x8000[ */
		leadingZeros = countLeadingZeros(maximumThree);
		if (leadingZeros<16) {
			correlationMaxWord16 = (word16_t)SHR32(correlationMax, 16-leadingZeros);
			residualSignalEnergyWord16 = (word16_t)SHR32(residualSignalEnergy, 16-leadingZeros);
			delayedResidualSignalEnergyWord16 = (word16_t)SHR32(delayedResidualSignalEnergy, 16-leadingZeros);
		} else { /* if the values already fit on 16 bits, no need to shift */
			correlationMaxWord16 = (word16_t)correlationMax;
			residualSignalEnergyWord16 = (word16_t)residualSignalEnergy;
			delayedResidualSignalEnergyWord16 = (word16_t)delayedResidualSignalEnergy;
		}
	}

	/* eq78: Hp(z)=(1 + γp*gl*z(−T))/(1 + γp*gl) -> (with g=γp*gl) Hp(z)=1/(1+g) + (g/(1+g))*z(-T) = g0 + g1*z(-T) */
	/* g = gl/2 (as γp=0.5)= (eq83) correlationMax/(2*delayedResidualSignalEnergy) */
	/* compute g0 = 1/(1+g)=  delayedResidualSignalEnergy/(delayedResidualSignalEnergy+correlationMax/2) = 1-g1*/
	/* compute g1 = g/(1+g) = correlationMax/(2*delayedResidualSignalEnergy+correlationMax) = 1-g0 */

	/*** eq82 -> (correlationMax^2)/(residualSignalEnergy*delayedResidualSignalEnergy)<0.5 ***/
	/* (correlationMax^2) < (residualSignalEnergy*delayedResidualSignalEnergy)*0.5 */
	if ((MULT16_16(correlationMaxWord16, correlationMaxWord16) < SHR(MULT16_16(residualSignalEnergyWord16, delayedResidualSignalEnergyWord16), 1)) /* eq82 */
		|| ((correlationMaxWord16==0) && (delayedResidualSignalEnergyWord16==0))) { /* correlationMax and delayedResidualSignalEnergy values are 0 -> unable to compute g0 and g1 -> disable filter */
		/* long term post filter disabled */
		for (i=0; i<L_SUBFRAME; i++) {
			decoderChannelContext->longTermFilteredResidualSignal[i] = residualSignal[i];
		}
	} else { /* eq82 gives long term filter enabled, */
		word16_t g0, g1;
		/* eq83: gl = correlationMax/delayedResidualSignalEnergy bounded in ]0,1] */
		/* check if gl > 1 -> gl=1 -> g=1/2 -> g0=2/3 and g1=1/3 */
		if (correlationMax > delayedResidualSignalEnergy) {
			g0 = 21845; /* 2/3 in Q15 */
			g1 = 10923; /* 1/3 in Q15 */
		} else {
			/* g1 = correlationMax/(2*delayedResidualSignalEnergy+correlationMax) */
			g1 = DIV32((word32_t)SHL32(correlationMaxWord16,15),(word32_t)ADD32(SHL32(delayedResidualSignalEnergyWord16,1), correlationMaxWord16)); /* g1 in Q15 */
			g0 = SUB16(32767, g1); /* g0 = 1 - g1 in Q15 */
		}
		
		/* longTermFilteredResidualSignal[i] = g0*residualSignal[i] + g1*delayedResidualSignal[i]*/
		delayedResidualSignal = &(residualSignal[-bestIntPitchDelay]);
		for (i=0; i<L_SUBFRAME; i++) {
			decoderChannelContext->longTermFilteredResidualSignal[i] = (word16_t)SATURATE(PSHR(ADD32(MULT16_16(g0, residualSignal[i]), MULT16_16(g1, delayedResidualSignal[i])), 15), MAXINT16);
		}
	}
	
	/********************************************************************/
	/* Tilt Compensation Filter                                         */
	/********************************************************************/

	/* compute hf the truncated (to 22 coefficients) impulse response of the filter A(z/γn)/A(z/γd) described in spec 4.2.2 eq84 */
	/* hf(i) = LPGammaNCoeff[i] - ∑[j:0..9]LPGammaDCoeff[j]*hf[i-j-1]) */
	word16_t LPGammaDCoefficients[NB_LSP_COEFF]; /* in Q12 */
	/* GAMMA_XX constants are in Q15 */
	LPGammaDCoefficients[0] = MULT16_16_P15(LPCoefficients[0], GAMMA_D1);
	LPGammaDCoefficients[1] = MULT16_16_P15(LPCoefficients[1], GAMMA_D2);
	LPGammaDCoefficients[2] = MULT16_16_P15(LPCoefficients[2], GAMMA_D3);
	LPGammaDCoefficients[3] = MULT16_16_P15(LPCoefficients[3], GAMMA_D4);
	LPGammaDCoefficients[4] = MULT16_16_P15(LPCoefficients[4], GAMMA_D5);
	LPGammaDCoefficients[5] = MULT16_16_P15(LPCoefficients[5], GAMMA_D6);
	LPGammaDCoefficients[6] = MULT16_16_P15(LPCoefficients[6], GAMMA_D7);
	LPGammaDCoefficients[7] = MULT16_16_P15(LPCoefficients[7], GAMMA_D8);
	LPGammaDCoefficients[8] = MULT16_16_P15(LPCoefficients[8], GAMMA_D9);
	LPGammaDCoefficients[9] = MULT16_16_P15(LPCoefficients[9], GAMMA_D10);

	word16_t hf[22]; /* the truncated impulse response to short term filter Hf in Q12 */
	hf[0] = 4096; /* 1 in Q12 as LPGammaNCoefficients and LPGammaDCoefficient doesn't contain the first element which is 1 and past values of hf are 0 */
	for (i=1; i<11; i++) {
		word32_t acc = (word32_t)SHL(LPGammaNCoefficients[i-1],12); /* LPGammaNCoefficients in Q12 -> acc in Q24 */
		for (j=0; j<NB_LSP_COEFF && j<i; j++) { /* j<i to avoid access to negative index of hf(past values are 0 anyway) */
			acc = MSU16_16(acc, LPGammaDCoefficients[j], hf[i-j-1]); /* LPGammaDCoefficient in Q12, hf in Q12 -> Q24 TODO: Possible overflow?? */
		}
		hf[i] = (word16_t)SATURATE(PSHR(acc, 12), MAXINT16); /* get result back in Q12 and saturate on 16 bits */
	}
	for (i=11; i<22; i++) {
		word32_t acc = 0;
		for (j=0; j<NB_LSP_COEFF; j++) { /* j<i to avoid access to negative index of hf(past values are 0 anyway) */
			acc = MSU16_16(acc, LPGammaDCoefficients[j], hf[i-j-1]); /* LPGammaDCoefficient in Q12, hf in Q12 -> Q24 TODO: Possible overflow?? */
		}
		hf[i] = (word16_t)SATURATE(PSHR(acc, 12), MAXINT16); /* get result back in Q12 and saturate on 16 bits */
	}

	/* hf is then used to compute k'1 spec 4.2.3 eq87: k'1 = -rh1/rh0 */
	/* rh0 = ∑[i:0..21]hf[i]*hf[i] */
	/* rh1 = ∑[i:0..20]hf[i]*hf[i+1] */
	word32_t rh1 = MULT16_16(hf[0], hf[1]);
	for (i=1; i<21; i++) {
		rh1 = MAC16_16(rh1, hf[i], hf[i+1]); /* rh1 in Q24 */
	}

	/* tiltCompensationGain is set to 0 if k'1>0 -> rh1<0 (as rh0 is always>0) */
	word16_t tiltCompensatedSignal[L_SUBFRAME]; /* in Q0 */
	if (rh1<0) { /* tiltCompensationGain = 0 -> no gain filter is off, just copy the input */
		memcpy(tiltCompensatedSignal, decoderChannelContext->longTermFilteredResidualSignal, L_SUBFRAME*sizeof(word16_t));
	} else { /*compute tiltCompensationGain = k'1*γt */
		word32_t rh0 = MULT16_16(hf[0], hf[0]);
		for (i=1; i<22; i++) {
			rh0 = MAC16_16(rh0, hf[i], hf[i]); /* rh0 in Q24 */
		}
		rh1 = MULT16_32_Q15(GAMMA_T, rh1); /* GAMMA_T in Q15, rh1 in Q24*/
		word16_t tiltCompensationGain = (word16_t)SATURATE((word32_t)(DIV32(rh1,PSHR(rh0,12))), MAXINT16); /* rh1 in Q24, PSHR(rh0,12) in Q12 -> tiltCompensationGain in Q12 */
		
		/* compute filter Ht (spec A.4.2.3 eqA14) = 1 + gain*z(-1) */
		for (i=0; i<L_SUBFRAME; i++) {
			tiltCompensatedSignal[i] = MSU16_16_Q12(decoderChannelContext->longTermFilteredResidualSignal[i], tiltCompensationGain, decoderChannelContext->longTermFilteredResidualSignal[i-1]);
		}
	}
	/* update memory word of longTermFilteredResidualSignal for next subframe */
	decoderChannelContext->longTermFilteredResidualSignal[-1] = decoderChannelContext->longTermFilteredResidualSignal[L_SUBFRAME-1];

	/********************************************************************/
	/* synthesis filter 1/[Â(z /γd)] spec A.4.2.2                       */
	/*                                                                  */
	/*   Note: Â(z/γn) was done before when computing residual signal   */
	/********************************************************************/
	/* shortTermFilteredResidualSignal is accessed in range [-NB_LSP_COEFF,L_SUBFRAME[ */
	synthesisFilter(tiltCompensatedSignal, LPGammaDCoefficients, decoderChannelContext->shortTermFilteredResidualSignal);
	/* get the last NB_LSP_COEFF of shortTermFilteredResidualSignal and set them as memory for next subframe(they do not overlap so use memcpy) */
	memcpy(decoderChannelContext->shortTermFilteredResidualSignalBuffer, &(decoderChannelContext->shortTermFilteredResidualSignalBuffer[L_SUBFRAME]), NB_LSP_COEFF*sizeof(word16_t));

	/********************************************************************/
	/* Adaptive Gain Control spec A.4.2.4                               */
	/*                                                                  */
	/********************************************************************/

	/*** compute G(gain scaling factor) according to eqA15 : G = Sqrt((∑s(n)^2)/∑sf(n)^2 ) ***/
	word16_t gainScalingFactor; /* in Q12 */
	/* compute ∑sf(n)^2 scale the signal shifting left by 2 to avoid overflow on 32 bits sum */
	word32_t shortTermFilteredResidualSignalSquareSum = 0;
	for (i=0; i<L_SUBFRAME; i++) {
		shortTermFilteredResidualSignalSquareSum = MAC16_16_Q4(shortTermFilteredResidualSignalSquareSum, decoderChannelContext->shortTermFilteredResidualSignal[i], decoderChannelContext->shortTermFilteredResidualSignal[i]);
	}
	
	/* if the sum is null we can't compute gain -> output of postfiltering is the output of shortTermFilter and previousAdaptativeGain is set to 0 */
	/* the reset of previousAdaptativeGain is not mentionned in the spec but in ITU code only */
	if (shortTermFilteredResidualSignalSquareSum == 0) {
		decoderChannelContext->previousAdaptativeGain = 0;
		for (i=0; i<L_SUBFRAME; i++) {
			postFilteredSignal[i] = decoderChannelContext->shortTermFilteredResidualSignal[i];
		}
	} else { /* we can compute adaptativeGain and output signal */

		/* compute ∑s(n)^2 scale the signal shifting left by 2 to avoid overflow on 32 bits sum */
		word32_t reconstructedSpeechSquareSum = 0;
		for (i=0; i<L_SUBFRAME; i++) {
			reconstructedSpeechSquareSum = MAC16_16_Q4(reconstructedSpeechSquareSum, reconstructedSpeech[i], reconstructedSpeech[i]);
		}
		
		if (reconstructedSpeechSquareSum==0) { /* numerator is null -> current gain is null */
			gainScalingFactor = 0;	
		} else {
			/* Compute ∑s(n)^2)/∑sf(n)^2  result shall be in Q10 */
			/* normalise the numerator on 32 bits */
			word16_t numeratorShift = countLeadingZeros(reconstructedSpeechSquareSum);
			reconstructedSpeechSquareSum = SHL(reconstructedSpeechSquareSum, numeratorShift); /* reconstructedSpeechSquareSum*2^numeratorShift */

			/* normalise denominator to get the result directly in Q10 if possible */
			word32_t fractionResult; /* stores  ∑s(n)^2)/∑sf(n)^2 */
			word32_t scaledShortTermFilteredResidualSignalSquareSum = VSHR32(shortTermFilteredResidualSignalSquareSum, 10-numeratorShift); /* shortTermFilteredResidualSignalSquareSum*2^(numeratorShift-10)*/
			
			if (scaledShortTermFilteredResidualSignalSquareSum==0) {/* shift might have sent to zero the denominator */
				fractionResult = DIV32(reconstructedSpeechSquareSum, shortTermFilteredResidualSignalSquareSum); /* result in QnumeratorShift */
				fractionResult = VSHR32(fractionResult, numeratorShift-10); /* result in Q10 */
			} else { /* ok denominator is still > 0 */
				fractionResult = DIV32(reconstructedSpeechSquareSum, scaledShortTermFilteredResidualSignalSquareSum); /* result in Q10 */
			}
			/* now compute current Gain =  Sqrt((∑s(n)^2)/∑sf(n)^2 ) */
			/* g729Sqrt_Q0Q7(Q0)->Q7, by giving a Q10 as input, output is in Q12 */
			gainScalingFactor = (word16_t)SATURATE(g729Sqrt_Q0Q7(fractionResult), MAXINT16);

			/* multiply by 0.1 as described in spec A.4.2.4 */
			gainScalingFactor = MULT16_16_P15(gainScalingFactor, 3277); /* in Q12, 3277 = 0.1 in Q15*/
		}
		/* Compute the signal according to eq89 (spec 4.2.4 and section A4.2.4) */
		/* currentGain = 0.9*previousGain + 0.1*gainScalingFactor the 0.1 factor has already been integrated in the variable gainScalingFactor */
		/* outputsignal = currentGain*shortTermFilteredResidualSignal */
		word16_t currentAdaptativeGain = decoderChannelContext->previousAdaptativeGain;
		for (i=0; i<L_SUBFRAME; i++) {
			currentAdaptativeGain = ADD16(gainScalingFactor, MULT16_16_P15(currentAdaptativeGain, 29491)); /* 29492 = 0.9 in Q15, result in Q12 */
			postFilteredSignal[i] = MULT16_16_Q12(currentAdaptativeGain, decoderChannelContext->shortTermFilteredResidualSignal[i]);
		}
		decoderChannelContext->previousAdaptativeGain = currentAdaptativeGain;
	}

	/* shift buffers if needed */
	if (subframeIndex>0) { /* only after 2nd subframe treatment */
		/* shift left by L_FRAME the residualSignal and scaledResidualSignal buffers */
		memmove(decoderChannelContext->residualSignalBuffer, &(decoderChannelContext->residualSignalBuffer[L_FRAME]), MAXIMUM_INT_PITCH_DELAY*sizeof(word16_t));
		memmove(decoderChannelContext->scaledResidualSignalBuffer, &(decoderChannelContext->scaledResidualSignalBuffer[L_FRAME]), MAXIMUM_INT_PITCH_DELAY*sizeof(word16_t));
	}
	return;
}
Пример #18
0
void comb_filter(
spx_sig_t *exc,          /*decoded excitation*/
spx_sig_t *new_exc,      /*enhanced excitation*/
spx_coef_t *ak,           /*LPC filter coefs*/
int p,               /*LPC order*/
int nsf,             /*sub-frame size*/
int pitch,           /*pitch period*/
spx_word16_t *pitch_gain,   /*pitch gain (3-tap)*/
spx_word16_t  comb_gain,    /*gain of comb filter*/
CombFilterMem *mem
)
{
   int i;
   spx_word16_t exc_energy=0, new_exc_energy=0;
   spx_word16_t gain;
   spx_word16_t step;
   spx_word16_t fact;

   /*Compute excitation amplitude prior to enhancement*/
   exc_energy = compute_rms(exc, nsf);
   /*for (i=0;i<nsf;i++)
     exc_energy+=((float)exc[i])*exc[i];*/

   /*Some gain adjustment if pitch is too high or if unvoiced*/
#ifdef FIXED_POINT
   {
      spx_word16_t g = gain_3tap_to_1tap(pitch_gain)+gain_3tap_to_1tap(mem->last_pitch_gain);
      if (g > 166)
         comb_gain = MULT16_16_Q15(DIV32_16(SHL(165,15),g), comb_gain);
      if (g < 64)
         comb_gain = MULT16_16_Q15(SHL(g, 9), comb_gain);
   }
#else
   {
      float g=0;
      g = GAIN_SCALING_1*.5*(gain_3tap_to_1tap(pitch_gain)+gain_3tap_to_1tap(mem->last_pitch_gain));
      if (g>1.3)
         comb_gain*=1.3/g;
      if (g<.5)
         comb_gain*=2.*g;
   }
#endif
   step = DIV32(COMB_STEP, nsf);
   fact=0;

   /*Apply pitch comb-filter (filter out noise between pitch harmonics)*/
   for (i=0;i<nsf;i++)
   {
      spx_word32_t exc1, exc2;

      fact += step;
      
      exc1 = SHL(MULT16_32_Q15(SHL(pitch_gain[0],7),exc[i-pitch+1]) +
                 MULT16_32_Q15(SHL(pitch_gain[1],7),exc[i-pitch]) +
                 MULT16_32_Q15(SHL(pitch_gain[2],7),exc[i-pitch-1]) , 2);
      exc2 = SHL(MULT16_32_Q15(SHL(mem->last_pitch_gain[0],7),exc[i-mem->last_pitch+1]) +
                 MULT16_32_Q15(SHL(mem->last_pitch_gain[1],7),exc[i-mem->last_pitch]) +
                 MULT16_32_Q15(SHL(mem->last_pitch_gain[2],7),exc[i-mem->last_pitch-1]),2);

      new_exc[i] = exc[i] + MULT16_32_Q15(comb_gain,MULT16_32_Q15(fact,exc1)  + MULT16_32_Q15(SUB16(COMB_STEP,fact), exc2));
   }

   mem->last_pitch_gain[0] = pitch_gain[0];
   mem->last_pitch_gain[1] = pitch_gain[1];
   mem->last_pitch_gain[2] = pitch_gain[2];
   mem->last_pitch = pitch;

   /*Amplitude after enhancement*/
   new_exc_energy = compute_rms(new_exc, nsf);

   if (exc_energy > new_exc_energy)
      exc_energy = new_exc_energy;
   
   gain = DIV32_16(SHL(exc_energy,15),1+new_exc_energy);

#ifdef FIXED_POINT
   if (gain < 16384)
      gain = 16384;
#else
   if (gain < .5)
      gain=.5;
#endif

#ifdef FIXED_POINT
   for (i=0;i<nsf;i++)
   {
      mem->smooth_gain = MULT16_16_Q15(31457,mem->smooth_gain) + MULT16_16_Q15(1311,gain);
      new_exc[i] = MULT16_32_Q15(mem->smooth_gain, new_exc[i]);
   }
#else
   for (i=0;i<nsf;i++)
   {
      mem->smooth_gain = .96*mem->smooth_gain + .04*gain;
      new_exc[i] *= mem->smooth_gain;
   }
#endif
}
Пример #19
0
Файл: ltp.c Проект: Affix/fgcom
/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
static spx_word64_t pitch_gain_search_3tap(
    const spx_sig_t target[],       /* Target vector */
    const spx_coef_t ak[],          /* LPCs for this subframe */
    const spx_coef_t awk1[],        /* Weighted LPCs #1 for this subframe */
    const spx_coef_t awk2[],        /* Weighted LPCs #2 for this subframe */
    spx_sig_t exc[],                /* Excitation */
    const void *par,
    int   pitch,                    /* Pitch value */
    int   p,                        /* Number of LPC coeffs */
    int   nsf,                      /* Number of samples in subframe */
    SpeexBits *bits,
    char *stack,
    const spx_sig_t *exc2,
    const spx_word16_t *r,
    spx_sig_t *new_target,
    int  *cdbk_index,
    int cdbk_offset,
    int plc_tuning
)
{
    int i,j;
    VARDECL(spx_sig_t *tmp1);
    VARDECL(spx_sig_t *tmp2);
    spx_sig_t *x[3];
    spx_sig_t *e[3];
    spx_word32_t corr[3];
    spx_word32_t A[3][3];
    int   gain_cdbk_size;
    const signed char *gain_cdbk;
    spx_word16_t gain[3];
    spx_word64_t err;

    const ltp_params *params;
    params = (const ltp_params*) par;
    gain_cdbk_size = 1<<params->gain_bits;
    gain_cdbk = params->gain_cdbk + 3*gain_cdbk_size*cdbk_offset;
    ALLOC(tmp1, 3*nsf, spx_sig_t);
    ALLOC(tmp2, 3*nsf, spx_sig_t);

    x[0]=tmp1;
    x[1]=tmp1+nsf;
    x[2]=tmp1+2*nsf;

    e[0]=tmp2;
    e[1]=tmp2+nsf;
    e[2]=tmp2+2*nsf;
    for (i=2; i>=0; i--)
    {
        int pp=pitch+1-i;
        for (j=0; j<nsf; j++)
        {
            if (j-pp<0)
                e[i][j]=exc2[j-pp];
            else if (j-pp-pitch<0)
                e[i][j]=exc2[j-pp-pitch];
            else
                e[i][j]=0;
        }

        if (i==2)
            syn_percep_zero(e[i], ak, awk1, awk2, x[i], nsf, p, stack);
        else {
            for (j=0; j<nsf-1; j++)
                x[i][j+1]=x[i+1][j];
            x[i][0]=0;
            for (j=0; j<nsf; j++)
            {
                x[i][j]=ADD32(x[i][j],SHL32(MULT16_32_Q15(r[j], e[i][0]),1));
            }
        }
    }

#ifdef FIXED_POINT
    {
        /* If using fixed-point, we need to normalize the signals first */
        spx_word16_t *y[3];
        VARDECL(spx_word16_t *ytmp);
        VARDECL(spx_word16_t *t);

        spx_sig_t max_val=1;
        int sig_shift;

        ALLOC(ytmp, 3*nsf, spx_word16_t);
#if 0
        ALLOC(y[0], nsf, spx_word16_t);
        ALLOC(y[1], nsf, spx_word16_t);
        ALLOC(y[2], nsf, spx_word16_t);
#else
        y[0] = ytmp;
        y[1] = ytmp+nsf;
        y[2] = ytmp+2*nsf;
#endif
        ALLOC(t, nsf, spx_word16_t);
        for (j=0; j<3; j++)
        {
            for (i=0; i<nsf; i++)
            {
                spx_sig_t tmp = x[j][i];
                if (tmp<0)
                    tmp = -tmp;
                if (tmp > max_val)
                    max_val = tmp;
            }
        }
        for (i=0; i<nsf; i++)
        {
            spx_sig_t tmp = target[i];
            if (tmp<0)
                tmp = -tmp;
            if (tmp > max_val)
                max_val = tmp;
        }

        sig_shift=0;
        while (max_val>16384)
        {
            sig_shift++;
            max_val >>= 1;
        }

        for (j=0; j<3; j++)
        {
            for (i=0; i<nsf; i++)
            {
                y[j][i] = EXTRACT16(SHR32(x[j][i],sig_shift));
            }
        }
        for (i=0; i<nsf; i++)
        {
            t[i] = EXTRACT16(SHR32(target[i],sig_shift));
        }

        for (i=0; i<3; i++)
            corr[i]=inner_prod(y[i],t,nsf);

        for (i=0; i<3; i++)
            for (j=0; j<=i; j++)
                A[i][j]=A[j][i]=inner_prod(y[i],y[j],nsf);
    }
#else
    {
        for (i=0; i<3; i++)
            corr[i]=inner_prod(x[i],target,nsf);

        for (i=0; i<3; i++)
            for (j=0; j<=i; j++)
                A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf);
    }
#endif

    {
        spx_word32_t C[9];
        const signed char *ptr=gain_cdbk;
        int best_cdbk=0;
        spx_word32_t best_sum=0;
        C[0]=corr[2];
        C[1]=corr[1];
        C[2]=corr[0];
        C[3]=A[1][2];
        C[4]=A[0][1];
        C[5]=A[0][2];
        C[6]=A[2][2];
        C[7]=A[1][1];
        C[8]=A[0][0];

        /*plc_tuning *= 2;*/
        if (plc_tuning<2)
            plc_tuning=2;
#ifdef FIXED_POINT
        C[0] = MAC16_32_Q15(C[0],MULT16_16_16(plc_tuning,-327),C[0]);
        C[1] = MAC16_32_Q15(C[1],MULT16_16_16(plc_tuning,-327),C[1]);
        C[2] = MAC16_32_Q15(C[2],MULT16_16_16(plc_tuning,-327),C[2]);
#else
        C[0]*=1-.01*plc_tuning;
        C[1]*=1-.01*plc_tuning;
        C[2]*=1-.01*plc_tuning;
        C[6]*=.5*(1+.01*plc_tuning);
        C[7]*=.5*(1+.01*plc_tuning);
        C[8]*=.5*(1+.01*plc_tuning);
#endif
        for (i=0; i<gain_cdbk_size; i++)
        {
            spx_word32_t sum=0;
            spx_word16_t g0,g1,g2;
            spx_word16_t pitch_control=64;
            spx_word16_t gain_sum;

            ptr = gain_cdbk+3*i;
            g0=ADD16((spx_word16_t)ptr[0],32);
            g1=ADD16((spx_word16_t)ptr[1],32);
            g2=ADD16((spx_word16_t)ptr[2],32);

            gain_sum = g1;
            if (g0>0)
                gain_sum += g0;
            if (g2>0)
                gain_sum += g2;
            if (gain_sum > 64)
            {
                gain_sum = SUB16(gain_sum, 64);
                if (gain_sum > 127)
                    gain_sum = 127;
#ifdef FIXED_POINT
                pitch_control =  SUB16(64,EXTRACT16(PSHR32(MULT16_16(64,MULT16_16_16(plc_tuning, gain_sum)),10)));
#else
                pitch_control = 64*(1.-.001*plc_tuning*gain_sum);
#endif
                if (pitch_control < 0)
                    pitch_control = 0;
            }

            sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g0,pitch_control),C[0]));
            sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g1,pitch_control),C[1]));
            sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g2,pitch_control),C[2]));
            sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g0,g1),C[3]));
            sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g2,g1),C[4]));
            sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g2,g0),C[5]));
            sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g0,g0),C[6]));
            sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g1,g1),C[7]));
            sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g2,g2),C[8]));
            /* We could force "safe" pitch values to handle packet loss better */

            if (sum>best_sum || i==0)
            {
                best_sum=sum;
                best_cdbk=i;
            }
        }
#ifdef FIXED_POINT
        gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3]);
        gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+1]);
        gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+2]);
        /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/
#else
        gain[0] = 0.015625*gain_cdbk[best_cdbk*3]  + .5;
        gain[1] = 0.015625*gain_cdbk[best_cdbk*3+1]+ .5;
        gain[2] = 0.015625*gain_cdbk[best_cdbk*3+2]+ .5;
#endif
        *cdbk_index=best_cdbk;
    }

#ifdef FIXED_POINT
    for (i=0; i<nsf; i++)
        exc[i]=SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),e[2][i]), MULT16_32_Q15(SHL16(gain[1],7),e[1][i])),
                           MULT16_32_Q15(SHL16(gain[2],7),e[0][i])), 2);

    err=0;
    for (i=0; i<nsf; i++)
    {
        spx_word16_t perr2;
        spx_sig_t tmp = SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),x[2][i]),MULT16_32_Q15(SHL16(gain[1],7),x[1][i])),
                                    MULT16_32_Q15(SHL16(gain[2],7),x[0][i])),2);
        spx_sig_t perr=SUB32(target[i],tmp);
        new_target[i] = SUB32(target[i], tmp);
        perr2 = EXTRACT16(PSHR32(perr,15));
        err = ADD64(err,MULT16_16(perr2,perr2));

    }
#else
    for (i=0; i<nsf; i++)
        exc[i]=gain[0]*e[2][i]+gain[1]*e[1][i]+gain[2]*e[0][i];

    err=0;
    for (i=0; i<nsf; i++)
    {
        spx_sig_t tmp = gain[2]*x[0][i]+gain[1]*x[1][i]+gain[0]*x[2][i];
        new_target[i] = target[i] - tmp;
        err+=new_target[i]*new_target[i];
    }
#endif

    return err;
}
Пример #20
0
int LP2LSPConversion(word16_t LPCoefficients[], word16_t LSPCoefficients[])
{
    uint8_t i;

    /*** Compute the polynomials coefficients according to spec 3.2.3 eq15 ***/
    word32_t f1[6];
    word32_t f2[6]; /* coefficients for polynomials F1 anf F2 in Q12 for computation, then converted in Q15 for the Chebyshev Polynomial function */
    f1[0] = ONE_IN_Q12; /* values 0 are not part of the output, they are just used for computation purpose */
    f2[0] = ONE_IN_Q12;
    /* for (i = 0; i< 5; i++) {		*/
    /*   f1[i+1] = a[i+1] + a[10-i] - f1[i];	*/
    /*   f2[i+1] = a[i+1] - a[10-i] + f2[i];	*/
    /* }					*/
    for (i=0; i<5; i++) {
        f1[i+1] = ADD32(LPCoefficients[i], SUB32(LPCoefficients[9-i], f1[i])); /* note: index on LPCoefficients are -1 respect to spec because the unused value 0 is not stored */
        f2[i+1] = ADD32(f2[i], SUB32(LPCoefficients[i], LPCoefficients[9-i])); /* note: index on LPCoefficients are -1 respect to spec because the unused value 0 is not stored */
    }
    /* convert the coefficients from Q12 to Q15 to be used by the Chebyshev Polynomial function (f1/2[0] aren't used so they are not converted) */
    for (i=1; i<6; i++) {
        f1[i] = SHL(f1[i], 3);
        f2[i] = SHL(f2[i], 3);
    }

    /*** Compute at each step(50 steps for the AnnexA version) the Chebyshev polynomial to find the 10 roots ***/
    /* start using f1 polynomials coefficients and altern with f2 after founding each root (spec 3.2.3 eq13 and eq14) */
    uint8_t numberOfRootFound = 0; /* used to check the final number of roots found and exit the loop on each polynomial computation when we have 10 roots */
    word32_t *polynomialCoefficients = f1; /* start with f1 coefficients */
    word32_t previousCx = ChebyshevPolynomial(cosW0pi[0], polynomialCoefficients); /* compute the first point and store it as the previous value for polynomial */
    word32_t Cx; /* value of Chebyshev Polynomial at current point in Q15 */

    for (i=1; i<NB_COMPUTED_VALUES_CHEBYSHEV_POLYNOMIAL; i++) {
        Cx =  ChebyshevPolynomial(cosW0pi[i], polynomialCoefficients);
        if ((previousCx^Cx)&0x10000000) { /* check signe change by XOR on the value of first bit */
            /* divide 2 times the interval to find a more accurate root */
            uint8_t j;
            word16_t xLow = cosW0pi[i-1];
            word16_t xHigh = cosW0pi[i];
            word16_t xMean;
            for (j=0; j<2; j++) {
                xMean = (word16_t)SHR(ADD32(xLow, xHigh), 1);
                word32_t middleCx = ChebyshevPolynomial(xMean, polynomialCoefficients); /* compute the polynome for the value in the middle of current interval */

                if ((previousCx^middleCx)&0x10000000) { /* check signe change by XOR on the value of first bit */
                    xHigh = xMean;
                    Cx = middleCx; /* used for linear interpolation on root */
                } else {
                    xLow = xMean;
                    previousCx = middleCx;
                }
            }

            /* toggle the polynomial coefficients in use between f1 and f2 */
            if (polynomialCoefficients==f1) {
                polynomialCoefficients = f2;
            } else {
                polynomialCoefficients = f1;
            }

            /* linear interpolation for better root accuracy */
            /* xMean = xLow - (xHigh-xLow)* previousCx/(Cx-previousCx); */
            xMean = (word16_t)SUB32(xLow, MULT16_32_Q15(SUB32(xHigh, xLow), DIV32(SHL(previousCx, 14), SHR(SUB32(Cx, previousCx), 1)))); /* Cx are in Q2.15 so we can shift them left 14 bits, the denominator is shifted righ by 1 so the division result is in Q15 */

            /* recompute previousCx with the new coefficients */
            previousCx = ChebyshevPolynomial(xMean, polynomialCoefficients);

            LSPCoefficients[numberOfRootFound] = xMean;

            numberOfRootFound++;
            if (numberOfRootFound == NB_LSP_COEFF) break; /* exit the for loop as soon as we habe all the LSP*/
        }

    }
    if (numberOfRootFound != NB_LSP_COEFF) return 0; /* we were not able to find the 10 roots */


    return 1;
}
Пример #21
0
Файл: ltp.c Проект: Affix/fgcom
void pitch_unquant_3tap(
    spx_sig_t exc[],                    /* Excitation */
    int   start,                    /* Smallest pitch value allowed */
    int   end,                      /* Largest pitch value allowed */
    spx_word16_t pitch_coef,               /* Voicing (pitch) coefficient */
    const void *par,
    int   nsf,                      /* Number of samples in subframe */
    int *pitch_val,
    spx_word16_t *gain_val,
    SpeexBits *bits,
    char *stack,
    int count_lost,
    int subframe_offset,
    spx_word16_t last_pitch_gain,
    int cdbk_offset
)
{
    int i;
    int pitch;
    int gain_index;
    spx_word16_t gain[3];
    const signed char *gain_cdbk;
    int gain_cdbk_size;
    const ltp_params *params;

    params = (const ltp_params*) par;
    gain_cdbk_size = 1<<params->gain_bits;
    gain_cdbk = params->gain_cdbk + 3*gain_cdbk_size*cdbk_offset;

    pitch = speex_bits_unpack_unsigned(bits, params->pitch_bits);
    pitch += start;
    gain_index = speex_bits_unpack_unsigned(bits, params->gain_bits);
    /*printf ("decode pitch: %d %d\n", pitch, gain_index);*/
#ifdef FIXED_POINT
    gain[0] = 32+(spx_word16_t)gain_cdbk[gain_index*3];
    gain[1] = 32+(spx_word16_t)gain_cdbk[gain_index*3+1];
    gain[2] = 32+(spx_word16_t)gain_cdbk[gain_index*3+2];
#else
    gain[0] = 0.015625*gain_cdbk[gain_index*3]+.5;
    gain[1] = 0.015625*gain_cdbk[gain_index*3+1]+.5;
    gain[2] = 0.015625*gain_cdbk[gain_index*3+2]+.5;
#endif

    if (count_lost && pitch > subframe_offset)
    {
        float gain_sum;
        if (1) {
            float tmp = count_lost < 4 ? GAIN_SCALING_1*last_pitch_gain : 0.4 * GAIN_SCALING_1 * last_pitch_gain;
            if (tmp>.95)
                tmp=.95;
            gain_sum = GAIN_SCALING_1*gain_3tap_to_1tap(gain);

            if (gain_sum > tmp) {
                float fact = tmp/gain_sum;
                for (i=0; i<3; i++)
                    gain[i]*=fact;

            }

        }

    }

    *pitch_val = pitch;
    gain_val[0]=gain[0];
    gain_val[1]=gain[1];
    gain_val[2]=gain[2];

    {
        spx_sig_t *e[3];
        VARDECL(spx_sig_t *tmp2);
        ALLOC(tmp2, 3*nsf, spx_sig_t);
        e[0]=tmp2;
        e[1]=tmp2+nsf;
        e[2]=tmp2+2*nsf;

        for (i=0; i<3; i++)
        {
            int j;
            int pp=pitch+1-i;
#if 0
            for (j=0; j<nsf; j++)
            {
                if (j-pp<0)
                    e[i][j]=exc[j-pp];
                else if (j-pp-pitch<0)
                    e[i][j]=exc[j-pp-pitch];
                else
                    e[i][j]=0;
            }
#else
            {
                int tmp1, tmp3;
                tmp1=nsf;
                if (tmp1>pp)
                    tmp1=pp;
                for (j=0; j<tmp1; j++)
                    e[i][j]=exc[j-pp];
                tmp3=nsf;
                if (tmp3>pp+pitch)
                    tmp3=pp+pitch;
                for (j=tmp1; j<tmp3; j++)
                    e[i][j]=exc[j-pp-pitch];
                for (j=tmp3; j<nsf; j++)
                    e[i][j]=0;
            }
#endif
        }

#ifdef FIXED_POINT
        {
            for (i=0; i<nsf; i++)
                exc[i]=SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),e[2][i]), MULT16_32_Q15(SHL16(gain[1],7),e[1][i])),
                                   MULT16_32_Q15(SHL16(gain[2],7),e[0][i])), 2);
        }
#else
        for (i=0; i<nsf; i++)
            exc[i]=VERY_SMALL+gain[0]*e[2][i]+gain[1]*e[1][i]+gain[2]*e[0][i];
#endif
    }
}
Пример #22
0
static
#endif
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
      celt_sig *mem, int accum)
{
   int c;
   int Nd;
   int apply_downsampling=0;
   opus_val16 coef0;
   VARDECL(celt_sig, scratch);
   SAVE_STACK;
#ifndef FIXED_POINT
   (void)accum;
   celt_assert(accum==0);
#endif
   ALLOC(scratch, N, celt_sig);
   coef0 = coef[0];
   Nd = N/downsample;
   c=0; do {
      int j;
      celt_sig * OPUS_RESTRICT x;
      opus_val16  * OPUS_RESTRICT y;
      celt_sig m = mem[c];
      x =in[c];
      y = pcm+c;
#ifdef CUSTOM_MODES
      if (coef[1] != 0)
      {
         opus_val16 coef1 = coef[1];
         opus_val16 coef3 = coef[3];
         for (j=0;j<N;j++)
         {
            celt_sig tmp = x[j] + m + VERY_SMALL;
            m = MULT16_32_Q15(coef0, tmp)
                          - MULT16_32_Q15(coef1, x[j]);
            tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2);
            scratch[j] = tmp;
         }
         apply_downsampling=1;
      } else
#endif
      if (downsample>1)
      {
         /* Shortcut for the standard (non-custom modes) case */
         for (j=0;j<N;j++)
         {
            celt_sig tmp = x[j] + m + VERY_SMALL;
            m = MULT16_32_Q15(coef0, tmp);
            scratch[j] = tmp;
         }
         apply_downsampling=1;
      } else {
         /* Shortcut for the standard (non-custom modes) case */
#ifdef FIXED_POINT
         if (accum)
         {
            for (j=0;j<N;j++)
            {
               celt_sig tmp = x[j] + m + VERY_SMALL;
               m = MULT16_32_Q15(coef0, tmp);
               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));
            }
         } else
#endif
         {
            for (j=0;j<N;j++)
            {
               celt_sig tmp = x[j] + m + VERY_SMALL;
               m = MULT16_32_Q15(coef0, tmp);
               y[j*C] = SCALEOUT(SIG2WORD16(tmp));
            }
         }
      }
      mem[c] = m;

      if (apply_downsampling)
      {
         /* Perform down-sampling */
#ifdef FIXED_POINT
         if (accum)
         {
            for (j=0;j<Nd;j++)
               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));
         } else
#endif
         {
            for (j=0;j<Nd;j++)
               y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
         }
      }
   } while (++c<C);
   RESTORE_STACK;
}
Пример #23
0
/** Performs echo cancellation on a frame */
EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
{
   int i,j, chan, speak;
   int N,M, C, K;
   spx_word32_t Syy,See,Sxx,Sdd, Sff;
#ifdef TWO_PATH
   spx_word32_t Dbf;
   int update_foreground;
#endif
   spx_word32_t Sey;
   spx_word16_t ss, ss_1;
   spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
   spx_float_t alpha, alpha_1;
   spx_word16_t RER;
   spx_word32_t tmp32;
   
   N = st->window_size;
   M = st->M;
   C = st->C;
   K = st->K;

   st->cancel_count++;
#ifdef FIXED_POINT
   ss=DIV32_16(11469,M);
   ss_1 = SUB16(32767,ss);
#else
   ss=.35/M;
   ss_1 = 1-ss;
#endif

   for (chan = 0; chan < C; chan++)
   {
      /* Apply a notch filter to make sure DC doesn't end up causing problems */
      filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
      /* Copy input data to buffer and apply pre-emphasis */
      /* Copy input data to buffer */
      for (i=0;i<st->frame_size;i++)
      {
         spx_word32_t tmp32;
         /* FIXME: This core has changed a bit, need to merge properly */
         tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
#ifdef FIXED_POINT
         if (tmp32 > 32767)
         {
            tmp32 = 32767;
            if (st->saturated == 0)
               st->saturated = 1;
         }      
         if (tmp32 < -32767)
         {
            tmp32 = -32767;
            if (st->saturated == 0)
               st->saturated = 1;
         }
#endif
         st->memD[chan] = st->input[chan*st->frame_size+i];
         st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
      }
   }

   for (speak = 0; speak < K; speak++)
   {
      for (i=0;i<st->frame_size;i++)
      {
         spx_word32_t tmp32;
         st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
         tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
#ifdef FIXED_POINT
         /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
         if (tmp32 > 32767)
         {
            tmp32 = 32767;
            st->saturated = M+1;
         }      
         if (tmp32 < -32767)
         {
            tmp32 = -32767;
            st->saturated = M+1;
         }      
#endif
         st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
         st->memX[speak] = far_end[i*K+speak];
      }
   }   
   
   for (speak = 0; speak < K; speak++)
   {
      /* Shift memory: this could be optimized eventually*/
      for (j=M-1;j>=0;j--)
      {
         for (i=0;i<N;i++)
            st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
      }
      /* Convert x (echo input) to frequency domain */
      spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
   }
   
   Sxx = 0;
   for (speak = 0; speak < K; speak++)
   {
      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
      power_spectrum_accum(st->X+speak*N, st->Xf, N);
   }
   
   Sff = 0;  
   for (chan = 0; chan < C; chan++)
   {
#ifdef TWO_PATH
      /* Compute foreground filter */
      spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
      spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
      for (i=0;i<st->frame_size;i++)
         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
      Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
#endif
   }
   
   /* Adjust proportional adaption rate */
   /* FIXME: Adjust that for C, K*/
   if (st->adapted)
      mdf_adjust_prop (st->W, N, M, C*K, st->prop);
   /* Compute weight gradient */
   if (st->saturated == 0)
   {
      for (chan = 0; chan < C; chan++)
      {
         for (speak = 0; speak < K; speak++)
         {
            for (j=M-1;j>=0;j--)
            {
               weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
               for (i=0;i<N;i++)
                  st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
            }
         }
      }
   } else {
      st->saturated--;
   }
   
   /* FIXME: MC conversion required */ 
   /* Update weight to prevent circular convolution (MDF / AUMDF) */
   for (chan = 0; chan < C; chan++)
   {
      for (speak = 0; speak < K; speak++)
      {
         for (j=0;j<M;j++)
         {
            /* This is a variant of the Alternatively Updated MDF (AUMDF) */
            /* Remove the "if" to make this an MDF filter */
            if (j==0 || st->cancel_count%(M-1) == j-1)
            {
#ifdef FIXED_POINT
               for (i=0;i<N;i++)
                  st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
               spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
               for (i=0;i<st->frame_size;i++)
               {
                  st->wtmp[i]=0;
               }
               for (i=st->frame_size;i<N;i++)
               {
                  st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
               }
               spx_fft(st->fft_table, st->wtmp, st->wtmp2);
               /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
               for (i=0;i<N;i++)
                  st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
#else
               spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
               for (i=st->frame_size;i<N;i++)
               {
                  st->wtmp[i]=0;
               }
               spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
#endif
            }
         }
      }
   }
   
   /* So we can use power_spectrum_accum */ 
   for (i=0;i<=st->frame_size;i++)
      st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
      
   Dbf = 0;
   See = 0;    
#ifdef TWO_PATH
   /* Difference in response, this is used to estimate the variance of our residual power estimate */
   for (chan = 0; chan < C; chan++)
   {
      spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
      spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
      for (i=0;i<st->frame_size;i++)
         st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
      Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
      for (i=0;i<st->frame_size;i++)
         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
      See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
   }
#endif

#ifndef TWO_PATH
   Sff = See;
#endif

#ifdef TWO_PATH
   /* Logic for updating the foreground filter */
   
   /* For two time windows, compute the mean of the energy difference, as well as the variance */
   st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
   st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
   st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
   st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
   
   /* Equivalent float code:
   st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
   st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
   st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
   st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
   */
   
   update_foreground = 0;
   /* Check if we have a statistically significant reduction in the residual echo */
   /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
   if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
      update_foreground = 1;
   else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
      update_foreground = 1;
   else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
      update_foreground = 1;
   
   /* Do we update? */
   if (update_foreground)
   {
      st->Davg1 = st->Davg2 = 0;
      st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
      /* Copy background filter to foreground filter */
      for (i=0;i<N*M*C*K;i++)
         st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
      /* Apply a smooth transition so as to not introduce blocking artifacts */
      for (chan = 0; chan < C; chan++)
         for (i=0;i<st->frame_size;i++)
            st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
   } else {
      int reset_background=0;
      /* Otherwise, check if the background filter is significantly worse */
      if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
         reset_background = 1;
      if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
         reset_background = 1;
      if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
         reset_background = 1;
      if (reset_background)
      {
         /* Copy foreground filter to background filter */
         for (i=0;i<N*M*C*K;i++)
            st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
         /* We also need to copy the output so as to get correct adaptation */
         for (chan = 0; chan < C; chan++)
         {        
            for (i=0;i<st->frame_size;i++)
               st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
            for (i=0;i<st->frame_size;i++)
               st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
         }        
         See = Sff;
         st->Davg1 = st->Davg2 = 0;
         st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
      }
   }
#endif

   Sey = Syy = Sdd = 0;  
   for (chan = 0; chan < C; chan++)
   {    
      /* Compute error signal (for the output with de-emphasis) */ 
      for (i=0;i<st->frame_size;i++)
      {
         spx_word32_t tmp_out;
#ifdef TWO_PATH
         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
#else
         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
#endif
         tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
      /* This is an arbitrary test for saturation in the microphone signal */
         if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
         {
         if (st->saturated == 0)
            st->saturated = 1;
         }
         out[i*C+chan] = WORD2INT(tmp_out);
         st->memE[chan] = tmp_out;
      }

#ifdef DUMP_ECHO_CANCEL_DATA
      dump_audio(in, far_end, out, st->frame_size);
#endif
   
      /* Compute error signal (filter update version) */ 
      for (i=0;i<st->frame_size;i++)
      {
         st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
         st->e[chan*N+i] = 0;
      }
      
      /* Compute a bunch of correlations */
      /* FIXME: bad merge */
      Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
      Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
      Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
      
      /* Convert error to frequency domain */
      spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
      for (i=0;i<st->frame_size;i++)
         st->y[i+chan*N] = 0;
      spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
   
      /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
      power_spectrum_accum(st->E+chan*N, st->Rf, N);
      power_spectrum_accum(st->Y+chan*N, st->Yf, N);
    
   }
   
   /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
   
   /* Do some sanity check */
   if (!(Syy>=0 && Sxx>=0 && See >= 0)
#ifndef FIXED_POINT
       || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
#endif
      )
   {
      /* Things have gone really bad */
      st->screwed_up += 50;
      for (i=0;i<st->frame_size*C;i++)
         out[i] = 0;
   } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
   {
      /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
      st->screwed_up++;
   } else {
      /* Everything's fine */
      st->screwed_up=0;
   }
   if (st->screwed_up>=50)
   {
      speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
      speex_echo_state_reset(st);
      return;
   }

   /* Add a small noise floor to make sure not to have problems when dividing */
   See = MAX32(See, SHR32(MULT16_16(N, 100),6));
     
   for (speak = 0; speak < K; speak++)
   {
      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
      power_spectrum_accum(st->X+speak*N, st->Xf, N);
   }

   
   /* Smooth far end energy estimate over time */
   for (j=0;j<=st->frame_size;j++)
      st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);

   /* Compute filtered spectra and (cross-)correlations */
   for (j=st->frame_size;j>=0;j--)
   {
      spx_float_t Eh, Yh;
      Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
      Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
      Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
      Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
#ifdef FIXED_POINT
      st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
      st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
#else
      st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
      st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
#endif
   }
   
   Pyy = FLOAT_SQRT(Pyy);
   Pey = FLOAT_DIVU(Pey,Pyy);

   /* Compute correlation updatete rate */
   tmp32 = MULT16_32_Q15(st->beta0,Syy);
   if (tmp32 > MULT16_32_Q15(st->beta_max,See))
      tmp32 = MULT16_32_Q15(st->beta_max,See);
   alpha = FLOAT_DIV32(tmp32, See);
   alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
   /* Update correlations (recursive average) */
   st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
   st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
   if (FLOAT_LT(st->Pyy, FLOAT_ONE))
      st->Pyy = FLOAT_ONE;
   /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
   if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
      st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
   if (FLOAT_GT(st->Pey, st->Pyy))
      st->Pey = st->Pyy;
   /* leak_estimate is the linear regression result */
   st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
   /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
   if (st->leak_estimate > 16383)
      st->leak_estimate = 32767;
   else
      st->leak_estimate = SHL16(st->leak_estimate,1);
   /*printf ("%f\n", st->leak_estimate);*/
   
   /* Compute Residual to Error Ratio */
#ifdef FIXED_POINT
   tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
   tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
   /* Check for y in e (lower bound on RER) */
   {
      spx_float_t bound = PSEUDOFLOAT(Sey);
      bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
      if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
         tmp32 = See;
      else if (tmp32 < FLOAT_EXTRACT32(bound))
         tmp32 = FLOAT_EXTRACT32(bound);
   }
   if (tmp32 > SHR32(See,1))
      tmp32 = SHR32(See,1);
   RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
#else
   RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
   /* Check for y in e (lower bound on RER) */
   if (RER < Sey*Sey/(1+See*Syy))
      RER = Sey*Sey/(1+See*Syy);
   if (RER > .5)
      RER = .5;
#endif

   /* We consider that the filter has had minimal adaptation if the following is true*/
   if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
   {
      st->adapted = 1;
   }

   if (st->adapted)
   {
      /* Normal learning rate calculation once we're past the minimal adaptation phase */
      for (i=0;i<=st->frame_size;i++)
      {
         spx_word32_t r, e;
         /* Compute frequency-domain adaptation mask */
         r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
         e = SHL32(st->Rf[i],3)+1;
#ifdef FIXED_POINT
         if (r>SHR32(e,1))
            r = SHR32(e,1);
#else
         if (r>.5*e)
            r = .5*e;
#endif
         r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
         /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
      }
   } else {
      /* Temporary adaption rate if filter is not yet adapted enough */
      spx_word16_t adapt_rate=0;

      if (Sxx > SHR32(MULT16_16(N, 1000),6)) 
      {
         tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
#ifdef FIXED_POINT
         if (tmp32 > SHR32(See,2))
            tmp32 = SHR32(See,2);
#else
         if (tmp32 > .25*See)
            tmp32 = .25*See;
#endif
         adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
      }
      for (i=0;i<=st->frame_size;i++)
         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);


      /* How much have we adapted so far? */
      st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
   }

   /* FIXME: MC conversion required */ 
      for (i=0;i<st->frame_size;i++)
         st->last_y[i] = st->last_y[st->frame_size+i];
   if (st->adapted)
   {
      /* If the filter is adapted, take the filtered echo */
      for (i=0;i<st->frame_size;i++)
         st->last_y[st->frame_size+i] = in[i]-out[i];
   } else {
      /* If filter isn't adapted yet, all we can do is take the far end signal directly */
      /* moved earlier: for (i=0;i<N;i++)
      st->last_y[i] = st->x[i];*/
   }

}