Пример #1
0
/* Helper for curve_to_lpc(): lay a half-spectrum curve out in the packed
   buffer that is handed to spx_drft_backward(). Slot 0 receives curve[0],
   slot 2*k-1 receives curve[k] for 1 <= k < half, the final slot receives
   curve[half-1], and every other slot in [0, 2*half) is zero. */
static void curve_to_lpc_pack(float *spec, const float *curve, int half)
{
   int k;
   for (k=0;k<2*half;k++)
      spec[k] = 0;
   for (k=1;k<half;k++)
      spec[2*k-1] = curve[k];
   spec[0] = curve[0];
   spec[2*half-1] = curve[half-1];
}

/* Helper for curve_to_lpc(): multiply coefficient k by .99^(k+1). The running
   factor is kept in a float, exactly as the inline loops it replaces did. */
static void curve_to_lpc_damp(float *coef, int ord)
{
   float decay = 1.f;
   int k;
   for (k=0;k<ord;k++)
   {
      decay *= .99;
      coef[k] *= decay;
   }
}

/* Transform a masking curve (power spectrum) into a pole-zero filter.
 *
 *   psy   - supplies the transform size (psy->n) and the FFT lookup table
 *   curve - input curve; only the first psy->n/2 entries are read.
 *           NOTE: it is overwritten in place with the correction curve
 *           1/(1e-6 + curve*|A1|^2) used to fit the second filter.
 *   awk1  - output, ord coefficients of the first filter
 *   awk2  - output, ord coefficients of the second (correcting) filter
 *   ord   - number of coefficients written to each of awk1 and awk2
 */
void curve_to_lpc(VorbisPsy *psy, float *curve, float *awk1, float *awk2, int ord)
{
   int half = psy->n >> 1;
   float work[psy->n];
   int k;

   /* First filter: inverse-transform the curve, fit awk1 to the result and
      damp the coefficients */
   curve_to_lpc_pack(work, curve, half);
   spx_drft_backward(&psy->lookup, work);
   _spx_lpc(awk1, work, ord);
   curve_to_lpc_damp(awk1, ord);

   /* Use the second (awk2) filter to correct the first one.
      Start from the sequence {1, awk1[0], ..., awk1[ord-1], 0, ...} */
   for (k=0;k<2*half;k++)
      work[k] = 0;
   for (k=0;k<ord;k++)
      work[k+1] = awk1[k];
   work[0] = 1;
   spx_drft_forward(&psy->lookup, work);

   /* Compute (power) response of awk1 (all zero), collapsing the packed
      pairs in place into one value per bin */
   work[0] *= work[0];
   for (k=1;k<half;k++)
   {
      float re = work[2*k-1];
      float im = work[2*k];
      work[k] = re*re + im*im;
   }
   work[half] = work[2*half-1]*work[2*half-1];

   /* Compute correction required (the 1e-6 term keeps the divisor non-zero
      when the product is zero) */
   for (k=0;k<half;k++)
      curve[k] = 1. / (1e-6f+curve[k]*work[k]);

   /* Second filter: same procedure applied to the correction curve */
   curve_to_lpc_pack(work, curve, half);
   spx_drft_backward(&psy->lookup, work);
   _spx_lpc(awk2, work, ord);
   curve_to_lpc_damp(awk2, ord);
}
Пример #2
0
/** Performs echo cancellation on a frame.
 *
 * @param st   Echo canceller state. Its history buffers, filter weights (W),
 *             gradient (PHI), smoothed statistics and counters are all updated
 *             in place by this call.
 * @param ref  st->frame_size input samples. They are appended to st->d and are
 *             the signal the echo estimate is subtracted from.
 * @param echo st->frame_size input samples. They are appended to st->x and are
 *             the signal run through the adaptive filter W.
 * @param out  Receives st->frame_size samples: ref minus the filter output,
 *             saturated to [-32768, 32767].
 * @param Yout Optional, may be NULL. When non-NULL, receives
 *             st->frame_size+1 values of estimated residual echo power.
 *
 * If the mis-adaptation check triggers, the state is reset and the function
 * returns early: out has already been written, but the filter update is
 * skipped and Yout is left untouched for that frame.
 *
 * NOTE(review): the indexing below presumably relies on
 * st->window_size == 2*st->frame_size -- confirm against the state
 * initialisation.
 */
void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, float *Yout)
{
    int i,j,m;
    int N,M;
    float scale;
    float ESR;
    float SER;
    float Sry=0,Srr=0,Syy=0,Sey=0,See=0,Sxx=0;
    float leak_estimate;

    /* Starts at 1 when sum_adapt is 0 and decays towards .1 as sum_adapt grows */
    leak_estimate = .1+(.9/(1+2*st->sum_adapt));

    N = st->window_size;
    M = st->M;
    /* Applied after each spx_drft_backward() call below */
    scale = 1.0f/N;
    st->cancel_count++;

    /* Copy input data to buffer: move the previous frame into the first half of
       x and d, and put the new frame in the second half */
    for (i=0; i<st->frame_size; i++)
    {
        st->x[i] = st->x[i+st->frame_size];
        st->x[i+st->frame_size] = echo[i];

        st->d[i] = st->d[i+st->frame_size];
        st->d[i+st->frame_size] = ref[i];
    }

    /* Shift memory: this could be optimized eventually*/
    for (i=0; i<N*(M-1); i++)
        st->X[i]=st->X[i+N];

    /* Copy new echo frame */
    for (i=0; i<N; i++)
        st->X[(M-1)*N+i]=st->x[i];

    /* Convert x (echo input) to frequency domain */
    spx_drft_forward(st->fft_lookup, &st->X[(M-1)*N]);

    /* Compute filter response Y (accumulated over the M blocks of X and W) */
    for (i=0; i<N; i++)
        st->Y[i] = 0;
    for (j=0; j<M; j++)
        spectral_mul_accum(&st->X[j*N], &st->W[j*N], st->Y, N);

    /* Convert Y (filter response) to time domain */
    for (i=0; i<N; i++)
        st->y[i] = st->Y[i];
    spx_drft_backward(st->fft_lookup, st->y);
    for (i=0; i<N; i++)
        st->y[i] *= scale;

    /* Transform d (reference signal) to frequency domain */
    for (i=0; i<N; i++)
        st->D[i]=st->d[i];
    spx_drft_forward(st->fft_lookup, st->D);

    /* Compute error signal (signal with echo removed) */
    for (i=0; i<st->frame_size; i++)
    {
        float tmp_out;
        tmp_out = (float)ref[i] - st->y[i+st->frame_size];

        /* E keeps the unsaturated error in its second half; first half is zero */
        st->E[i] = 0;
        st->E[i+st->frame_size] = tmp_out;

        /* Saturation */
        if (tmp_out>32767)
            tmp_out = 32767;
        else if (tmp_out<-32768)
            tmp_out = -32768;
        out[i] = tmp_out;
    }

    /* This bit of code provides faster adaptation by doing a projection of the previous gradient on the
       "MMSE surface" */
    if (1)
    {
        /* NOTE: this Syy shadows the function-level Syy; the outer variable is
           recomputed further down, after y has been corrected */
        float Sge, Sgg, Syy;
        float gain;
        Syy = inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
        /* Y2/y2: response of the previous gradient PHI to the current input X */
        for (i=0; i<N; i++)
            st->Y2[i] = 0;
        for (j=0; j<M; j++)
            spectral_mul_accum(&st->X[j*N], &st->PHI[j*N], st->Y2, N);
        for (i=0; i<N; i++)
            st->y2[i] = st->Y2[i];
        spx_drft_backward(st->fft_lookup, st->y2);
        for (i=0; i<N; i++)
            st->y2[i] *= scale;
        Sge = inner_prod(st->y2+st->frame_size, st->E+st->frame_size, st->frame_size);
        Sgg = inner_prod(st->y2+st->frame_size, st->y2+st->frame_size, st->frame_size);
        /* Compute projection gain (clamped to [-2, 2]) */
        gain = Sge/(N+.03*Syy+Sgg);
        if (gain>2)
            gain = 2;
        if (gain < -2)
            gain = -2;

        /* Apply gain to weights, echo estimates and error.
           NOTE(review): out[] was written above and is not modified here. */
        for (i=0; i<N; i++)
            st->Y[i] += gain*st->Y2[i];
        for (i=0; i<st->frame_size; i++)
        {
            st->y[i+st->frame_size] += gain*st->y2[i+st->frame_size];
            st->E[i+st->frame_size] -= gain*st->y2[i+st->frame_size];
        }
        for (i=0; i<M*N; i++)
            st->W[i] += gain*st->PHI[i];
    }

    /* Compute power spectrum of output (D-Y) and filter response (Y) */
    for (i=0; i<N; i++)
        st->D[i] -= st->Y[i];
    power_spectrum(st->D, st->Rf, N);
    power_spectrum(st->Y, st->Yf, N);

    /* Compute frequency-domain adaptation mask (frame_size+1 bins, capped at 1) */
    for (j=0; j<=st->frame_size; j++)
    {
        float r;
        r = leak_estimate*st->Yf[j] / (1+st->Rf[j]);
        if (r>1)
            r = 1;
        st->fratio[j] = r;
    }

    /* Compute a bunch of correlations over the newest frame_size samples.
       NOTE(review): Sry is computed here but not read again in this function. */
    Sry = inner_prod(st->y+st->frame_size, st->d+st->frame_size, st->frame_size);
    Sey = inner_prod(st->y+st->frame_size, st->E+st->frame_size, st->frame_size);
    See = inner_prod(st->E+st->frame_size, st->E+st->frame_size, st->frame_size);
    Syy = inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
    Srr = inner_prod(st->d+st->frame_size, st->d+st->frame_size, st->frame_size);
    Sxx = inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);

    /* Compute smoothed cross-correlation and energy */
    st->Sey = .98*st->Sey + .02*Sey;
    st->Syy = .98*st->Syy + .02*Syy;
    st->See = .98*st->See + .02*See;

    /* Check if filter is completely mis-adapted (if so, reset filter) */
    if (st->Sey/(1+st->Syy + .01*st->See) < -1)
    {
        /*fprintf (stderr, "reset at %d\n", st->cancel_count);*/
        speex_echo_state_reset(st);
        /* Early exit: no weight update and no Yout for this frame */
        return;
    }

    SER = Srr / (1+Sxx);
    ESR = leak_estimate*Syy / (1+See);
    if (ESR>1)
        ESR = 1;
#if 1
    /* If over-cancellation (creating echo with 180 phase) damp filter */
    if (st->Sey/(1+st->Syy) < -.1 && (ESR > .3))
    {
        for (i=0; i<M*N; i++)
            st->W[i] *= .95;
        st->Sey *= .5;
        /*fprintf (stderr, "corrected down\n");*/
    }
#endif
#if 1
    /* If under-cancellation (leaving echo with 0 phase) scale filter up */
    if (st->Sey/(1+st->Syy) > .1 && (ESR > .1 || SER < 10))
    {
        for (i=0; i<M*N; i++)
            st->W[i] *= 1.05;
        st->Sey *= .5;
        /*fprintf (stderr, "corrected up %d\n", st->cancel_count);*/
    }
#endif

    /* We consider that the filter is adapted if the following is true
       (the flag is only ever set here, never cleared) */
    if (ESR>.6 && st->sum_adapt > 1)
    {
        /*if (!st->adapted)
           fprintf(stderr, "Adapted at %d %f\n", st->cancel_count, st->sum_adapt);*/
        st->adapted = 1;
    }

    /* Update frequency-dependent energy ratio with the total energy ratio */
    for (i=0; i<=st->frame_size; i++)
    {
        st->fratio[i]  = (.2*ESR+.8*min(.005+ESR,st->fratio[i]));
    }

    if (st->adapted)
    {
        st->adapt_rate = .95f/(2+M);
    } else {
        /* Temporary adaptation rate if filter is not adapted correctly:
           the larger SER is, the smaller the step */
        if (SER<.1)
            st->adapt_rate =.8/(2+M);
        else if (SER<1)
            st->adapt_rate =.4/(2+M);
        else if (SER<10)
            st->adapt_rate =.2/(2+M);
        else if (SER<30)
            st->adapt_rate =.08/(2+M);
        else
            st->adapt_rate = 0;
    }

    /* How much have we adapted so far? */
    st->sum_adapt += st->adapt_rate;

    /* Compute echo power in each frequency bin */
    {
        /* Smoothing coefficient: 1/cancel_count, but never below .3/M */
        float ss = 1.0f/st->cancel_count;
        if (ss < .3/M)
            ss=.3/M;
        power_spectrum(&st->X[(M-1)*N], st->Xf, N);
        /* Smooth echo energy estimate over time */
        for (j=0; j<=st->frame_size; j++)
            st->power[j] = (1-ss)*st->power[j] + ss*st->Xf[j];


        /* Combine adaptation rate with the inverse energy estimate */
        if (st->adapted)
        {
            /* If filter is adapted, include the frequency-dependent ratio too */
            for (i=0; i<=st->frame_size; i++)
                st->power_1[i] = st->adapt_rate*st->fratio[i] /(1.f+st->power[i]);
        } else {
            for (i=0; i<=st->frame_size; i++)
                st->power_1[i] = st->adapt_rate/(1.f+st->power[i]);
        }
    }


    /* Convert error to frequency domain */
    spx_drft_forward(st->fft_lookup, st->E);

    /* Do some regularization (prevents problems when system is ill-conditioned) */
    for (m=0; m<M; m++)
        for (i=0; i<N; i++)
            st->W[m*N+i] *= 1-st->regul[i]*ESR;

    /* Compute weight gradient (one block of PHI per block of X) */
    for (j=0; j<M; j++)
    {
        weighted_spectral_mul_conj(st->power_1, &st->X[j*N], st->E, st->PHI+N*j, N);
    }

    /* Gradient descent */
    for (i=0; i<M*N; i++)
        st->W[i] += st->PHI[i];

    /* AUMDF weight constraint: only the block selected by cancel_count%M is
       constrained on each call */
    for (j=0; j<M; j++)
    {
        /* Remove the "if" to make this an MDF filter */
        if (st->cancel_count%M == j)
        {
            /* To time domain, zero the second half, back to frequency domain */
            spx_drft_backward(st->fft_lookup, &st->W[j*N]);
            for (i=0; i<N; i++)
                st->W[j*N+i]*=scale;
            for (i=st->frame_size; i<N; i++)
            {
                st->W[j*N+i]=0;
            }
            spx_drft_forward(st->fft_lookup, &st->W[j*N]);
        }
    }

    /* Compute spectrum of estimated echo for use in an echo post-filter (if necessary)*/
    if (Yout)
    {
        if (st->adapted)
        {
            /* If the filter is adapted, take the filtered echo */
            for (i=0; i<st->frame_size; i++)
                st->last_y[i] = st->last_y[st->frame_size+i];
            for (i=0; i<st->frame_size; i++)
                st->last_y[st->frame_size+i] = st->y[st->frame_size+i];
        } else {
            /* If filter isn't adapted yet, all we can do is take the echo signal directly */
            for (i=0; i<N; i++)
                st->last_y[i] = st->x[i];
        }

        /* Apply Hann window (should pre-compute it)*/
        for (i=0; i<N; i++)
            st->Yps[i] = (.5-.5*cos(2*M_PI*i/N))*st->last_y[i];

        /* Compute power spectrum of the echo (Yps is both input and output) */
        spx_drft_forward(st->fft_lookup, st->Yps);
        power_spectrum(st->Yps, st->Yps, N);

        /* Estimate residual echo */
        for (i=0; i<=st->frame_size; i++)
            Yout[i] = 2*leak_estimate*st->Yps[i];
    }

}
Пример #3
0
/* Run the preprocessor on one frame (variant taking a float residual-echo
 * estimate).
 *
 *   st   - preprocessor state; noise/SNR estimates, gains, the overlap buffer
 *          and counters are updated in place
 *   x    - input samples, handed to preprocess_analysis(); on return the
 *          first st->frame_size entries hold the processed output
 *   echo - optional residual echo estimate, indexed per bin 1..N-1; may be
 *          NULL, in which case no echo term is folded in
 *
 * Returns 1, unless st->vad_enabled is set, in which case the value returned
 * by speex_compute_vad() is returned.
 */
int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, float *echo)
{
   int i;
   int is_speech=1;
   float mean_post=0;
   float mean_prior=0;
   int N = st->ps_size;
   /* N3: number of output samples overlap-added with st->outbuf;
      N4: remaining output samples taken straight from the frame (N3+N4 == frame_size) */
   int N3 = 2*N - st->frame_size;
   int N4 = st->frame_size - N3;
   float scale=.5f/N;
   float *ps=st->ps;
   float Zframe=0, Pframe;

   preprocess_analysis(st, x);

   update_noise_prob(st);

   st->nb_preprocess++;

   /* Noise estimation is always updated while nb_adapt is below 10.
      NOTE(review): this comment used to say "for the 20 first times"; the code
      tests against 10. nb_adapt is not modified in this function. */
   if (st->nb_adapt<10)
   {
      update_noise(st, ps, echo);
   }

   /* Deal with residual echo if provided */
   if (echo)
      for (i=1;i<N;i++)
         st->echo_noise[i] = (.3f*st->echo_noise[i] + echo[i]);

   /* Compute a posteriori SNR (capped at 100 from above, not floored at 0) */
   for (i=1;i<N;i++)
   {
      st->post[i] = ps[i]/(1.f+NOISE_OVERCOMPENS*st->noise[i]+st->echo_noise[i]+st->reverb_estimate[i]) - 1.f;
      if (st->post[i]>100.f)
         st->post[i]=100.f;
      /*if (st->post[i]<0)
        st->post[i]=0;*/
      mean_post+=st->post[i];
   }
   /* The sum covers bins 1..N-1 but is divided by N */
   mean_post /= N;
   if (mean_post<0.f)
      mean_post=0.f;

   /* Special case for first frame */
   if (st->nb_adapt==1)
      for (i=1;i<N;i++)
         st->old_ps[i] = ps[i];

   /* Compute a priori SNR */
   {
      /* A priori update rate */
      float gamma;
      float min_gamma=0.12f;
      gamma = 1.0f/st->nb_preprocess;

      /*Make update rate smaller when there's no speech*/
#if 0
      if (mean_post<3.5 && mean_prior < 1)
         min_gamma *= (mean_post+.5);
      else
         min_gamma *= 4.;
#else
      /* mean_prior is still 0 at this point; it is only accumulated in the loop below */
      min_gamma = .1f*fabs(mean_prior - mean_post)*fabs(mean_prior - mean_post);
      if (min_gamma>.15f)
         min_gamma = .15f;
      if (min_gamma<.02f)
         min_gamma = .02f;
#endif
      /*min_gamma = .08;*/

      /*if (gamma<min_gamma)*/
         gamma=min_gamma;
      /* NOTE(review): the fixed value below overrides everything computed for
         gamma above, so the update rate is effectively the constant .1 */
      gamma = .1;
      for (i=1;i<N;i++)
      {
         
         /* A priori SNR update: mix of the clipped a posteriori SNR and the
            previous frame's gained power over the current noise estimate */
         st->prior[i] = gamma*max(0.0f,st->post[i]) +
         (1.f-gamma)*st->gain[i]*st->gain[i]*st->old_ps[i]/(1.f+NOISE_OVERCOMPENS*st->noise[i]+st->echo_noise[i]+st->reverb_estimate[i]);
         
         if (st->prior[i]>100.f)
            st->prior[i]=100.f;
         
         mean_prior+=st->prior[i];
      }
   }
   mean_prior /= N;

#if 0
   for (i=0;i<N;i++)
   {
      fprintf (stderr, "%f ", st->prior[i]);
   }
   fprintf (stderr, "\n");
#endif
   /*fprintf (stderr, "%f %f\n", mean_prior,mean_post);*/

   /* From the 20th call on, count consecutive frames that look like noise */
   if (st->nb_preprocess>=20)
   {
      int do_update = 0;
      float noise_ener=0, sig_ener=0;
      /* If SNR is low (both a priori and a posteriori), update the noise estimate*/
      /*if (mean_prior<.23 && mean_post < .5)*/
      if (mean_prior<.23f && mean_post < .5f)
         do_update = 1;
      for (i=1;i<N;i++)
      {
         noise_ener += st->noise[i];
         sig_ener += ps[i];
      }
      /* Also flag the frame when the noise estimate exceeds 3x the signal energy */
      if (noise_ener > 3.f*sig_ener)
         do_update = 1;
      /*do_update = 0;*/
      if (do_update)
      {
         st->consec_noise++;
      } else {
         st->consec_noise=0;
      }
   }

   if (st->vad_enabled)
      is_speech = speex_compute_vad(st, ps, mean_prior, mean_post);


   if (st->consec_noise>=3)
   {
      /* Three or more noise-like frames in a row: full update from the previous spectrum */
      update_noise(st, st->old_ps, echo);
   } else {
      /* Otherwise update bins 1..N-2 individually, only where the update
         probability is low or the power fell below the noise estimate */
      for (i=1;i<N-1;i++)
      {
         if (st->update_prob[i]<.5f || st->ps[i] < st->noise[i])
         {
            if (echo)
               st->noise[i] = .90f*st->noise[i] + .1f*max(1.0f,st->ps[i]-echo[i]);
            else
               st->noise[i] = .90f*st->noise[i] + .1f*st->ps[i];
         }
      }
   }

   /* Smooth the a priori SNR over time */
   for (i=1;i<N;i++)
   {
      st->zeta[i] = .7f*st->zeta[i] + .3f*st->prior[i];
   }

   {
      /* Bin indices corresponding to 300 Hz and 2000 Hz */
      int freq_start = (int)(300.0f*2.f*N/st->sampling_rate);
      int freq_end   = (int)(2000.0f*2.f*N/st->sampling_rate);
      for (i=freq_start;i<freq_end;i++)
      {
         Zframe += st->zeta[i];         
      }
   }

   /* NOTE(review): the band sum is divided by N, not by the number of bins summed */
   Zframe /= N;
   /* Map Zframe to a frame-level probability Pframe in [0,1], tracking a peak value */
   if (Zframe<ZMIN)
   {
      Pframe = 0;
   } else {
      if (Zframe > 1.5f*st->Zlast)
      {
         Pframe = 1.f;
         st->Zpeak = Zframe;
         if (st->Zpeak > 10.f)
            st->Zpeak = 10.f;
         if (st->Zpeak < 1.f)
            st->Zpeak = 1.f;
      } else {
         if (Zframe < st->Zpeak*ZMIN)
         {
            Pframe = 0;
         } else if (Zframe > st->Zpeak*ZMAX)
         {
            Pframe = 1;
         } else {
            Pframe = log(Zframe/(st->Zpeak*ZMIN)) / log(ZMAX/ZMIN);
         }
      }
   }
   st->Zlast = Zframe;

   /*fprintf (stderr, "%f\n", Pframe);*/
   /* Compute gain according to the Ephraim-Malah algorithm */
   for (i=1;i<N;i++)
   {
      float MM;
      float theta;
      float prior_ratio;
      float p, q;
      float zeta1;
      float P1;

      prior_ratio = st->prior[i]/(1.0001f+st->prior[i]);
      theta = (1.f+st->post[i])*prior_ratio;

      /* Smooth zeta across neighbouring bins, except at the two edge bins */
      if (i==1 || i==N-1)
         zeta1 = st->zeta[i];
      else
         zeta1 = .25f*st->zeta[i-1] + .5f*st->zeta[i] + .25f*st->zeta[i+1];
      if (zeta1<ZMIN)
         P1 = 0.f;
      else if (zeta1>ZMAX)
         P1 = 1.f;
      else
         P1 = LOG_MIN_MAX_1 * log(ZMIN_1*zeta1);
  
      /*P1 = log(zeta1/ZMIN)/log(ZMAX/ZMIN);*/
      
      /* FIXME: add global prob (P2) */
      q = 1-Pframe*P1;
      /* NOTE(review): the next line overwrites q, so Pframe has no effect on the gain */
      q = 1-P1;
      /* Capping q at .95 also keeps (1-q) away from zero in the division below */
      if (q>.95f)
         q=.95f;
      p=1.f/(1.f + (q/(1.f-q))*(1.f+st->prior[i])*exp(-theta));
      /*p=1;*/

#if 0
      /* log-spectral magnitude estimator */
      if (theta<6)
         MM = 0.74082*pow(theta+1,.61)/sqrt(.0001+theta);
      else
         MM=1;
#else
      /* Optimal estimator for loudness domain */
      MM = hypergeom_gain(theta);
#endif

      st->gain[i] = prior_ratio * MM;
      /*Put some (very arbitrary) limit on the gain*/
      if (st->gain[i]>2.f)
      {
         st->gain[i]=2.f;
      }
      
      st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];
      /* gain2 is what actually gets applied to the spectrum; 1 when denoising is off */
      if (st->denoise_enabled)
      {
         st->gain2[i]=p*p*st->gain[i];
      } else {
         st->gain2[i]=1.f;
      }
   }
   st->gain2[0]=st->gain[0]=0.f;
   st->gain2[N-1]=st->gain[N-1]=0.f;

   if (st->agc_enabled)
      speex_compute_agc(st, mean_prior);

#if 0
   if (!is_speech)
   {
      for (i=0;i<N;i++)
         st->gain2[i] = 0;
   }
#if 0
 else {
      for (i=0;i<N;i++)
         st->gain2[i] = 1;
   }
#endif
#endif

   /* Apply computed gain: frame[2*i-1] and frame[2*i] both belong to bin i */
   for (i=1;i<N;i++)
   {
      st->frame[2*i-1] *= st->gain2[i];
      st->frame[2*i] *= st->gain2[i];
   }

   /* Get rid of the DC and very low frequencies */
   st->frame[0]=0;
   st->frame[1]=0;
   st->frame[2]=0;
   /* Nyquist frequency is mostly useless too */
   st->frame[2*N-1]=0;

   /* Inverse FFT; the loop below applies the scaling (scale is .5/N) */
   spx_drft_backward(st->fft_lookup, st->frame);

   for (i=0;i<2*N;i++)
      st->frame[i] *= scale;

   /* If the peak magnitude exceeds 28000, scale the whole frame down to that peak */
   {
      float max_sample=0;
      for (i=0;i<2*N;i++)
         if (fabs(st->frame[i])>max_sample)
            max_sample = fabs(st->frame[i]);
      if (max_sample>28000.f)
      {
         float damp = 28000.f/max_sample;
         for (i=0;i<2*N;i++)
            st->frame[i] *= damp;
      }
   }

   /* Synthesis window */
   for (i=0;i<2*N;i++)
      st->frame[i] *= st->window[i];

   /* Perform overlap and add (results are converted from float to spx_int16_t
      on assignment; no explicit saturation is done here) */
   for (i=0;i<N3;i++)
      x[i] = st->outbuf[i] + st->frame[i];
   for (i=0;i<N4;i++)
      x[N3+i] = st->frame[N3+i];
   
   /* Update outbuf */
   for (i=0;i<N3;i++)
      st->outbuf[i] = st->frame[st->frame_size+i];

   /* Save old power spectrum */
   for (i=1;i<N;i++)
      st->old_ps[i] = ps[i];

   return is_speech;
}
Пример #4
0
/* Decorrelate one frame of (possibly multi-channel) audio.
 *
 *   st       - decorrelator state; per-channel history (buff), ring buffer,
 *              ring index, order and alpha are read and written back, and the
 *              random seed advances
 *   in       - interleaved input: sample i of channel ch is in[i*st->channels+ch],
 *              st->frame_size samples per channel
 *   out      - interleaved output, same layout as in; values are clamped to
 *              [-32767, 32767] before being stored
 *   strength - clamped to [0, 100] and mapped to amount = .01*strength
 *
 * Each channel is filtered twice per frame -- once with the order/alpha saved
 * from the previous call, once with freshly drawn ones -- and the two outputs
 * are summed after weighting by the two halves of st->vorbis_win.
 */
EXPORT void speex_decorrelate(SpeexDecorrState *st, const spx_int16_t *in, spx_int16_t *out, int strength)
{
   int ch;
   float amount;
   
   if (strength<0)
      strength = 0;
   if (strength>100)
      strength = 100;
   
   amount = .01*strength;
   for (ch=0;ch<st->channels;ch++)
   {
      int i;
      /* N is only used by the VORBIS_PSYCHO section below */
      int N=2*st->frame_size;
      float beta, beta2;
      float *x;
      float max_alpha = 0;
      
      float *buff;
      float *ring;
      int ringID;
      int order;
      float alpha;

      /* Load this channel's persistent state */
      buff = st->buff+ch*2*st->frame_size;
      ring = st->ring[ch];
      ringID = st->ringID[ch];
      order = st->order[ch];
      alpha = st->alpha[ch];
      
      /* Slide the history: previous frame to the first half, new samples to the second */
      for (i=0;i<st->frame_size;i++)
         buff[i] = buff[i+st->frame_size];
      for (i=0;i<st->frame_size;i++)
         buff[i+st->frame_size] = in[i*st->channels+ch];

      /* x points at the current frame; the negative offsets used below
         (x[i-ALLPASS_ORDER...]) reach back into the previous frame held in
         the first half of buff */
      x = buff+st->frame_size;
      /* NOTE(review): this first value of beta is always overwritten by the
         if/else that follows */
      beta = 1.-.3*amount*amount;
      /* strength is clamped to 100 above, so amount is at most .01*100 here */
      if (amount>1)
         beta = 1-sqrt(.4*amount);
      else
         beta = 1-0.63246*amount;
      if (beta<0)
         beta = 0;
   
      beta2 = beta;
      /* First pass: filter with the order/alpha carried over from the previous
         call, weighted by the second half of vorbis_win */
      for (i=0;i<st->frame_size;i++)
      {
         st->y[i] = alpha*(x[i-ALLPASS_ORDER+order]-beta*x[i-ALLPASS_ORDER+order-1])*st->vorbis_win[st->frame_size+i+order] 
               + x[i-ALLPASS_ORDER]*st->vorbis_win[st->frame_size+i] 
               - alpha*(ring[ringID]
               - beta*ring[ringID+1>=order?0:ringID+1]);
         /* The ring stores the value before the final window multiplication */
         ring[ringID++]=st->y[i];
         st->y[i] *= st->vorbis_win[st->frame_size+i];
         if (ringID>=order)
            ringID=0;
      }
      /* Random-walk the order by -1, 0 or +1, kept within [5, 10] */
      order = order+(irand(&st->seed)%3)-1;
      if (order < 5)
         order = 5;
      if (order > 10)
         order = 10;
      /*order = 5+(irand(&st->seed)%6);*/
      /* Upper bound on |alpha| for the new order and current beta */
      max_alpha = pow(.96+.04*(amount-1),order);
      if (max_alpha > .98/(1.+beta2))
         max_alpha = .98/(1.+beta2);
   
      /* Random-walk alpha, then clamp it to [-max_alpha, max_alpha] */
      alpha = alpha + .4*uni_rand(&st->seed);
      if (alpha > max_alpha)
         alpha = max_alpha;
      if (alpha < -max_alpha)
         alpha = -max_alpha;
      /* Restart the ring buffer from silence for the second pass */
      for (i=0;i<ALLPASS_ORDER;i++)
         ring[i] = 0;
      ringID = 0;
      /* Second pass: same filter with the new order/alpha, weighted by the
         first half of vorbis_win and added to the first pass */
      for (i=0;i<st->frame_size;i++)
      {
         float tmp =  alpha*(x[i-ALLPASS_ORDER+order]-beta*x[i-ALLPASS_ORDER+order-1])*st->vorbis_win[i+order] 
               + x[i-ALLPASS_ORDER]*st->vorbis_win[i] 
               - alpha*(ring[ringID]
               - beta*ring[ringID+1>=order?0:ringID+1]);
         ring[ringID++]=tmp;
         tmp *= st->vorbis_win[i];
         if (ringID>=order)
            ringID=0;
         st->y[i] += tmp;
      }
   
#ifdef VORBIS_PSYCHO
      /* Optional: synthesise random-phase noise whose per-bin magnitude follows
         the curve returned by compute_curve(), then window it */
      float frame[N];
      float scale = 1./N;
      for (i=0;i<2*st->frame_size;i++)
         frame[i] = buff[i];
   //float coef = .5*0.78130;
      float coef = M_PI*0.075063 * 0.93763 * amount * .8 * 0.707;
      compute_curve(st->psy, buff, st->curve);
      for (i=1;i<st->frame_size;i++)
      {
         float x1,x2;
         float gain;
         /* Rejection sampling: redraw until (x1,x2) lies inside the unit disc */
         do {
            x1 = uni_rand(&st->seed);
            x2 = uni_rand(&st->seed);
         } while (x1*x1+x2*x2 > 1.);
         gain = coef*sqrt(.1+st->curve[i]);
         frame[2*i-1] = gain*x1;
         frame[2*i] = gain*x2;
      }
      frame[0] = coef*uni_rand(&st->seed)*sqrt(.1+st->curve[0]);
      frame[2*st->frame_size-1] = coef*uni_rand(&st->seed)*sqrt(.1+st->curve[st->frame_size-1]);
      spx_drft_backward(&st->lookup,frame);
      for (i=0;i<2*st->frame_size;i++)
         frame[i] *= st->vorbis_win[i];
#endif
   
      /* Write the channel back into the interleaved output */
      for (i=0;i<st->frame_size;i++)
      {
#ifdef VORBIS_PSYCHO
         /* Overlap-add the noise frame with the tail saved from the previous call */
         float tmp = st->y[i] + frame[i] + st->wola_mem[i];
         st->wola_mem[i] = frame[i+st->frame_size];
#else
         float tmp = st->y[i];
#endif
         /* Symmetric clamp to +/-32767 */
         if (tmp>32767)
            tmp = 32767;
         if (tmp < -32767)
            tmp = -32767;
         out[i*st->channels+ch] = tmp;
      }
      
      /* Save this channel's state for the next call */
      st->ringID[ch] = ringID;
      st->order[ch] = order;
      st->alpha[ch] = alpha;

   }
}
Пример #5
0
/* Run the preprocessor on one frame (variant taking an spx_int32_t
 * residual-echo estimate).
 *
 *   st   - preprocessor state; noise/SNR estimates, gains, the overlap buffer
 *          and counters are updated in place
 *   x    - input samples, handed to preprocess_analysis(); on return the
 *          first st->frame_size entries hold the processed output
 *   echo - optional residual echo estimate, indexed per bin 1..N-1; may be
 *          NULL. Each value is multiplied by frame_size*frame_size before use.
 *
 * Returns 1, unless st->vad_enabled is set, in which case the value returned
 * by speex_compute_vad() is returned.
 */
int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
{
   int i;
   int is_speech=1;
   float mean_post=0;
   float mean_prior=0;
   int N = st->ps_size;
   /* N3: number of output samples overlap-added with st->outbuf;
      N4: remaining output samples taken straight from the frame (N3+N4 == frame_size) */
   int N3 = 2*N - st->frame_size;
   int N4 = st->frame_size - N3;
   float scale=.5f/N;
   float *ps=st->ps;
   float Zframe=0, Pframe;

   preprocess_analysis(st, x);

   update_noise_prob(st);

   st->nb_preprocess++;

   /* Noise estimation is always updated while nb_adapt is below 10.
      NOTE(review): this comment used to say "for the 20 first times"; the code
      tests against 10. nb_adapt is not modified in this function. */
   if (st->nb_adapt<10)
   {
      update_noise(st, ps, echo);
   }

   /* Deal with residual echo if provided (scaled by frame_size^2) */
   if (echo)
      for (i=1;i<N;i++)
         st->echo_noise[i] = (.3f*st->echo_noise[i] + st->frame_size*st->frame_size*1.0*echo[i]);

   /* Compute a posteriori SNR (capped at 100 from above, not floored at 0) */
   for (i=1;i<N;i++)
   {
      float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
      st->post[i] = ps[i]/tot_noise - 1.f;
      if (st->post[i]>100.f)
         st->post[i]=100.f;
      /*if (st->post[i]<0)
        st->post[i]=0;*/
      mean_post+=st->post[i];
   }
   /* The sum covers bins 1..N-1 but is divided by N */
   mean_post /= N;
   if (mean_post<0.f)
      mean_post=0.f;

   /* Special case for first frame */
   if (st->nb_adapt==1)
      for (i=1;i<N;i++)
         st->old_ps[i] = ps[i];

   /* Compute a priori SNR */
   {
      /* A priori update rate */
      for (i=1;i<N;i++)
      {
         /* Per-bin update rate: .15 when the previous prior is 0, approaching 1
            as the previous prior grows */
         float gamma = .15+.85*st->prior[i]*st->prior[i]/((1+st->prior[i])*(1+st->prior[i]));
         float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
         /* A priori SNR update: the (1-gamma) term mixes 80% of the previous
            frame's gained power over noise with 20% of the previous prior */
         st->prior[i] = gamma*max(0.0f,st->post[i]) +
               (1.f-gamma)* (.8*st->gain[i]*st->gain[i]*st->old_ps[i]/tot_noise + .2*st->prior[i]);
         
         if (st->prior[i]>100.f)
            st->prior[i]=100.f;
         
         mean_prior+=st->prior[i];
      }
   }
   mean_prior /= N;

#if 0
   for (i=0;i<N;i++)
   {
      fprintf (stderr, "%f ", st->prior[i]);
   }
   fprintf (stderr, "\n");
#endif
   /*fprintf (stderr, "%f %f\n", mean_prior,mean_post);*/

   /* From the 20th call on, count consecutive frames that look like noise */
   if (st->nb_preprocess>=20)
   {
      int do_update = 0;
      float noise_ener=0, sig_ener=0;
      /* If SNR is low (both a priori and a posteriori), update the noise estimate*/
      /*if (mean_prior<.23 && mean_post < .5)*/
      if (mean_prior<.23f && mean_post < .5f)
         do_update = 1;
      for (i=1;i<N;i++)
      {
         noise_ener += st->noise[i];
         sig_ener += ps[i];
      }
      /* Also flag the frame when the noise estimate exceeds 3x the signal energy */
      if (noise_ener > 3.f*sig_ener)
         do_update = 1;
      /*do_update = 0;*/
      if (do_update)
      {
         st->consec_noise++;
      } else {
         st->consec_noise=0;
      }
   }

   if (st->vad_enabled)
      is_speech = speex_compute_vad(st, ps, mean_prior, mean_post);


   if (st->consec_noise>=3)
   {
      /* Three or more noise-like frames in a row: full update from the previous spectrum */
      update_noise(st, st->old_ps, echo);
   } else {
      /* Otherwise update bins 1..N-2 individually where the update probability is low */
      for (i=1;i<N-1;i++)
      {
         if (st->update_prob[i]<.5f/* || st->ps[i] < st->noise[i]*/)
         {
            if (echo)
               st->noise[i] = .95f*st->noise[i] + .05f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*1.0*echo[i]);
            else
               st->noise[i] = .95f*st->noise[i] + .05f*st->ps[i];
         }
      }
   }

   /* Smooth the a priori SNR over time */
   for (i=1;i<N;i++)
   {
      st->zeta[i] = .7f*st->zeta[i] + .3f*st->prior[i];
   }

   {
      /* Bin indices corresponding to 300 Hz and 2000 Hz */
      int freq_start = (int)(300.0f*2.f*N/st->sampling_rate);
      int freq_end   = (int)(2000.0f*2.f*N/st->sampling_rate);
      for (i=freq_start;i<freq_end;i++)
      {
         Zframe += st->zeta[i];         
      }
      /* Average over the bins actually summed */
      Zframe /= (freq_end-freq_start);
   }
   st->Zlast = Zframe;

   Pframe = qcurve(Zframe);

   /*fprintf (stderr, "%f\n", Pframe);*/
   /* Compute gain according to the Ephraim-Malah algorithm */
   for (i=1;i<N;i++)
   {
      float MM;
      float theta;
      float prior_ratio;
      float p, q;
      float zeta1;
      float P1;

      prior_ratio = st->prior[i]/(1.0001f+st->prior[i]);
      theta = (1.f+st->post[i])*prior_ratio;

      /* Smooth zeta across neighbouring bins, except at the two edge bins */
      if (i==1 || i==N-1)
         zeta1 = st->zeta[i];
      else
         zeta1 = .25f*st->zeta[i-1] + .5f*st->zeta[i] + .25f*st->zeta[i+1];
      P1 = qcurve (zeta1);
      
      /* FIXME: add global prob (P2) */
      q = 1-Pframe*P1;
      /* NOTE(review): the next line overwrites q, so Pframe has no effect on the gain */
      q = 1-P1;
      /* Capping q at .95 also keeps (1-q) away from zero in the division below */
      if (q>.95f)
         q=.95f;
      p=1.f/(1.f + (q/(1.f-q))*(1.f+st->prior[i])*exp(-theta));
      /*p=1;*/

      /* Optimal estimator for loudness domain */
      MM = hypergeom_gain(theta);

      st->gain[i] = prior_ratio * MM;
      /*Put some (very arbitrary) limit on the gain*/
      if (st->gain[i]>2.f)
      {
         st->gain[i]=2.f;
      }
      
      st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];
      /* gain2 is what actually gets applied to the spectrum; 1 when denoising is off */
      if (st->denoise_enabled)
      {
         /*st->gain2[i] = p*p*st->gain[i];*/
         /* Square of a p-weighted mix of sqrt(gain) and the constant .2 */
         st->gain2[i]=(p*sqrt(st->gain[i])+.2*(1-p)) * (p*sqrt(st->gain[i])+.2*(1-p));
         /*st->gain2[i] = pow(st->gain[i], p) * pow(.1f,1.f-p);*/
      } else {
         st->gain2[i]=1.f;
      }
   }
   
   st->gain2[0]=st->gain[0]=0.f;
   st->gain2[N-1]=st->gain[N-1]=0.f;
   /*
   for (i=30;i<N-2;i++)
   {
      st->gain[i] = st->gain2[i]*st->gain2[i] + (1-st->gain2[i])*.333*(.6*st->gain2[i-1]+st->gain2[i]+.6*st->gain2[i+1]+.4*st->gain2[i-2]+.4*st->gain2[i+2]);
   }
   for (i=30;i<N-2;i++)
      st->gain2[i] = st->gain[i];
   */
   if (st->agc_enabled)
      speex_compute_agc(st, mean_prior);

#if 0
   if (!is_speech)
   {
      for (i=0;i<N;i++)
         st->gain2[i] = 0;
   }
#if 0
 else {
      for (i=0;i<N;i++)
         st->gain2[i] = 1;
   }
#endif
#endif

   /* Apply computed gain: frame[2*i-1] and frame[2*i] both belong to bin i */
   for (i=1;i<N;i++)
   {
      st->frame[2*i-1] *= st->gain2[i];
      st->frame[2*i] *= st->gain2[i];
   }

   /* Get rid of the DC and very low frequencies */
   st->frame[0]=0;
   st->frame[1]=0;
   st->frame[2]=0;
   /* Nyquist frequency is mostly useless too */
   st->frame[2*N-1]=0;

   /* Inverse FFT; the loop below applies the scaling (scale is .5/N) */
   spx_drft_backward(st->fft_lookup, st->frame);

   for (i=0;i<2*N;i++)
      st->frame[i] *= scale;

   /* If the peak magnitude exceeds 28000, scale the whole frame down to that peak */
   {
      float max_sample=0;
      for (i=0;i<2*N;i++)
         if (fabs(st->frame[i])>max_sample)
            max_sample = fabs(st->frame[i]);
      if (max_sample>28000.f)
      {
         float damp = 28000.f/max_sample;
         for (i=0;i<2*N;i++)
            st->frame[i] *= damp;
      }
   }

   /* Synthesis window */
   for (i=0;i<2*N;i++)
      st->frame[i] *= st->window[i];

   /* Perform overlap and add (results are converted from float to spx_int16_t
      on assignment; no explicit saturation is done here) */
   for (i=0;i<N3;i++)
      x[i] = st->outbuf[i] + st->frame[i];
   for (i=0;i<N4;i++)
      x[N3+i] = st->frame[N3+i];
   
   /* Update outbuf */
   for (i=0;i<N3;i++)
      st->outbuf[i] = st->frame[st->frame_size+i];

   /* Save old power spectrum */
   for (i=1;i<N;i++)
      st->old_ps[i] = ps[i];

   return is_speech;
}