コード例 #1
0
ファイル: preprocess.c プロジェクト: mcr/app-conference
/*
 * Process one frame through the preprocessor: update the noise and SNR
 * estimates held in st, optionally run VAD and AGC, and (only when AGC or
 * denoising is enabled) apply the per-bin gains and resynthesise the frame
 * into x by overlap-add.
 *
 * st:   preprocessor state; read and updated throughout.
 * x:    frame handed to preprocess_analysis(); this function itself writes
 *       st->frame_size samples back to x only inside the agc/denoise branch.
 * echo: optional per-bin residual echo values, read at indices 1..N-1;
 *       may be NULL (every use below is guarded or forwarded as-is).
 *
 * Returns the value of speex_compute_vad() when st->vad_enabled is set,
 * otherwise 1.
 */
int speex_preprocess(SpeexPreprocessState *st, short *x, float *echo)
{
   int i;
   int is_speech=1;
   float mean_post=0;
   float mean_prior=0;
   /* N: number of spectral bins; st->frame holds 2*N values (see loops below) */
   int N = st->ps_size;
   /* N3: number of samples overlapped with the previous frame's tail (outbuf) */
   int N3 = 2*N - st->frame_size;
   /* N4: remaining samples of the output frame that come from this frame only */
   int N4 = st->frame_size - N3;
   /* Applied to all 2*N samples after the inverse transform, i.e. 1/(2*N) */
   float scale=.5/N;
   float *ps=st->ps;
   float Zframe=0, Pframe;

   /* Defined elsewhere; presumably fills st->frame and st->ps from x -- TODO confirm */
   preprocess_analysis(st, x);

   update_noise_prob(st);

   st->nb_preprocess++;

   /* Noise estimate is updated unconditionally while st->nb_adapt < 10
      (the original comment said "20 first times", but the code tests 10) */
   if (st->nb_adapt<10)
   {
      update_noise(st, ps, echo);
   }

   /* Deal with residual echo if provided: smooth it into echo_noise
      with weights 0.7 (previous value) and 0.3 (new value) */
   if (echo)
      for (i=1;i<N;i++)
         st->echo_noise[i] = (.7*st->echo_noise[i] + .3* echo[i]);

   /* Compute a posteriori SNR: ps/(1+noise+echo_noise) - 1, capped at 100 */
   for (i=1;i<N;i++)
   {
      st->post[i] = ps[i]/(1+st->noise[i]+st->echo_noise[i]) - 1;
      if (st->post[i]>100)
         st->post[i]=100;
      /* Lower clamp is disabled here; negative values are clamped only where
         post is used in the a priori update below:
        if (st->post[i]<0)
        st->post[i]=0;*/
      mean_post+=st->post[i];
   }
   /* The sum covers bins 1..N-1 but is divided by N */
   mean_post /= N;
   if (mean_post<0)
      mean_post=0;

   /* Special case for first frame: seed old_ps with the current spectrum */
   if (st->nb_adapt==1)
      for (i=1;i<N;i++)
         st->old_ps[i] = ps[i];

   /* Compute a priori SNR */
   {
      /* A priori update rate */
      float gamma;
      float min_gamma=0.12;
      /* This value is always overwritten below because the guard on the
         gamma=min_gamma assignment is commented out */
      gamma = 1.0/st->nb_preprocess;

      /*Make update rate smaller when there's no speech.
        Note: mean_prior is still 0 at this point (it is only accumulated in
        the loop further down), so the active branch evaluates to
        .1*mean_post*mean_post, clamped to [.02, .15].*/
#if 0
      if (mean_post<3.5 && mean_prior < 1)
         min_gamma *= (mean_post+.5);
      else
         min_gamma *= 4.;
#else
      min_gamma = .1*fabs(mean_prior - mean_post)*fabs(mean_prior - mean_post);
      if (min_gamma>.15)
         min_gamma = .15;
      if (min_gamma<.02)
         min_gamma = .02;
#endif
      /*min_gamma = .08;*/

      /* Guard disabled, so the assignment below is unconditional: */
      /*if (gamma<min_gamma)*/
         gamma=min_gamma;
      
      for (i=1;i<N;i++)
      {
         
         /* A priori SNR update: gamma weights the non-negative a posteriori
            SNR, (1-gamma) weights gain^2 * previous power spectrum over the
            same noise denominator used for post; result is capped at 100 */
         st->prior[i] = gamma*max(0.0,st->post[i]) +
         (1-gamma)*st->gain[i]*st->gain[i]*st->old_ps[i]/(1+st->noise[i]+st->echo_noise[i]);
         
         if (st->prior[i]>100)
            st->prior[i]=100;
         
         mean_prior+=st->prior[i];
      }
   }
   /* As with mean_post: N-1 terms summed, divided by N */
   mean_prior /= N;

#if 0
   for (i=0;i<N;i++)
   {
      fprintf (stderr, "%f ", st->prior[i]);
   }
   fprintf (stderr, "\n");
#endif
   /*fprintf (stderr, "%f %f\n", mean_prior,mean_post);*/

   /* From the 20th call on, track how many consecutive frames look like noise */
   if (st->nb_preprocess>=20)
   {
      int do_update = 0;
      float noise_ener=0, sig_ener=0;
      /* If SNR is low (both a priori and a posteriori), update the noise estimate*/
      /*if (mean_prior<.23 && mean_post < .5)*/
      if (mean_prior<.23 && mean_post < .5)
         do_update = 1;
      for (i=1;i<N;i++)
      {
         noise_ener += st->noise[i];
         sig_ener += ps[i];
      }
      /* Also flag the frame when the noise estimate exceeds 3x the frame energy */
      if (noise_ener > 3*sig_ener)
         do_update = 1;
      /*do_update = 0;*/
      if (do_update)
      {
         st->consec_noise++;
      } else {
         st->consec_noise=0;
      }
   }

   /* speex_compute_vad() is defined elsewhere; its result becomes the return value */
   if (st->vad_enabled)
      is_speech = speex_compute_vad(st, ps, mean_prior, mean_post);


   /* After 3 or more consecutive noise-like frames, do a full noise update
      from the previous frame's spectrum (old_ps). Otherwise only bins
      1..N-2 whose update_prob is below .5 are nudged toward the current
      spectrum (weights .90 / .1). */
   if (st->consec_noise>=3)
   {
      update_noise(st, st->old_ps, echo);
   } else {
      for (i=1;i<N-1;i++)
      {
         if (st->update_prob[i]<.5)
            st->noise[i] = .90*st->noise[i] + .1*st->ps[i];
      }
   }

   /* zeta: a priori SNR smoothed over time (weights .7 / .3) */
   for (i=1;i<N;i++)
   {
      st->zeta[i] = .7*st->zeta[i] + .3*st->prior[i];
   }

   /* Sum zeta over the bins spanning 300 Hz to 2000 Hz
      (bin index = frequency * 2*N / sampling_rate, truncated) */
   {
      int freq_start = (int)(300.0*2*N/st->sampling_rate);
      int freq_end   = (int)(2000.0*2*N/st->sampling_rate);
      for (i=freq_start;i<freq_end;i++)
      {
         Zframe += st->zeta[i];         
      }
   }

   /* Note: normalised by N, not by the number of bins actually summed */
   Zframe /= N;
   /* Derive Pframe in [0,1] from Zframe:
      - below ZMIN: 0
      - jump of more than 1.5x over the previous frame: 1, and Zpeak is
        reset to Zframe clamped to [1, 10]
      - otherwise compared against Zpeak*ZMIN and Zpeak*ZMAX, with a
        logarithmic interpolation in between */
   if (Zframe<ZMIN)
   {
      Pframe = 0;
   } else {
      if (Zframe > 1.5*st->Zlast)
      {
         Pframe = 1;
         st->Zpeak = Zframe;
         if (st->Zpeak > 10)
            st->Zpeak = 10;
         if (st->Zpeak < 1)
            st->Zpeak = 1;
      } else {
         if (Zframe < st->Zpeak*ZMIN)
         {
            Pframe = 0;
         } else if (Zframe > st->Zpeak*ZMAX)
         {
            Pframe = 1;
         } else {
            Pframe = log(Zframe/(st->Zpeak*ZMIN)) / log(ZMAX/ZMIN);
         }
      }
   }
   st->Zlast = Zframe;

   /*fprintf (stderr, "%f\n", Pframe);*/
   /* Compute gain according to the Ephraim-Malah algorithm
      (ephraim_malah() is defined elsewhere; presumably fills st->gain and
      st->gain2, which are read below -- TODO confirm) */
   ephraim_malah(st,N,Pframe);

   if (st->agc_enabled)
      speex_compute_agc(st, mean_prior);

#if 0
   if (!is_speech)
   {
      for (i=0;i<N;i++)
         st->gain2[i] = 0;
   }
#if 0
 else {
      for (i=0;i<N;i++)
         st->gain2[i] = 1;
   }
#endif
#endif

   /* PERF: 14% when only vad is enabled [7.0 vs 8.2 sec] */
   /* Resynthesis is skipped entirely when neither AGC nor denoising is
      enabled; in that case this function does not write to x or outbuf */
   if(st->agc_enabled || st->denoise_enabled) {
     /* Apply computed gain: bin i occupies frame[2*i-1] and frame[2*i] */
     for (i=1;i<N;i++)
     {
	st->frame[2*i-1] *= st->gain2[i];
	st->frame[2*i] *= st->gain2[i];
     }

     /* Get rid of the DC and very low frequencies */
     st->frame[0]=0;
     st->frame[1]=0;
     st->frame[2]=0;
     /* Nyquist frequency is mostly useless too */
     st->frame[2*N-1]=0;

     /* Inverse FFT, then scale every sample by scale (= .5/N) */
     drft_backward(st->fft_lookup, st->frame);

     for (i=0;i<2*N;i++)
	st->frame[i] *= scale;

     /* Peak limiter: if any sample magnitude exceeds 28000, scale the
        whole frame so that the peak becomes 28000 */
     {
	float max_sample=0;
	for (i=0;i<2*N;i++)
	   if (fabs(st->frame[i])>max_sample)
	      max_sample = fabs(st->frame[i]);
	if (max_sample>28000)
	{
	   float damp = 28000./max_sample;
	   for (i=0;i<2*N;i++)
	      st->frame[i] *= damp;
	}
     }

     /* Apply the synthesis window */
     for (i=0;i<2*N;i++)
	st->frame[i] *= st->window[i];

     /* Perform overlap and add: first N3 samples combine with the saved
        tail of the previous frame, the next N4 come from this frame alone */
     for (i=0;i<N3;i++)
	x[i] = st->outbuf[i] + st->frame[i];
     for (i=0;i<N4;i++)
	x[N3+i] = st->frame[N3+i];
     
     /* Update outbuf with the N3 samples following the first frame_size ones */
     for (i=0;i<N3;i++)
	st->outbuf[i] = st->frame[st->frame_size+i];
   }

   /* Save old power spectrum (done even when resynthesis was skipped) */
   for (i=1;i<N;i++)
      st->old_ps[i] = ps[i];

   return is_speech;
}
コード例 #2
0
ファイル: preprocess.c プロジェクト: BigHNF/tcpmp-revive
/*
 * Process one frame through the preprocessor: update the noise, echo,
 * reverberation and SNR estimates held in st, compute per-bin gains inline,
 * optionally run VAD and AGC, then apply the gains and resynthesise the
 * frame into x by overlap-add (always, regardless of enabled features).
 *
 * st:   preprocessor state; read and updated throughout.
 * x:    frame handed to preprocess_analysis(); overwritten with
 *       st->frame_size output samples at the end.
 * echo: optional per-bin residual echo values, read at indices 1..N-1;
 *       may be NULL (every use below is guarded or forwarded as-is).
 *
 * Returns the value of speex_compute_vad() when st->vad_enabled is set,
 * otherwise 1.
 */
int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, float *echo)
{
   int i;
   int is_speech=1;
   float mean_post=0;
   float mean_prior=0;
   /* N: number of spectral bins; st->frame holds 2*N values (see loops below) */
   int N = st->ps_size;
   /* N3: number of samples overlapped with the previous frame's tail (outbuf) */
   int N3 = 2*N - st->frame_size;
   /* N4: remaining samples of the output frame that come from this frame only */
   int N4 = st->frame_size - N3;
   /* Applied to all 2*N samples after the inverse transform, i.e. 1/(2*N) */
   float scale=.5f/N;
   float *ps=st->ps;
   float Zframe=0, Pframe;

   /* Defined elsewhere; presumably fills st->frame and st->ps from x -- TODO confirm */
   preprocess_analysis(st, x);

   update_noise_prob(st);

   st->nb_preprocess++;

   /* Noise estimate is updated unconditionally while st->nb_adapt < 10
      (the original comment said "20 first times", but the code tests 10) */
   if (st->nb_adapt<10)
   {
      update_noise(st, ps, echo);
   }

   /* Deal with residual echo if provided: keep 0.3 of the previous value
      and add the new value with weight 1 */
   if (echo)
      for (i=1;i<N;i++)
         st->echo_noise[i] = (.3f*st->echo_noise[i] + echo[i]);

   /* Compute a posteriori SNR against the combined noise floor
      1 + NOISE_OVERCOMPENS*noise + echo_noise + reverb_estimate; capped at 100 */
   for (i=1;i<N;i++)
   {
      st->post[i] = ps[i]/(1.f+NOISE_OVERCOMPENS*st->noise[i]+st->echo_noise[i]+st->reverb_estimate[i]) - 1.f;
      if (st->post[i]>100.f)
         st->post[i]=100.f;
      /* Lower clamp is disabled here; negative values are clamped only where
         post is used in the a priori update below:
        if (st->post[i]<0)
        st->post[i]=0;*/
      mean_post+=st->post[i];
   }
   /* The sum covers bins 1..N-1 but is divided by N */
   mean_post /= N;
   if (mean_post<0.f)
      mean_post=0.f;

   /* Special case for first frame: seed old_ps with the current spectrum */
   if (st->nb_adapt==1)
      for (i=1;i<N;i++)
         st->old_ps[i] = ps[i];

   /* Compute a priori SNR */
   {
      /* A priori update rate */
      float gamma;
      float min_gamma=0.12f;
      gamma = 1.0f/st->nb_preprocess;

      /*Make update rate smaller when there's no speech.
        Note: everything computed for gamma/min_gamma in this block is
        superseded by the fixed "gamma = .1" assignment just before the loop.*/
#if 0
      if (mean_post<3.5 && mean_prior < 1)
         min_gamma *= (mean_post+.5);
      else
         min_gamma *= 4.;
#else
      min_gamma = .1f*fabs(mean_prior - mean_post)*fabs(mean_prior - mean_post);
      if (min_gamma>.15f)
         min_gamma = .15f;
      if (min_gamma<.02f)
         min_gamma = .02f;
#endif
      /*min_gamma = .08;*/

      /* Guard disabled, so the assignment below is unconditional: */
      /*if (gamma<min_gamma)*/
         gamma=min_gamma;
      /* Hard override: the update rate actually used is the constant .1 */
      gamma = .1;
      for (i=1;i<N;i++)
      {
         
         /* A priori SNR update: gamma weights the non-negative a posteriori
            SNR, (1-gamma) weights gain^2 * previous power spectrum over the
            same noise floor used for post; result is capped at 100 */
         st->prior[i] = gamma*max(0.0f,st->post[i]) +
         (1.f-gamma)*st->gain[i]*st->gain[i]*st->old_ps[i]/(1.f+NOISE_OVERCOMPENS*st->noise[i]+st->echo_noise[i]+st->reverb_estimate[i]);
         
         if (st->prior[i]>100.f)
            st->prior[i]=100.f;
         
         mean_prior+=st->prior[i];
      }
   }
   /* As with mean_post: N-1 terms summed, divided by N */
   mean_prior /= N;

#if 0
   for (i=0;i<N;i++)
   {
      fprintf (stderr, "%f ", st->prior[i]);
   }
   fprintf (stderr, "\n");
#endif
   /*fprintf (stderr, "%f %f\n", mean_prior,mean_post);*/

   /* From the 20th call on, track how many consecutive frames look like noise */
   if (st->nb_preprocess>=20)
   {
      int do_update = 0;
      float noise_ener=0, sig_ener=0;
      /* If SNR is low (both a priori and a posteriori), update the noise estimate*/
      /*if (mean_prior<.23 && mean_post < .5)*/
      if (mean_prior<.23f && mean_post < .5f)
         do_update = 1;
      for (i=1;i<N;i++)
      {
         noise_ener += st->noise[i];
         sig_ener += ps[i];
      }
      /* Also flag the frame when the noise estimate exceeds 3x the frame energy */
      if (noise_ener > 3.f*sig_ener)
         do_update = 1;
      /*do_update = 0;*/
      if (do_update)
      {
         st->consec_noise++;
      } else {
         st->consec_noise=0;
      }
   }

   /* speex_compute_vad() is defined elsewhere; its result becomes the return value */
   if (st->vad_enabled)
      is_speech = speex_compute_vad(st, ps, mean_prior, mean_post);


   /* After 3 or more consecutive noise-like frames, do a full noise update
      from the previous frame's spectrum (old_ps). Otherwise bins 1..N-2 are
      updated individually (weights .90 / .1) when update_prob is below .5 or
      the current power is below the noise estimate; when echo is given, it
      is subtracted from the power first, with a floor of 1.0. */
   if (st->consec_noise>=3)
   {
      update_noise(st, st->old_ps, echo);
   } else {
      for (i=1;i<N-1;i++)
      {
         if (st->update_prob[i]<.5f || st->ps[i] < st->noise[i])
         {
            if (echo)
               st->noise[i] = .90f*st->noise[i] + .1f*max(1.0f,st->ps[i]-echo[i]);
            else
               st->noise[i] = .90f*st->noise[i] + .1f*st->ps[i];
         }
      }
   }

   /* zeta: a priori SNR smoothed over time (weights .7 / .3) */
   for (i=1;i<N;i++)
   {
      st->zeta[i] = .7f*st->zeta[i] + .3f*st->prior[i];
   }

   /* Sum zeta over the bins spanning 300 Hz to 2000 Hz
      (bin index = frequency * 2*N / sampling_rate, truncated) */
   {
      int freq_start = (int)(300.0f*2.f*N/st->sampling_rate);
      int freq_end   = (int)(2000.0f*2.f*N/st->sampling_rate);
      for (i=freq_start;i<freq_end;i++)
      {
         Zframe += st->zeta[i];         
      }
   }

   /* Note: normalised by N, not by the number of bins actually summed */
   Zframe /= N;
   /* Derive Pframe in [0,1] from Zframe:
      - below ZMIN: 0
      - jump of more than 1.5x over the previous frame: 1, and Zpeak is
        reset to Zframe clamped to [1, 10]
      - otherwise compared against Zpeak*ZMIN and Zpeak*ZMAX, with a
        logarithmic interpolation in between */
   if (Zframe<ZMIN)
   {
      Pframe = 0;
   } else {
      if (Zframe > 1.5f*st->Zlast)
      {
         Pframe = 1.f;
         st->Zpeak = Zframe;
         if (st->Zpeak > 10.f)
            st->Zpeak = 10.f;
         if (st->Zpeak < 1.f)
            st->Zpeak = 1.f;
      } else {
         if (Zframe < st->Zpeak*ZMIN)
         {
            Pframe = 0;
         } else if (Zframe > st->Zpeak*ZMAX)
         {
            Pframe = 1;
         } else {
            Pframe = log(Zframe/(st->Zpeak*ZMIN)) / log(ZMAX/ZMIN);
         }
      }
   }
   st->Zlast = Zframe;

   /*fprintf (stderr, "%f\n", Pframe);*/
   /* Compute gain according to the Ephraim-Malah algorithm */
   for (i=1;i<N;i++)
   {
      float MM;
      float theta;
      float prior_ratio;
      float p, q;
      float zeta1;
      float P1;

      /* prior/(1+prior); the 1.0001 keeps the ratio strictly below 1 */
      prior_ratio = st->prior[i]/(1.0001f+st->prior[i]);
      theta = (1.f+st->post[i])*prior_ratio;

      /* Smooth zeta across neighbouring bins (.25/.5/.25), except at the
         first and last processed bins where the raw value is used */
      if (i==1 || i==N-1)
         zeta1 = st->zeta[i];
      else
         zeta1 = .25f*st->zeta[i-1] + .5f*st->zeta[i] + .25f*st->zeta[i+1];
      /* Map zeta1 to P1 in [0,1]: 0 below ZMIN, 1 above ZMAX, logarithmic
         in between (LOG_MIN_MAX_1 and ZMIN_1 are defined elsewhere;
         presumably 1/log(ZMAX/ZMIN) and 1/ZMIN -- TODO confirm) */
      if (zeta1<ZMIN)
         P1 = 0.f;
      else if (zeta1>ZMAX)
         P1 = 1.f;
      else
         P1 = LOG_MIN_MAX_1 * log(ZMIN_1*zeta1);
  
      /*P1 = log(zeta1/ZMIN)/log(ZMAX/ZMIN);*/
      
      /* FIXME: add global prob (P2) */
      /* The first assignment is immediately overwritten, so Pframe has no
         effect on q (and therefore none on the gains computed here) */
      q = 1-Pframe*P1;
      q = 1-P1;
      /* Cap q at .95 so that 1-q in the denominator below stays away from 0 */
      if (q>.95f)
         q=.95f;
      p=1.f/(1.f + (q/(1.f-q))*(1.f+st->prior[i])*exp(-theta));
      /*p=1;*/

#if 0
      /* log-spectral magnitude estimator */
      if (theta<6)
         MM = 0.74082*pow(theta+1,.61)/sqrt(.0001+theta);
      else
         MM=1;
#else
      /* Optimal estimator for loudness domain (hypergeom_gain() is defined elsewhere) */
      MM = hypergeom_gain(theta);
#endif

      st->gain[i] = prior_ratio * MM;
      /*Put some (very arbitrary) limit on the gain*/
      if (st->gain[i]>2.f)
      {
         st->gain[i]=2.f;
      }
      
      /* Reverberation estimate: decayed previous value plus a decayed,
         reverb_level-scaled share of the gained power in this bin */
      st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];
      /* gain2 is the gain actually applied to the spectrum: p*p*gain when
         denoising is enabled, otherwise unity */
      if (st->denoise_enabled)
      {
         st->gain2[i]=p*p*st->gain[i];
      } else {
         st->gain2[i]=1.f;
      }
   }
   /* Force zero gain at bin 0 and at bin N-1 (overriding the loop's value for N-1) */
   st->gain2[0]=st->gain[0]=0.f;
   st->gain2[N-1]=st->gain[N-1]=0.f;

   if (st->agc_enabled)
      speex_compute_agc(st, mean_prior);

#if 0
   if (!is_speech)
   {
      for (i=0;i<N;i++)
         st->gain2[i] = 0;
   }
#if 0
 else {
      for (i=0;i<N;i++)
         st->gain2[i] = 1;
   }
#endif
#endif

   /* Apply computed gain: bin i occupies frame[2*i-1] and frame[2*i] */
   for (i=1;i<N;i++)
   {
      st->frame[2*i-1] *= st->gain2[i];
      st->frame[2*i] *= st->gain2[i];
   }

   /* Get rid of the DC and very low frequencies */
   st->frame[0]=0;
   st->frame[1]=0;
   st->frame[2]=0;
   /* Nyquist frequency is mostly useless too */
   st->frame[2*N-1]=0;

   /* Inverse FFT, then scale every sample by scale (= .5/N) */
   spx_drft_backward(st->fft_lookup, st->frame);

   for (i=0;i<2*N;i++)
      st->frame[i] *= scale;

   /* Peak limiter: if any sample magnitude exceeds 28000, scale the whole
      frame so that the peak becomes 28000 */
   {
      float max_sample=0;
      for (i=0;i<2*N;i++)
         if (fabs(st->frame[i])>max_sample)
            max_sample = fabs(st->frame[i]);
      if (max_sample>28000.f)
      {
         float damp = 28000.f/max_sample;
         for (i=0;i<2*N;i++)
            st->frame[i] *= damp;
      }
   }

   /* Apply the synthesis window */
   for (i=0;i<2*N;i++)
      st->frame[i] *= st->window[i];

   /* Perform overlap and add: first N3 samples combine with the saved tail
      of the previous frame, the next N4 come from this frame alone */
   for (i=0;i<N3;i++)
      x[i] = st->outbuf[i] + st->frame[i];
   for (i=0;i<N4;i++)
      x[N3+i] = st->frame[N3+i];
   
   /* Update outbuf with the N3 samples following the first frame_size ones */
   for (i=0;i<N3;i++)
      st->outbuf[i] = st->frame[st->frame_size+i];

   /* Save old power spectrum */
   for (i=1;i<N;i++)
      st->old_ps[i] = ps[i];

   return is_speech;
}
コード例 #3
0
ファイル: preprocess.c プロジェクト: aichew/KOTI_AEC
/*
 * Process one frame through the preprocessor: update the noise, echo,
 * reverberation and SNR estimates held in st, compute per-bin gains inline,
 * optionally run VAD and AGC, then apply the gains and resynthesise the
 * frame into x by overlap-add (always, regardless of enabled features).
 *
 * st:   preprocessor state; read and updated throughout.
 * x:    frame handed to preprocess_analysis(); overwritten with
 *       st->frame_size output samples at the end.
 * echo: optional per-bin residual echo values (integer), read at indices
 *       1..N-1 and multiplied by frame_size^2 wherever used here; may be
 *       NULL (every use below is guarded or forwarded as-is).
 *
 * Returns the value of speex_compute_vad() when st->vad_enabled is set,
 * otherwise 1.
 */
int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
{
   int i;
   int is_speech=1;
   float mean_post=0;
   float mean_prior=0;
   /* N: number of spectral bins; st->frame holds 2*N values (see loops below) */
   int N = st->ps_size;
   /* N3: number of samples overlapped with the previous frame's tail (outbuf) */
   int N3 = 2*N - st->frame_size;
   /* N4: remaining samples of the output frame that come from this frame only */
   int N4 = st->frame_size - N3;
   /* Applied to all 2*N samples after the inverse transform, i.e. 1/(2*N) */
   float scale=.5f/N;
   float *ps=st->ps;
   float Zframe=0, Pframe;

   /* Defined elsewhere; presumably fills st->frame and st->ps from x -- TODO confirm */
   preprocess_analysis(st, x);

   update_noise_prob(st);

   st->nb_preprocess++;

   /* Noise estimate is updated unconditionally while st->nb_adapt < 10
      (the original comment said "20 first times", but the code tests 10) */
   if (st->nb_adapt<10)
   {
      update_noise(st, ps, echo);
   }

   /* Deal with residual echo if provided: keep 0.3 of the previous value and
      add the new value scaled by frame_size^2 (the 1.0 factor forces the
      product into floating point) */
   if (echo)
      for (i=1;i<N;i++)
         st->echo_noise[i] = (.3f*st->echo_noise[i] + st->frame_size*st->frame_size*1.0*echo[i]);

   /* Compute a posteriori SNR against the combined noise floor; capped at 100 */
   for (i=1;i<N;i++)
   {
      float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
      st->post[i] = ps[i]/tot_noise - 1.f;
      if (st->post[i]>100.f)
         st->post[i]=100.f;
      /* Lower clamp is disabled here; negative values are clamped only where
         post is used in the a priori update below:
        if (st->post[i]<0)
        st->post[i]=0;*/
      mean_post+=st->post[i];
   }
   /* The sum covers bins 1..N-1 but is divided by N */
   mean_post /= N;
   if (mean_post<0.f)
      mean_post=0.f;

   /* Special case for first frame: seed old_ps with the current spectrum */
   if (st->nb_adapt==1)
      for (i=1;i<N;i++)
         st->old_ps[i] = ps[i];

   /* Compute a priori SNR */
   {
      /* A priori update rate is computed per bin from the previous prior:
         .15 when the previous prior is 0, approaching 1 as it grows */
      for (i=1;i<N;i++)
      {
         float gamma = .15+.85*st->prior[i]*st->prior[i]/((1+st->prior[i])*(1+st->prior[i]));
         float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
         /* A priori SNR update: gamma weights the non-negative a posteriori
            SNR; (1-gamma) weights a .8/.2 mix of gain^2*old_ps/tot_noise and
            the previous prior (the right-hand side still reads the old
            st->prior[i]). Result is capped at 100. */
         st->prior[i] = gamma*max(0.0f,st->post[i]) +
               (1.f-gamma)* (.8*st->gain[i]*st->gain[i]*st->old_ps[i]/tot_noise + .2*st->prior[i]);
         
         if (st->prior[i]>100.f)
            st->prior[i]=100.f;
         
         mean_prior+=st->prior[i];
      }
   }
   /* As with mean_post: N-1 terms summed, divided by N */
   mean_prior /= N;

#if 0
   for (i=0;i<N;i++)
   {
      fprintf (stderr, "%f ", st->prior[i]);
   }
   fprintf (stderr, "\n");
#endif
   /*fprintf (stderr, "%f %f\n", mean_prior,mean_post);*/

   /* From the 20th call on, track how many consecutive frames look like noise */
   if (st->nb_preprocess>=20)
   {
      int do_update = 0;
      float noise_ener=0, sig_ener=0;
      /* If SNR is low (both a priori and a posteriori), update the noise estimate*/
      /*if (mean_prior<.23 && mean_post < .5)*/
      if (mean_prior<.23f && mean_post < .5f)
         do_update = 1;
      for (i=1;i<N;i++)
      {
         noise_ener += st->noise[i];
         sig_ener += ps[i];
      }
      /* Also flag the frame when the noise estimate exceeds 3x the frame energy */
      if (noise_ener > 3.f*sig_ener)
         do_update = 1;
      /*do_update = 0;*/
      if (do_update)
      {
         st->consec_noise++;
      } else {
         st->consec_noise=0;
      }
   }

   /* speex_compute_vad() is defined elsewhere; its result becomes the return value */
   if (st->vad_enabled)
      is_speech = speex_compute_vad(st, ps, mean_prior, mean_post);


   /* After 3 or more consecutive noise-like frames, do a full noise update
      from the previous frame's spectrum (old_ps). Otherwise bins 1..N-2
      whose update_prob is below .5 are updated individually (weights
      .95 / .05); when echo is given, the frame_size^2-scaled echo is
      subtracted from the power first, with a floor of 1.0. */
   if (st->consec_noise>=3)
   {
      update_noise(st, st->old_ps, echo);
   } else {
      for (i=1;i<N-1;i++)
      {
         if (st->update_prob[i]<.5f/* || st->ps[i] < st->noise[i]*/)
         {
            if (echo)
               st->noise[i] = .95f*st->noise[i] + .05f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*1.0*echo[i]);
            else
               st->noise[i] = .95f*st->noise[i] + .05f*st->ps[i];
         }
      }
   }

   /* zeta: a priori SNR smoothed over time (weights .7 / .3) */
   for (i=1;i<N;i++)
   {
      st->zeta[i] = .7f*st->zeta[i] + .3f*st->prior[i];
   }

   /* Average zeta over the bins spanning 300 Hz to 2000 Hz
      (bin index = frequency * 2*N / sampling_rate, truncated).
      NOTE(review): if freq_end equals freq_start the division below is by
      zero -- confirm this cannot happen for supported N / sampling_rate. */
   {
      int freq_start = (int)(300.0f*2.f*N/st->sampling_rate);
      int freq_end   = (int)(2000.0f*2.f*N/st->sampling_rate);
      for (i=freq_start;i<freq_end;i++)
      {
         Zframe += st->zeta[i];         
      }
      Zframe /= (freq_end-freq_start);
   }
   st->Zlast = Zframe;

   /* qcurve() is defined elsewhere; it is applied to both the frame-level
      value here and the per-bin value below */
   Pframe = qcurve(Zframe);

   /*fprintf (stderr, "%f\n", Pframe);*/
   /* Compute gain according to the Ephraim-Malah algorithm */
   for (i=1;i<N;i++)
   {
      float MM;
      float theta;
      float prior_ratio;
      float p, q;
      float zeta1;
      float P1;

      /* prior/(1+prior); the 1.0001 keeps the ratio strictly below 1 */
      prior_ratio = st->prior[i]/(1.0001f+st->prior[i]);
      theta = (1.f+st->post[i])*prior_ratio;

      /* Smooth zeta across neighbouring bins (.25/.5/.25), except at the
         first and last processed bins where the raw value is used */
      if (i==1 || i==N-1)
         zeta1 = st->zeta[i];
      else
         zeta1 = .25f*st->zeta[i-1] + .5f*st->zeta[i] + .25f*st->zeta[i+1];
      P1 = qcurve (zeta1);
      
      /* FIXME: add global prob (P2) */
      /* The first assignment is immediately overwritten, so Pframe has no
         effect on q (and therefore none on the gains computed here) */
      q = 1-Pframe*P1;
      q = 1-P1;
      /* Cap q at .95 so that 1-q in the denominator below stays away from 0 */
      if (q>.95f)
         q=.95f;
      p=1.f/(1.f + (q/(1.f-q))*(1.f+st->prior[i])*exp(-theta));
      /*p=1;*/

      /* Optimal estimator for loudness domain (hypergeom_gain() is defined elsewhere) */
      MM = hypergeom_gain(theta);

      st->gain[i] = prior_ratio * MM;
      /*Put some (very arbitrary) limit on the gain*/
      if (st->gain[i]>2.f)
      {
         st->gain[i]=2.f;
      }
      
      /* Reverberation estimate: decayed previous value plus a decayed,
         reverb_level-scaled share of the gained power in this bin */
      st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];
      /* gain2 is the gain actually applied to the spectrum. With denoising
         enabled it is the square of a p-weighted blend of sqrt(gain) and the
         constant .2; otherwise unity. */
      if (st->denoise_enabled)
      {
         /*st->gain2[i] = p*p*st->gain[i];*/
         st->gain2[i]=(p*sqrt(st->gain[i])+.2*(1-p)) * (p*sqrt(st->gain[i])+.2*(1-p));
         /*st->gain2[i] = pow(st->gain[i], p) * pow(.1f,1.f-p);*/
      } else {
         st->gain2[i]=1.f;
      }
   }
   
   /* Force zero gain at bin 0 and at bin N-1 (overriding the loop's value for N-1) */
   st->gain2[0]=st->gain[0]=0.f;
   st->gain2[N-1]=st->gain[N-1]=0.f;
   /* Disabled experiment: smoothing of gain2 across neighbouring bins.
   for (i=30;i<N-2;i++)
   {
      st->gain[i] = st->gain2[i]*st->gain2[i] + (1-st->gain2[i])*.333*(.6*st->gain2[i-1]+st->gain2[i]+.6*st->gain2[i+1]+.4*st->gain2[i-2]+.4*st->gain2[i+2]);
   }
   for (i=30;i<N-2;i++)
      st->gain2[i] = st->gain[i];
   */
   if (st->agc_enabled)
      speex_compute_agc(st, mean_prior);

#if 0
   if (!is_speech)
   {
      for (i=0;i<N;i++)
         st->gain2[i] = 0;
   }
#if 0
 else {
      for (i=0;i<N;i++)
         st->gain2[i] = 1;
   }
#endif
#endif

   /* Apply computed gain: bin i occupies frame[2*i-1] and frame[2*i] */
   for (i=1;i<N;i++)
   {
      st->frame[2*i-1] *= st->gain2[i];
      st->frame[2*i] *= st->gain2[i];
   }

   /* Get rid of the DC and very low frequencies */
   st->frame[0]=0;
   st->frame[1]=0;
   st->frame[2]=0;
   /* Nyquist frequency is mostly useless too */
   st->frame[2*N-1]=0;

   /* Inverse FFT, then scale every sample by scale (= .5/N) */
   spx_drft_backward(st->fft_lookup, st->frame);

   for (i=0;i<2*N;i++)
      st->frame[i] *= scale;

   /* Peak limiter: if any sample magnitude exceeds 28000, scale the whole
      frame so that the peak becomes 28000 */
   {
      float max_sample=0;
      for (i=0;i<2*N;i++)
         if (fabs(st->frame[i])>max_sample)
            max_sample = fabs(st->frame[i]);
      if (max_sample>28000.f)
      {
         float damp = 28000.f/max_sample;
         for (i=0;i<2*N;i++)
            st->frame[i] *= damp;
      }
   }

   /* Apply the synthesis window */
   for (i=0;i<2*N;i++)
      st->frame[i] *= st->window[i];

   /* Perform overlap and add: first N3 samples combine with the saved tail
      of the previous frame, the next N4 come from this frame alone */
   for (i=0;i<N3;i++)
      x[i] = st->outbuf[i] + st->frame[i];
   for (i=0;i<N4;i++)
      x[N3+i] = st->frame[N3+i];
   
   /* Update outbuf with the N3 samples following the first frame_size ones */
   for (i=0;i<N3;i++)
      st->outbuf[i] = st->frame[st->frame_size+i];

   /* Save old power spectrum */
   for (i=1;i<N;i++)
      st->old_ps[i] = ps[i];

   return is_speech;
}