/* Derive LPC coefficients from a real-valued curve.
 *
 * curve : l->ln input samples.
 * lpc   : output buffer handed straight to vorbis_lpc_from_data().
 * l     : lookup supplying the curve length (ln), the LPC order (m) and
 *         the FFT state used for the inverse transform.
 *
 * Returns whatever vorbis_lpc_from_data() returns for the transformed
 * 2*ln-point buffer.  The scratch buffer is taken from the stack with
 * alloca(), so it disappears when this function returns.
 */
float vorbis_lpc_from_curve(float *curve,float *lpc,lpc_lookup *l){
  int half=l->ln;
  int order=l->m;
  int full=half*2;
  float *work=alloca(sizeof(*work)*full);
  float fscale=.5f/half;
  int k;

  /* The input is a real curve; spread it into the even slots and zero
     the odd ones to get the packed layout the inverse transform wants.
     This mixes phase, but the LPC generation does not care. */
  for(k=0;k<half;k++){
    work[2*k]=curve[k]*fscale;
    work[2*k+1]=0;
  }
  /* The very last slot is overwritten with the final curve sample. */
  work[full-1]=curve[half-1]*fscale;

  drft_backward(&l->fft,work);

  /* The autocorrelation will not be circular.  Exchange the two halves
     of the buffer, otherwise most of the power is lost in the edges. */
  for(k=0;k<half;k++){
    float temp=work[k];
    work[k]=work[half+k];
    work[half+k]=temp;
  }

  /* Speed *could* be shaved here by skimping on the edges (which would
     speed up the autocorrelation in vorbis_lpc_from_data), but that is
     not done right now. */
  return(vorbis_lpc_from_data(work,lpc,full,order));
}
/* Convert a sample to 16 bits, clamping instead of overflowing.
 *
 * Converting a floating-point value whose integral part does not fit the
 * destination integer type is undefined behaviour in C, so the range is
 * checked first.  In-range values are truncated toward zero exactly as an
 * implicit float-to-short conversion would do.
 */
static short preprocess_sat16(double v)
{
   if (v > 32767.0)
      return 32767;
   if (v < -32768.0)
      return -32768;
   return (short)v;
}

/* Run one frame through the preprocessor.
 *
 * st   : preprocessor state; most of its per-bin arrays (post, prior, zeta,
 *        noise, echo_noise, old_ps, ...) are updated in place.
 * x    : input frame handed to preprocess_analysis().  When AGC or
 *        denoising is enabled, the first st->frame_size samples are
 *        overwritten with the processed output; otherwise this function
 *        does not write to x.
 * echo : optional residual echo estimate, indexed per bin; may be NULL.
 *
 * Returns 1, unless VAD is enabled, in which case the value returned by
 * speex_compute_vad() is passed through.
 */
int speex_preprocess(SpeexPreprocessState *st, short *x, float *echo)
{
   int i;
   int is_speech=1;
   float mean_post=0;
   float mean_prior=0;
   int N = st->ps_size;
   /* N3 samples overlap with the previous frame, N4 do not (N3+N4 == frame_size) */
   int N3 = 2*N - st->frame_size;
   int N4 = st->frame_size - N3;
   float scale=.5/N;
   float *ps=st->ps;
   float Zframe=0, Pframe;

   preprocess_analysis(st, x);

   update_noise_prob(st);

   st->nb_preprocess++;

   /* Noise estimate is updated unconditionally while nb_adapt is below 10.
      nb_adapt is not modified in this function; presumably update_noise()
      advances it -- verify there. */
   if (st->nb_adapt<10)
   {
      update_noise(st, ps, echo);
   }

   /* Deal with residual echo if provided: smooth it into echo_noise */
   if (echo)
      for (i=1;i<N;i++)
         st->echo_noise[i] = (.7*st->echo_noise[i] + .3* echo[i]);

   /* Compute a posteriori SNR (capped at 100 per bin; negative values are
      kept and only the mean is floored at 0) */
   for (i=1;i<N;i++)
   {
      st->post[i] = ps[i]/(1+st->noise[i]+st->echo_noise[i]) - 1;
      if (st->post[i]>100)
         st->post[i]=100;
      mean_post+=st->post[i];
   }
   mean_post /= N;
   if (mean_post<0)
      mean_post=0;

   /* Special case for first frame: no previous spectrum exists yet */
   if (st->nb_adapt==1)
      for (i=1;i<N;i++)
         st->old_ps[i] = ps[i];

   /* Compute a priori SNR */
   {
      /* A priori update rate, made smaller when there is no speech.
         mean_prior has not been accumulated yet at this point (it is
         still 0), so the rate is effectively .1*mean_post^2, limited to
         the range [.02, .15]. */
      float gamma = .1*fabs(mean_prior - mean_post)*fabs(mean_prior - mean_post);
      if (gamma>.15)
         gamma = .15;
      if (gamma<.02)
         gamma = .02;

      for (i=1;i<N;i++)
      {
         /* A priori SNR update: mix the current (non-negative) a posteriori
            SNR with the gained previous spectrum over the noise floor */
         st->prior[i] = gamma*max(0.0,st->post[i]) +
            (1-gamma)*st->gain[i]*st->gain[i]*st->old_ps[i]/(1+st->noise[i]+st->echo_noise[i]);
         if (st->prior[i]>100)
            st->prior[i]=100;
         mean_prior+=st->prior[i];
      }
   }
   mean_prior /= N;

   if (st->nb_preprocess>=20)
   {
      int do_update = 0;
      float noise_ener=0, sig_ener=0;
      /* If SNR is low (both a priori and a posteriori), count this frame
         as noise */
      if (mean_prior<.23 && mean_post < .5)
         do_update = 1;
      for (i=1;i<N;i++)
      {
         noise_ener += st->noise[i];
         sig_ener += ps[i];
      }
      /* Also count it as noise when the noise estimate dwarfs the signal */
      if (noise_ener > 3*sig_ener)
         do_update = 1;
      if (do_update)
      {
         st->consec_noise++;
      } else {
         st->consec_noise=0;
      }
   }

   if (st->vad_enabled)
      is_speech = speex_compute_vad(st, ps, mean_prior, mean_post);

   if (st->consec_noise>=3)
   {
      /* Three or more consecutive noise frames: full noise update from the
         previous frame's spectrum */
      update_noise(st, st->old_ps, echo);
   } else {
      /* Otherwise only adapt the bins whose update probability is low.
         Note the loop stops at N-2, unlike the other per-bin loops. */
      for (i=1;i<N-1;i++)
      {
         if (st->update_prob[i]<.5)
            st->noise[i] = .90*st->noise[i] + .1*st->ps[i];
      }
   }

   /* Smoothed a priori SNR */
   for (i=1;i<N;i++)
   {
      st->zeta[i] = .7*st->zeta[i] + .3*st->prior[i];
   }

   /* Sum zeta over the bins corresponding to 300 Hz .. 2000 Hz */
   {
      /* NOTE(review): freq_end is not clamped to N; for sampling rates
         below 4000 Hz it would index past the bins used elsewhere in this
         function -- confirm callers never configure that. */
      int freq_start = (int)(300.0*2*N/st->sampling_rate);
      int freq_end   = (int)(2000.0*2*N/st->sampling_rate);
      for (i=freq_start;i<freq_end;i++)
      {
         Zframe += st->zeta[i];
      }
   }

   /* Map the band average onto a frame-level probability in [0,1] */
   Zframe /= N;
   if (Zframe<ZMIN)
   {
      Pframe = 0;
   } else {
      if (Zframe > 1.5*st->Zlast)
      {
         /* Sharp rise since the last frame: record a new peak, kept
            within [1,10] */
         Pframe = 1;
         st->Zpeak = Zframe;
         if (st->Zpeak > 10)
            st->Zpeak = 10;
         if (st->Zpeak < 1)
            st->Zpeak = 1;
      } else {
         if (Zframe < st->Zpeak*ZMIN)
         {
            Pframe = 0;
         } else if (Zframe > st->Zpeak*ZMAX)
         {
            Pframe = 1;
         } else {
            /* Log-scale interpolation between the two peak-relative
               thresholds */
            Pframe = log(Zframe/(st->Zpeak*ZMIN)) / log(ZMAX/ZMIN);
         }
      }
   }
   st->Zlast = Zframe;

   /* Compute gain according to the Ephraim-Malah algorithm */
   ephraim_malah(st,N,Pframe);

   if (st->agc_enabled)
      speex_compute_agc(st, mean_prior);

   /* PERF: 14% when only vad is enabled [7.0 vs 8.2 sec] */
   if(st->agc_enabled || st->denoise_enabled)
   {
      /* Apply computed gain to both packed components of every bin */
      for (i=1;i<N;i++)
      {
         st->frame[2*i-1] *= st->gain2[i];
         st->frame[2*i] *= st->gain2[i];
      }

      /* Get rid of the DC and very low frequencies */
      st->frame[0]=0;
      st->frame[1]=0;
      st->frame[2]=0;
      /* Nyquist frequency is mostly useless too */
      st->frame[2*N-1]=0;

      /* Inverse FFT, then scale by .5/N */
      drft_backward(st->fft_lookup, st->frame);
      for (i=0;i<2*N;i++)
         st->frame[i] *= scale;

      /* Scale the whole frame down if its peak exceeds 28000 */
      {
         float max_sample=0;
         for (i=0;i<2*N;i++)
            if (fabs(st->frame[i])>max_sample)
               max_sample = fabs(st->frame[i]);
         if (max_sample>28000)
         {
            float damp = 28000./max_sample;
            for (i=0;i<2*N;i++)
               st->frame[i] *= damp;
         }
      }

      /* Synthesis window */
      for (i=0;i<2*N;i++)
         st->frame[i] *= st->window[i];

      /* Perform overlap and add.  The limiter above runs before the
         window and before the previous tail is added, so the sum is
         saturated to 16 bits rather than converted blindly. */
      for (i=0;i<N3;i++)
         x[i] = preprocess_sat16(st->outbuf[i] + st->frame[i]);
      for (i=0;i<N4;i++)
         x[N3+i] = preprocess_sat16(st->frame[N3+i]);

      /* Update outbuf with the tail to be overlapped with the next frame */
      for (i=0;i<N3;i++)
         st->outbuf[i] = st->frame[st->frame_size+i];
   }

   /* Save old power spectrum */
   for (i=1;i<N;i++)
      st->old_ps[i] = ps[i];

   return is_speech;
}