/*
 * Split-codebook shape(/sign) search that keeps the N best candidate paths
 * while walking through the nb_subvect sub-vectors of the target.
 *
 * N is derived from `complexity`: capped at 10, scaled by 2/3, floored at 1.
 * When N == 1 the whole job is delegated to split_cb_search_shape_sign_N1().
 *
 * On return:
 *   - one index per sub-vector (taken from the best path, nind[0]) has been
 *     packed into `bits` using params->shape_bits + have_sign bits each;
 *   - the selected codewords (sign applied, scaled by 0.03125 in float or
 *     shifted left by SIG_SHIFT-5 in fixed point) have been added to exc[];
 *   - if update_target is non-zero, target[] has been reduced by the
 *     selected excitation after it went through syn_percep_zero16().
 *
 * `r` is handed to compute_weighted_codebook() and target_update().
 * NOTE(review): presumably the weighted impulse response -- confirm at the
 * call site. `stack` is forwarded to the helpers; the VARDECL()/ALLOC()
 * macro definitions are not visible here.
 */
void split_cb_search_shape_sign(
spx_word16_t target[],			/* target vector */
spx_coef_t ak[],			/* LPCs for this subframe */
spx_coef_t awk1[],			/* Weighted LPCs for this subframe */
spx_coef_t awk2[],			/* Weighted LPCs for this subframe */
const void *par,                      /* Codebook/search parameters*/
int   p,                        /* number of LPC coeffs */
int   nsf,                      /* number of samples in subframe */
spx_sig_t *exc,
spx_word16_t *r,
SpeexBits *bits,
char *stack,
int   complexity,
int   update_target
)
{
   int i,j,k,m,n,q;
   VARDECL(spx_word16_t *resp);
#ifdef _USE_SSE
   VARDECL(__m128 *resp2);
   VARDECL(__m128 *E);
#else
   spx_word16_t *resp2;
   VARDECL(spx_word32_t *E);
#endif
   VARDECL(spx_word16_t *t);
   VARDECL(spx_sig_t *e);
   VARDECL(spx_word16_t *tmp);
   VARDECL(spx_word32_t *ndist);
   VARDECL(spx_word32_t *odist);
   VARDECL(int *itmp);
   VARDECL(spx_word16_t **ot2);
   VARDECL(spx_word16_t **nt2);
   spx_word16_t **ot, **nt;
   VARDECL(int **nind);
   VARDECL(int **oind);
   VARDECL(int *ind);
   const signed char *shape_cb;
   int shape_cb_size, subvect_size, nb_subvect;
   const split_cb_params *params;
   int N=2;
   VARDECL(int *best_index);
   VARDECL(spx_word32_t *best_dist);
   VARDECL(int *best_nind);
   VARDECL(int *best_ntarget);
   int have_sign;
   N=complexity;
   if (N>10)
      N=10;
   /* Complexity isn't as important for the codebooks as it is for the pitch */
   N=(2*N)/3;
   if (N<1)
      N=1;
   /* A single candidate needs none of the n-best bookkeeping below */
   if (N==1)
   {
      split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target);
      return;
   }
   ALLOC(ot2, N, spx_word16_t*);
   ALLOC(nt2, N, spx_word16_t*);
   ALLOC(oind, N, int*);
   ALLOC(nind, N, int*);

   params = (const split_cb_params *) par;
   subvect_size = params->subvect_size;
   nb_subvect = params->nb_subvect;
   shape_cb_size = 1<<params->shape_bits;
   shape_cb = params->shape_cb;
   have_sign = params->have_sign;
   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
#ifdef _USE_SSE
   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
   ALLOC(E, shape_cb_size>>2, __m128);
#else
   /* Without SSE the search reads the same buffer that holds the responses */
   resp2 = resp;
   ALLOC(E, shape_cb_size, spx_word32_t);
#endif
   ALLOC(t, nsf, spx_word16_t);
   ALLOC(e, nsf, spx_sig_t);
   ALLOC(ind, nb_subvect, int);

   /* tmp holds 2*N target buffers of nsf samples, interleaved old/new:
      ot2[i] is slot 2*i, nt2[i] is slot 2*i+1 */
   ALLOC(tmp, 2*N*nsf, spx_word16_t);
   for (i=0;i<N;i++)
   {
      ot2[i]=tmp+2*i*nsf;
      nt2[i]=tmp+(2*i+1)*nsf;
   }
   ot=ot2;
   nt=nt2;
   ALLOC(best_index, N, int);
   ALLOC(best_dist, N, spx_word32_t);
   ALLOC(best_nind, N, int);
   ALLOC(best_ntarget, N, int);
   ALLOC(ndist, N, spx_word32_t);
   ALLOC(odist, N, spx_word32_t);

   /* itmp holds 2*N index histories of nb_subvect entries, interleaved new/old */
   ALLOC(itmp, 2*N*nb_subvect, int);
   for (i=0;i<N;i++)
   {
      nind[i]=itmp+2*i*nb_subvect;
      oind[i]=itmp+(2*i+1)*nb_subvect;
   }

   /* Every candidate path starts from the same copy of the target */
   SPEEX_COPY(t, target, nsf);

   for (j=0;j<N;j++)
      SPEEX_COPY(&ot[j][0], t, nsf);

   /* Pre-compute codewords response and energy */
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);

   for (j=0;j<N;j++)
      odist[j]=0;

   /*For all subvectors*/
   for (i=0;i<nb_subvect;i++)
   {
      /*"erase" nbest list*/
      for (j=0;j<N;j++)
         ndist[j]=VERY_LARGE32;
      /* This is not strictly necessary, but it provides an additional safety
         to prevent crashes in case something goes wrong in the previous
         steps (e.g. NaNs) */
      for (j=0;j<N;j++)
         best_nind[j] = best_ntarget[j] = 0;
      /*For all n-bests of previous subvector*/
      for (j=0;j<N;j++)
      {
         spx_word16_t *x=ot[j]+subvect_size*i;
         /* tener = half the energy of this sub-vector of candidate j's target */
         spx_word32_t tener = 0;
         for (m=0;m<subvect_size;m++)
            tener = MAC16_16(tener, x[m],x[m]);
#ifdef FIXED_POINT
         tener = SHR32(tener,1);
#else
         tener *= .5;
#endif
         /*Find new n-best based on previous n-best j*/
#ifndef DISABLE_WIDEBAND
         if (have_sign)
            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
         else
#endif /* DISABLE_WIDEBAND */
            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);

         /*For all new n-bests*/
         for (k=0;k<N;k++)
         {
            /* Compute total distance (including previous sub-vectors) */
            spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);

            /*update n-best list*/
            if (err<ndist[N-1])
            {
               /* Sorted insertion: find the first slot m that err beats,
                  shift the tail down by one, then store the new entry */
               for (m=0;m<N;m++)
               {
                  if (err < ndist[m])
                  {
                     for (n=N-1;n>m;n--)
                     {
                        ndist[n] = ndist[n-1];
                        best_nind[n] = best_nind[n-1];
                        best_ntarget[n] = best_ntarget[n-1];
                     }
                     /* n is equal to m here, so they're interchangeable */
                     ndist[m] = err;
                     best_nind[n] = best_index[k];
                     best_ntarget[n] = j;
                     break;
                  }
               }
            }
         }
         /* On the first sub-vector all ot[j] are identical copies and all
            odist[j] are zero, so searching candidate 0 is enough */
         if (i==0)
            break;
      }
      for (j=0;j<N;j++)
      {
         /*previous target (we don't care what happened before)*/
         for (m=(i+1)*subvect_size;m<nsf;m++)
            nt[j][m]=ot[best_ntarget[j]][m];

         /* New code: update the rest of the target only if it's worth it */
         for (m=0;m<subvect_size;m++)
         {
            spx_word16_t g;
            int rind;
            spx_word16_t sign=1;
            rind = best_nind[j];
            /* Indices at or above shape_cb_size select the negated codeword */
            if (rind>=shape_cb_size)
            {
               sign=-1;
               rind-=shape_cb_size;
            }

            q=subvect_size-m;
#ifdef FIXED_POINT
            g=sign*shape_cb[rind*subvect_size+m];
#else
            g=sign*0.03125*shape_cb[rind*subvect_size+m];
#endif
            /* Remove sample m's contribution from the samples after this sub-vector */
            target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
         }

         /* New path j = history of the path it extends + the index just chosen */
         for (q=0;q<nb_subvect;q++)
            nind[j][q]=oind[best_ntarget[j]][q];
         nind[j][i]=best_nind[j];
      }

      /*update old-new data*/
      /* just swap pointers instead of a long copy */
      {
         spx_word16_t **tmp2;
         tmp2=ot;
         ot=nt;
         nt=tmp2;
      }
      for (j=0;j<N;j++)
         for (m=0;m<nb_subvect;m++)
            oind[j][m]=nind[j][m];
      for (j=0;j<N;j++)
         odist[j]=ndist[j];
   }

   /*save indices*/
   /* nind[0] is the path with the smallest accumulated distance */
   for (i=0;i<nb_subvect;i++)
   {
      ind[i]=nind[0][i];
      speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
   }

   /* Put everything back together */
   for (i=0;i<nb_subvect;i++)
   {
      int rind;
      spx_word16_t sign=1;
      rind = ind[i];
      if (rind>=shape_cb_size)
      {
         sign=-1;
         rind-=shape_cb_size;
      }
#ifdef FIXED_POINT
      if (sign==1)
      {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
      } else {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
      }
#else
      for (j=0;j<subvect_size;j++)
         e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
#endif
   }
   /* Update excitation */
   for (j=0;j<nsf;j++)
      exc[j]=ADD32(exc[j],e[j]);

   /* Update target: only update target if necessary */
   if (update_target)
   {
      VARDECL(spx_word16_t *r2);
      ALLOC(r2, nsf, spx_word16_t);
      for (j=0;j<nsf;j++)
         r2[j] = EXTRACT16(PSHR32(e[j] ,6));
      /* r2 is both input and output of this call */
      syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
      for (j=0;j<nsf;j++)
         target[j]=SUB16(target[j],PSHR16(r2[j],2));
   }
}
/*
 * Subtract a scaled copy of r[] from t[], in place, over the first `len`
 * samples: each t[k] loses MULT16_16(g, r[k]) after a rounding right shift
 * of 13 (PSHR32). Nothing is touched when len <= 0.
 */
static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len)
{
   int k = 0;

   while (k < len)
   {
      t[k] = SUB16(t[k],PSHR32(MULT16_16(g,r[k]),13));
      k++;
   }
}
/*
 * Convert LSPs to LPC coefficients (integer-arithmetic variant).
 *
 * Builds two triangular tables, xp and xq, one row per cascaded second-order
 * section, then combines the last row of each into ak[0..lpcrdr-1], clamped
 * to +/-32767. When FIXED_LPC_SIZE is defined, the row pointers and freqn
 * live in fixed-size local arrays and FIXED_LPC_SIZE replaces lpcrdr as the
 * row stride and loop bound; the xpmem/xqmem allocations still use lpcrdr.
 * NOTE(review): that presumably requires lpcrdr == FIXED_LPC_SIZE -- confirm.
 */
void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
/*  float *freq 	array of LSP frequencies in the x domain	*/
/*  float *ak 		array of LPC coefficients 			*/
/*  int lpcrdr  	order of LPC coefficients 			*/
{
    int i,j;
    spx_word32_t xout1,xout2,xin;
    spx_word32_t mult, a;
    VARDECL(spx_word32_t *xpmem);
    VARDECL(spx_word32_t *xqmem);
#ifndef FIXED_LPC_SIZE
    VARDECL(spx_word16_t *freqn);
    VARDECL(spx_word32_t **xp);
    VARDECL(spx_word32_t **xq);
#else
    spx_word16_t freqn[FIXED_LPC_SIZE];
    spx_word32_t *xp[(FIXED_LPC_SIZE/2)+1];
    spx_word32_t *xq[(FIXED_LPC_SIZE/2)+1];
#endif
    int m = lpcrdr>>1;

    /*

       Reconstruct P(z) and Q(z) by cascading second order polynomials
       in form 1 - 2cos(w)z(-1) + z(-2), where w is the LSP frequency.
       In the time domain this is:

       y(n) = x(n) - 2cos(w)x(n-1) + x(n-2)

       This is what the ALLOCS below are trying to do:

         int xp[m+1][lpcrdr+1+2]; // P matrix in QIMP
         int xq[m+1][lpcrdr+1+2]; // Q matrix in QIMP

       These matrices store the output of each stage on each row.  The
       final (m-th) row has the output of the final (m-th) cascaded
       2nd order filter.  The first row is the impulse input to the
       system (not written as it is known).

       The version below takes advantage of the fact that a lot of the
       outputs are zero or known, for example if we put an impulse
       into the first section then "clock" it 10 times only the first 3
       output samples are non-zero (it's an FIR filter).
    */

#ifndef FIXED_LPC_SIZE
    ALLOC(xp, (m+1), spx_word32_t*);
#endif
    ALLOC(xpmem, (m+1)*(lpcrdr+1+2), spx_word32_t);
#ifndef FIXED_LPC_SIZE
    ALLOC(xq, (m+1), spx_word32_t*);
#endif
    ALLOC(xqmem, (m+1)*(lpcrdr+1+2), spx_word32_t);

    /* Point each row at its slice of the flat backing storage */
#ifndef FIXED_LPC_SIZE
    for(i=0; i<=m; i++) {
      xp[i] = xpmem + i*(lpcrdr+1+2);
      xq[i] = xqmem + i*(lpcrdr+1+2);
    }
#else
    for(i=0; i<=m; i++) {
      xp[i] = xpmem + i*(FIXED_LPC_SIZE+1+2);
      xq[i] = xqmem + i*(FIXED_LPC_SIZE+1+2);
    }
#endif

    /* work out 2cos terms in Q14 */

#ifndef FIXED_LPC_SIZE
    ALLOC(freqn, lpcrdr, spx_word16_t);
    for (i=0;i<lpcrdr;i++)
       freqn[i] = ANGLE2X(freq[i]);
#else
    for (i=0;i<FIXED_LPC_SIZE;i++)
       freqn[i] = ANGLE2X(freq[i]);
#endif

    #define QIMP  21   /* scaling for impulse */

    xin = SHL32(EXTEND32(1), (QIMP-1)); /* 0.5 in QIMP format */

    /* first col and last non-zero values of each row are trivial */

    for(i=0;i<=m;i++) {
     xp[i][1] = 0;
     xp[i][2] = xin;
     xp[i][2+2*i] = xin;
     xq[i][1] = 0;
     xq[i][2] = xin;
     xq[i][2+2*i] = xin;
    }

    /* 2nd row (first output row) is trivial */

    xp[1][3] = -MULT16_32_Q14(freqn[0],xp[0][2]);
    xq[1][3] = -MULT16_32_Q14(freqn[1],xq[0][2]);

    xout1 = xout2 = 0;

    /* now generate remaining rows */
    /* even-indexed freqn entries feed the xp table, odd-indexed feed xq */

    for(i=1;i<m;i++) {

      for(j=1;j<2*(i+1)-1;j++) {
	mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]);
	xp[i+1][j+2] = ADD32(SUB32(xp[i][j+2], mult), xp[i][j]);
	mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]);
	xq[i+1][j+2] = ADD32(SUB32(xq[i][j+2], mult), xq[i][j]);
      }

      /* for last col xp[i][j+2] = xq[i][j+2] = 0 */
      /* j still holds the inner loop's exit value, 2*i+1 */

      mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]);
      xp[i+1][j+2] = SUB32(xp[i][j], mult);
      mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]);
      xq[i+1][j+2] = SUB32(xq[i][j], mult);
    }

    /* process last row to extract a{k} */

#ifndef FIXED_LPC_SIZE
    for(j=1;j<=lpcrdr;j++) {
#else
    for(j=1;j<=FIXED_LPC_SIZE;j++) {
#endif
      int shift = QIMP-13;

      /* final filter sections */
      /* xout1/xout2 carry the previous column's xp/xq values */
      a = PSHR32(xp[m][j+2] + xout1 + xq[m][j+2] - xout2, shift);
      xout1 = xp[m][j+2];
      xout2 = xq[m][j+2];

      /* hard limit ak's to +/- 32767 */

      if (a < -32767) a = -32767;
      if (a > 32767) a = 32767;
      ak[j-1] = (short)a;

    }

}

/* NOTE(review): the #if that this #else belongs to is outside the visible
   chunk; presumably it selects the fixed-point build -- confirm. */
#else

/*
 * Convert LSPs to LPC coefficients (float variant).
 *
 * Feeds a unit impulse followed by zeros (lpcrdr+1 samples in total) through
 * two cascades of m second-order sections whose state is kept in Wp, and
 * writes ak[j-1] = (xout1 + xout2)*0.5f for each sample j > 0.
 */
void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
/*  float *freq 	array of LSP frequencies in the x domain	*/
/*  float *ak 		array of LPC coefficients 			*/
/*  int lpcrdr  	order of LPC coefficients 			*/
{
    int i,j;
    float xout1,xout2,xin1,xin2;
    VARDECL(float *Wp);
    float *pw,*n1,*n2,*n3,*n4=NULL;
    VARDECL(float *x_freq);
    int m = lpcrdr>>1;

    /* 4 state values per section, plus 2 for the final first-order stage */
    ALLOC(Wp, 4*m+2, float);
    pw = Wp;

    /* initialise contents of array */

    for(i=0;i<=4*m+1;i++){       	/* set contents of buffer to 0 */
	*pw++ = 0.0;
    }

    /* Set pointers up */

    pw = Wp;
    xin1 = 1.0;
    xin2 = 1.0;

    ALLOC(x_freq, lpcrdr, float);
    for (i=0;i<lpcrdr;i++)
       x_freq[i] = ANGLE2X(freq[i]);

    /* reconstruct P(z) and Q(z) by  cascading second order
      polynomials in form 1 - 2xz(-1) +z(-2), where x is the
      LSP coefficient */

    for(j=0;j<=lpcrdr;j++){
       int i2=0;
	for(i=0;i<m;i++,i2+=2){
	    /* n1,n2 are the two delays of the first cascade, n3,n4 of the second */
	    n1 = pw+(i*4);
	    n2 = n1 + 1;
	    n3 = n2 + 1;
	    n4 = n3 + 1;
	    xout1 = xin1 - 2.f*x_freq[i2] * *n1 + *n2;
	    xout2 = xin2 - 2.f*x_freq[i2+1] * *n3 + *n4;
	    *n2 = *n1;
	    *n4 = *n3;
	    *n1 = xin1;
	    *n3 = xin2;
	    xin1 = xout1;
	    xin2 = xout2;
	}
	/* n4 was left pointing into the last section, so n4+1 and n4+2 are
	   the two extra slots at the end of Wp (n4 stays NULL if m == 0) */
	xout1 = xin1 + *(n4+1);
	xout2 = xin2 - *(n4+2);
	if (j>0)
	   ak[j-1] = (xout1 + xout2)*0.5f;
	*(n4+1) = xin1;
	*(n4+2) = xin2;

	/* after the first sample the input is zero (impulse input) */
	xin1 = 0.0;
	xin2 = 0.0;
    }

}
static void split_cb_search_shape_sign_N1( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int update_target ) { int i,j,m,q; VARDECL(spx_word16_t *resp); #ifdef _USE_SSE VARDECL(__m128 *resp2); VARDECL(__m128 *E); #else spx_word16_t *resp2; VARDECL(spx_word32_t *E); #endif VARDECL(spx_word16_t *t); VARDECL(spx_sig_t *e); const signed char *shape_cb; int shape_cb_size, subvect_size, nb_subvect; const split_cb_params *params; int best_index; spx_word32_t best_dist; int have_sign; params = (const split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t); #ifdef _USE_SSE ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128); ALLOC(E, shape_cb_size>>2, __m128); #else resp2 = resp; ALLOC(E, shape_cb_size, spx_word32_t); #endif ALLOC(t, nsf, spx_word16_t); ALLOC(e, nsf, spx_sig_t); /* FIXME: Do we still need to copy the target? 
*/ SPEEX_COPY(t, target, nsf); compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack); for (i=0;i<nb_subvect;i++) { spx_word16_t *x=t+subvect_size*i; /*Find new n-best based on previous n-best j*/ #ifndef DISABLE_WIDEBAND if (have_sign) vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); else #endif /* DISABLE_WIDEBAND */ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); speex_bits_pack(bits,best_index,params->shape_bits+have_sign); { int rind; spx_word16_t *res; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } res = resp+rind*subvect_size; if (sign>0) for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]); else for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]); #ifdef FIXED_POINT if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5)); } #else for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j]; #endif } for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; #ifdef FIXED_POINT g=sign*shape_cb[rind*subvect_size+m]; #else g=sign*0.03125*shape_cb[rind*subvect_size+m]; #endif target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1)); } } /* Update excitation */ /* FIXME: We could update the excitation directly above */ for (j=0;j<nsf;j++) exc[j]=ADD32(exc[j],e[j]); /* Update target: only update target if necessary */ if (update_target) { VARDECL(spx_word16_t *r2); ALLOC(r2, nsf, spx_word16_t); for (j=0;j<nsf;j++) r2[j] = EXTRACT16(PSHR32(e[j] ,6)); syn_percep_zero16(r2, ak, awk1, awk2, 
r2, nsf,p, stack); for (j=0;j<nsf;j++) target[j]=SUB16(target[j],PSHR16(r2[j],2)); } }
/*
 * Allocate and fill a FilterBank of `banks` bands over `len` frequency bins.
 *
 * Each bin i is mapped through toBARK() and assigned to two adjacent bands
 * (bank_left[i], bank_right[i] = bank_left[i]+1) with complementary weights
 * filter_left[i] = Q15_ONE - w and filter_right[i] = w. The loop stops at the
 * first bin whose Bark value exceeds that of sampling/2; later entries keep
 * whatever speex_alloc() returned. In non-FIXED_POINT builds a per-band
 * scaling[] = Q15_ONE / (sum of weights landing in the band) is also built.
 *
 * `type` is not used. Allocation results are not checked here.
 */
FilterBank *filterbank_new(int banks, spx_word32_t sampling, int len, int type)
{
   FilterBank *fb;
   spx_word32_t bin_step;
   spx_word32_t bark_top, bark_step;
   int bin;
   int lower;

   /* Frequency spacing between bins, and the Bark range split into banks-1 steps */
   bin_step = DIV32(SHL32(sampling,15),MULT16_16(2,len));
   bark_top = toBARK(EXTRACT16(sampling/2));
   bark_step = PDIV32(bark_top,banks-1);

   fb = (FilterBank*)speex_alloc(sizeof(FilterBank));
   fb->nb_banks = banks;
   fb->len = len;

   fb->bank_left = (int*)speex_alloc(len*sizeof(int));
   fb->bank_right = (int*)speex_alloc(len*sizeof(int));
   fb->filter_left = (spx_word16_t*)speex_alloc(len*sizeof(spx_word16_t));
   fb->filter_right = (spx_word16_t*)speex_alloc(len*sizeof(spx_word16_t));
   /* Think I can safely disable normalisation for fixed-point (and probably float as well) */
#ifndef FIXED_POINT
   fb->scaling = (float*)speex_alloc(banks*sizeof(float));
#endif

   for (bin=0;bin<len;bin++)
   {
      spx_word16_t freq_here;
      spx_word32_t bark_here;
      spx_word16_t weight;

      freq_here = EXTRACT16(MULT16_32_P15(bin,bin_step));
      bark_here = toBARK(freq_here);
      if (bark_here > bark_top)
         break;

#ifdef FIXED_POINT
      lower = DIV32(bark_here,bark_step);
#else
      lower = (int)(floor(bark_here/bark_step));
#endif
      if (lower>banks-2)
      {
         /* Past the last pair of bands: clamp and give everything to the right band */
         lower = banks-2;
         weight = Q15_ONE;
      } else {
         weight = DIV32_16(bark_here - lower*bark_step,EXTRACT16(PSHR32(bark_step,15)));
      }

      fb->bank_left[bin] = lower;
      fb->filter_left[bin] = SUB16(Q15_ONE,weight);
      fb->bank_right[bin] = lower+1;
      fb->filter_right[bin] = weight;
   }

   /* Think I can safely disable normalisation for fixed-point (and probably float as well) */
#ifndef FIXED_POINT
   for (bin=0;bin<fb->nb_banks;bin++)
      fb->scaling[bin] = 0;
   for (bin=0;bin<fb->len;bin++)
   {
      int band = fb->bank_left[bin];
      fb->scaling[band] += fb->filter_left[bin];
      band = fb->bank_right[bin];
      fb->scaling[band] += fb->filter_right[bin];
   }
   for (bin=0;bin<fb->nb_banks;bin++)
      fb->scaling[bin] = Q15_ONE/(fb->scaling[bin]);
#endif
   return fb;
}
/*
 * Convert LPC coefficients to LSPs by locating sign changes of two
 * polynomials P' and Q' built from a[], evaluated with cheb_poly_eva().
 *
 * The scan starts at x = FREQ_SCALE and steps downward while
 * xr >= -FREQ_SCALE, alternating between P' (even j) and Q' (odd j). Each
 * detected sign change is refined with nb+1 bisection steps and stored as
 * freq[j] = X2ANGLE(xm).
 *
 * Returns the number of roots found; entries of freq[] for which no sign
 * change was detected are not written.
 */
int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t delta, char *stack)
/*  float *a 		     	lpc coefficients			*/
/*  int lpcrdr			order of LPC coefficients (10) 		*/
/*  float *freq 	      	LSP frequencies in the x domain       	*/
/*  int nb			number of sub-intervals (4) 		*/
/*  float delta			grid spacing interval (0.02) 		*/
{
    spx_word16_t temp_xr,xl,xr,xm=0;
    spx_word32_t psuml,psumr,psumm,temp_psumr/*,temp_qsumr*/;
    int i,j,m,flag,k;
#ifndef FIXED_LPC_SIZE
    VARDECL(spx_word32_t *Q);                 	/* ptrs for memory allocation 		*/
    VARDECL(spx_word32_t *P);
    VARDECL(spx_word16_t *Q16);         /* ptrs for memory allocation 		*/
    VARDECL(spx_word16_t *P16);
#else
    spx_word32_t Q[(FIXED_LPC_SIZE/2)+1];     	/* ptrs for memory allocation 		*/
    spx_word32_t P[(FIXED_LPC_SIZE/2)+1];
    spx_word16_t Q16[(FIXED_LPC_SIZE/2)+1];     /* ptrs for memory allocation 		*/
    spx_word16_t P16[(FIXED_LPC_SIZE/2)+1];
#endif
    spx_word32_t *px;                	/* ptrs of respective P'(z) & Q'(z)	*/
    spx_word32_t *qx;
    spx_word32_t *p;
    spx_word32_t *q;
    spx_word16_t *pt;                	/* ptr used for cheb_poly_eval()
				whether P' or Q' 			*/
    int roots=0;              	/* DR 8/2/94: number of roots found 	*/
    flag = 1;                	/*  program is searching for a root when,
				1 else has found one 			*/
    m = lpcrdr/2;            	/* order of P'(z) & Q'(z) polynomials 	*/

#ifndef FIXED_LPC_SIZE
    /* Allocate memory space for polynomials */
    ALLOC(Q, (m+1), spx_word32_t);
    ALLOC(P, (m+1), spx_word32_t);
#endif

    /* determine P'(z)'s and Q'(z)'s coefficients where
      P'(z) = P(z)/(1 + z^(-1)) and Q'(z) = Q(z)/(1-z^(-1)) */

    px = P;                      /* initialise ptrs 			*/
    qx = Q;
    /* p and q trail px and qx by one element, so each new coefficient is
       built from the one written just before it */
    p = px;
    q = qx;

#ifdef FIXED_POINT
    *px++ = LPC_SCALING;
    *qx++ = LPC_SCALING;
#ifndef FIXED_LPC_SIZE
    for(i=0;i<m;i++){
       *px++ = SUB32(ADD32(EXTEND32(a[i]),EXTEND32(a[lpcrdr-i-1])), *p++);
       *qx++ = ADD32(SUB32(EXTEND32(a[i]),EXTEND32(a[lpcrdr-i-1])), *q++);
    }
#else
    for(i=0;i<(FIXED_LPC_SIZE/2);i++){
       *px++ = SUB32(ADD32(EXTEND32(a[i]),EXTEND32(a[FIXED_LPC_SIZE-i-1])), *p++);
       *qx++ = ADD32(SUB32(EXTEND32(a[i]),EXTEND32(a[FIXED_LPC_SIZE-i-1])), *q++);
    }
#endif
    px = P;
    qx = Q;
    /* Scale the first m coefficients down by 4 (rounded) */
    for(i=0;i<m;i++)
    {
       /*if (fabs(*px)>=32768)
          speex_warning_int("px", *px);
       if (fabs(*qx)>=32768)
       speex_warning_int("qx", *qx);*/
       *px = PSHR32(*px,2);
       *qx = PSHR32(*qx,2);
       px++;
       qx++;
    }
    /* The reason for this lies in the way cheb_poly_eva() is implemented for fixed-point */
    P[m] = PSHR32(P[m],3);
    Q[m] = PSHR32(Q[m],3);
#else
    *px++ = LPC_SCALING;
    *qx++ = LPC_SCALING;
    for(i=0;i<m;i++){
       *px++ = (a[i]+a[lpcrdr-1-i]) - *p++;
       *qx++ = (a[i]-a[lpcrdr-1-i]) + *q++;
    }
    px = P;
    qx = Q;
    /* Double the first m coefficients; the last one is left as is */
    for(i=0;i<m;i++){
       *px = 2**px;
       *qx = 2**qx;
       px++;
       qx++;
    }
#endif

    px = P;             	/* re-initialise ptrs 			*/
    qx = Q;

    /* now that we have computed P and Q convert to 16 bits to
       speed up cheb_poly_eval */

#ifndef FIXED_LPC_SIZE
    ALLOC(P16, m+1, spx_word16_t);
    ALLOC(Q16, m+1, spx_word16_t);
#endif

    for (i=0;i<m+1;i++)
    {
       P16[i] = P[i];
       Q16[i] = Q[i];
    }

    /* Search for a zero in P'(z) polynomial first and then alternate to Q'(z).
    Keep alternating between the two polynomials as each zero is found 	*/

    xr = 0;             	/* initialise xr to zero 		*/
    xl = FREQ_SCALE;               	/* start at point xl = 1 		*/

    for(j=0;j<lpcrdr;j++){
	if(j&1)            	/* determines whether P' or Q' is eval. */
	    pt = Q16;
	else
	    pt = P16;

	psuml = cheb_poly_eva(pt,xl,m,stack);	/* evals poly. at xl 	*/
	flag = 1;
	/* Step left until a sign change is bracketed or the scan passes
	   -FREQ_SCALE; xl is not reset between roots, so each search resumes
	   where the previous one ended */
	while(flag && (xr >= -FREQ_SCALE)){
           spx_word16_t dd;
           /* Modified by JMV to provide smaller steps around x=+-1 */
#ifdef FIXED_POINT
           dd = MULT16_16_Q15(delta,SUB16(FREQ_SCALE, MULT16_16_Q14(MULT16_16_Q14(xl,xl),14000)));
           /* halve the step when the polynomial value is already small */
           if (psuml<512 && psuml>-512)
              dd = PSHR16(dd,1);
#else
           dd=delta*(1-.9*xl*xl);
           /* halve the step when the polynomial value is already small */
           if (fabs(psuml)<.2)
              dd *= .5;
#endif
           xr = SUB16(xl, dd);                        	/* interval spacing 	*/
	    psumr = cheb_poly_eva(pt,xr,m,stack);/* poly(xl-delta_x) 	*/
	    temp_psumr = psumr;
	    temp_xr = xr;

    /* if no sign change increment xr and re-evaluate poly(xr). Repeat til
    sign change.
    if a sign change has occurred the interval is bisected and then
    checked again for a sign change which determines in which
    interval the zero lies in.
    If there is no sign change between poly(xm) and poly(xl) set interval
    between xm and xr else set interval between xl and xr and repeat till
    root is located within the specified limits 			*/

	    if(SIGN_CHANGE(psumr,psuml))
            {
		roots++;

		psumm=psuml;
		/* nb+1 bisection steps */
		for(k=0;k<=nb;k++){
#ifdef FIXED_POINT
		    xm = ADD16(PSHR16(xl,1),PSHR16(xr,1));        	/* bisect the interval 	*/
#else
                    xm = .5*(xl+xr);        	/* bisect the interval 	*/
#endif
		    psumm=cheb_poly_eva(pt,xm,m,stack);
		    /*if(psumm*psuml>0.)*/
		    if(!SIGN_CHANGE(psumm,psuml))
                    {
			psuml=psumm;
			xl=xm;
		    } else {
			psumr=psumm;
			xr=xm;
		    }
		}

	       /* once zero is found, reset initial interval to xr 	*/
	       freq[j] = X2ANGLE(xm);
	       xl = xm;
	       flag = 0;       		/* reset flag for next search 	*/
	    }
	    else{
		/* no root in this step: slide the left edge to where xr was */
		psuml=temp_psumr;
		xl=temp_xr;
	    }
	}
    }
    return(roots);
}
/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */ static spx_word32_t pitch_gain_search_3tap( const spx_word16_t target[], /* Target vector */ const spx_coef_t ak[], /* LPCs for this subframe */ const spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */ const spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */ spx_sig_t exc[], /* Excitation */ const signed char* gain_cdbk, int gain_cdbk_size, int pitch, /* Pitch value */ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ SpeexBits* bits, char* stack, const spx_word16_t* exc2, const spx_word16_t* r, spx_word16_t* new_target, int* cdbk_index, int plc_tuning, spx_word32_t cumul_gain, int scaledown ) { int i, j; VARDECL(spx_word16_t * tmp1); VARDECL(spx_word16_t * e); spx_word16_t* x[3]; spx_word32_t corr[3]; spx_word32_t A[3][3]; spx_word16_t gain[3]; spx_word32_t err; spx_word16_t max_gain = 128; int best_cdbk = 0; ALLOC(tmp1, 3 * nsf, spx_word16_t); ALLOC(e, nsf, spx_word16_t); if (cumul_gain > 262144) max_gain = 31; x[0] = tmp1; x[1] = tmp1 + nsf; x[2] = tmp1 + 2 * nsf; for (j = 0; j < nsf; j++) new_target[j] = target[j]; { VARDECL(spx_mem_t * mm); int pp = pitch - 1; ALLOC(mm, p, spx_mem_t); for (j = 0; j < nsf; j++) { if (j - pp < 0) e[j] = exc2[j - pp]; else if (j - pp - pitch < 0) e[j] = exc2[j - pp - pitch]; else e[j] = 0; } #ifdef FIXED_POINT /* Scale target and excitation down if needed (avoiding overflow) */ if (scaledown) { for (j = 0; j < nsf; j++) e[j] = SHR16(e[j], 1); for (j = 0; j < nsf; j++) new_target[j] = SHR16(new_target[j], 1); } #endif for (j = 0; j < p; j++) mm[j] = 0; iir_mem16(e, ak, e, nsf, p, mm, stack); for (j = 0; j < p; j++) mm[j] = 0; filter_mem16(e, awk1, awk2, e, nsf, p, mm, stack); for (j = 0; j < nsf; j++) x[2][j] = e[j]; } for (i = 1; i >= 0; i--) { spx_word16_t e0 = exc2[-pitch - 1 + i]; #ifdef FIXED_POINT /* Scale excitation down if needed (avoiding overflow) */ if (scaledown) e0 = SHR16(e0, 1); #endif 
x[i][0] = MULT16_16_Q14(r[0], e0); for (j = 0; j < nsf - 1; j++) x[i][j + 1] = ADD32(x[i + 1][j], MULT16_16_P14(r[j + 1], e0)); } for (i = 0; i < 3; i++) corr[i] = inner_prod(x[i], new_target, nsf); for (i = 0; i < 3; i++) for (j = 0; j <= i; j++) A[i][j] = A[j][i] = inner_prod(x[i], x[j], nsf); { spx_word32_t C[9]; #ifdef FIXED_POINT spx_word16_t C16[9]; #else spx_word16_t* C16 = C; #endif C[0] = corr[2]; C[1] = corr[1]; C[2] = corr[0]; C[3] = A[1][2]; C[4] = A[0][1]; C[5] = A[0][2]; C[6] = A[2][2]; C[7] = A[1][1]; C[8] = A[0][0]; /*plc_tuning *= 2;*/ if (plc_tuning < 2) plc_tuning = 2; if (plc_tuning > 30) plc_tuning = 30; #ifdef FIXED_POINT C[0] = SHL32(C[0], 1); C[1] = SHL32(C[1], 1); C[2] = SHL32(C[2], 1); C[3] = SHL32(C[3], 1); C[4] = SHL32(C[4], 1); C[5] = SHL32(C[5], 1); C[6] = MAC16_32_Q15(C[6], MULT16_16_16(plc_tuning, 655), C[6]); C[7] = MAC16_32_Q15(C[7], MULT16_16_16(plc_tuning, 655), C[7]); C[8] = MAC16_32_Q15(C[8], MULT16_16_16(plc_tuning, 655), C[8]); normalize16(C, C16, 32767, 9); #else C[6] *= .5 * (1 + .02 * plc_tuning); C[7] *= .5 * (1 + .02 * plc_tuning); C[8] *= .5 * (1 + .02 * plc_tuning); #endif best_cdbk = pitch_gain_search_3tap_vq(gain_cdbk, gain_cdbk_size, C16, max_gain); #ifdef FIXED_POINT gain[0] = ADD16(32, (spx_word16_t)gain_cdbk[best_cdbk * 4]); gain[1] = ADD16(32, (spx_word16_t)gain_cdbk[best_cdbk * 4 + 1]); gain[2] = ADD16(32, (spx_word16_t)gain_cdbk[best_cdbk * 4 + 2]); /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/ #else gain[0] = 0.015625 * gain_cdbk[best_cdbk * 4] + .5; gain[1] = 0.015625 * gain_cdbk[best_cdbk * 4 + 1] + .5; gain[2] = 0.015625 * gain_cdbk[best_cdbk * 4 + 2] + .5; #endif *cdbk_index = best_cdbk; } SPEEX_MEMSET(exc, 0, nsf); for (i = 0; i < 3; i++) { int j; int tmp1, tmp3; int pp = pitch + 1 - i; tmp1 = nsf; if (tmp1 > pp) tmp1 = pp; for (j = 0; j < tmp1; j++) exc[j] = MAC16_16(exc[j], SHL16(gain[2 - i], 7), exc2[j - pp]); tmp3 = nsf; if (tmp3 > pp + pitch) tmp3 = pp + pitch; for (j = tmp1; j < 
tmp3; j++) exc[j] = MAC16_16(exc[j], SHL16(gain[2 - i], 7), exc2[j - pp - pitch]); } for (i = 0; i < nsf; i++) { spx_word32_t tmp = ADD32(ADD32(MULT16_16(gain[0], x[2][i]), MULT16_16(gain[1], x[1][i])), MULT16_16(gain[2], x[0][i])); new_target[i] = SUB16(new_target[i], EXTRACT16(PSHR32(tmp, 6))); } err = inner_prod(new_target, new_target, nsf); return err; }
/*
 * Convert LSPs to LPC coefficients using integer arithmetic.
 *
 *   freq    array of LSP frequencies (mapped through ANGLE2X() before use)
 *   ak      output array, receives lpcrdr LPC coefficients
 *   lpcrdr  order of the LPC coefficients
 *   stack   scratch handed to the ALLOC() macros
 *
 * P(z) and Q(z) are rebuilt by cascading second-order sections of the form
 * 1 - 2cos(w)z^-1 + z^-2, i.e. y(n) = x(n) - 2cos(w)x(n-1) + x(n-2).
 * Two tables with m+1 rows of lpcrdr+3 entries hold the output of each
 * stage; row 0 is the (known) impulse input and row m the final output.
 * Only the entries that can be non-zero are computed, since each stage is
 * an FIR filter excited by an impulse.
 */
void lsp_to_lpc(const spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
{
    int row, col;
    spx_word32_t prev_p, prev_q, half;
    spx_word32_t prod, coef;
    VARDECL(spx_word16_t *freqn);
    VARDECL(spx_word32_t **xp);
    VARDECL(spx_word32_t *xpmem);
    VARDECL(spx_word32_t **xq);
    VARDECL(spx_word32_t *xqmem);
    int m = lpcrdr>>1;
    int width = lpcrdr+1+2;

    ALLOC(xp, (m+1), spx_word32_t*);
    ALLOC(xpmem, (m+1)*width, spx_word32_t);
    ALLOC(xq, (m+1), spx_word32_t*);
    ALLOC(xqmem, (m+1)*width, spx_word32_t);

    /* Carve the flat buffers into rows */
    for(row=0; row<=m; row++) {
      xp[row] = xpmem + row*width;
      xq[row] = xqmem + row*width;
    }

    /* work out 2cos terms in Q14 */

    ALLOC(freqn, lpcrdr, spx_word16_t);
    for (col=0;col<lpcrdr;col++)
       freqn[col] = ANGLE2X(freq[col]);

    #define QIMP  21   /* scaling for impulse */

    half = SHL32(EXTEND32(1), (QIMP-1)); /* 0.5 in QIMP format */

    /* first col and last non-zero values of each row are trivial */

    for(row=0;row<=m;row++) {
     xp[row][1] = 0;
     xp[row][2] = half;
     xp[row][2+2*row] = half;
     xq[row][1] = 0;
     xq[row][2] = half;
     xq[row][2+2*row] = half;
    }

    /* 2nd row (first output row) is trivial */

    xp[1][3] = -MULT16_32_Q14(freqn[0],xp[0][2]);
    xq[1][3] = -MULT16_32_Q14(freqn[1],xq[0][2]);

    prev_p = prev_q = 0;

    /* now generate remaining rows */

    for(row=1;row<m;row++) {
      const spx_word32_t *src_p = xp[row];
      const spx_word32_t *src_q = xq[row];
      spx_word32_t *dst_p = xp[row+1];
      spx_word32_t *dst_q = xq[row+1];
      const int last = 2*row+1;

      for(col=1;col<last;col++) {
	prod = MULT16_32_Q14(freqn[2*row],src_p[col+1]);
	dst_p[col+2] = ADD32(SUB32(src_p[col+2], prod), src_p[col]);
	prod = MULT16_32_Q14(freqn[2*row+1],src_q[col+1]);
	dst_q[col+2] = ADD32(SUB32(src_q[col+2], prod), src_q[col]);
      }

      /* for the last col the src_p[last+2] / src_q[last+2] terms are 0 */

      prod = MULT16_32_Q14(freqn[2*row],src_p[last+1]);
      dst_p[last+2] = SUB32(src_p[last], prod);
      prod = MULT16_32_Q14(freqn[2*row+1],src_q[last+1]);
      dst_q[last+2] = SUB32(src_q[last], prod);
    }

    /* process last row to extract a{k} */

    for(col=1;col<=lpcrdr;col++) {
      int shift = QIMP-13;

      /* final filter sections: combine current and previous column */
      coef = PSHR32(xp[m][col+2] + prev_p + xq[m][col+2] - prev_q, shift);
      prev_p = xp[m][col+2];
      prev_q = xq[m][col+2];

      /* hard limit ak's to +/- 32767 */

      if (coef < -32767) coef = -32767;
      if (coef > 32767) coef = 32767;
      ak[col-1] = (short)coef;

    }

}
static int quant_coarse_energy_impl(const CELTMode *m, int start, int end, const opus_val16 *eBands, opus_val16 *oldEBands, opus_int32 budget, opus_int32 tell, const unsigned char *prob_model, opus_val16 *error, ec_enc *enc, int C, int LM, int intra, opus_val16 max_decay) { int i, c; int badness = 0; opus_val32 prev[2] = {0,0}; opus_val16 coef; opus_val16 beta; if (tell+3 <= budget) ec_enc_bit_logp(enc, intra, 3); if (intra) { coef = 0; beta = beta_intra; } else { beta = beta_coef[LM]; coef = pred_coef[LM]; } /* Encode at a fixed coarse resolution */ for (i=start;i<end;i++) { c=0; do { int bits_left; int qi, qi0; opus_val32 q; opus_val16 x; opus_val32 f, tmp; opus_val16 oldE; opus_val16 decay_bound; x = eBands[i+c*m->nbEBands]; oldE = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]); #ifdef FIXED_POINT f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c]; /* Rounding to nearest integer here is really important! */ qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7); decay_bound = EXTRACT16(MAX32(-QCONST16(28.f,DB_SHIFT), SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay))); #else f = x-coef*oldE-prev[c]; /* Rounding to nearest integer here is really important! */ qi = (int)floor(.5f+f); decay_bound = MAX16(-QCONST16(28.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]) - max_decay; #endif /* Prevent the energy from going down too quickly (e.g. for bands that have just one bin) */ if (qi < 0 && x < decay_bound) { qi += (int)SHR16(SUB16(decay_bound,x), DB_SHIFT); if (qi > 0) qi = 0; } qi0 = qi; /* If we don't have enough bits to encode all the energy, just assume something safe. 
*/ tell = ec_tell(enc); bits_left = budget-tell-3*C*(end-i); if (i!=start && bits_left < 30) { if (bits_left < 24) qi = IMIN(1, qi); if (bits_left < 16) qi = IMAX(-1, qi); } if (budget-tell >= 15) { int pi; pi = 2*IMIN(i,20); ec_laplace_encode(enc, &qi, prob_model[pi]<<7, prob_model[pi+1]<<6); } else if(budget-tell >= 2) { qi = IMAX(-1, IMIN(qi, 1)); ec_enc_icdf(enc, 2*qi^-(qi<0), small_energy_icdf, 2); } else if(budget-tell >= 1) { qi = IMIN(0, qi); ec_enc_bit_logp(enc, -qi, 1); } else qi = -1; error[i+c*m->nbEBands] = PSHR32(f,7) - SHL16(qi,DB_SHIFT); badness += abs(qi0-qi); q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT); tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7); #ifdef FIXED_POINT tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp); #endif oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7); prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); } while (++c < C); } return badness; }
/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis.
 *
 * Three delayed copies of the past excitation are built (lags pitch-1, pitch
 * and pitch+1), their filtered responses are obtained, and the gain codebook
 * is scanned for the gain triplet with the highest score.
 *
 * Outputs written:
 *   exc[0..nsf-1]         gain-weighted sum of the three delayed excitations
 *   new_target[0..nsf-1]  target minus the matching filtered contribution
 *   *cdbk_index           index of the selected codebook entry
 *
 * exc2 is read at negative indices, down to exc2[-(pitch+1)], so that much
 * history must be valid before exc2[0].  The bits argument is not used in
 * this function.
 *
 * Returns the accumulated squared error of the updated target (in the
 * fixed-point build each error sample is shifted down by 15 bits first).
 */
static spx_word64_t pitch_gain_search_3tap(
const spx_sig_t target[],       /* Target vector */
const spx_coef_t ak[],          /* LPCs for this subframe */
const spx_coef_t awk1[],        /* Weighted LPCs #1 for this subframe */
const spx_coef_t awk2[],        /* Weighted LPCs #2 for this subframe */
spx_sig_t exc[],                /* Excitation */
const void *par,                /* Cast to const ltp_params* (gain_bits, gain_cdbk) */
int   pitch,                    /* Pitch value */
int   p,                        /* Number of LPC coeffs */
int   nsf,                      /* Number of samples in subframe */
SpeexBits *bits,                /* Unused here */
char *stack,                    /* Scratch memory for ALLOC() */
const spx_sig_t *exc2,          /* Source of the delayed excitation samples */
const spx_word16_t *r,          /* Weights applied to e[i][0] when deriving x[1] and x[0] */
spx_sig_t *new_target,          /* Output: updated target */
int  *cdbk_index,               /* Output: chosen gain codebook index */
int cdbk_offset,                /* Selects which block of gain_cdbk_size entries is searched */
int plc_tuning                  /* Tuning value, forced to at least 2 below */
)
{
   int i,j;
   VARDECL(spx_sig_t *tmp1);
   VARDECL(spx_sig_t *tmp2);
   spx_sig_t *x[3];             /* filtered versions of e[0..2] */
   spx_sig_t *e[3];             /* delayed excitations, one per tap */
   spx_word32_t corr[3];        /* correlation of each x[i] with the target */
   spx_word32_t A[3][3];        /* symmetric matrix of inner products x[i].x[j] */
   int   gain_cdbk_size;
   const signed char *gain_cdbk;
   spx_word16_t gain[3];
   spx_word64_t err;

   const ltp_params *params;
   params = (const ltp_params*) par;
   gain_cdbk_size = 1<<params->gain_bits;
   /* Each entry is 3 bytes; cdbk_offset skips whole blocks of entries */
   gain_cdbk = params->gain_cdbk + 3*gain_cdbk_size*cdbk_offset;
   ALLOC(tmp1, 3*nsf, spx_sig_t);
   ALLOC(tmp2, 3*nsf, spx_sig_t);

   /* Carve the two scratch buffers into three nsf-sized vectors each */
   x[0]=tmp1;
   x[1]=tmp1+nsf;
   x[2]=tmp1+2*nsf;

   e[0]=tmp2;
   e[1]=tmp2+nsf;
   e[2]=tmp2+2*nsf;

   /* i=2 uses lag pitch-1, i=1 lag pitch, i=0 lag pitch+1 */
   for (i=2; i>=0; i--)
   {
      int pp=pitch+1-i;
      for (j=0; j<nsf; j++)
      {
         /* First period comes from lag pp, the next one from lag pp+pitch,
            anything beyond that is zero */
         if (j-pp<0)
            e[i][j]=exc2[j-pp];
         else if (j-pp-pitch<0)
            e[i][j]=exc2[j-pp-pitch];
         else
            e[i][j]=0;
      }

      if (i==2)
         /* Only the first response is obtained by actual filtering */
         syn_percep_zero(e[i], ak, awk1, awk2, x[i], nsf, p, stack);
      else {
         /* The other two are derived from the previous response: delay it by
            one sample, then add r[] weighted by the new leading sample */
         for (j=0; j<nsf-1; j++)
            x[i][j+1]=x[i+1][j];
         x[i][0]=0;
         for (j=0; j<nsf; j++)
         {
            x[i][j]=ADD32(x[i][j],SHL32(MULT16_32_Q15(r[j], e[i][0]),1));
         }
      }
   }

#ifdef FIXED_POINT
   {
      /* If using fixed-point, we need to normalize the signals first */
      spx_word16_t *y[3];
      VARDECL(spx_word16_t *ytmp);
      VARDECL(spx_word16_t *t);

      spx_sig_t max_val=1;
      int sig_shift;

      ALLOC(ytmp, 3*nsf, spx_word16_t);
#if 0
      ALLOC(y[0], nsf, spx_word16_t);
      ALLOC(y[1], nsf, spx_word16_t);
      ALLOC(y[2], nsf, spx_word16_t);
#else
      y[0] = ytmp;
      y[1] = ytmp+nsf;
      y[2] = ytmp+2*nsf;
#endif
      ALLOC(t, nsf, spx_word16_t);
      /* Find the largest magnitude over the three responses and the target */
      for (j=0; j<3; j++)
      {
         for (i=0; i<nsf; i++)
         {
            spx_sig_t tmp = x[j][i];
            if (tmp<0)
               tmp = -tmp;
            if (tmp > max_val)
               max_val = tmp;
         }
      }
      for (i=0; i<nsf; i++)
      {
         spx_sig_t tmp = target[i];
         if (tmp<0)
            tmp = -tmp;
         if (tmp > max_val)
            max_val = tmp;
      }

      /* Pick the shift that brings that peak down to at most 16384 */
      sig_shift=0;
      while (max_val>16384)
      {
         sig_shift++;
         max_val >>= 1;
      }

      for (j=0; j<3; j++)
      {
         for (i=0; i<nsf; i++)
         {
            y[j][i] = EXTRACT16(SHR32(x[j][i],sig_shift));
         }
      }
      for (i=0; i<nsf; i++)
      {
         t[i] = EXTRACT16(SHR32(target[i],sig_shift));
      }

      for (i=0; i<3; i++)
         corr[i]=inner_prod(y[i],t,nsf);

      /* Only the lower triangle is computed; the matrix is mirrored */
      for (i=0; i<3; i++)
         for (j=0; j<=i; j++)
            A[i][j]=A[j][i]=inner_prod(y[i],y[j],nsf);
   }
#else
   {
      for (i=0; i<3; i++)
         corr[i]=inner_prod(x[i],target,nsf);

      /* Only the lower triangle is computed; the matrix is mirrored */
      for (i=0; i<3; i++)
         for (j=0; j<=i; j++)
            A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf);
   }
#endif

   {
      /* C[0..2]: correlations, C[3..5]: cross terms, C[6..8]: energies */
      spx_word32_t C[9];
      const signed char *ptr=gain_cdbk;
      int best_cdbk=0;
      spx_word32_t best_sum=0;
      C[0]=corr[2];
      C[1]=corr[1];
      C[2]=corr[0];
      C[3]=A[1][2];
      C[4]=A[0][1];
      C[5]=A[0][2];
      C[6]=A[2][2];
      C[7]=A[1][1];
      C[8]=A[0][0];

      /*plc_tuning *= 2;*/
      if (plc_tuning<2)
         plc_tuning=2;
#ifdef FIXED_POINT
      /* NOTE(review): only C[0..2] are rescaled here, whereas the float
         branch also rescales C[6..8] - confirm this is intended */
      C[0] = MAC16_32_Q15(C[0],MULT16_16_16(plc_tuning,-327),C[0]);
      C[1] = MAC16_32_Q15(C[1],MULT16_16_16(plc_tuning,-327),C[1]);
      C[2] = MAC16_32_Q15(C[2],MULT16_16_16(plc_tuning,-327),C[2]);
#else
      C[0]*=1-.01*plc_tuning;
      C[1]*=1-.01*plc_tuning;
      C[2]*=1-.01*plc_tuning;
      C[6]*=.5*(1+.01*plc_tuning);
      C[7]*=.5*(1+.01*plc_tuning);
      C[8]*=.5*(1+.01*plc_tuning);
#endif
      /* Score every codebook entry and remember the best one */
      for (i=0; i<gain_cdbk_size; i++)
      {
         spx_word32_t sum=0;
         spx_word16_t g0,g1,g2;
         spx_word16_t pitch_control=64;
         spx_word16_t gain_sum;

         ptr = gain_cdbk+3*i;
         /* Stored entries carry an offset of -32 */
         g0=ADD16((spx_word16_t)ptr[0],32);
         g1=ADD16((spx_word16_t)ptr[1],32);
         g2=ADD16((spx_word16_t)ptr[2],32);

         /* Centre tap always counts, outer taps only when positive */
         gain_sum = g1;
         if (g0>0)
            gain_sum += g0;
         if (g2>0)
            gain_sum += g2;
         /* When the summed gain exceeds 64, lower pitch_control (floored at
            0) in proportion to the excess and to plc_tuning */
         if (gain_sum > 64)
         {
            gain_sum = SUB16(gain_sum, 64);
            if (gain_sum > 127)
               gain_sum = 127;
#ifdef FIXED_POINT
            pitch_control =  SUB16(64,EXTRACT16(PSHR32(MULT16_16(64,MULT16_16_16(plc_tuning, gain_sum)),10)));
#else
            pitch_control = 64*(1.-.001*plc_tuning*gain_sum);
#endif
            if (pitch_control < 0)
               pitch_control = 0;
         }

         /* Correlation terms add to the score, cross and energy terms
            subtract from it */
         sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g0,pitch_control),C[0]));
         sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g1,pitch_control),C[1]));
         sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g2,pitch_control),C[2]));
         sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g0,g1),C[3]));
         sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g2,g1),C[4]));
         sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g2,g0),C[5]));
         sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g0,g0),C[6]));
         sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g1,g1),C[7]));
         sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g2,g2),C[8]));
         /* We could force "safe" pitch values to handle packet loss better */

         /* Entry 0 is always accepted so that best_sum starts from a real score */
         if (sum>best_sum || i==0)
         {
            best_sum=sum;
            best_cdbk=i;
         }
      }
#ifdef FIXED_POINT
      gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3]);
      gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+1]);
      gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+2]);
      /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/
#else
      /* Same mapping as above expressed as a fraction: (entry+32)/64 */
      gain[0] = 0.015625*gain_cdbk[best_cdbk*3]  + .5;
      gain[1] = 0.015625*gain_cdbk[best_cdbk*3+1]+ .5;
      gain[2] = 0.015625*gain_cdbk[best_cdbk*3+2]+ .5;
#endif
      *cdbk_index=best_cdbk;
   }

#ifdef FIXED_POINT
   /* gain[0] pairs with e[2]/x[2], gain[2] with e[0]/x[0] */
   for (i=0; i<nsf; i++)
      exc[i]=SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),e[2][i]), MULT16_32_Q15(SHL16(gain[1],7),e[1][i])),
                         MULT16_32_Q15(SHL16(gain[2],7),e[0][i])), 2);

   err=0;
   for (i=0; i<nsf; i++)
   {
      spx_word16_t perr2;
      spx_sig_t tmp = SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),x[2][i]),MULT16_32_Q15(SHL16(gain[1],7),x[1][i])),
                                  MULT16_32_Q15(SHL16(gain[2],7),x[0][i])),2);
      spx_sig_t perr=SUB32(target[i],tmp);
      new_target[i] = SUB32(target[i], tmp);
      /* Error is reduced by 15 bits before squaring */
      perr2 = EXTRACT16(PSHR32(perr,15));
      err = ADD64(err,MULT16_16(perr2,perr2));
   }
#else
   /* gain[0] pairs with e[2]/x[2], gain[2] with e[0]/x[0] */
   for (i=0; i<nsf; i++)
      exc[i]=gain[0]*e[2][i]+gain[1]*e[1][i]+gain[2]*e[0][i];

   err=0;
   for (i=0; i<nsf; i++)
   {
      spx_sig_t tmp = gain[2]*x[0][i]+gain[1]*x[1][i]+gain[0]*x[2][i];
      new_target[i] = target[i] - tmp;
      err+=new_target[i]*new_target[i];
   }
#endif

   return err;
}
/*
 * N-best search of a split shape (and optional sign) codebook.
 *
 * The target is handled as nb_subvect sub-vectors of subvect_size samples.
 * For each sub-vector the N best partial paths are kept, where N is
 * complexity clamped to [1,10].  When N ends up as 1 the whole job is
 * delegated to split_cb_search_shape_sign_N1().
 *
 * On return the indices of the best path have been packed into bits, the
 * corresponding codevector has been added to exc[], and, if update_target
 * is non-zero, the output of syn_percep_zero() for that codevector has been
 * subtracted from target[].
 */
void split_cb_search_shape_sign(
spx_sig_t target[],             /* target vector */
spx_coef_t ak[],                /* LPCs for this subframe */
spx_coef_t awk1[],              /* Weighted LPCs for this subframe */
spx_coef_t awk2[],              /* Weighted LPCs for this subframe */
const void *par,                /* Codebook/search parameters*/
int   p,                        /* number of LPC coeffs */
int   nsf,                      /* number of samples in subframe */
spx_sig_t *exc,                 /* excitation, updated in place */
spx_word16_t *r,                /* response passed to compute_weighted_codebook() and used to update the rest of the target */
SpeexBits *bits,                /* bit-stream receiving the indices */
char *stack,                    /* scratch memory for ALLOC() */
int   complexity,               /* sets the number N of candidates kept */
int   update_target             /* non-zero: subtract the contribution from target[] */
)
{
   int i,j,k,m,n,q;
   VARDECL(spx_word16_t *resp);
#ifdef _USE_SSE
   VARDECL(__m128 *resp2);
   VARDECL(__m128 *E);
#else
   spx_word16_t *resp2;
   VARDECL(spx_word32_t *E);
#endif
   VARDECL(spx_word16_t *t);
   VARDECL(spx_sig_t *e);
   VARDECL(spx_sig_t *r2);
   VARDECL(spx_word16_t *tmp);
   VARDECL(spx_word32_t *ndist);
   VARDECL(spx_word32_t *odist);
   VARDECL(int *itmp);
   VARDECL(spx_word16_t **ot2);
   VARDECL(spx_word16_t **nt2);
   spx_word16_t **ot, **nt;     /* old / new target sets, swapped after each sub-vector */
   VARDECL(int **nind);
   VARDECL(int **oind);
   VARDECL(int *ind);
   const signed char *shape_cb;
   int shape_cb_size, subvect_size, nb_subvect;
   const split_cb_params *params;
   int N=2;
   VARDECL(int *best_index);
   VARDECL(spx_word32_t *best_dist);
   int have_sign;
   /* Number of candidates kept equals the complexity, clamped to [1,10] */
   N=complexity;
   if (N>10)
      N=10;
   if (N<1)
      N=1;

   /* Single-candidate case is handled by the dedicated routine */
   if (N==1)
   {
      split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,complexity,update_target);
      return;
   }
   ALLOC(ot2, N, spx_word16_t*);
   ALLOC(nt2, N, spx_word16_t*);
   ALLOC(oind, N, int*);
   ALLOC(nind, N, int*);

   params = (const split_cb_params *) par;
   subvect_size = params->subvect_size;
   nb_subvect = params->nb_subvect;
   shape_cb_size = 1<<params->shape_bits;
   shape_cb = params->shape_cb;
   have_sign = params->have_sign;
   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
#ifdef _USE_SSE
   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
   ALLOC(E, shape_cb_size>>2, __m128);
#else
   /* Without SSE the search routines read the same buffer as resp */
   resp2 = resp;
   ALLOC(E, shape_cb_size, spx_word32_t);
#endif
   ALLOC(t, nsf, spx_word16_t);
   ALLOC(e, nsf, spx_sig_t);
   ALLOC(r2, nsf, spx_sig_t);
   ALLOC(ind, nb_subvect, int);

   /* One buffer holds the old and new targets of every candidate, interleaved */
   ALLOC(tmp, 2*N*nsf, spx_word16_t);
   for (i=0;i<N;i++)
   {
      ot2[i]=tmp+2*i*nsf;
      nt2[i]=tmp+(2*i+1)*nsf;
   }
   ot=ot2;
   nt=nt2;
   ALLOC(best_index, N, int);
   ALLOC(best_dist, N, spx_word32_t);
   ALLOC(ndist, N, spx_word32_t);
   ALLOC(odist, N, spx_word32_t);

   /* Same layout for the index histories; -1 marks "no index chosen yet" */
   ALLOC(itmp, 2*N*nb_subvect, int);
   for (i=0;i<N;i++)
   {
      nind[i]=itmp+2*i*nb_subvect;
      oind[i]=itmp+(2*i+1)*nb_subvect;
      for (j=0;j<nb_subvect;j++)
         nind[i][j]=oind[i][j]=-1;
   }

   /* FIXME: make that adaptive? */
   /* Working copy of the target, reduced by 6 bits */
   for (i=0;i<nsf;i++)
      t[i]=EXTRACT16(PSHR32(target[i],6));

   /* Every candidate starts from the same target */
   for (j=0;j<N;j++)
      for (i=0;i<nsf;i++)
         ot[j][i]=t[i];

   /*for (i=0;i<nsf;i++)
     printf ("%d\n", (int)t[i]);*/

   /* Pre-compute codewords response and energy */
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);

   for (j=0;j<N;j++)
      odist[j]=0;

   /*For all subvectors*/
   for (i=0;i<nb_subvect;i++)
   {
      /*"erase" nbest list*/
      /* -2 flags an empty slot; the "< -1" tests below detect it */
      for (j=0;j<N;j++)
         ndist[j]=-2;

      /*For all n-bests of previous subvector*/
      for (j=0;j<N;j++)
      {
         spx_word16_t *x=ot[j]+subvect_size*i;
         /*Find new n-best based on previous n-best j*/
         if (have_sign)
            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
         else
            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);

         /*For all new n-bests*/
         for (k=0;k<N;k++)
         {
            spx_word16_t *ct;
            spx_word32_t err=0;
            ct = ot[j];
            /*update target*/

            /*previous target*/
            for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
               t[m]=ct[m];

            /* New code: update only enough of the target to calculate error*/
            {
               int rind;
               spx_word16_t *res;
               spx_word16_t sign=1;
               /* Indices at or above shape_cb_size select the negated codeword */
               rind = best_index[k];
               if (rind>=shape_cb_size)
               {
                  sign=-1;
                  rind-=shape_cb_size;
               }
               res = resp+rind*subvect_size;
               if (sign>0)
                  for (m=0;m<subvect_size;m++)
                     t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
               else
                  for (m=0;m<subvect_size;m++)
                     t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
            }

            /*compute error (distance)*/
            /* Accumulated distance of path j plus the energy left in this sub-vector */
            err=odist[j];
            for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
               err = MAC16_16(err, t[m],t[m]);
            /*update n-best list*/
            if (err<ndist[N-1] || ndist[N-1]<-1)
            {

               /*previous target (we don't care what happened before*/
               for (m=(i+1)*subvect_size;m<nsf;m++)
                  t[m]=ct[m];
               /* New code: update the rest of the target only if it's worth it */
               for (m=0;m<subvect_size;m++)
               {
                  spx_word16_t g;
                  int rind;
                  spx_word16_t sign=1;
                  rind = best_index[k];
                  if (rind>=shape_cb_size)
                  {
                     sign=-1;
                     rind-=shape_cb_size;
                  }

                  /* q walks r[] starting at the distance from sample m to
                     the first sample after this sub-vector */
                  q=subvect_size-m;
#ifdef FIXED_POINT
                  g=sign*shape_cb[rind*subvect_size+m];
                  for (n=subvect_size*(i+1);n<nsf;n++,q++)
                     t[n] = SUB32(t[n],MULT16_16_Q11_32(g,r[q]));
#else
                  g=sign*0.03125*shape_cb[rind*subvect_size+m];
                  for (n=subvect_size*(i+1);n<nsf;n++,q++)
                     t[n] = SUB32(t[n],g*r[q]);
#endif
               }

               /* Insert into the list, which is kept sorted by increasing distance */
               for (m=0;m<N;m++)
               {
                  if (err < ndist[m] || ndist[m]<-1)
                  {
                     /* Shift worse entries down one slot */
                     for (n=N-1;n>m;n--)
                     {
                        for (q=(i+1)*subvect_size;q<nsf;q++)
                           nt[n][q]=nt[n-1][q];
                        for (q=0;q<nb_subvect;q++)
                           nind[n][q]=nind[n-1][q];
                        ndist[n]=ndist[n-1];
                     }
                     /* Store the remaining target, index history and distance */
                     for (q=(i+1)*subvect_size;q<nsf;q++)
                        nt[m][q]=t[q];
                     for (q=0;q<nb_subvect;q++)
                        nind[m][q]=oind[j][q];
                     nind[m][i]=best_index[k];
                     ndist[m]=err;
                     break;
                  }
               }
            }
         }
         /* For the first sub-vector all N old paths are identical copies,
            so only path 0 is expanded */
         if (i==0)
            break;
      }

      /*update old-new data*/
      /* just swap pointers instead of a long copy */
      {
         spx_word16_t **tmp2;
         tmp2=ot;
         ot=nt;
         nt=tmp2;
      }
      for (j=0;j<N;j++)
         for (m=0;m<nb_subvect;m++)
            oind[j][m]=nind[j][m];
      for (j=0;j<N;j++)
         odist[j]=ndist[j];
   }

   /*save indices*/
   /* Entry 0 of the list is the path with the smallest distance */
   for (i=0;i<nb_subvect;i++)
   {
      ind[i]=nind[0][i];
      speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
   }

   /* Put everything back together */
   for (i=0;i<nb_subvect;i++)
   {
      int rind;
      spx_word16_t sign=1;
      rind = ind[i];
      if (rind>=shape_cb_size)
      {
         sign=-1;
         rind-=shape_cb_size;
      }
#ifdef FIXED_POINT
      if (sign==1)
      {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
      } else {
         for (j=0;j<subvect_size;j++)
            e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
      }
#else
      for (j=0;j<subvect_size;j++)
         e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
#endif
   }
   /* Update excitation */
   for (j=0;j<nsf;j++)
      exc[j]=ADD32(exc[j],e[j]);

   /* Update target: only update target if necessary */
   if (update_target)
   {
      syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
      for (j=0;j<nsf;j++)
         target[j]=SUB32(target[j],r2[j]);
   }
}
/** Performs echo cancellation on a frame.
 *
 *  st       echo canceller state; updated in place
 *  in       captured (microphone) samples, interleaved over st->C channels,
 *           st->frame_size samples per channel
 *  far_end  playback (echo input) samples, interleaved over st->K channels,
 *           st->frame_size samples per channel
 *  out      receives the processed samples, interleaved like in
 *
 *  When the internal sanity counter st->screwed_up reaches 50 a warning is
 *  emitted, the state is reset with speex_echo_state_reset() and the function
 *  returns before the adaptation statistics are updated.
 *
 *  Dbf only exists when TWO_PATH is defined, so it is cleared inside that
 *  conditional; this keeps the function compilable without TWO_PATH.
 *  NOTE(review): without TWO_PATH nothing in this function computes See or
 *  st->y - confirm whether that configuration is meant to be supported.
 */
EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
{
   int i,j, chan, speak;
   int N,M, C, K;
   spx_word32_t Syy,See,Sxx,Sdd, Sff;
#ifdef TWO_PATH
   spx_word32_t Dbf;
   int update_foreground;
#endif
   spx_word32_t Sey;
   spx_word16_t ss, ss_1;
   spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
   spx_float_t alpha, alpha_1;
   spx_word16_t RER;
   spx_word32_t tmp32;

   N = st->window_size;
   M = st->M;
   C = st->C;
   K = st->K;

   st->cancel_count++;
   /* Smoothing coefficient for the far-end power estimate (0.35/M) */
#ifdef FIXED_POINT
   ss=DIV32_16(11469,M);
   ss_1 = SUB16(32767,ss);
#else
   ss=.35/M;
   ss_1 = 1-ss;
#endif

   for (chan = 0; chan < C; chan++)
   {
      /* Apply a notch filter to make sure DC doesn't end up causing problems */
      filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
      /* Copy input data to buffer and apply pre-emphasis */
      /* Copy input data to buffer */
      for (i=0;i<st->frame_size;i++)
      {
         spx_word32_t tmp32;
         /* FIXME: This core has changed a bit, need to merge properly */
         tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
#ifdef FIXED_POINT
         /* Clip to 16 bits and remember that the near end saturated */
         if (tmp32 > 32767)
         {
            tmp32 = 32767;
            if (st->saturated == 0)
               st->saturated = 1;
         }
         if (tmp32 < -32767)
         {
            tmp32 = -32767;
            if (st->saturated == 0)
               st->saturated = 1;
         }
#endif
         st->memD[chan] = st->input[chan*st->frame_size+i];
         st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
      }
   }

   for (speak = 0; speak < K; speak++)
   {
      for (i=0;i<st->frame_size;i++)
      {
         spx_word32_t tmp32;
         /* Slide the previous frame to the first half of the window */
         st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
         tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
#ifdef FIXED_POINT
         /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
         if (tmp32 > 32767)
         {
            tmp32 = 32767;
            st->saturated = M+1;
         }
         if (tmp32 < -32767)
         {
            tmp32 = -32767;
            st->saturated = M+1;
         }
#endif
         st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
         st->memX[speak] = far_end[i*K+speak];
      }
   }

   for (speak = 0; speak < K; speak++)
   {
      /* Shift memory: this could be optimized eventually*/
      for (j=M-1;j>=0;j--)
      {
         for (i=0;i<N;i++)
            st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
      }
      /* Convert x (echo input) to frequency domain */
      spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
   }

   Sxx = 0;
   for (speak = 0; speak < K; speak++)
   {
      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
      power_spectrum_accum(st->X+speak*N, st->Xf, N);
   }

   Sff = 0;
   for (chan = 0; chan < C; chan++)
   {
#ifdef TWO_PATH
      /* Compute foreground filter */
      spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
      spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
      for (i=0;i<st->frame_size;i++)
         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
      Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
#endif
   }

   /* Adjust proportional adaption rate */
   /* FIXME: Adjust that for C, K*/
   if (st->adapted)
      mdf_adjust_prop (st->W, N, M, C*K, st->prop);
   /* Compute weight gradient */
   if (st->saturated == 0)
   {
      for (chan = 0; chan < C; chan++)
      {
         for (speak = 0; speak < K; speak++)
         {
            for (j=M-1;j>=0;j--)
            {
               weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
               for (i=0;i<N;i++)
                  st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
            }
         }
      }
   } else {
      /* Adaptation is skipped while the saturation counter runs down */
      st->saturated--;
   }

   /* FIXME: MC conversion required */
   /* Update weight to prevent circular convolution (MDF / AUMDF) */
   for (chan = 0; chan < C; chan++)
   {
      for (speak = 0; speak < K; speak++)
      {
         for (j=0;j<M;j++)
         {
            /* This is a variant of the Alternatively Updated MDF (AUMDF) */
            /* Remove the "if" to make this an MDF filter */
            /* The j==0 test short-circuits first, so the modulo is never
               evaluated when M is 1 */
            if (j==0 || st->cancel_count%(M-1) == j-1)
            {
#ifdef FIXED_POINT
               for (i=0;i<N;i++)
                  st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
               spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
               for (i=0;i<st->frame_size;i++)
               {
                  st->wtmp[i]=0;
               }
               for (i=st->frame_size;i<N;i++)
               {
                  st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
               }
               spx_fft(st->fft_table, st->wtmp, st->wtmp2);
               /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
               for (i=0;i<N;i++)
                  st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
#else
               spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
               for (i=st->frame_size;i<N;i++)
               {
                  st->wtmp[i]=0;
               }
               spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
#endif
            }
         }
      }
   }

   /* So we can use power_spectrum_accum */
   for (i=0;i<=st->frame_size;i++)
      st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;

   See = 0;
#ifdef TWO_PATH
   /* Dbf is declared only under TWO_PATH, so it must be initialised here */
   Dbf = 0;
   /* Difference in response, this is used to estimate the variance of our residual power estimate */
   for (chan = 0; chan < C; chan++)
   {
      spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
      spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
      for (i=0;i<st->frame_size;i++)
         st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
      Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
      for (i=0;i<st->frame_size;i++)
         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
      See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
   }
#endif

#ifndef TWO_PATH
   Sff = See;
#endif

#ifdef TWO_PATH
   /* Logic for updating the foreground filter */

   /* For two time windows, compute the mean of the energy difference, as well as the variance */
   st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
   st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
   st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
   st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));

   /* Equivalent float code:
   st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
   st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
   st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
   st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
   */

   update_foreground = 0;
   /* Check if we have a statistically significant reduction in the residual echo */
   /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
   if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
      update_foreground = 1;
   else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
      update_foreground = 1;
   else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
      update_foreground = 1;

   /* Do we update? */
   if (update_foreground)
   {
      st->Davg1 = st->Davg2 = 0;
      st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
      /* Copy background filter to foreground filter */
      for (i=0;i<N*M*C*K;i++)
         st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
      /* Apply a smooth transition so as to not introduce blocking artifacts */
      for (chan = 0; chan < C; chan++)
         for (i=0;i<st->frame_size;i++)
            st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
   } else {
      int reset_background=0;
      /* Otherwise, check if the background filter is significantly worse */
      if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
         reset_background = 1;
      if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
         reset_background = 1;
      if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
         reset_background = 1;
      if (reset_background)
      {
         /* Copy foreground filter to background filter */
         for (i=0;i<N*M*C*K;i++)
            st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
         /* We also need to copy the output so as to get correct adaptation */
         for (chan = 0; chan < C; chan++)
         {
            for (i=0;i<st->frame_size;i++)
               st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
            for (i=0;i<st->frame_size;i++)
               st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
         }
         See = Sff;
         st->Davg1 = st->Davg2 = 0;
         st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
      }
   }
#endif

   Sey = Syy = Sdd = 0;
   for (chan = 0; chan < C; chan++)
   {
      /* Compute error signal (for the output with de-emphasis) */
      for (i=0;i<st->frame_size;i++)
      {
         spx_word32_t tmp_out;
#ifdef TWO_PATH
         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
#else
         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
#endif
         tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
         /* This is an arbitrary test for saturation in the microphone signal */
         if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
         {
            if (st->saturated == 0)
               st->saturated = 1;
         }
         out[i*C+chan] = WORD2INT(tmp_out);
         st->memE[chan] = tmp_out;
      }

#ifdef DUMP_ECHO_CANCEL_DATA
      dump_audio(in, far_end, out, st->frame_size);
#endif

      /* Compute error signal (filter update version) */
      for (i=0;i<st->frame_size;i++)
      {
         st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
         st->e[chan*N+i] = 0;
      }

      /* Compute a bunch of correlations */
      /* FIXME: bad merge */
      Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
      Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
      Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);

      /* Convert error to frequency domain */
      spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
      for (i=0;i<st->frame_size;i++)
         st->y[i+chan*N] = 0;
      spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);

      /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
      power_spectrum_accum(st->E+chan*N, st->Rf, N);
      power_spectrum_accum(st->Y+chan*N, st->Yf, N);

   }

   /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/

   /* Do some sanity check */
   if (!(Syy>=0 && Sxx>=0 && See >= 0)
#ifndef FIXED_POINT
       || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
#endif
      )
   {
      /* Things have gone really bad */
      st->screwed_up += 50;
      for (i=0;i<st->frame_size*C;i++)
         out[i] = 0;
   } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
   {
      /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
      st->screwed_up++;
   } else {
      /* Everything's fine */
      st->screwed_up=0;
   }
   if (st->screwed_up>=50)
   {
      speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
      speex_echo_state_reset(st);
      return;
   }

   /* Add a small noise floor to make sure not to have problems when dividing */
   See = MAX32(See, SHR32(MULT16_16(N, 100),6));

   /* NOTE(review): Sxx was already accumulated over the same samples near
      the top of the function and is not cleared in between, so it ends up
      holding twice the far-end energy - confirm this is intended.  Xf was
      zeroed above, so it is only accumulated once. */
   for (speak = 0; speak < K; speak++)
   {
      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
      power_spectrum_accum(st->X+speak*N, st->Xf, N);
   }

   /* Smooth far end energy estimate over time */
   for (j=0;j<=st->frame_size;j++)
      st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);

   /* Compute filtered spectra and (cross-)correlations */
   for (j=st->frame_size;j>=0;j--)
   {
      spx_float_t Eh, Yh;
      Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
      Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
      Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
      Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
#ifdef FIXED_POINT
      st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
      st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
#else
      st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
      st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
#endif
   }

   Pyy = FLOAT_SQRT(Pyy);
   Pey = FLOAT_DIVU(Pey,Pyy);

   /* Compute correlation update rate */
   tmp32 = MULT16_32_Q15(st->beta0,Syy);
   if (tmp32 > MULT16_32_Q15(st->beta_max,See))
      tmp32 = MULT16_32_Q15(st->beta_max,See);
   alpha = FLOAT_DIV32(tmp32, See);
   alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
   /* Update correlations (recursive average) */
   st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
   st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
   if (FLOAT_LT(st->Pyy, FLOAT_ONE))
      st->Pyy = FLOAT_ONE;
   /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
   if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
      st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
   if (FLOAT_GT(st->Pey, st->Pyy))
      st->Pey = st->Pyy;
   /* leak_estimate is the linear regression result */
   st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
   /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
   if (st->leak_estimate > 16383)
      st->leak_estimate = 32767;
   else
      st->leak_estimate = SHL16(st->leak_estimate,1);
   /*printf ("%f\n", st->leak_estimate);*/

   /* Compute Residual to Error Ratio */
#ifdef FIXED_POINT
   tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
   tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
   /* Check for y in e (lower bound on RER) */
   {
      spx_float_t bound = PSEUDOFLOAT(Sey);
      bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
      if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
         tmp32 = See;
      else if (tmp32 < FLOAT_EXTRACT32(bound))
         tmp32 = FLOAT_EXTRACT32(bound);
   }
   if (tmp32 > SHR32(See,1))
      tmp32 = SHR32(See,1);
   RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
#else
   RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
   /* Check for y in e (lower bound on RER) */
   if (RER < Sey*Sey/(1+See*Syy))
      RER = Sey*Sey/(1+See*Syy);
   if (RER > .5)
      RER = .5;
#endif

   /* We consider that the filter has had minimal adaptation if the following is true*/
   if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
   {
      st->adapted = 1;
   }

   if (st->adapted)
   {
      /* Normal learning rate calculation once we're past the minimal adaptation phase */
      for (i=0;i<=st->frame_size;i++)
      {
         spx_word32_t r, e;
         /* Compute frequency-domain adaptation mask */
         r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
         e = SHL32(st->Rf[i],3)+1;
#ifdef FIXED_POINT
         if (r>SHR32(e,1))
            r = SHR32(e,1);
#else
         if (r>.5*e)
            r = .5*e;
#endif
         r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
         /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
      }
   } else {
      /* Temporary adaption rate if filter is not yet adapted enough */
      spx_word16_t adapt_rate=0;

      if (Sxx > SHR32(MULT16_16(N, 1000),6))
      {
         tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
#ifdef FIXED_POINT
         if (tmp32 > SHR32(See,2))
            tmp32 = SHR32(See,2);
#else
         if (tmp32 > .25*See)
            tmp32 = .25*See;
#endif
         adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
      }
      for (i=0;i<=st->frame_size;i++)
         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);

      /* How much have we adapted so far? */
      st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
   }

   /* FIXME: MC conversion required */
   for (i=0;i<st->frame_size;i++)
      st->last_y[i] = st->last_y[st->frame_size+i];
   if (st->adapted)
   {
      /* If the filter is adapted, take the filtered echo */
      for (i=0;i<st->frame_size;i++)
         st->last_y[st->frame_size+i] = in[i]-out[i];
   } else {
      /* If filter isn't adapted yet, all we can do is take the far end signal directly */
      /* moved earlier: for (i=0;i<N;i++)
      st->last_y[i] = st->x[i];*/
   }

}
/*
 * Convert LSP frequencies to LPC coefficients by clocking an impulse through
 * two cascades of second order sections (one for P(z), one for Q(z)).
 *
 *   freq    array of LSP frequencies in the x domain (lpcrdr entries are read)
 *   ak      array of LPC coefficients; lpcrdr+1 entries, ak[0]..ak[lpcrdr],
 *           are written, each saturated to +/-32767
 *   lpcrdr  order of LPC coefficients
 *   stack   scratch memory used by the ALLOC() calls below
 *
 * The two trailing state cells are addressed directly as Wp[4*m] and
 * Wp[4*m+1] rather than through the last section's pointer, so an order
 * below 2 (m == 0, no sections) no longer dereferences a null pointer.
 */
void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
{
    int i,j;
    spx_word32_t xout1,xout2,xin1,xin2;
    VARDECL(spx_word32_t *Wp);
    spx_word32_t *pw,*n1,*n2,*n3,*n4;
    spx_word32_t *tail1,*tail2;
    VARDECL(spx_word16_t *freqn);
    int m = lpcrdr>>1;

    ALLOC(freqn, lpcrdr, spx_word16_t);
    for (i=0;i<lpcrdr;i++)
        freqn[i] = ANGLE2X(freq[i]);

    /* Four state words per section plus the two trailing cells */
    ALLOC(Wp, 4*m+2, spx_word32_t);
    pw = Wp;

    /* initialise contents of array */
    for(i=0;i<=4*m+1;i++){       /* set contents of buffer to 0 */
        *pw++ = 0;
    }

    /* Set pointers up */
    pw = Wp;
    tail1 = pw + 4*m;            /* same cell as n4+1 of the last section */
    tail2 = tail1 + 1;           /* same cell as n4+2 of the last section */

    /* The impulse (1<<20) is fed on the first iteration only */
    xin1 = 1048576;
    xin2 = 1048576;

    /* reconstruct P(z) and Q(z) by cascading second order
       polynomials in form 1 - 2xz(-1) +z(-2), where x is the
       LSP coefficient */
    for(j=0;j<=lpcrdr;j++){
        spx_word16_t *fr=freqn;
        for(i=0;i<m;i++){
            n1 = pw+(i<<2);
            n2 = n1 + 1;
            n3 = n2 + 1;
            n4 = n3 + 1;
            xout1 = ADD32(SUB32(xin1, MULT16_32_Q14(*fr,*n1)), *n2);
            fr++;
            xout2 = ADD32(SUB32(xin2, MULT16_32_Q14(*fr,*n3)), *n4);
            fr++;
            /* Shift the delay line of each section */
            *n2 = *n1;
            *n4 = *n3;
            *n1 = xin1;
            *n3 = xin2;
            xin1 = xout1;
            xin2 = xout2;
        }
        /* Final first-order stage: add the previous P output, subtract the
           previous Q output */
        xout1 = xin1 + *tail1;
        xout2 = xin2 - *tail2;
        /* FIXME: perhaps apply bandwidth expansion in case of overflow? */
        /*FIXME: Is it OK to have a long constant? */
        if (xout1 + xout2>SHL(32766,8))
            ak[j] = 32767;
        else if (xout1 + xout2 < -SHL(32766,8))
            ak[j] = -32767;
        else
            ak[j] = EXTRACT16(PSHR32(ADD32(xout1,xout2),8));
        *tail1 = xin1;
        *tail2 = xin2;

        xin1 = 0;
        xin2 = 0;
    }
}