/* FIXME: These functions are ugly and probably introduce too much error */

/* Multiply `len` samples of x by `scale` and store the products in y.
   Every input sample goes through SHR(., 7) before the MULT16_32_Q14
   product, and the product goes through SHL(., 7) before being stored.
   x and y are walked in lock-step, one sample at a time. */
void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
{
   const spx_sig_t *src = x;
   spx_sig_t *dst = y;
   int remaining;

   for (remaining = len; remaining > 0; remaining--)
   {
      *dst = SHL(MULT16_32_Q14(SHR(*src,7),scale),7);
      src++;
      dst++;
   }
}
/* Convert LSP frequencies to LPC coefficients (fixed-point, direct cascade).

   freq    array of lpcrdr LSP values; each one is mapped through ANGLE2X()
   ak      output array; lpcrdr+1 entries, ak[0]..ak[lpcrdr], are written
   lpcrdr  order of the LPC filter
   stack   scratch area from which the work buffers are taken with PUSH()

   An impulse of amplitude 1048576 is fed through m = lpcrdr/2 cascaded
   second order sections for each of the two polynomials; every output sample
   is the sum of the two cascade outputs, rounded down by 8 bits with PSHR()
   and saturated to [-32768, 32767].

   The two trailing memory taps are addressed as Wp[4*m] and Wp[4*m+1].
   The previous code reached them through n4, which is only assigned inside
   the section loop and therefore was still NULL when lpcrdr < 2 (m == 0).
   For m >= 1 the addresses are identical to n4+1 and n4+2. */
void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
{
    int i,j;
    spx_word32_t xout1,xout2,xin1,xin2;
    spx_word32_t *Wp;
    spx_word32_t *pw,*n1,*n2,*n3,*n4;
    spx_word32_t *tail;
    spx_word16_t *freqn;
    int m = lpcrdr>>1;

    freqn = PUSH(stack, lpcrdr, spx_word16_t);
    for (i=0;i<lpcrdr;i++)
       freqn[i] = ANGLE2X(freq[i]);

    /* 4 memory words per section pair, plus 2 words for the final stage */
    Wp = PUSH(stack, 4*m+2, spx_word32_t);
    pw = Wp;

    /* initialise contents of array */
    for(i=0;i<=4*m+1;i++){       /* set contents of buffer to 0 */
        *pw++ = 0;
    }

    /* Set pointers up */
    pw = Wp;
    tail = Wp + 4*m;             /* memory of the final first-order stage */
    xin1 = 1048576;
    xin2 = 1048576;

    /* reconstruct P(z) and Q(z) by cascading second order
       polynomials in form 1 - 2xz(-1) + z(-2), where x is the
       LSP coefficient */
    for(j=0;j<=lpcrdr;j++){
        spx_word16_t *fr=freqn;
        for(i=0;i<m;i++){
            /* n1,n2: delay line of the P section; n3,n4: of the Q section */
            n1 = pw+(i<<2);
            n2 = n1 + 1;
            n3 = n2 + 1;
            n4 = n3 + 1;
            xout1 = ADD32(SUB32(xin1, MULT16_32_Q14(*fr,*n1)), *n2);
            fr++;
            xout2 = ADD32(SUB32(xin2, MULT16_32_Q14(*fr,*n3)), *n4);
            fr++;
            /* shift the delay lines */
            *n2 = *n1;
            *n4 = *n3;
            *n1 = xin1;
            *n3 = xin2;
            /* output of this section feeds the next one */
            xin1 = xout1;
            xin2 = xout2;
        }
        xout1 = xin1 + tail[0];
        xout2 = xin2 - tail[1];
        /* FIXME: perhaps apply bandwidth expansion in case of overflow? */
        if (xout1 + xout2>256*32766)
           ak[j] = 32767;
        else if (xout1 + xout2 < -256*32767)
           ak[j] = -32768;
        else
           ak[j] = PSHR(ADD32(xout1,xout2),8);
        tail[0] = xin1;
        tail[1] = xin2;

        /* only the first sample of the input is non-zero (impulse) */
        xin1 = 0;
        xin2 = 0;
    }
}
void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack) /* float *freq array of LSP frequencies in the x domain */ /* float *ak array of LPC coefficients */ /* int lpcrdr order of LPC coefficients */ { int i,j; spx_word32_t xout1,xout2,xin; spx_word32_t mult, a; VARDECL(spx_word32_t *xpmem); VARDECL(spx_word32_t *xqmem); #ifndef FIXED_LPC_SIZE VARDECL(spx_word16_t *freqn); VARDECL(spx_word32_t **xp); VARDECL(spx_word32_t **xq); #else spx_word16_t freqn[FIXED_LPC_SIZE]; spx_word32_t *xp[(FIXED_LPC_SIZE/2)+1]; spx_word32_t *xq[(FIXED_LPC_SIZE/2)+1]; #endif int m = lpcrdr>>1; /* Reconstruct P(z) and Q(z) by cascading second order polynomials in form 1 - 2cos(w)z(-1) + z(-2), where w is the LSP frequency. In the time domain this is: y(n) = x(n) - 2cos(w)x(n-1) + x(n-2) This is what the ALLOCS below are trying to do: int xp[m+1][lpcrdr+1+2]; // P matrix in QIMP int xq[m+1][lpcrdr+1+2]; // Q matrix in QIMP These matrices store the output of each stage on each row. The final (m-th) row has the output of the final (m-th) cascaded 2nd order filter. The first row is the impulse input to the system (not written as it is known). The version below takes advantage of the fact that a lot of the outputs are zero or known, for example if we put an inpulse into the first section the "clock" it 10 times only the first 3 outputs samples are non-zero (it's an FIR filter). 
*/ #ifndef FIXED_LPC_SIZE ALLOC(xp, (m+1), spx_word32_t*); #endif ALLOC(xpmem, (m+1)*(lpcrdr+1+2), spx_word32_t); #ifndef FIXED_LPC_SIZE ALLOC(xq, (m+1), spx_word32_t*); #endif ALLOC(xqmem, (m+1)*(lpcrdr+1+2), spx_word32_t); #ifndef FIXED_LPC_SIZE for(i=0; i<=m; i++) { xp[i] = xpmem + i*(lpcrdr+1+2); xq[i] = xqmem + i*(lpcrdr+1+2); } #else for(i=0; i<=m; i++) { xp[i] = xpmem + i*(FIXED_LPC_SIZE+1+2); xq[i] = xqmem + i*(FIXED_LPC_SIZE+1+2); } #endif /* work out 2cos terms in Q14 */ #ifndef FIXED_LPC_SIZE ALLOC(freqn, lpcrdr, spx_word16_t); for (i=0;i<lpcrdr;i++) freqn[i] = ANGLE2X(freq[i]); #else for (i=0;i<FIXED_LPC_SIZE;i++) freqn[i] = ANGLE2X(freq[i]); #endif #define QIMP 21 /* scaling for impulse */ xin = SHL32(EXTEND32(1), (QIMP-1)); /* 0.5 in QIMP format */ /* first col and last non-zero values of each row are trivial */ for(i=0;i<=m;i++) { xp[i][1] = 0; xp[i][2] = xin; xp[i][2+2*i] = xin; xq[i][1] = 0; xq[i][2] = xin; xq[i][2+2*i] = xin; } /* 2nd row (first output row) is trivial */ xp[1][3] = -MULT16_32_Q14(freqn[0],xp[0][2]); xq[1][3] = -MULT16_32_Q14(freqn[1],xq[0][2]); xout1 = xout2 = 0; /* now generate remaining rows */ for(i=1;i<m;i++) { for(j=1;j<2*(i+1)-1;j++) { mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]); xp[i+1][j+2] = ADD32(SUB32(xp[i][j+2], mult), xp[i][j]); mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]); xq[i+1][j+2] = ADD32(SUB32(xq[i][j+2], mult), xq[i][j]); } /* for last col xp[i][j+2] = xq[i][j+2] = 0 */ mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]); xp[i+1][j+2] = SUB32(xp[i][j], mult); mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]); xq[i+1][j+2] = SUB32(xq[i][j], mult); } /* process last row to extra a{k} */ #ifndef FIXED_LPC_SIZE for(j=1;j<=lpcrdr;j++) { #else for(j=1;j<=FIXED_LPC_SIZE;j++) { #endif int shift = QIMP-13; /* final filter sections */ a = PSHR32(xp[m][j+2] + xout1 + xq[m][j+2] - xout2, shift); xout1 = xp[m][j+2]; xout2 = xq[m][j+2]; /* hard limit ak's to +/- 32767 */ if (a < -32767) a = -32767; if (a > 32767) a = 32767; ak[j-1] = 
(short)a; } } #else void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack) /* float *freq array of LSP frequencies in the x domain */ /* float *ak array of LPC coefficients */ /* int lpcrdr order of LPC coefficients */ { int i,j; float xout1,xout2,xin1,xin2; VARDECL(float *Wp); float *pw,*n1,*n2,*n3,*n4=NULL; VARDECL(float *x_freq); int m = lpcrdr>>1; ALLOC(Wp, 4*m+2, float); pw = Wp; /* initialise contents of array */ for(i=0;i<=4*m+1;i++){ /* set contents of buffer to 0 */ *pw++ = 0.0; } /* Set pointers up */ pw = Wp; xin1 = 1.0; xin2 = 1.0; ALLOC(x_freq, lpcrdr, float); for (i=0;i<lpcrdr;i++) x_freq[i] = ANGLE2X(freq[i]); /* reconstruct P(z) and Q(z) by cascading second order polynomials in form 1 - 2xz(-1) +z(-2), where x is the LSP coefficient */ for(j=0;j<=lpcrdr;j++){ int i2=0; for(i=0;i<m;i++,i2+=2){ n1 = pw+(i*4); n2 = n1 + 1; n3 = n2 + 1; n4 = n3 + 1; xout1 = xin1 - 2.f*x_freq[i2] * *n1 + *n2; xout2 = xin2 - 2.f*x_freq[i2+1] * *n3 + *n4; *n2 = *n1; *n4 = *n3; *n1 = xin1; *n3 = xin2; xin1 = xout1; xin2 = xout2; } xout1 = xin1 + *(n4+1); xout2 = xin2 - *(n4+2); if (j>0) ak[j-1] = (xout1 + xout2)*0.5f; *(n4+1) = xin1; *(n4+2) = xin2; xin1 = 0.0; xin2 = 0.0; } }
/* Fixed-point LSP -> LPC conversion.

   freq    array of lpcrdr LSP values; each one is mapped through ANGLE2X()
   ak      receives lpcrdr coefficients, ak[0]..ak[lpcrdr-1]
   lpcrdr  order of the LPC filter
   stack   scratch area; not referenced directly here, presumably consumed
           by the ALLOC() macro

   P(z) and Q(z) are rebuilt by cascading second order sections of the form
   1 - 2cos(w)z(-1) + z(-2), i.e. y(n) = x(n) - 2cos(w)x(n-1) + x(n-2) in
   the time domain, where w is an LSP frequency.

   Two tables with m+1 rows of lpcrdr+1+2 words each (m = lpcrdr/2) hold the
   output of every stage, one stage per row, scaled by QIMP.  Row 0 is the
   impulse fed into the cascade and row m is the output of the last section.
   Since each section is an FIR filter excited by an impulse, most entries
   are zero or known in advance, so only the non-trivial ones are computed. */
void lsp_to_lpc(const spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack)
{
    int stage, col;
    spx_word32_t prev_p, prev_q, half;
    spx_word32_t prod, coef;
    VARDECL(spx_word16_t *cos2);
    VARDECL(spx_word32_t **prow);
    VARDECL(spx_word32_t *pmem);
    VARDECL(spx_word32_t **qrow);
    VARDECL(spx_word32_t *qmem);
    int m = lpcrdr>>1;
    int row_len = lpcrdr+1+2;

    /* row pointer tables and their flat backing stores */
    ALLOC(prow, (m+1), spx_word32_t*);
    ALLOC(pmem, (m+1)*(row_len), spx_word32_t);
    ALLOC(qrow, (m+1), spx_word32_t*);
    ALLOC(qmem, (m+1)*(row_len), spx_word32_t);

    for (stage = 0; stage <= m; stage++)
    {
        prow[stage] = pmem + stage*(row_len);
        qrow[stage] = qmem + stage*(row_len);
    }

    /* 2cos terms in Q14, one per LSP */
    ALLOC(cos2, lpcrdr, spx_word16_t);
    for (col = 0; col < lpcrdr; col++)
        cos2[col] = ANGLE2X(freq[col]);

#define QIMP 21 /* scaling for impulse */

    half = SHL32(EXTEND32(1), (QIMP-1)); /* 0.5 in QIMP format */

    /* The leading entries and the last non-zero entry of every row
       are known without any filtering. */
    for (stage = 0; stage <= m; stage++)
    {
        spx_word32_t *p = prow[stage];
        spx_word32_t *q = qrow[stage];
        int last = 2+2*stage;

        p[1] = 0;
        p[2] = half;
        p[last] = half;
        q[1] = 0;
        q[2] = half;
        q[last] = half;
    }

    /* Row 1 (output of the first section) has a single computed entry. */
    prow[1][3] = -MULT16_32_Q14(cos2[0],prow[0][2]);
    qrow[1][3] = -MULT16_32_Q14(cos2[1],qrow[0][2]);

    prev_p = 0;
    prev_q = 0;

    /* Every further row is the previous row run through one more section. */
    for (stage = 1; stage < m; stage++)
    {
        spx_word32_t *p_in = prow[stage];
        spx_word32_t *q_in = qrow[stage];
        spx_word32_t *p_out = prow[stage+1];
        spx_word32_t *q_out = qrow[stage+1];
        spx_word16_t cp = cos2[2*stage];
        spx_word16_t cq = cos2[2*stage+1];
        int last = 2*(stage+1)-1;

        for (col = 1; col < last; col++)
        {
            prod = MULT16_32_Q14(cp,p_in[col+1]);
            p_out[col+2] = ADD32(SUB32(p_in[col+2], prod), p_in[col]);
            prod = MULT16_32_Q14(cq,q_in[col+1]);
            q_out[col+2] = ADD32(SUB32(q_in[col+2], prod), q_in[col]);
        }

        /* In the last column the x(n) term, p_in[last+2] / q_in[last+2],
           is zero, so it is left out of the sum. */
        prod = MULT16_32_Q14(cp,p_in[last+1]);
        p_out[last+2] = SUB32(p_in[last], prod);
        prod = MULT16_32_Q14(cq,q_in[last+1]);
        q_out[last+2] = SUB32(q_in[last], prod);
    }

    /* Extract the a{k} from the last row of both tables. */
    for (col = 0; col < lpcrdr; col++)
    {
        int shift = QIMP-13;
        spx_word32_t p_now = prow[m][col+3];
        spx_word32_t q_now = qrow[m][col+3];

        /* final filter sections: P gets its previous sample added,
           Q gets its previous sample subtracted */
        coef = PSHR32(p_now + prev_p + q_now - prev_q, shift);
        prev_p = p_now;
        prev_q = q_now;

        /* hard limit ak's to +/- 32767 */
        if (coef < -32767)
            coef = -32767;
        else if (coef > 32767)
            coef = 32767;

        ak[col] = (short)coef;
    }
}
/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis.
 *
 * For the given pitch lag, every entry of the selected gain codebook is
 * scored against the target and the highest-scoring entry is kept.  On
 * return:
 *   - exc[0..nsf-1] holds the excitation built from the three delayed copies
 *     of exc2 weighted by the selected gains,
 *   - new_target[0..nsf-1] holds target minus the filtered contribution,
 *   - *cdbk_index holds the index of the selected codebook entry.
 *
 * The return value is the accumulated squared error.  In the FIXED_POINT
 * build each error sample goes through PSHR32(., 15) before being squared,
 * so the value is not on the same scale as in the floating-point build.
 *
 * Notes on parameters that carry no inline comment:
 *   par          cast to `const ltp_params *`; its gain_bits and gain_cdbk
 *                fields are read
 *   bits         not referenced in this function
 *   stack        passed on to syn_percep_zero(); presumably also consumed by
 *                the ALLOC() macro
 *   exc2         read at negative indices only (exc2[j-pp] with j-pp < 0 and
 *                exc2[j-pp-pitch] with j-pp-pitch < 0), so it must point into
 *                a buffer with enough history in front of it
 *   r            multiplied by the first sample of each delayed excitation;
 *                presumably an impulse response -- verify against the caller
 *   cdbk_offset  selects which block of 3*gain_cdbk_size entries of the
 *                gain codebook is searched
 *   plc_tuning   raised to 2 when smaller; scales the correlation terms and
 *                the pitch_control factor
 */
static spx_word64_t pitch_gain_search_3tap(
const spx_sig_t target[],       /* Target vector */
const spx_coef_t ak[],          /* LPCs for this subframe */
const spx_coef_t awk1[],        /* Weighted LPCs #1 for this subframe */
const spx_coef_t awk2[],        /* Weighted LPCs #2 for this subframe */
spx_sig_t exc[],                /* Excitation */
const void *par,
int   pitch,                    /* Pitch value */
int   p,                        /* Number of LPC coeffs */
int   nsf,                      /* Number of samples in subframe */
SpeexBits *bits,
char *stack,
const spx_sig_t *exc2,
const spx_word16_t *r,
spx_sig_t *new_target,
int  *cdbk_index,
int cdbk_offset,
int plc_tuning
)
{
   int i,j;
   VARDECL(spx_sig_t *tmp1);
   VARDECL(spx_sig_t *tmp2);
   spx_sig_t *x[3];              /* filtered versions of e[0..2] */
   spx_sig_t *e[3];              /* delayed excitation, one per tap */
   spx_word32_t corr[3];         /* corr[i] = <x[i], target> */
   spx_word32_t A[3][3];         /* A[i][j] = <x[i], x[j]> (symmetric) */
   int   gain_cdbk_size;
   const signed char *gain_cdbk;
   spx_word16_t gain[3];
   spx_word64_t err;

   const ltp_params *params;
   params = (const ltp_params*) par;
   gain_cdbk_size = 1<<params->gain_bits;
   /* each codebook entry is 3 signed chars; skip cdbk_offset whole codebooks */
   gain_cdbk = params->gain_cdbk + 3*gain_cdbk_size*cdbk_offset;

   /* tmp1 and tmp2 each hold three consecutive nsf-sample vectors */
   ALLOC(tmp1, 3*nsf, spx_sig_t);
   ALLOC(tmp2, 3*nsf, spx_sig_t);

   x[0]=tmp1;
   x[1]=tmp1+nsf;
   x[2]=tmp1+2*nsf;

   e[0]=tmp2;
   e[1]=tmp2+nsf;
   e[2]=tmp2+2*nsf;

   /* Build the three delayed excitations (lags pitch-1, pitch, pitch+1 for
      i = 2, 1, 0) and their filtered versions.  Runs from i=2 down to 0
      because x[i] is derived from x[i+1]. */
   for (i=2; i>=0; i--)
   {
      int pp=pitch+1-i;
      for (j=0; j<nsf; j++)
      {
         if (j-pp<0)
            e[i][j]=exc2[j-pp];            /* one lag back */
         else if (j-pp-pitch<0)
            e[i][j]=exc2[j-pp-pitch];      /* one further pitch period back */
         else
            e[i][j]=0;
      }

      if (i==2)
         /* only the first vector goes through the filter routine */
         syn_percep_zero(e[i], ak, awk1, awk2, x[i], nsf, p, stack);
      else {
         /* x[i] is x[i+1] delayed by one sample ... */
         for (j=0; j<nsf-1; j++)
            x[i][j+1]=x[i+1][j];
         x[i][0]=0;
         /* ... plus r[] scaled by the first sample of e[i] */
         for (j=0; j<nsf; j++)
         {
            x[i][j]=ADD32(x[i][j],SHL32(MULT16_32_Q15(r[j], e[i][0]),1));
         }
      }
   }

#ifdef FIXED_POINT
   {
      /* If using fixed-point, we need to normalize the signals first */
      spx_word16_t *y[3];
      VARDECL(spx_word16_t *ytmp);
      VARDECL(spx_word16_t *t);

      spx_sig_t max_val=1;
      int sig_shift;

      ALLOC(ytmp, 3*nsf, spx_word16_t);
#if 0
      ALLOC(y[0], nsf, spx_word16_t);
      ALLOC(y[1], nsf, spx_word16_t);
      ALLOC(y[2], nsf, spx_word16_t);
#else
      y[0] = ytmp;
      y[1] = ytmp+nsf;
      y[2] = ytmp+2*nsf;
#endif
      ALLOC(t, nsf, spx_word16_t);
      /* largest magnitude over the three filtered signals ... */
      for (j=0; j<3; j++)
      {
         for (i=0; i<nsf; i++)
         {
            spx_sig_t tmp = x[j][i];
            if (tmp<0)
               tmp = -tmp;
            if (tmp > max_val)
               max_val = tmp;
         }
      }
      /* ... and over the target */
      for (i=0; i<nsf; i++)
      {
         spx_sig_t tmp = target[i];
         if (tmp<0)
            tmp = -tmp;
         if (tmp > max_val)
            max_val = tmp;
      }

      /* smallest right shift that brings that magnitude down to <= 16384 */
      sig_shift=0;
      while (max_val>16384)
      {
         sig_shift++;
         max_val >>= 1;
      }

      /* 16-bit shifted copies of the signals and of the target */
      for (j=0; j<3; j++)
      {
         for (i=0; i<nsf; i++)
         {
            y[j][i] = EXTRACT16(SHR32(x[j][i],sig_shift));
         }
      }
      for (i=0; i<nsf; i++)
      {
         t[i] = EXTRACT16(SHR32(target[i],sig_shift));
      }

      for (i=0; i<3; i++)
         corr[i]=inner_prod(y[i],t,nsf);

      /* only the lower triangle is computed; the mirror entry is copied */
      for (i=0; i<3; i++)
         for (j=0; j<=i; j++)
            A[i][j]=A[j][i]=inner_prod(y[i],y[j],nsf);
   }
#else
   {
      for (i=0; i<3; i++)
         corr[i]=inner_prod(x[i],target,nsf);

      /* only the lower triangle is computed; the mirror entry is copied */
      for (i=0; i<3; i++)
         for (j=0; j<=i; j++)
            A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf);
   }
#endif

   {
      /* C[0..2]: correlations with the target, ordered to match g0,g1,g2
         C[3..5]: cross terms A[1][2], A[0][1], A[0][2]
         C[6..8]: energies A[2][2], A[1][1], A[0][0] */
      spx_word32_t C[9];
      const signed char *ptr=gain_cdbk;
      int best_cdbk=0;
      spx_word32_t best_sum=0;
      C[0]=corr[2];
      C[1]=corr[1];
      C[2]=corr[0];
      C[3]=A[1][2];
      C[4]=A[0][1];
      C[5]=A[0][2];
      C[6]=A[2][2];
      C[7]=A[1][1];
      C[8]=A[0][0];

      /*plc_tuning *= 2;*/
      if (plc_tuning<2)
         plc_tuning=2;
#ifdef FIXED_POINT
      /* only C[0..2] are adjusted by plc_tuning in this build */
      C[0] = MAC16_32_Q15(C[0],MULT16_16_16(plc_tuning,-327),C[0]);
      C[1] = MAC16_32_Q15(C[1],MULT16_16_16(plc_tuning,-327),C[1]);
      C[2] = MAC16_32_Q15(C[2],MULT16_16_16(plc_tuning,-327),C[2]);
#else
      /* correlations are scaled down, energies are halved and scaled up */
      C[0]*=1-.01*plc_tuning;
      C[1]*=1-.01*plc_tuning;
      C[2]*=1-.01*plc_tuning;
      C[6]*=.5*(1+.01*plc_tuning);
      C[7]*=.5*(1+.01*plc_tuning);
      C[8]*=.5*(1+.01*plc_tuning);
#endif
      /* exhaustive search over the codebook */
      for (i=0; i<gain_cdbk_size; i++)
      {
         spx_word32_t sum=0;
         spx_word16_t g0,g1,g2;
         spx_word16_t pitch_control=64;
         spx_word16_t gain_sum;

         ptr = gain_cdbk+3*i;
         /* stored entries are offset by -32 */
         g0=ADD16((spx_word16_t)ptr[0],32);
         g1=ADD16((spx_word16_t)ptr[1],32);
         g2=ADD16((spx_word16_t)ptr[2],32);

         /* g1 always counts towards the total, g0 and g2 only when positive */
         gain_sum = g1;
         if (g0>0)
            gain_sum += g0;
         if (g2>0)
            gain_sum += g2;
         /* when the total exceeds 64, lower pitch_control in proportion to
            the excess (capped at 127) and to plc_tuning, but not below 0 */
         if (gain_sum > 64)
         {
            gain_sum = SUB16(gain_sum, 64);
            if (gain_sum > 127)
               gain_sum = 127;
#ifdef FIXED_POINT
            pitch_control =  SUB16(64,EXTRACT16(PSHR32(MULT16_16(64,MULT16_16_16(plc_tuning, gain_sum)),10)));
#else
            pitch_control = 64*(1.-.001*plc_tuning*gain_sum);
#endif
            if (pitch_control < 0)
               pitch_control = 0;
         }

         /* score = correlation terms (weighted by pitch_control)
                    - cross terms - energy terms */
         sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g0,pitch_control),C[0]));
         sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g1,pitch_control),C[1]));
         sum = ADD32(sum,MULT16_32_Q14(MULT16_16_16(g2,pitch_control),C[2]));
         sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g0,g1),C[3]));
         sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g2,g1),C[4]));
         sum = SUB32(sum,MULT16_32_Q14(MULT16_16_16(g2,g0),C[5]));
         sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g0,g0),C[6]));
         sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g1,g1),C[7]));
         sum = SUB32(sum,MULT16_32_Q15(MULT16_16_16(g2,g2),C[8]));
         /* We could force "safe" pitch values to handle packet loss better */

         /* the first entry is always accepted so best_sum starts out valid */
         if (sum>best_sum || i==0)
         {
            best_sum=sum;
            best_cdbk=i;
         }
      }
      /* decode the gains of the winning entry */
#ifdef FIXED_POINT
      gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3]);
      gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+1]);
      gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*3+2]);
      /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/
#else
      gain[0] = 0.015625*gain_cdbk[best_cdbk*3]  + .5;
      gain[1] = 0.015625*gain_cdbk[best_cdbk*3+1]+ .5;
      gain[2] = 0.015625*gain_cdbk[best_cdbk*3+2]+ .5;
#endif
      *cdbk_index=best_cdbk;
   }

   /* Build the excitation and the updated target.  gain[0] pairs with
      e[2]/x[2], gain[1] with e[1]/x[1] and gain[2] with e[0]/x[0]. */
#ifdef FIXED_POINT
   for (i=0; i<nsf; i++)
      exc[i]=SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),e[2][i]), MULT16_32_Q15(SHL16(gain[1],7),e[1][i])),
                         MULT16_32_Q15(SHL16(gain[2],7),e[0][i])), 2);

   err=0;
   for (i=0; i<nsf; i++)
   {
      spx_word16_t perr2;
      spx_sig_t tmp = SHL32(ADD32(ADD32(MULT16_32_Q15(SHL16(gain[0],7),x[2][i]),MULT16_32_Q15(SHL16(gain[1],7),x[1][i])),
                                  MULT16_32_Q15(SHL16(gain[2],7),x[0][i])),2);
      spx_sig_t perr=SUB32(target[i],tmp);
      new_target[i] = SUB32(target[i], tmp);
      /* the error sample is reduced to 16 bits before it is squared */
      perr2 = EXTRACT16(PSHR32(perr,15));
      err = ADD64(err,MULT16_16(perr2,perr2));

   }
#else
   for (i=0; i<nsf; i++)
      exc[i]=gain[0]*e[2][i]+gain[1]*e[1][i]+gain[2]*e[0][i];

   err=0;
   for (i=0; i<nsf; i++)
   {
      spx_sig_t tmp = gain[2]*x[0][i]+gain[1]*x[1][i]+gain[0]*x[2][i];
      new_target[i] = target[i] - tmp;
      err+=new_target[i]*new_target[i];
   }
#endif

   return err;
}