static inline spx_word32_t cheb_poly_eva( spx_word16_t *coef, /* P or Q coefs in Q13 format */ spx_word16_t x, /* cos of freq (-1.0 to 1.0) in Q14 format */ int m, /* LPC order/2 */ char *stack ) { int i; spx_word16_t b0, b1; spx_word32_t sum; /*Prevents overflows*/ if (x>16383) x = 16383; if (x<-16383) x = -16383; /* Initialise values */ b1=16384; b0=x; /* Evaluate Chebyshev series formulation usin g iterative approach */ sum = ADD32(EXTEND32(coef[m]), EXTEND32(MULT16_16_P14(coef[m-1],x))); for(i=2;i<=m;i++) { spx_word16_t tmp=b0; b0 = SUB16(MULT16_16_Q13(x,b0), b1); b1 = tmp; sum = ADD32(sum, EXTEND32(MULT16_16_P14(coef[m-i],b0))); } return sum; }
/* Computes a rough approximation of log2(2^a + 2^b) */ static opus_val16 logSum(opus_val16 a, opus_val16 b) { opus_val16 max; opus_val32 diff; opus_val16 frac; static const opus_val16 diff_table[17] = { QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT), QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT), QCONST16(0.0028123f, DB_SHIFT) }; int low; if (a>b) { max = a; diff = SUB32(EXTEND32(a),EXTEND32(b)); } else { max = b; diff = SUB32(EXTEND32(b),EXTEND32(a)); } if (!(diff < QCONST16(8.f, DB_SHIFT))) /* inverted to catch NaNs */ return max; #ifdef FIXED_POINT low = SHR32(diff, DB_SHIFT-1); frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT); #else low = (int)floor(2*diff); frac = 2*diff - low; #endif return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low])); }
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) { static const int SPREAD_FACTOR[3]= {15,10,5}; int i; opus_val16 c, s; opus_val16 gain, theta; int stride2=0; int factor; if (2*K>=len || spread==SPREAD_NONE) return; factor = SPREAD_FACTOR[spread-1]; gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); theta = HALF16(MULT16_16_Q15(gain,gain)); c = celt_cos_norm(EXTEND32(theta)); s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ if (len>=8*stride) { stride2 = 1; /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ while ((stride2*stride2+stride2)*stride + (stride>>2) < len) stride2++; }
void split_cb_shape_sign_unquant( spx_sig_t *exc, const void *par, /* non-overlapping codebook */ int nsf, /* number of samples in subframe */ SpeexBits *bits, char *stack, spx_int32_t *seed ) { int i,j; VARDECL(int *ind); VARDECL(int *signs); const signed char *shape_cb; //int shape_cb_size; int subvect_size, nb_subvect; const split_cb_params *params; int have_sign; params = (const split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; //shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; ALLOC(ind, nb_subvect, int); ALLOC(signs, nb_subvect, int); /* Decode codewords and gains */ for (i=0;i<nb_subvect;i++) { if (have_sign) signs[i] = speex_bits_unpack_unsigned(bits, 1); else signs[i] = 0; ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits); } /* Compute decoded excitation */ for (i=0;i<nb_subvect;i++) { spx_word16_t s=1; if (signs[i]) s=-1; #ifdef FIXED_POINT if (s==1) { for (j=0;j<subvect_size;j++) exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5)); } #else for (j=0;j<subvect_size;j++) exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j]; #endif } }
EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) { switch(request) { case SPEEX_ECHO_GET_FRAME_SIZE: (*(int*)ptr) = st->frame_size; break; case SPEEX_ECHO_SET_SAMPLING_RATE: st->sampling_rate = (*(int*)ptr); st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); #ifdef FIXED_POINT st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); #else st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; st->beta_max = (.5f*st->frame_size)/st->sampling_rate; #endif if (st->sampling_rate<12000) st->notch_radius = QCONST16(.9, 15); else if (st->sampling_rate<24000) st->notch_radius = QCONST16(.982, 15); else st->notch_radius = QCONST16(.992, 15); break; case SPEEX_ECHO_GET_SAMPLING_RATE: (*(int*)ptr) = st->sampling_rate; break; case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE: /*FIXME: Implement this for multiple channels */ *((spx_int32_t *)ptr) = st->M * st->frame_size; break; case SPEEX_ECHO_GET_IMPULSE_RESPONSE: { int M = st->M, N = st->window_size, n = st->frame_size, i, j; spx_int32_t *filt = (spx_int32_t *) ptr; for(j=0;j<M;j++) { /*FIXME: Implement this for multiple channels */ #ifdef FIXED_POINT for (i=0;i<N;i++) st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN)); spx_ifft(st->fft_table, st->wtmp2, st->wtmp); #else spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); #endif for(i=0;i<n;i++) filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN); } } break; default: speex_warning_int("Unknown speex_echo_ctl request: ", request); return -1; } return 0; }
static void kf_bfly2( kiss_fft_cpx * Fout, const size_t fstride, const kiss_fft_cfg st, int m, int N, int mm ) { kiss_fft_cpx * Fout2; kiss_fft_cpx * tw1; kiss_fft_cpx t; if (!st->inverse) { int i,j; kiss_fft_cpx * Fout_beg = Fout; for (i=0;i<N;i++) { Fout = Fout_beg + i*mm; Fout2 = Fout + m; tw1 = st->twiddles; for(j=0;j<m;j++) { /* Almost the same as the code path below, except that we divide the input by two (while keeping the best accuracy possible) */ ms_word32_t tr, ti; tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1); ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1); tw1 += fstride; Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15); Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15); Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15); Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15); ++Fout2; ++Fout; } } } else { int i,j; kiss_fft_cpx * Fout_beg = Fout; for (i=0;i<N;i++) { Fout = Fout_beg + i*mm; Fout2 = Fout + m; tw1 = st->twiddles; for(j=0;j<m;j++) { C_MUL (t, *Fout2 , *tw1); tw1 += fstride; C_SUB( *Fout2 , *Fout , t ); C_ADDTO( *Fout , t ); ++Fout2; ++Fout; } } } }
/** Forced pitch delay and gain */ int forced_pitch_quant(spx_word16_t target[], /* Target vector */ spx_word16_t * sw, spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */ spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */ spx_sig_t exc[], /* Excitation */ const void *par, int start, /* Smallest pitch value allowed */ int end, /* Largest pitch value allowed */ spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ SpeexBits * bits, char *stack, spx_word16_t * exc2, spx_word16_t * r, int complexity, int cdbk_offset, int plc_tuning, spx_word32_t * cumul_gain) { (void)sw; (void)par; (void)end; (void)bits; (void)r; (void)complexity; (void)cdbk_offset; (void)plc_tuning; (void)cumul_gain; int i; spx_word16_t res[nsf]; #ifdef FIXED_POINT if (pitch_coef > 63) pitch_coef = 63; #else if (pitch_coef > .99) pitch_coef = .99; #endif for (i = 0; i < nsf && i < start; i++) { exc[i] = MULT16_16(SHL16(pitch_coef, 7), exc2[i - start]); } for (; i < nsf; i++) { exc[i] = MULT16_32_Q15(SHL16(pitch_coef, 9), exc[i - start]); } for (i = 0; i < nsf; i++) res[i] = EXTRACT16(PSHR32(exc[i], SIG_SHIFT - 1)); syn_percep_zero16(res, ak, awk1, awk2, res, nsf, p, stack); for (i = 0; i < nsf; i++) target[i] = EXTRACT16(SATURATE (SUB32(EXTEND32(target[i]), EXTEND32(res[i])), 32700)); return start; }
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K) { int i, k, iter; celt_word16 c, s; celt_word16 gain, theta; celt_norm *Xptr; gain = celt_div((celt_word32)MULT16_16(Q15_ONE,len),(celt_word32)(3+len+6*K)); /* FIXME: Make that HALF16 instead of HALF32 */ theta = SUB16(Q15ONE, HALF32(MULT16_16_Q15(gain,gain))); /*if (len==30) { for (i=0;i<len;i++) X[i] = 0; X[14] = 1; }*/ c = celt_cos_norm(EXTEND32(theta)); s = dir*celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ if (len > 8*stride) stride *= len/(8*stride); iter = 1; for (k=0;k<iter;k++) { /* We could use MULT16_16_P15 instead of MULT16_16_Q15 for more accuracy, but at this point, I really don't think it's necessary */ Xptr = X; for (i=0;i<len-stride;i++) { celt_norm x1, x2; x1 = Xptr[0]; x2 = Xptr[stride]; Xptr[stride] = MULT16_16_Q15(c,x2) + MULT16_16_Q15(s,x1); *Xptr++ = MULT16_16_Q15(c,x1) - MULT16_16_Q15(s,x2); } Xptr = &X[len-2*stride-1]; for (i=len-2*stride-1;i>=0;i--) { celt_norm x1, x2; x1 = Xptr[0]; x2 = Xptr[stride]; Xptr[stride] = MULT16_16_Q15(c,x2) + MULT16_16_Q15(s,x1); *Xptr-- = MULT16_16_Q15(c,x1) - MULT16_16_Q15(s,x2); } } /*if (len==30) { for (i=0;i<len;i++) printf ("%f ", X[i]); printf ("\n"); exit(0); }*/ }
static void cubic_coef(spx_word16_t x, spx_word16_t interp[4]) { /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation but I know it's MMSE-optimal on a sinc */ spx_word16_t x2, x3; x2 = MULT16_16_P15(x, x); x3 = MULT16_16_P15(x, x2); interp[0] = PSHR32(MULT16_16(QCONST16(-0.16667f, 15),x) + MULT16_16(QCONST16(0.16667f, 15),x3),15); interp[1] = EXTRACT16(EXTEND32(x) + SHR32(SUB32(EXTEND32(x2),EXTEND32(x3)),1)); interp[3] = PSHR32(MULT16_16(QCONST16(-0.33333f, 15),x) + MULT16_16(QCONST16(.5f,15),x2) - MULT16_16(QCONST16(0.16667f, 15),x3),15); /* Just to make sure we don't have rounding problems */ interp[2] = Q15_ONE-interp[0]-interp[1]-interp[3]; if (interp[2]<32767) interp[2]+=1; }
void noise_codebook_quant( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int complexity, int update_target ) { int i; VARDECL(spx_word16_t *tmp); ALLOC(tmp, nsf, spx_word16_t); residue_percep_zero16(target, ak, awk1, awk2, tmp, nsf, p, stack); for (i=0;i<nsf;i++) exc[i]+=SHL32(EXTEND32(tmp[i]),8); SPEEX_MEMSET(target, 0, nsf); }
static void celt_fir5(opus_val16 *x, const opus_val16 *num, int N) { int i; opus_val16 num0, num1, num2, num3, num4; opus_val32 mem0, mem1, mem2, mem3, mem4; num0=num[0]; num1=num[1]; num2=num[2]; num3=num[3]; num4=num[4]; mem0=0; mem1=0; mem2=0; mem3=0; mem4=0; for (i=0;i<N;i++) { opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); sum = MAC16_16(sum,num0,mem0); sum = MAC16_16(sum,num1,mem1); sum = MAC16_16(sum,num2,mem2); sum = MAC16_16(sum,num3,mem3); sum = MAC16_16(sum,num4,mem4); mem4 = mem3; mem3 = mem2; mem2 = mem1; mem1 = mem0; mem0 = x[i]; x[i] = ROUND16(sum, SIG_SHIFT); } }
static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop) { int i, j, p; spx_word16_t max_sum = 1; spx_word32_t prop_sum = 1; for (i=0;i<M;i++) { spx_word32_t tmp = 1; for (p=0;p<P;p++) for (j=0;j<N;j++) tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18))); #ifdef FIXED_POINT /* Just a security in case an overflow were to occur */ tmp = MIN32(ABS32(tmp), 536870912); #endif prop[i] = spx_sqrt(tmp); if (prop[i] > max_sum) max_sum = prop[i]; } for (i=0;i<M;i++) { prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum); prop_sum += EXTEND32(prop[i]); } for (i=0;i<M;i++) { prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum); /*printf ("%f ", prop[i]);*/ } /*printf ("\n");*/ }
void clt_mdct_init(mdct_lookup *l,int N) { int i; int N2; l->n = N; N2 = N>>1; l->kfft = cpx32_fft_alloc(N>>2); #ifndef ENABLE_TI_DSPLIB55 if (l->kfft==NULL) return; #endif l->trig = (kiss_twiddle_scalar*)celt_alloc(N2*sizeof(kiss_twiddle_scalar)); if (l->trig==NULL) return; /* We have enough points that sine isn't necessary */ #if defined(FIXED_POINT) #if defined(DOUBLE_PRECISION) & !defined(MIXED_PRECISION) for (i=0;i<N2;i++) l->trig[i] = SAMP_MAX*cos(2*M_PI*(i+1./8.)/N); #else for (i=0;i<N2;i++) l->trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),16386),N)); #endif #else for (i=0;i<N2;i++) l->trig[i] = cos(2*M_PI*(i+1./8.)/N); #endif }
void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes) { #ifndef FIXED_LPC_SIZE int i; #endif spx_word16_t tmp = DIV32_16(SHL32(EXTEND32(1 + subframe),14),nb_subframes); spx_word16_t tmp2 = 16384-tmp; #ifndef FIXED_LPC_SIZE for (i=0;i<len;i++) { interp_lsp[i] = MULT16_16_P14(tmp2,old_lsp[i]) + MULT16_16_P14(tmp,new_lsp[i]); } #else interp_lsp[0] = MULT16_16_P14(tmp2,old_lsp[0]) + MULT16_16_P14(tmp,new_lsp[0]); interp_lsp[1] = MULT16_16_P14(tmp2,old_lsp[1]) + MULT16_16_P14(tmp,new_lsp[1]); interp_lsp[2] = MULT16_16_P14(tmp2,old_lsp[2]) + MULT16_16_P14(tmp,new_lsp[2]); interp_lsp[3] = MULT16_16_P14(tmp2,old_lsp[3]) + MULT16_16_P14(tmp,new_lsp[3]); interp_lsp[4] = MULT16_16_P14(tmp2,old_lsp[4]) + MULT16_16_P14(tmp,new_lsp[4]); interp_lsp[5] = MULT16_16_P14(tmp2,old_lsp[5]) + MULT16_16_P14(tmp,new_lsp[5]); interp_lsp[6] = MULT16_16_P14(tmp2,old_lsp[6]) + MULT16_16_P14(tmp,new_lsp[6]); interp_lsp[7] = MULT16_16_P14(tmp2,old_lsp[7]) + MULT16_16_P14(tmp,new_lsp[7]); interp_lsp[8] = MULT16_16_P14(tmp2,old_lsp[8]) + MULT16_16_P14(tmp,new_lsp[8]); interp_lsp[9] = MULT16_16_P14(tmp2,old_lsp[9]) + MULT16_16_P14(tmp,new_lsp[9]); #endif }
/* Compute the amplitude (sqrt energy) in each of the bands */ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) { int i, c, N; const opus_int16 *eBands = m->eBands; N = M*m->shortMdctSize; c=0; do { for (i=0;i<end;i++) { int j; opus_val32 maxval=0; opus_val32 sum = 0; j=M*eBands[i]; do { maxval = MAX32(maxval, X[j+c*N]); maxval = MAX32(maxval, -X[j+c*N]); } while (++j<M*eBands[i+1]); if (maxval > 0) { int shift = celt_ilog2(maxval)-10; j=M*eBands[i]; do { sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), EXTRACT16(VSHR32(X[j+c*N],shift))); } while (++j<M*eBands[i+1]); /* We're adding one here to make damn sure we never end up with a pitch vector that's larger than unity norm */ bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); } else { bandE[i+c*m->nbEBands] = EPSILON; } /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ } } while (++c<C); /*printf ("\n");*/ }
void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes) { int i; spx_word16_t tmp = DIV32_16(SHL32(EXTEND32(1 + subframe),14),nb_subframes); spx_word16_t tmp2 = 16384-tmp; for (i=0;i<len;i++) { interp_lsp[i] = MULT16_16_P14(tmp2,old_lsp[i]) + MULT16_16_P14(tmp,new_lsp[i]); } }
void mlp_process(const MLP * m, const opus_val16 * in, opus_val16 * out) { int j; opus_val16 hidden[MAX_NEURONS]; const opus_val16 *W = m->weights; /* Copy to tmp_in */ for (j = 0; j < m->topo[1]; j++) { int k; opus_val32 sum = SHL32(EXTEND32(*W++), 8); for (k = 0; k < m->topo[0]; k++) sum = MAC16_16(sum, in[k], *W++); hidden[j] = tansig_approx(sum); } for (j = 0; j < m->topo[2]; j++) { int k; opus_val32 sum = SHL32(EXTEND32(*W++), 14); for (k = 0; k < m->topo[1]; k++) sum = MAC16_16(sum, hidden[k], *W++); out[j] = tansig_approx(EXTRACT16(PSHR32(sum, 17))); } }
void noise_codebook_unquant( spx_sig_t *exc, const void *par, /* non-overlapping codebook */ int nsf, /* number of samples in subframe */ SpeexBits *bits, char *stack, spx_int32_t *seed ) { int i; /* FIXME: This is bad, but I don't think the function ever gets called anyway */ for (i=0;i<nsf;i++) exc[i]=SHL32(EXTEND32(speex_rand(1, seed)),SIG_SHIFT); }
static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) { int i; spx_word16_t den2; #ifdef FIXED_POINT den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius)); #else den2 = radius*radius + .7*(1-radius)*(1-radius); #endif /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ for (i=0;i<len;i++) { spx_word16_t vin = in[i*stride]; spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); #ifdef FIXED_POINT mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1); #else mem[0] = mem[1] + 2*(-vin + radius*vout); #endif mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout); out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767); } }
void celt_fir_c( const opus_val16 *_x, const opus_val16 *num, opus_val16 *_y, int N, int ord, opus_val16 *mem, int arch) { int i,j; VARDECL(opus_val16, rnum); VARDECL(opus_val16, x); SAVE_STACK; ALLOC(rnum, ord, opus_val16); ALLOC(x, N+ord, opus_val16); for(i=0;i<ord;i++) rnum[i] = num[ord-i-1]; for(i=0;i<ord;i++) x[i] = mem[ord-i-1]; for (i=0;i<N;i++) x[i+ord]=_x[i]; for(i=0;i<ord;i++) mem[i] = _x[N-i-1]; #ifdef SMALL_FOOTPRINT (void)arch; for (i=0;i<N;i++) { opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); for (j=0;j<ord;j++) { sum = MAC16_16(sum,rnum[j],x[i+j]); } _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); } #else for (i=0;i<N-3;i+=4) { opus_val32 sum[4]={0,0,0,0}; xcorr_kernel(rnum, x+i, sum, ord, arch); _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT))); _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT))); _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT))); _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT))); } for (;i<N;i++) { opus_val32 sum = 0; for (j=0;j<ord;j++) sum = MAC16_16(sum,rnum[j],x[i+j]); _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); } #endif RESTORE_STACK; }
/* returns minimum mean square error */ spx_word32_t _spx_lpc( spx_coef_t *lpc, /* out: [0...p-1] LPC coefficients */ const spx_word16_t *ac, /* in: [0...p] autocorrelation values */ int p ) { int i, j; spx_word16_t r; spx_word16_t error = ac[0]; if (ac[0] == 0) { for (i = 0; i < p; i++) lpc[i] = 0; return 0; } for (i = 0; i < p; i++) { /* Sum up this iteration's reflection coefficient */ spx_word32_t rr = NEG32(SHL32(EXTEND32(ac[i + 1]),13)); for (j = 0; j < i; j++) rr = SUB32(rr,MULT16_16(lpc[j],ac[i - j])); #ifdef FIXED_POINT r = DIV32_16(rr+PSHR32(error,1),ADD16(error,8)); #else r = rr/(error+.003*ac[0]); #endif /* Update LPC coefficients and total error */ lpc[i] = r; for (j = 0; j < i>>1; j++) { spx_word16_t tmp = lpc[j]; lpc[j] = MAC16_16_P13(lpc[j],r,lpc[i-1-j]); lpc[i-1-j] = MAC16_16_P13(lpc[i-1-j],r,tmp); } if (i & 1) lpc[j] = MAC16_16_P13(lpc[j],lpc[j],r); error = SUB16(error,MULT16_16_Q13(r,MULT16_16_Q13(error,r))); } return error; }
void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *lsp, int len, int subframe, int nb_subframes, spx_word16_t margin) { int i; spx_word16_t m = margin; spx_word16_t m2 = 25736-margin; spx_word16_t tmp = DIV32_16(SHL32(EXTEND32(1 + subframe),14),nb_subframes); spx_word16_t tmp2 = 16384-tmp; for (i=0;i<len;i++) lsp[i] = MULT16_16_P14(tmp2,old_lsp[i]) + MULT16_16_P14(tmp,new_lsp[i]); /* Enforce margin to sure the LSPs are stable*/ if (lsp[0]<m) lsp[0]=m; if (lsp[len-1]>m2) lsp[len-1]=m2; for (i=1;i<len-1;i++) { if (lsp[i]<lsp[i-1]+m) lsp[i]=lsp[i-1]+m; if (lsp[i]>lsp[i+1]-m) lsp[i]= SHR16(lsp[i],1) + SHR16(lsp[i+1]-m,1); } }
/* Compute the amplitude (sqrt energy) in each of the bands */ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bank, int _C) { int i, c, N; const celt_int16 *eBands = m->eBands; const int C = CHANNELS(_C); N = FRAMESIZE(m); for (c=0;c<C;c++) { for (i=0;i<m->nbEBands;i++) { int j; celt_word32 maxval=0; celt_word32 sum = 0; j=eBands[i]; do { maxval = MAX32(maxval, X[j+c*N]); maxval = MAX32(maxval, -X[j+c*N]); } while (++j<eBands[i+1]); if (maxval > 0) { int shift = celt_ilog2(maxval)-10; j=eBands[i]; do { sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), EXTRACT16(VSHR32(X[j+c*N],shift))); } while (++j<eBands[i+1]); /* We're adding one here to make damn sure we never end up with a pitch vector that's larger than unity norm */ bank[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); } else { bank[i+c*m->nbEBands] = EPSILON; } /*printf ("%f ", bank[i+c*m->nbEBands]);*/ } } /*printf ("\n");*/ }
void celt_fir(const opus_val16 *x, const opus_val16 *num, opus_val16 *y, int N, int ord, opus_val16 *mem) { int i,j; for (i=0;i<N;i++) { opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); for (j=0;j<ord;j++) { sum += MULT16_16(num[j],mem[j]); } for (j=ord-1;j>=1;j--) { mem[j]=mem[j-1]; } mem[0] = x[i]; y[i] = ROUND16(sum, SIG_SHIFT); } }
void pitch_unquant_3tap( spx_word16_t exc[], /* Input excitation */ spx_word32_t exc_out[], /* Output excitation */ int start, /* Smallest pitch value allowed */ int end, /* Largest pitch value allowed */ spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */ const void* par, int nsf, /* Number of samples in subframe */ int* pitch_val, spx_word16_t* gain_val, SpeexBits* bits, char* stack, int count_lost, int subframe_offset, spx_word16_t last_pitch_gain, int cdbk_offset ) { int i; int pitch; int gain_index; spx_word16_t gain[3]; const signed char* gain_cdbk; int gain_cdbk_size; const ltp_params* params; params = (const ltp_params*) par; gain_cdbk_size = 1 << params->gain_bits; gain_cdbk = params->gain_cdbk + 4 * gain_cdbk_size * cdbk_offset; pitch = speex_bits_unpack_unsigned(bits, params->pitch_bits); pitch += start; gain_index = speex_bits_unpack_unsigned(bits, params->gain_bits); /*printf ("decode pitch: %d %d\n", pitch, gain_index);*/ #ifdef FIXED_POINT gain[0] = ADD16(32, (spx_word16_t)gain_cdbk[gain_index * 4]); gain[1] = ADD16(32, (spx_word16_t)gain_cdbk[gain_index * 4 + 1]); gain[2] = ADD16(32, (spx_word16_t)gain_cdbk[gain_index * 4 + 2]); #else gain[0] = 0.015625 * gain_cdbk[gain_index * 4] + .5; gain[1] = 0.015625 * gain_cdbk[gain_index * 4 + 1] + .5; gain[2] = 0.015625 * gain_cdbk[gain_index * 4 + 2] + .5; #endif if (count_lost && pitch > subframe_offset) { spx_word16_t gain_sum; if (1) { #ifdef FIXED_POINT spx_word16_t tmp = count_lost < 4 ? last_pitch_gain : SHR16(last_pitch_gain, 1); if (tmp > 62) tmp = 62; #else spx_word16_t tmp = count_lost < 4 ? last_pitch_gain : 0.5 * last_pitch_gain; if (tmp > .95) tmp = .95; #endif gain_sum = gain_3tap_to_1tap(gain); if (gain_sum > tmp) { spx_word16_t fact = DIV32_16(SHL32(EXTEND32(tmp), 14), gain_sum); for (i = 0; i < 3; i++) gain[i] = MULT16_16_Q14(fact, gain[i]); } } } *pitch_val = pitch; gain_val[0] = gain[0]; gain_val[1] = gain[1]; gain_val[2] = gain[2]; gain[0] = SHL16(gain[0], 7); gain[1] = SHL16(gain[1], 7); gain[2] = SHL16(gain[2], 7); SPEEX_MEMSET(exc_out, 0, nsf); for (i = 0; i < 3; i++) { int j; int tmp1, tmp3; int pp = pitch + 1 - i; tmp1 = nsf; if (tmp1 > pp) tmp1 = pp; for (j = 0; j < tmp1; j++) exc_out[j] = MAC16_16(exc_out[j], gain[2 - i], exc[j - pp]); tmp3 = nsf; if (tmp3 > pp + pitch) tmp3 = pp + pitch; for (j = tmp1; j < tmp3; j++) exc_out[j] = MAC16_16(exc_out[j], gain[2 - i], exc[j - pp - pitch]); } /*for (i=0;i<nsf;i++) exc[i]=PSHR32(exc32[i],13);*/ }
void lsp_to_lpc(const spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack) /* float *freq array of LSP frequencies in the x domain */ /* float *ak array of LPC coefficients */ /* int lpcrdr order of LPC coefficients */ { int i,j; spx_word32_t xout1,xout2,xin; spx_word32_t mult, a; VARDECL(spx_word16_t *freqn); VARDECL(spx_word32_t **xp); VARDECL(spx_word32_t *xpmem); VARDECL(spx_word32_t **xq); VARDECL(spx_word32_t *xqmem); int m = lpcrdr>>1; /* Reconstruct P(z) and Q(z) by cascading second order polynomials in form 1 - 2cos(w)z(-1) + z(-2), where w is the LSP frequency. In the time domain this is: y(n) = x(n) - 2cos(w)x(n-1) + x(n-2) This is what the ALLOCS below are trying to do: int xp[m+1][lpcrdr+1+2]; // P matrix in QIMP int xq[m+1][lpcrdr+1+2]; // Q matrix in QIMP These matrices store the output of each stage on each row. The final (m-th) row has the output of the final (m-th) cascaded 2nd order filter. The first row is the impulse input to the system (not written as it is known). The version below takes advantage of the fact that a lot of the outputs are zero or known, for example if we put an inpulse into the first section the "clock" it 10 times only the first 3 outputs samples are non-zero (it's an FIR filter). */ ALLOC(xp, (m+1), spx_word32_t*); ALLOC(xpmem, (m+1)*(lpcrdr+1+2), spx_word32_t); ALLOC(xq, (m+1), spx_word32_t*); ALLOC(xqmem, (m+1)*(lpcrdr+1+2), spx_word32_t); for(i=0; i<=m; i++) { xp[i] = xpmem + i*(lpcrdr+1+2); xq[i] = xqmem + i*(lpcrdr+1+2); } /* work out 2cos terms in Q14 */ ALLOC(freqn, lpcrdr, spx_word16_t); for (i=0;i<lpcrdr;i++) freqn[i] = ANGLE2X(freq[i]); #define QIMP 21 /* scaling for impulse */ xin = SHL32(EXTEND32(1), (QIMP-1)); /* 0.5 in QIMP format */ /* first col and last non-zero values of each row are trivial */ for(i=0;i<=m;i++) { xp[i][1] = 0; xp[i][2] = xin; xp[i][2+2*i] = xin; xq[i][1] = 0; xq[i][2] = xin; xq[i][2+2*i] = xin; } /* 2nd row (first output row) is trivial */ xp[1][3] = -MULT16_32_Q14(freqn[0],xp[0][2]); xq[1][3] = -MULT16_32_Q14(freqn[1],xq[0][2]); xout1 = xout2 = 0; /* now generate remaining rows */ for(i=1;i<m;i++) { for(j=1;j<2*(i+1)-1;j++) { mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]); xp[i+1][j+2] = ADD32(SUB32(xp[i][j+2], mult), xp[i][j]); mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]); xq[i+1][j+2] = ADD32(SUB32(xq[i][j+2], mult), xq[i][j]); } /* for last col xp[i][j+2] = xq[i][j+2] = 0 */ mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]); xp[i+1][j+2] = SUB32(xp[i][j], mult); mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]); xq[i+1][j+2] = SUB32(xq[i][j], mult); } /* process last row to extra a{k} */ for(j=1;j<=lpcrdr;j++) { int shift = QIMP-13; /* final filter sections */ a = PSHR32(xp[m][j+2] + xout1 + xq[m][j+2] - xout2, shift); xout1 = xp[m][j+2]; xout2 = xq[m][j+2]; /* hard limit ak's to +/- 32767 */ if (a < -32767) a = -32767; if (a > 32767) a = 32767; ak[j-1] = (short)a; } }
int lpc_to_lsp (spx_coef_t *a,int lpcrdr,spx_lsp_t *freq,int nb,spx_word16_t delta, char *stack) /* float *a lpc coefficients */ /* int lpcrdr order of LPC coefficients (10) */ /* float *freq LSP frequencies in the x domain */ /* int nb number of sub-intervals (4) */ /* float delta grid spacing interval (0.02) */ { spx_word16_t temp_xr,xl,xr,xm=0; spx_word32_t psuml,psumr,psumm,temp_psumr/*,temp_qsumr*/; int i,j,m,flag,k; #ifndef FIXED_LPC_SIZE VARDECL(spx_word32_t *Q); /* ptrs for memory allocation */ VARDECL(spx_word32_t *P); VARDECL(spx_word16_t *Q16); /* ptrs for memory allocation */ VARDECL(spx_word16_t *P16); #else spx_word32_t Q[(FIXED_LPC_SIZE/2)+1]; /* ptrs for memory allocation */ spx_word32_t P[(FIXED_LPC_SIZE/2)+1]; spx_word16_t Q16[(FIXED_LPC_SIZE/2)+1]; /* ptrs for memory allocation */ spx_word16_t P16[(FIXED_LPC_SIZE/2)+1]; #endif spx_word32_t *px; /* ptrs of respective P'(z) & Q'(z) */ spx_word32_t *qx; spx_word32_t *p; spx_word32_t *q; spx_word16_t *pt; /* ptr used for cheb_poly_eval() whether P' or Q' */ int roots=0; /* DR 8/2/94: number of roots found */ flag = 1; /* program is searching for a root when, 1 else has found one */ m = lpcrdr/2; /* order of P'(z) & Q'(z) polynomials */ #ifndef FIXED_LPC_SIZE /* Allocate memory space for polynomials */ ALLOC(Q, (m+1), spx_word32_t); ALLOC(P, (m+1), spx_word32_t); #endif /* determine P'(z)'s and Q'(z)'s coefficients where P'(z) = P(z)/(1 + z^(-1)) and Q'(z) = Q(z)/(1-z^(-1)) */ px = P; /* initialise ptrs */ qx = Q; p = px; q = qx; #ifdef FIXED_POINT *px++ = LPC_SCALING; *qx++ = LPC_SCALING; #ifndef FIXED_LPC_SIZE for(i=0;i<m;i++){ *px++ = SUB32(ADD32(EXTEND32(a[i]),EXTEND32(a[lpcrdr-i-1])), *p++); *qx++ = ADD32(SUB32(EXTEND32(a[i]),EXTEND32(a[lpcrdr-i-1])), *q++); } #else for(i=0;i<(FIXED_LPC_SIZE/2);i++){ *px++ = SUB32(ADD32(EXTEND32(a[i]),EXTEND32(a[FIXED_LPC_SIZE-i-1])), *p++); *qx++ = ADD32(SUB32(EXTEND32(a[i]),EXTEND32(a[FIXED_LPC_SIZE-i-1])), *q++); } #endif px = P; qx = Q; for(i=0;i<m;i++) { /*if (fabs(*px)>=32768) speex_warning_int("px", *px); if (fabs(*qx)>=32768) speex_warning_int("qx", *qx);*/ *px = PSHR32(*px,2); *qx = PSHR32(*qx,2); px++; qx++; } /* The reason for this lies in the way cheb_poly_eva() is implemented for fixed-point */ P[m] = PSHR32(P[m],3); Q[m] = PSHR32(Q[m],3); #else *px++ = LPC_SCALING; *qx++ = LPC_SCALING; for(i=0;i<m;i++){ *px++ = (a[i]+a[lpcrdr-1-i]) - *p++; *qx++ = (a[i]-a[lpcrdr-1-i]) + *q++; } px = P; qx = Q; for(i=0;i<m;i++){ *px = 2**px; *qx = 2**qx; px++; qx++; } #endif px = P; /* re-initialise ptrs */ qx = Q; /* now that we have computed P and Q convert to 16 bits to speed up cheb_poly_eval */ #ifndef FIXED_LPC_SIZE ALLOC(P16, m+1, spx_word16_t); ALLOC(Q16, m+1, spx_word16_t); #endif for (i=0;i<m+1;i++) { P16[i] = P[i]; Q16[i] = Q[i]; } /* Search for a zero in P'(z) polynomial first and then alternate to Q'(z). Keep alternating between the two polynomials as each zero is found */ xr = 0; /* initialise xr to zero */ xl = FREQ_SCALE; /* start at point xl = 1 */ for(j=0;j<lpcrdr;j++){ if(j&1) /* determines whether P' or Q' is eval. */ pt = Q16; else pt = P16; psuml = cheb_poly_eva(pt,xl,m,stack); /* evals poly. at xl */ flag = 1; while(flag && (xr >= -FREQ_SCALE)){ spx_word16_t dd; /* Modified by JMV to provide smaller steps around x=+-1 */ #ifdef FIXED_POINT dd = MULT16_16_Q15(delta,SUB16(FREQ_SCALE, MULT16_16_Q14(MULT16_16_Q14(xl,xl),14000))); if (psuml<512 && psuml>-512) dd = PSHR16(dd,1); #else dd=delta*(1-.9*xl*xl); if (fabs(psuml)<.2) dd *= .5; #endif xr = SUB16(xl, dd); /* interval spacing */ psumr = cheb_poly_eva(pt,xr,m,stack);/* poly(xl-delta_x) */ temp_psumr = psumr; temp_xr = xr; /* if no sign change increment xr and re-evaluate poly(xr). Repeat til sign change. if a sign change has occurred the interval is bisected and then checked again for a sign change which determines in which interval the zero lies in. If there is no sign change between poly(xm) and poly(xl) set interval between xm and xr else set interval between xl and xr and repeat till root is located within the specified limits */ if(SIGN_CHANGE(psumr,psuml)) { roots++; psumm=psuml; for(k=0;k<=nb;k++){ #ifdef FIXED_POINT xm = ADD16(PSHR16(xl,1),PSHR16(xr,1)); /* bisect the interval */ #else xm = .5*(xl+xr); /* bisect the interval */ #endif psumm=cheb_poly_eva(pt,xm,m,stack); /*if(psumm*psuml>0.)*/ if(!SIGN_CHANGE(psumm,psuml)) { psuml=psumm; xl=xm; } else { psumr=psumm; xr=xm; } } /* once zero is found, reset initial interval to xr */ freq[j] = X2ANGLE(xm); xl = xm; flag = 0; /* reset flag for next search */ } else{ psuml=temp_psumr; xl=temp_xr; } } } return(roots); }
void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack) /* float *freq array of LSP frequencies in the x domain */ /* float *ak array of LPC coefficients */ /* int lpcrdr order of LPC coefficients */ { int i,j; spx_word32_t xout1,xout2,xin; spx_word32_t mult, a; VARDECL(spx_word32_t *xpmem); VARDECL(spx_word32_t *xqmem); #ifndef FIXED_LPC_SIZE VARDECL(spx_word16_t *freqn); VARDECL(spx_word32_t **xp); VARDECL(spx_word32_t **xq); #else spx_word16_t freqn[FIXED_LPC_SIZE]; spx_word32_t *xp[(FIXED_LPC_SIZE/2)+1]; spx_word32_t *xq[(FIXED_LPC_SIZE/2)+1]; #endif int m = lpcrdr>>1; /* Reconstruct P(z) and Q(z) by cascading second order polynomials in form 1 - 2cos(w)z(-1) + z(-2), where w is the LSP frequency. In the time domain this is: y(n) = x(n) - 2cos(w)x(n-1) + x(n-2) This is what the ALLOCS below are trying to do: int xp[m+1][lpcrdr+1+2]; // P matrix in QIMP int xq[m+1][lpcrdr+1+2]; // Q matrix in QIMP These matrices store the output of each stage on each row. The final (m-th) row has the output of the final (m-th) cascaded 2nd order filter. The first row is the impulse input to the system (not written as it is known). The version below takes advantage of the fact that a lot of the outputs are zero or known, for example if we put an inpulse into the first section the "clock" it 10 times only the first 3 outputs samples are non-zero (it's an FIR filter). */ #ifndef FIXED_LPC_SIZE ALLOC(xp, (m+1), spx_word32_t*); #endif ALLOC(xpmem, (m+1)*(lpcrdr+1+2), spx_word32_t); #ifndef FIXED_LPC_SIZE ALLOC(xq, (m+1), spx_word32_t*); #endif ALLOC(xqmem, (m+1)*(lpcrdr+1+2), spx_word32_t); #ifndef FIXED_LPC_SIZE for(i=0; i<=m; i++) { xp[i] = xpmem + i*(lpcrdr+1+2); xq[i] = xqmem + i*(lpcrdr+1+2); } #else for(i=0; i<=m; i++) { xp[i] = xpmem + i*(FIXED_LPC_SIZE+1+2); xq[i] = xqmem + i*(FIXED_LPC_SIZE+1+2); } #endif /* work out 2cos terms in Q14 */ #ifndef FIXED_LPC_SIZE ALLOC(freqn, lpcrdr, spx_word16_t); for (i=0;i<lpcrdr;i++) freqn[i] = ANGLE2X(freq[i]); #else for (i=0;i<FIXED_LPC_SIZE;i++) freqn[i] = ANGLE2X(freq[i]); #endif #define QIMP 21 /* scaling for impulse */ xin = SHL32(EXTEND32(1), (QIMP-1)); /* 0.5 in QIMP format */ /* first col and last non-zero values of each row are trivial */ for(i=0;i<=m;i++) { xp[i][1] = 0; xp[i][2] = xin; xp[i][2+2*i] = xin; xq[i][1] = 0; xq[i][2] = xin; xq[i][2+2*i] = xin; } /* 2nd row (first output row) is trivial */ xp[1][3] = -MULT16_32_Q14(freqn[0],xp[0][2]); xq[1][3] = -MULT16_32_Q14(freqn[1],xq[0][2]); xout1 = xout2 = 0; /* now generate remaining rows */ for(i=1;i<m;i++) { for(j=1;j<2*(i+1)-1;j++) { mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]); xp[i+1][j+2] = ADD32(SUB32(xp[i][j+2], mult), xp[i][j]); mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]); xq[i+1][j+2] = ADD32(SUB32(xq[i][j+2], mult), xq[i][j]); } /* for last col xp[i][j+2] = xq[i][j+2] = 0 */ mult = MULT16_32_Q14(freqn[2*i],xp[i][j+1]); xp[i+1][j+2] = SUB32(xp[i][j], mult); mult = MULT16_32_Q14(freqn[2*i+1],xq[i][j+1]); xq[i+1][j+2] = SUB32(xq[i][j], mult); } /* process last row to extra a{k} */ #ifndef FIXED_LPC_SIZE for(j=1;j<=lpcrdr;j++) { #else for(j=1;j<=FIXED_LPC_SIZE;j++) { #endif int shift = QIMP-13; /* final filter sections */ a = PSHR32(xp[m][j+2] + xout1 + xq[m][j+2] - xout2, shift); xout1 = xp[m][j+2]; xout2 = xq[m][j+2]; /* hard limit ak's to +/- 32767 */ if (a < -32767) a = -32767; if (a > 32767) a = 32767; ak[j-1] = (short)a; } } #else void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack) /* float *freq array of LSP frequencies in the x domain */ /* float *ak array of LPC coefficients */ /* int lpcrdr order of LPC coefficients */ { int i,j; float xout1,xout2,xin1,xin2; VARDECL(float *Wp); float *pw,*n1,*n2,*n3,*n4=NULL; VARDECL(float *x_freq); int m = lpcrdr>>1; ALLOC(Wp, 4*m+2, float); pw = Wp; /* initialise contents of array */ for(i=0;i<=4*m+1;i++){ /* set contents of buffer to 0 */ *pw++ = 0.0; } /* Set pointers up */ pw = Wp; xin1 = 1.0; xin2 = 1.0; ALLOC(x_freq, lpcrdr, float); for (i=0;i<lpcrdr;i++) x_freq[i] = ANGLE2X(freq[i]); /* reconstruct P(z) and Q(z) by cascading second order polynomials in form 1 - 2xz(-1) +z(-2), where x is the LSP coefficient */ for(j=0;j<=lpcrdr;j++){ int i2=0; for(i=0;i<m;i++,i2+=2){ n1 = pw+(i*4); n2 = n1 + 1; n3 = n2 + 1; n4 = n3 + 1; xout1 = xin1 - 2.f*x_freq[i2] * *n1 + *n2; xout2 = xin2 - 2.f*x_freq[i2+1] * *n3 + *n4; *n2 = *n1; *n4 = *n3; *n1 = xin1; *n3 = xin2; xin1 = xout1; xin2 = xout2; } xout1 = xin1 + *(n4+1); xout2 = xin2 - *(n4+2); if (j>0) ak[j-1] = (xout1 + xout2)*0.5f; *(n4+1) = xin1; *(n4+2) = xin2; xin1 = 0.0; xin2 = 0.0; } }
static void split_cb_search_shape_sign_N1( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int update_target ) { int i,j,m,q; VARDECL(spx_word16_t *resp); #ifdef _USE_SSE VARDECL(__m128 *resp2); VARDECL(__m128 *E); #else spx_word16_t *resp2; VARDECL(spx_word32_t *E); #endif VARDECL(spx_word16_t *t); VARDECL(spx_sig_t *e); const signed char *shape_cb; int shape_cb_size, subvect_size, nb_subvect; const split_cb_params *params; int best_index; spx_word32_t best_dist; int have_sign; params = (const split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t); #ifdef _USE_SSE ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128); ALLOC(E, shape_cb_size>>2, __m128); #else resp2 = resp; ALLOC(E, shape_cb_size, spx_word32_t); #endif ALLOC(t, nsf, spx_word16_t); ALLOC(e, nsf, spx_sig_t); /* FIXME: Do we still need to copy the target? */ SPEEX_COPY(t, target, nsf); compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack); for (i=0;i<nb_subvect;i++) { spx_word16_t *x=t+subvect_size*i; /*Find new n-best based on previous n-best j*/ #ifndef DISABLE_WIDEBAND if (have_sign) vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); else #endif /* DISABLE_WIDEBAND */ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); speex_bits_pack(bits,best_index,params->shape_bits+have_sign); { int rind; spx_word16_t *res; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } res = resp+rind*subvect_size; if (sign>0) for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]); else for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]); #ifdef FIXED_POINT if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5)); } #else for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j]; #endif } for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; #ifdef FIXED_POINT g=sign*shape_cb[rind*subvect_size+m]; #else g=sign*0.03125*shape_cb[rind*subvect_size+m]; #endif target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1)); } } /* Update excitation */ /* FIXME: We could update the excitation directly above */ for (j=0;j<nsf;j++) exc[j]=ADD32(exc[j],e[j]); /* Update target: only update target if necessary */ if (update_target) { VARDECL(spx_word16_t *r2); ALLOC(r2, nsf, spx_word16_t); for (j=0;j<nsf;j++) r2[j] = EXTRACT16(PSHR32(e[j] ,6)); syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack); for (j=0;j<nsf;j++) target[j]=SUB16(target[j],PSHR16(r2[j],2)); } }
void split_cb_search_shape_sign( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int complexity, int update_target ) { int i,j,k,m,n,q; VARDECL(spx_word16_t *resp); #ifdef _USE_SSE VARDECL(__m128 *resp2); VARDECL(__m128 *E); #else spx_word16_t *resp2; VARDECL(spx_word32_t *E); #endif VARDECL(spx_word16_t *t); VARDECL(spx_sig_t *e); VARDECL(spx_word16_t *tmp); VARDECL(spx_word32_t *ndist); VARDECL(spx_word32_t *odist); VARDECL(int *itmp); VARDECL(spx_word16_t **ot2); VARDECL(spx_word16_t **nt2); spx_word16_t **ot, **nt; VARDECL(int **nind); VARDECL(int **oind); VARDECL(int *ind); const signed char *shape_cb; int shape_cb_size, subvect_size, nb_subvect; const split_cb_params *params; int N=2; VARDECL(int *best_index); VARDECL(spx_word32_t *best_dist); VARDECL(int *best_nind); VARDECL(int *best_ntarget); int have_sign; N=complexity; if (N>10) N=10; /* Complexity isn't as important for the codebooks as it is for the pitch */ N=(2*N)/3; if (N<1) N=1; if (N==1) { split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target); return; } ALLOC(ot2, N, spx_word16_t*); ALLOC(nt2, N, spx_word16_t*); ALLOC(oind, N, int*); ALLOC(nind, N, int*); params = (const split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t); #ifdef _USE_SSE ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128); ALLOC(E, shape_cb_size>>2, __m128); #else resp2 = resp; ALLOC(E, shape_cb_size, spx_word32_t); #endif ALLOC(t, nsf, spx_word16_t); ALLOC(e, nsf, spx_sig_t); ALLOC(ind, nb_subvect, int); ALLOC(tmp, 2*N*nsf, spx_word16_t); for (i=0;i<N;i++) { ot2[i]=tmp+2*i*nsf; nt2[i]=tmp+(2*i+1)*nsf; } ot=ot2; nt=nt2; ALLOC(best_index, N, int); ALLOC(best_dist, N, spx_word32_t); ALLOC(best_nind, N, int); ALLOC(best_ntarget, N, int); ALLOC(ndist, N, spx_word32_t); ALLOC(odist, N, spx_word32_t); ALLOC(itmp, 2*N*nb_subvect, int); for (i=0;i<N;i++) { nind[i]=itmp+2*i*nb_subvect; oind[i]=itmp+(2*i+1)*nb_subvect; } SPEEX_COPY(t, target, nsf); for (j=0;j<N;j++) SPEEX_COPY(&ot[j][0], t, nsf); /* Pre-compute codewords response and energy */ compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack); for (j=0;j<N;j++) odist[j]=0; /*For all subvectors*/ for (i=0;i<nb_subvect;i++) { /*"erase" nbest list*/ for (j=0;j<N;j++) ndist[j]=VERY_LARGE32; /* This is not strictly necessary, but it provides an additonal safety to prevent crashes in case something goes wrong in the previous steps (e.g. NaNs) */ for (j=0;j<N;j++) best_nind[j] = best_ntarget[j] = 0; /*For all n-bests of previous subvector*/ for (j=0;j<N;j++) { spx_word16_t *x=ot[j]+subvect_size*i; spx_word32_t tener = 0; for (m=0;m<subvect_size;m++) tener = MAC16_16(tener, x[m],x[m]); #ifdef FIXED_POINT tener = SHR32(tener,1); #else tener *= .5; #endif /*Find new n-best based on previous n-best j*/ #ifndef DISABLE_WIDEBAND if (have_sign) vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); else #endif /* DISABLE_WIDEBAND */ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); /*For all new n-bests*/ for (k=0;k<N;k++) { /* Compute total distance (including previous sub-vectors */ spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener); /*update n-best list*/ if (err<ndist[N-1]) { for (m=0;m<N;m++) { if (err < ndist[m]) { for (n=N-1;n>m;n--) { ndist[n] = ndist[n-1]; best_nind[n] = best_nind[n-1]; best_ntarget[n] = best_ntarget[n-1]; } /* n is equal to m here, so they're interchangeable */ ndist[m] = err; best_nind[n] = best_index[k]; best_ntarget[n] = j; break; } } } } if (i==0) break; } for (j=0;j<N;j++) { /*previous target (we don't care what happened before*/ for (m=(i+1)*subvect_size;m<nsf;m++) nt[j][m]=ot[best_ntarget[j]][m]; /* New code: update the rest of the target only if it's worth it */ for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_nind[j]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; #ifdef FIXED_POINT g=sign*shape_cb[rind*subvect_size+m]; #else g=sign*0.03125*shape_cb[rind*subvect_size+m]; #endif target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1)); } for (q=0;q<nb_subvect;q++) nind[j][q]=oind[best_ntarget[j]][q]; nind[j][i]=best_nind[j]; } /*update old-new data*/ /* just swap pointers instead of a long copy */ { spx_word16_t **tmp2; tmp2=ot; ot=nt; nt=tmp2; } for (j=0;j<N;j++) for (m=0;m<nb_subvect;m++) oind[j][m]=nind[j][m]; for (j=0;j<N;j++) odist[j]=ndist[j]; } /*save indices*/ for (i=0;i<nb_subvect;i++) { ind[i]=nind[0][i]; speex_bits_pack(bits,ind[i],params->shape_bits+have_sign); } /* Put everything back together */ for (i=0;i<nb_subvect;i++) { int rind; spx_word16_t sign=1; rind = ind[i]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } #ifdef FIXED_POINT if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5)); } #else for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j]; #endif } /* Update excitation */ for (j=0;j<nsf;j++) exc[j]=ADD32(exc[j],e[j]); /* Update target: only update target if necessary */ if (update_target) { VARDECL(spx_word16_t *r2); ALLOC(r2, nsf, spx_word16_t); for (j=0;j<nsf;j++) r2[j] = EXTRACT16(PSHR32(e[j] ,6)); syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack); for (j=0;j<nsf;j++) target[j]=SUB16(target[j],PSHR16(r2[j],2)); } }