/* Fixed-point tansig approximation.
 *
 * _x is a Q19 input and the return value is Q14.  Inputs at or beyond +/-8
 * (in Q19) saturate to +/-1 (in Q14).  Otherwise a value is looked up in
 * tansig_table at offset 250 + idx and corrected using the remaining
 * distance between the input and that table point.
 *
 * The trailing "ref:" notes give the floating-point computation that each
 * fixed-point statement mirrors.
 */
static inline opus_val16 tansig_approx(opus_val32 _x)
{
    int idx;                /* table step nearest to the input */
    opus_val16 frac;        /* Q11: input, later the residual around the table point */
    opus_val16 slope, val;  /* Q14 */

    /* ref: x = 1.9073e-06*_x */

    /* Saturate outside the tabulated range. */
    if (_x >= QCONST32(8, 19)) {
        return QCONST32(1., 14);
    }
    if (_x <= -QCONST32(8, 19)) {
        return -QCONST32(1., 14);
    }

    /* Q19 -> Q11 */
    frac = EXTRACT16(SHR32(_x, 8));

    /* ref: i = lrint(25*x)  (the added 1024 is half of 1<<11, i.e. rounding) */
    idx = SHR32(ADD32(1024, MULT16_16(25, frac)), 11);

    /* ref: x -= .04*i */
    frac -= EXTRACT16(SHR32(MULT16_16(20972, idx), 8));

    /* ref: x = xx*(1./2048); y = tansig_table[250+i] */
    val = tansig_table[250 + idx];

    /* ref: y = yy*(1./16384); slope is 1 - y*y in Q14 */
    slope = 16384 - MULT16_16_Q14(val, val);

    /* y + x*slope*(1 - y*x) */
    val = val + MULT16_16_Q14(MULT16_16_Q11(frac, slope),
                              (16384 - MULT16_16_Q11(val, frac)));
    return val;
}
void split_cb_search_shape_sign( spx_sig_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_sig_t *r, SpeexBits *bits, char *stack, int complexity ) { int i,j,k,m,n,q; spx_word16_t *resp; #ifdef _USE_SSE __m128 *resp2; __m128 *E; #else spx_word16_t *resp2; spx_word32_t *E; #endif spx_word16_t *t; spx_sig_t *e, *r2; spx_word16_t *tmp; spx_word32_t *ndist, *odist; int *itmp; spx_word16_t **ot, **nt; int **nind, **oind; int *ind; const signed char *shape_cb; int shape_cb_size, subvect_size, nb_subvect; split_cb_params *params; int N=2; int *best_index; spx_word32_t *best_dist; int have_sign; N=complexity; if (N>10) N=10; ot=PUSH(stack, N, spx_word16_t*); nt=PUSH(stack, N, spx_word16_t*); oind=PUSH(stack, N, int*); nind=PUSH(stack, N, int*); params = (split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; resp = PUSH(stack, shape_cb_size*subvect_size, spx_word16_t); #ifdef _USE_SSE resp2 = PUSH(stack, (shape_cb_size*subvect_size)>>2, __m128); E = PUSH(stack, shape_cb_size>>2, __m128); #else resp2 = resp; E = PUSH(stack, shape_cb_size, spx_word32_t); #endif t = PUSH(stack, nsf, spx_word16_t); e = PUSH(stack, nsf, spx_sig_t); r2 = PUSH(stack, nsf, spx_sig_t); ind = PUSH(stack, nb_subvect, int); tmp = PUSH(stack, 2*N*nsf, spx_word16_t); for (i=0;i<N;i++) { ot[i]=tmp; tmp += nsf; nt[i]=tmp; tmp += nsf; } best_index = PUSH(stack, N, int); best_dist = PUSH(stack, N, spx_word32_t); ndist = PUSH(stack, N, spx_word32_t); odist = PUSH(stack, N, spx_word32_t); itmp = PUSH(stack, 2*N*nb_subvect, int); for (i=0;i<N;i++) { nind[i]=itmp; itmp+=nb_subvect; 
oind[i]=itmp; itmp+=nb_subvect; for (j=0;j<nb_subvect;j++) nind[i][j]=oind[i][j]=-1; } /* FIXME: make that adaptive? */ for (i=0;i<nsf;i++) t[i]=SHR(target[i],6); for (j=0;j<N;j++) for (i=0;i<nsf;i++) ot[j][i]=t[i]; /*for (i=0;i<nsf;i++) printf ("%d\n", (int)t[i]);*/ /* Pre-compute codewords response and energy */ compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack); for (j=0;j<N;j++) odist[j]=0; /*For all subvectors*/ for (i=0;i<nb_subvect;i++) { /*"erase" nbest list*/ for (j=0;j<N;j++) ndist[j]=-2; /*For all n-bests of previous subvector*/ for (j=0;j<N;j++) { spx_word16_t *x=ot[j]+subvect_size*i; /*Find new n-best based on previous n-best j*/ if (have_sign) vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); else vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); /*For all new n-bests*/ for (k=0;k<N;k++) { spx_word16_t *ct; spx_word32_t err=0; ct = ot[j]; /*update target*/ /*previous target*/ for (m=i*subvect_size;m<(i+1)*subvect_size;m++) t[m]=ct[m]; /* New code: update only enough of the target to calculate error*/ { int rind; spx_word16_t *res; spx_word16_t sign=1; rind = best_index[k]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } res = resp+rind*subvect_size; if (sign>0) for (m=0;m<subvect_size;m++) t[subvect_size*i+m] -= res[m]; else for (m=0;m<subvect_size;m++) t[subvect_size*i+m] += res[m]; } /*compute error (distance)*/ err=odist[j]; for (m=i*subvect_size;m<(i+1)*subvect_size;m++) err += t[m]*t[m]; /*update n-best list*/ if (err<ndist[N-1] || ndist[N-1]<-1) { /*previous target (we don't care what happened before*/ for (m=(i+1)*subvect_size;m<nsf;m++) t[m]=ct[m]; /* New code: update the rest of the target only if it's worth it */ for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_index[k]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; #ifdef FIXED_POINT 
g=sign*shape_cb[rind*subvect_size+m]; for (n=subvect_size*(i+1);n<nsf;n++,q++) t[n] = SUB32(t[n],MULT16_16_Q11(g,r[q])); #else g=sign*0.03125*shape_cb[rind*subvect_size+m]; for (n=subvect_size*(i+1);n<nsf;n++,q++) t[n] = SUB32(t[n],g*r[q]); #endif } for (m=0;m<N;m++) { if (err < ndist[m] || ndist[m]<-1) { for (n=N-1;n>m;n--) { for (q=(i+1)*subvect_size;q<nsf;q++) nt[n][q]=nt[n-1][q]; for (q=0;q<nb_subvect;q++) nind[n][q]=nind[n-1][q]; ndist[n]=ndist[n-1]; } for (q=(i+1)*subvect_size;q<nsf;q++) nt[m][q]=t[q]; for (q=0;q<nb_subvect;q++) nind[m][q]=oind[j][q]; nind[m][i]=best_index[k]; ndist[m]=err; break; } } } } if (i==0) break; } /*update old-new data*/ /* just swap pointers instead of a long copy */ { spx_word16_t **tmp2; tmp2=ot; ot=nt; nt=tmp2; } for (j=0;j<N;j++) for (m=0;m<nb_subvect;m++) oind[j][m]=nind[j][m]; for (j=0;j<N;j++) odist[j]=ndist[j]; } /*save indices*/ for (i=0;i<nb_subvect;i++) { ind[i]=nind[0][i]; speex_bits_pack(bits,ind[i],params->shape_bits+have_sign); } /* Put everything back together */ for (i=0;i<nb_subvect;i++) { int rind; spx_word16_t sign=1; rind = ind[i]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } #ifdef FIXED_POINT if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5); } #else for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j]; #endif } /* Update excitation */ for (j=0;j<nsf;j++) exc[j]+=e[j]; /* Update target */ syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack); for (j=0;j<nsf;j++) target[j]-=r2[j]; }