static void split_cb_search_shape_sign_N1( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int update_target ) { int i,j,m,q; VARDECL(spx_word16_t *resp); #ifdef _USE_SSE VARDECL(__m128 *resp2); VARDECL(__m128 *E); #else spx_word16_t *resp2; VARDECL(spx_word32_t *E); #endif VARDECL(spx_word16_t *t); VARDECL(spx_sig_t *e); const signed char *shape_cb; int shape_cb_size, subvect_size, nb_subvect; const split_cb_params *params; int best_index; spx_word32_t best_dist; int have_sign; params = (const split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t); #ifdef _USE_SSE ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128); ALLOC(E, shape_cb_size>>2, __m128); #else resp2 = resp; ALLOC(E, shape_cb_size, spx_word32_t); #endif ALLOC(t, nsf, spx_word16_t); ALLOC(e, nsf, spx_sig_t); /* FIXME: Do we still need to copy the target? */ SPEEX_COPY(t, target, nsf); compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack); for (i=0;i<nb_subvect;i++) { spx_word16_t *x=t+subvect_size*i; /*Find new n-best based on previous n-best j*/ #ifndef DISABLE_WIDEBAND if (have_sign) vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); else #endif /* DISABLE_WIDEBAND */ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); speex_bits_pack(bits,best_index,params->shape_bits+have_sign); { int rind; spx_word16_t *res; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } res = resp+rind*subvect_size; if (sign>0) for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]); else for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]); #ifdef FIXED_POINT if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5)); } #else for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j]; #endif } for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; #ifdef FIXED_POINT g=sign*shape_cb[rind*subvect_size+m]; #else g=sign*0.03125*shape_cb[rind*subvect_size+m]; #endif target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1)); } } /* Update excitation */ /* FIXME: We could update the excitation directly above */ for (j=0;j<nsf;j++) exc[j]=ADD32(exc[j],e[j]); /* Update target: only update target if necessary */ if (update_target) { VARDECL(spx_word16_t *r2); ALLOC(r2, nsf, spx_word16_t); for (j=0;j<nsf;j++) r2[j] = EXTRACT16(PSHR32(e[j] ,6)); syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack); for (j=0;j<nsf;j++) target[j]=SUB16(target[j],PSHR16(r2[j],2)); } }
void split_cb_search_shape_sign( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int complexity, int update_target ) { int i,j,k,m,n,q; VARDECL(spx_word16_t *resp); #ifdef _USE_SSE VARDECL(__m128 *resp2); VARDECL(__m128 *E); #else spx_word16_t *resp2; VARDECL(spx_word32_t *E); #endif VARDECL(spx_word16_t *t); VARDECL(spx_sig_t *e); VARDECL(spx_word16_t *tmp); VARDECL(spx_word32_t *ndist); VARDECL(spx_word32_t *odist); VARDECL(int *itmp); VARDECL(spx_word16_t **ot2); VARDECL(spx_word16_t **nt2); spx_word16_t **ot, **nt; VARDECL(int **nind); VARDECL(int **oind); VARDECL(int *ind); const signed char *shape_cb; int shape_cb_size, subvect_size, nb_subvect; const split_cb_params *params; int N=2; VARDECL(int *best_index); VARDECL(spx_word32_t *best_dist); VARDECL(int *best_nind); VARDECL(int *best_ntarget); int have_sign; N=complexity; if (N>10) N=10; /* Complexity isn't as important for the codebooks as it is for the pitch */ N=(2*N)/3; if (N<1) N=1; if (N==1) { split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target); return; } ALLOC(ot2, N, spx_word16_t*); ALLOC(nt2, N, spx_word16_t*); ALLOC(oind, N, int*); ALLOC(nind, N, int*); params = (const split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t); #ifdef _USE_SSE ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128); ALLOC(E, shape_cb_size>>2, __m128); #else resp2 = resp; ALLOC(E, shape_cb_size, spx_word32_t); #endif ALLOC(t, nsf, spx_word16_t); ALLOC(e, nsf, spx_sig_t); ALLOC(ind, nb_subvect, int); ALLOC(tmp, 2*N*nsf, spx_word16_t); for (i=0;i<N;i++) { ot2[i]=tmp+2*i*nsf; nt2[i]=tmp+(2*i+1)*nsf; } ot=ot2; nt=nt2; ALLOC(best_index, N, int); ALLOC(best_dist, N, spx_word32_t); ALLOC(best_nind, N, int); ALLOC(best_ntarget, N, int); ALLOC(ndist, N, spx_word32_t); ALLOC(odist, N, spx_word32_t); ALLOC(itmp, 2*N*nb_subvect, int); for (i=0;i<N;i++) { nind[i]=itmp+2*i*nb_subvect; oind[i]=itmp+(2*i+1)*nb_subvect; } SPEEX_COPY(t, target, nsf); for (j=0;j<N;j++) SPEEX_COPY(&ot[j][0], t, nsf); /* Pre-compute codewords response and energy */ compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack); for (j=0;j<N;j++) odist[j]=0; /*For all subvectors*/ for (i=0;i<nb_subvect;i++) { /*"erase" nbest list*/ for (j=0;j<N;j++) ndist[j]=VERY_LARGE32; /* This is not strictly necessary, but it provides an additonal safety to prevent crashes in case something goes wrong in the previous steps (e.g. NaNs) */ for (j=0;j<N;j++) best_nind[j] = best_ntarget[j] = 0; /*For all n-bests of previous subvector*/ for (j=0;j<N;j++) { spx_word16_t *x=ot[j]+subvect_size*i; spx_word32_t tener = 0; for (m=0;m<subvect_size;m++) tener = MAC16_16(tener, x[m],x[m]); #ifdef FIXED_POINT tener = SHR32(tener,1); #else tener *= .5; #endif /*Find new n-best based on previous n-best j*/ #ifndef DISABLE_WIDEBAND if (have_sign) vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); else #endif /* DISABLE_WIDEBAND */ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); /*For all new n-bests*/ for (k=0;k<N;k++) { /* Compute total distance (including previous sub-vectors */ spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener); /*update n-best list*/ if (err<ndist[N-1]) { for (m=0;m<N;m++) { if (err < ndist[m]) { for (n=N-1;n>m;n--) { ndist[n] = ndist[n-1]; best_nind[n] = best_nind[n-1]; best_ntarget[n] = best_ntarget[n-1]; } /* n is equal to m here, so they're interchangeable */ ndist[m] = err; best_nind[n] = best_index[k]; best_ntarget[n] = j; break; } } } } if (i==0) break; } for (j=0;j<N;j++) { /*previous target (we don't care what happened before*/ for (m=(i+1)*subvect_size;m<nsf;m++) nt[j][m]=ot[best_ntarget[j]][m]; /* New code: update the rest of the target only if it's worth it */ for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_nind[j]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; #ifdef FIXED_POINT g=sign*shape_cb[rind*subvect_size+m]; #else g=sign*0.03125*shape_cb[rind*subvect_size+m]; #endif target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1)); } for (q=0;q<nb_subvect;q++) nind[j][q]=oind[best_ntarget[j]][q]; nind[j][i]=best_nind[j]; } /*update old-new data*/ /* just swap pointers instead of a long copy */ { spx_word16_t **tmp2; tmp2=ot; ot=nt; nt=tmp2; } for (j=0;j<N;j++) for (m=0;m<nb_subvect;m++) oind[j][m]=nind[j][m]; for (j=0;j<N;j++) odist[j]=ndist[j]; } /*save indices*/ for (i=0;i<nb_subvect;i++) { ind[i]=nind[0][i]; speex_bits_pack(bits,ind[i],params->shape_bits+have_sign); } /* Put everything back together */ for (i=0;i<nb_subvect;i++) { int rind; spx_word16_t sign=1; rind = ind[i]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } #ifdef FIXED_POINT if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5)); } #else for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j]; #endif } /* Update excitation */ for (j=0;j<nsf;j++) exc[j]=ADD32(exc[j],e[j]); /* Update target: only update target if necessary */ if (update_target) { VARDECL(spx_word16_t *r2); ALLOC(r2, nsf, spx_word16_t); for (j=0;j<nsf;j++) r2[j] = EXTRACT16(PSHR32(e[j] ,6)); syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack); for (j=0;j<nsf;j++) target[j]=SUB16(target[j],PSHR16(r2[j],2)); } }
void split_cb_search_shape_sign( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int complexity, int update_target ) { int i,j,m,q; const signed char *shape_cb; int shape_cb_size = 32, subvect_size = 10; int best_index; spx_word32_t best_dist; spx_word16_t resp[320]; spx_word16_t *resp2 = resp; spx_word32_t E[32]; spx_word16_t t[40]; spx_sig_t e[40]; shape_cb=exc_10_32_table; /* FIXME: Do we still need to copy the target? */ SPEEX_COPY(t, target, nsf); //compute_weighted_codebook { int i, k; spx_word16_t shape[10]; for (i=0;i<shape_cb_size;i++) { spx_word16_t *res; res = resp+i*subvect_size; for (k=0;k<subvect_size;k++) shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k]; E[i]=0; /* Compute codeword response using convolution with impulse response */ { spx_word32_t resj; spx_word16_t res16; // 0 resj = MULT16_16(shape[0],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[0] = res16; //++++++++++++++++++++++++++ // 1 resj = MULT16_16(shape[0],r[1]); resj = MAC16_16(resj,shape[1],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[1] = res16; //++++++++++++++++++++++++++ // 2 resj = MULT16_16(shape[0],r[2]); resj = MAC16_16(resj,shape[1],r[1]); resj = MAC16_16(resj,shape[2],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[2] = res16; //++++++++++++++++++++++++++ // 3 resj = MULT16_16(shape[0],r[3]); resj = MAC16_16(resj,shape[1],r[2]); resj = MAC16_16(resj,shape[2],r[1]); resj = MAC16_16(resj,shape[3],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[3] = res16; //++++++++++++++++++++++++++ // 4 resj = MULT16_16(shape[0],r[4]); resj = MAC16_16(resj,shape[1],r[3]); resj = MAC16_16(resj,shape[2],r[2]); resj = MAC16_16(resj,shape[3],r[1]); resj = MAC16_16(resj,shape[4],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[4] = res16; //++++++++++++++++++++++++++ // 5 resj = MULT16_16(shape[0],r[5]); resj = MAC16_16(resj,shape[1],r[4]); resj = MAC16_16(resj,shape[2],r[3]); resj = MAC16_16(resj,shape[3],r[2]); resj = MAC16_16(resj,shape[4],r[1]); resj = MAC16_16(resj,shape[5],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[5] = res16; //++++++++++++++++++++++++++ // 6 resj = MULT16_16(shape[0],r[6]); resj = MAC16_16(resj,shape[1],r[5]); resj = MAC16_16(resj,shape[2],r[4]); resj = MAC16_16(resj,shape[3],r[3]); resj = MAC16_16(resj,shape[4],r[2]); resj = MAC16_16(resj,shape[5],r[1]); resj = MAC16_16(resj,shape[6],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[6] = res16; //++++++++++++++++++++++++++ // 7 resj = MULT16_16(shape[0],r[7]); resj = MAC16_16(resj,shape[1],r[6]); resj = MAC16_16(resj,shape[2],r[5]); resj = MAC16_16(resj,shape[3],r[4]); resj = MAC16_16(resj,shape[4],r[3]); resj = MAC16_16(resj,shape[5],r[2]); resj = MAC16_16(resj,shape[6],r[1]); resj = MAC16_16(resj,shape[7],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[7] = res16; //++++++++++++++++++++++++++ // 8 resj = MULT16_16(shape[0],r[8]); resj = MAC16_16(resj,shape[1],r[7]); resj = MAC16_16(resj,shape[2],r[6]); resj = MAC16_16(resj,shape[3],r[5]); resj = MAC16_16(resj,shape[4],r[4]); resj = MAC16_16(resj,shape[5],r[3]); resj = MAC16_16(resj,shape[6],r[2]); resj = MAC16_16(resj,shape[7],r[1]); resj = MAC16_16(resj,shape[8],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[8] = res16; //++++++++++++++++++++++++++ // 9 resj = MULT16_16(shape[0],r[9]); resj = MAC16_16(resj,shape[1],r[8]); resj = MAC16_16(resj,shape[2],r[7]); resj = MAC16_16(resj,shape[3],r[6]); resj = MAC16_16(resj,shape[4],r[5]); resj = MAC16_16(resj,shape[5],r[4]); resj = MAC16_16(resj,shape[6],r[3]); resj = MAC16_16(resj,shape[7],r[2]); resj = MAC16_16(resj,shape[8],r[1]); resj = MAC16_16(resj,shape[9],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[9] = res16; //++++++++++++++++++++++++++ } } } for (i=0;i<4;i++) { spx_word16_t *x=t+subvect_size*i; /*Find new n-best based on previous n-best j*/ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); speex_bits_pack(bits,best_index,5); { int rind; spx_word16_t *res; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } res = resp+rind*subvect_size; if (sign>0) for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]); else for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]); if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5)); } } for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; g=sign*shape_cb[rind*subvect_size+m]; target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1)); } } /* Update excitation */ /* FIXME: We could update the excitation directly above */ for (j=0;j<nsf;j++) exc[j]=ADD32(exc[j],e[j]); }