/** Multiply two packed spectra bin by bin and accumulate over M blocks.
 *
 * X and Y are each read as M consecutive blocks of N values. Inside a block,
 * element 0 and element N-1 are multiplied as plain real numbers, while the
 * elements in between are consumed in (i, i+1) pairs and combined with the
 * arithmetic of a complex product (re = a*c - b*d, im = b*c + a*d).
 * For every bin the products of all M blocks are summed in 32 bits, then
 * rounded down by WEIGHT_SHIFT and stored in acc[0..N-1].
 */
static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
{
   int bin, blk;
   spx_word32_t re = 0;
   spx_word32_t im = 0;

   /* First element of every block: real-only product */
   for (blk = 0; blk < M; blk++)
      re = MAC16_16(re, X[blk*N], Y[blk*N]);
   acc[0] = PSHR32(re, WEIGHT_SHIFT);

   /* Paired elements */
   for (bin = 1; bin < N-1; bin += 2)
   {
      re = 0;
      im = 0;
      for (blk = 0; blk < M; blk++)
      {
         const int at = blk*N + bin;
         re = SUB32(MAC16_16(re, X[at], Y[at]), MULT16_16(X[at+1], Y[at+1]));
         im = MAC16_16(MAC16_16(im, X[at+1], Y[at]), X[at], Y[at+1]);
      }
      acc[bin] = PSHR32(re, WEIGHT_SHIFT);
      acc[bin+1] = PSHR32(im, WEIGHT_SHIFT);
   }

   /* Last element of every block: real-only product */
   re = 0;
   im = 0;
   for (blk = 0; blk < M; blk++)
      re = MAC16_16(re, X[(blk+1)*N-1], Y[(blk+1)*N-1]);
   acc[N-1] = PSHR32(re, WEIGHT_SHIFT);
}
/** Filter every shape-codebook entry through the impulse response r and
 *  record the energy of each filtered entry.
 *
 * shape_cb      codebook, shape_cb_size entries of subvect_size samples each
 * r             impulse response, at least subvect_size taps are read
 * resp          output: filtered entries, same layout as shape_cb
 * resp2         not used by this function
 * E             output: E[i] is the sum of squares of filtered entry i
 * stack         scratch area consumed by ALLOC
 */
static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
{
   int entry, n, tap;
   VARDECL(spx_word16_t *shape);
   ALLOC(shape, subvect_size, spx_word16_t);

   for (entry = 0; entry < shape_cb_size; entry++)
   {
      spx_word16_t *filtered = resp + entry*subvect_size;

      /* Widen the 8-bit codeword to working precision */
      for (tap = 0; tap < subvect_size; tap++)
         shape[tap] = (spx_word16_t)shape_cb[entry*subvect_size + tap];

      E[entry] = 0;

      /* Causal convolution of the codeword with the impulse response */
      for (n = 0; n < subvect_size; n++)
      {
         spx_word32_t conv = 0;
         spx_word16_t sample;

         for (tap = 0; tap <= n; tap++)
            conv = MAC16_16(conv, shape[tap], r[n-tap]);
#ifdef FIXED_POINT
         sample = EXTRACT16(SHR32(conv, 13));
#else
         sample = 0.03125f*conv;
#endif
         /* Accumulate the energy of the filtered codeword */
         E[entry] = MAC16_16(E[entry], sample, sample);
         filtered[n] = sample;
      }
   }
}
/** Expand a mono frame to interleaved stereo, in place.
 *
 * data         in: frame_size mono samples; out: 2*frame_size interleaved
 *              samples (even index = left gain applied, odd index = right)
 * frame_size   number of mono samples
 * _stereo      stereo state; balance and e_ratio are read, smooth_left and
 *              smooth_right are updated once per sample
 *
 * The frame is walked from its last sample to its first so that writing the
 * two output samples never overwrites a mono sample that is still needed.
 */
void speex_decode_stereo_int(spx_int16_t *data, int frame_size, SpeexStereoState *_stereo)
{
   RealSpeexStereoState *st = (RealSpeexStereoState*)_stereo;
   spx_word32_t balance;
   spx_word16_t e_ratio;
   spx_word16_t gain_left, gain_right;
   int n;

   /* COMPATIBILITY_HACK(stereo); */

   balance = st->balance;
   e_ratio = st->e_ratio;

   /* These two are Q14, with max value just below 2. */
   gain_right = DIV32(QCONST32(1., 22), spx_sqrt(MULT16_32_Q15(e_ratio, ADD32(QCONST32(1., 16), balance))));
   gain_left = SHR32(MULT16_16(spx_sqrt(balance), gain_right), 8);

   for (n = frame_size; n-- > 0; )
   {
      const spx_int16_t mono = data[n];

      /* One-pole smoothing of both gains: 0.98*previous + 0.02*target */
      st->smooth_left = EXTRACT16(PSHR32(MAC16_16(MULT16_16(st->smooth_left, QCONST16(0.98, 15)), gain_left, QCONST16(0.02, 15)), 15));
      st->smooth_right = EXTRACT16(PSHR32(MAC16_16(MULT16_16(st->smooth_right, QCONST16(0.98, 15)), gain_right, QCONST16(0.02, 15)), 15));

      data[2*n] = (spx_int16_t)MULT16_16_P14(st->smooth_left, mono);
      data[2*n+1] = (spx_int16_t)MULT16_16_P14(st->smooth_right, mono);
   }
}
/* In-place 5-tap FIR filter with zero initial history.
 *
 * For every n in [0, N):
 *   x[n] <- ROUND16( (x[n] << SIG_SHIFT) + sum_{k=0..4} num[k]*x_in[n-1-k], SIG_SHIFT )
 * where x_in is the input as it was before being overwritten and samples
 * before index 0 are taken as zero.
 */
static void celt_fir5(opus_val16 *x, const opus_val16 *num, int N)
{
   int n;
   opus_val16 tap0, tap1, tap2, tap3, tap4;
   opus_val32 z1, z2, z3, z4, z5;   /* the five most recent inputs, z1 newest */

   tap0 = num[0];
   tap1 = num[1];
   tap2 = num[2];
   tap3 = num[3];
   tap4 = num[4];
   z1 = z2 = z3 = z4 = z5 = 0;

   for (n = 0; n < N; n++)
   {
      opus_val32 acc = SHL32(EXTEND32(x[n]), SIG_SHIFT);

      acc = MAC16_16(acc, tap0, z1);
      acc = MAC16_16(acc, tap1, z2);
      acc = MAC16_16(acc, tap2, z3);
      acc = MAC16_16(acc, tap3, z4);
      acc = MAC16_16(acc, tap4, z5);

      /* Age the delay line, then capture the input before overwriting it */
      z5 = z4;
      z4 = z3;
      z3 = z2;
      z2 = z1;
      z1 = x[n];

      x[n] = ROUND16(acc, SIG_SHIFT);
   }
}
/* Compute two inner products that share the same left operand:
 *   *xy1 = sum x[i]*y01[i],   *xy2 = sum x[i]*y02[i],   i in [0, N).
 * Groups of four samples go through SSE (unaligned loads); the remaining
 * N%4 samples are added with scalar MAC16_16 after the vector sums have
 * been stored.
 */
void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2)
{
   __m128 acc1 = _mm_setzero_ps();
   __m128 acc2 = _mm_setzero_ps();
   int n = 0;

   while (n < N-3)
   {
      __m128 vx = _mm_loadu_ps(x+n);
      __m128 vy1 = _mm_loadu_ps(y01+n);
      __m128 vy2 = _mm_loadu_ps(y02+n);
      acc1 = _mm_add_ps(acc1, _mm_mul_ps(vx, vy1));
      acc2 = _mm_add_ps(acc2, _mm_mul_ps(vx, vy2));
      n += 4;
   }

   /* Horizontal sum: fold the upper pair onto the lower pair, then lane 1 onto lane 0 */
   acc1 = _mm_add_ps(acc1, _mm_movehl_ps(acc1, acc1));
   acc1 = _mm_add_ss(acc1, _mm_shuffle_ps(acc1, acc1, 0x55));
   _mm_store_ss(xy1, acc1);

   acc2 = _mm_add_ps(acc2, _mm_movehl_ps(acc2, acc2));
   acc2 = _mm_add_ss(acc2, _mm_shuffle_ps(acc2, acc2, 0x55));
   _mm_store_ss(xy2, acc2);

   /* Scalar tail */
   while (n < N)
   {
      *xy1 = MAC16_16(*xy1, x[n], y01[n]);
      *xy2 = MAC16_16(*xy2, x[n], y02[n]);
      n++;
   }
}
/* Second-order recursive pre-processing filter applied to one frame.
 *
 * encoderChannelContext  holds the filter memory (inputX0/inputX1 and
 *                        outputY1/outputY2), updated for every sample
 * signal                 L_FRAME input samples
 * preProcessedSignal     L_FRAME output samples
 *
 * Per sample: acc = A1*Y1 + A2*Y2 + B0*X0 + B1*X1 + B2*X2, with the Y terms
 * kept in Q12 and the result saturated to 28 bits before being stored back.
 */
void preProcessing(bcg729EncoderChannelContextStruct *encoderChannelContext, word16_t signal[], word16_t preProcessedSignal[])
{
    bcg729EncoderChannelContextStruct *ctx = encoderChannelContext;
    int n;

    for (n = 0; n < L_FRAME; n++) {
        word16_t olderInput;
        word32_t acc; /* in Q12 */

        /* shift the input delay line and take in the new sample */
        olderInput = ctx->inputX1;
        ctx->inputX1 = ctx->inputX0;
        ctx->inputX0 = signal[n];

        /* recursive part: Y1 in Q15.12 * A1 in Q1.12 -> Q17.12, then
           Y2 in Q15.12 * A2 in Q0.12 added -> Q18.12 */
        acc = MULT16_32_Q12(A1, ctx->outputY1);
        acc = MAC16_32_Q12(acc, A2, ctx->outputY2);

        /* direct part: 3*(Xi in Q15.0 * Bi in Q0.12) -> acc in Q19.12 */
        acc = MAC16_16(acc, ctx->inputX0, B0);
        acc = MAC16_16(acc, ctx->inputX1, B1);
        acc = MAC16_16(acc, olderInput, B2);

        /* Neither the stored Q15.12 output nor the Q15.0 value extracted
           from it may overflow: saturate to 28 bits -> acc in Q15.12 */
        acc = SATURATE(acc, MAXINT28);

        /* integer part of the Q15.12 value */
        preProcessedSignal[n] = PSHR(acc, 12);

        /* shift the output delay line */
        ctx->outputY2 = ctx->outputY1;
        ctx->outputY1 = acc;
    }
}
/* Apply a 2-D rotation (cosine c, sine s, both Q15) to pairs of samples
 * that lie `stride` apart in X[0..len-1].
 *
 * A forward sweep rotates the pairs (i, i+stride) for i = 0 .. len-stride-1,
 * then a backward sweep rotates the pairs for i = len-2*stride-1 down to 0.
 * Each rotation reads both samples first and then overwrites them.
 */
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   const opus_val16 neg_s = NEG16(s);
   int pos;

   /* Forward sweep */
   for (pos = 0; pos < len-stride; pos++)
   {
      const celt_norm lo = X[pos];
      const celt_norm hi = X[pos+stride];
      X[pos+stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, hi), s, lo), 15));
      X[pos] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, lo), neg_s, hi), 15));
   }

   /* Backward sweep */
   for (pos = len-2*stride-1; pos >= 0; pos--)
   {
      const celt_norm lo = X[pos];
      const celt_norm hi = X[pos+stride];
      X[pos+stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, hi), s, lo), 15));
      X[pos] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, lo), neg_s, hi), 15));
   }
}
/* Build the adaptive codebook vector for one subframe by interpolating the
 * past excitation at a fractional pitch delay (eq40 / eqA.8).
 *
 * excitationVector  points at the current subframe; samples at negative
 *                   indexes (the past excitation) are read, and indexes
 *                   0..L_SUBFRAME-1 are written
 * intPitchDelay     integer part of the pitch delay
 * fracPitchDelay    fractional part, expected in [-1, 1]
 *
 * Outputs are produced in increasing index order and a later output may read
 * an earlier one through the delayed pointer, so the order must not change.
 */
void generateAdaptativeCodebookVector(word16_t excitationVector[], int16_t intPitchDelay, int16_t fracPitchDelay)
{
    word16_t *delayed;
    word16_t *coefForward;
    word16_t *coefBackward;
    int n, tap;

    /* fracPitchDelay is in range [-1, 1], convert it to [0, 2] as eqA.8 needs */
    fracPitchDelay = -fracPitchDelay;
    if (fracPitchDelay < 0) {
        /* fraction was 1: delay of int+(1/3) is expressed as int+1-(2/3) */
        intPitchDelay++;
        fracPitchDelay = 2;
    }

    delayed = &(excitationVector[-intPitchDelay]);  /* excitation at index -intPitchDelay (-k in eq40) */
    coefForward = &(b30[fracPitchDelay]);           /* b30[t] in eq40, b30 in Q15 */
    coefBackward = &(b30[3-fracPitchDelay]);        /* b30[3-t] in eq40, b30 in Q15 */

    for (n = 0; n < L_SUBFRAME; n++) {
        word32_t acc = 0; /* acc in Q15 */

        for (tap = 0; tap < 10; tap++) {
            /* WARNING: spec 3.7.1 and A.8 give an equation leading to
               delayed[n+tap] but the ITU code uses delayed[n-tap];
               implemented as the code does. Filter taps are 3 apart. */
            acc = MAC16_16(acc, delayed[n-tap], coefForward[3*tap]);
            acc = MAC16_16(acc, delayed[n+1+tap], coefBackward[3*tap]);
        }

        /* acc in Q15: shift/round to an unscaled value and saturate on 16 bits */
        excitationVector[n] = SATURATE(PSHR(acc, 15), MAXINT16);
    }
}
/* FIR filter with external memory:
 *   _y[n] = SATURATE16( _x[n] + (sum_{k=0..ord-1} num[k]*in[n-1-k]) >> SIG_SHIFT )
 * where in[] is _x preceded by the `ord` samples held in mem (mem[0] being
 * the most recent one).
 *
 * _x    N input samples
 * num   ord filter coefficients
 * _y    N output samples
 * mem   in: previous ord inputs; out: the last ord samples of _x, newest
 *       first (reads _x[N-1] down to _x[N-ord], so N >= ord is expected)
 * arch  forwarded to xcorr_kernel; unused when SMALL_FOOTPRINT is defined
 */
void celt_fir_c( const opus_val16 *_x, const opus_val16 *num, opus_val16 *_y, int N, int ord, opus_val16 *mem, int arch)
{
   int n, k;
   VARDECL(opus_val16, taps);
   VARDECL(opus_val16, sig);
   SAVE_STACK;

   ALLOC(taps, ord, opus_val16);
   ALLOC(sig, N+ord, opus_val16);

   /* Time-reverse the coefficients and the history so that the filter
      becomes a plain correlation over the contiguous buffer sig[] */
   for (k = 0; k < ord; k++)
   {
      taps[k] = num[ord-k-1];
      sig[k] = mem[ord-k-1];
   }
   for (n = 0; n < N; n++)
      sig[ord+n] = _x[n];

   /* The input is safely copied, so the memory can be refreshed now */
   for (k = 0; k < ord; k++)
      mem[k] = _x[N-k-1];

#ifdef SMALL_FOOTPRINT
   (void)arch;
   for (n = 0; n < N; n++)
   {
      opus_val32 acc = SHL32(EXTEND32(_x[n]), SIG_SHIFT);
      for (k = 0; k < ord; k++)
      {
         acc = MAC16_16(acc, taps[k], sig[n+k]);
      }
      _y[n] = SATURATE16(PSHR32(acc, SIG_SHIFT));
   }
#else
   /* Four outputs at a time through the correlation kernel */
   for (n = 0; n < N-3; n += 4)
   {
      opus_val32 acc4[4] = {0, 0, 0, 0};
      xcorr_kernel(taps, sig+n, acc4, ord, arch);
      for (k = 0; k < 4; k++)
         _y[n+k] = SATURATE16(ADD32(EXTEND32(_x[n+k]), PSHR32(acc4[k], SIG_SHIFT)));
   }
   /* Leftover outputs, one at a time */
   for (; n < N; n++)
   {
      opus_val32 acc = 0;
      for (k = 0; k < ord; k++)
         acc = MAC16_16(acc, taps[k], sig[n+k]);
      _y[n] = SATURATE16(ADD32(EXTEND32(_x[n]), PSHR32(acc, SIG_SHIFT)));
   }
#endif
   RESTORE_STACK;
}
/* Inner product of x and y over len samples, processed four at a time.
 * Each group of four products is summed at full precision and then shifted
 * right by 6 before being added to the running total. Only len/4 complete
 * groups are processed; a remainder of 1 to 3 samples is ignored.
 */
spx_word32_t inner_prod(const spx_word16_t* x, const spx_word16_t* y, int len)
{
   spx_word32_t total = 0;
   int groups;

   for (groups = len >> 2; groups != 0; groups--)
   {
      spx_word32_t part = 0;

      part = MAC16_16(part, x[0], y[0]);
      part = MAC16_16(part, x[1], y[1]);
      part = MAC16_16(part, x[2], y[2]);
      part = MAC16_16(part, x[3], y[3]);
      x += 4;
      y += 4;

      /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
      total = ADD32(total, SHR32(part, 6));
   }
   return total;
}
/** Weighted cross-power spectrum of a half-complex (packed) vector with the
 *  conjugate of another.
 *
 * prod[0] and the final element are real-only products. The elements in
 * between are taken in (i, i+1) pairs and produce
 *   prod[i]   = W * ( X[i]*Y[i] + X[i+1]*Y[i+1] )
 *   prod[i+1] = W * ( X[i]*Y[i+1] - X[i+1]*Y[i] )
 * where W = p * w[j], and j advances by one for each pair.
 */
static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
{
   int k;          /* position in the packed spectra */
   int band;       /* position in the weight vector */
   spx_float_t gain;

   gain = FLOAT_AMULT(p, w[0]);
   prod[0] = FLOAT_MUL32(gain, MULT16_16(X[0], Y[0]));

   band = 1;
   for (k = 1; k < N-1; k += 2)
   {
      gain = FLOAT_AMULT(p, w[band]);
      prod[k] = FLOAT_MUL32(gain, MAC16_16(MULT16_16(X[k], Y[k]), X[k+1], Y[k+1]));
      prod[k+1] = FLOAT_MUL32(gain, MAC16_16(MULT16_16(-X[k+1], Y[k]), X[k], Y[k+1]));
      band++;
   }

   /* k and band now designate the element left after the last pair */
   gain = FLOAT_AMULT(p, w[band]);
   prod[k] = FLOAT_MUL32(gain, MULT16_16(X[k], Y[k]));
}
/* Nearest-neighbour search of x in an 8-bit codebook.
 *
 * x       nbDim values; on return the chosen codeword (scaled by 2^5) has
 *         been subtracted from it, i.e. it holds the quantisation residual
 * cdbk    nbVec codewords of nbDim signed bytes each
 * return  index of the codeword with the smallest squared error; the first
 *         one wins on a tie, and 0 is returned when nbVec <= 0
 */
static int lsp_quant(spx_word16_t *x, const signed char *cdbk, int nbVec, int nbDim)
{
   spx_word32_t lowest = VERY_LARGE32;
   int winner = 0;
   int vec, dim;

   for (vec = 0; vec < nbVec; vec++)
   {
      spx_word32_t err = 0;

      for (dim = 0; dim < nbDim; dim++)
      {
         spx_word16_t diff = SUB16(x[dim], SHL16((spx_word16_t)cdbk[vec*nbDim + dim], 5));
         err = MAC16_16(err, diff, diff);
      }
      if (err < lowest)
      {
         lowest = err;
         winner = vec;
      }
   }

   /* Leave the residual in x */
   for (dim = 0; dim < nbDim; dim++)
      x[dim] = SUB16(x[dim], SHL16((spx_word16_t)cdbk[winner*nbDim + dim], 5));

   return winner;
}
/* Finds the indices of the N best entries in a codebook.
 *
 * The score of entry i is E[i]/2 - <in, codeword i>; lower is better.
 * best_dist[0..N-1] is kept sorted in increasing order and nbest[] holds the
 * matching entry indices. `stack` is not used.
 */
void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
{
   int entry, n, slot;
   int filled = 0;   /* number of insertions performed so far */

   for (entry = 0; entry < entries; entry++)
   {
      spx_word32_t score = 0;

      for (n = 0; n < len; n++)
         score = MAC16_16(score, in[n], codebook[entry*len + n]);
#ifdef FIXED_POINT
      score = SUB32(SHR32(E[entry], 1), score);
#else
      score = .5f*E[entry] - score;
#endif

      /* The first N entries are always inserted; afterwards only those
         that beat the current worst candidate */
      if (entry < N || score < best_dist[N-1])
      {
         /* Move worse candidates (and still-empty slots) down by one */
         slot = N-1;
         while ((slot >= 1) && (slot > filled || score < best_dist[slot-1]))
         {
            best_dist[slot] = best_dist[slot-1];
            nbest[slot] = nbest[slot-1];
            slot--;
         }
         best_dist[slot] = score;
         nbest[slot] = entry;
         filled++;
      }
   }
}
/* Compute the amplitude (sqrt energy) in each of the bands.
 *
 * For each channel c in [0, C) and each band i in [0, end), the coefficients
 * X[c*N + M*eBands[i]] .. X[c*N + M*eBands[i+1] - 1] are scaled down
 * according to the largest magnitude in the band, squared and summed; the
 * square root is scaled back up and stored, offset by EPSILON, in
 * bandE[i + c*m->nbEBands]. An all-zero band yields EPSILON.
 * At least one channel is always processed, and at least one coefficient per
 * band is always read.
 */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
{
   const opus_int16 *eBands = m->eBands;
   const int N = M*m->shortMdctSize;
   int c = 0;

   do {
      const celt_sig *chan = X + c*N;
      int band;

      for (band = 0; band < end; band++)
      {
         const int first = M*eBands[band];
         const int stop = M*eBands[band+1];
         opus_val32 peak = 0;
         opus_val32 energy = 0;
         int j;

         /* Largest absolute value in the band */
         j = first;
         do {
            peak = MAX32(peak, chan[j]);
            peak = MAX32(peak, -chan[j]);
         } while (++j < stop);

         if (peak > 0)
         {
            const int shift = celt_ilog2(peak)-10;

            j = first;
            do {
               energy = MAC16_16(energy, EXTRACT16(VSHR32(chan[j], shift)),
                                         EXTRACT16(VSHR32(chan[j], shift)));
            } while (++j < stop);
            /* We're adding one here to make damn sure we never end up with a pitch vector that's
               larger than unity norm */
            bandE[band + c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(energy)), -shift);
         } else {
            bandE[band + c*m->nbEBands] = EPSILON;
         }
      }
   } while (++c < C);
}
/* Pick the two best pitch candidates from a cross-correlation.
 *
 * xcorr       cross-correlation values for lags 0 .. max_pitch-1
 * y           delayed signal; y[0 .. len+max_pitch-1] is read
 * len         length of the analysis window
 * best_pitch  out: best_pitch[0] is the best lag, best_pitch[1] the runner-up
 * yshift      (fixed-point only) right shift applied to every y[]^2 term
 * maxcorr     (fixed-point only) largest xcorr value, used to scale xcorr
 *             down to 16 bits
 *
 * A lag is scored by xcorr[i]^2 / Syy(i), where Syy(i) is the energy of
 * y[i .. i+len-1]. Scores are compared by cross-multiplication
 * (num*best_den > best_num*Syy), so no division is needed.
 *
 * The initial energy is accumulated with the same yshift scaling as the
 * sliding update at the bottom of the loop. Summing it unscaled, as the
 * previous version did, left Syy 2^yshift times too large at lag 0 and on a
 * different scale from every later update, which biased the comparison
 * between lags whenever yshift > 0.
 */
static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len, int max_pitch, int *best_pitch
#ifdef FIXED_POINT
                            , int yshift, opus_val32 maxcorr
#endif
                            )
{
   int i, j;
   opus_val32 Syy=1;
   opus_val16 best_num[2];
   opus_val32 best_den[2];
#ifdef FIXED_POINT
   int xshift;

   xshift = celt_ilog2(maxcorr)-14;
#endif

   /* Start with two "worse than anything" candidates */
   best_num[0] = -1;
   best_num[1] = -1;
   best_den[0] = 0;
   best_den[1] = 0;
   best_pitch[0] = 0;
   best_pitch[1] = 1;

   /* Energy of the first window, on the same scale as the updates below */
   for (j=0;j<len;j++)
      Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));

   for (i=0;i<max_pitch;i++)
   {
      if (xcorr[i]>0)
      {
         opus_val16 num;
         opus_val32 xcorr16;
         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
         num = MULT16_16_Q15(xcorr16,xcorr16);
         /* Better than the current runner-up? */
         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
         {
            /* Better than the current best too? */
            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
            {
               best_num[1] = best_num[0];
               best_den[1] = best_den[0];
               best_pitch[1] = best_pitch[0];
               best_num[0] = num;
               best_den[0] = Syy;
               best_pitch[0] = i;
            } else {
               best_num[1] = num;
               best_den[1] = Syy;
               best_pitch[1] = i;
            }
         }
      }
      /* Slide the energy window by one sample and keep it strictly positive */
      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
      Syy = MAX32(1, Syy);
   }
}
/* Evaluate a multi-layer perceptron with one hidden layer.
 *
 * m->topo[0], m->topo[1] and m->topo[2] are the input, hidden and output
 * layer sizes. m->weights is consumed sequentially: for every neuron, one
 * bias followed by one weight per input of that layer.
 *
 * NOTE(review): hidden[] holds MAX_NEURONS values and m->topo[1] is not
 * checked against it here - callers presumably guarantee the bound.
 */
void mlp_process(const MLP * m, const opus_val16 * in, opus_val16 * out)
{
   opus_val16 hidden[MAX_NEURONS];
   const opus_val16 *wp = m->weights;
   int neuron, src;

   /* Input layer -> hidden layer */
   for (neuron = 0; neuron < m->topo[1]; neuron++)
   {
      opus_val32 acc = SHL32(EXTEND32(*wp), 8);   /* bias */
      wp++;
      for (src = 0; src < m->topo[0]; src++)
      {
         acc = MAC16_16(acc, in[src], *wp);
         wp++;
      }
      hidden[neuron] = tansig_approx(acc);
   }

   /* Hidden layer -> output layer */
   for (neuron = 0; neuron < m->topo[2]; neuron++)
   {
      opus_val32 acc = SHL32(EXTEND32(*wp), 14);  /* bias */
      wp++;
      for (src = 0; src < m->topo[1]; src++)
      {
         acc = MAC16_16(acc, hidden[src], *wp);
         wp++;
      }
      out[neuron] = tansig_approx(EXTRACT16(PSHR32(acc, 17)));
   }
}
/* Squared distance between the current and previous band energies over bands
 * [start, end) of each channel, with channel c stored at offset c*len.
 * Both inputs are shifted right by 3 before the subtraction. The sum is
 * shifted right by 2*DB_SHIFT-6 and capped at 200. At least one channel is
 * always processed.
 */
static opus_val32 loss_distortion(const opus_val16 *eBands, opus_val16 *oldEBands, int start, int end, int len, int C)
{
   opus_val32 total = 0;
   int ch = 0;

   do {
      const int base = ch*len;
      int band;

      for (band = start; band < end; band++)
      {
         opus_val16 delta = SUB16(SHR16(eBands[base+band], 3), SHR16(oldEBands[base+band], 3));
         total = MAC16_16(total, delta, delta);
      }
   } while (++ch < C);

   return MIN32(200, SHR32(total, 2*DB_SHIFT-6));
}
/* Cross-fade from in1 to in2 over `overlap` samples of interleaved audio.
 *
 * The fade weight for sample i is the square of window[i*(48000/Fs)]:
 *   out = ( w*in2 + (Q15ONE - w)*in1 ) >> 15
 * Fs must be non-zero, since it is used as a divisor.
 */
static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2, opus_val16 *out, int overlap, int channels, const opus_val16 *window, opus_int32 Fs)
{
   const int step = 48000/Fs;   /* window stride for this sampling rate */
   int ch, n;

   for (ch = 0; ch < channels; ch++)
   {
      for (n = 0; n < overlap; n++)
      {
         const int at = n*channels + ch;
         opus_val16 w = MULT16_16_Q15(window[n*step], window[n*step]);
         out[at] = SHR32(MAC16_16(MULT16_16(w, in2[at]), Q15ONE-w, in1[at]), 15);
      }
   }
}
/* Compute the amplitude (sqrt energy) in each of the bands.
 *
 * For each channel c in [0, CHANNELS(_C)) and each of the m->nbEBands bands,
 * the coefficients X[c*N + eBands[i]] .. X[c*N + eBands[i+1] - 1] (with
 * N = FRAMESIZE(m)) are scaled down according to the largest magnitude in
 * the band, squared and summed; the square root is scaled back up and
 * stored, offset by EPSILON, in bank[i + c*m->nbEBands]. An all-zero band
 * yields EPSILON. At least one coefficient per band is always read.
 */
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bank, int _C)
{
   const celt_int16 *eBands = m->eBands;
   const int C = CHANNELS(_C);
   const int N = FRAMESIZE(m);
   int c, band;

   for (c = 0; c < C; c++)
   {
      const celt_sig *chan = X + c*N;

      for (band = 0; band < m->nbEBands; band++)
      {
         const int first = eBands[band];
         const int stop = eBands[band+1];
         celt_word32 peak = 0;
         celt_word32 energy = 0;
         int j;

         /* Largest absolute value in the band */
         j = first;
         do {
            peak = MAX32(peak, chan[j]);
            peak = MAX32(peak, -chan[j]);
         } while (++j < stop);

         if (peak > 0)
         {
            const int shift = celt_ilog2(peak)-10;

            j = first;
            do {
               energy = MAC16_16(energy, EXTRACT16(VSHR32(chan[j], shift)),
                                         EXTRACT16(VSHR32(chan[j], shift)));
            } while (++j < stop);
            /* We're adding one here to make damn sure we never end up with a pitch vector that's
               larger than unity norm */
            bank[band + c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(energy)), -shift);
         } else {
            bank[band + c*m->nbEBands] = EPSILON;
         }
      }
   }
}
/* Finds the indices of the N best entries in a codebook, with sign.
 *
 * The correlation <in, codeword i> is forced to be non-positive, E[i]/2 is
 * added, and the N lowest scores are kept sorted in best_dist[] with their
 * entry indices in nbest[]. When the original correlation was not strictly
 * positive, `entries` is added to the stored index to flag it.
 * `stack` is not used.
 */
void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
{
   int entry, n, slot;
   int flagged;
   int filled = 0;   /* number of insertions performed so far */

   for (entry = 0; entry < entries; entry++)
   {
      spx_word32_t score = 0;

      for (n = 0; n < len; n++)
         score = MAC16_16(score, in[n], codebook[entry*len + n]);

      if (score > 0)
      {
         flagged = 0;
         score = -score;
      } else {
         flagged = 1;
      }
#ifdef FIXED_POINT
      score = ADD32(score, SHR32(E[entry], 1));
#else
      score = ADD32(score, .5f*E[entry]);
#endif

      /* The first N entries are always inserted; afterwards only those
         that beat the current worst candidate */
      if (entry < N || score < best_dist[N-1])
      {
         /* Move worse candidates (and still-empty slots) down by one */
         slot = N-1;
         while ((slot >= 1) && (slot > filled || score < best_dist[slot-1]))
         {
            best_dist[slot] = best_dist[slot-1];
            nbest[slot] = nbest[slot-1];
            slot--;
         }
         best_dist[slot] = score;
         nbest[slot] = flagged ? entry + entries : entry;
         filled++;
      }
   }
}
/* Inner product sum x[i]*y[i], i in [0, N). Groups of four samples go
 * through SSE (unaligned loads); the remaining N%4 samples are added with
 * scalar MAC16_16 after the vector sum has been reduced.
 */
opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y, int N)
{
   __m128 acc = _mm_setzero_ps();
   float result;
   int n = 0;

   /* FIXME: We should probably go 8-way and use 2 sums. */
   while (n < N-3)
   {
      __m128 vx = _mm_loadu_ps(x+n);
      __m128 vy = _mm_loadu_ps(y+n);
      acc = _mm_add_ps(acc, _mm_mul_ps(vx, vy));
      n += 4;
   }

   /* Horizontal sum: fold the upper pair onto the lower pair, then lane 1 onto lane 0 */
   acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
   acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
   _mm_store_ss(&result, acc);

   /* Scalar tail */
   while (n < N)
   {
      result = MAC16_16(result, x[n], y[n]);
      n++;
   }
   return result;
}
/* All-pole (IIR) filter:
 *   _y[n] = _x[n] - sum_{k=0..ord-1} den[k] * ROUND16(_y[n-1-k], SIG_SHIFT)
 *
 * _x    N input samples
 * den   ord denominator coefficients
 * _y    N output samples (full 32-bit precision)
 * mem   filter state: the `ord` previous outputs rounded to 16 bits, most
 *       recent first; updated on return
 * arch  forwarded to xcorr_kernel; unused when SMALL_FOOTPRINT is defined
 *
 * The default implementation keeps the *negated* rounded outputs in a
 * contiguous buffer y[] so that four outputs at a time can be computed with
 * the additive correlation kernel and then patched up with the terms that
 * depend on outputs produced inside the same group of four.
 *
 * Two inconsistencies of the previous version are fixed here so that both
 * implementations agree for every N:
 *  - the scalar tail loop (N not a multiple of 4) subtracted products of the
 *    already negated history and stored its state un-negated, which flipped
 *    the sign of the feedback; it now uses the same convention as the
 *    unrolled loop;
 *  - mem[] was refreshed from the unrounded 32-bit _y[] (reading before the
 *    start of _y when N < ord); it is now taken from the rounded 16-bit
 *    state in y[], exactly what the SMALL_FOOTPRINT path leaves in mem[].
 */
void celt_iir(const opus_val32 *_x, const opus_val16 *den, opus_val32 *_y, int N, int ord, opus_val16 *mem, int arch)
{
#ifdef SMALL_FOOTPRINT
   int i,j;
   (void)arch;
   for (i=0;i<N;i++)
   {
      opus_val32 sum = _x[i];
      for (j=0;j<ord;j++)
      {
         sum -= MULT16_16(den[j],mem[j]);
      }
      /* Shift the state and insert the newest rounded output */
      for (j=ord-1;j>=1;j--)
      {
         mem[j]=mem[j-1];
      }
      mem[0] = ROUND16(sum,SIG_SHIFT);
      _y[i] = sum;
   }
#else
   int i,j;
   VARDECL(opus_val16, rden);
   VARDECL(opus_val16, y);
   SAVE_STACK;

   celt_assert((ord&3)==0);
   ALLOC(rden, ord, opus_val16);
   ALLOC(y, N+ord, opus_val16);
   /* Time-reversed coefficients, and negated history in chronological order:
      y[ord+n] will hold -ROUND16(_y[n]) */
   for(i=0;i<ord;i++)
      rden[i] = den[ord-i-1];
   for(i=0;i<ord;i++)
      y[i] = -mem[ord-i-1];
   for(;i<N+ord;i++)
      y[i]=0;
   for (i=0;i<N-3;i+=4)
   {
      /* Unroll by 4 as if it were an FIR filter */
      opus_val32 sum[4];
      sum[0]=_x[i];
      sum[1]=_x[i+1];
      sum[2]=_x[i+2];
      sum[3]=_x[i+3];
      xcorr_kernel(rden, y+i, sum, ord, arch);

      /* Patch up the result to compensate for the fact that this is an IIR */
      y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
      _y[i  ] = sum[0];
      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
      y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
      _y[i+1] = sum[1];
      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
      y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
      _y[i+2] = sum[2];

      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
      y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
      _y[i+3] = sum[3];
   }
   /* Remaining samples: y[] holds negated outputs, so the feedback is added */
   for (;i<N;i++)
   {
      opus_val32 sum = _x[i];
      for (j=0;j<ord;j++)
         sum = MAC16_16(sum, rden[j], y[i+j]);
      y[i+ord] = -ROUND16(sum,SIG_SHIFT);
      _y[i] = sum;
   }
   /* New state: the last ord rounded outputs, most recent first */
   for(i=0;i<ord;i++)
      mem[i] = -y[N+ord-i-1];
   RESTORE_STACK;
#endif
}
/* Windowed autocorrelation.
 *
 * The first and last `overlap` samples of x are multiplied by `window`
 * (mirrored at the end of the frame) before correlating. In fixed-point
 * builds the signal is scaled down beforehand if its energy is large, and
 * the result is renormalised; the return value is the total shift that was
 * applied to ac[]. Without OPUS_FIXED_POINT the return value is 0.
 */
int _celt_autocorr(
                   const opus_val16 *x,      /*  in: [0...n-1] samples x   */
                   opus_val32       *ac,     /* out: [0...lag] ac values   */
                   const opus_val16 *window,
                   int               overlap,
                   int               lag,
                   int               n,
                   int               arch
                  )
{
   opus_val32 tail;
   int i, k;
   int fastN = n-lag;
   int shift = 0;
   const opus_val16 *sig;
   VARDECL(opus_val16, xx);
   SAVE_STACK;
   ALLOC(xx, n, opus_val16);
   celt_assert(n>0);
   celt_assert(overlap>=0);

   /* Work directly on x unless a window has to be applied */
   sig = x;
   if (overlap != 0)
   {
      for (i=0;i<n;i++)
         xx[i] = x[i];
      for (i=0;i<overlap;i++)
      {
         xx[i] = MULT16_16_Q15(x[i],window[i]);
         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
      }
      sig = xx;
   }

#ifdef OPUS_FIXED_POINT
   {
      /* Rough energy estimate used to pick a down-scaling shift */
      opus_val32 ac0;
      ac0 = 1+(n<<7);
      if (n&1)
         ac0 += SHR32(MULT16_16(sig[0],sig[0]),9);
      for (i=(n&1);i<n;i+=2)
      {
         ac0 += SHR32(MULT16_16(sig[i],sig[i]),9);
         ac0 += SHR32(MULT16_16(sig[i+1],sig[i+1]),9);
      }

      shift = celt_ilog2(ac0)-30+10;
      shift = (shift)/2;
      if (shift>0)
      {
         for (i=0;i<n;i++)
            xx[i] = PSHR32(sig[i], shift);
         sig = xx;
      } else {
         shift = 0;
      }
   }
#endif

   /* Bulk of the correlation over the first fastN samples ... */
   celt_pitch_xcorr(sig, sig, ac, fastN, lag+1, arch);
   /* ... then the terms that celt_pitch_xcorr() did not cover */
   for (k=0;k<=lag;k++)
   {
      tail = 0;
      for (i=k+fastN;i<n;i++)
         tail = MAC16_16(tail, sig[i], sig[i-k]);
      ac[k] += tail;
   }

#ifdef OPUS_FIXED_POINT
   shift = 2*shift;
   if (shift<=0)
      ac[0] += SHL32((opus_int32)1, -shift);
   /* Renormalise so that ac[0] lies in [2^28, 2^29) */
   if (ac[0] < 268435456)
   {
      int up = 29 - EC_ILOG(ac[0]);
      for (i=0;i<=lag;i++)
         ac[i] = SHL32(ac[i], up);
      shift -= up;
   } else if (ac[0] >= 536870912)
   {
      int down = 1;
      if (ac[0] >= 1073741824)
         down++;
      for (i=0;i<=lag;i++)
         ac[i] = SHR32(ac[i], down);
      shift += down;
   }
#endif

   RESTORE_STACK;
   return shift;
}
/* Encode one frame.
 *
 * encoderChannelContext  per-channel encoder state (signal, excitation and
 *                        target buffers, previous LSPs, gain memory)
 * inputFrame             input samples for this frame
 * bitStream              output: the packed parameters of the frame
 *
 * Frame-level work (pre-processing, LP analysis, LSP quantisation, weighted
 * speech, open-loop pitch) comes first, then each subframe goes through the
 * adaptive codebook search, fixed codebook search and gain quantisation, and
 * finally the context buffers are shifted for the next frame.
 */
void bcg729Encoder(bcg729EncoderChannelContextStruct *encoderChannelContext, int16_t inputFrame[], uint8_t bitStream[])
{
    /* Gamma^(i+1) for i=0..9 with Gamma = 0.75, in Q15 (spec A3.3.3) */
    static const word16_t gammaPowers[10] = {
        GAMMA_E1, GAMMA_E2, GAMMA_E3, GAMMA_E4, GAMMA_E5,
        GAMMA_E6, GAMMA_E7, GAMMA_E8, GAMMA_E9, GAMMA_E10
    };
    bcg729EncoderChannelContextStruct *ctx = encoderChannelContext;
    int k;
    uint16_t parameters[NB_PARAMETERS]; /* the output parameters in an array */

    /* internal buffers which we do not need to keep between calls */
    word16_t LPCoefficients[NB_LSP_COEFF];            /* the LP coefficients in Q3.12 */
    word16_t qLPCoefficients[2*NB_LSP_COEFF];         /* the quantized LP coefficients in Q3.12, computed from the qLSP after interpolation: two sets, one per subframe */
    word16_t weightedqLPCoefficients[2*NB_LSP_COEFF]; /* the qLP coefficients in Q3.12, weighted according to spec A3.3.3 */
    word16_t LSPCoefficients[NB_LSP_COEFF];           /* the LSP coefficients in Q15 */
    word16_t qLSPCoefficients[NB_LSP_COEFF];          /* the quantized LSP coefficients in Q15 */
    word16_t interpolatedqLSP[NB_LSP_COEFF];          /* the interpolated qLSP used for the first subframe, in Q15 */

    /*****************************************************************************************/
    /*** on frame basis : preProcessing, LP Analysis, Open-loop pitch search               ***/
    preProcessing(ctx, inputFrame, ctx->signalLastInputFrame); /* output of the function goes in the signal buffer */

    computeLP(ctx->signalBuffer, LPCoefficients); /* use the whole signal buffer for windowing and autocorrelation */

    /*** compute LSP: it might fail, reuse the previous one in this case ***/
    if (!LP2LSPConversion(LPCoefficients, LSPCoefficients)) {
        /* unable to find the 10 roots: repeat the previous LSP */
        memcpy(LSPCoefficients, ctx->previousLSPCoefficients, NB_LSP_COEFF*sizeof(word16_t));
    }

    /*** LSPQuantization and compute L0, L1, L2, L3: the first four parameters ***/
    LSPQuantization(ctx, LSPCoefficients, qLSPCoefficients, parameters);

    /*** interpolate qLSP and convert to LP ***/
    interpolateqLSP(ctx->previousqLSPCoefficients, qLSPCoefficients, interpolatedqLSP);
    /* copy the current qLSP to the previous qLSP buffer */
    memcpy(ctx->previousqLSPCoefficients, qLSPCoefficients, NB_LSP_COEFF*sizeof(word16_t));

    /* first subframe */
    qLSP2LP(interpolatedqLSP, qLPCoefficients);
    /* second subframe */
    qLSP2LP(qLSPCoefficients, &(qLPCoefficients[NB_LSP_COEFF]));

    /*** Compute the weighted quantized LP coefficients according to spec A3.3.3 ***/
    /* weightedqLPCoefficients[i] = qLPCoefficients[i]*Gamma^(i+1), for both sets of ten coefficients */
    for (k=0; k<10; k++) {
        weightedqLPCoefficients[k] = MULT16_16_P15(qLPCoefficients[k], gammaPowers[k]);
        weightedqLPCoefficients[10+k] = MULT16_16_P15(qLPCoefficients[10+k], gammaPowers[k]);
    }

    /*** Compute weighted signal according to spec A3.3.3, this function also sets LPResidualSignal (entire frame values) as specified in eq A.3 in excitationVector[L_PAST_EXCITATION] ***/
    /* weightedInputSignal contains MAXIMUM_INT_PITCH_DELAY values from the previous frame, the pointer given is on the current frame */
    computeWeightedSpeech(ctx->signalCurrentFrame, qLPCoefficients, weightedqLPCoefficients,
            &(ctx->weightedInputSignal[MAXIMUM_INT_PITCH_DELAY]),
            &(ctx->excitationVector[L_PAST_EXCITATION]));

    /*** find the open loop pitch delay ***/
    uint16_t openLoopPitchDelay = findOpenLoopPitchDelay(&(ctx->weightedInputSignal[MAXIMUM_INT_PITCH_DELAY]));

    /* define boundaries for the closed loop pitch delay search as specified in 3.7 */
    int16_t intPitchDelayMin = openLoopPitchDelay-3;
    if (intPitchDelayMin < 20) {
        intPitchDelayMin = 20;
    }
    int16_t intPitchDelayMax = intPitchDelayMin + 6;
    if (intPitchDelayMax > MAXIMUM_INT_PITCH_DELAY) {
        intPitchDelayMax = MAXIMUM_INT_PITCH_DELAY;
        intPitchDelayMin = MAXIMUM_INT_PITCH_DELAY - 6;
    }

    /*****************************************************************************************/
    /* loop over the two subframes: closed-loop pitch search (adaptative codebook), fixed codebook, memory update */
    /* set index and buffers */
    int subframeIndex;
    int LPCoefficientsIndex = 0;
    int parametersIndex = 4; /* index to insert parameters in the parameters output array */
    word16_t impulseResponseInput[L_SUBFRAME]; /* input buffer for the impulse response computation: in Q12, 1 followed by all zeros, see spec A3.5 */
    memset(impulseResponseInput, 0, sizeof(impulseResponseInput));
    impulseResponseInput[0] = ONE_IN_Q12;

    for (subframeIndex=0; subframeIndex<L_FRAME; subframeIndex+=L_SUBFRAME) {
        /* weighted LP coefficients of the current subframe */
        word16_t *subframeWeightedLP = &(weightedqLPCoefficients[LPCoefficientsIndex]);

        /*** Compute the impulse response: filter a subframe-long buffer holding a unit followed by zeros through 1/weightedqLPCoefficients as in spec A.3.5 ***/
        word16_t impulseResponseBuffer[NB_LSP_COEFF+L_SUBFRAME]; /* in Q12, needs NB_LSP_COEFF past values to go through the filtering function */
        memset(impulseResponseBuffer, 0, (NB_LSP_COEFF)*sizeof(word16_t)); /* set the past values to zero */
        synthesisFilter(impulseResponseInput, subframeWeightedLP, &(impulseResponseBuffer[NB_LSP_COEFF]));

        /*** Compute the target signal (x[n]) as in spec A.3.6, in Q0 ***/
        /* excitationVector[L_PAST_EXCITATION+subframeIndex] currently stores in Q0 the LPResidualSignal as in spec A.3.3 eq A.3 */
        synthesisFilter(&(ctx->excitationVector[L_PAST_EXCITATION+subframeIndex]), subframeWeightedLP, &(ctx->targetSignal[NB_LSP_COEFF]));

        /*** Adaptative codebook search: compute intPitchDelay, fracPitchDelay and the associated parameter, and also the adaptative codebook vector used to generate the excitation ***/
        /* after this call, excitationVector[L_PAST_EXCITATION + subframeIndex] contains the adaptative codebook vector as in spec 3.7.1 */
        int16_t intPitchDelay, fracPitchDelay;
        adaptativeCodebookSearch(&(ctx->excitationVector[L_PAST_EXCITATION + subframeIndex]), &intPitchDelayMin, &intPitchDelayMax,
                &(impulseResponseBuffer[NB_LSP_COEFF]), &(ctx->targetSignal[NB_LSP_COEFF]),
                &intPitchDelay, &fracPitchDelay, &(parameters[parametersIndex]), subframeIndex);

        /*** Compute adaptative codebook gain spec 3.7.3, result in Q14 ***/
        /* compute the filtered adaptative codebook vector spec 3.7.3 */
        /* this computation makes use of two partial results used for gainQuantization too (yy and xy in eq63), they are part of the function output */
        /* note: spec 3.7.3 eq44 convolves the impulse response with the adaptative codebook vector to compute the filtered version */
        /* in Annex A the filter is simpler, so it is faster to filter the vector directly with the weightedqLPCoefficients */
        word16_t filteredAdaptativeCodebookVector[NB_LSP_COEFF+L_SUBFRAME]; /* in Q0, the first NB_LSP_COEFF words are set to zero and used by the filter only */
        memset(filteredAdaptativeCodebookVector, 0, NB_LSP_COEFF*sizeof(word16_t));
        synthesisFilter(&(ctx->excitationVector[L_PAST_EXCITATION + subframeIndex]), subframeWeightedLP, &(filteredAdaptativeCodebookVector[NB_LSP_COEFF]));

        word64_t gainQuantizationXy, gainQuantizationYy; /* Q0 values reused in gain quantization */
        word16_t adaptativeCodebookGain = computeAdaptativeCodebookGain(&(ctx->targetSignal[NB_LSP_COEFF]),
                &(filteredAdaptativeCodebookVector[NB_LSP_COEFF]), &gainQuantizationXy, &gainQuantizationYy); /* gain in Q14 */

        /* increase the parameters index and compute P0 if needed */
        parametersIndex++;
        if (subframeIndex==0) {
            /* first subframe: compute P0, the parity bit of P1 */
            parameters[parametersIndex] = computeParity(parameters[parametersIndex-1]);
            parametersIndex++;
        }

        /*** Fixed codebook search: compute the parameters for the fixed codebook and the regular and convolved versions of the fixed codebook vector ***/
        word16_t fixedCodebookVector[L_SUBFRAME];          /* in Q13 */
        word16_t convolvedFixedCodebookVector[L_SUBFRAME]; /* in Q12 */
        fixedCodebookSearch(&(ctx->targetSignal[NB_LSP_COEFF]), &(impulseResponseBuffer[NB_LSP_COEFF]), intPitchDelay,
                ctx->lastQuantizedAdaptativeCodebookGain, &(filteredAdaptativeCodebookVector[NB_LSP_COEFF]), adaptativeCodebookGain,
                &(parameters[parametersIndex]), &(parameters[parametersIndex+1]), fixedCodebookVector, convolvedFixedCodebookVector);
        parametersIndex+=2;

        /*** gains quantization ***/
        word16_t quantizedAdaptativeCodebookGain; /* in Q14 */
        word16_t quantizedFixedCodebookGain;      /* in Q1 */
        gainQuantization(ctx, &(ctx->targetSignal[NB_LSP_COEFF]), &(filteredAdaptativeCodebookVector[NB_LSP_COEFF]),
                convolvedFixedCodebookVector, fixedCodebookVector, gainQuantizationXy, gainQuantizationYy,
                &quantizedAdaptativeCodebookGain, &quantizedFixedCodebookGain,
                &(parameters[parametersIndex]), &(parameters[parametersIndex+1]));
        parametersIndex+=2;

        /*** subframe basis indexes and memory updates ***/
        LPCoefficientsIndex += NB_LSP_COEFF;

        /* remember the adaptative codebook gain, bounded by O2_IN_Q14 and ONE_POINT_2_IN_Q14 */
        ctx->lastQuantizedAdaptativeCodebookGain = quantizedAdaptativeCodebookGain;
        if (ctx->lastQuantizedAdaptativeCodebookGain>ONE_POINT_2_IN_Q14) ctx->lastQuantizedAdaptativeCodebookGain = ONE_POINT_2_IN_Q14;
        if (ctx->lastQuantizedAdaptativeCodebookGain<O2_IN_Q14) ctx->lastQuantizedAdaptativeCodebookGain = O2_IN_Q14;

        /* compute the excitation for the current subframe as in spec A.3.10 */
        /* excitationVector[L_PAST_EXCITATION + subframeIndex] currently contains in Q0 the adaptative codebook vector, quantizedAdaptativeCodebookGain is in Q14 */
        /* fixedCodebookVector in Q13, quantizedFixedCodebookGain in Q1 */
        for (k=0; k<L_SUBFRAME; k++) {
            ctx->excitationVector[L_PAST_EXCITATION + subframeIndex + k] = (word16_t)(SATURATE(PSHR(ADD32(
                    MULT16_16(ctx->excitationVector[L_PAST_EXCITATION + subframeIndex + k], quantizedAdaptativeCodebookGain),
                    MULT16_16(fixedCodebookVector[k], quantizedFixedCodebookGain)), 14), MAXINT16)); /* result in Q0 */
        }

        /* update the targetSignal memory as in spec A.3.10 */
        quantizedAdaptativeCodebookGain = PSHR(quantizedAdaptativeCodebookGain, 1); /* quantizedAdaptativeCodebookGain in Q13 */
        for (k=0; k<NB_LSP_COEFF; k++) {
            /* targetSignal[k] = targetSignal[L_SUBFRAME+k] - quantizedAdaptativeCodebookGain*filteredAdaptativeCodebookVector[L_SUBFRAME+k]
                                 - quantizedFixedCodebookGain*convolvedFixedCodebookVector[L_SUBFRAME-NB_LSP_COEFF+k] */
            word32_t acc = MAC16_16(MULT16_16(quantizedAdaptativeCodebookGain, filteredAdaptativeCodebookVector[L_SUBFRAME+k]),
                    quantizedFixedCodebookGain, convolvedFixedCodebookVector[L_SUBFRAME-NB_LSP_COEFF+k]); /* acc in Q13 */
            ctx->targetSignal[k] = (word16_t)(SATURATE(SUB32(ctx->targetSignal[L_SUBFRAME+k], PSHR(acc, 13)), MAXINT16));
        }
    }

    /*****************************************************************************************/
    /*** frame basis memory updates                                                        ***/
    /* shift left by L_FRAME the signal buffer */
    memmove(ctx->signalBuffer, &(ctx->signalBuffer[L_FRAME]), (L_LP_ANALYSIS_WINDOW-L_FRAME)*sizeof(word16_t));
    /* update the previous LSP coefficient buffers */
    memcpy(ctx->previousLSPCoefficients, LSPCoefficients, NB_LSP_COEFF*sizeof(word16_t));
    memcpy(ctx->previousqLSPCoefficients, qLSPCoefficients, NB_LSP_COEFF*sizeof(word16_t));
    /* shift left by L_FRAME the weightedInputSignal buffer */
    memmove(ctx->weightedInputSignal, &(ctx->weightedInputSignal[L_FRAME]), MAXIMUM_INT_PITCH_DELAY*sizeof(word16_t));
    /* shift left by L_FRAME the excitationVector */
    memmove(ctx->excitationVector, &(ctx->excitationVector[L_FRAME]), L_PAST_EXCITATION*sizeof(word16_t));

    /*** Convert the array of parameters into the bitStream ***/
    parametersArray2BitStream(parameters, bitStream);
}
/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */ static spx_word32_t pitch_gain_search_3tap( const spx_word16_t target[], /* Target vector */ const spx_coef_t ak[], /* LPCs for this subframe */ const spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */ const spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */ spx_sig_t exc[], /* Excitation */ const signed char* gain_cdbk, int gain_cdbk_size, int pitch, /* Pitch value */ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ SpeexBits* bits, char* stack, const spx_word16_t* exc2, const spx_word16_t* r, spx_word16_t* new_target, int* cdbk_index, int plc_tuning, spx_word32_t cumul_gain, int scaledown ) { int i, j; VARDECL(spx_word16_t * tmp1); VARDECL(spx_word16_t * e); spx_word16_t* x[3]; spx_word32_t corr[3]; spx_word32_t A[3][3]; spx_word16_t gain[3]; spx_word32_t err; spx_word16_t max_gain = 128; int best_cdbk = 0; ALLOC(tmp1, 3 * nsf, spx_word16_t); ALLOC(e, nsf, spx_word16_t); if (cumul_gain > 262144) max_gain = 31; x[0] = tmp1; x[1] = tmp1 + nsf; x[2] = tmp1 + 2 * nsf; for (j = 0; j < nsf; j++) new_target[j] = target[j]; { VARDECL(spx_mem_t * mm); int pp = pitch - 1; ALLOC(mm, p, spx_mem_t); for (j = 0; j < nsf; j++) { if (j - pp < 0) e[j] = exc2[j - pp]; else if (j - pp - pitch < 0) e[j] = exc2[j - pp - pitch]; else e[j] = 0; } #ifdef FIXED_POINT /* Scale target and excitation down if needed (avoiding overflow) */ if (scaledown) { for (j = 0; j < nsf; j++) e[j] = SHR16(e[j], 1); for (j = 0; j < nsf; j++) new_target[j] = SHR16(new_target[j], 1); } #endif for (j = 0; j < p; j++) mm[j] = 0; iir_mem16(e, ak, e, nsf, p, mm, stack); for (j = 0; j < p; j++) mm[j] = 0; filter_mem16(e, awk1, awk2, e, nsf, p, mm, stack); for (j = 0; j < nsf; j++) x[2][j] = e[j]; } for (i = 1; i >= 0; i--) { spx_word16_t e0 = exc2[-pitch - 1 + i]; #ifdef FIXED_POINT /* Scale excitation down if needed (avoiding overflow) */ if (scaledown) e0 = SHR16(e0, 1); #endif 
x[i][0] = MULT16_16_Q14(r[0], e0); for (j = 0; j < nsf - 1; j++) x[i][j + 1] = ADD32(x[i + 1][j], MULT16_16_P14(r[j + 1], e0)); } for (i = 0; i < 3; i++) corr[i] = inner_prod(x[i], new_target, nsf); for (i = 0; i < 3; i++) for (j = 0; j <= i; j++) A[i][j] = A[j][i] = inner_prod(x[i], x[j], nsf); { spx_word32_t C[9]; #ifdef FIXED_POINT spx_word16_t C16[9]; #else spx_word16_t* C16 = C; #endif C[0] = corr[2]; C[1] = corr[1]; C[2] = corr[0]; C[3] = A[1][2]; C[4] = A[0][1]; C[5] = A[0][2]; C[6] = A[2][2]; C[7] = A[1][1]; C[8] = A[0][0]; /*plc_tuning *= 2;*/ if (plc_tuning < 2) plc_tuning = 2; if (plc_tuning > 30) plc_tuning = 30; #ifdef FIXED_POINT C[0] = SHL32(C[0], 1); C[1] = SHL32(C[1], 1); C[2] = SHL32(C[2], 1); C[3] = SHL32(C[3], 1); C[4] = SHL32(C[4], 1); C[5] = SHL32(C[5], 1); C[6] = MAC16_32_Q15(C[6], MULT16_16_16(plc_tuning, 655), C[6]); C[7] = MAC16_32_Q15(C[7], MULT16_16_16(plc_tuning, 655), C[7]); C[8] = MAC16_32_Q15(C[8], MULT16_16_16(plc_tuning, 655), C[8]); normalize16(C, C16, 32767, 9); #else C[6] *= .5 * (1 + .02 * plc_tuning); C[7] *= .5 * (1 + .02 * plc_tuning); C[8] *= .5 * (1 + .02 * plc_tuning); #endif best_cdbk = pitch_gain_search_3tap_vq(gain_cdbk, gain_cdbk_size, C16, max_gain); #ifdef FIXED_POINT gain[0] = ADD16(32, (spx_word16_t)gain_cdbk[best_cdbk * 4]); gain[1] = ADD16(32, (spx_word16_t)gain_cdbk[best_cdbk * 4 + 1]); gain[2] = ADD16(32, (spx_word16_t)gain_cdbk[best_cdbk * 4 + 2]); /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/ #else gain[0] = 0.015625 * gain_cdbk[best_cdbk * 4] + .5; gain[1] = 0.015625 * gain_cdbk[best_cdbk * 4 + 1] + .5; gain[2] = 0.015625 * gain_cdbk[best_cdbk * 4 + 2] + .5; #endif *cdbk_index = best_cdbk; } SPEEX_MEMSET(exc, 0, nsf); for (i = 0; i < 3; i++) { int j; int tmp1, tmp3; int pp = pitch + 1 - i; tmp1 = nsf; if (tmp1 > pp) tmp1 = pp; for (j = 0; j < tmp1; j++) exc[j] = MAC16_16(exc[j], SHL16(gain[2 - i], 7), exc2[j - pp]); tmp3 = nsf; if (tmp3 > pp + pitch) tmp3 = pp + pitch; for (j = tmp1; j < 
tmp3; j++) exc[j] = MAC16_16(exc[j], SHL16(gain[2 - i], 7), exc2[j - pp - pitch]); } for (i = 0; i < nsf; i++) { spx_word32_t tmp = ADD32(ADD32(MULT16_16(gain[0], x[2][i]), MULT16_16(gain[1], x[1][i])), MULT16_16(gain[2], x[0][i])); new_target[i] = SUB16(new_target[i], EXTRACT16(PSHR32(tmp, 6))); } err = inner_prod(new_target, new_target, nsf); return err; }
void split_cb_search_shape_sign( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int complexity, int update_target ) { int i,j,k,m,n,q; VARDECL(spx_word16_t *resp); #ifdef _USE_SSE VARDECL(__m128 *resp2); VARDECL(__m128 *E); #else spx_word16_t *resp2; VARDECL(spx_word32_t *E); #endif VARDECL(spx_word16_t *t); VARDECL(spx_sig_t *e); VARDECL(spx_word16_t *tmp); VARDECL(spx_word32_t *ndist); VARDECL(spx_word32_t *odist); VARDECL(int *itmp); VARDECL(spx_word16_t **ot2); VARDECL(spx_word16_t **nt2); spx_word16_t **ot, **nt; VARDECL(int **nind); VARDECL(int **oind); VARDECL(int *ind); const signed char *shape_cb; int shape_cb_size, subvect_size, nb_subvect; const split_cb_params *params; int N=2; VARDECL(int *best_index); VARDECL(spx_word32_t *best_dist); VARDECL(int *best_nind); VARDECL(int *best_ntarget); int have_sign; N=complexity; if (N>10) N=10; /* Complexity isn't as important for the codebooks as it is for the pitch */ N=(2*N)/3; if (N<1) N=1; if (N==1) { split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target); return; } ALLOC(ot2, N, spx_word16_t*); ALLOC(nt2, N, spx_word16_t*); ALLOC(oind, N, int*); ALLOC(nind, N, int*); params = (const split_cb_params *) par; subvect_size = params->subvect_size; nb_subvect = params->nb_subvect; shape_cb_size = 1<<params->shape_bits; shape_cb = params->shape_cb; have_sign = params->have_sign; ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t); #ifdef _USE_SSE ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128); ALLOC(E, shape_cb_size>>2, __m128); #else resp2 = resp; ALLOC(E, shape_cb_size, spx_word32_t); #endif ALLOC(t, nsf, spx_word16_t); ALLOC(e, 
nsf, spx_sig_t); ALLOC(ind, nb_subvect, int); ALLOC(tmp, 2*N*nsf, spx_word16_t); for (i=0;i<N;i++) { ot2[i]=tmp+2*i*nsf; nt2[i]=tmp+(2*i+1)*nsf; } ot=ot2; nt=nt2; ALLOC(best_index, N, int); ALLOC(best_dist, N, spx_word32_t); ALLOC(best_nind, N, int); ALLOC(best_ntarget, N, int); ALLOC(ndist, N, spx_word32_t); ALLOC(odist, N, spx_word32_t); ALLOC(itmp, 2*N*nb_subvect, int); for (i=0;i<N;i++) { nind[i]=itmp+2*i*nb_subvect; oind[i]=itmp+(2*i+1)*nb_subvect; } SPEEX_COPY(t, target, nsf); for (j=0;j<N;j++) SPEEX_COPY(&ot[j][0], t, nsf); /* Pre-compute codewords response and energy */ compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack); for (j=0;j<N;j++) odist[j]=0; /*For all subvectors*/ for (i=0;i<nb_subvect;i++) { /*"erase" nbest list*/ for (j=0;j<N;j++) ndist[j]=VERY_LARGE32; /* This is not strictly necessary, but it provides an additonal safety to prevent crashes in case something goes wrong in the previous steps (e.g. NaNs) */ for (j=0;j<N;j++) best_nind[j] = best_ntarget[j] = 0; /*For all n-bests of previous subvector*/ for (j=0;j<N;j++) { spx_word16_t *x=ot[j]+subvect_size*i; spx_word32_t tener = 0; for (m=0;m<subvect_size;m++) tener = MAC16_16(tener, x[m],x[m]); #ifdef FIXED_POINT tener = SHR32(tener,1); #else tener *= .5; #endif /*Find new n-best based on previous n-best j*/ #ifndef DISABLE_WIDEBAND if (have_sign) vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); else #endif /* DISABLE_WIDEBAND */ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack); /*For all new n-bests*/ for (k=0;k<N;k++) { /* Compute total distance (including previous sub-vectors */ spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener); /*update n-best list*/ if (err<ndist[N-1]) { for (m=0;m<N;m++) { if (err < ndist[m]) { for (n=N-1;n>m;n--) { ndist[n] = ndist[n-1]; best_nind[n] = best_nind[n-1]; best_ntarget[n] = best_ntarget[n-1]; } /* n is equal to m here, so they're 
interchangeable */ ndist[m] = err; best_nind[n] = best_index[k]; best_ntarget[n] = j; break; } } } } if (i==0) break; } for (j=0;j<N;j++) { /*previous target (we don't care what happened before*/ for (m=(i+1)*subvect_size;m<nsf;m++) nt[j][m]=ot[best_ntarget[j]][m]; /* New code: update the rest of the target only if it's worth it */ for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_nind[j]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; #ifdef FIXED_POINT g=sign*shape_cb[rind*subvect_size+m]; #else g=sign*0.03125*shape_cb[rind*subvect_size+m]; #endif target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1)); } for (q=0;q<nb_subvect;q++) nind[j][q]=oind[best_ntarget[j]][q]; nind[j][i]=best_nind[j]; } /*update old-new data*/ /* just swap pointers instead of a long copy */ { spx_word16_t **tmp2; tmp2=ot; ot=nt; nt=tmp2; } for (j=0;j<N;j++) for (m=0;m<nb_subvect;m++) oind[j][m]=nind[j][m]; for (j=0;j<N;j++) odist[j]=ndist[j]; } /*save indices*/ for (i=0;i<nb_subvect;i++) { ind[i]=nind[0][i]; speex_bits_pack(bits,ind[i],params->shape_bits+have_sign); } /* Put everything back together */ for (i=0;i<nb_subvect;i++) { int rind; spx_word16_t sign=1; rind = ind[i]; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } #ifdef FIXED_POINT if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5)); } #else for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j]; #endif } /* Update excitation */ for (j=0;j<nsf;j++) exc[j]=ADD32(exc[j],e[j]); /* Update target: only update target if necessary */ if (update_target) { VARDECL(spx_word16_t *r2); ALLOC(r2, nsf, spx_word16_t); for (j=0;j<nsf;j++) r2[j] = EXTRACT16(PSHR32(e[j] ,6)); syn_percep_zero16(r2, ak, awk1, awk2, r2, 
nsf,p, stack); for (j=0;j<nsf;j++) target[j]=SUB16(target[j],PSHR16(r2[j],2)); } }
void split_cb_search_shape_sign( spx_word16_t target[], /* target vector */ spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs for this subframe */ spx_coef_t awk2[], /* Weighted LPCs for this subframe */ const void *par, /* Codebook/search parameters*/ int p, /* number of LPC coeffs */ int nsf, /* number of samples in subframe */ spx_sig_t *exc, spx_word16_t *r, SpeexBits *bits, char *stack, int complexity, int update_target ) { int i,j,m,q; const signed char *shape_cb; int shape_cb_size = 32, subvect_size = 10; int best_index; spx_word32_t best_dist; spx_word16_t resp[320]; spx_word16_t *resp2 = resp; spx_word32_t E[32]; spx_word16_t t[40]; spx_sig_t e[40]; shape_cb=exc_10_32_table; /* FIXME: Do we still need to copy the target? */ SPEEX_COPY(t, target, nsf); //compute_weighted_codebook { int i, k; spx_word16_t shape[10]; for (i=0;i<shape_cb_size;i++) { spx_word16_t *res; res = resp+i*subvect_size; for (k=0;k<subvect_size;k++) shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k]; E[i]=0; /* Compute codeword response using convolution with impulse response */ { spx_word32_t resj; spx_word16_t res16; // 0 resj = MULT16_16(shape[0],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[0] = res16; //++++++++++++++++++++++++++ // 1 resj = MULT16_16(shape[0],r[1]); resj = MAC16_16(resj,shape[1],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[1] = res16; //++++++++++++++++++++++++++ // 2 resj = MULT16_16(shape[0],r[2]); resj = MAC16_16(resj,shape[1],r[1]); resj = MAC16_16(resj,shape[2],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[2] = res16; //++++++++++++++++++++++++++ // 3 resj = MULT16_16(shape[0],r[3]); resj = MAC16_16(resj,shape[1],r[2]); resj = MAC16_16(resj,shape[2],r[1]); resj = MAC16_16(resj,shape[3],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute 
codeword energy E[i]=MAC16_16(E[i],res16,res16); res[3] = res16; //++++++++++++++++++++++++++ // 4 resj = MULT16_16(shape[0],r[4]); resj = MAC16_16(resj,shape[1],r[3]); resj = MAC16_16(resj,shape[2],r[2]); resj = MAC16_16(resj,shape[3],r[1]); resj = MAC16_16(resj,shape[4],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[4] = res16; //++++++++++++++++++++++++++ // 5 resj = MULT16_16(shape[0],r[5]); resj = MAC16_16(resj,shape[1],r[4]); resj = MAC16_16(resj,shape[2],r[3]); resj = MAC16_16(resj,shape[3],r[2]); resj = MAC16_16(resj,shape[4],r[1]); resj = MAC16_16(resj,shape[5],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[5] = res16; //++++++++++++++++++++++++++ // 6 resj = MULT16_16(shape[0],r[6]); resj = MAC16_16(resj,shape[1],r[5]); resj = MAC16_16(resj,shape[2],r[4]); resj = MAC16_16(resj,shape[3],r[3]); resj = MAC16_16(resj,shape[4],r[2]); resj = MAC16_16(resj,shape[5],r[1]); resj = MAC16_16(resj,shape[6],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[6] = res16; //++++++++++++++++++++++++++ // 7 resj = MULT16_16(shape[0],r[7]); resj = MAC16_16(resj,shape[1],r[6]); resj = MAC16_16(resj,shape[2],r[5]); resj = MAC16_16(resj,shape[3],r[4]); resj = MAC16_16(resj,shape[4],r[3]); resj = MAC16_16(resj,shape[5],r[2]); resj = MAC16_16(resj,shape[6],r[1]); resj = MAC16_16(resj,shape[7],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[7] = res16; //++++++++++++++++++++++++++ // 8 resj = MULT16_16(shape[0],r[8]); resj = MAC16_16(resj,shape[1],r[7]); resj = MAC16_16(resj,shape[2],r[6]); resj = MAC16_16(resj,shape[3],r[5]); resj = MAC16_16(resj,shape[4],r[4]); resj = MAC16_16(resj,shape[5],r[3]); resj = MAC16_16(resj,shape[6],r[2]); resj = MAC16_16(resj,shape[7],r[1]); resj = MAC16_16(resj,shape[8],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // 
Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[8] = res16; //++++++++++++++++++++++++++ // 9 resj = MULT16_16(shape[0],r[9]); resj = MAC16_16(resj,shape[1],r[8]); resj = MAC16_16(resj,shape[2],r[7]); resj = MAC16_16(resj,shape[3],r[6]); resj = MAC16_16(resj,shape[4],r[5]); resj = MAC16_16(resj,shape[5],r[4]); resj = MAC16_16(resj,shape[6],r[3]); resj = MAC16_16(resj,shape[7],r[2]); resj = MAC16_16(resj,shape[8],r[1]); resj = MAC16_16(resj,shape[9],r[0]); res16 = EXTRACT16(SHR32(resj, 13)); // Compute codeword energy E[i]=MAC16_16(E[i],res16,res16); res[9] = res16; //++++++++++++++++++++++++++ } } } for (i=0;i<4;i++) { spx_word16_t *x=t+subvect_size*i; /*Find new n-best based on previous n-best j*/ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack); speex_bits_pack(bits,best_index,5); { int rind; spx_word16_t *res; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } res = resp+rind*subvect_size; if (sign>0) for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]); else for (m=0;m<subvect_size;m++) t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]); if (sign==1) { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5); } else { for (j=0;j<subvect_size;j++) e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5)); } } for (m=0;m<subvect_size;m++) { spx_word16_t g; int rind; spx_word16_t sign=1; rind = best_index; if (rind>=shape_cb_size) { sign=-1; rind-=shape_cb_size; } q=subvect_size-m; g=sign*shape_cb[rind*subvect_size+m]; target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1)); } } /* Update excitation */ /* FIXME: We could update the excitation directly above */ for (j=0;j<nsf;j++) exc[j]=ADD32(exc[j],e[j]); }
void decodeAdaptativeCodeVector(bcg729DecoderChannelContextStruct *decoderChannelContext, int subFrameIndex, uint16_t adaptativeCodebookIndex, uint8_t parityFlag, uint8_t frameErasureFlag, int16_t *intPitchDelay, word16_t *excitationVector) { int16_t fracPitchDelay; /*** Compute the Pitch Delay from the Codebook index ***/ /* fracPitchDelay is computed in the range -1,0,1 */ if (subFrameIndex == 0 ) { /* first subframe */ if (parityFlag|frameErasureFlag) { /* there is an error (either parity or frame erased) */ *intPitchDelay = decoderChannelContext->previousIntPitchDelay; /* set the integer part of Pitch Delay to the last second subframe Pitch Delay computed spec: 4.1.2 */ /* Note: unable to find anything regarding this part in the spec, just copied it from the ITU source code */ fracPitchDelay = 0; decoderChannelContext->previousIntPitchDelay++; if (decoderChannelContext->previousIntPitchDelay>MAXIMUM_INT_PITCH_DELAY) decoderChannelContext->previousIntPitchDelay=MAXIMUM_INT_PITCH_DELAY; } else { /* parity and frameErasure flags are off, do the normal computation (doc 4.1.3) */ if (adaptativeCodebookIndex<197) { /* *intPitchDelay = (P1 + 2 )/ 3 + 19 */ *intPitchDelay = ADD16(MULT16_16_Q15(ADD16(adaptativeCodebookIndex,2), 10923), 19); /* MULT in Q15: 1/3 in Q15: 10923 */ /* fracPitchDelay = P1 − 3*intPitchDelay + 58 : fracPitchDelay in -1, 0, 1 */ fracPitchDelay = ADD16(SUB16(adaptativeCodebookIndex, MULT16_16(*intPitchDelay, 3)), 58); } else {/* adaptativeCodebookIndex>= 197 */ *intPitchDelay = SUB16(adaptativeCodebookIndex, 112); fracPitchDelay = 0; } /* backup the intPitchDelay */ decoderChannelContext->previousIntPitchDelay = *intPitchDelay; } } else { /* second subframe */ if (frameErasureFlag) { /* there is an error : frame erased, in case of parity error, it has been taken in account at first subframe */ /* unable to find anything regarding this part in the spec, just copied it from the ITU source code */ *intPitchDelay = 
decoderChannelContext->previousIntPitchDelay; fracPitchDelay = 0; decoderChannelContext->previousIntPitchDelay++; if (decoderChannelContext->previousIntPitchDelay>MAXIMUM_INT_PITCH_DELAY) decoderChannelContext->previousIntPitchDelay=MAXIMUM_INT_PITCH_DELAY; } else { /* frameErasure flags are off, do the normal computation (doc 4.1.3) */ int16_t tMin = SUB16(*intPitchDelay,5); /* intPitchDelay contains the intPitch computed for subframe one */ if (tMin<20) { tMin = 20; } if (tMin>134) { tMin = 134; } /* intPitchDelay = (P2 + 2 )/ 3 − 1 */ *intPitchDelay = SUB16(MULT16_16_Q15(ADD16(adaptativeCodebookIndex, 2), 10923), 1); /* fracPitchDelay = P2 − 2 − 3((P 2 + 2 )/ 3 − 1) */ fracPitchDelay = SUB16(SUB16(adaptativeCodebookIndex, MULT16_16(*intPitchDelay, 3)), 2); /* *intPitchDelay = (P2 + 2 )/ 3 − 1 + tMin */ *intPitchDelay = ADD16(*intPitchDelay,tMin); /* backup the intPitchDelay */ decoderChannelContext->previousIntPitchDelay = *intPitchDelay; } } /* now compute the adaptative codebook vector using the pitch delay we just get and the past excitation vector */ /* from spec 4.1.3 and 3.7.1 */ /* shall compute v(n ) = ∑ u (n - k + i )b30 (t + 3i ) + ∑ u (n - k + 1 + i )b30 (3 - t + 3i ) for i=0,...,9 and n = 0,...,39 (t in 0, 1, 2) */ /* with k = intPitchDelay and t = fracPitchDelay wich must be converted from range -1,0,1 to 0,1,2 */ /* u the past excitation vector */ /* v the adaptative codebook vector */ /* b30 an interpolation filter */ word16_t *excitationVectorMinusK; /* pointer to u(-k) */ /* scale fracPichDelay from -1,0.1 to 0,1,2 */ if (fracPitchDelay==1) { excitationVectorMinusK = &(excitationVector[-(*intPitchDelay+1)]); /* fracPitchDelay being positive -> increase by one the integer part and set to 2 the fractional part : -(k+1/3) -> -(k+1)+2/3 */ fracPitchDelay = 2; } else { fracPitchDelay = -fracPitchDelay; /* 0 unchanged, -1 -> +1 */ excitationVectorMinusK = &(excitationVector[-(*intPitchDelay)]); /* -(k-1/3) -> -k+1/3 or -(k) -> -k*/ } int n; for (n=0; 
n<L_SUBFRAME; n++) { /* loop over the whole subframe */ word16_t *excitationVectorNMinusK = &(excitationVectorMinusK[n]); /* point to u(n-k), unscaled value, full range */ word16_t *excitationVectorNMinusKPlusOne = &(excitationVectorMinusK[n+1]); /* point to u(n-k+1), unscaled value, full range */ word16_t *b301 = &(b30[fracPitchDelay]); /* point to b30(t) in Q0.15 : sums of all b30 coeffs is < 2, no overflow possible on 32 bits */ word16_t *b302 = &(b30[3-fracPitchDelay]); /* point to b30(3-t) in Q0.15*/ int i,j; /* j will store 3i */ word32_t acc = 0; /* in Q15 */ for (i=0, j=0; i<10; i++, j+=3) { acc = MAC16_16(acc, excitationVectorNMinusK[-i], b301[j]); /* Note : the spec says: u(n−k+i)b30(t+3i) but the ITU code do (and here too) u(n-k-i )b30(t+3i) */ acc = MAC16_16(acc, excitationVectorNMinusKPlusOne[i], b302[j]); /* u(n-k+1+i)b30(3-t+3i) */ } excitationVector[n] = SATURATE(PSHR(acc, 15), MAXINT16); /* acc in Q15, shift/round to unscaled value and check overflow on 16 bits */ } return; }
/* HINT: If you had a 40-bit accumulator, you could shift only at the end */ sum = ADD32(sum, SHR32(part, 6)); } return sum; } #endif #ifndef OVERRIDE_PITCH_XCORR #if 0 /* HINT: Enable this for machines with enough registers (i.e. not x86) */ void pitch_xcorr(const spx_word16_t* _x, const spx_word16_t* _y, spx_word32_t* corr, int len, int nb_pitch, char* stack) { int i, j; for (i = 0; i < nb_pitch; i += 4) { /* Compute correlation*/ /*corr[nb_pitch-1-i]=inner_prod(x, _y+i, len);*/ spx_word32_t sum1 = 0; spx_word32_t sum2 = 0; spx_word32_t sum3 = 0; spx_word32_t sum4 = 0; const spx_word16_t* y = _y + i; const spx_word16_t* x = _x; spx_word16_t y0, y1, y2, y3; /*y0=y[0];y1=y[1];y2=y[2];y3=y[3];*/ y0 = *y++; y1 = *y++; y2 = *y++; y3 = *y++; for (j = 0; j < len; j += 4) { spx_word32_t part1; spx_word32_t part2; spx_word32_t part3; spx_word32_t part4; part1 = MULT16_16(*x, y0); part2 = MULT16_16(*x, y1); part3 = MULT16_16(*x, y2); part4 = MULT16_16(*x, y3); x++; y0 = *y++; part1 = MAC16_16(part1, *x, y1); part2 = MAC16_16(part2, *x, y2); part3 = MAC16_16(part3, *x, y3); part4 = MAC16_16(part4, *x, y0); x++; y1 = *y++; part1 = MAC16_16(part1, *x, y2); part2 = MAC16_16(part2, *x, y3); part3 = MAC16_16(part3, *x, y0); part4 = MAC16_16(part4, *x, y1); x++; y2 = *y++; part1 = MAC16_16(part1, *x, y3); part2 = MAC16_16(part2, *x, y0); part3 = MAC16_16(part3, *x, y1); part4 = MAC16_16(part4, *x, y2); x++; y3 = *y++; sum1 = ADD32(sum1, SHR32(part1, 6)); sum2 = ADD32(sum2, SHR32(part2, 6)); sum3 = ADD32(sum3, SHR32(part3, 6)); sum4 = ADD32(sum4, SHR32(part4, 6)); } corr[nb_pitch - 1 - i] = sum1; corr[nb_pitch - 2 - i] = sum2; corr[nb_pitch - 3 - i] = sum3; corr[nb_pitch - 4 - i] = sum4; } }
/**
 * Decodes the 3-tap pitch predictor parameters from the bit-stream and
 * builds the corresponding excitation contribution.
 *
 * Reads the pitch (offset by start) and the gain codebook index from bits,
 * reconstructs the three gains, optionally limits them after lost frames,
 * reports pitch and gains through pitch_val / gain_val, and writes the
 * predicted excitation to exc_out[0..nsf-1].
 *
 * The parameters end, pitch_coef and stack are not referenced here.
 */
void pitch_unquant_3tap(
   spx_word16_t exc[],          /* Input excitation */
   spx_word32_t exc_out[],      /* Output excitation */
   int start,                   /* Smallest pitch value allowed */
   int end,                     /* Largest pitch value allowed */
   spx_word16_t pitch_coef,     /* Voicing (pitch) coefficient */
   const void *par,
   int nsf,                     /* Number of samples in subframe */
   int *pitch_val,
   spx_word16_t *gain_val,
   SpeexBits *bits,
   char *stack,
   int count_lost,
   int subframe_offset,
   spx_word16_t last_pitch_gain,
   int cdbk_offset
)
{
   int i;
   int pitch;
   int gain_index;
   spx_word16_t gain[3];
   const signed char *gain_cdbk;
   int gain_cdbk_size;
   const ltp_params *params = (const ltp_params*) par;

   /* Select the gain codebook: each entry is 4 bytes, cdbk_offset picks the table */
   gain_cdbk_size = 1 << params->gain_bits;
   gain_cdbk = params->gain_cdbk + 4 * gain_cdbk_size * cdbk_offset;

   /* The pitch is transmitted first, relative to start, then the gain index */
   pitch = speex_bits_unpack_unsigned(bits, params->pitch_bits);
   pitch += start;
   gain_index = speex_bits_unpack_unsigned(bits, params->gain_bits);

   /* Reconstruct the three tap gains from the codebook entry */
#ifdef FIXED_POINT
   for (i = 0; i < 3; i++)
      gain[i] = ADD16(32, (spx_word16_t)gain_cdbk[gain_index * 4 + i]);
#else
   for (i = 0; i < 3; i++)
      gain[i] = 0.015625 * gain_cdbk[gain_index * 4 + i] + .5;
#endif

   /* After lost frames, cap the overall gain using the last pitch gain
      (halved once count_lost reaches 4) */
   if (count_lost && pitch > subframe_offset)
   {
      spx_word16_t gain_sum;
      spx_word16_t limit;
#ifdef FIXED_POINT
      limit = count_lost < 4 ? last_pitch_gain : SHR16(last_pitch_gain, 1);
      if (limit > 62)
         limit = 62;
#else
      limit = count_lost < 4 ? last_pitch_gain : 0.5 * last_pitch_gain;
      if (limit > .95)
         limit = .95;
#endif
      gain_sum = gain_3tap_to_1tap(gain);

      if (gain_sum > limit)
      {
         /* Scale all three taps by limit / gain_sum */
         spx_word16_t fact = DIV32_16(SHL32(EXTEND32(limit), 14), gain_sum);
         for (i = 0; i < 3; i++)
            gain[i] = MULT16_16_Q14(fact, gain[i]);
      }
   }

   /* Report the decoded values, then pre-shift the gains for the synthesis below */
   *pitch_val = pitch;
   for (i = 0; i < 3; i++)
   {
      gain_val[i] = gain[i];
      gain[i] = SHL16(gain[i], 7);
   }

   SPEEX_MEMSET(exc_out, 0, nsf);
   for (i = 0; i < 3; i++)
   {
      int n;
      int lag = pitch + 1 - i;
      int wrap = lag + pitch;
      int first_end = (nsf > lag) ? lag : nsf;
      int second_end = (nsf > wrap) ? wrap : nsf;

      /* Samples whose source lies in the past excitation */
      for (n = 0; n < first_end; n++)
         exc_out[n] = MAC16_16(exc_out[n], gain[2 - i], exc[n - lag]);
      /* Remaining samples reuse the excitation one further pitch period back */
      for (n = first_end; n < second_end; n++)
         exc_out[n] = MAC16_16(exc_out[n], gain[2 - i], exc[n - lag - pitch]);
   }
}