/*
 * Recursive (IIR) synthesis filter, processed one sample at a time.
 *
 * For each of the `length` samples the accumulator is loaded with the input
 * sample in its high word and shifted right by 3.  memory[k] * coef[k] is
 * then subtracted for k = order-1 down to 0 while the memory entries are
 * shifted up by one slot.  The result is shifted left by 3 and rounded; the
 * rounded value is written to `output` and stored as the new memory[0].
 *
 * output : receives `length` filtered samples
 * input  : `length` input samples
 * coef   : filter coefficients, indexed 0 .. order-1
 * memory : filter state, indexed 0 .. order-1 (updated in place)
 */
void SynthesisFilter(short *output, short *input, short *coef, short *memory,
                     short order, short length)
{
    short n;
    short tap;
    long sum;

    for (n = 0; n < length; n++) {
        /* Load the sample and leave 3 bits of headroom for the recursion. */
        sum = L_shr(L_deposit_h(input[n]), 3);

        /* Highest tap first; each state entry moves up after it is used. */
        tap = order - 1;
        while (tap > 0) {
            sum = L_msu(sum, memory[tap], coef[tap]);
            memory[tap] = memory[tap - 1];
            tap--;
        }

        /* Tap 0, then undo the headroom shift. */
        sum = L_shl(L_msu(sum, memory[0], coef[0]), 3);

        output[n] = round(sum);
        memory[0] = round(sum);
    }
}
/*
 * Expand one LSP polynomial; all computation is in Q24.
 *
 * lsp : LSP vector, of which every second entry (lsp[0], lsp[2], ... lsp[8])
 *       is consumed
 * f   : output coefficients f[0] .. f[5]
 */
static void Get_lsp_pol(int16_t *lsp, int32_t *f)
{
  int16_t order, k, hi, lo;
  int32_t prod;

  f[0] = L_mult(4096, 2048);               /* f[0] = 1.0 in Q24           */
  f[1] = L_msu((int32_t)0, lsp[0], 512);   /* f[1] = -2.0 * lsp[0] in Q24 */

  for (order = 2; order <= 5; order++) {
    /* LSP entry used for this order: lsp[2], lsp[4], lsp[6], lsp[8]. */
    const int16_t *cur = &lsp[2 * (order - 1)];

    f[order] = f[order - 2];

    /* Update f[order] down to f[2], highest index first. */
    for (k = order; k >= 2; k--) {
      WebRtcG729fix_L_Extract(f[k - 1], &hi, &lo);
      prod = WebRtcG729fix_Mpy_32_16(hi, lo, *cur);   /* f[k-1] * lsp */
      prod = L_shl(prod, 1);
      f[k] = WebRtcSpl_AddSatW32(f[k], f[k - 2]);     /* f[k] += f[k-2] */
      f[k] = WebRtcSpl_SubSatW32(f[k], prod);         /* f[k] -= prod   */
    }

    f[1] = L_msu(f[1], *cur, 512);                    /* f[1] -= lsp << 9 */
  }
}
/*
 * Compute the six coefficients f[0..5] of one LSP polynomial in Q24.
 * Only the entries lsp[0], lsp[2], lsp[4], lsp[6] and lsp[8] are read.
 */
static void Get_lsp_pol(Word16 *lsp, Word32 *f)
{
  Word16 deg, pos, hi, lo;
  Word16 *lsp_cur;
  Word32 t0;

  f[0] = L_mult(4096, 2048);              /* 1.0 in Q24            */
  f[1] = L_msu((Word32)0, lsp[0], 512);   /* -2.0 * lsp[0] in Q24  */

  lsp_cur = lsp + 2;
  deg = 2;
  while (deg <= 5) {
    f[deg] = f[deg - 2];

    /* Walk from f[deg] back to f[2]. */
    pos = deg;
    while (pos > 1) {
      L_Extract(f[pos - 1], &hi, &lo);
      t0 = L_shl(Mpy_32_16(hi, lo, *lsp_cur), 1);   /* 2 * f[pos-1] * lsp */
      f[pos] = L_add(f[pos], f[pos - 2]);
      f[pos] = L_sub(f[pos], t0);
      pos--;
    }

    f[1] = L_msu(f[1], *lsp_cur, 512);              /* f[1] -= lsp << 9 */

    lsp_cur += 2;
    deg++;
  }
}
/*
 * Real FFT built on top of the complex FFT c_fft().
 *
 * The buffer holds complex samples stored as (real, imaginary) pairs, so
 * the indices below advance by two per complex value.  After the complex
 * transform, the DC/foldover pair is formed from the first two entries and
 * every remaining bin i is combined with its mirror bin j = SIZE - i using
 * the phase table phs_tbl[].
 */
void r_fft(Word16 * farray_ptr, Flag *pOverflow)
{
    Word16 sum_re;      /* x[i]   + x[j]   */
    Word16 dif_im;      /* x[i+1] - x[j+1] */
    Word16 sum_im;      /* x[i+1] + x[j+1] */
    Word16 dif_re;      /* x[j]   - x[i]   */
    Word32 L_sum_re;
    Word32 L_dif_im;
    Word32 L_acc;
    Word16 i;
    Word16 j;

    /* Perform the complex FFT */
    c_fft(farray_ptr, pOverflow);

    /* First, handle the DC and foldover frequencies */
    sum_re = farray_ptr[0];
    sum_im = farray_ptr[1];
    farray_ptr[0] = add(sum_re, sum_im, pOverflow);
    farray_ptr[1] = sub(sum_re, sum_im, pOverflow);

    /* Now, handle the remaining positive frequencies */
    for (i = 2; i <= SIZE_BY_TWO; i += 2)
    {
        j = SIZE - i;

        /* All four inputs are read before any output is written. */
        sum_re = add(farray_ptr[i], farray_ptr[j], pOverflow);
        dif_im = sub(farray_ptr[i + 1], farray_ptr[j + 1], pOverflow);
        sum_im = add(farray_ptr[i + 1], farray_ptr[j + 1], pOverflow);
        dif_re = sub(farray_ptr[j], farray_ptr[i], pOverflow);

        L_sum_re = L_deposit_h(sum_re);
        L_dif_im = L_deposit_h(dif_im);

        /* Bin i, real part */
        L_acc = L_mac(L_sum_re, sum_im, phs_tbl[i], pOverflow);
        L_acc = L_msu(L_acc, dif_re, phs_tbl[i + 1], pOverflow);
        farray_ptr[i] = pv_round(L_shr(L_acc, 1, pOverflow), pOverflow);

        /* Bin i, imaginary part */
        L_acc = L_mac(L_dif_im, dif_re, phs_tbl[i], pOverflow);
        L_acc = L_mac(L_acc, sum_im, phs_tbl[i + 1], pOverflow);
        farray_ptr[i + 1] = pv_round(L_shr(L_acc, 1, pOverflow), pOverflow);

        /* Bin j, real part */
        L_acc = L_mac(L_sum_re, sum_im, phs_tbl[j], pOverflow);
        L_acc = L_mac(L_acc, dif_re, phs_tbl[j + 1], pOverflow);
        farray_ptr[j] = pv_round(L_shr(L_acc, 1, pOverflow), pOverflow);

        /* Bin j, imaginary part */
        L_acc = L_negate(L_dif_im);
        L_acc = L_msu(L_acc, dif_re, phs_tbl[j], pOverflow);
        L_acc = L_mac(L_acc, sum_im, phs_tbl[j + 1], pOverflow);
        farray_ptr[j + 1] = pv_round(L_shr(L_acc, 1, pOverflow), pOverflow);
    }
}   /* end r_fft () */
/*
 * Table-driven 1/sqrt(L_x) with linear interpolation between entries of
 * tabsqr[].  Non-positive input returns 0x3fffffff.
 */
Word32 Inv_sqrt(   /* (o) Q30 : output value   (range: 0<=val<1)           */
  Word32 L_x       /* (i) Q0  : input value    (range: 0<=val<=7fffffff)   */
)
{
  Word16 shift, idx, frac, step;
  Word32 result;

  if (L_x <= (Word32)0) {
    return (Word32)0x3fffffffL;
  }

  /* Normalize the input and remember the exponent. */
  shift = norm_l(L_x);
  L_x = L_shl(L_x, shift);
  shift = sub(30, shift);

  /* Even exponent: one extra right shift of the mantissa. */
  if (!(shift & 1)) {
    L_x = L_shr(L_x, 1);
  }
  shift = add(shr(shift, 1), 1);

  /* Split the mantissa into a table index and an interpolation fraction. */
  L_x = L_shr(L_x, 9);
  idx = extract_h(L_x);                        /* bits b25-b31 */
  L_x = L_shr(L_x, 1);
  frac = extract_l(L_x) & (Word16)0x7fff;      /* bits b10-b24 */
  idx = sub(idx, 16);

  /* result = tabsqr[idx] << 16, minus (tabsqr[idx] - tabsqr[idx+1]) * frac * 2 */
  result = L_deposit_h(tabsqr[idx]);
  step = sub(tabsqr[idx], tabsqr[idx + 1]);
  result = L_msu(result, step, frac);

  /* Denormalization */
  return L_shr(result, shift);
}
void WebRtcG729fix_Log2( int32_t L_x, /* (i) Q0 : input value */ int16_t *exponent, /* (o) Q0 : Integer part of Log2. (range: 0<=val<=30) */ int16_t *fraction /* (o) Q15: Fractional part of Log2. (range: 0<=val<1) */ ) { int16_t exp, i, a, tmp; int32_t L_y; if( L_x <= (int32_t)0 ) { *exponent = 0; *fraction = 0; return; } exp = WebRtcSpl_NormW32(L_x); L_x = L_shl(L_x, exp ); /* L_x is normalized */ *exponent = WebRtcSpl_SubSatW16(30, exp); L_x = L_shr(L_x, 9); i = extract_h(L_x); /* Extract b25-b31 */ L_x = L_shr(L_x, 1); a = extract_l(L_x); /* Extract b10-b24 of fraction */ a = a & (int16_t)0x7fff; i = WebRtcSpl_SubSatW16(i, 32); L_y = L_deposit_h(WebRtcG729fix_tablog[i]); /* tablog[i] << 16 */ tmp = WebRtcSpl_SubSatW16(WebRtcG729fix_tablog[i], WebRtcG729fix_tablog[i+1]); /* tablog[i] - tablog[i+1] */ L_y = L_msu(L_y, tmp, a); /* L_y -= tmp*a*2 */ *fraction = extract_h( L_y); }
/*
 * Remove the moving-average prediction from an LSP vector.
 *
 * For every coefficient j the weighted previous vectors
 * freq_prev[k][j] * fg[k][j] (k = 0 .. MA_NP-1) are subtracted from lsp[j];
 * the remainder is multiplied by fg_sum_inv[j] and rescaled by a left shift
 * of 3.
 */
void Lsp_prev_extract(
  Word16 lsp[M],                /* (i) Q13 : unquantized LSP parameters  */
  Word16 lsp_ele[M],            /* (o) Q13 : target vector               */
  Word16 fg[MA_NP][M],          /* (i) Q15 : MA prediction coef.         */
  Word16 freq_prev[MA_NP][M],   /* (i) Q13 : previous LSP vector         */
  Word16 fg_sum_inv[M]          /* (i) Q12 : inverse previous LSP vector */
)
{
  Word16 coef, stage;
  Word16 residual;              /* Q13 */
  Word32 acc;                   /* Q19 */

  coef = 0;
  while (coef < M) {
    acc = L_deposit_h(lsp[coef]);

    /* Subtract the contribution of each stored previous vector. */
    stage = 0;
    while (stage < MA_NP) {
      acc = L_msu(acc, freq_prev[stage][coef], fg[stage][coef]);
      stage++;
    }
    residual = extract_h(acc);

    acc = L_mult(residual, fg_sum_inv[coef]);
    lsp_ele[coef] = extract_h(L_shl(acc, 3));

    coef++;
  }
}
int32_t WebRtcG729fix_Inv_sqrt( /* (o) Q30 : output value (range: 0<=val<1) */ int32_t L_x /* (i) Q0 : input value (range: 0<=val<=7fffffff) */ ) { int16_t exp, i, a, tmp; int32_t L_y; if( L_x <= (int32_t)0) return ( (int32_t)0x3fffffffL); exp = WebRtcSpl_NormW32(L_x); L_x = L_shl(L_x, exp ); /* L_x is normalize */ exp = WebRtcSpl_SubSatW16(30, exp); if( (exp & 1) == 0 ) /* If exponent even -> shift right */ L_x = L_shr(L_x, 1); exp = shr(exp, 1); exp = WebRtcSpl_AddSatW16(exp, 1); L_x = L_shr(L_x, 9); i = extract_h(L_x); /* Extract b25-b31 */ L_x = L_shr(L_x, 1); a = extract_l(L_x); /* Extract b10-b24 */ a = a & (int16_t)0x7fff; i = WebRtcSpl_SubSatW16(i, 16); L_y = L_deposit_h(WebRtcG729fix_tabsqr[i]); /* tabsqr[i] << 16 */ tmp = WebRtcSpl_SubSatW16(WebRtcG729fix_tabsqr[i], WebRtcG729fix_tabsqr[i+1]); /* tabsqr[i] - tabsqr[i+1]) */ L_y = L_msu(L_y, tmp, a); /* L_y -= tmp*a*2 */ L_y = L_shr(L_y, exp); /* denormalization */ return(L_y); }
/*
 * Slide an energy window over `signal` and report where it is largest.
 *
 * The first window covers signal[dp-ewl .. dp+ewl].  It is then moved one
 * sample at a time for shifts 1 .. length-1, dropping the oldest squared
 * sample and adding the newest.  A later shift replaces the current best
 * only when its energy is strictly larger.
 *
 * On return *maxloc holds the best shift plus dp, and *maxener holds the
 * corresponding energy shifted right by 1.
 */
void maxeloc(INT16 *maxloc, INT32 *maxener, INT16 *signal, INT16 dp,
             INT16 length, INT16 ewl)
{
    INT32 window_energy = 0;
    int shift;
    int oldest, newest;
    int k;

    newest = add(dp, ewl);
    oldest = sub(dp, ewl);

    /* Energy of the initial window. */
    for (k = oldest; k <= newest; k++) {
        window_energy = L_mac(window_energy, signal[k], signal[k]);
    }

    *maxloc = 0;
    *maxener = window_energy;

    shift = 1;
    while (shift < length) {
        newest++;
        window_energy = L_msu(window_energy, signal[oldest], signal[oldest]);
        window_energy = L_mac(window_energy, signal[newest], signal[newest]);
        oldest++;

        if (window_energy > *maxener) {
            *maxloc = shift;
            *maxener = window_energy;
        }
        shift++;
    }

    *maxloc = add(*maxloc, dp);
    *maxener = L_shr(*maxener, 1);
}
/*
 * Build a polynomial f[0..LPCO] (Q23) from every second entry of `lsp`.
 *
 * Each used LSP value is mapped through costable[] with linear
 * interpolation; the resulting value c then updates the polynomial with
 *     f[i] += f[i-2] - c * f[i-1]   for i = 2n .. 2
 *     f[1] -= c
 */
void get_pq_polynomials(
    Word32 *f,     /* Q23 */
    Word16 *lsp)   /* Q15 */
{
    Word16 i, n, hi, lo;
    Word16 index, offset, slope, c;
    Word32 a0;

    f[0] = L_mult(2048, 2048);              /* 1.0 Q23 */
    for (i = 1; i <= LPCO; i++) {
        f[i] = 0;
    }

    for (n = 0; n < (LPCO >> 1); n++) {
        /* cosine mapping: table index from the upper bits, fraction below */
        index = shr(lsp[2 * n], 9);                     /* Q6  */
        offset = lsp[2 * n] & (Word16)0x01ff;           /* Q9  */
        slope = sub(costable[index + 1], costable[index]);
        a0 = L_mult(slope, offset);                     /* Q10 */
        c = add(costable[index], intround(L_shl(a0, 6)));

        /* Update from the highest coefficient touched so far down to f[2]. */
        for (i = 2 * n + 2; i >= 2; i--) {
            L_Extract(f[i - 1], &hi, &lo);
            f[i] = L_add(f[i], f[i - 2]);               /* Q23 f[i] += f[i-2]   */
            a0 = Mpy_32_16(hi, lo, c);                  /* Q22                  */
            f[i] = L_sub(f[i], L_shl(a0, 1));           /* Q23 f[i] -= c*f[i-1] */
        }
        f[1] = L_msu(f[1], c, 256);                     /* Q23 f[1] -= c */
    }
}
/*
 * Evaluate a Chebyshev series at x with the recurrence
 *     b0 = 2*x*b1 - b2 + f[i]
 * carried in Q23 using split high/low 16-bit words.  The last step uses
 * x*b1 - b2 + f[i]/2, and the result is returned in Q14 after a saturating
 * shift from Q23 to Q30.
 */
static Word16 Chebps_10(Word16 x, Word16 f[], Word16 n)
{
  Word16 i;
  Word16 cur_h, cur_l;      /* b1 */
  Word16 prev_h, prev_l;    /* b2 */
  Word32 acc;

  /* Note: All computation are done in Q23. */
  prev_h = 128;                         /* b2 = 1.0 in Q23 DPF */
  prev_l = 0;

  acc = L_mac(L_mult(x, 256), f[1], 4096);          /* 2*x + f[1] in Q23 */
  L_Extract(acc, &cur_h, &cur_l);                   /* b1 = 2*x + f[1]   */

  for (i = 2; i < n; i++) {
    acc = L_shl(Mpy_32_16(cur_h, cur_l, x), 1);     /* 2.0*x*b1          */
    acc = L_mac(acc, prev_h, (Word16)-32768L);      /* ... - b2 (high)   */
    acc = L_msu(acc, prev_l, 1);                    /* ... - b2 (low)    */
    acc = L_mac(acc, f[i], 4096);                   /* ... + f[i]        */

    /* acc is complete, so b2 can take b1 before b1 is overwritten. */
    prev_h = cur_h;
    prev_l = cur_l;
    L_Extract(acc, &cur_h, &cur_l);
  }

  acc = Mpy_32_16(cur_h, cur_l, x);                 /* x*b1              */
  acc = L_mac(acc, prev_h, (Word16)-32768L);        /* x*b1 - b2         */
  acc = L_msu(acc, prev_l, 1);
  acc = L_mac(acc, f[i], 2048);                     /* ... + f[i]/2      */

  acc = L_shl(acc, 7);                  /* Q23 to Q30 with saturation */
  return extract_h(acc);                /* Result in Q14              */
}
/*
 **************************************************************************
 *
 *  Function    : Chebps
 *  Purpose     : Evaluates the Chebyshev polynomial series
 *  Description : The series
 *                   C(x) = T_n(x) + f(1)T_n-1(x) + ... + f(n-1)T_1(x) + f(n)/2
 *                is evaluated with the recurrence
 *                   b0 = 2*x*b1 - b2 + f[i]
 *                using split high/low 16-bit words; the final step uses
 *                   x*b1 - b2 + f[i]/2.
 *                T_m(x) = cos(mw) is the mth order Chebyshev polynomial
 *                ( x = cos(w) ).
 *  Returns     : C(x) for the input x.
 *
 **************************************************************************
 */
static Word16 Chebps (Word16 x,
                      Word16 f[], /* (n) */
                      Word16 n)
{
    Word16 i;
    Word16 next_h, next_l;      /* b0 */
    Word16 cur_h, cur_l;        /* b1 */
    Word16 prev_h, prev_l;      /* b2 */
    Word32 acc;

    prev_h = 256;                                   move16 (); /* b2 = 1.0 */
    prev_l = 0;                                     move16 ();

    acc = L_mac (L_mult (x, 512), f[1], 8192);      /* 2*x + f[1]        */
    L_Extract (acc, &cur_h, &cur_l);                /* b1 = 2*x + f[1]   */

    i = 2;
    while (i < n)
    {
        acc = L_shl (Mpy_32_16 (cur_h, cur_l, x), 1);   /* 2.0*x*b1      */
        acc = L_mac (acc, prev_h, (Word16) 0x8000);     /* ... - b2      */
        acc = L_msu (acc, prev_l, 1);
        acc = L_mac (acc, f[i], 8192);                  /* ... + f[i]    */

        L_Extract (acc, &next_h, &next_l);              /* b0            */

        prev_l = cur_l;                             move16 (); /* b2 = b1 */
        prev_h = cur_h;                             move16 ();
        cur_l = next_l;                             move16 (); /* b1 = b0 */
        cur_h = next_h;                             move16 ();

        i++;
    }

    acc = Mpy_32_16 (cur_h, cur_l, x);              /* x*b1              */
    acc = L_mac (acc, prev_h, (Word16) 0x8000);     /* x*b1 - b2         */
    acc = L_msu (acc, prev_l, 1);
    acc = L_mac (acc, f[i], 4096);                  /* ... + f[i]/2      */

    acc = L_shl (acc, 6);

    return extract_h (acc);
}
/*
 * Square root with a separately returned exponent.
 *
 *   y = sqrt(x),  x = f * 2^-e,  0.25 <= f < 1 after normalization by an
 *   EVEN shift e, so that sqrt(x) = sqrt(f) * 2^(-e/2).
 *
 * The mantissa square root comes from sqrt_l_tbl[] with linear
 * interpolation.  The even shift e is stored in *pExp; the final
 * denormalization is left to the caller.  Non-positive input returns 0 with
 * *pExp = 0.
 */
Word32 sqrt_l_exp(      /* o : output value,                          Q31 */
    Word32 L_x,         /* i : input value,                           Q31 */
    Word16 *pExp,       /* o : right shift to be applied to result,   Q1  */
    Flag   *pOverflow   /* i : pointer to overflow flag */
)
{
    Word16 even_shift;
    Word16 idx;
    Word16 frac;
    Word16 step;
    Word32 root;

    if (L_x <= (Word32) 0)
    {
        *pExp = 0;
        return (Word32) 0;
    }

    /* Next lower EVEN normalization exponent. */
    even_shift = norm_l(L_x) & 0xFFFE;
    L_x = L_shl(L_x, even_shift, pOverflow);    /* normalized to [0.25..1) */
    *pExp = even_shift;                         /* 2*exponent (Q1)         */

    /* Bits b25-b31 give the table index (16 <= idx <= 63 here). */
    L_x = L_shr(L_x, 9, pOverflow);
    idx = (Word16)(L_x >> 16);

    /* Bits b10-b24 give the interpolation fraction. */
    L_x = L_shr(L_x, 1, pOverflow);
    frac = (Word16)((Word16)(L_x) & (Word16) 0x7fff);

    idx = sub(idx, 16, pOverflow);              /* 0 <= idx <= 47 */

    /* root = tbl[idx] << 16, minus (tbl[idx] - tbl[idx+1]) * frac * 2 */
    root = L_deposit_h(sqrt_l_tbl[idx]);
    step = sub(sqrt_l_tbl[idx], sqrt_l_tbl[idx + 1], pOverflow);
    root = L_msu(root, step, frac, pOverflow);

    return root;
}
/*
 * Synthesis filtering of x[] through 1/A(z) into y[].
 *
 * The routine works in a local scratch buffer laid out as
 *     [ m history samples copied from mem[] | lg output samples ]
 * so the recursion can address past outputs with negative offsets.
 *
 * Changes relative to the earlier revision:
 *  - The scratch buffer holds 80 entries.  Calls with m + lg > 80 (or with
 *    a negative m or lg) used to run past it; such calls now return without
 *    touching y[] or mem[].
 *  - The memory update takes the last m entries of the scratch buffer
 *    instead of y[lg-m .. lg-1].  This is the same data whenever lg >= m,
 *    and it avoids reading y[] at a negative index when lg < m.
 */
void Syn_filte(
  Word16 m,        /* (i)    : LPC order                                     */
  Word16 a[],      /* (i) Q12: a[m+1] prediction coefficients (m=10)         */
  Word16 x[],      /* (i)    : input signal                                  */
  Word16 y[],      /* (o)    : output signal                                 */
  Word16 lg,       /* (i)    : size of filtering                             */
  Word16 mem[],    /* (i/o)  : memory associated with this filtering.        */
  Word16 update    /* (i)    : 0=no update, 1=update of memory.              */
)
{
  enum { SYN_SCRATCH_LEN = 80 };    /* capacity of history + output scratch */
  Word16 i, j;
  Word32 s;
  Word16 tmp[SYN_SCRATCH_LEN];
  Word16 *yy;

  /* Refuse sizes the scratch buffer cannot hold (operands promote to int). */
  if (m < 0 || lg < 0 || m + lg > SYN_SCRATCH_LEN) {
    return;
  }

  /* Copy mem[] to the head of the scratch buffer. */
  yy = tmp;
  for (i = 0; i < m; i++) {
    *yy++ = mem[i];
  }

  /* Do the filtering: yy[-j] is the output produced j samples ago. */
  for (i = 0; i < lg; i++) {
    s = L_mult(x[i], a[0]);
    for (j = 1; j <= m; j++) {
      s = L_msu(s, a[j], yy[-j]);
    }
    s = L_shl(s, 3);
    *yy++ = round(s);
  }

  for (i = 0; i < lg; i++) {
    y[i] = tmp[i + m];
  }

  /* Update of memory if update != 0: keep the most recent m samples. */
  if (update != 0) {
    for (i = 0; i < m; i++) {
      mem[i] = tmp[lg + i];
    }
  }
}
/*-------------------------------------------------------------------*
 * Function  search_ixiy()                                           *
 * ~~~~~~~~~~~~~~~~~~~~~~~                                           *
 * Find the best positions of 2 pulses in a subframe.                *
 *                                                                   *
 * Every position pair (x on track_x, y on track_y) is scored by     *
 * its squared correlation sq against its energy alp2.  A pair       *
 * replaces the current best when alpk*sq - sqk*alp2 > 0, i.e. the   *
 * ratios are compared by cross-multiplication, without a division.  *
 *-------------------------------------------------------------------*/
static void search_ixiy(
  Word16 track_x,       /* (i) track of pulse 1                     */
  Word16 track_y,       /* (i) track of pulse 2                     */
  Word16 *ps,           /* (i/o) correlation of all fixed pulses    */
  Word16 *alp,          /* (i/o) energy of all fixed pulses         */
  Word16 *ix,           /* (o) position of pulse 1                  */
  Word16 *iy,           /* (o) position of pulse 2                  */
  Word16 dn[],          /* (i) corr. between target and h[]         */
  Word16 cor_x[],       /* (i) corr. of pulse 1 with fixed pulses   */
  Word16 cor_y[],       /* (i) corr. of pulse 2 with fixed pulses   */
  Word16 rrixiy[][MSIZE]  /* (i) corr. of pulse 1 with pulse 2      */
)
{
  Word16 x, y, best_y;
  Word16 corr_x_only, corr_pair, sq, best_sq;
  Word16 ener_x_only, ener_pair, best_ener, cross;
  Word16 *px, *py, *pxy;
  Word32 score;

  px = cor_x;
  py = cor_y;
  pxy = rrixiy[track_x];

  best_sq = -1;
  best_ener = 1;

  x = track_x;
  while (x < L_SUBFR)
  {
    corr_x_only = add(*ps, dn[x]);
    ener_x_only = add(*alp, *px);
    px++;

    best_y = -1;
    y = track_y;
    while (y < L_SUBFR)
    {
      corr_pair = add(corr_x_only, dn[y]);

      cross = add(*py, *pxy);
      py++;
      pxy++;
      ener_pair = add(ener_x_only, cross);

      sq = mult(corr_pair, corr_pair);
      score = L_msu(L_mult(best_ener, sq), best_sq, ener_pair);
      if (score > 0)
      {
        best_sq = sq;
        best_ener = ener_pair;
        best_y = y;
      }
      y += STEP;
    }
    /* Rewind the pulse-2 correlations; pxy keeps advancing. */
    py -= NB_POS;

    /* Record this x only if one of its pairs became the best so far. */
    if (best_y >= 0)
    {
      *ix = x;
      *iy = best_y;
    }
    x += STEP;
  }

  *ps = add(*ps, add(dn[*ix], dn[*iy]));
  *alp = best_ener;
}
/*
**
** Function:            Lsp_Int()
**
** Description:     Computes the quantized LPC coefficients for a
**          frame.  For every subframe the quantized LSP frequencies
**          are obtained by linear interpolation between the previous
**          and the current frame, and are then converted in place to
**          quantized LPC coefficients by LsptoA().
**
** Links to text:   Sections 2.7, 3.3
**
** Arguments:
**
**  Word16 *QntLpc      Empty buffer
**  Word16 CurrLsp[]    Quantized LSP frequencies for the current frame,
**               subframe 3 (10 words)
**  Word16 PrevLsp[]    Quantized LSP frequencies for the previous frame,
**               subframe 3 (10 words)
**
** Outputs:
**
**  Word16 QntLpc[]     Quantized LPC coefficients for current frame, all
**               subframes (40 words)
**
** Return value:    None
**
*/
void Lsp_Int( Word16 *QntLpc, Word16 *CurrLsp, Word16 *PrevLsp )
{
    int     sub_idx, k ;
    Word16  weight ;
    Word16 *dst ;
    Word32  acc ;

    /* Initial interpolation factor; one more step is added per subframe. */
    weight = (Word16) (MIN_16 / SubFrames ) ;

    for ( sub_idx = 0, dst = QntLpc ; sub_idx < SubFrames ;
                                        sub_idx ++, dst += LpcOrder ) {

        /* acc = Prev << 16, plus weight*Prev*2, minus weight*Curr*2 */
        for ( k = 0 ; k < LpcOrder ; k ++ ) {
            acc = L_mac( L_deposit_h( PrevLsp[k] ), weight, PrevLsp[k] ) ;
            acc = L_msu( acc, weight, CurrLsp[k] ) ;
            dst[k] = round( acc ) ;
        }

        /* Convert this subframe's LSP frequencies to LPC coefficients. */
        LsptoA( dst ) ;

        /* Update the interpolation factor */
        weight = add( weight, (Word16) (MIN_16 / SubFrames ) ) ;
    }
}
/*
 * In-place pre-emphasis: x[i] = x[i] - mu * x[i-1].
 *
 * Samples are processed from the end of the buffer towards the start, so
 * each x[i-1] is still the original input when it is used.  x[0] is left
 * unchanged (the loop stops at i = 1).
 *
 * The sample is moved into the high word by multiplying with 65536 rather
 * than with `<< 16`: left-shifting a negative signed value is undefined
 * behaviour in C, while the product is well defined and always fits in
 * 32 bits for a 16-bit sample.
 */
void Preemph_(
  Word16 x[],         /* (i/o)   : input signal overwritten by the output */
  Word16 mu,          /* (i) Q15 : preemphasis coefficient                */
  Word16 lg           /* (i)     : length of filtering                    */
)
{
  Word16 i;
  Word32 L_tmp;

  for (i = lg - 1; i > 0; i--)
  {
    L_tmp = (Word32)x[i] * 65536L;          /* x[i] in the high word */
    L_tmp = L_msu(L_tmp, x[i - 1], mu);     /* minus 2 * x[i-1] * mu */
    x[i] = round16(L_tmp);
  }
}
/*
 * 2^(exponent.fraction) using WebRtcG729fix_tabpow[] with linear
 * interpolation.  The interpolated mantissa is shifted right, with
 * rounding, by (30 - exponent).
 */
int32_t WebRtcG729fix_Pow2( /* (o) Q0  : result       (range: 0<=val<=0x7fffffff) */
  int16_t exponent,         /* (i) Q0  : Integer part.      (range: 0<=val<=30)   */
  int16_t fraction          /* (i) Q15 : Fractional part.   (range: 0.0<=val<1.0) */
)
{
  int16_t shift, slot, frac, delta;
  int32_t acc;

  /* Split the fraction: b10-b15 select the table slot, b0-b9 interpolate. */
  acc = L_mult(fraction, 32);                /* fraction << 6 */
  slot = extract_h(acc);
  acc = L_shr(acc, 1);
  frac = extract_l(acc) & (int16_t)0x7fff;

  /* acc = tabpow[slot] << 16, minus (tabpow[slot] - tabpow[slot+1]) * frac * 2 */
  acc = L_deposit_h(WebRtcG729fix_tabpow[slot]);
  delta = WebRtcSpl_SubSatW16(WebRtcG729fix_tabpow[slot],
                              WebRtcG729fix_tabpow[slot + 1]);
  acc = L_msu(acc, delta, frac);

  shift = WebRtcSpl_SubSatW16(30, exponent);
  return L_shr_r(acc, shift);
}
/*
 * 2^(exponent.fraction) from tabpow[] with linear interpolation, followed
 * by a rounding right shift of (30 - exponent).
 */
Word32 Pow2(        /* (o) Q0  : result       (range: 0<=val<=0x7fffffff) */
  Word16 exponent,  /* (i) Q0  : Integer part.      (range: 0<=val<=30)   */
  Word16 fraction   /* (i) Q15 : Fractional part.   (range: 0.0<=val<1.0) */
)
{
  Word16 rshift, idx, frac, step;
  Word32 value;

  value = L_mult(fraction, 32);              /* fraction << 6             */
  idx = extract_h(value);                    /* b10-b15 of fraction       */
  value = L_shr(value, 1);
  frac = extract_l(value) & (Word16)0x7fff;  /* b0-b9 of fraction         */

  value = L_deposit_h(tabpow[idx]);          /* tabpow[idx] << 16         */
  step = sub(tabpow[idx], tabpow[idx + 1]);  /* tabpow[idx]-tabpow[idx+1] */
  value = L_msu(value, step, frac);          /* value -= step*frac*2      */

  rshift = sub(30, exponent);
  value = L_shr_r(value, rshift);

  return value;
}
/*
 * Base-2 logarithm of L_x, returned as an integer part and a Q15
 * fractional part.  The fraction comes from tablog[] with linear
 * interpolation.  Non-positive input yields 0 for both outputs.
 */
void Log2(
  Word32 L_x,       /* (i) Q0 : input value                                 */
  Word16 *exponent, /* (o) Q0 : Integer part of Log2.   (range: 0<=val<=30) */
  Word16 *fraction  /* (o) Q15: Fractional part of Log2. (range: 0<=val<1)  */
)
{
  Word16 nshift, idx, frac, step;
  Word32 interp;

  if (L_x <= (Word32)0)
  {
    *exponent = 0;
    *fraction = 0;
    return;
  }

  nshift = norm_l(L_x);
  L_x = L_shl(L_x, nshift);                  /* L_x is normalized */

  *exponent = sub(30, nshift);

  L_x = L_shr(L_x, 9);
  idx = extract_h(L_x);                      /* b25-b31             */
  L_x = L_shr(L_x, 1);
  frac = extract_l(L_x) & (Word16)0x7fff;    /* b10-b24 of fraction */

  idx = sub(idx, 32);

  interp = L_deposit_h(tablog[idx]);         /* tablog[idx] << 16         */
  step = sub(tablog[idx], tablog[idx + 1]);  /* tablog[idx]-tablog[idx+1] */
  interp = L_msu(interp, step, frac);        /* interp -= step*frac*2     */

  *fraction = extract_h(interp);
}
/*
 **************************************************************************
 *
 *  Function    : Az_lsp
 *  Purpose     : Compute the LSPs from the LP coefficients
 *  Description : The sum and difference polynomials are formed from a[],
 *                then their roots are searched alternately along grid[].
 *                A sign change of the Chebyshev evaluation between two
 *                grid points is refined by 4 bisections and one linear
 *                interpolation.  If fewer than M roots are found, the
 *                previous LSPs are copied to the output.
 *
 **************************************************************************
 */
void Az_lsp (
    Word16 a[],         /* (i)  : predictor coefficients (MP1)               */
    Word16 lsp[],       /* (o)  : line spectral pairs (M)                    */
    Word16 old_lsp[]    /* (i)  : old lsp[] (in case not found 10 roots) (M) */
)
{
    Word16 i, grid_idx, found, use_f2;
    Word16 xlow, ylow, xhigh, yhigh, xmid, ymid, xint;
    Word16 x, y, sign, exp;
    Word16 *coef;
    Word16 f1[M / 2 + 1], f2[M / 2 + 1];
    Word32 t0;

    /*-------------------------------------------------------------*
     *  find the sum and diff. pol. F1(z) and F2(z)                *
     *    F1(z) <--- F1(z)/(1+z**-1) & F2(z) <--- F2(z)/(1-z**-1)  *
     *                                                             *
     *    f1[i+1] = a[i+1] + a[M-i] - f1[i]                        *
     *    f2[i+1] = a[i+1] - a[M-i] + f2[i]                        *
     *-------------------------------------------------------------*/

    f1[0] = 1024;                  move16 (); /* f1[0] = 1.0 */
    f2[0] = 1024;                  move16 (); /* f2[0] = 1.0 */

    for (i = 0; i < NC; i++)
    {
        /* x = (a[i+1] + a[M-i]) >> 2 */
        t0 = L_mac (L_mult (a[i + 1], 8192), a[M - i], 8192);
        x = extract_h (t0);
        f1[i + 1] = sub (x, f1[i]);    move16 ();

        /* x = (a[i+1] - a[M-i]) >> 2 */
        t0 = L_msu (L_mult (a[i + 1], 8192), a[M - i], 8192);
        x = extract_h (t0);
        f2[i + 1] = add (x, f2[i]);    move16 ();
    }

    /*-------------------------------------------------------------*
     * find the LSPs using the Chebychev pol. evaluation           *
     *-------------------------------------------------------------*/

    found = 0;                     move16 (); /* number of found frequencies */
    use_f2 = 0;                    move16 (); /* indicator for f1 or f2      */

    coef = f1;                     move16 ();

    xlow = grid[0];                move16 ();
    ylow = Chebps (xlow, coef, NC); move16 ();

    grid_idx = 0;
    test (); test ();
    /* while ( (found < M) && (grid_idx < grid_points) ) */
    while ((sub (found, M) < 0) && (sub (grid_idx, grid_points) < 0))
    {
        grid_idx++;
        xhigh = xlow;              move16 ();
        yhigh = ylow;              move16 ();
        xlow = grid[grid_idx];     move16 ();
        ylow = Chebps (xlow, coef, NC);
                                   move16 ();

        test ();
        if (L_mult (ylow, yhigh) <= (Word32) 0L)
        {
            /* Sign change: divide the interval 4 times */
            for (i = 0; i < 4; i++)
            {
                /* xmid = (xlow + xhigh)/2 */
                xmid = add (shr (xlow, 1), shr (xhigh, 1));
                ymid = Chebps (xmid, coef, NC);
                                   move16 ();

                test ();
                if (L_mult (ylow, ymid) <= (Word32) 0L)
                {
                    yhigh = ymid;  move16 ();
                    xhigh = xmid;  move16 ();
                }
                else
                {
                    ylow = ymid;   move16 ();
                    xlow = xmid;   move16 ();
                }
            }

            /*-------------------------------------------------------------*
             *    Linear interpolation                                     *
             *    xint = xlow - ylow*(xhigh-xlow)/(yhigh-ylow);            *
             *-------------------------------------------------------------*/

            x = sub (xhigh, xlow);
            y = sub (yhigh, ylow);

            test ();
            if (y == 0)
            {
                xint = xlow;       move16 ();
            }
            else
            {
                sign = y;          move16 ();
                y = abs_s (y);
                exp = norm_s (y);
                y = shl (y, exp);
                y = div_s ((Word16) 16383, y);
                t0 = L_shr (L_mult (x, y), sub (20, exp));
                y = extract_l (t0);     /* y= (xhigh-xlow)/(yhigh-ylow) */

                test ();
                if (sign < 0)
                {
                    y = negate (y);
                }

                t0 = L_shr (L_mult (ylow, y), 11);
                xint = sub (xlow, extract_l (t0)); /* xint = xlow - ylow*y */
            }

            lsp[found] = xint;     move16 ();
            xlow = xint;           move16 ();
            found++;

            /* Alternate between the two polynomials after each root. */
            test ();
            if (use_f2 == 0)
            {
                use_f2 = 1;        move16 ();
                coef = f2;         move16 ();
            }
            else
            {
                use_f2 = 0;        move16 ();
                coef = f1;         move16 ();
            }
            ylow = Chebps (xlow, coef, NC);
                                   move16 ();
        }
        test (); test ();
    }

    /* Check if M roots found */
    test ();
    if (sub (found, M) < 0)
    {
        for (i = 0; i < M; i++)
        {
            lsp[i] = old_lsp[i];   move16 ();
        }
    }
}
/*
 * Type-IV DCT (analysis side), in three phases:
 *   1. sum/difference butterflies that split the transform into smaller
 *      sets, ping-ponging between buffer_a and buffer_b;
 *   2. the small core transforms, each output being a DSP_mac() of one
 *      input group with a row of dct_core_a[];
 *   3. rotation butterflies driven by a_cos_msin_table[], the last pass
 *      writing directly to `output`.
 *
 * When dct_length is 320, anal_bias is first added to `input` in place via
 * DSP_add().
 */
void dct_type_iv_a (Int16 *input, Int16 *output, Int16 dct_length)
{
    Int16 *src, *src_low, *src_high, *src_next;
    Int16 *dst_front, *dst_back, *dst_next;
    Int16 *work_out, *work_in, *spare;
    Int16 *pingpong[2] = {buffer_a, buffer_b};
    Int16 first, second;
    Int16 even_lo, odd_lo;
    Int16 even_hi, odd_hi;
    Int16 res_lo_even, res_lo_odd;
    Int16 res_hi_even, res_hi_odd;
    Int16 *group;
    Int16 c, s;
    Int32 acc;
    Int16 span, nsets, level, groups_left, sets_left;
    Int16 k;
    cos_msin_t **table_cursor, *rot;

    /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
    /* Do the sum/difference butterflies, the first part of */
    /* converting one N-point transform into N/2 two-point  */
    /* transforms, where N = 1 << DCT_LENGTH_LOG.           */
    /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
    if (dct_length == 320) {
        /* Add bias offsets */
        DSP_add(input, anal_bias, dct_length);
    }

    work_in = input;
    span = dct_length;
    nsets = 1;

    for (level = 0; level < DCT_LENGTH_LOG - 1; level++) {
        /* Take the next output buffer and swap the ping-pong pair. */
        work_out = pingpong[0];
        pingpong[0] = pingpong[1];
        pingpong[1] = work_out;

        src = work_in;
        dst_next = work_out;

        /* Loop over all the sets of this size */
        for (sets_left = nsets; sets_left > 0; sets_left--) {
            dst_front = dst_next;
            dst_next += span;
            dst_back = dst_next;

            /* Half sums fill the set from the front, half differences */
            /* from the back, until the two cursors meet.              */
            do {
                first = *src++;
                second = *src++;
                acc = (Int32)first + (Int32)second;
                *dst_front++ = (Int16)(acc >> 1);
                acc = (Int32)first - (Int32)second;
                *--dst_back = (Int16)(acc >> 1);
            } while (dst_front < dst_back);
        }

        /* The output of this pass is the input of the next one. */
        work_in = work_out;
        span >>= 1;
        nsets <<= 1;
    }

    /*++++++++++++++++++++++++++++++++*/
    /* Do the core transforms, one    */
    /* group of CORE_SIZE at a time   */
    /*++++++++++++++++++++++++++++++++*/
    group = work_in;
    spare = buffer_c;
    for (groups_left = 1 << (DCT_LENGTH_LOG - 1); groups_left > 0; groups_left--) {
        for (k = 0; k < CORE_SIZE; k++) {
            *spare++ = DSP_mac(group, dct_core_a[k], CORE_SIZE);
        }
        group += CORE_SIZE;
    }

    DSP_memcpy(work_in, buffer_c, dct_length >> 1);

    table_cursor = a_cos_msin_table;

    /*++++++++++++++++++++++++++++++*/
    /* Perform rotation butterflies */
    /*++++++++++++++++++++++++++++++*/
    work_out = pingpong[0];
    span = 10;
    for (level = DCT_LENGTH_LOG - 2; level >= 0; level--) {
        span <<= 1;
        nsets = 1 << level;
        src_next = work_in;
        /* The final pass (level 0) writes to the caller's buffer. */
        dst_next = (level) ? work_out : output;

        /* Loop over all the sets of this size */
        for (sets_left = nsets; sets_left > 0; sets_left--) {
            src_low = src_next;
            src_high = src_low + (Int16)(span >> 1);
            src_next += span;
            dst_front = dst_next;
            dst_next += span;
            dst_back = dst_next;
            rot = *table_cursor;

            /* Loop over all the butterfly pairs in the current set */
            do {
                even_lo = src_low[0];
                odd_lo = src_low[1];
                src_low += 2;
                even_hi = src_high[0];
                odd_hi = src_high[1];
                src_high += 2;

                /* Even sample pair with the first table entry */
                c = rot->cosine;
                s = rot->minus_sine;
                rot++;
                acc = L_msu(L_mult(c, even_lo), s, even_hi);
                res_lo_even = round16(acc);
                acc = L_mac(L_mult(s, even_lo), c, even_hi);
                res_hi_even = round16(acc);

                /* Odd sample pair with the next table entry */
                c = rot->cosine;
                s = rot->minus_sine;
                rot++;
                acc = L_mac(L_mult(c, odd_lo), s, odd_hi);
                res_lo_odd = round16(acc);
                acc = L_msu(L_mult(s, odd_lo), c, odd_hi);
                res_hi_odd = round16(acc);

                *dst_front++ = res_lo_even;
                *--dst_back = res_hi_even;
                *dst_front++ = res_lo_odd;
                *--dst_back = res_hi_odd;
            } while (dst_front < dst_back);
        }

        /* Swap input and output buffers for next time */
        spare = work_in;
        work_in = work_out;
        work_out = spare;
        table_cursor++;
    }
}
/* ** ** Function: Calc_Exc_Rand() ** ** Description: Computation of random excitation for inactive frames: ** Adaptive codebook entry selected randomly ** Higher rate innovation pattern selected randomly ** Computes innovation gain to match curGain ** ** Links to text: ** ** Arguments: ** ** Word16 curGain current average gain to match ** Word16 *PrevExc previous/current excitation (updated) ** Word16 *DataEXc current frame excitation ** Word16 *nRandom random generator status (input/output) ** ** Outputs: ** ** Word16 *PrevExc ** Word16 *DataExc ** Word16 *nRandom ** ** Return value: None ** */ void Calc_Exc_Rand(Word16 curGain, Word16 *PrevExc, Word16 *DataExc, Word16 *nRandom, LINEDEF *Line) { int i, i_subfr, iblk; Word16 temp, temp2; Word16 j; Word16 TabPos[2*NbPulsBlk], TabSign[2*NbPulsBlk]; Word16 *ptr_TabPos, *ptr_TabSign; Word16 *ptr1, *curExc; Word16 sh1, x1, x2, inter_exc, delta, b0; Word32 L_acc, L_c, L_temp; Word16 tmp[SubFrLen/Sgrid]; Word16 offset[SubFrames]; Word16 tempExc[SubFrLenD]; /* * generate LTP codes */ Line->Olp[0] = random_number(21, nRandom) + (Word16)123; Line->Olp[1] = random_number(21, nRandom) + (Word16)123; for(i_subfr=0; i_subfr<SubFrames; i_subfr++) { /* in [1, NbFilt] */ Line->Sfs[i_subfr].AcGn = random_number(NbFilt, nRandom) + (Word16)1; } Line->Sfs[0].AcLg = 1; Line->Sfs[1].AcLg = 0; Line->Sfs[2].AcLg = 1; Line->Sfs[3].AcLg = 3; /* * Random innovation : * Selection of the grids, signs and pulse positions */ /* Signs and Grids */ ptr_TabSign = TabSign; ptr1 = offset; for(iblk=0; iblk<SubFrames/2; iblk++) { temp = random_number((1 << (NbPulsBlk+2)), nRandom); *ptr1++ = temp & (Word16)0x0001; temp = shr(temp, 1); *ptr1++ = add( (Word16) SubFrLen, (Word16) (temp & 0x0001) ); for(i=0; i<NbPulsBlk; i++) { *ptr_TabSign++= shl(sub((temp & (Word16)0x0002), 1), 14); temp = shr(temp, 1); } } /* Positions */ ptr_TabPos = TabPos; for(i_subfr=0; i_subfr<SubFrames; i_subfr++) { for(i=0; i<(SubFrLen/Sgrid); i++) tmp[i] = (Word16)i; temp = 
(SubFrLen/Sgrid); for(i=0; i<Nb_puls[i_subfr]; i++) { j = random_number(temp, nRandom); *ptr_TabPos++ = add(shl(tmp[(int)j],1), offset[i_subfr]); temp = sub(temp, 1); tmp[(int)j] = tmp[(int)temp]; } } /* * Compute fixed codebook gains */ ptr_TabPos = TabPos; ptr_TabSign = TabSign; curExc = DataExc; i_subfr = 0; for(iblk=0; iblk<SubFrames/2; iblk++) { /* decode LTP only */ Decod_Acbk(curExc, &PrevExc[0], Line->Olp[iblk], Line->Sfs[i_subfr].AcLg, Line->Sfs[i_subfr].AcGn); Decod_Acbk(&curExc[SubFrLen], &PrevExc[SubFrLen], Line->Olp[iblk], Line->Sfs[i_subfr+1].AcLg, Line->Sfs[i_subfr+1].AcGn); temp2 = 0; for(i=0; i<SubFrLenD; i++) { temp = abs_s(curExc[i]); if(temp > temp2) temp2 = temp; } if(temp2 == 0) sh1 = 0; else { sh1 = sub(4,norm_s(temp2)); /* 4 bits of margin */ if(sh1 < -2) sh1 = -2; } L_temp = 0L; for(i=0; i<SubFrLenD; i++) { temp = shr(curExc[i], sh1); /* left if sh1 < 0 */ L_temp = L_mac(L_temp, temp, temp); tempExc[i] = temp; } /* ener_ltp x 2**(-2sh1+1) */ L_acc = 0L; for(i=0; i<NbPulsBlk; i++) { L_acc = L_mac(L_acc, tempExc[(int)ptr_TabPos[i]], ptr_TabSign[i]); } inter_exc = extract_h(L_shl(L_acc, 1)); /* inter_exc x 2-sh1 */ /* compute SubFrLenD x curGain**2 x 2**(-2sh1+1) */ /* curGain input = 2**5 curGain */ // L_acc = L_mult(curGain, SubFrLen); L_MULT(curGain, SubFrLen, L_acc); L_acc = L_shr(L_acc, 6); temp = extract_l(L_acc); /* SubFrLen x curGain : avoids overflows */ // L_acc = L_mult(temp, curGain); L_MULT(temp, curGain, L_acc); temp = shl(sh1, 1); temp = add(temp, 4); L_acc = L_shr(L_acc, temp); /* SubFrLenD x curGain**2 x 2**(-2sh1+1) */ /* c = (ener_ltp - SubFrLenD x curGain**2)/nb_pulses_blk */ /* compute L_c = c >> 2sh1-1 */ L_acc = L_sub(L_temp, L_acc); /* x 1/nb_pulses_blk */ L_c = L_mls(L_acc, InvNbPulsBlk); /* * Solve EQ(X) = X**2 + 2 b0 X + c */ /* delta = b0 x b0 - c */ b0 = mult_r(inter_exc, InvNbPulsBlk); /* b0 >> sh1 */ L_acc = L_msu(L_c, b0, b0); /* (c - b0**2) >> 2sh1-1 */ L_acc = L_negate(L_acc); /* delta x 2**(-2sh1+1) */ /* 
Case delta <= 0 */ if(L_acc <= 0) { /* delta <= 0 */ x1 = negate(b0); /* sh1 */ } /* Case delta > 0 */ else { delta = Sqrt_lbc(L_acc); /* >> sh1 */ x1 = sub(delta, b0); /* x1 >> sh1 */ x2 = add(b0, delta); /* (-x2) >> sh1 */ if(abs_s(x2) < abs_s(x1)) { x1 = negate(x2); } } /* Update DataExc */ sh1 = add(sh1, 1); temp = shl(x1, sh1); if(temp > (2*Gexc_Max)) temp = (2*Gexc_Max); if(temp < -(2*Gexc_Max)) temp = -(2*Gexc_Max); for(i=0; i<NbPulsBlk; i++) { j = *ptr_TabPos++; curExc[(int)j] = add(curExc[(int)j], mult(temp, (*ptr_TabSign++)) ); } /* update PrevExc */ ptr1 = PrevExc; for(i=SubFrLenD; i<PitchMax; i++) *ptr1++ = PrevExc[i]; for(i=0; i<SubFrLenD; i++) *ptr1++ = curExc[i]; curExc += SubFrLenD; i_subfr += 2; } /* end of loop on LTP blocks */ return; }
void fndppf(short *delay, short *beta, short *buf, short dmin, short dmax, short length) { static short b = -10224; /* rom storage */ static short a[3] = {-18739, 16024, -4882}; /* a[] scaled down by 4 */ short dnew = 0; short sum; long Lsum; register short m, i, n; static short DECbuf[FrameSize / 4]; long Lcorrmax, Lcmax, Ltmp; short tap1; short M1, M2, dnewtmp = 0; static short lastgoodpitch = 0; static short lastbeta = 0; static short memory[3]; static int FirstTime = 1; short Lsum_scale; short shift, Lcorr_scale, Lcmax_scale; short n1, n2, nq, nq1; long Ltempf; /* init static variables (should be in init routine for implementation) */ if (FirstTime) { FirstTime = 0; n1 = (shr(FrameSize, 2)); for (i = 0; i < n1; i++) DECbuf[i] = 0; memory[0] = memory[1] = memory[2] = 0; } /* Shift memory of DECbuf */ for (i = 0; i < shr(length, 3); i++) { DECbuf[i] = DECbuf[i + shr(length, 3)]; } /* filter signal and decimate */ for (i = 0, n = shr(length, 3); i < shr(length, 1); i++) { Ltempf = L_shr(L_deposit_h(buf[i + shr(length, 1)]), 4); Ltempf = L_msu(Ltempf, memory[0], a[0]); Ltempf = L_msu(Ltempf, memory[1], a[1]); Ltempf = L_msu(Ltempf, memory[2], a[2]); Ltempf = L_shl(Ltempf, 2); shift = 0; if ((i + 1) % 4 == 0) { Lsum = L_add(Ltempf, L_deposit_h(memory[2])); Lsum = L_mac(Lsum, memory[0], b); Lsum = L_mac(Lsum, memory[1], b); DECbuf[n++] = round(L_shl(Lsum, 1)); } memory[2] = memory[1]; memory[1] = memory[0]; memory[0] = round(Ltempf); } /* perform first search for best delay value in decimated domain */ Lcorrmax = (LW_MIN); Lcorr_scale = 1; for (m = shr(dmin, 2); m <= shr(dmax, 2); m++) { n1 = 1; for (i = 0, Lsum = 0; i < sub(shr(length, 2), m); i++) { Ltempf = L_mult(DECbuf[i], DECbuf[i + m]); Ltempf = L_shr(Ltempf, n1); Lsum = L_add(Lsum, Ltempf); if (L_abs(Lsum) >= 0x40000000) { Lsum = L_shr(Lsum, 1); n1++; } } if ( ((Lcorr_scale >= n1) && (L_shr(Lsum, sub(Lcorr_scale, n1)) > Lcorrmax)) || ((Lcorr_scale < n1) && (Lsum > L_shr(Lcorrmax, sub(n1, Lcorr_scale)))) ) { 
Lcorrmax = Lsum; Lcorr_scale = n1; dnew = m; } } /* Compare against lastgoodpitch */ if (lastgoodpitch != 0 && (abs_s(sub(lastgoodpitch, shl(dnew, 2))) > 2)) { M1 = sub(shr(lastgoodpitch, 2), 2); if (M1 < shr(dmin, 2)) M1 = shr(dmin, 2); M2 = add(M1, 4); if (M2 > shr(dmax, 2)) M2 = shr(dmax, 2); Lcmax = LW_MIN; Lcmax_scale = 1; for (m = M1; m <= M2; m++) { n1 = 1; for (i = 0, Lsum = 0; i < sub(shr(length, 2), m); i++) { Ltempf = L_mult(DECbuf[i], DECbuf[i + m]); Ltempf = L_shr(Ltempf, n1); Lsum = L_add(Lsum, Ltempf); if (L_abs(Lsum) >= 0x40000000) { Lsum = L_shr(Lsum, 1); n1++; } } if ( ((Lcmax_scale >= n1) && (L_shr(Lsum, sub(Lcmax_scale, n1)) > Lcmax)) || ((Lcmax_scale < n1) && (Lsum > L_shr(Lcmax, sub(n1, Lcmax_scale)))) ) { /* Gives some bias to low delays */ Lcmax = Lsum; Lcmax_scale = n1; dnewtmp = m; } } Lsum = L_mpy_ls(Lcorrmax, 27361); if ( ((Lcmax_scale >= Lcorr_scale) && (L_shr(Lsum, sub(Lcmax_scale, Lcorr_scale)) < Lcmax)) || ((Lcmax_scale < Lcorr_scale) && (Lsum < L_shr(Lcmax, sub(Lcorr_scale, Lcmax_scale)))) ) { dnew = dnewtmp; } } /* perform first search for best delay value in non-decimated buffer */ M1 = Max(sub(shl(dnew, 2), 3), dmin); if (M1 < dmin) M1 = dmin; M2 = Min(add(shl(dnew, 2), 3), dmax); if (M2 > dmax) M2 = dmax; Lcorrmax = LW_MIN; Lcorr_scale = 1; for (m = M1; m <= M2; m++) { n1 = 1; for (i = 0, Lsum = 0; i < sub(length, m); i++) { Ltempf = L_mult(buf[i], buf[i + m]); Ltempf = L_shr(Ltempf, n1); Lsum = L_add(Lsum, Ltempf); if (L_abs(Lsum) >= 0x40000000) { Lsum = L_shr(Lsum, 1); n1++; } } if ( ((Lcorr_scale >= n1) && (L_shr(Lsum, sub(Lcorr_scale, n1)) > Lcorrmax)) || ((Lcorr_scale < n1) && (Lsum > L_shr(Lcorrmax, sub(n1, Lcorr_scale)))) ) { Lcorrmax = Lsum; Lcorr_scale = n1; dnew = m; } } Lsum_scale = 1; for (i = 0, Lsum = 0; i < sub(length, dnew); i++) { Ltempf = L_mult(buf[i + dnew], buf[i + dnew]); Ltempf = L_shr(Ltempf, Lsum_scale); Lsum = L_add(Lsum, Ltempf); if (L_abs(Lsum) >= 0x40000000) { Lsum = L_shr(Lsum, 1); Lsum_scale++; } } 
Lcmax_scale = 1; for (i = 0, Lcmax = 0; i < length - dnew; i++) { Ltempf = L_mult(buf[i], buf[i]); Ltempf = L_shr(Ltempf, Lcmax_scale); Lcmax = L_add(Lcmax, Ltempf); if (L_abs(Lcmax) >= 0x40000000) { Lcmax = L_shr(Lcmax, 1); Lcmax_scale++; } } nq = norm_l(Lsum); Lsum = L_shl(Lsum, nq); nq1 = norm_l(Lcmax); Lcmax = L_shl(Lcmax, nq1); Lsum = L_mpy_ll(Lsum, Lcmax); n1 = norm_l(Lsum); Lsum = L_shl(Lsum, n1); sum = sqroot(Lsum); n1 = add(add(n1, nq), nq1); n1 = sub(sub(n1, Lcmax_scale), Lsum_scale); n2 = shr(n1, 1); if (n1 & 1) Lsum = L_mult(sum, 23170); else Lsum = L_deposit_h(sum); n2 = add(n2, Lcorr_scale); Lcorrmax = L_shl(Lcorrmax, n2); if ((Lsum == 0) || (Lcorrmax <= 0)) *beta = 0; else if (Lcorrmax > Lsum) *beta = 0x7fff; else *beta = round(L_divide(Lcorrmax, Lsum)); /* perform search for best delay value in around old pitch delay */ if (lastgoodpitch != 0) { M1 = lastgoodpitch - 6; M2 = lastgoodpitch + 6; if (M1 < dmin) M1 = dmin; if (M2 > dmax) M2 = dmax; if (dnew > M2 || dnew < M1) { Lcmax = LW_MIN; Lcmax_scale = 1; for (m = M1; m <= M2; m++) { n1 = 1; for (i = 0, Lsum = 0; i < length - m; i++) { Ltempf = L_mult(buf[i], buf[i + m]); Ltempf = L_shr(Ltempf, n1); Lsum = L_add(Lsum, Ltempf); if (L_abs(Lsum) >= 0x40000000) { Lsum = L_shr(Lsum, 1); n1++; } } if ( ((Lcmax_scale >= n1) && (L_shr(Lsum, sub(Lcmax_scale, n1)) > Lcmax)) || ((Lcmax_scale < n1) && (Lsum > L_shr(Lcmax, sub(n1, Lcmax_scale)))) ) { Lcmax = Lsum; dnewtmp = m; Lcmax_scale = n1; } } Lcorr_scale = 1; for (i = 0, Ltmp = 0; i < length - dnewtmp; i++) { Ltempf = L_mult(buf[i + dnewtmp], buf[i + dnewtmp]); Ltempf = L_shr(Ltempf, Lcorr_scale); Ltmp = L_add(Ltmp, Ltempf); if (L_abs(Ltmp) >= 0x40000000) { Ltmp = L_shr(Ltmp, 1); Lcorr_scale++; } } Lsum_scale = 1; for (i = 0, Lsum = 0; i < length - dnewtmp; i++) { Ltempf = L_mult(buf[i], buf[i]); Ltempf = L_shr(Ltempf, Lsum_scale); Lsum = L_add(Lsum, Ltempf); if (L_abs(Lsum) >= 0x40000000) { Lsum = L_shr(Lsum, 1); Lsum_scale++; } } nq = norm_l(Ltmp); Ltmp 
= L_shl(Ltmp, nq); nq1 = norm_l(Lsum); Lsum = L_shl(Lsum, nq1); Ltmp = L_mpy_ll(Ltmp, Lsum); n1 = norm_l(Ltmp); Ltmp = L_shl(Ltmp, n1); sum = sqroot(Ltmp); n1 = add(add(n1, nq), nq1); n1 = sub(sub(n1, Lsum_scale), Lcorr_scale); n2 = shr(n1, 1); if (n1 & 1) Ltmp = L_mult(sum, 23170); else Ltmp = L_deposit_h(sum); n2 = add(n2, Lcmax_scale); Lcmax = L_shl(Lcmax, n2); if ((Ltmp == 0) || (Lcmax <= 0)) tap1 = 0; else if (Lcmax >= Ltmp) tap1 = 0x7fff; else tap1 = round(L_divide(Lcmax, Ltmp)); /* Replace dnew with dnewtmp if tap1 is large enough */ if ((dnew > M2 && (shr(tap1, 1) > mult_r(9830, *beta))) || (dnew < M1 && (shr(tap1, 1) > mult_r(19661, *beta)))) { dnew = dnewtmp; *beta = (tap1); } } } *delay = dnew; if (*beta > 13107) { lastgoodpitch = dnew; lastbeta = *beta; } else { lastbeta = mult_r(24576, lastbeta); if (lastbeta < 9830) lastgoodpitch = 0; } }
static Word16 Lag_max( /* o : lag found */ vadState *vadSt, /* i/o : VAD state struct */ Word32 corr[], /* i : correlation vector. */ Word16 scal_sig[], /* i : scaled signal. */ Word16 L_frame, /* i : length of frame to compute pitch */ Word16 lag_max, /* i : maximum lag */ Word16 lag_min, /* i : minimum lag */ Word16 old_lag, /* i : old open-loop lag */ Word16 *cor_max, /* o : normalized correlation of selected lag */ Word16 wght_flg, /* i : is weighting function used */ Word16 *gain_flg, /* o : open-loop flag */ Flag dtx, /* i : dtx flag; use dtx=1, do not use dtx=0 */ Flag *pOverflow /* o : overflow flag */ ) { Word16 i; Word16 j; Word16 *p; Word16 *p1; Word32 max; Word32 t0; Word16 t0_h; Word16 t0_l; Word16 p_max; const Word16 *ww; const Word16 *we; Word32 t1; Word16 temp; ww = &corrweight[250]; we = &corrweight[123 + lag_max - old_lag]; max = MIN_32; p_max = lag_max; for (i = lag_max; i >= lag_min; i--) { t0 = corr[-i]; /* Weighting of the correlation function. */ L_Extract(corr[-i], &t0_h, &t0_l, pOverflow); t0 = Mpy_32_16(t0_h, t0_l, *ww, pOverflow); ww--; if (wght_flg > 0) { /* Weight the neighbourhood of the old lag. */ L_Extract(t0, &t0_h, &t0_l, pOverflow); t0 = Mpy_32_16(t0_h, t0_l, *we, pOverflow); we--; } /* if (L_sub (t0, max) >= 0) */ if (t0 >= max) { max = t0; p_max = i; } } p = &scal_sig[0]; p1 = &scal_sig[-p_max]; t0 = 0; t1 = 0; for (j = 0; j < L_frame; j++, p++, p1++) { t0 = L_mac(t0, *p, *p1, pOverflow); t1 = L_mac(t1, *p1, *p1, pOverflow); } if (dtx) { /* no test() call since this if is only in simulation env */ #ifdef VAD2 /* Save max correlation */ vadSt->L_Rmax = L_add(vadSt->L_Rmax, t0, pOverflow); /* Save max energy */ vadSt->L_R0 = L_add(vadSt->L_R0, t1, pOverflow); #else /* update and detect tone */ vad_tone_detection_update(vadSt, 0, pOverflow); vad_tone_detection(vadSt, t0, t1, pOverflow); #endif } /* gain flag is set according to the open_loop gain */ /* is t2/t1 > 0.4 ? 
*/ temp = pv_round(t1, pOverflow); t1 = L_msu(t0, temp, 13107, pOverflow); *gain_flg = pv_round(t1, pOverflow); *cor_max = 0; return (p_max); }
/*
 * L_Extract - split a 32-bit value into two 16-bit parts.
 *
 *   L_32 (i) : value to split
 *   hi   (o) : extract_h(L_32)
 *   lo   (o) : low word of (L_32 >> 1) - 2 * hi * 16384, computed with
 *              L_shr / L_msu / extract_l
 */
void L_Extract(Word32 L_32, Word16 *hi, Word16 *lo)
{
  Word16 upper;
  Word32 halved;

  upper  = extract_h(L_32);
  halved = L_shr(L_32, 1);            /* L_32 >> 1 */

  *hi = upper;
  *lo = extract_l(L_msu(halved, upper, 16384));
}
/*
 * rmlt_coefs_to_samples - turn one frame of transform coefficients into
 * output samples by inverse transform, windowing and overlap with the
 * previous frame.
 *
 *   coefs       (i) : dct_length coefficients
 *   out_samples (o) : dct_length output samples
 *   dct_length  (i) : transform size; selects which window table is used
 *   mag_shift   (i) : when non-zero, every transformed value is passed
 *                     through shr(value, mag_shift)
 *   chn         (i) : channel index; selects the slice of old_samples[]
 *                     starting at chn * dct_length
 *
 * Uses the file-level scratch buffer windowed_data[] and updates the
 * per-channel history in old_samples[].
 */
void rmlt_coefs_to_samples(Int16 *coefs, Int16 *out_samples, Int16 dct_length, Int16 mag_shift, Uint16 chn)
{
    Int16 k;
    Int16 half_dct_size;
    Int16 *window;
    Int16 *history;
    Int32 sum;

    half_dct_size = dct_length >> 1;

    /* Perform a Type IV (inverse) DCT on the coefficients */
    dct_type_iv_s(coefs, windowed_data, dct_length);

    if (mag_shift != 0)
    {
        for (k = dct_length; k--; )
        {
            windowed_data[k] = shr(windowed_data[k], mag_shift);
        }
    }

    if (dct_length == MAX_DCT_LENGTH)
    {
        window = max_rmlt_to_samples_window;
    }
    else
    {
        window = rmlt_to_samples_window;
    }
    history = &old_samples[chn * dct_length];

    /* First half of the output: the window runs forwards over the
     * time-reversed first half of the new data and backwards over the
     * stored history; the two products are added. */
    for (k = 0; k < half_dct_size; k++)
    {
        sum = L_mult(window[k], windowed_data[half_dct_size - 1 - k]);
        sum = L_mac(sum, window[dct_length - 1 - k], history[k]);
        out_samples[k] = round16(L_shl(sum, 2));
    }

    /* Second half of the output: same new-data half read forwards, the
     * history read backwards, and the history product subtracted. */
    for (k = 0; k < half_dct_size; k++)
    {
        sum = L_mult(window[half_dct_size + k], windowed_data[k]);
        sum = L_msu(sum,
                    window[dct_length - half_dct_size - 1 - k],
                    history[half_dct_size - 1 - k]);
        out_samples[half_dct_size + k] = round16(L_shl(sum, 2));
    }

    /* Save the second half of the new samples for
     * next time, when they will be the old samples. */
    DSP_memcpy(&old_samples[chn*dct_length], &windowed_data[half_dct_size], half_dct_size);
}
/*-------------------------------------------------------------------*
 * Function  ACELP_10i40_35bits()                                    *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                                    *
 * Algebraic codebook; 35 bits: 10 pulses in a frame of 40 samples.  *
 *-------------------------------------------------------------------*
 * The code length is 40, containing 10 nonzero pulses: i0...i9.     *
 * All pulses can have two (2) possible amplitudes: +1 or -1.        *
 * Each pulse can have eight (8) possible positions:                 *
 *                                                                   *
 * i0,i5 :  0, 5, 10, 15, 20, 25, 30, 35.                            *
 * i1,i6 :  1, 6, 11, 16, 21, 26, 31, 36.                            *
 * i2,i7 :  2, 7, 12, 17, 22, 27, 32, 37.                            *
 * i3,i8 :  3, 8, 13, 18, 23, 28, 33, 38.                            *
 * i4,i9 :  4, 9, 14, 19, 24, 29, 34, 39.                            *
 *-------------------------------------------------------------------*
 * The search keeps the candidate with the largest ps*ps / alp ratio *
 * (compared by cross-multiplication) and hands its ten positions to *
 * build_code(), which fills code[], y[] and indx[].                 *
 *-------------------------------------------------------------------*/
void ACELP_10i40_35bits(
  Word16 x[],    /* (i) Q0 : target vector                                 */
  Word16 cn[],   /* (i) Q0 : residual after long term prediction           */
  Word16 H[],    /* (i) Q12: impulse response of weighted synthesis filter */
  Word16 code[], /* (o) Q12: algebraic (fixed) codebook excitation         */
  Word16 y[],    /* (o) Q11: filtered fixed codebook excitation            */
  Word16 indx[]  /* (o)    : index 5 words: 7,7,7,7,7 = 35 bits            */
)
{
  Word16 i, j, k, ix, iy, pos, track;
  Word16 psk, ps, alpk, alp, itrk[3];
  Word32 s, corr[NB_TRACK], L_tmp;
  Word16 *p0, *p1, *h, *h_inv;

  /* these vectors are not static */
  Word16 dn[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
  Word16 ip[10], codvec[10], pos_max[NB_TRACK];
  Word16 cor_x[NB_POS], cor_y[NB_POS];
  Word16 h_buf[4*L_SUBFR];
  Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];

  /* Zero the first L_SUBFR entries of each half of h_buf.  After this
   * loop h and h_inv point just past their zeroed regions, so that
   * "h - ix" / "h_inv - ix" below stay inside h_buf. */
  h = h_buf;
  h_inv = h_buf + (2*L_SUBFR);
  for (i=0; i<L_SUBFR; i++) {
    *h++ = 0;
    *h_inv++ = 0;
  }

  /* Compute correlation between target x[] and H[] */
  cor_h_x_e(H, x, dn);

  /* find the sign of each pulse position */
  set_sign(32767, cn, dn, sign, vec, pos_max, corr);

  /* Compute correlations of h[] needed for the codebook search. */
  cor_h_e(H, sign, vec, h, h_inv, rrixix, rrixiy);

  /*-------------------------------------------------------------------*
   * Search starting position for pulse i0 and i1.                     *
   *    Each of the 5 tracks is first combined with its cyclic         *
   *    successor (corr[k] += corr[k+1], track 4 wrapping to track 0). *
   *    The 3 largest combined values then give the 3 starting tracks  *
   *    used by the depth-first search below.                          *
   *-------------------------------------------------------------------*/
  s = L_add(corr[4], corr[0]);
  for (k=0; k<NB_TRACK-1; k++) corr[k] = L_add(corr[k], corr[k+1]);
  corr[4] = s;

  for (k=0; k<3; k++) {
    s = corr[0];
    track = 0;
    for (i=1; i<NB_TRACK; i++) {
      L_tmp = L_sub(corr[i], s);
      if (L_tmp > 0) {
        s = corr[i];
        track = i;
      }
    }
    corr[track] = -1;   /* exclude this track from the next pass */
    itrk[k] = track;
  }

  /*-------------------------------------------------------------------*
   * Deep first search: 3 iterations of 256 tests = 768 tests.         *
   *                                                                   *
   * Stages of deep first search:                                      *
   *   stage 1 : fix i0 and i1 --> 2 positions is fixed previously.    *
   *   stage 2 : fix i2 and i3 --> try 8x8 = 64 positions.             *
   *   stage 3 : fix i4 and i5 --> try 8x8 = 64 positions.             *
   *   stage 4 : fix i6 and i7 --> try 8x8 = 64 positions.             *
   *   stage 5 : fix i8 and i9 --> try 8x8 = 64 positions.             *
   *-------------------------------------------------------------------*/
  psk = -1;
  alpk = 1;

  /* Defensive default: codvec[] is otherwise written only when a candidate
   * passes the "s > 0" test below.  If none ever does, build_code() would
   * index sign[] with indeterminate values.  Positions 0..9 are all valid
   * and put two pulses on each track. */
  for (i=0; i<10; i++) codvec[i] = i;

  for (pos=0; pos<3; pos++) {
    k = itrk[pos];       /* starting position index */

    /* stage 1: fix pulse i0 and i1 according to max of correlation */
    ix = pos_max[ipos[k]];
    iy = pos_max[ipos[k+1]];
    ps = add(dn[ix], dn[iy]);
    i = mult(ix, Q15_1_5);
    j = mult(iy, Q15_1_5);
    alp = add(rrixix[ipos[k]][i], rrixix[ipos[k+1]][j]);
    i = add(shl(i,3), j);
    alp = add(alp, rrixiy[ipos[k]][i]);
    ip[0] = ix;
    ip[1] = iy;

    for (i=0; i<L_SUBFR; i++) vec[i] = 0;

    /* stage 2..5: fix pulse i2,i3,i4,i5,i6,i7,i8 and i9 */
    for (j=2; j<10; j+=2) {

      /*--------------------------------------------------*
       * Store all impulse response of all fixed pulses   *
       * in vector vec[] for the "cor_h_vec()" function.  *
       *--------------------------------------------------*/
      if (sign[ix] < 0) p0 = h_inv - ix;
      else p0 = h - ix;

      if (sign[iy] < 0) p1 = h_inv - iy;
      else p1 = h - iy;

      for (i=0; i<L_SUBFR; i++) {
        vec[i] = add(vec[i], add(*p0, *p1));
        p0++; p1++;
      }

      /*--------------------------------------------------*
       * Calculate correlation of all possible positions  *
       * of the next 2 pulses with previous fixed pulses. *
       * Each pulse can have 8 possible positions         *
       *--------------------------------------------------*/
      cor_h_vec(h, vec, ipos[k+j], sign, rrixix, cor_x);
      cor_h_vec(h, vec, ipos[k+j+1], sign, rrixix, cor_y);

      /*--------------------------------------------------*
       * Fix 2 pulses, try 8x8 = 64 positions.            *
       *--------------------------------------------------*/
      search_ixiy(ipos[k+j], ipos[k+j+1], &ps, &alp, &ix, &iy,
                  dn, cor_x, cor_y, rrixiy);

      ip[j] = ix;
      ip[j+1] = iy;
    }

    /* memorise new codevector if it's better than the last one.
     * s > 0  <=>  alpk*ps*ps > psk*alp, i.e. the ratio improved. */
    ps = mult(ps,ps);
    s = L_msu(L_mult(alpk,ps),psk,alp);

    if (s > 0) {
      psk = ps;
      alpk = alp;
      for (i=0; i<10; i++) codvec[i] = ip[i];
    }
  } /* end of for (pos=0; pos<3; pos++) */

  /*-------------------------------------------------------------------*
   * index of 10 pulses = 35 bits on 5 words                           *
   * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                           *
   * indx[0] = 7 bits --> 3(pos#6) + 1(sign#1) + 3(pos#1)              *
   * indx[1] = 7 bits --> 3(pos#7) + 1(sign#2) + 3(pos#2)              *
   * indx[2] = 7 bits --> 3(pos#8) + 1(sign#3) + 3(pos#3)              *
   * indx[3] = 7 bits --> 3(pos#9) + 1(sign#4) + 3(pos#4)              *
   * indx[4] = 7 bits --> 3(pos#10)+ 1(sign#5) + 3(pos#5)              *
   *-------------------------------------------------------------------*/
  build_code(codvec, sign, 10, H, code, y, indx);

  /* keep only the 7 significant bits of every index word */
  for (i=0; i<NB_TRACK; i++) indx[i] = indx[i] & (Word16)127;

  return;
}
/*
 * D4i40_17_fast - search four signed pulse positions that maximise
 * square(correlation) / energy, then build the codeword, its filtered
 * version and the packed position index.
 *
 * Side effects on the inputs (both visible in the code below):
 *   - dn[] is replaced by its absolute value (negative entries negated);
 *   - the cross-correlation tables inside rr[] are multiplied in place by
 *     the pulse signs.
 *
 * Returns the position index: ip0/5 + (ip1/5 << 3) + (ip2/5 << 6) +
 * (packed ip3 << 9).  *sign receives one bit per pulse (bit set when the
 * pulse is positive).
 */
static Word16 D4i40_17_fast(/*(o) : Index of pulses positions.               */
  Word16 dn[],          /* (i)    : Correlations between h[] and Xn[].       */
  Word16 rr[],          /* (i)    : Correlations of impulse response h[].    */
  Word16 h[],           /* (i) Q12: Impulse response of filters.             */
  Word16 cod[],         /* (o) Q13: Selected algebraic codeword.             */
  Word16 y[],           /* (o) Q12: Filtered algebraic codeword.             */
  Word16 *sign          /* (o)    : Signs of 4 pulses.                       */
)
{
  Word16 i0, i1, i2, i3, ip0, ip1, ip2, ip3;
  Word16 i, j, ix, iy, track, trk, max;
  Word16 prev_i0, i1_offset;
  Word16 psk, ps, ps0, ps1, ps2, sq, sq2;
  Word16 alpk, alp, alp_16;
  Word32 s, alp0, alp1, alp2;
  Word16 *p0, *p1, *p2, *p3, *p4;
  Word16 sign_dn[L_SUBFR], sign_dn_inv[L_SUBFR], *psign;
  Word16 tmp_vect[NB_POS];
  Word16 *rri0i0, *rri1i1, *rri2i2, *rri3i3, *rri4i4;
  Word16 *rri0i1, *rri0i2, *rri0i3, *rri0i4;
  Word16 *rri1i2, *rri1i3, *rri1i4;
  Word16 *rri2i3, *rri2i4;

  Word16 *ptr_rri0i3_i4;
  Word16 *ptr_rri1i3_i4;
  Word16 *ptr_rri2i3_i4;
  Word16 *ptr_rri3i3_i4;

  /* Init pointers: rr[] holds five NB_POS-long diagonal tables followed
   * by nine MSIZE-long cross tables, in this order. */
  rri0i0 = rr;
  rri1i1 = rri0i0 + NB_POS;
  rri2i2 = rri1i1 + NB_POS;
  rri3i3 = rri2i2 + NB_POS;
  rri4i4 = rri3i3 + NB_POS;
  rri0i1 = rri4i4 + NB_POS;
  rri0i2 = rri0i1 + MSIZE;
  rri0i3 = rri0i2 + MSIZE;
  rri0i4 = rri0i3 + MSIZE;
  rri1i2 = rri0i4 + MSIZE;
  rri1i3 = rri1i2 + MSIZE;
  rri1i4 = rri1i3 + MSIZE;
  rri2i3 = rri1i4 + MSIZE;
  rri2i4 = rri2i3 + MSIZE;

  /*-----------------------------------------------------------------------*
   * Chose the sign of the impulse.                                        *
   *-----------------------------------------------------------------------*/
  for (i=0; i<L_SUBFR; i++)
  {
    if (dn[i] >= 0)
    {
      sign_dn[i] = MAX_16;
      sign_dn_inv[i] = MIN_16;
    }
    else
    {
      sign_dn[i] = MIN_16;
      sign_dn_inv[i] = MAX_16;
      dn[i] = negate(dn[i]);
    }
  }

  /*-------------------------------------------------------------------*
   * Modification of rrixiy[] to take signs into account.              *
   *                                                                   *
   * Each entry is updated in place.  The store and the pointer        *
   * increment are separate statements: writing "*p++ = f(*p)" would   *
   * modify p and read it in the same expression without sequencing,   *
   * which is undefined behaviour in C.                                *
   *-------------------------------------------------------------------*/
  p0 = rri0i1;
  p1 = rri0i2;
  p2 = rri0i3;
  p3 = rri0i4;

  for(i0=0; i0<L_SUBFR; i0+=STEP)
  {
    psign = sign_dn;
    if (psign[i0] < 0) psign = sign_dn_inv;

    for(i1=1; i1<L_SUBFR; i1+=STEP)
    {
      *p0 = mult(*p0, psign[i1]);     p0++;
      *p1 = mult(*p1, psign[i1+1]);   p1++;
      *p2 = mult(*p2, psign[i1+2]);   p2++;
      *p3 = mult(*p3, psign[i1+3]);   p3++;
    }
  }

  p0 = rri1i2;
  p1 = rri1i3;
  p2 = rri1i4;

  for(i1=1; i1<L_SUBFR; i1+=STEP)
  {
    psign = sign_dn;
    if (psign[i1] < 0) psign = sign_dn_inv;

    for(i2=2; i2<L_SUBFR; i2+=STEP)
    {
      *p0 = mult(*p0, psign[i2]);     p0++;
      *p1 = mult(*p1, psign[i2+1]);   p1++;
      *p2 = mult(*p2, psign[i2+2]);   p2++;
    }
  }

  p0 = rri2i3;
  p1 = rri2i4;

  for(i2=2; i2<L_SUBFR; i2+=STEP)
  {
    psign = sign_dn;
    if (psign[i2] < 0) psign = sign_dn_inv;

    for(i3=3; i3<L_SUBFR; i3+=STEP)
    {
      *p0 = mult(*p0, psign[i3]);     p0++;
      *p1 = mult(*p1, psign[i3+1]);   p1++;
    }
  }

  /*-------------------------------------------------------------------*
   * Search the optimum positions of the four pulses which maximize    *
   *     square(correlation) / energy                                  *
   *-------------------------------------------------------------------*/
  psk = -1;
  alpk = 1;

  /* first pass uses the track-3 tables; they are switched to the track-4
   * tables at the bottom of the loop */
  ptr_rri0i3_i4 = rri0i3;
  ptr_rri1i3_i4 = rri1i3;
  ptr_rri2i3_i4 = rri2i3;
  ptr_rri3i3_i4 = rri3i3;

  /* Initializations only to remove warning from some compilers */
  ip0=0; ip1=1; ip2=2; ip3=3; ix=0; iy=0; ps=0;

  /* search 2 times: track 3 and 4 */
  for (track=3, trk=0; track<5; track++, trk++)
  {
    /*------------------------------------------------------------------*
     * depth first search 3, phase A: track 2 and 3/4.                  *
     *------------------------------------------------------------------*/
    sq = -1;
    alp = 1;

    /* i0 loop: 2 positions in track 2 */
    prev_i0 = -1;

    for (i=0; i<2; i++)
    {
      max = -1;
      /* search "dn[]" maximum position in track 2, skipping the position
       * already tried in the previous iteration */
      for (j=2; j<L_SUBFR; j+=STEP)
      {
        if ((sub(dn[j], max) > 0) && (sub(prev_i0,j) != 0))
        {
          max = dn[j];
          i0 = j;
        }
      }
      prev_i0 = i0;

      j = mult(i0, 6554);        /* j = i0/5 */
      p0 = rri2i2 + j;

      ps1 = dn[i0];
      alp1 = L_mult(*p0, _1_4);

      /* i1 loop: 8 positions in track 3/4 */
      p0 = ptr_rri2i3_i4 + shl(j, 3);
      p1 = ptr_rri3i3_i4;

      for (i1=track; i1<L_SUBFR; i1+=STEP)
      {
        ps2 = add(ps1, dn[i1]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i1] + 1/2*rr[i1][i1]; */
        alp2 = L_mac(alp1, *p0++, _1_2);
        alp2 = L_mac(alp2, *p1++, _1_4);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        /* s > 0  <=>  sq2/alp_16 > sq/alp */
        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          ps = ps2;
          alp = alp_16;
          ix = i0;
          iy = i1;
        }
      }
    }

    i0 = ix;
    i1 = iy;
    i1_offset = shl(mult(i1, 6554), 3);       /* j = 8*(i1/5) */

    /*------------------------------------------------------------------*
     * depth first search 3, phase B: track 0 and 1.                    *
     *------------------------------------------------------------------*/
    ps0 = ps;
    alp0 = L_mult(alp, _1_4);

    sq = -1;
    alp = 1;

    /* build vector for next loop to decrease complexity */
    p0 = rri1i2 + mult(i0, 6554);
    p1 = ptr_rri1i3_i4 + mult(i1, 6554);
    p2 = rri1i1;
    p3 = tmp_vect;

    for (i3=1; i3<L_SUBFR; i3+=STEP)
    {
      /* rrv[i3] = rr[i3][i3] + rr[i0][i3] + rr[i1][i3]; */
      s = L_mult(*p0, _1_4);        p0 += NB_POS;
      s = L_mac(s, *p1, _1_4);      p1 += NB_POS;
      s = L_mac(s, *p2++, _1_8);
      *p3++ = round(s);
    }

    /* i2 loop: 8 positions in track 0 */
    p0 = rri0i2 + mult(i0, 6554);
    p1 = ptr_rri0i3_i4 + mult(i1, 6554);
    p2 = rri0i0;
    p3 = rri0i1;

    for (i2=0; i2<L_SUBFR; i2+=STEP)
    {
      ps1 = add(ps0, dn[i2]);         /* index increment = STEP */

      /* alp1 = alp0 + rr[i0][i2] + rr[i1][i2] + 1/2*rr[i2][i2]; */
      alp1 = L_mac(alp0, *p0, _1_8);       p0 += NB_POS;
      alp1 = L_mac(alp1, *p1, _1_8);       p1 += NB_POS;
      alp1 = L_mac(alp1, *p2++, _1_16);

      /* i3 loop: 8 positions in track 1 */
      p4 = tmp_vect;

      for (i3=1; i3<L_SUBFR; i3+=STEP)
      {
        ps2 = add(ps1, dn[i3]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i3] + rr[i1][i3] + rr[i2][i3] + 1/2*rr[i3][i3]; */
        alp2 = L_mac(alp1, *p3++, _1_8);
        alp2 = L_mac(alp2, *p4++, _1_2);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          alp = alp_16;
          ix = i2;
          iy = i3;
        }
      }
    }

    /*----------------------------------------------------------------*
     * depth first search 3: compare codevector with the best case.   *
     *----------------------------------------------------------------*/
    s = L_msu(L_mult(alpk,sq),psk,alp);
    if (s > 0)
    {
      psk = sq;
      alpk = alp;
      ip2 = i0;
      ip3 = i1;
      ip0 = ix;
      ip1 = iy;
    }

    /*------------------------------------------------------------------*
     * depth first search 4, phase A: track 3 and 0.                    *
     *------------------------------------------------------------------*/
    sq = -1;
    alp = 1;

    /* i0 loop: 2 positions in track 3/4 */
    prev_i0 = -1;

    for (i=0; i<2; i++)
    {
      max = -1;
      /* search "dn[]" maximum position in track 3/4 */
      for (j=track; j<L_SUBFR; j+=STEP)
      {
        if ((sub(dn[j], max) > 0) && (sub(prev_i0,j) != 0))
        {
          max = dn[j];
          i0 = j;
        }
      }
      prev_i0 = i0;

      j = mult(i0, 6554);        /* j = i0/5 */
      p0 = ptr_rri3i3_i4 + j;

      ps1 = dn[i0];
      alp1 = L_mult(*p0, _1_4);

      /* i1 loop: 8 positions in track 0 */
      p0 = ptr_rri0i3_i4 + j;
      p1 = rri0i0;

      for (i1=0; i1<L_SUBFR; i1+=STEP)
      {
        ps2 = add(ps1, dn[i1]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i1] + 1/2*rr[i1][i1]; */
        alp2 = L_mac(alp1, *p0, _1_2);       p0 += NB_POS;
        alp2 = L_mac(alp2, *p1++, _1_4);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          ps = ps2;
          alp = alp_16;
          ix = i0;
          iy = i1;
        }
      }
    }

    i0 = ix;
    i1 = iy;
    i1_offset = shl(mult(i1, 6554), 3);       /* j = 8*(i1/5) */

    /*------------------------------------------------------------------*
     * depth first search 4, phase B: track 1 and 2.                    *
     *------------------------------------------------------------------*/
    ps0 = ps;
    alp0 = L_mult(alp, _1_4);

    sq = -1;
    alp = 1;

    /* build vector for next loop to decrease complexity */
    p0 = ptr_rri2i3_i4 + mult(i0, 6554);
    p1 = rri0i2 + i1_offset;
    p2 = rri2i2;
    p3 = tmp_vect;

    for (i3=2; i3<L_SUBFR; i3+=STEP)
    {
      /* rrv[i3] = rr[i3][i3] + rr[i0][i3] + rr[i1][i3]; */
      s = L_mult(*p0, _1_4);         p0 += NB_POS;
      s = L_mac(s, *p1++, _1_4);
      s = L_mac(s, *p2++, _1_8);
      *p3++ = round(s);
    }

    /* i2 loop: 8 positions in track 1 */
    p0 = ptr_rri1i3_i4 + mult(i0, 6554);
    p1 = rri0i1 + i1_offset;
    p2 = rri1i1;
    p3 = rri1i2;

    for (i2=1; i2<L_SUBFR; i2+=STEP)
    {
      ps1 = add(ps0, dn[i2]);         /* index increment = STEP */

      /* alp1 = alp0 + rr[i0][i2] + rr[i1][i2] + 1/2*rr[i2][i2]; */
      alp1 = L_mac(alp0, *p0, _1_8);       p0 += NB_POS;
      alp1 = L_mac(alp1, *p1++, _1_8);
      alp1 = L_mac(alp1, *p2++, _1_16);

      /* i3 loop: 8 positions in track 2 */
      p4 = tmp_vect;

      for (i3=2; i3<L_SUBFR; i3+=STEP)
      {
        ps2 = add(ps1, dn[i3]);       /* index increment = STEP */

        /* alp1 = alp0 + rr[i0][i3] + rr[i1][i3] + rr[i2][i3] + 1/2*rr[i3][i3]; */
        alp2 = L_mac(alp1, *p3++, _1_8);
        alp2 = L_mac(alp2, *p4++, _1_2);

        sq2 = mult(ps2, ps2);
        alp_16 = round(alp2);

        s = L_msu(L_mult(alp,sq2),sq,alp_16);
        if (s > 0)
        {
          sq = sq2;
          alp = alp_16;
          ix = i2;
          iy = i3;
        }
      }
    }

    /*----------------------------------------------------------------*
     * depth first search 4: compare codevector with the best case.   *
     *----------------------------------------------------------------*/
    s = L_msu(L_mult(alpk,sq),psk,alp);
    if (s > 0)
    {
      psk = sq;
      alpk = alp;
      ip3 = i0;
      ip0 = i1;
      ip1 = ix;
      ip2 = iy;
    }

    /* second pass works on the track-4 tables */
    ptr_rri0i3_i4 = rri0i4;
    ptr_rri1i3_i4 = rri1i4;
    ptr_rri2i3_i4 = rri2i4;
    ptr_rri3i3_i4 = rri4i4;
  }

  /* Set the sign of impulses (i0..i3 are reused to hold the signs) */
  i0 = sign_dn[ip0];
  i1 = sign_dn[ip1];
  i2 = sign_dn[ip2];
  i3 = sign_dn[ip3];

  /* Find the codeword corresponding to the selected positions */
  for(i=0; i<L_SUBFR; i++)
  {
    cod[i] = 0;
  }

  cod[ip0] = shr(i0, 2);         /* From Q15 to Q13 */
  cod[ip1] = shr(i1, 2);
  cod[ip2] = shr(i2, 2);
  cod[ip3] = shr(i3, 2);

  /* find the filtered codeword: signed sum of h[] shifted to each pulse */
  for (i = 0; i < ip0; i++) y[i] = 0;

  if(i0 > 0)
    for(i=ip0, j=0; i<L_SUBFR; i++, j++) y[i] = h[j];
  else
    for(i=ip0, j=0; i<L_SUBFR; i++, j++) y[i] = negate(h[j]);

  if(i1 > 0)
    for(i=ip1, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  else
    for(i=ip1, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);

  if(i2 > 0)
    for(i=ip2, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  else
    for(i=ip2, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);

  if(i3 > 0)
    for(i=ip3, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], h[j]);
  else
    for(i=ip3, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], h[j]);

  /* find codebook index;  17-bit address */
  i = 0;
  if(i0 > 0) i = add(i, 1);
  if(i1 > 0) i = add(i, 2);
  if(i2 > 0) i = add(i, 4);
  if(i3 > 0) i = add(i, 8);
  *sign = i;

  ip0 = mult(ip0, 6554);         /* ip0/5 */
  ip1 = mult(ip1, 6554);         /* ip1/5 */
  ip2 = mult(ip2, 6554);         /* ip2/5 */
  i   = mult(ip3, 6554);         /* ip3/5 */
  j   = add(i, shl(i, 2));       /* j = i*5 */
  j   = sub(ip3, add(j, 3));     /* j= ip3%5 -3 */
  ip3 = add(shl(i, 1), j);

  i = add(ip0, shl(ip1, 3));
  i = add(i  , shl(ip2, 6));
  i = add(i  , shl(ip3, 9));

  return i;
}
/*-------------------------------------------------------------------*
 * Function  ACELP_12i40_44bits()                                    *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                                    *
 * Algebraic codebook; 44 bits: 12 pulses in a frame of 40 samples.  *
 *-------------------------------------------------------------------*
 * The code length is 40, containing 12 nonzero pulses: i0...i11.    *
 * 12 pulses can have two (2) possible amplitudes: +1 or -1.         *
 * 10 pulses can have eight (8) possible positions:                  *
 * i2,i7  :  0, 5, 10, 15, 20, 25, 30, 35.  --> t0                   *
 * i3,i8  :  1, 6, 11, 16, 21, 26, 31, 36.  --> t1                   *
 * i4,i9  :  2, 7, 12, 17, 22, 27, 32, 37.  --> t2                   *
 * i5,i10 :  3, 8, 13, 18, 23, 28, 33, 38.  --> t3                   *
 * i6,i11 :  4, 9, 14, 19, 24, 29, 34, 39.  --> t4                   *
 *                                                                   *
 * The 2 other pulses can be on the following track:                 *
 *   t0-t1,t1-t2,t2-t3,t3-t4,t4-t0.                                  *
 *-------------------------------------------------------------------*/
void ACELP_12i40_44bits(
  Word16 x[],    /* (i) Q0 : target vector                                 */
  Word16 cn[],   /* (i) Q0 : residual after long term prediction           */
  Word16 H[],    /* (i) Q12: impulse response of weighted synthesis filter */
  Word16 code[], /* (o) Q12: algebraic (fixed) codebook excitation         */
  Word16 y[],    /* (o) Q11: filtered fixed codebook excitation            */
  Word16 indx[]  /* (o)    : index 5 words: 13,10,7,7,7 = 44 bits          */
)
{
  Word16 i, j, k, ix, iy, itrk[3], track, pos, index, idx[NB_TRACK];
  Word16 psk, ps, alpk, alp;
  Word32 s, corr[NB_TRACK];
  Word16 *p0, *p1, *h, *h_inv;

  Word16 dn[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
  Word16 ip[12], codvec[12], pos_max[NB_TRACK];
  Word16 cor_x[NB_POS], cor_y[NB_POS];
  Word16 h_buf[4*L_SUBFR];
  Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
  Word32 L_tmp;

  /* Zero the first L_SUBFR entries of each half of h_buf.  After this
   * loop h and h_inv point just past their zeroed regions, so that
   * "h - ix" / "h_inv - ix" below stay inside h_buf. */
  h = h_buf;
  h_inv = h_buf + (2*L_SUBFR);
  for (i=0; i<L_SUBFR; i++) {
    *h++ = 0;
    *h_inv++ = 0;
  }

  /* Compute correlation between target x[] and H[] */
  cor_h_x_e(H, x, dn);

  /* find the sign of each pulse position */
  set_sign(32767, cn, dn, sign, vec, pos_max, corr);

  /* Compute correlations of h[] needed for the codebook search. */
  cor_h_e(H, sign, vec, h, h_inv, rrixix, rrixiy);

  /*-------------------------------------------------------------------*
   * Search position for pulse i0 and i1.                              *
   *    Each track is combined with its cyclic successor, then the 3   *
   *    largest combined values select the 3 starting tracks.          *
   *-------------------------------------------------------------------*/
  s = L_add(corr[4], corr[0]);
  for (k=0; k<NB_TRACK-1; k++) corr[k] = L_add(corr[k], corr[k+1]);
  corr[4] = s;

  for (k=0; k<3; k++) {
    s = corr[0];
    track = 0;
    for (i=1; i<NB_TRACK; i++) {
      L_tmp = L_sub(corr[i], s);
      if (L_tmp > 0) {
        s = corr[i];
        track = i;
      }
    }
    corr[track] = -1;   /* exclude this track from the next pass */
    itrk[k] = track;
  }

  /*-------------------------------------------------------------------*
   * Deep first search: 3 iterations of 320 tests = 960 tests.         *
   *                                                                   *
   * Stages of deep first search:                                      *
   *   stage 1 : fix i0  and i1  --> 2 positions is fixed previously.  *
   *   stage 2 : fix i2  and i3  --> try 8x8 = 64 positions.           *
   *   stage 3 : fix i4  and i5  --> try 8x8 = 64 positions.           *
   *   stage 4 : fix i6  and i7  --> try 8x8 = 64 positions.           *
   *   stage 5 : fix i8  and i9  --> try 8x8 = 64 positions.           *
   *   stage 6 : fix i10 and i11 --> try 8x8 = 64 positions.           *
   *-------------------------------------------------------------------*/
  psk = -1;
  alpk = 1;

  /* Defensive default: codvec[] is otherwise written only when a candidate
   * passes the "s > 0" test below.  If none ever does, the code after the
   * search would index sign[], code[] and y[] with indeterminate values.
   * Positions 0..11 are all valid indices. */
  for (i=0; i<12; i++) codvec[i] = i;

  for (pos=0; pos<3; pos++) {
    k = itrk[pos];       /* starting position index */

    /* stage 1: fix pulse i0 and i1 according to max of correlation */
    ix = pos_max[ipos[k]];
    iy = pos_max[ipos[k+1]];
    ps = add(dn[ix], dn[iy]);
    i = mult(ix, Q15_1_5);
    j = mult(iy, Q15_1_5);
    alp = add(rrixix[ipos[k]][i], rrixix[ipos[k+1]][j]);
    i = add(shl(i,3), j);
    alp = add(alp, rrixiy[ipos[k]][i]);
    ip[0] = ix;
    ip[1] = iy;

    for (i=0; i<L_SUBFR; i++) vec[i] = 0;

    /* stage 2..6: fix pulse i2,i3,i4,i5,i6,i7,i8,i9,i10 and i11 */
    for (j=2; j<12; j+=2) {

      /*--------------------------------------------------*
       * Store all impulse response of all fixed pulses   *
       * in vector vec[] for the "cor_h_vec()" function.  *
       *--------------------------------------------------*/
      if (sign[ix] < 0) p0 = h_inv - ix;
      else p0 = h - ix;

      if (sign[iy] < 0) p1 = h_inv - iy;
      else p1 = h - iy;

      for (i=0; i<L_SUBFR; i++) {
        vec[i] = add(vec[i], add(*p0, *p1));
        p0++; p1++;
      }

      /*--------------------------------------------------*
       * Calculate correlation of all possible positions  *
       * of the next 2 pulses with previous fixed pulses. *
       * Each pulse can have 8 possible positions         *
       *--------------------------------------------------*/
      cor_h_vec(h, vec, ipos[k+j], sign, rrixix, cor_x);
      cor_h_vec(h, vec, ipos[k+j+1], sign, rrixix, cor_y);

      /*--------------------------------------------------*
       * Fix 2 pulses, try 8x8 = 64 positions.            *
       *--------------------------------------------------*/
      search_ixiy(ipos[k+j], ipos[k+j+1], &ps, &alp, &ix, &iy,
                  dn, cor_x, cor_y, rrixiy);

      ip[j] = ix;
      ip[j+1] = iy;
    }

    /* memorise new codevector if it's better than the last one.
     * s > 0  <=>  alpk*ps*ps > psk*alp, i.e. the ratio improved. */
    ps = mult(ps,ps);
    s = L_msu(L_mult(alpk,ps),psk,alp);

    if (s > 0) {
      psk = ps;
      alpk = alp;
      for (i=0; i<12; i++) codvec[i] = ip[i];
    }
  } /* end of for (pos=0; pos<3; pos++) */

  /*-------------------------------------------------------------------*
   * index of 12 pulses = 44 bits on 5 words                           *
   * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                           *
   * indx[0] =13 bits --> 3(track) +                                   *
   *                      3(pos#11) + 3(pos#6) + 1(sign#1) + 3(pos#1)  *
   * indx[1] =10 bits --> 3(pos#12) + 3(pos#7) + 1(sign#2) + 3(pos#2)  *
   * indx[2] = 7 bits -->             3(pos#8) + 1(sign#3) + 3(pos#3)  *
   * indx[3] = 7 bits -->             3(pos#9) + 1(sign#4) + 3(pos#4)  *
   * indx[4] = 7 bits -->             3(pos#10)+ 1(sign#5) + 3(pos#5)  *
   *-------------------------------------------------------------------*/

  /* the last ten pulses are coded by build_code() into idx[] */
  build_code(codvec+2, sign, 10, H, code, y, idx);

  /* the first two pulses are added to code[] / y[] here and merged into
   * the index word of the track they fall on */
  for (k=0; k<2; k++) {
    pos = codvec[k];
    index = mult(pos, Q15_1_5);    /* index = pos/5       */
    track = sub(pos, extract_l(L_shr(L_mult(index, 5), 1)));   /* pos%5 */

    if (sign[pos] > 0) {
      code[pos] = add(code[pos], 4096);     /* 1.0 in Q12 */
      for (i=pos, j=0; i<L_SUBFR; i++, j++) y[i] = add(y[i], H[j]);
    }
    else {
      code[pos] = sub(code[pos], 4096);     /* 1.0 in Q12 */
      for (i=pos, j=0; i<L_SUBFR; i++, j++) y[i] = sub(y[i], H[j]);
      index = add(index, 8);                /* flag negative sign */
    }

    ix = shr(idx[track], (Word16)4) & (Word16)15;
    iy = idx[track] & (Word16)15;

    index = pack3(ix, iy, index);
    if (k == 0) index = add(shl(track, 10), index);
    indx[k] = index;
  }

  /* remaining words: the tracks that follow the track of the second
   * extra pulse, in cyclic order, 7 bits each */
  for (k=2; k<NB_TRACK; k++) {
    track = add(track, 1);
    if (track >= NB_TRACK) track = 0;
    indx[k] = (idx[track] & (Word16)127);
  }

  return;
}