/* Compute the amplitude (sqrt energy) in each of the bands */ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) { int i, c, N; const opus_int16 *eBands = m->eBands; N = M*m->shortMdctSize; c=0; do { for (i=0;i<end;i++) { int j; opus_val32 maxval=0; opus_val32 sum = 0; j=M*eBands[i]; do { maxval = MAX32(maxval, X[j+c*N]); maxval = MAX32(maxval, -X[j+c*N]); } while (++j<M*eBands[i+1]); if (maxval > 0) { int shift = celt_ilog2(maxval)-10; j=M*eBands[i]; do { sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), EXTRACT16(VSHR32(X[j+c*N],shift))); } while (++j<M*eBands[i+1]); /* We're adding one here to make damn sure we never end up with a pitch vector that's larger than unity norm */ bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); } else { bandE[i+c*m->nbEBands] = EPSILON; } /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ } } while (++c<C); /*printf ("\n");*/ }
/* * * Allocates all necessary storage space for the fft and ifft. * The return value is a contiguous block of memory. As such, * It can be freed with free(). * */ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch) { kiss_fft_state *st=NULL; size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/ if ( lenmem==NULL ) { st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded ); }else{ if (mem != NULL && *lenmem >= memneeded) st = (kiss_fft_state*)mem; *lenmem = memneeded; } if (st) { opus_int16 *bitrev; kiss_twiddle_cpx *twiddles; st->nfft=nfft; #ifdef FIXED_POINT st->scale_shift = celt_ilog2(st->nfft); if (st->nfft == 1<<st->scale_shift) st->scale = Q15ONE; else st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); #else st->scale = 1.f/nfft; #endif if (base != NULL) { st->twiddles = base->twiddles; st->shift = 0; while (st->shift < 32 && nfft<<st->shift != base->nfft) st->shift++; if (st->shift>=32) goto fail; } else { st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); compute_twiddles(twiddles, nfft); st->shift = -1; } if (!kf_factor(nfft,st->factors)) { goto fail; } /* bitrev */ st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft); if (st->bitrev==NULL) goto fail; compute_bitrev_table(0, bitrev, 1,1, st->factors,st); /* Initialize architecture specific fft parameters */ if (opus_fft_alloc_arch(st, arch)) goto fail; } return st; fail: opus_fft_free(st, arch); return NULL; }
static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len, int max_pitch, int *best_pitch #ifdef FIXED_POINT , int yshift, opus_val32 maxcorr #endif ) { int i, j; opus_val32 Syy=1; opus_val16 best_num[2]; opus_val32 best_den[2]; #ifdef FIXED_POINT int xshift; xshift = celt_ilog2(maxcorr)-14; #endif best_num[0] = -1; best_num[1] = -1; best_den[0] = 0; best_den[1] = 0; best_pitch[0] = 0; best_pitch[1] = 1; for (j=0;j<len;j++) Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift)); for (i=0;i<max_pitch;i++) { if (xcorr[i]>0) { opus_val16 num; opus_val32 xcorr16; xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift)); #ifndef FIXED_POINT /* Considering the range of xcorr16, this should avoid both underflows and overflows (inf) when squaring xcorr16 */ xcorr16 *= 1e-12f; #endif num = MULT16_16_Q15(xcorr16,xcorr16); if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy)) { if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy)) { best_num[1] = best_num[0]; best_den[1] = best_den[0]; best_pitch[1] = best_pitch[0]; best_num[0] = num; best_den[0] = Syy; best_pitch[0] = i; } else { best_num[1] = num; best_den[1] = Syy; best_pitch[1] = i; } } } Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift); Syy = MAX32(1, Syy); } }
static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len, int max_pitch, int *best_pitch #ifdef FIXED_POINT , int yshift, opus_val32 maxcorr #endif ) { int i, j; opus_val32 Syy=1; opus_val16 best_num[2]; opus_val32 best_den[2]; #ifdef FIXED_POINT int xshift; xshift = celt_ilog2(maxcorr)-14; #endif best_num[0] = -1; best_num[1] = -1; best_den[0] = 0; best_den[1] = 0; best_pitch[0] = 0; best_pitch[1] = 1; for (j=0;j<len;j++) Syy = MAC16_16(Syy, y[j],y[j]); for (i=0;i<max_pitch;i++) { if (xcorr[i]>0) { opus_val16 num; opus_val32 xcorr16; xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift)); num = MULT16_16_Q15(xcorr16,xcorr16); if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy)) { if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy)) { best_num[1] = best_num[0]; best_den[1] = best_den[0]; best_pitch[1] = best_pitch[0]; best_num[0] = num; best_den[0] = Syy; best_pitch[0] = i; } else { best_num[1] = num; best_den[1] = Syy; best_pitch[1] = i; } } } Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift); Syy = MAX32(1, Syy); } }
void testilog2(void) { celt_word32 x; for (x=1;x<=268435455;x+=127) { celt_word32 error = abs(celt_ilog2(x)-(int)floor(log2(x))); if (error!=0) { print("celt_ilog2 failed: celt_ilog2(x)!=floor(log2(x)) (x = %d, error = %d)\n",x,error); ret = 1; } } }
void _celt_autocorr( const opus_val16 *x, /* in: [0...n-1] samples x */ opus_val32 *ac, /* out: [0...lag-1] ac values */ const opus_val16 *window, int overlap, int lag, int n ) { opus_val32 d; int i; VARDECL(opus_val16, xx); SAVE_STACK; ALLOC(xx, n, opus_val16); celt_assert(n>0); celt_assert(overlap>=0); for (i=0;i<n;i++) xx[i] = x[i]; for (i=0;i<overlap;i++) { xx[i] = MULT16_16_Q15(x[i],window[i]); xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]); } #ifdef FIXED_POINT { opus_val32 ac0=0; int shift; for(i=0;i<n;i++) ac0 += SHR32(MULT16_16(xx[i],xx[i]),9); ac0 += 1+n; shift = celt_ilog2(ac0)-30+10; shift = (shift+1)/2; for(i=0;i<n;i++) xx[i] = VSHR32(xx[i], shift); } #endif while (lag>=0) { for (i = lag, d = 0; i < n; i++) d += xx[i] * xx[i-lag]; ac[lag] = d; /*printf ("%f ", ac[lag]);*/ lag--; } /*printf ("\n");*/ ac[0] += 10; RESTORE_STACK; }
/* Compute the amplitude (sqrt energy) in each of the bands */ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bank, int _C) { int i, c, N; const celt_int16 *eBands = m->eBands; const int C = CHANNELS(_C); N = FRAMESIZE(m); for (c=0;c<C;c++) { for (i=0;i<m->nbEBands;i++) { int j; celt_word32 maxval=0; celt_word32 sum = 0; j=eBands[i]; do { maxval = MAX32(maxval, X[j+c*N]); maxval = MAX32(maxval, -X[j+c*N]); } while (++j<eBands[i+1]); if (maxval > 0) { int shift = celt_ilog2(maxval)-10; j=eBands[i]; do { sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), EXTRACT16(VSHR32(X[j+c*N],shift))); } while (++j<eBands[i+1]); /* We're adding one here to make damn sure we never end up with a pitch vector that's larger than unity norm */ bank[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); } else { bank[i+c*m->nbEBands] = EPSILON; } /*printf ("%f ", bank[i+c*m->nbEBands]);*/ } } /*printf ("\n");*/ }
int _celt_autocorr( const opus_val16 *x, /* in: [0...n-1] samples x */ opus_val32 *ac, /* out: [0...lag-1] ac values */ const opus_val16 *window, int overlap, int lag, int n, int arch ) { opus_val32 d; int i, k; int fastN=n-lag; int shift; const opus_val16 *xptr; VARDECL(opus_val16, xx); SAVE_STACK; ALLOC(xx, n, opus_val16); celt_assert(n>0); celt_assert(overlap>=0); if (overlap == 0) { xptr = x; } else { for (i=0;i<n;i++) xx[i] = x[i]; for (i=0;i<overlap;i++) { xx[i] = MULT16_16_Q15(x[i],window[i]); xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]); } xptr = xx; } shift=0; #ifdef OPUS_FIXED_POINT { opus_val32 ac0; ac0 = 1+(n<<7); if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9); for(i=(n&1);i<n;i+=2) { ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9); ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9); } shift = celt_ilog2(ac0)-30+10; shift = (shift)/2; if (shift>0) { for(i=0;i<n;i++) xx[i] = PSHR32(xptr[i], shift); xptr = xx; } else shift = 0; } #endif celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); for (k=0;k<=lag;k++) { for (i = k+fastN, d = 0; i < n; i++) d = MAC16_16(d, xptr[i], xptr[i-k]); ac[k] += d; } #ifdef OPUS_FIXED_POINT shift = 2*shift; if (shift<=0) ac[0] += SHL32((opus_int32)1, -shift); if (ac[0] < 268435456) { int shift2 = 29 - EC_ILOG(ac[0]); for (i=0;i<=lag;i++) ac[i] = SHL32(ac[i], shift2); shift -= shift2; } else if (ac[0] >= 536870912) { int shift2=1; if (ac[0] >= 1073741824) shift2++; for (i=0;i<=lag;i++) ac[i] = SHR32(ac[i], shift2); shift += shift2; } #endif RESTORE_STACK; return shift; }
/** Takes the pitch vector and the decoded residual vector, computes the gain that will give ||p+g*y||=1 and mixes the residual with the pitch. */ static void mix_pitch_and_residual(int * restrict iy, celt_norm_t * restrict X, int N, int K, const celt_norm_t * restrict P) { int i; celt_word32_t Ryp, Ryy, Rpp; celt_word16_t ryp, ryy, rpp; celt_word32_t g; VARDECL(celt_norm_t, y); #ifdef FIXED_POINT int yshift; #endif SAVE_STACK; #ifdef FIXED_POINT yshift = 13-celt_ilog2(K); #endif ALLOC(y, N, celt_norm_t); Rpp = 0; i=0; do { Rpp = MAC16_16(Rpp,P[i],P[i]); y[i] = SHL16(iy[i],yshift); } while (++i < N); Ryp = 0; Ryy = 0; /* If this doesn't generate a dual MAC (on supported archs), fire the compiler guy */ i=0; do {