/**
 * Resample one channel using a sinc table that is interpolated on the fly.
 *
 * For every output sample the fractional read position is split into a
 * table offset and a sub-step fraction.  Four neighbouring table phases are
 * convolved with the input and blended with the weights from cubic_coef().
 *
 * @param st            resampler state (filter length, sinc table, step sizes)
 * @param channel_index channel whose position counters are read and updated
 * @param in            input samples; N taps are read from each position
 * @param in_len        the loop stops once the read position reaches *in_len
 * @param out           output samples, written out_stride apart
 * @param out_len       the loop stops once *out_len samples were produced
 * @return number of output samples written
 */
static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
{
   const int N = st->filt_len;
   const int out_stride = st->out_stride;
   const int int_advance = st->int_advance;
   const int frac_advance = st->frac_advance;
   const spx_uint32_t den_rate = st->den_rate;
   int out_sample = 0;
   int last_sample = st->last_sample[channel_index];
   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
   spx_word32_t sum;

   while (last_sample < (spx_int32_t)*in_len && out_sample < (spx_int32_t)*out_len)
   {
      const spx_word16_t *iptr = &in[last_sample];
      /* Integer part of the oversampled phase selects the table position */
      const int offset = samp_frac_num*st->oversample/st->den_rate;
      /* Remainder of the phase becomes the interpolation fraction */
#ifdef FIXED_POINT
      const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
#else
      const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
#endif
      spx_word16_t interp[4];

#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
      int tap;
      spx_word32_t accum[4] = {0,0,0,0};

      /* One inner product per neighbouring table phase */
      for (tap = 0; tap < N; tap++)
      {
         const spx_word16_t sample = iptr[tap];
         accum[0] += MULT16_16(sample,st->sinc_table[4+(tap+1)*st->oversample-offset-2]);
         accum[1] += MULT16_16(sample,st->sinc_table[4+(tap+1)*st->oversample-offset-1]);
         accum[2] += MULT16_16(sample,st->sinc_table[4+(tap+1)*st->oversample-offset]);
         accum[3] += MULT16_16(sample,st->sinc_table[4+(tap+1)*st->oversample-offset+1]);
      }

      cubic_coef(frac, interp);
      sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
      sum = SATURATE32PSHR(sum, 15, 32767);
#else
      cubic_coef(frac, interp);
      sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
#endif

      out[out_stride * out_sample] = sum;
      out_sample++;

      /* Advance the read position by the integer and fractional step */
      last_sample += int_advance;
      samp_frac_num += frac_advance;
      if (samp_frac_num >= den_rate)
      {
         samp_frac_num -= den_rate;
         last_sample++;
      }
   }

   st->last_sample[channel_index] = last_sample;
   st->samp_frac_num[channel_index] = samp_frac_num;
   return out_sample;
}
/* Scan the cross-correlation xcorr[0..max_pitch-1] and record the two lags
   with the largest xcorr^2/energy ratio in best_pitch[0] (best) and
   best_pitch[1] (runner-up).  The ratios are compared by cross-multiplying,
   so no division is performed.  Syy is the energy of the len-sample window
   of y starting at the current lag; it is updated incrementally and kept
   at 1 or above.  Only lags with a positive correlation are considered. */
static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
                            int max_pitch, int *best_pitch
#ifdef FIXED_POINT
                            , int yshift, opus_val32 maxcorr
#endif
                            )
{
   int lag, n;
   opus_val32 Syy = 1;
   opus_val16 best_num[2] = {-1, -1};
   opus_val32 best_den[2] = {0, 0};
#ifdef FIXED_POINT
   const int xshift = celt_ilog2(maxcorr)-14;
#endif

   best_pitch[0] = 0;
   best_pitch[1] = 1;

   /* Energy of the first window */
   for (n = 0; n < len; n++)
      Syy = ADD32(Syy, SHR32(MULT16_16(y[n],y[n]), yshift));

   for (lag = 0; lag < max_pitch; lag++)
   {
      if (xcorr[lag] > 0)
      {
         opus_val16 num;
         opus_val32 xcorr16;
         xcorr16 = EXTRACT16(VSHR32(xcorr[lag], xshift));
#ifndef FIXED_POINT
         /* Considering the range of xcorr16, this should avoid both underflows
            and overflows (inf) when squaring xcorr16 */
         xcorr16 *= 1e-12f;
#endif
         num = MULT16_16_Q15(xcorr16,xcorr16);
         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
         {
            /* Beats the runner-up; find out whether it also beats the best */
            int slot = 1;
            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
            {
               best_num[1] = best_num[0];
               best_den[1] = best_den[0];
               best_pitch[1] = best_pitch[0];
               slot = 0;
            }
            best_num[slot] = num;
            best_den[slot] = Syy;
            best_pitch[slot] = lag;
         }
      }
      /* Slide the energy window by one sample */
      Syy += SHR32(MULT16_16(y[lag+len],y[lag+len]),yshift) - SHR32(MULT16_16(y[lag],y[lag]),yshift);
      Syy = MAX32(1, Syy);
   }
}
/* Radix-2 butterfly over N groups of m butterflies, with consecutive groups
   mm elements apart.  Each butterfly combines Fout[k] with Fout[k+m] using
   the twiddle at stride fstride.  When st->inverse is clear the inputs are
   additionally divided by two; otherwise the plain C_MUL/C_SUB/C_ADDTO
   butterfly is used. */
static void kf_bfly2(
        kiss_fft_cpx * Fout,
        const size_t fstride,
        const kiss_fft_cfg st,
        int m,
        int N,
        int mm
        )
{
   kiss_fft_cpx * const Fout_beg = Fout;
   kiss_fft_cpx * Fout2;
   kiss_fft_cpx * tw1;
   kiss_fft_cpx t;
   int grp, k;

   for (grp = 0; grp < N; grp++)
   {
      Fout = Fout_beg + grp*mm;
      Fout2 = Fout + m;
      tw1 = st->twiddles;

      if (!st->inverse)
      {
         for (k = 0; k < m; k++)
         {
            /* Almost the same as the code path below, except that we divide
               the input by two (while keeping the best accuracy possible) */
            ms_word32_t tr, ti;
            tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1);
            ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1);
            tw1 += fstride;
            /* The difference outputs must be formed before Fout is overwritten */
            Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
            Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
            Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
            Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
            ++Fout2;
            ++Fout;
         }
      } else {
         for (k = 0; k < m; k++)
         {
            C_MUL (t, *Fout2 , *tw1);
            tw1 += fstride;
            C_SUB( *Fout2 , *Fout , t );
            C_ADDTO( *Fout , t );
            ++Fout2;
            ++Fout;
         }
      }
   }
}
/** Accumulate the power spectrum of a half-complex (packed) vector.
 *
 * X[0] and the last element are squared on their own; every pair
 * (X[i], X[i+1]) in between contributes the sum of both squares to one
 * entry of ps.  Values are added to ps, which is not cleared here.
 */
static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
{
   int bin = 1;
   int k = 1;

   ps[0] += MULT16_16(X[0],X[0]);
   while (k < N-1)
   {
      ps[bin] += MULT16_16(X[k],X[k]) + MULT16_16(X[k+1],X[k+1]);
      k += 2;
      bin++;
   }
   /* Final unpaired element */
   ps[bin] += MULT16_16(X[k],X[k]);
}
/* Scan the cross-correlation xcorr[0..max_pitch-1] and record the two lags
   with the largest xcorr^2/energy ratio in best_pitch[0] (best) and
   best_pitch[1] (runner-up).  Ratios are compared by cross-multiplying, so
   no division is performed.  Only lags with a positive correlation are
   considered.

   Syy is the energy of the len-sample window of y starting at the current
   lag.  Every term entering Syy is scaled down by yshift, both in the
   initial sum and in the sliding update, so that the running energy stays
   on a single scale.  (The initial sum used to be unscaled, which made it
   inconsistent with the update whenever yshift was non-zero.) */
static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
                            int max_pitch, int *best_pitch
#ifdef FIXED_POINT
                            , int yshift, opus_val32 maxcorr
#endif
                            )
{
   int i, j;
   opus_val32 Syy=1;
   opus_val16 best_num[2];
   opus_val32 best_den[2];
#ifdef FIXED_POINT
   int xshift;

   xshift = celt_ilog2(maxcorr)-14;
#endif

   best_num[0] = -1;
   best_num[1] = -1;
   best_den[0] = 0;
   best_den[1] = 0;
   best_pitch[0] = 0;
   best_pitch[1] = 1;

   /* Energy of the first window, on the same scale as the update below */
   for (j=0;j<len;j++)
      Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));

   for (i=0;i<max_pitch;i++)
   {
      if (xcorr[i]>0)
      {
         opus_val16 num;
         opus_val32 xcorr16;
         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
         num = MULT16_16_Q15(xcorr16,xcorr16);
         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
         {
            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
            {
               /* New best: the previous best becomes the runner-up */
               best_num[1] = best_num[0];
               best_den[1] = best_den[0];
               best_pitch[1] = best_pitch[0];
               best_num[0] = num;
               best_den[0] = Syy;
               best_pitch[0] = i;
            } else {
               best_num[1] = num;
               best_den[1] = Syy;
               best_pitch[1] = i;
            }
         }
      }
      /* Slide the energy window by one sample and keep it strictly positive */
      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
      Syy = MAX32(1, Syy);
   }
}
/* Expand bank-domain values back to bank->len spectrum bins.  Each output
   bin is the weighted sum of the two bank entries selected by bank_left and
   bank_right, using the matching filter_left / filter_right weights, then
   rounded down by 15 bits. */
void filterbank_compute_psd16(FilterBank *bank, spx_word16_t *mel, spx_word16_t *ps)
{
   int k;

   for (k = 0; k < bank->len; k++)
   {
      const int left = bank->bank_left[k];
      const int right = bank->bank_right[k];
      spx_word32_t acc = MULT16_16(mel[left],bank->filter_left[k]);

      acc += MULT16_16(mel[right],bank->filter_right[k]);
      ps[k] = EXTRACT16(PSHR32(acc,15));
   }
}
/** Weighted cross-power spectrum of two half-complex (packed) vectors,
 *  with conjugation.
 *
 * Each frequency entry is scaled by p*w[j].  The first and last elements
 * are plain products; each pair (i, i+1) in between is written as
 * X[i]*Y[i] + X[i+1]*Y[i+1] and X[i]*Y[i+1] - X[i+1]*Y[i].
 */
static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
{
   int k = 1;
   int bin = 1;
   spx_float_t W;

   W = FLOAT_AMULT(p, w[0]);
   prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));

   while (k < N-1)
   {
      W = FLOAT_AMULT(p, w[bin]);
      prod[k] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[k],Y[k]), X[k+1],Y[k+1]));
      prod[k+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[k+1],Y[k]), X[k],Y[k+1]));
      k += 2;
      bin++;
   }

   /* Final unpaired element */
   W = FLOAT_AMULT(p, w[bin]);
   prod[k] = FLOAT_MUL32(W,MULT16_16(X[k],Y[k]));
}
/* Compute the four interpolation weights for fractional position x.
   The original author notes that this may not be cubic interpolation in the
   strict sense, but that it is MMSE-optimal on a sinc.  interp[2] is derived
   from the other three so that the weights add up to Q15_ONE. */
static void cubic_coef(spx_word16_t x, spx_word16_t interp[4])
{
   const spx_word16_t sq = MULT16_16_P15(x, x);
   const spx_word16_t cube = MULT16_16_P15(x, sq);

   interp[0] = PSHR32(MULT16_16(QCONST16(-0.16667f, 15),x) + MULT16_16(QCONST16(0.16667f, 15),cube),15);
   interp[1] = EXTRACT16(EXTEND32(x) + SHR32(SUB32(EXTEND32(sq),EXTEND32(cube)),1));
   interp[3] = PSHR32(MULT16_16(QCONST16(-0.33333f, 15),x) + MULT16_16(QCONST16(.5f,15),sq) - MULT16_16(QCONST16(0.16667f, 15),cube),15);

   /* Just to make sure we don't have rounding problems */
   interp[2] = Q15_ONE-interp[0]-interp[1]-interp[3];
   if (interp[2]<32767)
   {
      interp[2]+=1;
   }
}
/* Multiply X and Y entry by entry and sum the products over M consecutive
   blocks of N elements each, writing the N results to acc after a rounding
   shift by WEIGHT_SHIFT.  Elements 0 and N-1 of each block are multiplied
   directly; each pair (i, i+1) in between is combined as a complex product
   with (X[i], X[i+1]) acting as the real and imaginary parts. */
static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
{
   int bin, blk;
   spx_word32_t re = 0, im = 0;

   /* First element of every block */
   for (blk = 0; blk < M; blk++)
   {
      re = MAC16_16(re, X[blk*N],Y[blk*N]);
   }
   acc[0] = PSHR32(re,WEIGHT_SHIFT);

   for (bin = 1; bin < N-1; bin += 2)
   {
      re = im = 0;
      for (blk = 0; blk < M; blk++)
      {
         const int k = blk*N+bin;
         re = SUB32(MAC16_16(re, X[k],Y[k]), MULT16_16(X[k+1],Y[k+1]));
         im = MAC16_16(MAC16_16(im, X[k+1],Y[k]), X[k], Y[k+1]);
      }
      acc[bin] = PSHR32(re,WEIGHT_SHIFT);
      acc[bin+1] = PSHR32(im,WEIGHT_SHIFT);
   }

   /* Last element of every block */
   re = 0;
   for (blk = 0; blk < M; blk++)
   {
      re = MAC16_16(re, X[(blk+1)*N-1],Y[(blk+1)*N-1]);
   }
   acc[N-1] = PSHR32(re,WEIGHT_SHIFT);
}
/* Pseudo-random value scaled by std.
 *
 * Advances *seed with the linear congruential step
 *    seed = 1664525*seed + 1013904223   (modulo 2^32)
 * and multiplies the upper 16 bits of the new seed by std.  The product
 * minus one eighth of itself is returned.
 *
 * The step is evaluated in unsigned arithmetic: the same expression on
 * signed 32-bit operands overflows, which is undefined behaviour in C.
 * Unsigned arithmetic wraps by definition and gives the identical bit
 * pattern.
 *
 * @param std   scale applied to the random value
 * @param seed  generator state; read and updated in place
 */
spx_word32_t speex_rand(spx_word16_t std, spx_int32_t *seed)
{
   spx_word32_t res;
   /* unsigned long is at least 32 bits wide; mask in case it is wider */
   const unsigned long next = (1664525UL * (unsigned long)(*seed) + 1013904223UL) & 0xFFFFFFFFUL;

   /* Map the 32-bit pattern back to a signed value without relying on an
      out-of-range conversion */
   if (next >= 0x80000000UL)
      *seed = (spx_int32_t)(next - 0x80000000UL) - 2147483647 - 1;
   else
      *seed = (spx_int32_t)next;

   res = MULT16_16(EXTRACT16(SHR32(*seed,16)),std);
   return SUB32(res, SHR(res, 3));
}
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) { static const int SPREAD_FACTOR[3]= {15,10,5}; int i; opus_val16 c, s; opus_val16 gain, theta; int stride2=0; int factor; if (2*K>=len || spread==SPREAD_NONE) return; factor = SPREAD_FACTOR[spread-1]; gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); theta = HALF16(MULT16_16_Q15(gain,gain)); c = celt_cos_norm(EXTEND32(theta)); s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ if (len>=8*stride) { stride2 = 1; /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ while ((stride2*stride2+stride2)*stride + (stride>>2) < len) stride2++; }
/* In-band request handler that reads stereo side information from the bit
   stream into the stereo state passed through data.  Reads, in order: one
   sign bit, a 5-bit balance exponent and a 2-bit index into e_ratio_quant.
   The state argument is unused.  Always returns 0. */
EXPORT int speex_std_stereo_request_handler(SpeexBits * bits, void *state, void *data)
{
   RealSpeexStereoState *stereo = (RealSpeexStereoState *) data;
   spx_word16_t sign, dexp;
   int ratio_idx;

   (void)state;
   COMPATIBILITY_HACK(stereo);

   sign = speex_bits_unpack_unsigned(bits, 1) ? -1 : 1;
   dexp = speex_bits_unpack_unsigned(bits, 5);
#ifndef FIXED_POINT
   stereo->balance = exp(sign * .25 * dexp);
#else
   stereo->balance = spx_exp(MULT16_16(sign, SHL16(dexp, 9)));
#endif

   ratio_idx = speex_bits_unpack_unsigned(bits, 2);
   stereo->e_ratio = e_ratio_quant[ratio_idx];
   return 0;
}
/* Weighted vector quantisation of x against a codebook of nbVec entries of
   nbDim signed bytes each.  Codebook values are shifted left by 5 before
   use.  The entry with the smallest weighted squared error is selected (the
   first one wins on ties), subtracted from x in place, and its index is
   returned. */
static int lsp_weight_quant(spx_word16_t *x, spx_word16_t *weight, const signed char *cdbk, int nbVec, int nbDim)
{
   int vec, d;
   int best_id = 0;
   spx_word32_t best_dist = VERY_LARGE32;
   const signed char *entry = cdbk;

   for (vec = 0; vec < nbVec; vec++, entry += nbDim)
   {
      spx_word32_t dist = 0;
      for (d = 0; d < nbDim; d++)
      {
         const spx_word16_t diff = SUB16(x[d],SHL16((spx_word16_t)entry[d],5));
         dist = MAC16_32_Q15(dist,weight[d],MULT16_16(diff,diff));
      }
      if (dist < best_dist)
      {
         best_dist = dist;
         best_id = vec;
      }
   }

   /* Leave the quantisation residual in x */
   entry = cdbk + best_id*nbDim;
   for (d = 0; d < nbDim; d++)
      x[d] = SUB16(x[d],SHL16((spx_word16_t)entry[d],5));

   return best_id;
}
/** Unquantize forced pitch delay and gain.
 *
 * The pitch coefficient is clamped, then each output sample is the
 * excitation `start` samples earlier multiplied by the coefficient.  exc
 * is updated as the loop runs, so later samples can read values written
 * by this call.  Reports `start` as the pitch value and puts the
 * coefficient in the middle of the three gains.
 */
void forced_pitch_unquant(
spx_word16_t exc[],             /* Input excitation */
spx_word32_t exc_out[],         /* Output excitation */
int   start,                    /* Smallest pitch value allowed */
int   end,                      /* Largest pitch value allowed */
spx_word16_t pitch_coef,        /* Voicing (pitch) coefficient */
const void *par,
int   nsf,                      /* Number of samples in subframe */
int *pitch_val,
spx_word16_t *gain_val,
SpeexBits *bits,
char *stack,
int count_lost,
int subframe_offset,
spx_word16_t last_pitch_gain,
int cdbk_offset
)
{
   int n;

   /* Clamp the pitch coefficient */
#ifdef FIXED_POINT
   if (pitch_coef > 63)
      pitch_coef = 63;
#else
   if (pitch_coef > .99)
      pitch_coef = .99;
#endif

   for (n = 0; n < nsf; n++)
   {
      exc_out[n] = MULT16_16(exc[n - start], SHL16(pitch_coef, 7));
      exc[n] = EXTRACT16(PSHR32(exc_out[n], 13));
   }

   *pitch_val = start;
   gain_val[2] = 0;
   gain_val[0] = 0;
   gain_val[1] = pitch_coef;
}
/* For each of the shape_cb_size codewords (subvect_size signed bytes each),
   compute its response to the impulse response r and the energy of that
   response.  Responses are stored contiguously in resp, energies in E.
   resp2 and stack are not used by this function. */
static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *r, spx_word16_t *resp, float *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
{
   int cw, n, k;

   for (cw = 0; cw < shape_cb_size; cw++)
   {
      spx_word16_t *res = resp + cw*subvect_size;
      const signed char *shape = shape_cb + cw*subvect_size;

      /* Compute codeword response using convolution with impulse response */
      for (n = 0; n < subvect_size; n++)
      {
         spx_word32_t resj = 0;
         for (k = 0; k <= n; k++)
            resj = MAC16_16_Q11(resj,shape[k],r[n-k]);
#ifndef FIXED_POINT
         resj *= 0.03125;
#endif
         res[n] = resj;
      }

      /* Compute codeword energy */
      E[cw] = 0;
      for (n = 0; n < subvect_size; n++)
         E[cw] = ADD32(E[cw],MULT16_16(res[n],res[n]));
   }
}
/* Evaluate a score for the three gains g[0..2] against the nine terms in C.
   The three terms linear in the gains, C[0..2], are each scaled by
   pitch_control and added.  The cross terms C[3..5] and the squared terms
   C[6..8] are subtracted. */
static inline spx_word32_t compute_pitch_error(spx_word16_t* C, spx_word16_t* g, spx_word16_t pitch_control)
{
   const spx_word16_t g0 = g[0];
   const spx_word16_t g1 = g[1];
   const spx_word16_t g2 = g[2];
   spx_word32_t score = 0;

   /* Linear terms */
   score = ADD32(score, MULT16_16(MULT16_16_16(g0, pitch_control), C[0]));
   score = ADD32(score, MULT16_16(MULT16_16_16(g1, pitch_control), C[1]));
   score = ADD32(score, MULT16_16(MULT16_16_16(g2, pitch_control), C[2]));
   /* Cross terms */
   score = SUB32(score, MULT16_16(MULT16_16_16(g0, g1), C[3]));
   score = SUB32(score, MULT16_16(MULT16_16_16(g2, g1), C[4]));
   score = SUB32(score, MULT16_16(MULT16_16_16(g2, g0), C[5]));
   /* Squared terms */
   score = SUB32(score, MULT16_16(MULT16_16_16(g0, g0), C[6]));
   score = SUB32(score, MULT16_16(MULT16_16_16(g1, g1), C[7]));
   score = SUB32(score, MULT16_16(MULT16_16_16(g2, g2), C[8]));

   return score;
}
/* Query or change echo canceller settings.
 *
 * ptr is interpreted according to request: an int for the frame size and
 * sampling rate requests, an spx_int32_t for the impulse response size,
 * and an spx_int32_t array of M*frame_size entries for the impulse
 * response itself.  Setting the sampling rate also recomputes
 * spec_average, beta0, beta_max and the notch radius.
 *
 * Returns 0 on success, or -1 (after a warning) for an unknown request.
 */
EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
{
   switch(request)
   {
      case SPEEX_ECHO_GET_FRAME_SIZE:
         *(int*)ptr = st->frame_size;
         return 0;

      case SPEEX_ECHO_SET_SAMPLING_RATE:
         st->sampling_rate = *(int*)ptr;
         st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
#ifdef FIXED_POINT
         st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
         st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
#else
         st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
         st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
#endif
         /* Notch radius is picked from three sampling-rate bands */
         if (st->sampling_rate<12000)
            st->notch_radius = QCONST16(.9, 15);
         else if (st->sampling_rate<24000)
            st->notch_radius = QCONST16(.982, 15);
         else
            st->notch_radius = QCONST16(.992, 15);
         return 0;

      case SPEEX_ECHO_GET_SAMPLING_RATE:
         *(int*)ptr = st->sampling_rate;
         return 0;

      case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
         /*FIXME: Implement this for multiple channels */
         *((spx_int32_t *)ptr) = st->M * st->frame_size;
         return 0;

      case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
      {
         const int num_blocks = st->M;
         const int win = st->window_size;
         const int frame = st->frame_size;
         spx_int32_t *filt = (spx_int32_t *) ptr;
         int blk, k;

         for (blk = 0; blk < num_blocks; blk++)
         {
            /*FIXME: Implement this for multiple channels */
            /* Inverse-transform the weights of this block into st->wtmp */
#ifdef FIXED_POINT
            for (k = 0; k < win; k++)
               st->wtmp2[k] = EXTRACT16(PSHR32(st->W[blk*win+k],16+NORMALIZE_SCALEDOWN));
            spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
#else
            spx_ifft(st->fft_table, &st->W[blk*win], st->wtmp);
#endif
            /* Only the first frame_size samples of each block are exported */
            for (k = 0; k < frame; k++)
               filt[blk*frame+k] = PSHR32(MULT16_16(32767,st->wtmp[k]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
         }
         return 0;
      }

      default:
         speex_warning_int("Unknown speex_echo_ctl request: ", request);
         return -1;
   }
}
/* By segher */
/* FIR filter over an up-sampled input, producing four outputs per pass.
 *
 * Assumptions (from the original author):
 *   - all odd x[i] are zero -- in fact they are left out of the array, so
 *     x holds N/2 samples;
 *   - N and M are multiples of 4.
 *
 * A scratch buffer of M+N-1 entries is taken from stack.  Its even slots
 * below N receive the scaled input in reverse order, and the odd-offset
 * entries of mem are copied in behind it as filter history.  After
 * filtering, mem is refreshed from the start of the scratch buffer.
 */
void fir_mem_up(const spx_sig_t *x, const spx_word16_t *a, spx_sig_t *y, int N, int M, spx_word32_t *mem, char *stack)
{
   int n, tap;
   spx_word16_t *xx;

   xx = PUSH(stack, M+N-1, spx_word16_t);

   /* Time-reversed, scaled copy of the input in the even slots */
   for (n = 0; n < N/2; n++)
      xx[2*n] = SHR(x[N/2-1-n],SIG_SHIFT+1);

   /* Append the filter history */
   for (n = 0; n < M - 1; n += 2)
      xx[N+n] = mem[n+1];

   for (n = 0; n < N; n += 4)
   {
      spx_sig_t y0 = 0, y1 = 0, y2 = 0, y3 = 0;
      spx_word16_t x0 = xx[N-4-n];

      for (tap = 0; tap < M; tap += 4)
      {
         spx_word16_t a0 = a[tap];
         spx_word16_t a1 = a[tap+1];
         const spx_word16_t x1 = xx[N-2+tap-n];

         y0 += SHR(MULT16_16(a0, x1),1);
         y1 += SHR(MULT16_16(a1, x1),1);
         y2 += SHR(MULT16_16(a0, x0),1);
         y3 += SHR(MULT16_16(a1, x0),1);

         /* x0 is reloaded here and carried into the next iteration */
         a0 = a[tap+2];
         a1 = a[tap+3];
         x0 = xx[N+tap-n];

         y0 += SHR(MULT16_16(a0, x0),1);
         y1 += SHR(MULT16_16(a1, x0),1);
         y2 += SHR(MULT16_16(a0, x1),1);
         y3 += SHR(MULT16_16(a1, x1),1);
      }

      y[n] = y0;
      y[n+1] = y1;
      y[n+2] = y2;
      y[n+3] = y3;
   }

   /* Save the history for the next call */
   for (n = 0; n < M - 1; n += 2)
      mem[n+1] = xx[n];
}
/**
 * Resample one channel using the sinc table directly: the fractional
 * position selects a row of N coefficients, and the output sample is the
 * inner product of that row with N input samples.
 *
 * @param st            resampler state (filter length, sinc table, step sizes)
 * @param channel_index channel whose position counters are read and updated
 * @param in            input samples
 * @param in_len        the loop stops once the read position reaches *in_len
 * @param out           output samples, written out_stride apart
 * @param out_len       the loop stops once *out_len samples were produced
 * @return number of output samples written
 */
static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
{
   const int N = st->filt_len;
   const spx_word16_t *sinc_table = st->sinc_table;
   const int out_stride = st->out_stride;
   const int int_advance = st->int_advance;
   const int frac_advance = st->frac_advance;
   const spx_uint32_t den_rate = st->den_rate;
   int out_sample = 0;
   int last_sample = st->last_sample[channel_index];
   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
   spx_word32_t sum;

   while (last_sample < (spx_int32_t)*in_len && out_sample < (spx_int32_t)*out_len)
   {
      const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
      const spx_word16_t *iptr = & in[last_sample];

#ifndef OVERRIDE_INNER_PRODUCT_SINGLE
      int tap;

      /* A single accumulator is used on purpose.  The original author notes
         that a four-accumulator unrolled loop is slower on most DSPs which
         have only 2 accumulators, forces truncation to 32 bits and loses
         the HW guard bits, and that unrolling is best left to the compiler. */
      sum = 0;
      for (tap = 0; tap < N; tap++)
         sum += MULT16_16(sinct[tap], iptr[tap]);
      sum = SATURATE32PSHR(sum, 15, 32767);
#else
      sum = inner_product_single(sinct, iptr, N);
#endif

      out[out_stride * out_sample] = sum;
      out_sample++;

      /* Advance the read position by the integer and fractional step */
      last_sample += int_advance;
      samp_frac_num += frac_advance;
      if (samp_frac_num >= den_rate)
      {
         samp_frac_num -= den_rate;
         last_sample++;
      }
   }

   st->last_sample[channel_index] = last_sample;
   st->samp_frac_num[channel_index] = samp_frac_num;
   return out_sample;
}
/* Expand frame_size mono samples in data into interleaved stereo, in place.
 *
 * Per-channel gains are derived from the balance and e_ratio stored in the
 * stereo state, then smoothed sample by sample (0.98 of the previous value
 * plus 0.02 of the target) before being applied.  The frame is processed
 * from the last sample to the first so that writing data[2*i] and
 * data[2*i+1] never overwrites mono input that is still to be read.
 */
EXPORT void speex_decode_stereo_int(spx_int16_t * data, int frame_size, SpeexStereoState * _stereo)
{
   RealSpeexStereoState *stereo = (RealSpeexStereoState *) _stereo;
   spx_word32_t balance;
   spx_word16_t e_left, e_right, e_ratio;
   int n;

   COMPATIBILITY_HACK(stereo);

   balance = stereo->balance;
   e_ratio = stereo->e_ratio;

   /* These two are Q14, with max value just below 2. */
   e_right = DIV32(QCONST32(1., 22), spx_sqrt(MULT16_32_Q15(e_ratio, ADD32(QCONST32(1., 16), balance))));
   e_left = SHR32(MULT16_16(spx_sqrt(balance), e_right), 8);

   n = frame_size;
   while (n-- > 0)
   {
      const spx_int16_t mono = data[n];

      stereo->smooth_left = EXTRACT16(PSHR32(MAC16_16(MULT16_16(stereo->smooth_left, QCONST16(0.98, 15)), e_left, QCONST16(0.02, 15)), 15));
      stereo->smooth_right = EXTRACT16(PSHR32(MAC16_16(MULT16_16(stereo->smooth_right, QCONST16(0.98, 15)), e_right, QCONST16(0.02, 15)), 15));
      data[2 * n] = (spx_int16_t) MULT16_16_P14(stereo->smooth_left, mono);
      data[2 * n + 1] = (spx_int16_t) MULT16_16_P14(stereo->smooth_right, mono);
   }
}
/* Autocorrelation of x with a scaling chosen from the signal energy.
 *
 * A scaled-down energy estimate is first normalised towards 0x40000000 by
 * doubling.  Each doubling takes one step off the per-product shift
 * (starting at 8), and once that reaches zero, off the output shift
 * (starting at 18).  The two shifts are then applied while accumulating
 * and storing the lagged products.
 */
void _spx_autocorr(
const spx_word16_t *x,   /*  in: [0...n-1] samples x   */
spx_word16_t       *ac,  /* out: [0...lag-1] ac values */
int          lag,
int          n
)
{
   int k, t;
   int shift, ac_shift;
   spx_word32_t ac0 = 1;

   for (t = 0; t < n; t++)
      ac0 = ADD32(ac0,SHR32(MULT16_16(x[t],x[t]),8));
   ac0 = ADD32(ac0,n);

   for (shift = 8; shift && ac0<0x40000000; shift--)
      ac0 <<= 1;
   for (ac_shift = 18; ac_shift && ac0<0x40000000; ac_shift--)
      ac0 <<= 1;

   for (k = 0; k < lag; k++)
   {
      spx_word32_t d = 0;
      for (t = k; t < n; t++)
         d = ADD32(d,SHR32(MULT16_16(x[t],x[t-k]), shift));
      ac[k] = SHR32(d, ac_shift);
   }
}
spx_word16_t compute_rms(const spx_sig_t *x, int len) { int i; spx_word32_t sum=0; spx_sig_t max_val=1; int sig_shift; for (i=0;i<len;i++) { spx_sig_t tmp = x[i]; if (tmp<0) tmp = -tmp; if (tmp > max_val) max_val = tmp; } sig_shift=0; while (max_val>16383) { sig_shift++; max_val >>= 1; } for (i=0;i<len;i+=4) { spx_word32_t sum2=0; spx_word16_t tmp; tmp = SHR(x[i],sig_shift); sum2 += MULT16_16(tmp,tmp); tmp = SHR(x[i+1],sig_shift); sum2 += MULT16_16(tmp,tmp); tmp = SHR(x[i+2],sig_shift); sum2 += MULT16_16(tmp,tmp); tmp = SHR(x[i+3],sig_shift); sum2 += MULT16_16(tmp,tmp); sum += SHR(sum2,6); } return SHR(SHL((spx_word32_t)spx_sqrt(1+DIV32(sum,len)),(sig_shift+3)),SIG_SHIFT); }
/** Forced pitch delay and gain */ int forced_pitch_quant(spx_word16_t target[], /* Target vector */ spx_word16_t * sw, spx_coef_t ak[], /* LPCs for this subframe */ spx_coef_t awk1[], /* Weighted LPCs #1 for this subframe */ spx_coef_t awk2[], /* Weighted LPCs #2 for this subframe */ spx_sig_t exc[], /* Excitation */ const void *par, int start, /* Smallest pitch value allowed */ int end, /* Largest pitch value allowed */ spx_word16_t pitch_coef, /* Voicing (pitch) coefficient */ int p, /* Number of LPC coeffs */ int nsf, /* Number of samples in subframe */ SpeexBits * bits, char *stack, spx_word16_t * exc2, spx_word16_t * r, int complexity, int cdbk_offset, int plc_tuning, spx_word32_t * cumul_gain) { (void)sw; (void)par; (void)end; (void)bits; (void)r; (void)complexity; (void)cdbk_offset; (void)plc_tuning; (void)cumul_gain; int i; spx_word16_t res[nsf]; #ifdef FIXED_POINT if (pitch_coef > 63) pitch_coef = 63; #else if (pitch_coef > .99) pitch_coef = .99; #endif for (i = 0; i < nsf && i < start; i++) { exc[i] = MULT16_16(SHL16(pitch_coef, 7), exc2[i - start]); } for (; i < nsf; i++) { exc[i] = MULT16_32_Q15(SHL16(pitch_coef, 9), exc[i - start]); } for (i = 0; i < nsf; i++) res[i] = EXTRACT16(PSHR32(exc[i], SIG_SHIFT - 1)); syn_percep_zero16(res, ak, awk1, awk2, res, nsf, p, stack); for (i = 0; i < nsf; i++) target[i] = EXTRACT16(SATURATE (SUB32(EXTEND32(target[i]), EXTEND32(res[i])), 32700)); return start; }
/* Apply a sequence of 2-D rotations to X, first sweeping forward and then
   backward, pairing each element with the one `stride` positions later.
   The rotation angle is derived from len and K, and the sign of the sine
   term follows dir.  For long vectors (len > 8*stride) the stride is
   scaled up by len/(8*stride) first. */
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K)
{
   int i, pass;
   const int num_passes = 1;
   celt_word16 c, s;
   celt_word16 gain, theta;

   gain = celt_div((celt_word32)MULT16_16(Q15_ONE,len),(celt_word32)(3+len+6*K));
   /* FIXME: Make that HALF16 instead of HALF32 */
   theta = SUB16(Q15ONE, HALF32(MULT16_16_Q15(gain,gain)));
   c = celt_cos_norm(EXTEND32(theta));
   s = dir*celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */

   if (len > 8*stride)
      stride *= len/(8*stride);

   for (pass = 0; pass < num_passes; pass++)
   {
      /* We could use MULT16_16_P15 instead of MULT16_16_Q15 for more accuracy,
         but at this point, I really don't think it's necessary */

      /* Forward sweep */
      for (i = 0; i < len-stride; i++)
      {
         const celt_norm x1 = X[i];
         const celt_norm x2 = X[i+stride];
         X[i+stride] = MULT16_16_Q15(c,x2) + MULT16_16_Q15(s,x1);
         X[i] = MULT16_16_Q15(c,x1) - MULT16_16_Q15(s,x2);
      }
      /* Backward sweep */
      for (i = len-2*stride-1; i >= 0; i--)
      {
         const celt_norm x1 = X[i];
         const celt_norm x2 = X[i+stride];
         X[i+stride] = MULT16_16_Q15(c,x2) + MULT16_16_Q15(s,x1);
         X[i] = MULT16_16_Q15(c,x1) - MULT16_16_Q15(s,x2);
      }
   }
}
/* Windowed autocorrelation.
 *
 * x is copied to a scratch buffer, with the first and last `overlap`
 * samples multiplied by window[] (the same window values are used,
 * mirrored, at both ends).  In fixed-point builds the copy is rescaled
 * according to its energy before the lagged products are summed.
 * Autocorrelation values are written for every lag from `lag` down to 0,
 * i.e. lag+1 outputs, and a constant 10 is added to ac[0].
 */
void _celt_autocorr(
                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
                   opus_val32       *ac,  /* out: [0...lag] ac values   */
                   const opus_val16 *window,
                   int          overlap,
                   int          lag,
                   int          n
                  )
{
   opus_val32 d;
   int i, k;
   VARDECL(opus_val16, xx);
   SAVE_STACK;
   ALLOC(xx, n, opus_val16);
   celt_assert(n>0);
   celt_assert(overlap>=0);

   for (i = 0; i < n; i++)
      xx[i] = x[i];
   /* Taper both ends with the window */
   for (i = 0; i < overlap; i++)
   {
      xx[i] = MULT16_16_Q15(x[i],window[i]);
      xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
   }

#ifdef FIXED_POINT
   {
      /* Choose a shift from the signal energy and rescale the copy */
      opus_val32 ac0 = 0;
      int shift;
      for (i = 0; i < n; i++)
         ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);
      ac0 += 1+n;

      shift = celt_ilog2(ac0)-30+10;
      shift = (shift+1)/2;
      for (i = 0; i < n; i++)
         xx[i] = VSHR32(xx[i], shift);
   }
#endif

   for (k = lag; k >= 0; k--)
   {
      d = 0;
      for (i = k; i < n; i++)
         d += xx[i] * xx[i-k];
      ac[k] = d;
   }
   ac[0] += 10;

   RESTORE_STACK;
}
/* Fixed-point tansig approximation: a table lookup followed by a
   correction term.  Inputs at or beyond +/-8 saturate to +/-1.
   The floating-point reference steps are quoted next to each stage. */
static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
{
   int idx;
   opus_val16 frac;        /* Q11 */
   opus_val16 slope, val;  /* Q14 */

   /* x = 1.9073e-06*_x; */
   if (_x>=QCONST32(8,19))
      return QCONST32(1.,14);
   if (_x<=-QCONST32(8,19))
      return -QCONST32(1.,14);

   frac = EXTRACT16(SHR32(_x, 8));
   /* i = lrint(25*x); */
   idx = SHR32(ADD32(1024,MULT16_16(25, frac)),11);
   /* x -= .04*i; */
   frac -= EXTRACT16(SHR32(MULT16_16(20972,idx),8));
   /* y = tansig_table[250+i]; */
   val = tansig_table[250+idx];
   /* Correct the table value for the residual offset */
   slope = 16384-MULT16_16_Q14(val,val);
   val = val + MULT16_16_Q14(MULT16_16_Q11(frac,slope),(16384 - MULT16_16_Q11(val,frac)));
   return val;
}
/* Cross-fade from in1 to in2 over `overlap` samples per channel.
   The blend weight is the squared window value: in2 is weighted by w and
   in1 by Q15ONE-w.  The window is read with a step of 48000/Fs.  Buffers
   are interleaved by channel. */
static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2,
      opus_val16 *out, int overlap, int channels,
      const opus_val16 *window, opus_int32 Fs)
{
   const int inc = 48000/Fs;
   int ch, n;

   for (ch = 0; ch < channels; ch++)
   {
      for (n = 0; n < overlap; n++)
      {
         const int idx = n*channels+ch;
         opus_val16 w = MULT16_16_Q15(window[n*inc], window[n*inc]);
         out[idx] = SHR32(MAC16_16(MULT16_16(w,in2[idx]), Q15ONE-w, in1[idx]), 15);
      }
   }
}
/* Compute p LPC coefficients from the autocorrelation ac by an order
   recursion: each step derives a reflection coefficient, stores it as the
   new highest-order coefficient, updates the lower-order ones in mirrored
   pairs, and reduces the running error.
   Returns the final error (minimum mean square error).  If ac[0] is zero,
   all coefficients are set to zero and 0 is returned. */
spx_word32_t _spx_lpc(
spx_coef_t       *lpc, /* out: [0...p-1] LPC coefficients      */
const spx_word16_t *ac,  /* in:  [0...p] autocorrelation values  */
int          p
)
{
   int order, k;
   spx_word16_t refl;
   spx_word16_t error = ac[0];

   if (ac[0] == 0)
   {
      for (order = 0; order < p; order++)
         lpc[order] = 0;
      return 0;
   }

   for (order = 0; order < p; order++)
   {
      /* Sum up this iteration's reflection coefficient */
      spx_word32_t rr = NEG32(SHL32(EXTEND32(ac[order + 1]),13));
      for (k = 0; k < order; k++)
         rr = SUB32(rr,MULT16_16(lpc[k],ac[order - k]));
#ifdef FIXED_POINT
      refl = DIV32_16(rr+PSHR32(error,1),ADD16(error,8));
#else
      refl = rr/(error+.003*ac[0]);
#endif

      /* Update LPC coefficients and total error */
      lpc[order] = refl;
      for (k = 0; k < order>>1; k++)
      {
         /* Keep the old lpc[k]; its mirror still needs it */
         const spx_word16_t saved = lpc[k];
         lpc[k] = MAC16_16_P13(lpc[k],refl,lpc[order-1-k]);
         lpc[order-1-k] = MAC16_16_P13(lpc[order-1-k],refl,saved);
      }
      /* With an odd order the middle coefficient pairs with itself;
         k equals order>>1 here */
      if (order & 1)
         lpc[k] = MAC16_16_P13(lpc[k],lpc[k],refl);

      error = SUB16(error,MULT16_16_Q13(refl,MULT16_16_Q13(error,refl)));
   }
   return error;
}
/* Rotate pairs (X[i], X[i+stride]) by the angle whose cosine and sine are
   c and s.  A forward sweep over i = 0..len-stride-1 is followed by a
   backward sweep over i = len-2*stride-1..0.  Each product sum is shifted
   right by 15 before being stored. */
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   int i;

   /* Forward sweep */
   for (i = 0; i < len-stride; i++)
   {
      const celt_norm x1 = X[i];
      const celt_norm x2 = X[i+stride];
      X[i+stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
      X[i] = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
   }

   /* Backward sweep */
   for (i = len-2*stride-1; i >= 0; i--)
   {
      const celt_norm x1 = X[i];
      const celt_norm x2 = X[i+stride];
      X[i+stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
      X[i] = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
   }
}
/* All-pole (IIR) filter.  For each of the N input samples, the products of
   the ord denominator coefficients with the stored past outputs are
   subtracted from the input.  The full-precision result goes to y, and a
   copy rounded by SIG_SHIFT is pushed onto the front of mem, which holds
   the ord most recent outputs and persists across calls. */
void celt_iir(const opus_val32 *x,
         const opus_val16 *den,
         opus_val32 *y,
         int N,
         int ord,
         opus_val16 *mem)
{
   int n, k;

   for (n = 0; n < N; n++)
   {
      opus_val32 acc = x[n];

      for (k = 0; k < ord; k++)
         acc -= MULT16_16(den[k],mem[k]);

      /* Shift the history by one to make room for the new output */
      for (k = ord-1; k >= 1; k--)
         mem[k] = mem[k-1];

      mem[0] = ROUND16(acc,SIG_SHIFT);
      y[n] = acc;
   }
}