/** Open-loop search for the N best pitch-lag candidates.
 *
 * For every lag in [start, end] a score is derived from the cross-correlation
 * between the current window and the window delayed by that lag, together
 * with the energy of the delayed window. The N lags with the highest scores
 * are returned, best first.
 *
 * @param sw     Input signal; the analysis touches sw[-end] .. sw[len-1].
 * @param start  Smallest lag examined.
 * @param end    Largest lag examined (inclusive).
 * @param len    Number of samples in the analysis window.
 * @param pitch  Output: the N selected lags, best first. Slots that never
 *               receive a candidate keep the value `start`.
 * @param gain   Optional output (may be NULL): open-loop gain estimate for
 *               each selected lag, floored at zero.
 * @param N      Number of candidates to return.
 * @param stack  Scratch memory handed to pitch_xcorr() (presumably also the
 *               backing store of the ALLOC() temporaries -- confirm against
 *               the ALLOC definition).
 */
void open_loop_nbest_pitch(spx_sig_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
{
   int i,j,k;
   VARDECL(spx_word32_t *best_score);
   spx_word32_t e0;
   VARDECL(spx_word32_t *corr);
   VARDECL(spx_word32_t *energy);
   VARDECL(spx_word32_t *score);
#ifdef FIXED_POINT
   VARDECL(spx_word16_t *swn2);
#endif
   spx_word16_t *swn;

   ALLOC(best_score, N, spx_word32_t);
   ALLOC(corr, end-start+1, spx_word32_t);
   ALLOC(energy, end-start+2, spx_word32_t);
   ALLOC(score, end-start+1, spx_word32_t);

#ifdef FIXED_POINT
   /* Work on a 16-bit normalised copy of sw[-end .. len-1]; swn[0]
      corresponds to sw[0] */
   ALLOC(swn2, end+len, spx_word16_t);
   normalize16(sw-end, swn2, 16384, end+len);
   swn = swn2 + end;
#else
   swn = sw;
#endif

   for (i=0; i<N; i++)
   {
      best_score[i]=-1;
      pitch[i]=start;
   }

   /* energy[k] is the energy of the window delayed by (start+k) samples */
   energy[0]=inner_prod(swn-start, swn-start, len);
   e0=inner_prod(swn, swn, len);
   /* Stop at end-1: only energy[0 .. end-start] is consumed below, and the
      iteration for i==end would read swn[-end-1], one sample before the
      start of the normalised buffer. */
   for (i=start; i<end; i++)
   {
      /* Update energy for next pitch */
      energy[i-start+1] = SUB32(ADD32(energy[i-start],SHR32(MULT16_16(swn[-i-1],swn[-i-1]),6)), SHR32(MULT16_16(swn[-i+len-1],swn[-i+len-1]),6));
      /* The add/subtract recursion can drift below zero through rounding;
         a negative energy would corrupt the divisions and the square root
         further down */
      if (energy[i-start+1] < 0)
         energy[i-start+1] = 0;
   }

   pitch_xcorr(swn, swn-end, corr, len, end-start+1, stack);

#ifdef FIXED_POINT
   {
      VARDECL(spx_word16_t *corr16);
      VARDECL(spx_word16_t *ener16);
      ALLOC(corr16, end-start+1, spx_word16_t);
      ALLOC(ener16, end-start+1, spx_word16_t);
      normalize16(corr, corr16, 16384, end-start+1);
      normalize16(energy, ener16, 16384, end-start+1);

      for (i=start; i<=end; i++)
      {
         spx_word16_t g;
         spx_word32_t tmp;
         tmp = corr16[i-start];
         if (tmp>0)
         {
            /* Limit the correlation/energy ratio before dividing */
            if (SHR16(corr16[i-start],4)>ener16[i-start])
               tmp = SHL32(EXTEND32(ener16[i-start]),14);
            else if (-SHR16(corr16[i-start],4)>ener16[i-start])
               tmp = -SHL32(EXTEND32(ener16[i-start]),14);
            else
               tmp = SHL32(tmp,10);
            g = DIV32_16(tmp, 8+ener16[i-start]);
            score[i-start] = MULT16_16(corr16[i-start],g);
         } else
         {
            /* Non-positive correlation: minimal score, still above the
               initial best_score of -1 */
            score[i-start] = 1;
         }
      }
   }
#else
   for (i=start; i<=end; i++)
   {
      float g = corr[i-start]/(1+energy[i-start]);
      if (g>16)
         g = 16;
      else if (g<-16)
         g = -16;
      score[i-start] = g*corr[i-start];
   }
#endif

   /* Extract best scores: insertion into a list kept sorted best first */
   for (i=start; i<=end; i++)
   {
      if (score[i-start]>best_score[N-1])
      {
         for (j=0; j<N; j++)
         {
            if (score[i-start] > best_score[j])
            {
               /* Make room at position j, dropping the last entry */
               for (k=N-1; k>j; k--)
               {
                  best_score[k]=best_score[k-1];
                  pitch[k]=pitch[k-1];
               }
               best_score[j]=score[i-start];
               pitch[j]=i;
               break;
            }
         }
      }
   }

   /* Compute open-loop gain */
   if (gain)
   {
      for (j=0; j<N; j++)
      {
         spx_word16_t g;
         i=pitch[j];
         g = DIV32(corr[i-start], 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(energy[i-start])),6));
         /* FIXME: g = max(g,corr/energy) */
         if (g<0)
            g = 0;
         gain[j]=g;
      }
   }
}
void comb_filter( spx_sig_t *exc, /*decoded excitation*/ spx_sig_t *new_exc, /*enhanced excitation*/ spx_coef_t *ak, /*LPC filter coefs*/ int p, /*LPC order*/ int nsf, /*sub-frame size*/ int pitch, /*pitch period*/ spx_word16_t *pitch_gain, /*pitch gain (3-tap)*/ spx_word16_t comb_gain, /*gain of comb filter*/ CombFilterMem *mem ) { int i; spx_word16_t exc_energy=0, new_exc_energy=0; spx_word16_t gain; spx_word16_t step; spx_word16_t fact; /*Compute excitation amplitude prior to enhancement*/ exc_energy = compute_rms(exc, nsf); /*for (i=0;i<nsf;i++) exc_energy+=((float)exc[i])*exc[i];*/ /*Some gain adjustment if pitch is too high or if unvoiced*/ #ifdef FIXED_POINT { spx_word16_t g = gain_3tap_to_1tap(pitch_gain)+gain_3tap_to_1tap(mem->last_pitch_gain); if (g > 166) comb_gain = MULT16_16_Q15(DIV32_16(SHL(165,15),g), comb_gain); if (g < 64) comb_gain = MULT16_16_Q15(SHL(g, 9), comb_gain); } #else { float g=0; g = GAIN_SCALING_1*.5*(gain_3tap_to_1tap(pitch_gain)+gain_3tap_to_1tap(mem->last_pitch_gain)); if (g>1.3) comb_gain*=1.3/g; if (g<.5) comb_gain*=2.*g; } #endif step = DIV32(COMB_STEP, nsf); fact=0; /*Apply pitch comb-filter (filter out noise between pitch harmonics)*/ for (i=0;i<nsf;i++) { spx_word32_t exc1, exc2; fact += step; exc1 = SHL(MULT16_32_Q15(SHL(pitch_gain[0],7),exc[i-pitch+1]) + MULT16_32_Q15(SHL(pitch_gain[1],7),exc[i-pitch]) + MULT16_32_Q15(SHL(pitch_gain[2],7),exc[i-pitch-1]) , 2); exc2 = SHL(MULT16_32_Q15(SHL(mem->last_pitch_gain[0],7),exc[i-mem->last_pitch+1]) + MULT16_32_Q15(SHL(mem->last_pitch_gain[1],7),exc[i-mem->last_pitch]) + MULT16_32_Q15(SHL(mem->last_pitch_gain[2],7),exc[i-mem->last_pitch-1]),2); new_exc[i] = exc[i] + MULT16_32_Q15(comb_gain,MULT16_32_Q15(fact,exc1) + MULT16_32_Q15(SUB16(COMB_STEP,fact), exc2)); } mem->last_pitch_gain[0] = pitch_gain[0]; mem->last_pitch_gain[1] = pitch_gain[1]; mem->last_pitch_gain[2] = pitch_gain[2]; mem->last_pitch = pitch; /*Amplitude after enhancement*/ new_exc_energy = compute_rms(new_exc, nsf); 
if (exc_energy > new_exc_energy) exc_energy = new_exc_energy; gain = DIV32_16(SHL(exc_energy,15),1+new_exc_energy); #ifdef FIXED_POINT if (gain < 16384) gain = 16384; #else if (gain < .5) gain=.5; #endif #ifdef FIXED_POINT for (i=0;i<nsf;i++) { mem->smooth_gain = MULT16_16_Q15(31457,mem->smooth_gain) + MULT16_16_Q15(1311,gain); new_exc[i] = MULT16_32_Q15(mem->smooth_gain, new_exc[i]); } #else for (i=0;i<nsf;i++) { mem->smooth_gain = .96*mem->smooth_gain + .04*gain; new_exc[i] *= mem->smooth_gain; } #endif }
/** Encode the intensity-stereo side information of an interleaved frame and
 *  down-mix the frame to mono in place.
 *
 * Bits written, in order: 5-bit value 14 (in-band marker), 4-bit
 * SPEEX_INBAND_STEREO, 1 bit telling which channel carries more energy
 * (0 = left, 1 = right), 5-bit balance index, 2-bit coherence index.
 *
 * @param data        Interleaved left/right samples (2*frame_size values).
 *                    On return the first frame_size entries hold the mono mix.
 * @param frame_size  Number of stereo sample pairs.
 * @param bits        Bit-stream the side information is packed into.
 */
EXPORT void speex_encode_stereo_int(spx_int16_t *data, int frame_size, SpeexBits *bits)
{
   int n;
   int louder_right;
   int balance_id;
   int coherence_id;
   spx_word32_t e_left=0, e_right=0, e_tot=0;
   spx_word32_t balance, e_ratio;
   spx_word32_t largest, smallest;
#ifdef FIXED_POINT
   int shift;
#endif

   /* In band marker */
   speex_bits_pack(bits, 14, 5);
   /* Stereo marker */
   speex_bits_pack(bits, SPEEX_INBAND_STEREO, 4);

   /* Accumulate per-channel and mono energies while down-mixing. The write
      to data[n] never lands on a pair that has not been read yet because
      n <= 2*n. */
   for (n=0;n<frame_size;n++)
   {
      const spx_int16_t left = data[2*n];
      const spx_int16_t right = data[2*n+1];

      e_left  += SHR32(MULT16_16(left,left),8);
      e_right += SHR32(MULT16_16(right,right),8);
#ifdef FIXED_POINT
      /* One half truncated, the other rounded */
      data[n] =  SHR16(left,1)+PSHR16(right,1);
#else
      data[n] =  .5*(((float)left)+right);
#endif
      e_tot   += SHR32(MULT16_16(data[n],data[n]),8);
   }

   /* Which side dominates? */
   if (e_left > e_right)
   {
      louder_right = 0;
      largest = e_left;
      smallest = e_right;
   } else {
      louder_right = 1;
      largest = e_right;
      smallest = e_left;
   }
   speex_bits_pack(bits, louder_right, 1);

   /* Balance quantization */
#ifdef FIXED_POINT
   shift = spx_ilog2(largest)-15;
   largest = VSHR32(largest, shift-4);
   smallest = VSHR32(smallest, shift);
   balance = DIV32(largest, ADD32(smallest, 1));
   if (balance > 32767)
      balance = 32767;
   balance_id = scal_quant(EXTRACT16(balance), balance_bounds, 32);
#else
   balance=(largest+1.)/(smallest+1.);
   balance=4*log(balance);
   balance_id=floor(.5+fabs(balance));
   if (balance_id>30)
      balance_id=31;
#endif

   speex_bits_pack(bits, balance_id, 5);

   /* "coherence" quantisation: mono energy relative to left+right energy */
#ifdef FIXED_POINT
   shift = spx_ilog2(e_tot);
   e_tot = VSHR32(e_tot, shift-25);
   e_left = VSHR32(e_left, shift-10);
   e_right = VSHR32(e_right, shift-10);
   e_ratio = DIV32(e_tot, e_left+e_right+1);
#else
   e_ratio = e_tot/(1.+e_left+e_right);
#endif

   coherence_id=scal_quant(EXTRACT16(e_ratio), e_ratio_quant_bounds, 4);
   speex_bits_pack(bits, coherence_id, 2);
}
/** Open-loop search for the N best pitch-lag candidates.
 *
 * Candidates are ranked by corr^2 / (1 + energy) without ever dividing: two
 * candidates are compared by cross-multiplying their squared correlation
 * with the other one's (energy + 1).
 *
 * @param sw     Weighted signal; samples sw[-end] .. sw[len-1] are read.
 * @param start  Smallest lag examined.
 * @param end    Largest lag examined (inclusive).
 * @param len    Number of samples in the analysis window.
 * @param pitch  Output: the N selected lags, best first (initialised to
 *               `start`).
 * @param gain   Optional output (may be NULL): open-loop gain for each
 *               selected lag, floored at zero.
 * @param N      Number of candidates to return.
 * @param stack  Scratch memory handed to pitch_xcorr().
 */
void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
{
   int lag, slot, mv;
   VARDECL(spx_word32_t *best_score);
   VARDECL(spx_word32_t *best_ener);
   spx_word32_t e0;
   VARDECL(spx_word32_t *corr);
   VARDECL(spx_word32_t *energy);

   ALLOC(best_score, N, spx_word32_t);
   ALLOC(best_ener, N, spx_word32_t);
   ALLOC(corr, end-start+1, spx_word32_t);
   ALLOC(energy, end-start+2, spx_word32_t);

   /* Empty ranking: score -1 over energy 0 loses to any real candidate */
   for (slot=0;slot<N;slot++)
   {
      best_score[slot]=-1;
      best_ener[slot]=0;
      pitch[slot]=start;
   }

   energy[0]=inner_prod(sw-start, sw-start, len);
   e0=inner_prod(sw, sw, len);

   /* Slide the energy window one sample further into the past per lag:
      add the sample that enters, remove the one that leaves, never let the
      running value go negative */
   for (lag=start;lag<end;lag++)
   {
      spx_word32_t next = SUB32(ADD32(energy[lag-start],SHR32(MULT16_16(sw[-lag-1],sw[-lag-1]),6)), SHR32(MULT16_16(sw[-lag+len-1],sw[-lag+len-1]),6));
      if (next < 0)
         next = 0;
      energy[lag-start+1] = next;
   }

   pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);

   /* FIXME: Fixed-point and floating-point code should be merged */
#ifdef FIXED_POINT
   {
      VARDECL(spx_word16_t *corr16);
      VARDECL(spx_word16_t *ener16);
      ALLOC(corr16, end-start+1, spx_word16_t);
      ALLOC(ener16, end-start+1, spx_word16_t);
      /* Normalize to 180 so we can square it and it still fits in 16 bits */
      normalize16(corr, corr16, 180, end-start+1);
      normalize16(energy, ener16, 180, end-start+1);

      for (lag=start;lag<=end;lag++)
      {
         const int idx = lag-start;
         spx_word16_t sq = MULT16_16_16(corr16[idx],corr16[idx]);

         /* Cross-multiplied comparison against the current worst entry */
         if (!(MULT16_16(sq,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[idx]))))
            continue;

         /* It beats the last entry: store it there first ... */
         best_score[N-1]=sq;
         best_ener[N-1]=ener16[idx]+1;
         pitch[N-1]=lag;

         /* ... then move it forward if it also beats an earlier one */
         for (slot=0;slot<N-1;slot++)
         {
            if (MULT16_16(sq,best_ener[slot])>MULT16_16(best_score[slot],ADD16(1,ener16[idx])))
            {
               for (mv=N-1;mv>slot;mv--)
               {
                  best_score[mv]=best_score[mv-1];
                  best_ener[mv]=best_ener[mv-1];
                  pitch[mv]=pitch[mv-1];
               }
               best_score[slot]=sq;
               best_ener[slot]=ener16[idx]+1;
               pitch[slot]=lag;
               break;
            }
         }
      }
   }
#else
   for (lag=start;lag<=end;lag++)
   {
      const int idx = lag-start;
      float sq = corr[idx]*corr[idx];

      /* Not better than the current worst entry: nothing to insert */
      if (!(sq*best_ener[N-1]>best_score[N-1]*(1+energy[idx])))
         continue;

      for (slot=0;slot<N;slot++)
      {
         if (sq*best_ener[slot]>best_score[slot]*(1+energy[idx]))
         {
            for (mv=N-1;mv>slot;mv--)
            {
               best_score[mv]=best_score[mv-1];
               best_ener[mv]=best_ener[mv-1];
               pitch[mv]=pitch[mv-1];
            }
            best_score[slot]=sq;
            best_ener[slot]=energy[idx]+1;
            pitch[slot]=lag;
            break;
         }
      }
   }
#endif

   /* Compute open-loop gain */
   if (gain)
   {
      for (slot=0;slot<N;slot++)
      {
         spx_word16_t g;
         lag=pitch[slot];
         g = DIV32(corr[lag-start], 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(energy[lag-start])),6));
         /* FIXME: g = max(g,corr/energy) */
         if (g<0)
            g = 0;
         gain[slot]=g;
      }
   }
}
/** Create and initialise a multi-channel echo canceller state.
 *
 * The analysis window is 2*frame_size samples and the adaptive filter is
 * split into M = ceil(filter_length / frame_size) blocks. The sampling rate
 * defaults to 8000.
 *
 * @param frame_size     Number of samples processed per call (must be > 0,
 *                       it is used as a divisor).
 * @param filter_length  Length of the echo tail to model, in samples.
 * @param nb_mic         Number of microphone (capture) channels.
 * @param nb_speakers    Number of loudspeaker (playback) channels.
 * @return The new state, or NULL if the state structure itself could not be
 *         allocated. The individual buffer allocations are not checked.
 */
EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
{
   int i,N,M, C, K;
   SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));

   /* Every statement below dereferences st */
   if (!st)
      return NULL;

   st->K = nb_speakers;
   st->C = nb_mic;
   C=st->C;
   K=st->K;
#ifdef DUMP_ECHO_CANCEL_DATA
   if (rFile || pFile || oFile)
      speex_fatal("Opening dump files twice");
   rFile = fopen("aec_rec.sw", "wb");
   pFile = fopen("aec_play.sw", "wb");
   oFile = fopen("aec_out.sw", "wb");
#endif

   st->frame_size = frame_size;
   st->window_size = 2*frame_size;
   N = st->window_size;
   /* Number of filter blocks, rounded up */
   M = st->M = (filter_length+st->frame_size-1)/frame_size;
   st->cancel_count=0;
   st->sum_adapt = 0;
   st->saturated = 0;
   st->screwed_up = 0;
   /* This is the default sampling rate */
   st->sampling_rate = 8000;
   st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
#ifdef FIXED_POINT
   st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
   st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
#else
   st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
   st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
#endif
   st->leak_estimate = 0;

   st->fft_table = spx_fft_init(N);

   /* Per-channel time-domain and frequency-domain work buffers */
   st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
   st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
   st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));

   st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
   st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
#ifdef TWO_PATH
   st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
#endif
   st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
   st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
   st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
   st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
   st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
   st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
#ifdef FIXED_POINT
   st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
   /* Symmetric window: compute the first half and mirror it */
   for (i=0;i<N>>1;i++)
   {
      st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
      st->window[N-i-1] = st->window[i];
   }
#else
   for (i=0;i<N;i++)
      st->window[i] = .5-.5*cos(2*M_PI*i/N);
#endif
   for (i=0;i<=st->frame_size;i++)
      st->power_1[i] = FLOAT_ONE;
   for (i=0;i<N*M*K*C;i++)
      st->W[i] = 0;
   {
      spx_word32_t sum = 0;
      /* Ratio of ~10 between adaptation rate of first and last block */
      spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
      st->prop[0] = QCONST16(.7, 15);
      sum = EXTEND32(st->prop[0]);
      for (i=1;i<M;i++)
      {
         st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
         sum = ADD32(sum, EXTEND32(st->prop[i]));
      }
      /* Rescale so that the weights add up to about .8 */
      for (i=M-1;i>=0;i--)
      {
         st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
      }
   }

   st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
   st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
   st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
   st->preemph = QCONST16(.9,15);
   /* Notch radius depends on the sampling rate (8000 at this point, so the
      first branch is the one taken here) */
   if (st->sampling_rate<12000)
      st->notch_radius = QCONST16(.9, 15);
   else if (st->sampling_rate<24000)
      st->notch_radius = QCONST16(.982, 15);
   else
      st->notch_radius = QCONST16(.992, 15);

   st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
   st->adapted = 0;
   st->Pey = st->Pyy = FLOAT_ONE;

#ifdef TWO_PATH
   st->Davg1 = st->Davg2 = 0;
   st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
#endif

   /* Playback buffer holds PLAYBACK_DELAY+1 frames per speaker channel; the
      write position starts PLAYBACK_DELAY frames in */
   st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
   st->play_buf_started = 0;

   return st;
}
void *sb_encoder_init(const SpeexMode *m) { int i; spx_int32_t tmp; SBEncState *st; const SpeexSBMode *mode; st = (SBEncState*)speex_alloc(sizeof(SBEncState)); if (!st) return NULL; st->mode = m; mode = (const SpeexSBMode*)m->mode; st->st_low = speex_encoder_init(mode->nb_mode); #if defined(VAR_ARRAYS) || defined (USE_ALLOCA) st->stack = NULL; #else /*st->stack = (char*)speex_alloc_scratch(SB_ENC_STACK);*/ speex_encoder_ctl(st->st_low, SPEEX_GET_STACK, &st->stack); #endif st->full_frame_size = 2*mode->frameSize; st->frame_size = mode->frameSize; st->subframeSize = mode->subframeSize; st->nbSubframes = mode->frameSize/mode->subframeSize; st->windowSize = st->frame_size+st->subframeSize; st->lpcSize=mode->lpcSize; st->encode_submode = 1; st->submodes=mode->submodes; st->submodeSelect = st->submodeID=mode->defaultSubmode; tmp=9; speex_encoder_ctl(st->st_low, SPEEX_SET_QUALITY, &tmp); tmp=1; speex_encoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, &tmp); st->lpc_floor = mode->lpc_floor; st->gamma1=mode->gamma1; st->gamma2=mode->gamma2; st->first=1; st->high=(spx_word16_t*)speex_alloc((st->windowSize-st->frame_size)*sizeof(spx_word16_t)); st->h0_mem=(spx_word16_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word16_t)); st->h1_mem=(spx_word16_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word16_t)); st->window= lpc_window; st->lagWindow = lag_window; st->old_lsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); st->old_qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); st->exc_rms = (spx_word16_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word16_t)); st->innov_rms_save = NULL; st->mem_sp = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sp2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sw = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); for (i=0;i<st->lpcSize;i++) 
st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1); #ifndef DISABLE_VBR st->vbr_quality = 8; st->vbr_enabled = 0; st->vbr_max = 0; st->vbr_max_high = 20000; /* We just need a big value here */ st->vad_enabled = 0; st->abr_enabled = 0; st->relative_quality=0; #endif /* #ifndef DISABLE_VBR */ st->complexity=2; speex_encoder_ctl(st->st_low, SPEEX_GET_SAMPLING_RATE, &st->sampling_rate); st->sampling_rate*=2; #ifdef ENABLE_VALGRIND VALGRIND_MAKE_READABLE(st, (st->stack-(char*)st)); #endif return st; }
/**
 * Post-filter one decoded subframe.
 *
 * Stages, in order: residual computation through A(z/gamma_n), long term
 * (pitch) post filter, tilt compensation, short term synthesis filter
 * 1/A(z/gamma_d) and adaptive gain control.
 *
 * @param decoderChannelContext  channel state: the residual buffers, the long/short term
 *                               filter memories and previousAdaptativeGain are read and updated
 * @param LPCoefficients         NB_LSP_COEFF LP coefficients (Q12 according to the comments below)
 * @param reconstructedSpeech    input subframe; indexes [-NB_LSP_COEFF, L_SUBFRAME[ are read
 * @param intPitchDelay          integer pitch delay, the search range is intPitchDelay-3 up to
 *                               min(intPitchDelay+3, MAXIMUM_INT_PITCH_DELAY)
 * @param subframeIndex          offset of the current subframe in the residual buffers; when > 0
 *                               the residual buffers are shifted by L_FRAME before returning
 * @param postFilteredSignal     output: L_SUBFRAME post filtered samples
 */
void postFilter(bcg729DecoderChannelContextStruct *decoderChannelContext, word16_t *LPCoefficients, word16_t *reconstructedSpeech, int16_t intPitchDelay, int subframeIndex, word16_t *postFilteredSignal)
{
	int i,j;

	/********************************************************************/
	/* Long Term Post Filter                                            */
	/********************************************************************/
	/*** Compute LPGammaN coefficients : LPGammaN[i] = LP[i]*GAMMA_N(i+1) (i=0..9) ***/
	/* NOTE(review): GAMMA_Nk presumably holds gamma_n^k in Q15 - confirm against the constants definition */
	word16_t LPGammaNCoefficients[NB_LSP_COEFF]; /* in Q12 */
	/* GAMMA_XX constants are in Q15 */
	LPGammaNCoefficients[0] = MULT16_16_P15(LPCoefficients[0], GAMMA_N1);
	LPGammaNCoefficients[1] = MULT16_16_P15(LPCoefficients[1], GAMMA_N2);
	LPGammaNCoefficients[2] = MULT16_16_P15(LPCoefficients[2], GAMMA_N3);
	LPGammaNCoefficients[3] = MULT16_16_P15(LPCoefficients[3], GAMMA_N4);
	LPGammaNCoefficients[4] = MULT16_16_P15(LPCoefficients[4], GAMMA_N5);
	LPGammaNCoefficients[5] = MULT16_16_P15(LPCoefficients[5], GAMMA_N6);
	LPGammaNCoefficients[6] = MULT16_16_P15(LPCoefficients[6], GAMMA_N7);
	LPGammaNCoefficients[7] = MULT16_16_P15(LPCoefficients[7], GAMMA_N8);
	LPGammaNCoefficients[8] = MULT16_16_P15(LPCoefficients[8], GAMMA_N9);
	LPGammaNCoefficients[9] = MULT16_16_P15(LPCoefficients[9], GAMMA_N10);

	/*** Compute the residual signal as described in spec 4.2.1 eq79 ***/
	/* Compute also a scaled residual signal: shift right by 2 to avoid overflows on 32 bits when computing correlation and energy */

	/* pointers to current subframe beginning, the MAXIMUM_INT_PITCH_DELAY samples before them are the past residual */
	word16_t *residualSignal = &(decoderChannelContext->residualSignalBuffer[MAXIMUM_INT_PITCH_DELAY+subframeIndex]);
	word16_t *scaledResidualSignal = &(decoderChannelContext->scaledResidualSignalBuffer[MAXIMUM_INT_PITCH_DELAY+subframeIndex]);

	for (i=0; i<L_SUBFRAME; i++) {
		word32_t acc = SHL((word32_t)reconstructedSpeech[i], 12); /* reconstructedSpeech in Q0 shifted to set acc in Q12 */
		for (j=0; j<NB_LSP_COEFF; j++) {
			acc = MAC16_16(acc, LPGammaNCoefficients[j],reconstructedSpeech[i-j-1]); /* LPGammaNCoefficients in Q12, reconstructedSpeech in Q0 -> acc in Q12 */
		}
		residualSignal[i] = (word16_t)SATURATE(PSHR(acc, 12), MAXINT16); /* shift back acc to Q0 and saturate it to avoid overflow when going back to 16 bits */
		scaledResidualSignal[i] = PSHR(residualSignal[i], 2); /* shift the (already saturated) residual to Q-2 to get the scaled version of the signal */
	}

	/*** Compute the maximum correlation on scaledResidualSignal delayed by intPitchDelay +/- 3 to get the best delay. Spec 4.2.1 eq80 ***/
	/* using a scaled(Q-2) signals gives correlation in Q-4. */
	word32_t correlationMax = (word32_t)MININT32; /* lowest possible value so the first candidate is always accepted */
	int16_t intPitchDelayMax = intPitchDelay+3; /* clamped below to MAXIMUM_INT_PITCH_DELAY */
	int16_t bestIntPitchDelay = 0;
	word16_t *delayedResidualSignal;
	if (intPitchDelayMax>MAXIMUM_INT_PITCH_DELAY) {
		intPitchDelayMax = MAXIMUM_INT_PITCH_DELAY;
	}

	for (i=intPitchDelay-3; i<=intPitchDelayMax; i++) {
		word32_t correlation = 0;
		delayedResidualSignal = &(scaledResidualSignal[-i]); /* delayedResidualSignal points to scaledResidualSignal[-i] */

		/* compute correlation: ∑r(n)*rk(n) */
		for (j=0; j<L_SUBFRAME; j++) {
			correlation = MAC16_16(correlation, delayedResidualSignal[j], scaledResidualSignal[j]);
		}
		/* if we have a maximum correlation */
		if (correlation>correlationMax) {
			correlationMax = correlation;
			bestIntPitchDelay = i; /* get the intPitchDelay */
		}
	}

	/* a negative best correlation is replaced by 0 */
	if (correlationMax<0) {
		correlationMax = 0;
	}

	/*** Compute the signal energy ∑r(n)*r(n) and delayed signal energy ∑rk(n)*rk(n) which shall be used to compute gl spec 4.2.1 eq81, eq 82 and eq83 ***/
	word32_t residualSignalEnergy = 0; /* in Q-4 */
	word32_t delayedResidualSignalEnergy = 0; /* in Q-4 */
	delayedResidualSignal = &(scaledResidualSignal[-bestIntPitchDelay]); /* in Q-2, points to the residual signal delayed to give the higher correlation: rk(n) */
	for (i=0; i<L_SUBFRAME; i++) {
		residualSignalEnergy = MAC16_16(residualSignalEnergy, scaledResidualSignal[i], scaledResidualSignal[i]);
		delayedResidualSignalEnergy = MAC16_16(delayedResidualSignalEnergy, delayedResidualSignal[i], delayedResidualSignal[i]);
	}

	/*** Scale correlationMax, residualSignalEnergy and delayedResidualSignalEnergy to the best fit on 16 bits ***/
	/* these variables must fit on 16 bits for the following computation, to avoid losing information, scale them */
	/* at best fit: scale the highest of the three to get the value over 2^14 and shift the other two by the same amount */
	/* Note: all three values are >= 0 */
	word32_t maximumThree = correlationMax;
	if (maximumThree<residualSignalEnergy) {
		maximumThree = residualSignalEnergy;
	}
	if (maximumThree<delayedResidualSignalEnergy) {
		maximumThree = delayedResidualSignalEnergy;
	}

	int16_t leadingZeros = 0;
	word16_t correlationMaxWord16 = 0;
	word16_t residualSignalEnergyWord16 = 0;
	word16_t delayedResidualSignalEnergyWord16 = 0;

	if (maximumThree>0) { /* if all of them are null, just do nothing otherwise shift right to get the max number in range [0x4000,0x8000[ */
		leadingZeros = countLeadingZeros(maximumThree);
		if (leadingZeros<16) {
			correlationMaxWord16 = (word16_t)SHR32(correlationMax, 16-leadingZeros);
			residualSignalEnergyWord16 = (word16_t)SHR32(residualSignalEnergy, 16-leadingZeros);
			delayedResidualSignalEnergyWord16 = (word16_t)SHR32(delayedResidualSignalEnergy, 16-leadingZeros);
		} else { /* if the values already fit on 16 bits, no need to shift */
			correlationMaxWord16 = (word16_t)correlationMax;
			residualSignalEnergyWord16 = (word16_t)residualSignalEnergy;
			delayedResidualSignalEnergyWord16 = (word16_t)delayedResidualSignalEnergy;
		}
	}

	/* eq78: Hp(z)=(1 + γp*gl*z(−T))/(1 + γp*gl) -> (with g=γp*gl) Hp(z)=1/(1+g) + (g/(1+g))*z(-T) = g0 + g1*z(-T) */
	/* g = gl/2 (as γp=0.5)= (eq83) correlationMax/(2*delayedResidualSignalEnergy) */
	/* compute g0 = 1/(1+g)= delayedResidualSignalEnergy/(delayedResidualSignalEnergy+correlationMax/2) = 1-g1*/
	/* compute g1 = g/(1+g) = correlationMax/(2*delayedResidualSignalEnergy+correlationMax) = 1-g0 */

	/*** eq82 -> (correlationMax^2)/(residualSignalEnergy*delayedResidualSignalEnergy)<0.5 ***/
	/* (correlationMax^2) < (residualSignalEnergy*delayedResidualSignalEnergy)*0.5 */
	if ((MULT16_16(correlationMaxWord16, correlationMaxWord16) < SHR(MULT16_16(residualSignalEnergyWord16, delayedResidualSignalEnergyWord16), 1)) /* eq82 */
		|| ((correlationMaxWord16==0) && (delayedResidualSignalEnergyWord16==0))) { /* correlationMax and delayedResidualSignalEnergy values are 0 -> unable to compute g0 and g1 -> disable filter */
		/* long term post filter disabled: the residual is copied unchanged */
		for (i=0; i<L_SUBFRAME; i++) {
			decoderChannelContext->longTermFilteredResidualSignal[i] = residualSignal[i];
		}
	} else { /* eq82 gives long term filter enabled, */
		word16_t g0, g1;
		/* eq83: gl = correlationMax/delayedResidualSignalEnergy bounded in ]0,1] */
		/* check if gl > 1 -> gl=1 -> g=1/2 -> g0=2/3 and g1=1/3 (the comparison uses the unscaled 32 bits values) */
		if (correlationMax > delayedResidualSignalEnergy) {
			g0 = 21845; /* 2/3 in Q15 */
			g1 = 10923; /* 1/3 in Q15 */
		} else {
			/* g1 = correlationMax/(2*delayedResidualSignalEnergy+correlationMax) */
			/* the denominator cannot be null here: both operands null is caught by the test above */
			g1 = DIV32((word32_t)SHL32(correlationMaxWord16,15),(word32_t)ADD32(SHL32(delayedResidualSignalEnergyWord16,1), correlationMaxWord16)); /* g1 in Q15 */
			g0 = SUB16(32767, g1); /* g0 = 1 - g1 in Q15 */
		}

		/* longTermFilteredResidualSignal[i] = g0*residualSignal[i] + g1*delayedResidualSignal[i]*/
		/* the filter runs on the unscaled residual, only the delay search used the scaled one */
		delayedResidualSignal = &(residualSignal[-bestIntPitchDelay]);
		for (i=0; i<L_SUBFRAME; i++) {
			decoderChannelContext->longTermFilteredResidualSignal[i] = (word16_t)SATURATE(PSHR(ADD32(MULT16_16(g0, residualSignal[i]), MULT16_16(g1, delayedResidualSignal[i])), 15), MAXINT16);
		}
	}

	/********************************************************************/
	/* Tilt Compensation Filter                                         */
	/********************************************************************/

	/* compute hf the truncated (to 22 coefficients) impulse response of the filter A(z/γn)/A(z/γd) described in spec 4.2.2 eq84 */
	/* hf(i) = LPGammaNCoeff[i] - ∑[j:0..9]LPGammaDCoeff[j]*hf[i-j-1]) */
	word16_t LPGammaDCoefficients[NB_LSP_COEFF]; /* in Q12 */
	/* GAMMA_XX constants are in Q15 */
	LPGammaDCoefficients[0] = MULT16_16_P15(LPCoefficients[0], GAMMA_D1);
	LPGammaDCoefficients[1] = MULT16_16_P15(LPCoefficients[1], GAMMA_D2);
	LPGammaDCoefficients[2] = MULT16_16_P15(LPCoefficients[2], GAMMA_D3);
	LPGammaDCoefficients[3] = MULT16_16_P15(LPCoefficients[3], GAMMA_D4);
	LPGammaDCoefficients[4] = MULT16_16_P15(LPCoefficients[4], GAMMA_D5);
	LPGammaDCoefficients[5] = MULT16_16_P15(LPCoefficients[5], GAMMA_D6);
	LPGammaDCoefficients[6] = MULT16_16_P15(LPCoefficients[6], GAMMA_D7);
	LPGammaDCoefficients[7] = MULT16_16_P15(LPCoefficients[7], GAMMA_D8);
	LPGammaDCoefficients[8] = MULT16_16_P15(LPCoefficients[8], GAMMA_D9);
	LPGammaDCoefficients[9] = MULT16_16_P15(LPCoefficients[9], GAMMA_D10);

	word16_t hf[22]; /* the truncated impulse response to short term filter Hf in Q12 */
	hf[0] = 4096; /* 1 in Q12 as LPGammaNCoefficients and LPGammaDCoefficients don't contain the first element which is 1 and past values of hf are 0 */
	/* first 10 taps: numerator coefficient minus the recursive part */
	for (i=1; i<11; i++) {
		word32_t acc = (word32_t)SHL(LPGammaNCoefficients[i-1],12); /* LPGammaNCoefficients in Q12 -> acc in Q24 */
		for (j=0; j<NB_LSP_COEFF && j<i; j++) { /* j<i to avoid access to negative index of hf(past values are 0 anyway) */
			acc = MSU16_16(acc, LPGammaDCoefficients[j], hf[i-j-1]); /* LPGammaDCoefficient in Q12, hf in Q12 -> Q24 TODO: Possible overflow?? */
		}
		hf[i] = (word16_t)SATURATE(PSHR(acc, 12), MAXINT16); /* get result back in Q12 and saturate on 16 bits */
	}
	/* remaining taps: the numerator has no coefficient left, only the recursive part contributes */
	for (i=11; i<22; i++) {
		word32_t acc = 0;
		for (j=0; j<NB_LSP_COEFF; j++) { /* i-j-1 is always >= 1 here, no bound check needed on the hf index */
			acc = MSU16_16(acc, LPGammaDCoefficients[j], hf[i-j-1]); /* LPGammaDCoefficient in Q12, hf in Q12 -> Q24 TODO: Possible overflow?? */
		}
		hf[i] = (word16_t)SATURATE(PSHR(acc, 12), MAXINT16); /* get result back in Q12 and saturate on 16 bits */
	}

	/* hf is then used to compute k'1 spec 4.2.3 eq87: k'1 = -rh1/rh0 */
	/* rh0 = ∑[i:0..21]hf[i]*hf[i] */
	/* rh1 = ∑[i:0..20]hf[i]*hf[i+1] */
	word32_t rh1 = MULT16_16(hf[0], hf[1]);
	for (i=1; i<21; i++) {
		rh1 = MAC16_16(rh1, hf[i], hf[i+1]); /* rh1 in Q24 */
	}

	/* tiltCompensationGain is set to 0 if k'1>0 -> rh1<0 (as rh0 is always>0) */
	word16_t tiltCompensatedSignal[L_SUBFRAME]; /* in Q0 */
	if (rh1<0) { /* tiltCompensationGain = 0 -> filter is off, just copy the input */
		memcpy(tiltCompensatedSignal, decoderChannelContext->longTermFilteredResidualSignal, L_SUBFRAME*sizeof(word16_t));
	} else { /*compute tiltCompensationGain = k'1*γt */
		word32_t rh0 = MULT16_16(hf[0], hf[0]);
		for (i=1; i<22; i++) {
			rh0 = MAC16_16(rh0, hf[i], hf[i]); /* rh0 in Q24 */
		}
		rh1 = MULT16_32_Q15(GAMMA_T, rh1); /* GAMMA_T in Q15, rh1 in Q24*/
		/* rh0 >= hf[0]^2 = 2^24 so the divisor PSHR(rh0,12) is at least 4096 */
		word16_t tiltCompensationGain = (word16_t)SATURATE((word32_t)(DIV32(rh1,PSHR(rh0,12))), MAXINT16); /* rh1 in Q24, PSHR(rh0,12) in Q12 -> tiltCompensationGain in Q12 */

		/* compute filter Ht (spec A.4.2.3 eqA14) = 1 + gain*z(-1) */
		/* index -1 on the first iteration is the memory word saved from the previous subframe */
		for (i=0; i<L_SUBFRAME; i++) {
			tiltCompensatedSignal[i] = MSU16_16_Q12(decoderChannelContext->longTermFilteredResidualSignal[i], tiltCompensationGain, decoderChannelContext->longTermFilteredResidualSignal[i-1]);
		}
	}
	/* update memory word of longTermFilteredResidualSignal for next subframe */
	decoderChannelContext->longTermFilteredResidualSignal[-1] = decoderChannelContext->longTermFilteredResidualSignal[L_SUBFRAME-1];

	/********************************************************************/
	/* synthesis filter 1/[Â(z /γd)] spec A.4.2.2                       */
	/*                                                                  */
	/* Note: Â(z/γn) was done before when computing residual signal     */
	/********************************************************************/
	/* shortTermFilteredResidualSignal is accessed in range [-NB_LSP_COEFF,L_SUBFRAME[ */
	synthesisFilter(tiltCompensatedSignal, LPGammaDCoefficients, decoderChannelContext->shortTermFilteredResidualSignal);
	/* get the last NB_LSP_COEFF of shortTermFilteredResidualSignal and set them as memory for next subframe(they do not overlap so use memcpy) */
	memcpy(decoderChannelContext->shortTermFilteredResidualSignalBuffer, &(decoderChannelContext->shortTermFilteredResidualSignalBuffer[L_SUBFRAME]), NB_LSP_COEFF*sizeof(word16_t));

	/********************************************************************/
	/* Adaptive Gain Control spec A.4.2.4                               */
	/*                                                                  */
	/********************************************************************/

	/*** compute G(gain scaling factor) according to eqA15 : G = Sqrt((∑s(n)^2)/∑sf(n)^2 ) ***/
	word16_t gainScalingFactor; /* in Q12 */
	/* compute ∑sf(n)^2, accumulated with MAC16_16_Q4 to avoid overflow on 32 bits sum */
	/* NOTE(review): going by the macro name each product is presumably scaled down by 4 bits before accumulation - confirm against the macro definition */
	word32_t shortTermFilteredResidualSignalSquareSum = 0;
	for (i=0; i<L_SUBFRAME; i++) {
		shortTermFilteredResidualSignalSquareSum = MAC16_16_Q4(shortTermFilteredResidualSignalSquareSum, decoderChannelContext->shortTermFilteredResidualSignal[i], decoderChannelContext->shortTermFilteredResidualSignal[i]);
	}

	/* if the sum is null we can't compute gain -> output of postfiltering is the output of shortTermFilter and previousAdaptativeGain is set to 0 */
	/* the reset of previousAdaptativeGain is not mentioned in the spec but in ITU code only */
	if (shortTermFilteredResidualSignalSquareSum == 0) {
		decoderChannelContext->previousAdaptativeGain = 0;
		for (i=0; i<L_SUBFRAME; i++) {
			postFilteredSignal[i] = decoderChannelContext->shortTermFilteredResidualSignal[i];
		}
	} else { /* we can compute adaptativeGain and output signal */

		/* compute ∑s(n)^2 with the same scaling as the denominator above */
		word32_t reconstructedSpeechSquareSum = 0;
		for (i=0; i<L_SUBFRAME; i++) {
			reconstructedSpeechSquareSum = MAC16_16_Q4(reconstructedSpeechSquareSum, reconstructedSpeech[i], reconstructedSpeech[i]);
		}

		if (reconstructedSpeechSquareSum==0) { /* numerator is null -> current gain is null */
			gainScalingFactor = 0;
		} else {
			/* Compute ∑s(n)^2)/∑sf(n)^2 result shall be in Q10 */
			/* normalise the numerator on 32 bits */
			word16_t numeratorShift = countLeadingZeros(reconstructedSpeechSquareSum);
			reconstructedSpeechSquareSum = SHL(reconstructedSpeechSquareSum, numeratorShift); /* reconstructedSpeechSquareSum*2^numeratorShift */

			/* normalise denominator to get the result directly in Q10 if possible */
			word32_t fractionResult; /* stores ∑s(n)^2)/∑sf(n)^2 */
			word32_t scaledShortTermFilteredResidualSignalSquareSum = VSHR32(shortTermFilteredResidualSignalSquareSum, 10-numeratorShift); /* shortTermFilteredResidualSignalSquareSum*2^(numeratorShift-10)*/

			if (scaledShortTermFilteredResidualSignalSquareSum==0) {/* shift might have sent to zero the denominator */
				/* divide by the unscaled (non null) denominator, then fix the scale of the result */
				fractionResult = DIV32(reconstructedSpeechSquareSum, shortTermFilteredResidualSignalSquareSum); /* result in QnumeratorShift */
				fractionResult = VSHR32(fractionResult, numeratorShift-10); /* result in Q10 */
			} else { /* ok denominator is still > 0 */
				fractionResult = DIV32(reconstructedSpeechSquareSum, scaledShortTermFilteredResidualSignalSquareSum); /* result in Q10 */
			}
			/* now compute current Gain = Sqrt((∑s(n)^2)/∑sf(n)^2 ) */
			/* g729Sqrt_Q0Q7(Q0)->Q7, by giving a Q10 as input, output is in Q12 */
			gainScalingFactor = (word16_t)SATURATE(g729Sqrt_Q0Q7(fractionResult), MAXINT16);

			/* multiply by 0.1 as described in spec A.4.2.4 */
			gainScalingFactor = MULT16_16_P15(gainScalingFactor, 3277); /* in Q12, 3277 = 0.1 in Q15*/
		}
		/* Compute the signal according to eq89 (spec 4.2.4 and section A4.2.4) */
		/* currentGain = 0.9*previousGain + 0.1*gainScalingFactor the 0.1 factor has already been integrated in the variable gainScalingFactor */
		/* outputsignal = currentGain*shortTermFilteredResidualSignal */
		/* the gain is updated on every sample, starting from the value left by the previous subframe */
		word16_t currentAdaptativeGain = decoderChannelContext->previousAdaptativeGain;
		for (i=0; i<L_SUBFRAME; i++) {
			currentAdaptativeGain = ADD16(gainScalingFactor, MULT16_16_P15(currentAdaptativeGain, 29491)); /* 29491 = 0.9 in Q15, result in Q12 */
			postFilteredSignal[i] = MULT16_16_Q12(currentAdaptativeGain, decoderChannelContext->shortTermFilteredResidualSignal[i]);
		}
		decoderChannelContext->previousAdaptativeGain = currentAdaptativeGain;
	}

	/* shift buffers if needed */
	if (subframeIndex>0) { /* only after 2nd subframe treatment */
		/* shift left by L_FRAME the residualSignal and scaledResidualSignal buffers: the last MAXIMUM_INT_PITCH_DELAY samples become the past of the next frame */
		memmove(decoderChannelContext->residualSignalBuffer, &(decoderChannelContext->residualSignalBuffer[L_FRAME]), MAXIMUM_INT_PITCH_DELAY*sizeof(word16_t));
		memmove(decoderChannelContext->scaledResidualSignalBuffer, &(decoderChannelContext->scaledResidualSignalBuffer[L_FRAME]), MAXIMUM_INT_PITCH_DELAY*sizeof(word16_t));
	}
	return;
}
/*
 * Open-loop search for the N best pitch lag candidates in [start, end].
 *
 * For every lag i in start..end the function obtains a cross-correlation
 * value (from pitch_xcorr) and an energy value for the window sw[-i .. -i+len-1],
 * ranks the lags by corr^2 / (1 + energy) and keeps the N best ones.
 *
 * sw     Signal pointer. The code reads sw[-end] .. sw[len-1], so the caller
 *        must provide at least `end` samples of history before sw.
 *        Under FIXED_POINT the samples in that range may be halved in place
 *        and shifted back before returning (the least significant bit is lost).
 * start  First lag examined (inclusive).
 * end    Last lag examined (inclusive).
 * len    Number of samples used for the energy inner products and passed on
 *        to pitch_xcorr.
 * pitch  Output, N entries. Every entry is first set to `start`, then filled
 *        with the selected lags; a new candidate is inserted ahead of the
 *        first entry it beats, so index 0 holds the top-ranked lag.
 * gain   Optional output, N entries. When non-NULL, gain[j] receives a
 *        non-negative gain estimate for pitch[j].
 * N      Number of candidates to keep.
 *        NOTE(review): best_score[N-1] / best_ener[N-1] are accessed without
 *        a check, so this looks like it requires N >= 1 -- confirm callers.
 * stack  Passed through to pitch_xcorr; presumably also the scratch area
 *        used by the ALLOC macro -- confirm against the macro definition.
 */
void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
{
   int i,j,k;
   VARDECL(spx_word32_t *best_score);
   VARDECL(spx_word32_t *best_ener);
   spx_word32_t e0;
   VARDECL(spx_word32_t *corr);
#ifdef FIXED_POINT
   /* In fixed-point, we need only one (temporary) array of 32-bit values and two (corr16, ener16)
      arrays for (normalized) 16-bit values. The 32-bit array is shared: `energy` below is just
      another name for `corr`, so the order of the normalize16/pitch_xcorr calls matters. */
   VARDECL(spx_word16_t *corr16);
   VARDECL(spx_word16_t *ener16);
   spx_word32_t *energy;
   int cshift=0, eshift=0;
   int scaledown = 0;
   ALLOC(corr16, end-start+1, spx_word16_t);
   ALLOC(ener16, end-start+1, spx_word16_t);
   ALLOC(corr, end-start+1, spx_word32_t);
   energy = corr;
#else
   /* In floating-point, we need two float arrays and no normalized copies: corr16 and ener16
      simply point at corr and energy (presumably spx_word16_t and spx_word32_t are the same
      type in this build -- confirm) */
   VARDECL(spx_word32_t *energy);
   spx_word16_t *corr16;
   spx_word16_t *ener16;
   ALLOC(energy, end-start+2, spx_word32_t);
   ALLOC(corr, end-start+1, spx_word32_t);
   corr16 = corr;
   ener16 = energy;
#endif

   ALLOC(best_score, N, spx_word32_t);
   ALLOC(best_ener, N, spx_word32_t);
   /* Start with an empty candidate list: score -1 with energy 0, lag set to `start` */
   for (i=0;i<N;i++)
   {
      best_score[i]=-1;
      best_ener[i]=0;
      pitch[i]=start;
   }

#ifdef FIXED_POINT
   /* Look for any sample whose magnitude exceeds 16383 in the range that will be read */
   for (i=-end;i<len;i++)
   {
      if (ABS16(sw[i])>16383)
      {
         scaledown=1;
         break;
      }
   }
   /* If the weighted input is close to saturation, then we scale it down
      (in place, by one bit; it is shifted back after the correlation step) */
   if (scaledown)
   {
      for (i=-end;i<len;i++)
      {
         sw[i]=SHR16(sw[i],1);
      }
   }
#endif
   /* Energy of the window at the first lag, and of the current (lag 0) window */
   energy[0]=inner_prod(sw-start, sw-start, len);
   e0=inner_prod(sw, sw, len);
   for (i=start;i<end;i++)
   {
      /* Update energy for next pitch: slide the window back by one sample, adding
         sw[-i-1]^2 and removing sw[-i+len-1]^2 (both shifted right by 6, presumably
         to match inner_prod's scaling -- confirm) */
      energy[i-start+1] = SUB32(ADD32(energy[i-start],SHR32(MULT16_16(sw[-i-1],sw[-i-1]),6)), SHR32(MULT16_16(sw[-i+len-1],sw[-i+len-1]),6));
      /* The running update can go negative; clamp it at zero */
      if (energy[i-start+1] < 0)
         energy[i-start+1] = 0;
   }

#ifdef FIXED_POINT
   /* Save a 16-bit normalized copy of the energies (and the value returned by normalize16)
      before the shared 32-bit buffer is reused for the correlations */
   eshift = normalize16(energy, ener16, 32766, end-start+1);
#endif

   /* In fixed-point, this actually overwrites the energy array (aliased to corr) */
   pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);

#ifdef FIXED_POINT
   /* Normalize to 180 so we can square it and it still fits in 16 bits */
   cshift = normalize16(corr, corr16, 180, end-start+1);
   /* If we scaled weighted input down, we need to scale it up again (OK, so we've just lost the LSB, who cares?) */
   if (scaledown)
   {
      for (i=-end;i<len;i++)
      {
         sw[i]=SHL16(sw[i],1);
      }
   }
#endif

   /* Search for the best pitch prediction gain */
   for (i=start;i<=end;i++)
   {
      /* Squared (normalized) correlation for this lag */
      spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]);
      /* Instead of dividing the tmp by the energy, we multiply on the other side:
         tmp/(1+ener) > best_score/best_ener  <=>  tmp*best_ener > best_score*(1+ener) */
      if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start])))
      {
         /* We can safely put it last and then check */
         best_score[N-1]=tmp;
         best_ener[N-1]=ener16[i-start]+1;
         pitch[N-1]=i;
         /* Check if it comes in front of others */
         for (j=0;j<N-1;j++)
         {
            if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start])))
            {
               /* Shift entries j..N-2 down by one slot (dropping the old last entry)
                  and insert the new candidate at position j */
               for (k=N-1;k>j;k--)
               {
                  best_score[k]=best_score[k-1];
                  best_ener[k]=best_ener[k-1];
                  pitch[k]=pitch[k-1];
               }
               best_score[j]=tmp;
               best_ener[j]=ener16[i-start]+1;
               pitch[j]=i;
               break;
            }
         }
      }
   }

   /* Compute open-loop gain if necessary */
   if (gain)
   {
      for (j=0;j<N;j++)
      {
         spx_word16_t g;
         i=pitch[j];
         /* g = corr / (10 + (sqrt(e0) * sqrt(energy) >> 6)), with corr16/ener16 shifted back
            by cshift/eshift.
            NOTE(review): cshift and eshift are only declared under FIXED_POINT; the other
            build presumably relies on SHL32 discarding its shift argument -- confirm. */
         g = DIV32(SHL32(EXTEND32(corr16[i-start]),cshift), 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(SHL32(EXTEND32(ener16[i-start]),eshift))),6));
         /* FIXME: g = max(g,corr/energy) */
         /* Negative gains are clamped to zero */
         if (g<0)
            g = 0;
         gain[j]=g;
      }
   }
}