static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop) { int i, j, p; spx_word16_t max_sum = 1; spx_word32_t prop_sum = 1; for (i=0;i<M;i++) { spx_word32_t tmp = 1; for (p=0;p<P;p++) for (j=0;j<N;j++) tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18))); #ifdef FIXED_POINT /* Just a security in case an overflow were to occur */ tmp = MIN32(ABS32(tmp), 536870912); #endif prop[i] = spx_sqrt(tmp); if (prop[i] > max_sum) max_sum = prop[i]; } for (i=0;i<M;i++) { prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum); prop_sum += EXTEND32(prop[i]); } for (i=0;i<M;i++) { prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum); /*printf ("%f ", prop[i]);*/ } /*printf ("\n");*/ }
EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) { switch(request) { case SPEEX_ECHO_GET_FRAME_SIZE: (*(int*)ptr) = st->frame_size; break; case SPEEX_ECHO_SET_SAMPLING_RATE: st->sampling_rate = (*(int*)ptr); st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); #ifdef FIXED_POINT st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); #else st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; st->beta_max = (.5f*st->frame_size)/st->sampling_rate; #endif if (st->sampling_rate<12000) st->notch_radius = QCONST16(.9, 15); else if (st->sampling_rate<24000) st->notch_radius = QCONST16(.982, 15); else st->notch_radius = QCONST16(.992, 15); break; case SPEEX_ECHO_GET_SAMPLING_RATE: (*(int*)ptr) = st->sampling_rate; break; case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE: /*FIXME: Implement this for multiple channels */ *((spx_int32_t *)ptr) = st->M * st->frame_size; break; case SPEEX_ECHO_GET_IMPULSE_RESPONSE: { int M = st->M, N = st->window_size, n = st->frame_size, i, j; spx_int32_t *filt = (spx_int32_t *) ptr; for(j=0;j<M;j++) { /*FIXME: Implement this for multiple channels */ #ifdef FIXED_POINT for (i=0;i<N;i++) st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN)); spx_ifft(st->fft_table, st->wtmp2, st->wtmp); #else spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); #endif for(i=0;i<n;i++) filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN); } } break; default: speex_warning_int("Unknown speex_echo_ctl request: ", request); return -1; } return 0; }
/* Computes a rough approximation of log2(2^a + 2^b) */ static opus_val16 logSum(opus_val16 a, opus_val16 b) { opus_val16 max; opus_val32 diff; opus_val16 frac; static const opus_val16 diff_table[17] = { QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT), QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT), QCONST16(0.0028123f, DB_SHIFT) }; int low; if (a>b) { max = a; diff = SUB32(EXTEND32(a),EXTEND32(b)); } else { max = b; diff = SUB32(EXTEND32(b),EXTEND32(a)); } if (!(diff < QCONST16(8.f, DB_SHIFT))) /* inverted to catch NaNs */ return max; #ifdef FIXED_POINT low = SHR32(diff, DB_SHIFT-1); frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT); #else low = (int)floor(2*diff); frac = 2*diff - low; #endif return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low])); }
static void cubic_coef(spx_word16_t x, spx_word16_t interp[4]) { /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation but I know it's MMSE-optimal on a sinc */ spx_word16_t x2, x3; x2 = MULT16_16_P15(x, x); x3 = MULT16_16_P15(x, x2); interp[0] = PSHR32(MULT16_16(QCONST16(-0.16667f, 15),x) + MULT16_16(QCONST16(0.16667f, 15),x3),15); interp[1] = EXTRACT16(EXTEND32(x) + SHR32(SUB32(EXTEND32(x2),EXTEND32(x3)),1)); interp[3] = PSHR32(MULT16_16(QCONST16(-0.33333f, 15),x) + MULT16_16(QCONST16(.5f,15),x2) - MULT16_16(QCONST16(0.16667f, 15),x3),15); /* Just to make sure we don't have rounding problems */ interp[2] = Q15_ONE-interp[0]-interp[1]-interp[3]; if (interp[2]<32767) interp[2]+=1; }
void speex_decode_stereo_int(spx_int16_t *data, int frame_size, SpeexStereoState *_stereo) { int i; spx_word32_t balance; spx_word16_t e_left, e_right, e_ratio; RealSpeexStereoState *stereo = (RealSpeexStereoState*)_stereo; /* COMPATIBILITY_HACK(stereo); */ balance=stereo->balance; e_ratio=stereo->e_ratio; /* These two are Q14, with max value just below 2. */ e_right = DIV32(QCONST32(1., 22), spx_sqrt(MULT16_32_Q15(e_ratio, ADD32(QCONST32(1., 16), balance)))); e_left = SHR32(MULT16_16(spx_sqrt(balance), e_right), 8); for (i=frame_size-1;i>=0;i--) { spx_int16_t tmp=data[i]; stereo->smooth_left = EXTRACT16(PSHR32(MAC16_16(MULT16_16(stereo->smooth_left, QCONST16(0.98, 15)), e_left, QCONST16(0.02, 15)), 15)); stereo->smooth_right = EXTRACT16(PSHR32(MAC16_16(MULT16_16(stereo->smooth_right, QCONST16(0.98, 15)), e_right, QCONST16(0.02, 15)), 15)); data[2*i] = (spx_int16_t)MULT16_16_P14(stereo->smooth_left, tmp); data[2*i+1] = (spx_int16_t)MULT16_16_P14(stereo->smooth_right, tmp); } }
static opus_val32 silk_resampler_down2_hp( opus_val32 *S, /* I/O State vector [ 2 ] */ opus_val32 *out, /* O Output signal [ floor(len/2) ] */ const opus_val32 *in, /* I Input signal [ len ] */ int inLen /* I Number of input samples */ ) { int k, len2 = inLen/2; opus_val32 in32, out32, out32_hp, Y, X; opus_val64 hp_ener = 0; /* Internal variables and state are in Q10 format */ for( k = 0; k < len2; k++ ) { /* Convert to Q10 */ in32 = in[ 2 * k ]; /* All-pass section for even input sample */ Y = SUB32( in32, S[ 0 ] ); X = MULT16_32_Q15(QCONST16(0.6074371f, 15), Y); out32 = ADD32( S[ 0 ], X ); S[ 0 ] = ADD32( in32, X ); out32_hp = out32; /* Convert to Q10 */ in32 = in[ 2 * k + 1 ]; /* All-pass section for odd input sample, and add to output of previous section */ Y = SUB32( in32, S[ 1 ] ); X = MULT16_32_Q15(QCONST16(0.15063f, 15), Y); out32 = ADD32( out32, S[ 1 ] ); out32 = ADD32( out32, X ); S[ 1 ] = ADD32( in32, X ); Y = SUB32( -in32, S[ 2 ] ); X = MULT16_32_Q15(QCONST16(0.15063f, 15), Y); out32_hp = ADD32( out32_hp, S[ 2 ] ); out32_hp = ADD32( out32_hp, X ); S[ 2 ] = ADD32( -in32, X ); hp_ener += out32_hp*(opus_val64)out32_hp; /* Add, convert back to int16 and store to output */ out[ k ] = HALF32(out32); } #ifdef FIXED_POINT /* len2 can be up to 480, so we shift by 8 more to make it fit. */ hp_ener = hp_ener >> (2*SIG_SHIFT + 8); #endif return (opus_val32)hp_ener; }
static void kf_bfly2( kiss_fft_cpx * Fout, int m, int N ) { kiss_fft_cpx * Fout2; int i; (void)m; #ifdef CUSTOM_MODES if (m==1) { celt_assert(m==1); for (i=0;i<N;i++) { kiss_fft_cpx t; Fout2 = Fout + 1; t = *Fout2; C_SUB( *Fout2 , *Fout , t ); C_ADDTO( *Fout , t ); Fout += 2; } } else #endif { opus_val16 tw; tw = QCONST16(0.7071067812f, 15); /* We know that m==4 here because the radix-2 is just after a radix-4 */ celt_assert(m==4); for (i=0;i<N;i++) { kiss_fft_cpx t; Fout2 = Fout + 4; t = Fout2[0]; C_SUB( Fout2[0] , Fout[0] , t ); C_ADDTO( Fout[0] , t ); t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw); t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw); C_SUB( Fout2[1] , Fout[1] , t ); C_ADDTO( Fout[1] , t ); t.r = Fout2[2].i; t.i = -Fout2[2].r; C_SUB( Fout2[2] , Fout[2] , t ); C_ADDTO( Fout[2] , t ); t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw); t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw); C_SUB( Fout2[3] , Fout[3] , t ); C_ADDTO( Fout[3] , t ); Fout += 8; } } }
CELTEncoder *celt_encoder_create(const CELTMode *mode) { int N, C; CELTEncoder *st; if (check_mode(mode) != CELT_OK) return NULL; N = mode->mdctSize; C = mode->nbChannels; st = celt_alloc(sizeof(CELTEncoder)); if (st==NULL) return NULL; st->marker = ENCODERPARTIAL; st->mode = mode; st->frame_size = N; st->block_size = N; st->overlap = mode->overlap; st->VBR_rate = 0; st->pitch_enabled = 1; st->pitch_permitted = 1; st->pitch_available = 1; st->force_intra = 0; st->delayedIntra = 1; st->tonal_average = QCONST16(1.,8); st->fold_decision = 1; st->in_mem = celt_alloc(st->overlap*C*sizeof(celt_sig_t)); st->out_mem = celt_alloc((MAX_PERIOD+st->overlap)*C*sizeof(celt_sig_t)); st->oldBandE = (celt_word16_t*)celt_alloc(C*mode->nbEBands*sizeof(celt_word16_t)); st->preemph_memE = (celt_word16_t*)celt_alloc(C*sizeof(celt_word16_t)); st->preemph_memD = (celt_sig_t*)celt_alloc(C*sizeof(celt_sig_t)); #ifdef EXP_PSY st->psy_mem = celt_alloc(MAX_PERIOD*sizeof(celt_word16_t)); psydecay_init(&st->psy, MAX_PERIOD/2, st->mode->Fs); #endif if ((st->in_mem!=NULL) && (st->out_mem!=NULL) && (st->oldBandE!=NULL) #ifdef EXP_PSY && (st->psy_mem!=NULL) #endif && (st->preemph_memE!=NULL) && (st->preemph_memD!=NULL)) { st->marker = ENCODERVALID; return st; } /* If the setup fails for some reason deallocate it. */ celt_encoder_destroy(st); return NULL; }
static int transient_analysis(celt_word32_t *in, int len, int C, int *transient_time, int *transient_shift) { int c, i, n; celt_word32_t ratio; VARDECL(celt_word32_t, begin); SAVE_STACK; ALLOC(begin, len, celt_word32_t); for (i=0;i<len;i++) begin[i] = ABS32(SHR32(in[C*i],SIG_SHIFT)); for (c=1;c<C;c++) { for (i=0;i<len;i++) begin[i] = MAX32(begin[i], ABS32(SHR32(in[C*i+c],SIG_SHIFT))); } for (i=1;i<len;i++) begin[i] = MAX32(begin[i-1],begin[i]); n = -1; for (i=8;i<len-8;i++) { if (begin[i] < MULT16_32_Q15(QCONST16(.2f,15),begin[len-1])) n=i; } if (n<32) { n = -1; ratio = 0; } else { ratio = DIV32(begin[len-1],1+begin[n-16]); } if (ratio < 0) ratio = 0; if (ratio > 1000) ratio = 1000; ratio *= ratio; if (ratio > 2048) *transient_shift = 3; else *transient_shift = 0; *transient_time = n; RESTORE_STACK; return ratio > 20; }
static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) { int i; spx_word16_t den2; #ifdef FIXED_POINT den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius)); #else den2 = radius*radius + .7*(1-radius)*(1-radius); #endif /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ for (i=0;i<len;i++) { spx_word16_t vin = in[i*stride]; spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); #ifdef FIXED_POINT mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1); #else mem[0] = mem[1] + 2*(-vin + radius*vout); #endif mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout); out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767); } }
0, 1, 0, 0, /*LSP quantization*/ lsp_quant_lbr, lsp_unquant_lbr, /*No pitch quantization*/ forced_pitch_quant, forced_pitch_unquant, NULL, /*Innovation quantization*/ split_cb_search_shape_sign, split_cb_shape_sign_unquant, &split_cb_nb_ulbr, QCONST16(.5,15), 79 }; /* 5.95 kbps very low bit-rate mode */ static const SpeexSubmode nb_submode2 = { 0, 0, 0, 0, /*LSP quantization*/ lsp_quant_lbr, lsp_unquant_lbr, /*No pitch quantization*/ pitch_search_3tap, pitch_unquant_3tap,
void *sb_encoder_init(const SpeexMode *m) { int i; spx_int32_t tmp; SBEncState *st; const SpeexSBMode *mode; st = (SBEncState*)speex_alloc(sizeof(SBEncState)); if (!st) return NULL; st->mode = m; mode = (const SpeexSBMode*)m->mode; st->st_low = speex_encoder_init(mode->nb_mode); #if defined(VAR_ARRAYS) || defined (USE_ALLOCA) st->stack = NULL; #else /*st->stack = (char*)speex_alloc_scratch(SB_ENC_STACK);*/ speex_encoder_ctl(st->st_low, SPEEX_GET_STACK, &st->stack); #endif st->full_frame_size = 2*mode->frameSize; st->frame_size = mode->frameSize; st->subframeSize = mode->subframeSize; st->nbSubframes = mode->frameSize/mode->subframeSize; st->windowSize = st->frame_size+st->subframeSize; st->lpcSize=mode->lpcSize; st->encode_submode = 1; st->submodes=mode->submodes; st->submodeSelect = st->submodeID=mode->defaultSubmode; tmp=9; speex_encoder_ctl(st->st_low, SPEEX_SET_QUALITY, &tmp); tmp=1; speex_encoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, &tmp); st->lpc_floor = mode->lpc_floor; st->gamma1=mode->gamma1; st->gamma2=mode->gamma2; st->first=1; st->high=(spx_word16_t*)speex_alloc((st->windowSize-st->frame_size)*sizeof(spx_word16_t)); st->h0_mem=(spx_word16_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word16_t)); st->h1_mem=(spx_word16_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word16_t)); st->window= lpc_window; st->lagWindow = lag_window; st->old_lsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); st->old_qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); st->exc_rms = (spx_word16_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word16_t)); st->innov_rms_save = NULL; st->mem_sp = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sp2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sw = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); for (i=0;i<st->lpcSize;i++) st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1); #ifndef DISABLE_VBR st->vbr_quality = 8; st->vbr_enabled = 0; st->vbr_max = 0; st->vbr_max_high = 20000; /* We just need a big value here */ st->vad_enabled = 0; st->abr_enabled = 0; st->relative_quality=0; #endif /* #ifndef DISABLE_VBR */ st->complexity=2; speex_encoder_ctl(st->st_low, SPEEX_GET_SAMPLING_RATE, &st->sampling_rate); st->sampling_rate*=2; #ifdef ENABLE_VALGRIND VALGRIND_MAKE_READABLE(st, (st->stack-(char*)st)); #endif return st; }
void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget, opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate) { int intra; opus_val16 max_decay; VARDECL(opus_val16, oldEBands_intra); VARDECL(opus_val16, error_intra); ec_enc enc_start_state; opus_uint32 tell; int badness1=0; opus_int32 intra_bias; opus_val32 new_distortion; SAVE_STACK; intra = force_intra || (!two_pass && *delayedIntra>2*C*(end-start) && nbAvailableBytes > (end-start)*C); intra_bias = (opus_int32)((budget**delayedIntra*loss_rate)/(C*512)); new_distortion = loss_distortion(eBands, oldEBands, start, effEnd, m->nbEBands, C); tell = ec_tell(enc); if (tell+3 > budget) two_pass = intra = 0; /* Encode the global flags using a simple probability model (first symbols in the stream) */ max_decay = QCONST16(16.f,DB_SHIFT); if (end-start>10) { #ifdef FIXED_POINT max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3)); #else max_decay = MIN32(max_decay, .125f*nbAvailableBytes); #endif } enc_start_state = *enc; ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16); ALLOC(error_intra, C*m->nbEBands, opus_val16); OPUS_COPY(oldEBands_intra, oldEBands, C*m->nbEBands); if (two_pass || intra) { badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget, tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay); } if (!intra) { unsigned char *intra_buf; ec_enc enc_intra_state; opus_int32 tell_intra; opus_uint32 nstart_bytes; opus_uint32 nintra_bytes; int badness2; VARDECL(unsigned char, intra_bits); tell_intra = ec_tell_frac(enc); enc_intra_state = *enc; nstart_bytes = ec_range_bytes(&enc_start_state); nintra_bytes = ec_range_bytes(&enc_intra_state); intra_buf = ec_get_buffer(&enc_intra_state) + nstart_bytes; ALLOC(intra_bits, nintra_bytes-nstart_bytes, unsigned char); /* Copy bits from intra bit-stream */ OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes); *enc = enc_start_state; badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget, tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay); if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra))) { *enc = enc_intra_state; /* Copy intra bits to bit-stream */ OPUS_COPY(intra_buf, intra_bits, nintra_bytes - nstart_bytes); OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands); OPUS_COPY(error, error_intra, C*m->nbEBands); intra = 1; } } else {
static int quant_coarse_energy_impl(const CELTMode *m, int start, int end, const opus_val16 *eBands, opus_val16 *oldEBands, opus_int32 budget, opus_int32 tell, const unsigned char *prob_model, opus_val16 *error, ec_enc *enc, int C, int LM, int intra, opus_val16 max_decay) { int i, c; int badness = 0; opus_val32 prev[2] = {0,0}; opus_val16 coef; opus_val16 beta; if (tell+3 <= budget) ec_enc_bit_logp(enc, intra, 3); if (intra) { coef = 0; beta = beta_intra; } else { beta = beta_coef[LM]; coef = pred_coef[LM]; } /* Encode at a fixed coarse resolution */ for (i=start;i<end;i++) { c=0; do { int bits_left; int qi, qi0; opus_val32 q; opus_val16 x; opus_val32 f, tmp; opus_val16 oldE; opus_val16 decay_bound; x = eBands[i+c*m->nbEBands]; oldE = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]); #ifdef FIXED_POINT f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c]; /* Rounding to nearest integer here is really important! */ qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7); decay_bound = EXTRACT16(MAX32(-QCONST16(28.f,DB_SHIFT), SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay))); #else f = x-coef*oldE-prev[c]; /* Rounding to nearest integer here is really important! */ qi = (int)floor(.5f+f); decay_bound = MAX16(-QCONST16(28.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]) - max_decay; #endif /* Prevent the energy from going down too quickly (e.g. for bands that have just one bin) */ if (qi < 0 && x < decay_bound) { qi += (int)SHR16(SUB16(decay_bound,x), DB_SHIFT); if (qi > 0) qi = 0; } qi0 = qi; /* If we don't have enough bits to encode all the energy, just assume something safe. */ tell = ec_tell(enc); bits_left = budget-tell-3*C*(end-i); if (i!=start && bits_left < 30) { if (bits_left < 24) qi = IMIN(1, qi); if (bits_left < 16) qi = IMAX(-1, qi); } if (budget-tell >= 15) { int pi; pi = 2*IMIN(i,20); ec_laplace_encode(enc, &qi, prob_model[pi]<<7, prob_model[pi+1]<<6); } else if(budget-tell >= 2) { qi = IMAX(-1, IMIN(qi, 1)); ec_enc_icdf(enc, 2*qi^-(qi<0), small_energy_icdf, 2); } else if(budget-tell >= 1) { qi = IMIN(0, qi); ec_enc_bit_logp(enc, -qi, 1); } else qi = -1; error[i+c*m->nbEBands] = PSHR32(f,7) - SHL16(qi,DB_SHIFT); badness += abs(qi0-qi); q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT); tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7); #ifdef FIXED_POINT tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp); #endif oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7); prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); } while (++c < C); } return badness; }
352 }; /* Split-band wideband CELP mode*/ static const SpeexSBMode sb_wb_mode = { &speex_nb_mode, 160, /*frameSize*/ 40, /*subframeSize*/ 8, /*lpcSize*/ #ifdef FIXED_POINT 29491, 19661, /* gamma1, gamma2 */ #else 0.9, 0.6, /* gamma1, gamma2 */ #endif QCONST16(.0002,15), /*lpc_floor*/ QCONST16(0.9f,15), {NULL, &wb_submode1, &wb_submode2, &wb_submode3, &wb_submode4, NULL, NULL, NULL}, 3, {1, 8, 2, 3, 4, 5, 5, 6, 6, 7, 7}, {1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 4}, #ifndef DISABLE_VBR vbr_hb_thresh, #endif 5 }; EXPORT const SpeexMode speex_wb_mode = { &sb_wb_mode, wb_mode_query,
mem[1]=mem1; mem[2]=mem2; mem[3]=mem3; mem[4]=mem4; } void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, int len, int C, int arch) { int i; opus_val32 ac[5]; opus_val16 tmp=Q15ONE; opus_val16 lpc[4], mem[5]={0,0,0,0,0}; opus_val16 lpc2[5]; opus_val16 c1 = QCONST16(.8f,15); #ifdef FIXED_POINT int shift; opus_val32 maxabs = celt_maxabs32(x[0], len); if (C==2) { opus_val32 maxabs_1 = celt_maxabs32(x[1], len); maxabs = MAX32(maxabs, maxabs_1); } if (maxabs<1) maxabs=1; shift = celt_ilog2(maxabs)-10; if (shift<0) shift=0; if (C==2) shift++;
void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem, int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch ) { int c; int i; int LM; int pos[8] = {0}; int upsample; int frame_size; int freq_size; opus_val16 channel_offset; opus_val32 bandE[21]; opus_val16 maskLogE[3][21]; VARDECL(opus_val32, in); VARDECL(opus_val16, x); VARDECL(opus_val32, freq); SAVE_STACK; upsample = resampling_factor(rate); frame_size = len*upsample; freq_size = IMIN(960, frame_size); /* LM = log2(frame_size / 120) */ for (LM=0;LM<celt_mode->maxLM;LM++) if (celt_mode->shortMdctSize<<LM==frame_size) break; ALLOC(in, frame_size+overlap, opus_val32); ALLOC(x, len, opus_val16); ALLOC(freq, freq_size, opus_val32); channel_pos(channels, pos); for (c=0;c<3;c++) for (i=0;i<21;i++) maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT); for (c=0;c<channels;c++) { int frame; int nb_frames = frame_size/freq_size; celt_assert(nb_frames*freq_size == frame_size); OPUS_COPY(in, mem+c*overlap, overlap); (*copy_channel_in)(x, 1, pcm, channels, c, len); celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0); #ifndef FIXED_POINT { opus_val32 sum; sum = celt_inner_prod(in, in, frame_size+overlap, 0); /* This should filter out both NaNs and ridiculous signals that could cause NaNs further down. */ if (!(sum < 1e9f) || celt_isnan(sum)) { OPUS_CLEAR(in, frame_size+overlap); preemph_mem[c] = 0; } } #endif OPUS_CLEAR(bandE, 21); for (frame=0;frame<nb_frames;frame++) { opus_val32 tmpE[21]; clt_mdct_forward(&celt_mode->mdct, in+960*frame, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1, arch); if (upsample != 1) { int bound = freq_size/upsample; for (i=0;i<bound;i++) freq[i] *= upsample; for (;i<freq_size;i++) freq[i] = 0; } compute_band_energies(celt_mode, freq, tmpE, 21, 1, LM); /* If we have multiple frames, take the max energy. */ for (i=0;i<21;i++) bandE[i] = MAX32(bandE[i], tmpE[i]); } amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1); /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */ for (i=1;i<21;i++) bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i-1]-QCONST16(1.f, DB_SHIFT)); for (i=19;i>=0;i--) bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT)); if (pos[c]==1) { for (i=0;i<21;i++) maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]); } else if (pos[c]==3) { for (i=0;i<21;i++) maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]); } else if (pos[c]==2) { for (i=0;i<21;i++) { maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT)); maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT)); } } #if 0 for (i=0;i<21;i++) printf("%f ", bandLogE[21*c+i]); float sum=0; for (i=0;i<21;i++) sum += bandLogE[21*c+i]; printf("%f ", sum/21); #endif OPUS_COPY(mem+c*overlap, in+frame_size, overlap); } for (i=0;i<21;i++) maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]); channel_offset = HALF16(celt_log2(QCONST32(2.f,14)/(channels-1))); for (c=0;c<3;c++) for (i=0;i<21;i++) maskLogE[c][i] += channel_offset; #if 0 for (c=0;c<3;c++) { for (i=0;i<21;i++) printf("%f ", maskLogE[c][i]); } #endif for (c=0;c<channels;c++) { opus_val16 *mask; if (pos[c]!=0) { mask = &maskLogE[pos[c]-1][0]; for (i=0;i<21;i++) bandLogE[21*c+i] = bandLogE[21*c+i] - mask[i]; } else { for (i=0;i<21;i++) bandLogE[21*c+i] = 0; } #if 0 for (i=0;i<21;i++) printf("%f ", bandLogE[21*c+i]); printf("\n"); #endif #if 0 float sum=0; for (i=0;i<21;i++) sum += bandLogE[21*c+i]; printf("%f ", sum/(float)QCONST32(21.f, DB_SHIFT)); printf("\n"); #endif } RESTORE_STACK; }
EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers) { int i,N,M, C, K; SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); st->K = nb_speakers; st->C = nb_mic; C=st->C; K=st->K; #ifdef DUMP_ECHO_CANCEL_DATA if (rFile || pFile || oFile) speex_fatal("Opening dump files twice"); rFile = fopen("aec_rec.sw", "wb"); pFile = fopen("aec_play.sw", "wb"); oFile = fopen("aec_out.sw", "wb"); #endif st->frame_size = frame_size; st->window_size = 2*frame_size; N = st->window_size; M = st->M = (filter_length+st->frame_size-1)/frame_size; st->cancel_count=0; st->sum_adapt = 0; st->saturated = 0; st->screwed_up = 0; /* This is the default sampling rate */ st->sampling_rate = 8000; st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); #ifdef FIXED_POINT st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); #else st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; st->beta_max = (.5f*st->frame_size)/st->sampling_rate; #endif st->leak_estimate = 0; st->fft_table = spx_fft_init(N); st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); #ifdef TWO_PATH st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); #endif st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t)); st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); #ifdef FIXED_POINT st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); for (i=0;i<N>>1;i++) { st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1)); st->window[N-i-1] = st->window[i]; } #else for (i=0;i<N;i++) st->window[i] = .5-.5*cos(2*M_PI*i/N); #endif for (i=0;i<=st->frame_size;i++) st->power_1[i] = FLOAT_ONE; for (i=0;i<N*M*K*C;i++) st->W[i] = 0; { spx_word32_t sum = 0; /* Ratio of ~10 between adaptation rate of first and last block */ spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1); st->prop[0] = QCONST16(.7, 15); sum = EXTEND32(st->prop[0]); for (i=1;i<M;i++) { st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay); sum = ADD32(sum, EXTEND32(st->prop[i])); } for (i=M-1;i>=0;i--) { st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum); } } st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t)); st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); st->preemph = QCONST16(.9,15); if (st->sampling_rate<12000) st->notch_radius = QCONST16(.9, 15); else if (st->sampling_rate<24000) st->notch_radius = QCONST16(.982, 15); else st->notch_radius = QCONST16(.992, 15); st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t)); st->adapted = 0; st->Pey = st->Pyy = FLOAT_ONE; #ifdef TWO_PATH st->Davg1 = st->Davg2 = 0; st->Dvar1 = st->Dvar2 = FLOAT_ZERO; #endif st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; st->play_buf_started = 0; return st; }
CELTEncoder *celt_encoder_create(const CELTMode *mode, int channels, int *error) { int N, C; CELTEncoder *st; if (check_mode(mode) != CELT_OK) { if (error) *error = CELT_INVALID_MODE; return NULL; } #ifdef DISABLE_STEREO if (channels > 1) { celt_warning("Stereo support was disable from this build"); if (error) *error = CELT_BAD_ARG; return NULL; } #endif if (channels < 0 || channels > 2) { celt_warning("Only mono and stereo supported"); if (error) *error = CELT_BAD_ARG; return NULL; } N = mode->mdctSize; C = channels; st = celt_alloc(sizeof(CELTEncoder)); if (st==NULL) { if (error) *error = CELT_ALLOC_FAIL; return NULL; } st->marker = ENCODERPARTIAL; st->mode = mode; st->frame_size = N; st->block_size = N; st->overlap = mode->overlap; st->channels = channels; st->vbr_rate = 0; st->pitch_enabled = 1; st->pitch_permitted = 1; st->pitch_available = 1; st->force_intra = 0; st->delayedIntra = 1; st->tonal_average = QCONST16(1.,8); st->fold_decision = 1; st->in_mem = celt_alloc(st->overlap*C*sizeof(celt_sig)); st->out_mem = celt_alloc((MAX_PERIOD+st->overlap)*C*sizeof(celt_sig)); st->pitch_buf = celt_alloc(((MAX_PERIOD>>1)+2)*sizeof(celt_word16)); st->oldBandE = (celt_word16*)celt_alloc(C*mode->nbEBands*sizeof(celt_word16)); st->preemph_memE = (celt_word16*)celt_alloc(C*sizeof(celt_word16)); st->preemph_memD = (celt_sig*)celt_alloc(C*sizeof(celt_sig)); if ((st->in_mem!=NULL) && (st->out_mem!=NULL) && (st->oldBandE!=NULL) && (st->preemph_memE!=NULL) && (st->preemph_memD!=NULL)) { if (error) *error = CELT_OK; st->marker = ENCODERVALID; return st; } /* If the setup fails for some reason deallocate it. */ celt_encoder_destroy(st); if (error) *error = CELT_ALLOC_FAIL; return NULL; }
#ifdef HAVE_CONFIG_H #include "config.h" #endif #include <math.h> #include "bands.h" #include "modes.h" #include "vq.h" #include "cwrs.h" #include "stack_alloc.h" #include "os_support.h" #include "mathops.h" #include "rate.h" const celt_word16_t sqrtC_1[2] = {QCONST16(1.f, 14), QCONST16(1.414214f, 14)}; #ifdef FIXED_POINT /* Compute the amplitude (sqrt energy) in each of the bands */ void compute_band_energies(const CELTMode *m, const celt_sig_t *X, celt_ener_t *bank) { int i, c, N; const celt_int16_t *eBands = m->eBands; const int C = CHANNELS(m); N = FRAMESIZE(m); for (c=0;c<C;c++) { for (i=0;i<m->nbEBands;i++) { int j; celt_word32_t maxval=0;
/* Default mode for narrowband */ static const SpeexNBMode nb_mode = { 160, /*frameSize*/ 40, /*subframeSize*/ 10, /*lpcSize*/ 17, /*pitchStart*/ 144, /*pitchEnd*/ #ifdef FIXED_POINT 29491, 19661, /* gamma1, gamma2 */ #else 0.9, 0.6, /* gamma1, gamma2 */ #endif .012, /*lag_factor*/ QCONST16(.0002,15), /*lpc_floor*/ #ifdef EPIC_48K 0, #endif {NULL, &nb_submode1, &nb_submode2, &nb_submode3, &nb_submode4, &nb_submode5, &nb_submode6, &nb_submode7, &nb_submode8, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, 5, {1, 8, 2, 3, 3, 4, 4, 5, 5, 6, 7} }; /* Default mode for narrowband */ const SpeexMode speex_nb_mode = { &nb_mode, nb_mode_query, "narrowband",
static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) { void *silk_dec; CELTDecoder *celt_dec; int i, silk_ret=0, celt_ret=0; ec_dec dec; opus_int32 silk_frame_size; int pcm_silk_size; VARDECL(opus_int16, pcm_silk); int pcm_transition_silk_size; VARDECL(opus_val16, pcm_transition_silk); int pcm_transition_celt_size; VARDECL(opus_val16, pcm_transition_celt); opus_val16 *pcm_transition=NULL; int redundant_audio_size; VARDECL(opus_val16, redundant_audio); int audiosize; int mode; int transition=0; int start_band; int redundancy=0; int redundancy_bytes = 0; int celt_to_silk=0; int c; int F2_5, F5, F10, F20; const opus_val16 *window; opus_uint32 redundant_rng = 0; int celt_accum; ALLOC_STACK; silk_dec = (char*)st+st->silk_dec_offset; celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset); F20 = st->Fs/50; F10 = F20>>1; F5 = F10>>1; F2_5 = F5>>1; if (frame_size < F2_5) { RESTORE_STACK; return OPUS_BUFFER_TOO_SMALL; } /* Limit frame_size to avoid excessive stack allocations. */ frame_size = IMIN(frame_size, st->Fs/25*3); /* Payloads of 1 (2 including ToC) or 0 trigger the PLC/DTX */ if (len<=1) { data = NULL; /* In that case, don't conceal more than what the ToC says */ frame_size = IMIN(frame_size, st->frame_size); } if (data != NULL) { audiosize = st->frame_size; mode = st->mode; ec_dec_init(&dec,(unsigned char*)data,len); } else { audiosize = frame_size; mode = st->prev_mode; if (mode == 0) { /* If we haven't got any packet yet, all we can do is return zeros */ for (i=0; i<audiosize*st->channels; i++) pcm[i] = 0; RESTORE_STACK; return audiosize; } /* Avoids trying to run the PLC on sizes other than 2.5 (CELT), 5 (CELT), 10, or 20 (e.g. 12.5 or 30 ms). */ if (audiosize > F20) { do { int ret = opus_decode_frame(st, NULL, 0, pcm, IMIN(audiosize, F20), 0); if (ret<0) { RESTORE_STACK; return ret; } pcm += ret*st->channels; audiosize -= ret; } while (audiosize > 0); RESTORE_STACK; return frame_size; } else if (audiosize < F20) { if (audiosize > F10) audiosize = F10; else if (mode != MODE_SILK_ONLY && audiosize > F5 && audiosize < F10) audiosize = F5; } } /* In fixed-point, we can tell CELT to do the accumulation on top of the SILK PCM buffer. This saves some stack space. */ #ifdef FIXED_POINT celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10); #else celt_accum = 0; #endif pcm_transition_silk_size = ALLOC_NONE; pcm_transition_celt_size = ALLOC_NONE; if (data!=NULL && st->prev_mode > 0 && ( (mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY && !st->prev_redundancy) || (mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ) ) { transition = 1; /* Decide where to allocate the stack memory for pcm_transition */ if (mode == MODE_CELT_ONLY) pcm_transition_celt_size = F5*st->channels; else pcm_transition_silk_size = F5*st->channels; } ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16); if (transition && mode == MODE_CELT_ONLY) { pcm_transition = pcm_transition_celt; opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); } if (audiosize > frame_size) { /*fprintf(stderr, "PCM buffer too small: %d vs %d (mode = %d)\n", audiosize, frame_size, mode);*/ RESTORE_STACK; return OPUS_BAD_ARG; } else { frame_size = audiosize; } /* Don't allocate any memory when in CELT-only mode */ pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; ALLOC(pcm_silk, pcm_silk_size, opus_int16); /* SILK processing */ if (mode != MODE_CELT_ONLY) { int lost_flag, decoded_samples; opus_int16 *pcm_ptr; #ifdef FIXED_POINT if (celt_accum) pcm_ptr = pcm; else #endif pcm_ptr = pcm_silk; if (st->prev_mode==MODE_CELT_ONLY) silk_InitDecoder( silk_dec ); /* The SILK PLC cannot produce frames of less than 10 ms */ st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs); if (data != NULL) { st->DecControl.nChannelsInternal = st->stream_channels; if( mode == MODE_SILK_ONLY ) { if( st->bandwidth == OPUS_BANDWIDTH_NARROWBAND ) { st->DecControl.internalSampleRate = 8000; } else if( st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) { st->DecControl.internalSampleRate = 12000; } else if( st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ) { st->DecControl.internalSampleRate = 16000; } else { st->DecControl.internalSampleRate = 16000; silk_assert( 0 ); } } else { /* Hybrid mode */ st->DecControl.internalSampleRate = 16000; } } lost_flag = data == NULL ? 1 : 2 * decode_fec; decoded_samples = 0; do { /* Call SILK decoder */ int first_frame = decoded_samples == 0; silk_ret = silk_Decode( silk_dec, &st->DecControl, lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size ); if( silk_ret ) { if (lost_flag) { /* PLC failure should not be fatal */ silk_frame_size = frame_size; for (i=0; i<frame_size*st->channels; i++) pcm_ptr[i] = 0; } else { RESTORE_STACK; return OPUS_INTERNAL_ERROR; } } pcm_ptr += silk_frame_size * st->channels; decoded_samples += silk_frame_size; } while( decoded_samples < frame_size ); } start_band = 0; if (!decode_fec && mode != MODE_CELT_ONLY && data != NULL && ec_tell(&dec)+17+20*(st->mode == MODE_HYBRID) <= 8*len) { /* Check if we have a redundant 0-8 kHz band */ if (mode == MODE_HYBRID) redundancy = ec_dec_bit_logp(&dec, 12); else redundancy = 1; if (redundancy) { celt_to_silk = ec_dec_bit_logp(&dec, 1); /* redundancy_bytes will be at least two, in the non-hybrid case due to the ec_tell() check above */ redundancy_bytes = mode==MODE_HYBRID ? (opus_int32)ec_dec_uint(&dec, 256)+2 : len-((ec_tell(&dec)+7)>>3); len -= redundancy_bytes; /* This is a sanity check. It should never happen for a valid packet, so the exact behaviour is not normative. */ if (len*8 < ec_tell(&dec)) { len = 0; redundancy_bytes = 0; redundancy = 0; } /* Shrink decoder because of raw bits */ dec.storage -= redundancy_bytes; } } if (mode != MODE_CELT_ONLY) start_band = 17; { int endband=21; switch(st->bandwidth) { case OPUS_BANDWIDTH_NARROWBAND: endband = 13; break; case OPUS_BANDWIDTH_MEDIUMBAND: case OPUS_BANDWIDTH_WIDEBAND: endband = 17; break; case OPUS_BANDWIDTH_SUPERWIDEBAND: endband = 19; break; case OPUS_BANDWIDTH_FULLBAND: endband = 21; break; } celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband)); celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels)); } if (redundancy) { transition = 0; pcm_transition_silk_size=ALLOC_NONE; } ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16); if (transition && mode != MODE_CELT_ONLY) { pcm_transition = pcm_transition_silk; opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); } /* Only allocation memory for redundancy if/when needed */ redundant_audio_size = redundancy ? F5*st->channels : ALLOC_NONE; ALLOC(redundant_audio, redundant_audio_size, opus_val16); /* 5 ms redundant frame for CELT->SILK*/ if (redundancy && celt_to_silk) { celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); } /* MUST be after PLC */ celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band)); if (mode != MODE_SILK_ONLY) { int celt_frame_size = IMIN(F20, frame_size); /* Make sure to discard any previous CELT state */ if (mode != st->prev_mode && st->prev_mode > 0 && !st->prev_redundancy) celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); /* Decode CELT */ celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data, len, pcm, celt_frame_size, &dec, celt_accum); } else { unsigned char silence[2] = {0xFF, 0xFF}; if (!celt_accum) { for (i=0; i<frame_size*st->channels; i++) pcm[i] = 0; } /* For hybrid -> SILK transitions, we let the CELT MDCT do a fade-out by decoding a silence frame */ if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) { celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum); } } if (mode != MODE_CELT_ONLY && !celt_accum) { #ifdef FIXED_POINT for (i=0; i<frame_size*st->channels; i++) pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i])); #else for (i=0; i<frame_size*st->channels; i++) pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]); #endif } { const CELTMode *celt_mode; celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode)); window = celt_mode->window; } /* 5 ms redundant frame for SILK->CELT */ if (redundancy && !celt_to_silk) { celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); } if (redundancy && celt_to_silk) { for (c=0; c<st->channels; c++) { for (i=0; i<F2_5; i++) pcm[st->channels*i+c] = redundant_audio[st->channels*i+c]; } smooth_fade(redundant_audio+st->channels*F2_5, pcm+st->channels*F2_5, pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs); } if (transition) { if (audiosize >= F5) { for (i=0; i<st->channels*F2_5; i++) pcm[i] = pcm_transition[i]; smooth_fade(pcm_transition+st->channels*F2_5, pcm+st->channels*F2_5, pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs); } else { /* Not enough time to do a clean transition, but we do it anyway This will not preserve amplitude perfectly and may introduce a bit of temporal aliasing, but it shouldn't be too bad and that's pretty much the best we can do. In any case, generating this transition it pretty silly in the first place */ smooth_fade(pcm_transition, pcm, pcm, F2_5, st->channels, window, st->Fs); } } if(st->decode_gain) { opus_val32 gain; gain = celt_exp2(MULT16_16_P15(QCONST16(6.48814081e-4f, 25), st->decode_gain)); for (i=0; i<frame_size*st->channels; i++) { opus_val32 x; x = MULT16_32_P16(pcm[i],gain); pcm[i] = SATURATE(x, 32767); } } if (len <= 1) st->rangeFinal = 0; else st->rangeFinal = dec.rng ^ redundant_rng; st->prev_mode = mode; st->prev_redundancy = redundancy && !celt_to_silk; if (celt_ret>=0) { if (OPUS_CHECK_ARRAY(pcm, audiosize*st->channels)) OPUS_PRINT_INT(audiosize); } RESTORE_STACK; return celt_ret < 0 ? celt_ret : audiosize; }
/** Performs echo cancellation on a frame */ EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) { int i,j, chan, speak; int N,M, C, K; spx_word32_t Syy,See,Sxx,Sdd, Sff; #ifdef TWO_PATH spx_word32_t Dbf; int update_foreground; #endif spx_word32_t Sey; spx_word16_t ss, ss_1; spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE; spx_float_t alpha, alpha_1; spx_word16_t RER; spx_word32_t tmp32; N = st->window_size; M = st->M; C = st->C; K = st->K; st->cancel_count++; #ifdef FIXED_POINT ss=DIV32_16(11469,M); ss_1 = SUB16(32767,ss); #else ss=.35/M; ss_1 = 1-ss; #endif for (chan = 0; chan < C; chan++) { /* Apply a notch filter to make sure DC doesn't end up causing problems */ filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C); /* Copy input data to buffer and apply pre-emphasis */ /* Copy input data to buffer */ for (i=0;i<st->frame_size;i++) { spx_word32_t tmp32; /* FIXME: This core has changed a bit, need to merge properly */ tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); #ifdef FIXED_POINT if (tmp32 > 32767) { tmp32 = 32767; if (st->saturated == 0) st->saturated = 1; } if (tmp32 < -32767) { tmp32 = -32767; if (st->saturated == 0) st->saturated = 1; } #endif st->memD[chan] = st->input[chan*st->frame_size+i]; st->input[chan*st->frame_size+i] = EXTRACT16(tmp32); } } for (speak = 0; speak < K; speak++) { for (i=0;i<st->frame_size;i++) { spx_word32_t tmp32; st->x[speak*N+i] = st->x[speak*N+i+st->frame_size]; tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); #ifdef FIXED_POINT /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ if (tmp32 > 32767) { tmp32 = 32767; st->saturated = M+1; } if (tmp32 < -32767) { tmp32 = -32767; st->saturated = M+1; } #endif st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32); st->memX[speak] = far_end[i*K+speak]; } } for (speak = 0; speak < K; speak++) { /* Shift memory: this could be optimized eventually*/ for (j=M-1;j>=0;j--) { for (i=0;i<N;i++) st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i]; } /* Convert x (echo input) to frequency domain */ spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); } Sxx = 0; for (speak = 0; speak < K; speak++) { Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); power_spectrum_accum(st->X+speak*N, st->Xf, N); } Sff = 0; for (chan = 0; chan < C; chan++) { #ifdef TWO_PATH /* Compute foreground filter */ spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K); spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); for (i=0;i<st->frame_size;i++) st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]); Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); #endif } /* Adjust proportional adaption rate */ /* FIXME: Adjust that for C, K*/ if (st->adapted) mdf_adjust_prop (st->W, N, M, C*K, st->prop); /* Compute weight gradient */ if (st->saturated == 0) { for (chan = 0; chan < C; chan++) { for (speak = 0; speak < K; speak++) { for (j=M-1;j>=0;j--) { weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); for (i=0;i<N;i++) st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i]; } } } } else { st->saturated--; } /* FIXME: MC conversion required */ /* Update weight to prevent circular convolution (MDF / AUMDF) */ for (chan = 0; chan < C; chan++) { for (speak = 0; speak < K; speak++) { for (j=0;j<M;j++) { /* This is a variant of the Alternatively Updated MDF (AUMDF) */ /* Remove the "if" to make this an MDF filter */ if (j==0 || st->cancel_count%(M-1) == j-1) { #ifdef FIXED_POINT for (i=0;i<N;i++) st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16)); spx_ifft(st->fft_table, st->wtmp2, st->wtmp); for (i=0;i<st->frame_size;i++) { st->wtmp[i]=0; } for (i=st->frame_size;i<N;i++) { st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); } spx_fft(st->fft_table, st->wtmp, st->wtmp2); /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ for (i=0;i<N;i++) st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); #else spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp); for (i=st->frame_size;i<N;i++) { st->wtmp[i]=0; } spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]); #endif } } } } /* So we can use power_spectrum_accum */ for (i=0;i<=st->frame_size;i++) st->Rf[i] = st->Yf[i] = st->Xf[i] = 0; Dbf = 0; See = 0; #ifdef TWO_PATH /* Difference in response, this is used to estimate the variance of our residual power estimate */ for (chan = 0; chan < C; chan++) { spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); for (i=0;i<st->frame_size;i++) st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]); Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); for (i=0;i<st->frame_size;i++) st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); } #endif #ifndef TWO_PATH Sff = See; #endif #ifdef TWO_PATH /* Logic for updating the foreground filter */ /* For two time windows, compute the mean of the energy difference, as well as the variance */ st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See))); st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See))); st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf))); st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf))); /* Equivalent float code: st->Davg1 = .6*st->Davg1 + .4*(Sff-See); st->Davg2 = .85*st->Davg2 + .15*(Sff-See); st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf; st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf; */ update_foreground = 0; /* Check if we have a statistically significant reduction in the residual echo */ /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */ if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf))) update_foreground = 1; else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1)))) update_foreground = 1; else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2)))) update_foreground = 1; /* Do we update? */ if (update_foreground) { st->Davg1 = st->Davg2 = 0; st->Dvar1 = st->Dvar2 = FLOAT_ZERO; /* Copy background filter to foreground filter */ for (i=0;i<N*M*C*K;i++) st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); /* Apply a smooth transition so as to not introduce blocking artifacts */ for (chan = 0; chan < C; chan++) for (i=0;i<st->frame_size;i++) st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); } else { int reset_background=0; /* Otherwise, check if the background filter is significantly worse */ if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf)))) reset_background = 1; if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1))) reset_background = 1; if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2))) reset_background = 1; if (reset_background) { /* Copy foreground filter to background filter */ for (i=0;i<N*M*C*K;i++) st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); /* We also need to copy the output so as to get correct adaptation */ for (chan = 0; chan < C; chan++) { for (i=0;i<st->frame_size;i++) st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size]; for (i=0;i<st->frame_size;i++) st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); } See = Sff; st->Davg1 = st->Davg2 = 0; st->Dvar1 = st->Dvar2 = FLOAT_ZERO; } } #endif Sey = Syy = Sdd = 0; for (chan = 0; chan < C; chan++) { /* Compute error signal (for the output with de-emphasis) */ for (i=0;i<st->frame_size;i++) { spx_word32_t tmp_out; #ifdef TWO_PATH tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size])); #else tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size])); #endif tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan]))); /* This is an arbitrary test for saturation in the microphone signal */ if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000) { if (st->saturated == 0) st->saturated = 1; } out[i*C+chan] = WORD2INT(tmp_out); st->memE[chan] = tmp_out; } #ifdef DUMP_ECHO_CANCEL_DATA dump_audio(in, far_end, out, st->frame_size); #endif /* Compute error signal (filter update version) */ for (i=0;i<st->frame_size;i++) { st->e[chan*N+i+st->frame_size] = st->e[chan*N+i]; st->e[chan*N+i] = 0; } /* Compute a bunch of correlations */ /* FIXME: bad merge */ Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size); /* Convert error to frequency domain */ spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N); for (i=0;i<st->frame_size;i++) st->y[i+chan*N] = 0; spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N); /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ power_spectrum_accum(st->E+chan*N, st->Rf, N); power_spectrum_accum(st->Y+chan*N, st->Yf, N); } /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ /* Do some sanity check */ if (!(Syy>=0 && Sxx>=0 && See >= 0) #ifndef FIXED_POINT || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9) #endif ) { /* Things have gone really bad */ st->screwed_up += 50; for (i=0;i<st->frame_size*C;i++) out[i] = 0; } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) { /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */ st->screwed_up++; } else { /* Everything's fine */ st->screwed_up=0; } if (st->screwed_up>=50) { speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now."); speex_echo_state_reset(st); return; } /* Add a small noise floor to make sure not to have problems when dividing */ See = MAX32(See, SHR32(MULT16_16(N, 100),6)); for (speak = 0; speak < K; speak++) { Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); power_spectrum_accum(st->X+speak*N, st->Xf, N); } /* Smooth far end energy estimate over time */ for (j=0;j<=st->frame_size;j++) st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); /* Compute filtered spectra and (cross-)correlations */ for (j=st->frame_size;j>=0;j--) { spx_float_t Eh, Yh; Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]); Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]); Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh)); Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh)); #ifdef FIXED_POINT st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]); st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]); #else st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j]; st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j]; #endif } Pyy = FLOAT_SQRT(Pyy); Pey = FLOAT_DIVU(Pey,Pyy); /* Compute correlation updatete rate */ tmp32 = MULT16_32_Q15(st->beta0,Syy); if (tmp32 > MULT16_32_Q15(st->beta_max,See)) tmp32 = MULT16_32_Q15(st->beta_max,See); alpha = FLOAT_DIV32(tmp32, See); alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha); /* Update correlations (recursive average) */ st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey)); st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy)); if (FLOAT_LT(st->Pyy, FLOAT_ONE)) st->Pyy = FLOAT_ONE; /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */ if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy))) st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy); if (FLOAT_GT(st->Pey, st->Pyy)) st->Pey = st->Pyy; /* leak_estimate is the linear regression result */ st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */ if (st->leak_estimate > 16383) st->leak_estimate = 32767; else st->leak_estimate = SHL16(st->leak_estimate,1); /*printf ("%f\n", st->leak_estimate);*/ /* Compute Residual to Error Ratio */ #ifdef FIXED_POINT tmp32 = MULT16_32_Q15(st->leak_estimate,Syy); tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1))); /* Check for y in e (lower bound on RER) */ { spx_float_t bound = PSEUDOFLOAT(Sey); bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy))); if (FLOAT_GT(bound, PSEUDOFLOAT(See))) tmp32 = See; else if (tmp32 < FLOAT_EXTRACT32(bound)) tmp32 = FLOAT_EXTRACT32(bound); } if (tmp32 > SHR32(See,1)) tmp32 = SHR32(See,1); RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15)); #else RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See; /* Check for y in e (lower bound on RER) */ if (RER < Sey*Sey/(1+See*Syy)) RER = Sey*Sey/(1+See*Syy); if (RER > .5) RER = .5; #endif /* We consider that the filter has had minimal adaptation if the following is true*/ if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy)) { st->adapted = 1; } if (st->adapted) { /* Normal learning rate calculation once we're past the minimal adaptation phase */ for (i=0;i<=st->frame_size;i++) { spx_word32_t r, e; /* Compute frequency-domain adaptation mask */ r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3)); e = SHL32(st->Rf[i],3)+1; #ifdef FIXED_POINT if (r>SHR32(e,1)) r = SHR32(e,1); #else if (r>.5*e) r = .5*e; #endif r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/ st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16); } } else { /* Temporary adaption rate if filter is not yet adapted enough */ spx_word16_t adapt_rate=0; if (Sxx > SHR32(MULT16_16(N, 1000),6)) { tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); #ifdef FIXED_POINT if (tmp32 > SHR32(See,2)) tmp32 = SHR32(See,2); #else if (tmp32 > .25*See) tmp32 = .25*See; #endif adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); } for (i=0;i<=st->frame_size;i++) st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1); /* How much have we adapted so far? */ st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); } /* FIXME: MC conversion required */ for (i=0;i<st->frame_size;i++) st->last_y[i] = st->last_y[st->frame_size+i]; if (st->adapted) { /* If the filter is adapted, take the filtered echo */ for (i=0;i<st->frame_size;i++) st->last_y[st->frame_size+i] = in[i]-out[i]; } else { /* If filter isn't adapted yet, all we can do is take the far end signal directly */ /* moved earlier: for (i=0;i<N;i++) st->last_y[i] = st->x[i];*/ } }
CELTMode *celt051_mode_create(celt_int32_t Fs, int channels, int frame_size, int *error) { int i; #ifdef STDIN_TUNING scanf("%d ", &MIN_BINS); scanf("%d ", &BITALLOC_SIZE); band_allocation = celt_alloc(sizeof(int)*BARK_BANDS*BITALLOC_SIZE); for (i=0;i<BARK_BANDS*BITALLOC_SIZE;i++) { scanf("%d ", band_allocation+i); } #endif #ifdef STATIC_MODES const CELTMode *m = NULL; CELTMode *mode=NULL; ALLOC_STACK; for (i=0;i<TOTAL_MODES;i++) { if (Fs == static_mode_list[i]->Fs && channels == static_mode_list[i]->nbChannels && frame_size == static_mode_list[i]->mdctSize) { m = static_mode_list[i]; break; } } if (m == NULL) { celt_warning("Mode not included as part of the static modes"); if (error) *error = CELT_BAD_ARG; return NULL; } mode = (CELTMode*)celt_alloc(sizeof(CELTMode)); CELT_COPY(mode, m, 1); #else int res; CELTMode *mode; celt_word16_t *window; ALLOC_STACK; /* The good thing here is that permutation of the arguments will automatically be invalid */ if (Fs < 32000 || Fs > 96000) { celt_warning("Sampling rate must be between 32 kHz and 96 kHz"); if (error) *error = CELT_BAD_ARG; return NULL; } if (channels < 0 || channels > 2) { celt_warning("Only mono and stereo supported"); if (error) *error = CELT_BAD_ARG; return NULL; } if (frame_size < 64 || frame_size > 512 || frame_size%2!=0) { celt_warning("Only even frame sizes from 64 to 512 are supported"); if (error) *error = CELT_BAD_ARG; return NULL; } res = (Fs+frame_size)/(2*frame_size); mode = celt_alloc(sizeof(CELTMode)); mode->Fs = Fs; mode->mdctSize = frame_size; mode->nbChannels = channels; mode->eBands = compute_ebands(Fs, frame_size, &mode->nbEBands); compute_pbands(mode, res); mode->ePredCoef = QCONST16(.8f,15); if (frame_size > 384 && (frame_size%8)==0) { mode->nbShortMdcts = 4; } else if (frame_size > 384 && (frame_size%10)==0) { mode->nbShortMdcts = 5; } else if (frame_size > 256 && (frame_size%6)==0) { mode->nbShortMdcts = 3; } else if (frame_size > 256 && (frame_size%8)==0) { mode->nbShortMdcts = 4; } else if (frame_size > 64 && (frame_size%4)==0) { mode->nbShortMdcts = 2; } else if (frame_size > 128 && (frame_size%6)==0) { mode->nbShortMdcts = 3; } else { mode->nbShortMdcts = 1; } if (mode->nbShortMdcts > 1) mode->overlap = ((frame_size/mode->nbShortMdcts)>>2)<<2; /* Overlap must be divisible by 4 */ else
#include <math.h> #include "celt.h" #include "pitch.h" #include "kiss_fftr.h" #include "bands.h" #include "modes.h" #include "entcode.h" #include "quant_bands.h" #include "psy.h" #include "rate.h" #include "stack_alloc.h" #include "mathops.h" #include "float_cast.h" #include <stdarg.h> static const celt_word16_t preemph = QCONST16(0.8f,15); #ifdef FIXED_POINT static const celt_word16_t transientWindow[16] = { 279, 1106, 2454, 4276, 6510, 9081, 11900, 14872, 17896, 20868, 23687, 26258, 28492, 30314, 31662, 32489}; #else static const float transientWindow[16] = { 0.0085135, 0.0337639, 0.0748914, 0.1304955, 0.1986827, 0.2771308, 0.3631685, 0.4538658, 0.5461342, 0.6368315, 0.7228692, 0.8013173, 0.8695045, 0.9251086, 0.9662361, 0.9914865}; #endif #define ENCODERVALID 0x4c434554 #define ENCODERPARTIAL 0x5445434c