/*
 * Prepare a real-FFT lookup structure for transforms of length n.
 *
 * l: lookup to fill in; receives n plus two freshly allocated work tables
 *    (3*n trigonometric entries and 32 factorisation entries).
 * n: transform length handed to fdrffti().
 *
 * The tables are obtained from speex_alloc() and are not checked for NULL.
 */
void spx_drft_init(struct drft_lookup *l, int n)
{
   float *trig_table = (float*)speex_alloc(3*n*sizeof(*l->trigcache));
   int *split_table = (int*)speex_alloc(32*sizeof(*l->splitcache));

   l->n = n;
   l->trigcache = trig_table;
   l->splitcache = split_table;

   /* Let the FFT back end fill both tables for this length */
   fdrffti(n, l->trigcache, l->splitcache);
}
VorbisPsy *vorbis_psy_init(int rate, int n) { long i,j,lo=-99,hi=1; VorbisPsy *p = speex_alloc(sizeof(VorbisPsy)); memset(p,0,sizeof(*p)); p->n = n; spx_drft_init(&p->lookup, n); p->bark = speex_alloc(n*sizeof(*p->bark)); p->rate=rate; p->vi = &example_tuning; /* BH4 window */ p->window = speex_alloc(sizeof(*p->window)*n); float a0 = .35875f; float a1 = .48829f; float a2 = .14128f; float a3 = .01168f; for(i=0;i<n;i++) p->window[i] = //a0 - a1*cos(2.*M_PI/n*(i+.5)) + a2*cos(4.*M_PI/n*(i+.5)) - a3*cos(6.*M_PI/n*(i+.5)); sin((i+.5)/n * M_PI)*sin((i+.5)/n * M_PI); /* bark scale lookups */ for(i=0;i<n;i++){ float bark=toBARK(rate/(2*n)*i); for(;lo+p->vi->noisewindowlomin<i && toBARK(rate/(2*n)*lo)<(bark-p->vi->noisewindowlo);lo++); for(;hi<=n && (hi<i+p->vi->noisewindowhimin || toBARK(rate/(2*n)*hi)<(bark+p->vi->noisewindowhi));hi++); p->bark[i]=((lo-1)<<16)+(hi-1); } /* set up rolling noise median */ p->noiseoffset=speex_alloc(n*sizeof(*p->noiseoffset)); for(i=0;i<n;i++){ float halfoc=toOC((i+.5)*rate/(2.*n))*2.; int inthalfoc; float del; if(halfoc<0)halfoc=0; if(halfoc>=P_BANDS-1)halfoc=P_BANDS-1; inthalfoc=(int)halfoc; del=halfoc-inthalfoc; p->noiseoffset[i]= p->vi->noiseoff[inthalfoc]*(1.-del) + p->vi->noiseoff[inthalfoc+1]*del; } #if 0 _analysis_output_always("noiseoff0",ls,p->noiseoffset,n,1,0,0); #endif return p; }
/*
 * Create an empty buffer able to hold `size` bytes.
 *
 * size: capacity in bytes of the data area.
 *
 * Returns the new buffer with read/write positions and the available count
 * set to zero, or NULL if either allocation fails.  On failure nothing is
 * leaked: the partially built state is released before returning.
 */
EXPORT SpeexBuffer *speex_buffer_init(int size)
{
   SpeexBuffer *st = speex_alloc(sizeof(SpeexBuffer));
   if (!st)
      return NULL;

   st->data = speex_alloc(size);
   /* A zero-byte request may legitimately yield NULL; only treat NULL as a
      failure when storage was actually asked for. */
   if (!st->data && size != 0)
   {
      speex_free(st);
      return NULL;
   }

   st->size = size;
   st->read_ptr = 0;
   st->write_ptr = 0;
   st->available = 0;
   return st;
}
/*
 * Parse a raw header packet into a newly allocated SpeexHeader.
 *
 * packet: raw packet bytes (stored little-endian).
 * size:   number of valid bytes in packet.
 *
 * Returns a header allocated with speex_alloc() that the caller owns, or NULL
 * when the packet is shorter than a SpeexHeader, does not start with the
 * 8-byte Speex magic, names an out-of-range mode, or memory is exhausted.
 * The channel count is clamped to the range 1..2.
 */
EXPORT SpeexHeader *speex_packet_to_header(char *packet, int size)
{
   int i;
   SpeexHeader *le_header;
   /* Magic is exactly 8 bytes: "Speex" padded with three spaces.  The loop
      below compares 8 bytes, so the literal must supply all of them. */
   const char *h = "Speex   ";

   /*FIXME: Do we allow larger headers?*/
   if (size < (int)sizeof(SpeexHeader))
   {
      speex_notify("Speex header too small");
      return NULL;
   }

   for (i=0;i<8;i++)
   {
      if (packet[i]!=h[i])
      {
         /* This doesn't look like a Speex file */
         return NULL;
      }
   }

   le_header = (SpeexHeader*)speex_alloc(sizeof(SpeexHeader));
   if (!le_header)
      return NULL;

   SPEEX_COPY(le_header, (SpeexHeader*)packet, 1);

   /*Make sure everything is converted correctly from little-endian*/
   ENDIAN_SWITCH(le_header->speex_version_id);
   ENDIAN_SWITCH(le_header->header_size);
   ENDIAN_SWITCH(le_header->rate);
   ENDIAN_SWITCH(le_header->mode);
   ENDIAN_SWITCH(le_header->mode_bitstream_version);
   ENDIAN_SWITCH(le_header->nb_channels);
   ENDIAN_SWITCH(le_header->bitrate);
   ENDIAN_SWITCH(le_header->frame_size);
   ENDIAN_SWITCH(le_header->vbr);
   ENDIAN_SWITCH(le_header->frames_per_packet);
   ENDIAN_SWITCH(le_header->extra_headers);

   if (le_header->mode >= SPEEX_NB_MODES || le_header->mode < 0)
   {
      speex_notify("Invalid mode specified in Speex header");
      speex_free (le_header);
      return NULL;
   }

   /* Force the channel count into the supported range */
   if (le_header->nb_channels>2)
      le_header->nb_channels = 2;
   if (le_header->nb_channels<1)
      le_header->nb_channels = 1;

   return le_header;
}
/*
 * Build a decorrelator state.
 *
 * rate, channels, frame_size: stored verbatim in the state; channels and
 * frame_size also size every work buffer allocated here.
 *
 * Returns the new state with a seed drawn from rand() and, per channel, the
 * ring contents, ring index and alpha cleared and the order set to 10.
 * Allocation results are not checked.
 */
EXPORT SpeexDecorrState *speex_decorrelate_new(int rate, int channels, int frame_size)
{
   int n, c;
   SpeexDecorrState *st = speex_alloc(sizeof(SpeexDecorrState));

   st->rate = rate;
   st->channels = channels;
   st->frame_size = frame_size;

#ifdef VORBIS_PSYCHO
   st->psy = vorbis_psy_init(rate, 2 * frame_size);
   spx_drft_init(&st->lookup, 2 * frame_size);
   st->wola_mem = speex_alloc(frame_size * sizeof(float));
   st->curve = speex_alloc(frame_size * sizeof(float));
#endif

   st->y = speex_alloc(frame_size * sizeof(float));
   st->buff = speex_alloc(channels * 2 * frame_size * sizeof(float));
   st->ringID = speex_alloc(channels * sizeof(int));
   st->order = speex_alloc(channels * sizeof(int));
   st->alpha = speex_alloc(channels * sizeof(float));
   st->ring = speex_alloc(channels * ALLPASS_ORDER * sizeof(float));

   /*FIXME: The +20 is there only as a kludge for ALL_PASS_OLA */
   st->vorbis_win = speex_alloc((2 * frame_size + 20) * sizeof(float));

   /* Only the first 2*frame_size window entries are written here */
   n = 0;
   while (n < 2 * frame_size)
   {
      double s = sin(M_PI * n / (2 * frame_size));
      st->vorbis_win[n] = sin(.5 * M_PI * s * s);
      n++;
   }

   st->seed = rand();

   for (c = 0; c < channels; c++)
   {
      for (n = 0; n < ALLPASS_ORDER; n++)
         st->ring[c][n] = 0;
      st->ringID[c] = 0;
      st->alpha[c] = 0;
      st->order[c] = 10;
   }
   return st;
}
/** Initialise jitter buffer: clears every slot, records the tick size, sets
    the buffer margin to 1 and resets the state. Returns NULL if the state
    cannot be allocated. */
JitterBuffer *jitter_buffer_init(int tick)
{
   int slot;
   JitterBuffer *jb = (JitterBuffer*)speex_alloc(sizeof(JitterBuffer));

   if (!jb)
      return jb;

   slot = 0;
   while (slot < SPEEX_JITTER_MAX_BUFFER_SIZE)
   {
      jb->buf[slot] = NULL;
      slot++;
   }
   jb->tick_size = tick;
   jb->buffer_margin = 1;
   jitter_buffer_reset(jb);

   return jb;
}
SpeexHeader *speex_packet_to_header(char *packet, int size) { int i; SpeexHeader *le_header; const char *h = "Speex "; for (i=0;i<8;i++) if (packet[i]!=h[i]) { speex_warning ("This doesn't look like a Speex file"); return NULL; } /*FIXME: Do we allow larger headers?*/ if (size < (int)sizeof(SpeexHeader)) { speex_warning("Speex header too small"); return NULL; } le_header = (SpeexHeader*)speex_alloc(sizeof(SpeexHeader)); speex_move(le_header, packet, sizeof(SpeexHeader)); /*Make sure everything is converted correctly from little-endian*/ ENDIAN_SWITCH(le_header->speex_version_id); ENDIAN_SWITCH(le_header->header_size); ENDIAN_SWITCH(le_header->rate); ENDIAN_SWITCH(le_header->mode); ENDIAN_SWITCH(le_header->mode_bitstream_version); ENDIAN_SWITCH(le_header->nb_channels); ENDIAN_SWITCH(le_header->bitrate); ENDIAN_SWITCH(le_header->frame_size); ENDIAN_SWITCH(le_header->vbr); ENDIAN_SWITCH(le_header->frames_per_packet); ENDIAN_SWITCH(le_header->extra_headers); return le_header; }
/*
 * Serialise a header into a freshly allocated packet.
 *
 * header: header to copy; it is not modified.
 * size:   receives sizeof(SpeexHeader), the length of the returned packet.
 *
 * Returns the copy, after every integer field listed below has gone through
 * ENDIAN_SWITCH, as a char pointer.  The copy comes from speex_alloc() and is
 * not checked for NULL.
 */
char *speex_header_to_packet(SpeexHeader *header, int *size)
{
   SpeexHeader *out = (SpeexHeader*)speex_alloc(sizeof(SpeexHeader));

   speex_move(out, header, sizeof(SpeexHeader));

   /*Make sure everything is now little-endian*/
   ENDIAN_SWITCH(out->speex_version_id);
   ENDIAN_SWITCH(out->header_size);
   ENDIAN_SWITCH(out->rate);
   ENDIAN_SWITCH(out->mode);
   ENDIAN_SWITCH(out->mode_bitstream_version);
   ENDIAN_SWITCH(out->nb_channels);
   ENDIAN_SWITCH(out->bitrate);
   ENDIAN_SWITCH(out->frame_size);
   ENDIAN_SWITCH(out->vbr);
   ENDIAN_SWITCH(out->frames_per_packet);
   ENDIAN_SWITCH(out->extra_headers);

   *size = sizeof(SpeexHeader);
   return (char *)out;
}
/** Initialise jitter buffer: clears every packet slot, uses step_size for both
    the delay step and the concealment size, installs the defaults below, sets
    the maximum late rate to 4 through jitter_buffer_ctl() and resets the
    state. Returns NULL if the state cannot be allocated. */
EXPORT JitterBuffer *jitter_buffer_init(int step_size)
{
   int slot;
   spx_int32_t max_late_rate;
   JitterBuffer *jb = (JitterBuffer*)speex_alloc(sizeof(JitterBuffer));

   if (!jb)
      return jb;

   slot = 0;
   while (slot < SPEEX_JITTER_MAX_BUFFER_SIZE)
   {
      jb->packets[slot].data = NULL;
      slot++;
   }

   jb->delay_step = step_size;
   jb->concealment_size = step_size;
   /*FIXME: Should this be 0 or 1?*/
   jb->buffer_margin = 0;
   jb->late_cutoff = 50;
   jb->destroy = NULL;
   jb->latency_tradeoff = 0;
   jb->auto_adjust = 1;

   max_late_rate = 4;
   jitter_buffer_ctl(jb, JITTER_BUFFER_SET_MAX_LATE_RATE, &max_late_rate);
   jitter_buffer_reset(jb);

   return jb;
}
/*
 * Build a filter bank that maps `len` spectral bins onto `banks` bands.
 *
 * banks:    number of bands; stored as nb_banks.
 * sampling: sampling rate; half of it is taken as the top of the scale.
 * len:      number of spectral bins; sizes the four per-bin tables.
 * type:     not referenced anywhere in this function body.
 *
 * For every bin the function records a left band index, a right band index
 * (left + 1) and two weights (Q15_ONE - val and val).  The locals are named
 * "mel" but the warping is computed with toBARK().
 *
 * Returns the new bank.  Allocation results are not checked.
 * NOTE(review): banks == 1 makes the PDIV32 divisor (banks-1) zero -- confirm
 * callers never pass it.
 */
FilterBank *filterbank_new(int banks, spx_word32_t sampling, int len, int type)
{
   FilterBank *bank;
   spx_word32_t df;
   spx_word32_t max_mel, mel_interval;
   int i;
   int id1;
   int id2;
   /* Frequency step between consecutive bins, carrying a 15-bit left shift */
   df = DIV32(SHL32(sampling,15),MULT16_16(2,len));
   /* Warped value at sampling/2, and the spacing between band centres */
   max_mel = toBARK(EXTRACT16(sampling/2));
   mel_interval = PDIV32(max_mel,banks-1);

   bank = (FilterBank*)speex_alloc(sizeof(FilterBank));
   bank->nb_banks = banks;
   bank->len = len;
   bank->bank_left = (int*)speex_alloc(len*sizeof(int));
   bank->bank_right = (int*)speex_alloc(len*sizeof(int));
   bank->filter_left = (spx_word16_t*)speex_alloc(len*sizeof(spx_word16_t));
   bank->filter_right = (spx_word16_t*)speex_alloc(len*sizeof(spx_word16_t));
   /* Think I can safely disable normalisation for fixed-point (and probably float as well) */
#ifndef FIXED_POINT
   bank->scaling = (float*)speex_alloc(banks*sizeof(float));
#endif
   for (i=0;i<len;i++)
   {
      spx_word16_t curr_freq;
      spx_word32_t mel;
      spx_word16_t val;
      curr_freq = EXTRACT16(MULT16_32_P15(i,df));
      mel = toBARK(curr_freq);
      /* Stop at the first bin above the top of the scale; the remaining table
         entries keep whatever speex_alloc() returned them with */
      if (mel > max_mel)
         break;
      /* Index of the band at or below this bin */
#ifdef FIXED_POINT
      id1 = DIV32(mel,mel_interval);
#else
      id1 = (int)(floor(mel/mel_interval));
#endif
      if (id1>banks-2)
      {
         /* Past the last band pair: pin to the last pair, all weight on the right */
         id1 = banks-2;
         val = Q15_ONE;
      } else {
         /* Position of the bin between band id1 and band id1+1 */
         val = DIV32_16(mel - id1*mel_interval,EXTRACT16(PSHR32(mel_interval,15)));
      }
      id2 = id1+1;
      bank->bank_left[i] = id1;
      bank->filter_left[i] = SUB16(Q15_ONE,val);
      bank->bank_right[i] = id2;
      bank->filter_right[i] = val;
   }

   /* Think I can safely disable normalisation for fixed-point (and probably float as well) */
#ifndef FIXED_POINT
   /* Float build only: sum the weights landing in each band, then store
      Q15_ONE divided by that sum as the band's scaling factor */
   for (i=0;i<bank->nb_banks;i++)
      bank->scaling[i] = 0;
   for (i=0;i<bank->len;i++)
   {
      int id = bank->bank_left[i];
      bank->scaling[id] += bank->filter_left[i];
      id = bank->bank_right[i];
      bank->scaling[id] += bank->filter_right[i];
   }
   for (i=0;i<bank->nb_banks;i++)
      bank->scaling[i] = Q15_ONE/(bank->scaling[i]);
#endif
   return bank;
}
/** Put one packet into the jitter buffer.
    The packet payload is copied into storage obtained from speex_alloc();
    stale entries are freed first and, if no slot is free, the entry with the
    earliest timestamp is evicted. The arrival margin of the packet then
    updates the short-term and long-term margin histograms. */
void jitter_buffer_put(JitterBuffer *jitter, const JitterBufferPacket *packet)
{
   int slot, k;
   spx_int32_t arrival_margin;
   /*fprintf (stderr, "put packet %d %d\n", timestamp, span);*/

   /* First packet after a reset: both timestamps are taken from it */
   if (jitter->reset_state)
   {
      jitter->reset_state=0;
      jitter->pointer_timestamp = packet->timestamp;
      jitter->current_timestamp = packet->timestamp;
      /*fprintf(stderr, "reset to %d\n", timestamp);*/
   }

   /* Cleanup buffer (remove old packets that weren't played) */
   for (slot=0;slot<SPEEX_JITTER_MAX_BUFFER_SIZE;slot++)
   {
      if (!jitter->buf[slot])
         continue;
      if (LE32(jitter->timestamp[slot] + jitter->span[slot], jitter->pointer_timestamp))
      {
         /*fprintf (stderr, "cleaned (not played)\n");*/
         speex_free(jitter->buf[slot]);
         jitter->buf[slot] = NULL;
      }
   }

   /*Find an empty slot in the buffer*/
   slot = 0;
   while (slot<SPEEX_JITTER_MAX_BUFFER_SIZE && jitter->buf[slot]!=NULL)
      slot++;

   /*fprintf(stderr, "%d %d %f\n", timestamp, jitter->pointer_timestamp, jitter->drift_average);*/
   /*No place left in the buffer*/
   if (slot==SPEEX_JITTER_MAX_BUFFER_SIZE)
   {
      int earliest=jitter->timestamp[0];
      slot=0;
      for (k=1;k<SPEEX_JITTER_MAX_BUFFER_SIZE;k++)
      {
         if (!jitter->buf[slot] || LT32(jitter->timestamp[k],earliest))
         {
            earliest = jitter->timestamp[k];
            slot=k;
         }
      }
      speex_free(jitter->buf[slot]);
      jitter->buf[slot]=NULL;
      if (jitter->lost_count>20)
      {
         jitter_buffer_reset(jitter);
      }
      /*fprintf (stderr, "Buffer is full, discarding earliest frame %d (currently at %d)\n", timestamp, jitter->pointer_timestamp);*/
   }

   /* Copy packet in buffer */
   jitter->buf[slot]=(char*)speex_alloc(packet->len);
   for (k=0;k<(int)packet->len;k++)
      jitter->buf[slot][k]=packet->data[k];
   jitter->timestamp[slot]=packet->timestamp;
   jitter->span[slot]=packet->span;
   jitter->len[slot]=packet->len;

   /* Adjust the buffer size depending on network conditions */
   arrival_margin = (packet->timestamp - jitter->current_timestamp) - jitter->buffer_margin*jitter->tick_size;

   if (arrival_margin >= -LATE_BINS*jitter->tick_size)
   {
      spx_int32_t bin;

      /* Decay every histogram bin, then credit the bin this packet falls in */
      for (k=0;k<MAX_MARGIN;k++)
      {
         jitter->shortterm_margin[k] *= .98;
         jitter->longterm_margin[k] *= .995;
      }
      bin = LATE_BINS + arrival_margin/jitter->tick_size;
      if (bin>MAX_MARGIN-1)
         bin = MAX_MARGIN-1;
      if (bin>=0)
      {
         jitter->shortterm_margin[bin] += .02;
         jitter->longterm_margin[bin] += .005;
      }
   } else {
      /*fprintf (stderr, "way too late = %d\n", arrival_margin);*/
      if (jitter->lost_count>20)
      {
         jitter_buffer_reset(jitter);
      }
   }
#if 0 /* Enable to check how much is being buffered */
   if (rand()%1000==0)
   {
      int count = 0;
      for (k=0;k<SPEEX_JITTER_MAX_BUFFER_SIZE;k++)
      {
         if (jitter->buf[k])
            count++;
      }
      fprintf (stderr, "buffer_size = %d\n", count);
   }
#endif
}
/*
 * Create a multi-channel echo canceller state.
 *
 * frame_size:    samples per processed frame; the window is twice this.
 * filter_length: requested filter length in samples; rounded up to a whole
 *                number of frames to obtain the block count M.
 * nb_mic:        number of capture channels (stored as C).
 * nb_speakers:   number of playback channels (stored as K).
 *
 * Returns the new state.  The sampling rate is initialised to 8000 here.
 * Allocation results are not checked.
 */
EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
{
   int i,N,M, C, K;
   SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));

   st->K = nb_speakers;
   st->C = nb_mic;
   C=st->C;
   K=st->K;
#ifdef DUMP_ECHO_CANCEL_DATA
   /* Debug build: the three dump files are shared globals, so a second state is refused */
   if (rFile || pFile || oFile)
      speex_fatal("Opening dump files twice");
   rFile = fopen("aec_rec.sw", "wb");
   pFile = fopen("aec_play.sw", "wb");
   oFile = fopen("aec_out.sw", "wb");
#endif

   st->frame_size = frame_size;
   st->window_size = 2*frame_size;
   N = st->window_size;
   /* Number of filter blocks: filter_length divided by frame_size, rounded up */
   M = st->M = (filter_length+st->frame_size-1)/frame_size;
   st->cancel_count=0;
   st->sum_adapt = 0;
   st->saturated = 0;
   st->screwed_up = 0;
   /* This is the default sampling rate */
   st->sampling_rate = 8000;
   /* Rate-dependent constants, all derived from frame_size / sampling_rate */
   st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
#ifdef FIXED_POINT
   st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
   st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
#else
   st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
   st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
#endif
   st->leak_estimate = 0;

   st->fft_table = spx_fft_init(N);

   /* Work buffers: sized per capture channel (C), per playback channel (K),
      per window (N), per filter block (M) or per frame_size+1 entries */
   st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
   st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
   st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
   st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));

   st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
   st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
   st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
#ifdef TWO_PATH
   st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
#endif
   st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
   st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
   st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
   st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
   st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
   st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
#ifdef FIXED_POINT
   st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
   /* Fixed-point window: compute the first half and mirror it into the second */
   for (i=0;i<N>>1;i++)
   {
      st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
      st->window[N-i-1] = st->window[i];
   }
#else
   /* Float window: .5 - .5*cos(2*pi*i/N) */
   for (i=0;i<N;i++)
      st->window[i] = .5-.5*cos(2*M_PI*i/N);
#endif
   for (i=0;i<=st->frame_size;i++)
      st->power_1[i] = FLOAT_ONE;
   for (i=0;i<N*M*K*C;i++)
      st->W[i] = 0;
   {
      spx_word32_t sum = 0;
      /* Ratio of ~10 between adaptation rate of first and last block */
      spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
      st->prop[0] = QCONST16(.7, 15);
      sum = EXTEND32(st->prop[0]);
      /* Each block's weight is the previous one scaled by decay */
      for (i=1;i<M;i++)
      {
         st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
         sum = ADD32(sum, EXTEND32(st->prop[i]));
      }
      /* Rescale every weight by .8 over the sum of all weights */
      for (i=M-1;i>=0;i--)
      {
         st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
      }
   }

   st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
   st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
   st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
   st->preemph = QCONST16(.9,15);
   /* sampling_rate was set to 8000 above, so the first branch is the one
      taken at construction time */
   if (st->sampling_rate<12000)
      st->notch_radius = QCONST16(.9, 15);
   else if (st->sampling_rate<24000)
      st->notch_radius = QCONST16(.982, 15);
   else
      st->notch_radius = QCONST16(.992, 15);

   st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
   st->adapted = 0;
   st->Pey = st->Pyy = FLOAT_ONE;

#ifdef TWO_PATH
   st->Davg1 = st->Davg2 = 0;
   st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
#endif

   /* Playback buffer of PLAYBACK_DELAY+1 frames per playback channel; the
      position starts PLAYBACK_DELAY frames in */
   st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
   st->play_buf_started = 0;

   return st;
}
/** Put one packet into the jitter buffer.
    Stale entries are released first (through the destroy callback when one is
    installed, otherwise with speex_free()). A packet that is not hopelessly
    late is then stored: its data pointer is kept as-is when a destroy
    callback is installed, and copied into fresh storage otherwise. */
EXPORT void jitter_buffer_put(JitterBuffer * jitter, const JitterBufferPacket * packet)
{
   int slot, k;
   int late = 0;
   /*fprintf (stderr, "put packet %d %d\n", timestamp, span); */

   /* Cleanup buffer (remove old packets that weren't played) */
   if (!jitter->reset_state)
   {
      for (slot = 0; slot < SPEEX_JITTER_MAX_BUFFER_SIZE; slot++)
      {
         if (!jitter->packets[slot].data)
            continue;
         /* Make sure we don't discard a "just-late" packet in case we want to play it next (if we interpolate). */
         if (!(LE32(jitter->packets[slot].timestamp + jitter->packets[slot].span, jitter->pointer_timestamp)))
            continue;

         /*fprintf (stderr, "cleaned (not played)\n"); */
         if (jitter->destroy)
            jitter->destroy(jitter->packets[slot].data);
         else
            speex_free(jitter->packets[slot].data);
         jitter->packets[slot].data = NULL;
      }
   }

   /*fprintf(stderr, "arrival: %d %d %d\n", packet->timestamp, jitter->next_stop, jitter->pointer_timestamp); */
   /* Check if packet is late (could still be useful though) */
   if (!jitter->reset_state && LT32(packet->timestamp, jitter->next_stop))
   {
      update_timings(jitter, ((spx_int32_t) packet->timestamp) - ((spx_int32_t) jitter->next_stop) - jitter->buffer_margin);
      late = 1;
   }

   /* For some reason, the consumer has failed the last 20 fetches. Make sure this packet is
    * used to resync. */
   if (jitter->lost_count > 20)
   {
      jitter_buffer_reset(jitter);
   }

   /* Only insert the packet if it's not hopelessly late (i.e. totally useless) */
   if (!jitter->reset_state && !(GE32(packet->timestamp + packet->span + jitter->delay_step, jitter->pointer_timestamp)))
      return;

   /*Find an empty slot in the buffer */
   slot = 0;
   while (slot < SPEEX_JITTER_MAX_BUFFER_SIZE && jitter->packets[slot].data != NULL)
      slot++;

   /*No place left in the buffer, need to make room for it by discarding the oldest packet */
   if (slot == SPEEX_JITTER_MAX_BUFFER_SIZE)
   {
      int earliest = jitter->packets[0].timestamp;
      slot = 0;
      for (k = 1; k < SPEEX_JITTER_MAX_BUFFER_SIZE; k++)
      {
         if (!jitter->packets[slot].data || LT32(jitter->packets[k].timestamp, earliest))
         {
            earliest = jitter->packets[k].timestamp;
            slot = k;
         }
      }
      if (jitter->destroy)
         jitter->destroy(jitter->packets[slot].data);
      else
         speex_free(jitter->packets[slot].data);
      jitter->packets[slot].data = NULL;
      /*fprintf (stderr, "Buffer is full, discarding earliest frame %d (currently at %d)\n", timestamp, jitter->pointer_timestamp); */
   }

   /* Copy packet in buffer */
   if (jitter->destroy)
   {
      /* With a destroy callback installed, the caller's pointer is stored directly */
      jitter->packets[slot].data = packet->data;
   } else {
      jitter->packets[slot].data = (char *)speex_alloc(packet->len);
      for (k = 0; k < (int)packet->len; k++)
         jitter->packets[slot].data[k] = packet->data[k];
   }
   jitter->packets[slot].timestamp = packet->timestamp;
   jitter->packets[slot].span = packet->span;
   jitter->packets[slot].len = packet->len;
   jitter->packets[slot].sequence = packet->sequence;
   jitter->packets[slot].user_data = packet->user_data;

   /* Arrival is recorded as 0 for late packets and while resyncing */
   if (jitter->reset_state || late)
      jitter->arrival[slot] = 0;
   else
      jitter->arrival[slot] = jitter->next_stop;
}
/*
 * Allocate a stereo state and put it through speex_stereo_state_reset().
 * Returns the new state; the allocation result is not checked before the
 * reset call.
 */
EXPORT SpeexStereoState *speex_stereo_state_init()
{
   SpeexStereoState *state;

   state = speex_alloc(sizeof(SpeexStereoState));
   speex_stereo_state_reset(state);

   return state;
}
SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate) { int i; int N, N3, N4; SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState)); st->frame_size = frame_size; /* Round ps_size down to the nearest power of two */ #if 0 i=1; st->ps_size = st->frame_size; while(1) { if (st->ps_size & ~i) { st->ps_size &= ~i; i<<=1; } else { break; } } if (st->ps_size < 3*st->frame_size/4) st->ps_size = st->ps_size * 3 / 2; #else st->ps_size = st->frame_size; #endif N = st->ps_size; N3 = 2*N - st->frame_size; N4 = st->frame_size - N3; st->sampling_rate = sampling_rate; st->denoise_enabled = 1; st->agc_enabled = 0; st->agc_level = 8000; st->vad_enabled = 0; st->dereverb_enabled = 0; st->reverb_decay = .5; st->reverb_level = .2; st->frame = (float*)speex_alloc(2*N*sizeof(float)); st->ps = (float*)speex_alloc(N*sizeof(float)); st->gain2 = (float*)speex_alloc(N*sizeof(float)); st->window = (float*)speex_alloc(2*N*sizeof(float)); st->noise = (float*)speex_alloc(N*sizeof(float)); st->reverb_estimate = (float*)speex_alloc(N*sizeof(float)); st->old_ps = (float*)speex_alloc(N*sizeof(float)); st->gain = (float*)speex_alloc(N*sizeof(float)); st->prior = (float*)speex_alloc(N*sizeof(float)); st->post = (float*)speex_alloc(N*sizeof(float)); st->loudness_weight = (float*)speex_alloc(N*sizeof(float)); st->inbuf = (float*)speex_alloc(N3*sizeof(float)); st->outbuf = (float*)speex_alloc(N3*sizeof(float)); st->echo_noise = (float*)speex_alloc(N*sizeof(float)); st->S = (float*)speex_alloc(N*sizeof(float)); st->Smin = (float*)speex_alloc(N*sizeof(float)); st->Stmp = (float*)speex_alloc(N*sizeof(float)); st->update_prob = (float*)speex_alloc(N*sizeof(float)); st->zeta = (float*)speex_alloc(N*sizeof(float)); st->Zpeak = 0; st->Zlast = 0; st->noise_bands = (float*)speex_alloc(NB_BANDS*sizeof(float)); st->noise_bands2 = (float*)speex_alloc(NB_BANDS*sizeof(float)); st->speech_bands = (float*)speex_alloc(NB_BANDS*sizeof(float)); 
st->speech_bands2 = (float*)speex_alloc(NB_BANDS*sizeof(float)); st->noise_bandsN = st->speech_bandsN = 1; conj_window(st->window, 2*N3); for (i=2*N3;i<2*st->ps_size;i++) st->window[i]=1; if (N4>0) { for (i=N3-1;i>=0;i--) { st->window[i+N3+N4]=st->window[i+N3]; st->window[i+N3]=1; } } for (i=0;i<N;i++) { st->noise[i]=1e4; st->reverb_estimate[i]=0.; st->old_ps[i]=1e4; st->gain[i]=1; st->post[i]=1; st->prior[i]=1; } for (i=0;i<N3;i++) { st->inbuf[i]=0; st->outbuf[i]=0; } for (i=0;i<N;i++) { float ff=((float)i)*.5*sampling_rate/((float)N); st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f); if (st->loudness_weight[i]<.01f) st->loudness_weight[i]=.01f; st->loudness_weight[i] *= st->loudness_weight[i]; } st->speech_prob = 0; st->last_speech = 1000; st->loudness = pow(6000,LOUDNESS_EXP); st->loudness2 = 6000; st->nb_loudness_adapt = 0; st->fft_lookup = (struct drft_lookup*)speex_alloc(sizeof(struct drft_lookup)); spx_drft_init(st->fft_lookup,2*N); st->nb_adapt=0; st->consec_noise=0; st->nb_preprocess=0; return st; }
/** Creates a new echo canceller state */ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) { int i,j,N,M; SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); st->frame_size = frame_size; st->window_size = 2*frame_size; N = st->window_size; M = st->M = (filter_length+st->frame_size-1)/frame_size; st->cancel_count=0; st->adapt_rate = .01f; st->sum_adapt = 0; st->Sey = 0; st->Syy = 0; st->See = 0; st->fft_lookup = (struct drft_lookup*)speex_alloc(sizeof(struct drft_lookup)); spx_drft_init(st->fft_lookup, N); st->x = (float*)speex_alloc(N*sizeof(float)); st->d = (float*)speex_alloc(N*sizeof(float)); st->y = (float*)speex_alloc(N*sizeof(float)); st->y2 = (float*)speex_alloc(N*sizeof(float)); st->Yps = (float*)speex_alloc(N*sizeof(float)); st->last_y = (float*)speex_alloc(N*sizeof(float)); st->Yf = (float*)speex_alloc((st->frame_size+1)*sizeof(float)); st->Rf = (float*)speex_alloc((st->frame_size+1)*sizeof(float)); st->Xf = (float*)speex_alloc((st->frame_size+1)*sizeof(float)); st->fratio = (float*)speex_alloc((st->frame_size+1)*sizeof(float)); st->regul = (float*)speex_alloc(N*sizeof(float)); st->X = (float*)speex_alloc(M*N*sizeof(float)); st->D = (float*)speex_alloc(N*sizeof(float)); st->Y = (float*)speex_alloc(N*sizeof(float)); st->Y2 = (float*)speex_alloc(N*sizeof(float)); st->E = (float*)speex_alloc(N*sizeof(float)); st->W = (float*)speex_alloc(M*N*sizeof(float)); st->PHI = (float*)speex_alloc(M*N*sizeof(float)); st->power = (float*)speex_alloc((frame_size+1)*sizeof(float)); st->power_1 = (float*)speex_alloc((frame_size+1)*sizeof(float)); st->grad = (float*)speex_alloc(N*M*sizeof(float)); for (i=0; i<N*M; i++) { st->W[i] = st->PHI[i] = 0; } st->regul[0] = (.01+(10.)/((4.)*(4.)))/M; for (i=1,j=1; i<N-1; i+=2,j++) { st->regul[i] = .01+((10.)/((j+4.)*(j+4.)))/M; st->regul[i+1] = .01+((10.)/((j+4.)*(j+4.)))/M; } st->regul[i] = .01+((10.)/((j+4.)*(j+4.)))/M; st->adapted = 0; return st; }
void *sb_encoder_init(const SpeexMode *m) { int i; spx_int32_t tmp; SBEncState *st; const SpeexSBMode *mode; st = (SBEncState*)speex_alloc(sizeof(SBEncState)); if (!st) return NULL; st->mode = m; mode = (const SpeexSBMode*)m->mode; st->st_low = speex_encoder_init(mode->nb_mode); #if defined(VAR_ARRAYS) || defined (USE_ALLOCA) st->stack = NULL; #else /*st->stack = (char*)speex_alloc_scratch(SB_ENC_STACK);*/ speex_encoder_ctl(st->st_low, SPEEX_GET_STACK, &st->stack); #endif st->full_frame_size = 2*mode->frameSize; st->frame_size = mode->frameSize; st->subframeSize = mode->subframeSize; st->nbSubframes = mode->frameSize/mode->subframeSize; st->windowSize = st->frame_size+st->subframeSize; st->lpcSize=mode->lpcSize; st->encode_submode = 1; st->submodes=mode->submodes; st->submodeSelect = st->submodeID=mode->defaultSubmode; tmp=9; speex_encoder_ctl(st->st_low, SPEEX_SET_QUALITY, &tmp); tmp=1; speex_encoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, &tmp); st->lpc_floor = mode->lpc_floor; st->gamma1=mode->gamma1; st->gamma2=mode->gamma2; st->first=1; st->high=(spx_word16_t*)speex_alloc((st->windowSize-st->frame_size)*sizeof(spx_word16_t)); st->h0_mem=(spx_word16_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word16_t)); st->h1_mem=(spx_word16_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word16_t)); st->window= lpc_window; st->lagWindow = lag_window; st->old_lsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); st->old_qlsp = (spx_lsp_t*)speex_alloc(st->lpcSize*sizeof(spx_lsp_t)); st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); st->exc_rms = (spx_word16_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word16_t)); st->innov_rms_save = NULL; st->mem_sp = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sp2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sw = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); for (i=0;i<st->lpcSize;i++) 
st->old_lsp[i]= DIV32(MULT16_16(QCONST16(3.1415927f, LSP_SHIFT), i+1), st->lpcSize+1); #ifndef DISABLE_VBR st->vbr_quality = 8; st->vbr_enabled = 0; st->vbr_max = 0; st->vbr_max_high = 20000; /* We just need a big value here */ st->vad_enabled = 0; st->abr_enabled = 0; st->relative_quality=0; #endif /* #ifndef DISABLE_VBR */ st->complexity=2; speex_encoder_ctl(st->st_low, SPEEX_GET_SAMPLING_RATE, &st->sampling_rate); st->sampling_rate*=2; #ifdef ENABLE_VALGRIND VALGRIND_MAKE_READABLE(st, (st->stack-(char*)st)); #endif return st; }
EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers) { int i,N,M, C, K; SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); st->K = nb_speakers; st->C = nb_mic; C=st->C; K=st->K; #ifdef DUMP_ECHO_CANCEL_DATA if (rFile || pFile || oFile) speex_fatal("Opening dump files twice"); rFile = fopen("aec_rec.sw", "wb"); pFile = fopen("aec_play.sw", "wb"); oFile = fopen("aec_out.sw", "wb"); #endif st->frame_size = frame_size; st->window_size = 2*frame_size; N = st->window_size; M = st->M = (filter_length+st->frame_size-1)/frame_size; st->cancel_count=0; st->sum_adapt = 0; st->saturated = 0; st->screwed_up = 0; /* This is the default sampling rate */ st->sampling_rate = 8000; st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); #ifdef FIXED_POINT st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); #else st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; st->beta_max = (.5f*st->frame_size)/st->sampling_rate; #endif st->leak_estimate = 0; st->fft_table = spx_fft_init(N); st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); st->Y = 
(spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); #ifdef TWO_PATH st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); #endif st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t)); st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); #ifdef FIXED_POINT st->wtmp2 = (spx_word16_t*)speex_alloc