/*
 * Perform echo cancellation on one captured frame (pjmedia Speex backend).
 * The cleaned signal is written back into rec_frm in place.
 */
PJ_DEF(pj_status_t) speex_aec_cancel_echo( void *state,
                                           pj_int16_t *rec_frm,
                                           const pj_int16_t *play_frm,
                                           unsigned options,
                                           void *reserved )
{
    speex_ec *ec = (speex_ec*) state;

    /* Validate arguments; options and reserved are unused and must be 0/NULL. */
    PJ_ASSERT_RETURN(ec && rec_frm && play_frm && options==0 &&
                     reserved==NULL, PJ_EINVAL);

    /* Run the Speex canceller; the result lands in the scratch frame. */
    speex_echo_cancellation(ec->state,
                            (const spx_int16_t*)rec_frm,
                            (const spx_int16_t*)play_frm,
                            (spx_int16_t*)ec->tmp_frame);

    /* Post-AEC preprocessing (denoise etc.) on the cancelled signal. */
    speex_preprocess_run(ec->preprocess, (spx_int16_t*)ec->tmp_frame);

    /* Hand the processed samples back to the caller's buffer. */
    pjmedia_copy_samples(rec_frm, ec->tmp_frame, ec->samples_per_frame);

    return PJ_SUCCESS;
}
/* Cancel one frame of echo with the Speex AEC, then (optionally) run the
 * preprocessor on the result.  The preprocessor intentionally runs on the
 * AEC output rather than the raw capture — this ordering is by design. */
void pa_speex_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *play, uint8_t *out) {
    speex_echo_cancellation(ec->params.priv.speex.state,
                            (const spx_int16_t *) rec,
                            (const spx_int16_t *) play,
                            (spx_int16_t *) out);

    if (ec->params.priv.speex.pp_state)
        speex_preprocess_run(ec->params.priv.speex.pp_state,
                             (spx_int16_t *) out);
}
/*
 * Let the Echo Canceller knows that a frame has been captured from
 * the microphone.
 */
pj_status_t pjs_echo_canceller::capture(pj_int16_t *rec_frm, unsigned size)
{
    struct frame *oldest_frm;
    pj_status_t status, rc;

    /* Reject frames whose size differs from the configured frame length. */
    if(samples_per_frame!=size) {
        PJ_LOG(1, (THIS_FILE, "WRONG SIZE ON CAPTURE %d != %d",size,samples_per_frame));
        return -1;
    }

    /* Two cascaded high-pass stages remove DC / low-frequency rumble from
       the capture before echo cancellation. */
    for (unsigned i = 0; i < samples_per_frame; i++) {
        REAL f = hp00.highpass(rec_frm[i]);
        f = hp0.highpass(f);
        rec_frm[i] = round(f);
    }

    PPJ_WaitAndLock wl(*lock);

    if (!lat_ready) {
        /* Prefetching to fill in the desired latency */
        PJ_LOG(4, (THIS_FILE, "Prefetching.."));
        return PJ_SUCCESS;
    }

    /* Retrieve oldest frame from the latency buffer */
    oldest_frm = lat_buf.next;
    pj_list_erase(oldest_frm);

    /* Drop the lock while running the (expensive) canceller so the playback
       side is not blocked.  NOTE(review): this manual release/acquire pair
       must cooperate with the PPJ_WaitAndLock guard's destructor — confirm
       the guard does not double-release on scope exit. */
    lock->release();

    speex_echo_cancellation(state, (const spx_int16_t*)rec_frm,
                            (const spx_int16_t*)oldest_frm->buf,
                            (spx_int16_t*)tmp_frame);

    /* Preprocess output */
    speex_preprocess_run(preprocess, (spx_int16_t*)tmp_frame);

    pjmedia_copy_samples(rec_frm, tmp_frame, samples_per_frame);
    status = PJ_SUCCESS;

    /* Cancel echo using this reference frame */
    lock->acquire();

    /* Move one frame from delay buffer to the latency buffer. */
    rc = pjmedia_delay_buf_get(delay_buf, oldest_frm->buf);
    if (rc != PJ_SUCCESS) {
        /* Ooops.. no frame! */
        PJ_LOG(4, (THIS_FILE,
                   "No frame from delay buffer. This will upset EC later"));
        pjmedia_zero_samples(oldest_frm->buf, samples_per_frame);
    }
    pj_list_push_back(&lat_buf, oldest_frm);

    return status;
}
/*
 * JNI entry point: run Speex echo cancellation + preprocessing on one
 * captured frame.
 *
 * input_frame - near-end (microphone) samples
 * echo_frame  - far-end (speaker) reference samples
 * Returns a new jshortArray with the echo-cancelled frame, or NULL on error.
 *
 * Fixes over the original:
 *  - a bare `return;` in a jshortArray-returning function (undefined
 *    behaviour) is replaced by `return NULL;`
 *  - the pinned input arrays are now released on the error path (they
 *    leaked before)
 *  - the redundant intermediate `temp` array and second copy are removed;
 *    the output array is filled directly from the native result buffer.
 */
JNIEXPORT jshortArray JNICALL Java_com_pullmi_shanghai_TalkActivity_speex_1EchoCanceller_1process
  (JNIEnv *env, jobject jobj, jshortArray input_frame, jshortArray echo_frame)
{
    jint length = (*env)->GetArrayLength(env, input_frame);
    jshort *native_input_frame = (*env)->GetShortArrayElements(env, input_frame, 0);
    jshort *native_echo_frame = (*env)->GetShortArrayElements(env, echo_frame, 0);
    int i;

    /* Fail early on an empty frame or a pinning failure. */
    if (0 >= length || NULL == native_input_frame || NULL == native_echo_frame) {
        LOGE("create out error");
        if (native_input_frame)
            (*env)->ReleaseShortArrayElements(env, input_frame, native_input_frame, JNI_ABORT);
        if (native_echo_frame)
            (*env)->ReleaseShortArrayElements(env, echo_frame, native_echo_frame, JNI_ABORT);
        return NULL;
    }

    SPEEX_FRAME_BYTE = length;
    {
        short ref[SPEEX_FRAME_BYTE], mic[SPEEX_FRAME_BYTE], out[SPEEX_FRAME_BYTE];

        for (i = 0; i < SPEEX_FRAME_BYTE; ++i) {
            ref[i] = native_echo_frame[i];   /* far-end (speaker) reference */
            mic[i] = native_input_frame[i];  /* near-end (mic) capture      */
        }

        /* Echo-cancel, then denoise the cleaned signal. */
        speex_echo_cancellation(st, mic, ref, out);
        speex_preprocess_run(den, out);

        /* Build the Java output array directly from `out`. */
        jshortArray output_shorts = (*env)->NewShortArray(env, length);
        if (NULL == output_shorts) {
            (*env)->ReleaseShortArrayElements(env, input_frame, native_input_frame, JNI_ABORT);
            (*env)->ReleaseShortArrayElements(env, echo_frame, native_echo_frame, JNI_ABORT);
            return NULL;
        }
        (*env)->SetShortArrayRegion(env, output_shorts, 0, length, out);

        /* Unpin the inputs without copy-back (they were not modified). */
        (*env)->ReleaseShortArrayElements(env, input_frame, native_input_frame, JNI_ABORT);
        (*env)->ReleaseShortArrayElements(env, echo_frame, native_echo_frame, JNI_ABORT);

        return output_shorts;
    }
}
/**
 * Echo-cancels the capture <tt>buffer</tt> against the queued playback
 * samples, writing the cleaned signal back into <tt>buffer</tt>.
 *
 * @param aqi
 * @param buffer
 * @param length the length of <tt>buffer</tt> in bytes
 * @return the sound pressure level in dB of the playback which has been matched
 * to the specified capture <tt>buffer</tt> for the purposes of echo
 * cancellation if echo suppression is enabled; otherwise, <tt>0</tt>
 */
static float AudioQualityImprovement_cancelEchoFromPlay
    (AudioQualityImprovement *aqi,
    void *buffer, unsigned long length)
{
    spx_uint32_t sampleCount;
    float spl;

    /* Nothing to cancel while the playback side is still delaying. */
    if (aqi->playIsDelaying == JNI_TRUE)
        return 0;

    sampleCount = length / sizeof(spx_int16_t);
    /* Need at least one full frame of queued playback to match against. */
    if (aqi->playLength < sampleCount)
        return 0;

    /* Grow the scratch output buffer on demand. */
    if (!(aqi->out) || (aqi->outCapacity < length))
    {
        spx_int16_t *grown = realloc(aqi->out, length);

        if (!grown)
            return 0;
        aqi->out = grown;
        aqi->outCapacity = length;
    }

    /* Perform the echo cancellation and return the result in buffer. */
    speex_echo_cancellation(aqi->echo, buffer, aqi->play, aqi->out);
    memcpy(buffer, aqi->out, length);

    /*
     * Calculate the sound pressure level in dB to be returned (if echo
     * suppression is enabled and, thus, needs it).
     */
    if (JNI_TRUE == aqi->suppressEcho)
        spl = AudioQualityImprovement_calculateSoundPressureLevel(
                aqi, aqi->play, sampleCount);
    else
        spl = 0;

    AudioQualityImprovement_popFromPlay(aqi, sampleCount);
    return spl;
}
/* JNI bridge: cancel acoustic echo on one fixed-size frame.
 * rec  - near-end (microphone) samples
 * play - far-end (playback) reference samples
 * out  - receives the echo-cancelled frame */
extern "C" JNIEXPORT void Java_com_haitou_xiaoyoupai_imservice_support_audio_Speex_echoCancellation(
        JNIEnv *env, jshortArray rec, jshortArray play, jshortArray out) {
    jshort captured[enc_frame_size];   // near-end capture
    jshort reference[enc_frame_size];  // far-end reference
    jshort cleaned[enc_frame_size];    // echo-cancelled result

    env->GetShortArrayRegion(rec, 0, enc_frame_size, captured);
    env->GetShortArrayRegion(play, 0, enc_frame_size, reference);

    speex_echo_cancellation(echoState, captured, reference, cleaned);
    // speex_preprocess_run(den, cleaned);  // denoise pass intentionally disabled

    env->SetShortArrayRegion(out, 0, enc_frame_size, cleaned);
}
RTC::ReturnCode_t EchoCanceler::onExecute(RTC::UniqueId ec_id)
{
  RTC_DEBUG(("onExecute start"));
  /* Drop everything if either queue grew past its cap — the producer and
     consumer rates have diverged. */
  if((m_indata.size() > BUFFER_MAX) || (m_outdata.size() > BUFFER_MAX)) {
    RTC_INFO(("One of buffers exceeded the maximum value. Start clear buffers."));
    BufferClr();
  }
  /* Process only when a full echo-length window is queued on both sides. */
  if (( m_indata.size() >= ECHOLEN) && (m_outdata.size() >= ECHOLEN)) {
    m_mutex.lock();
    RTC_DEBUG(("onExecute:mutex lock"));
    int i;
    short *inbuffer = new short[ECHOLEN];
    short *outbuffer = new short[ECHOLEN];
    short *result = new short[ECHOLEN];
    /* Drain one window of captured (in) and played (out) samples under
       the lock; the AEC itself runs unlocked. */
    for ( i = 0; i < ECHOLEN; i++ ) {
      inbuffer[i] = m_indata.front();
      m_indata.pop_front();
      outbuffer[i] = m_outdata.front();
      m_outdata.pop_front();
      result[i] = 0;
    }
    m_mutex.unlock();
    RTC_DEBUG(("onExecute:mutex unlock"));
    speex_echo_cancellation(mp_sest, inbuffer, outbuffer, result);
    delete[] inbuffer;
    delete[] outbuffer;
    /* Serialize the cleaned samples little-endian (low byte first) into
       the output packet. */
    m_fout.data.length(ECHOLEN * 2);
    for ( i = 0; i < ECHOLEN; i++ ) {
      short val = result[i];
      m_fout.data[i*2] = (unsigned char)(val & 0x00ff);
      m_fout.data[i*2+1] = (unsigned char)((val & 0xff00) >> 8);
    }
    delete[] result;
    setTimestamp( m_fout );
    m_foutOut.write();
    RTC_DEBUG(("onExecute:writing %d samples", m_fout.data.length() / 2));
  } else {
/* PortAudio capture callback: preprocess, echo-cancel against the newest
 * queued playback frame, encode, and emit the frame via sigCapture. */
int sound::inputcallback(const void *input, void *output, unsigned long frameCount,
                         const PaStreamCallbackTimeInfo *timeInfo,
                         PaStreamCallbackFlags statusFlags, void *userData){
	sound * psound = (sound*)userData;
	/* NOTE(review): the preprocessor runs on the raw capture BEFORE the
	   echo canceller (writing through a cast-away const); Speex guidance
	   is to preprocess the AEC output instead — confirm this ordering is
	   intentional. */
	if (psound->speexppstate){
		speex_preprocess_run(psound->speexppstate, (short *)input);
	}
	short * outputbuff = 0;
	/* Pop one playback frame from the 16-slot ring buffer via CAS on the
	   `end` index; leaves outputbuff == 0 when the ring is empty. */
	while (1){
		int _end = psound->end.load();
		int _new = _end + 1;
		if (_end == psound->begin.load()){
			break;  /* ring empty — no reference frame available */
		}
		if (_new == 16){
			_new = 0;  /* wrap around */
		}
		if (psound->end.compare_exchange_strong(_end, _new)){
			outputbuff = (short *)psound->inputbuff[_end].buf;
			break;
		}
	}
	if (psound->isopenecho == true){
		if (outputbuff != 0){
			/* In-place AEC: the capture buffer doubles as the output.
			   NOTE(review): confirm speex_echo_cancellation permits
			   out == rec aliasing. */
			speex_echo_cancellation(psound->speexechostate, (short*)input, outputbuff, (short*)input);
		}
	}
	/* Payload layout: 2-byte channel count, then the encoded audio. */
	char tmp[4096];
	*((short*)tmp) = psound->inputStreamParameters.channelCount;
	int len = psound->_encode.encoded((char*)input, frameCount, tmp+2, 4096);
	psound->sigCapture((char*)tmp, len + 2);
	return paContinue;
}
/* Feed one captured (near-end) frame to the canceller.  If at least one
 * playback frame is queued, cancel against the oldest one and shift the
 * queue down; otherwise pass the capture through unchanged and warn. */
EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
{
   int n;

   /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
   st->play_buf_started = 1;
   if (st->play_buf_pos >= st->frame_size)
   {
      /* Cancel against the oldest queued playback frame, then compact the
         remaining queued samples to the front of the buffer. */
      speex_echo_cancellation(st, rec, st->play_buf, out);
      st->play_buf_pos -= st->frame_size;
      for (n = 0; n < st->play_buf_pos; n++)
         st->play_buf[n] = st->play_buf[n + st->frame_size];
   }
   else
   {
      speex_warning("No playback frame available (your application is buggy and/or got xruns)");
      if (st->play_buf_pos != 0)
      {
         speex_warning("internal playback buffer corruption?");
         st->play_buf_pos = 0;
      }
      /* No reference: emit the capture unmodified. */
      for (n = 0; n < st->frame_size; n++)
         out[n] = rec[n];
   }
}
int main(int argc, char **argv) { FILE *echo_fd, *ref_fd, *e_fd; short echo_buf[NN], ref_buf[NN], e_buf[NN]; SpeexEchoState *st; SpeexPreprocessState *den; int sampleRate = 8000; if (argc != 4) { fprintf(stderr, "testecho mic_signal.sw speaker_signal.sw output.sw\n"); exit(1); } echo_fd = fopen(argv[2], "rb"); ref_fd = fopen(argv[1], "rb"); e_fd = fopen(argv[3], "wb"); st = speex_echo_state_init(NN, TAIL); den = speex_preprocess_state_init(NN, sampleRate); speex_echo_ctl(st, SPEEX_ECHO_SET_SAMPLING_RATE, &sampleRate); speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_ECHO_STATE, st); while (!feof(ref_fd) && !feof(echo_fd)) { fread(ref_buf, sizeof(short), NN, ref_fd); fread(echo_buf, sizeof(short), NN, echo_fd); speex_echo_cancellation(st, ref_buf, echo_buf, e_buf); speex_preprocess_run(den, e_buf); fwrite(e_buf, sizeof(short), NN, e_fd); } speex_echo_state_destroy(st); speex_preprocess_state_destroy(den); fclose(e_fd); fclose(echo_fd); fclose(ref_fd); return 0; }
/* JNI bridge: echo-cancel, denoise, and Speex-encode one frame.
 * rec     - near-end (microphone) samples
 * play    - far-end (playback) reference samples
 * encoded - receives the packed Speex bitstream
 * Returns the number of encoded bytes written. */
extern "C" JNIEXPORT int Java_com_haitou_xiaoyoupai_imservice_support_audio_Speex_echoCancellationEncode(
        JNIEnv *env, jshortArray rec, jshortArray play, jbyteArray encoded) {
    jshort captured[enc_frame_size];   // near-end capture
    jshort reference[enc_frame_size];  // far-end reference
    jshort cleaned[enc_frame_size];    // echo-cancelled, denoised frame
    jbyte packed[enc_frame_size];      // encoded bitstream bytes

    env->GetShortArrayRegion(rec, 0, enc_frame_size, captured);
    env->GetShortArrayRegion(play, 0, enc_frame_size, reference);

    // Cancel the far-end echo, then denoise the result.
    speex_echo_cancellation(echoState, captured, reference, cleaned);
    speex_preprocess_run(den, cleaned);

    // Encode the cleaned frame and copy the packed bytes back to Java.
    speex_bits_reset(&ebits);
    speex_encode_int(enc_state, cleaned, &ebits);
    jint tot_bytes = speex_bits_write(&ebits, (char *) packed, enc_frame_size);
    env->SetByteArrayRegion(encoded, 0, tot_bytes, packed);

    return (jint) tot_bytes;
}
/* inputs[0]= reference signal from far end (sent to soundcard)
 * inputs[1]= near speech & echo signal (read from soundcard)
 * outputs[0]= is a copy of inputs[0] to be sent to soundcard
 * outputs[1]= near end speech, echo removed - towards far end
 */
static void speex_ec_process(MSFilter *f){
	SpeexECState *s=(SpeexECState*)f->data;
	int nbytes=s->framesize*2;   /* frame size in bytes (16-bit samples) */
	mblk_t *refm;
	uint8_t *ref,*echo;

	/* Bypass: forward both streams untouched. */
	if (s->bypass_mode) {
		while((refm=ms_queue_get(f->inputs[0]))!=NULL){
			ms_queue_put(f->outputs[0],refm);
		}
		while((refm=ms_queue_get(f->inputs[1]))!=NULL){
			ms_queue_put(f->outputs[1],refm);
		}
		return;
	}

	/* Queue incoming far-end (reference) signal, but only once the echo
	   path has produced data, so the two streams stay synchronized. */
	if (f->inputs[0]!=NULL){
		if (s->echostarted){
			while((refm=ms_queue_get(f->inputs[0]))!=NULL){
				refm=audio_flow_controller_process(&s->afc,refm);
				if (refm){
					/* one copy is delayed for the AEC, the other is
					   forwarded to the soundcard without delay */
					mblk_t *cp=dupmsg(refm);
					ms_bufferizer_put(&s->delayed_ref,cp);
					ms_bufferizer_put(&s->ref,refm);
				}
			}
		}else{
			ms_warning("Getting reference signal but no echo to synchronize on.");
			ms_queue_flush(f->inputs[0]);
		}
	}

	ms_bufferizer_put_from_queue(&s->echo,f->inputs[1]);

	ref=(uint8_t*)alloca(nbytes);
	echo=(uint8_t*)alloca(nbytes);
	/* Process frame-by-frame while a full near-end frame is available. */
	while (ms_bufferizer_read(&s->echo,echo,nbytes)==nbytes){
		mblk_t *oecho=allocb(nbytes,0);
		int avail;
		int avail_samples;
		if (!s->echostarted) s->echostarted=TRUE;
		if ((avail=ms_bufferizer_get_avail(&s->delayed_ref))<((s->nominal_ref_samples*2)+nbytes)){
			/*we don't have enough to read in a reference signal buffer, inject silence instead*/
			avail=nbytes;
			refm=allocb(nbytes,0);
			memset(refm->b_wptr,0,nbytes);
			refm->b_wptr+=nbytes;
			ms_bufferizer_put(&s->delayed_ref,refm);
			ms_queue_put(f->outputs[0],dupmsg(refm));
			if (!s->using_zeroes){
				ms_warning("Not enough ref samples, using zeroes");
				s->using_zeroes=TRUE;
			}
		}else{
			if (s->using_zeroes){
				ms_message("Samples are back.");
				s->using_zeroes=FALSE;
			}
			/* read from our no-delay buffer and output */
			refm=allocb(nbytes,0);
			if (ms_bufferizer_read(&s->ref,refm->b_wptr,nbytes)==0){
				ms_fatal("Should never happen");
			}
			refm->b_wptr+=nbytes;
			ms_queue_put(f->outputs[0],refm);
		}
		/*now read a valid buffer of delayed ref samples*/
		if (ms_bufferizer_read(&s->delayed_ref,ref,nbytes)==0){
			ms_fatal("Should never happen");
		}
		avail-=nbytes;
		avail_samples=avail/2;
		/*ms_message("avail=%i",avail_samples);*/
		/* Track the minimum reference backlog over the flow-control window. */
		if (avail_samples<s->min_ref_samples || s->min_ref_samples==-1){
			s->min_ref_samples=avail_samples;
		}
#ifdef EC_DUMP
		if (s->reffile)
			fwrite(ref,nbytes,1,s->reffile);
		if (s->echofile)
			fwrite(echo,nbytes,1,s->echofile);
#endif
		/* Echo-cancel this frame, then denoise the cleaned signal. */
		speex_echo_cancellation(s->ecstate,(short*)echo,(short*)ref,(short*)oecho->b_wptr);
		speex_preprocess_run(s->den, (short*)oecho->b_wptr);
#ifdef EC_DUMP
		if (s->cleanfile)
			fwrite(oecho->b_wptr,nbytes,1,s->cleanfile);
#endif
		oecho->b_wptr+=nbytes;
		ms_queue_put(f->outputs[1],oecho);
	}

	/*verify our ref buffer does not become too big, meaning that we are receiving
	more samples than we are sending*/
	if ((((uint32_t)(f->ticker->time - s->flow_control_time)) >= flow_control_interval_ms) && (s->min_ref_samples != -1)) {
		int diff=s->min_ref_samples-s->nominal_ref_samples;
		if (diff>(nbytes/2)){
			int purge=diff-(nbytes/2);
			ms_warning("echo canceller: we are accumulating too much reference signal, need to throw out %i samples",purge);
			audio_flow_controller_set_target(&s->afc,purge,(flow_control_interval_ms*s->samplerate)/1000);
		}
		s->min_ref_samples=-1;
		s->flow_control_time = f->ticker->time;
	}
}
/* Process one captured frame: level metering, echo cancellation,
 * preprocessing, voice-activity detection, and CELT/Speex encoding. */
void AudioInput::encodeAudioFrame() {
	int iArg;
	ClientUser *p=ClientUser::get(g.uiSession);
	int i;
	float sum;
	short max;
	short *psSource;

	iFrameCounter++;

	if (! bRunning)
		return;

	/* RMS of the raw mic frame, expressed in dB (floored at -96 dB). */
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psMic[i] * psMic[i]);
	dPeakMic = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);

	/* Peak absolute sample of the mic frame. */
	max = 1;
	for (i=0;i<iFrameSize;i++)
		max = static_cast<short>(abs(psMic[i]) > max ? abs(psMic[i]) : max);
	dMaxMic = max;

	/* Speaker (far-end) level, only when echo cancellation is active. */
	if (psSpeaker && (iEchoChannels > 0)) {
		sum=1.0f;
		for (i=0;i<iFrameSize;i++)
			sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]);
		dPeakSpeaker = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);
	} else {
		dPeakSpeaker = 0.0;
	}

	QMutexLocker l(&qmSpeex);
	resetAudioProcessor();

	/* Read the current AGC gain, then derive the noise-suppression level
	   from the configured setting. */
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_AGC_GAIN, &iArg);
	float gainValue = static_cast<float>(iArg);
	iArg = g.s.iNoiseSuppress - iArg;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

	/* Echo-cancel against the speaker signal when available, then run the
	   preprocessor on the chosen source buffer. */
	if (sesEcho && psSpeaker) {
		speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean);
		speex_preprocess_run(sppPreprocess, psClean);
		psSource = psClean;
	} else {
		speex_preprocess_run(sppPreprocess, psMic);
		psSource = psMic;
	}

	/* Level of the processed signal. */
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psSource[i] * psSource[i]);
	float micLevel = sqrtf(sum / static_cast<float>(iFrameSize));
	dPeakSignal = qMax(20.0f*log10f(micLevel / 32768.0f), -96.0f);

	spx_int32_t prob = 0;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob);
	fSpeechProb = static_cast<float>(prob) / 100.0f;

	// clean microphone level: peak of filtered signal attenuated by AGC gain
	dPeakCleanMic = qMax(dPeakSignal - gainValue, -96.0f);

	/* Voice-activity decision: speech probability or amplitude, per settings. */
	float level = (g.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakCleanMic / 96.0f);

	bool bIsSpeech = false;

	if (level > g.s.fVADmax)
		bIsSpeech = true;
	else if (level > g.s.fVADmin && bPreviousVoice)
		bIsSpeech = true;

	/* Voice-hold: keep transmitting briefly after speech stops. */
	if (! bIsSpeech) {
		iHoldFrames++;
		if (iHoldFrames < g.s.iVoiceHold)
			bIsSpeech = true;
	} else {
		iHoldFrames = 0;
	}

	/* Transmit-mode overrides (continuous / push-to-talk / double-push). */
	if (g.s.atTransmit == Settings::Continous)
		bIsSpeech = true;
	else if (g.s.atTransmit == Settings::PushToTalk)
		bIsSpeech = g.s.uiDoublePush && ((g.uiDoublePush < g.s.uiDoublePush) || (g.tDoublePush.elapsed() < g.s.uiDoublePush));

	bIsSpeech = bIsSpeech || (g.iPushToTalk > 0);

	/* Mute/suppress conditions override everything. */
	if (g.s.bMute || ((g.s.lmLoopMode != Settings::Local) && p && (p->bMute || p->bSuppress)) || g.bPushToMute || (g.iTarget < 0)) {
		bIsSpeech = false;
	}

	if (bIsSpeech) {
		iSilentFrames = 0;
	} else {
		iSilentFrames++;
		if (iSilentFrames > 500)
			iFrameCounter = 0;
	}

	/* Update the local user's talking indicator. */
	if (p) {
		if (! bIsSpeech)
			p->setTalking(Settings::Passive);
		else if (g.iTarget == 0)
			p->setTalking(Settings::Talking);
		else
			p->setTalking(Settings::Shouting);
	}

	/* Audible transmission on/off cues. */
	if (g.s.bTxAudioCue && g.uiSession != 0) {
		AudioOutputPtr ao = g.ao;
		if (bIsSpeech && ! bPreviousVoice && ao)
			ao->playSample(g.s.qsTxAudioCueOn);
		else if (ao && !bIsSpeech && bPreviousVoice && ao)
			ao->playSample(g.s.qsTxAudioCueOff);
	}

	/* Fully idle: reset bitrate, maybe auto-deafen, freeze AGC adaptation,
	   and skip encoding entirely. */
	if (! bIsSpeech && ! bPreviousVoice) {
		iBitrate = 0;
		if (g.s.iIdleTime && ! g.s.bDeaf && ((tIdle.elapsed() / 1000000ULL) > g.s.iIdleTime)) {
			emit doDeaf();
			tIdle.restart();
		}
		spx_int32_t increment = 0;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_INCREMENT, &increment);
		return;
	} else {
		spx_int32_t increment = 12;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_INCREMENT, &increment);
	}
	tIdle.restart();
	/*
	int r = celt_encoder_ctl(ceEncoder, CELT_SET_POST_MDCT_CALLBACK(celtBack, NULL));
	qWarning() << "Set Callback" << r;
	*/

	/* Encode with CELT or Speex depending on the active transport codec. */
	unsigned char buffer[512];
	int len;

	if (umtType != MessageHandler::UDPVoiceSpeex) {
		len = encodeCELTFrame(psSource, buffer);
		if (len == 0)
			return;
	} else {
		len = encodeSpeexFrame(psSource, buffer);
	}

	flushCheck(QByteArray(reinterpret_cast<const char *>(buffer), len), ! bIsSpeech);

	if (! bIsSpeech)
		iBitrate = 0;

	bPreviousVoice = bIsSpeech;
}
/* Process one captured frame: level metering, echo cancellation,
 * preprocessing, voice-activity detection, and CELT/Opus encoding. */
void AudioInput::encodeAudioFrame() {
	int iArg;
	int i;
	float sum;
	short max;

	short *psSource;

	iFrameCounter++;

	if (! bRunning)
		return;

	/* RMS (in dB, floored at -96) and peak absolute sample of the raw
	   microphone frame, computed in a single pass. */
	sum=1.0f;
	max = 1;
	for (i=0;i<iFrameSize;i++) {
		sum += static_cast<float>(psMic[i] * psMic[i]);
		max = std::max(static_cast<short>(abs(psMic[i])), max);
	}
	dPeakMic = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);
	dMaxMic = max;

	/* Speaker (far-end) level, only when echo cancellation is active. */
	if (psSpeaker && (iEchoChannels > 0)) {
		sum=1.0f;
		for (i=0;i<iFrameSize;i++)
			sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]);
		dPeakSpeaker = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);
	} else {
		dPeakSpeaker = 0.0;
	}

	QMutexLocker l(&qmSpeex);
	resetAudioProcessor();

	/* Read the current AGC gain, then derive the noise-suppression level
	   from the configured setting. */
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_AGC_GAIN, &iArg);
	float gainValue = static_cast<float>(iArg);
	iArg = g.s.iNoiseSuppress - iArg;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

	/* Echo-cancel against the speaker signal when available, then run the
	   preprocessor on the chosen source buffer. */
	if (sesEcho && psSpeaker) {
		speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean);
		speex_preprocess_run(sppPreprocess, psClean);
		psSource = psClean;
	} else {
		speex_preprocess_run(sppPreprocess, psMic);
		psSource = psMic;
	}

	/* Level of the processed signal. */
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psSource[i] * psSource[i]);
	float micLevel = sqrtf(sum / static_cast<float>(iFrameSize));
	dPeakSignal = qMax(20.0f*log10f(micLevel / 32768.0f), -96.0f);

	spx_int32_t prob = 0;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob);
	fSpeechProb = static_cast<float>(prob) / 100.0f;

	// clean microphone level: peak of filtered signal attenuated by AGC gain
	dPeakCleanMic = qMax(dPeakSignal - gainValue, -96.0f);

	/* Voice-activity decision: speech probability or amplitude, per settings. */
	float level = (g.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakCleanMic / 96.0f);

	bool bIsSpeech = false;

	if (level > g.s.fVADmax)
		bIsSpeech = true;
	else if (level > g.s.fVADmin && bPreviousVoice)
		bIsSpeech = true;

	/* Voice-hold: keep transmitting briefly after speech stops. */
	if (! bIsSpeech) {
		iHoldFrames++;
		if (iHoldFrames < g.s.iVoiceHold)
			bIsSpeech = true;
	} else {
		iHoldFrames = 0;
	}

	/* Transmit-mode overrides (continuous / push-to-talk / double-push). */
	if (g.s.atTransmit == Settings::Continuous)
		bIsSpeech = true;
	else if (g.s.atTransmit == Settings::PushToTalk)
		bIsSpeech = g.s.uiDoublePush && ((g.uiDoublePush < g.s.uiDoublePush) || (g.tDoublePush.elapsed() < g.s.uiDoublePush));

	bIsSpeech = bIsSpeech || (g.iPushToTalk > 0);

	ClientUser *p = ClientUser::get(g.uiSession);

	/* Mute/suppress conditions override everything. */
	if (g.s.bMute || ((g.s.lmLoopMode != Settings::Local) && p && (p->bMute || p->bSuppress)) || g.bPushToMute || (g.iTarget < 0)) {
		bIsSpeech = false;
	}

	if (bIsSpeech) {
		iSilentFrames = 0;
	} else {
		iSilentFrames++;
		if (iSilentFrames > 500)
			iFrameCounter = 0;
	}

	/* Update the local user's talking indicator. */
	if (p) {
		if (! bIsSpeech)
			p->setTalking(Settings::Passive);
		else if (g.iTarget == 0)
			p->setTalking(Settings::Talking);
		else
			p->setTalking(Settings::Shouting);
	}

	/* Audible transmission on/off cues. */
	if (g.s.bTxAudioCue && g.uiSession != 0) {
		AudioOutputPtr ao = g.ao;
		if (bIsSpeech && ! bPreviousVoice && ao)
			ao->playSample(g.s.qsTxAudioCueOn);
		else if (ao && !bIsSpeech && bPreviousVoice)
			ao->playSample(g.s.qsTxAudioCueOff);
	}

	/* Fully idle: reset bitrate, perform the configured idle action
	   (deafen/mute), freeze AGC adaptation, and skip encoding entirely. */
	if (! bIsSpeech && ! bPreviousVoice) {
		iBitrate = 0;
		if (g.s.iaeIdleAction != Settings::Nothing && ((tIdle.elapsed() / 1000000ULL) > g.s.iIdleTime)) {
			if (g.s.iaeIdleAction == Settings::Deafen && !g.s.bDeaf) {
				tIdle.restart();
				emit doDeaf();
			} else if (g.s.iaeIdleAction == Settings::Mute && !g.s.bMute) {
				tIdle.restart();
				emit doMute();
			}
		}
		spx_int32_t increment = 0;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_INCREMENT, &increment);
		return;
	} else {
		spx_int32_t increment = 12;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_INCREMENT, &increment);
	}

	/* Speech just (re)started: request an encoder reset. */
	if (bIsSpeech && !bPreviousVoice) {
		bResetEncoder = true;
	}

	tIdle.restart();

	EncodingOutputBuffer buffer;
	Q_ASSERT(buffer.size() >= static_cast<size_t>(iAudioQuality / 100 * iAudioFrames / 8));

	int len = 0;

	bool encoded = true;
	if (!selectCodec())
		return;

	if (umtType == MessageHandler::UDPVoiceCELTAlpha || umtType == MessageHandler::UDPVoiceCELTBeta) {
		len = encodeCELTFrame(psSource, buffer);
		if (len <= 0) {
			iBitrate = 0;
			qWarning() << "encodeCELTFrame failed" << iBufferedFrames << iFrameSize << len;
			return;
		}
		++iBufferedFrames;
	} else if (umtType == MessageHandler::UDPVoiceOpus) {
		/* Opus frames are batched: accumulate until iAudioFrames frames
		   are buffered (or speech ends), then encode the whole batch. */
		encoded = false;
		opusBuffer.insert(opusBuffer.end(), psSource, psSource + iFrameSize);
		++iBufferedFrames;

		if (!bIsSpeech || iBufferedFrames >= iAudioFrames) {
			if (iBufferedFrames < iAudioFrames) {
				// Stuff frame to framesize if speech ends and we don't have enough audio
				// this way we are guaranteed to have a valid framecount and won't cause
				// a codec configuration switch by suddenly using a wildly different
				// framecount per packet.
				const int missingFrames = iAudioFrames - iBufferedFrames;
				opusBuffer.insert(opusBuffer.end(), iFrameSize * missingFrames, 0);
				iBufferedFrames += missingFrames;
				iFrameCounter += missingFrames;
			}

			Q_ASSERT(iBufferedFrames == iAudioFrames);

			len = encodeOpusFrame(&opusBuffer[0], iBufferedFrames * iFrameSize, buffer);
			opusBuffer.clear();
			if (len <= 0) {
				iBitrate = 0;
				qWarning() << "encodeOpusFrame failed" << iBufferedFrames << iFrameSize << len;
				iBufferedFrames = 0; // These are lost. Make sure not to mess up our sequence counter next flushCheck.
				return;
			}
			encoded = true;
		}
	}

	if (encoded) {
		flushCheck(QByteArray(reinterpret_cast<char *>(&buffer[0]), len), !bIsSpeech);
	}

	if (! bIsSpeech)
		iBitrate = 0;

	bPreviousVoice = bIsSpeech;
}
/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end,
                              spx_int16_t *out, spx_int32_t *Yout)
{
   (void)Yout; /* retained only for source compatibility with the old API */
   speex_echo_cancellation(st, in, far_end, out);
}
/* PulseAudio Speex-AEC hook: cancel one frame of echo.  rec is the
 * near-end capture, play the far-end reference; the cleaned samples are
 * written to out. */
void pa_speex_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *play, uint8_t *out) {
    speex_echo_cancellation(ec->params.priv.speex.state,
                            (const spx_int16_t *) rec,
                            (const spx_int16_t *) play,
                            (spx_int16_t *) out);
}
/* Run one frame through whichever AEC core is active (WebRTC AEC,
 * WebRTC AECM, or Speex), with optional NS/AGC stages, then apply the
 * software output gain.  Returns 0 on success, -1 otherwise. */
int KotiAEC_process(const int16_t* farend, const int16_t* nearend, int16_t* out)
{
    int ret = -1, i = 0, frame_size = 0;

    switch(aec_core_used)
    {
#ifdef WEBRTC_AEC_CORE_ENABLED
    case WEBRTC_AEC:
        /* Full WebRTC AEC: queue the far-end frame, then process near-end. */
        if(farend)
            WebRtcAec_BufferFarend(webrtc_aec_pty.webrtc_aec, farend, webrtc_aec_pty.frame_size);
        if(!WebRtcAec_Process(webrtc_aec_pty.webrtc_aec, nearend, NULL, out, NULL,
                              webrtc_aec_pty.frame_size, webrtc_aec_pty.sndcard_delay_ms, 0))
        {
            ret = 0;
        }
        /* Optional noise suppression; 160-sample frames are run as two
           80-sample halves. */
        if(webrtc_aec_pty.webrtc_ns)
        {
            WebRtcNsx_Process((NsxHandle*)webrtc_aec_pty.webrtc_ns, out, NULL, out, NULL);
            if(webrtc_aec_pty.frame_size == 160)
                WebRtcNsx_Process((NsxHandle*)webrtc_aec_pty.webrtc_ns, out+80, NULL, out+80, NULL);
        }
        /* Optional automatic gain control. */
        if(webrtc_aec_pty.webrtc_agc)
        {
            int32_t out_c;
            uint8_t warn_status;
            WebRtcAgc_Process(webrtc_aec_pty.webrtc_agc, out, NULL, webrtc_aec_pty.frame_size,
                              out, NULL, 32, &out_c, 1, &warn_status);
        }
        // if(webrtc_aec_pty.webrtc_ns)
        // {
        //     WebRtcNsx_Process((NsxHandle*)webrtc_aec_pty.webrtc_ns, out, NULL, out, NULL);
        //     if(webrtc_aec_pty.frame_size == 160)
        //         WebRtcNsx_Process((NsxHandle*)webrtc_aec_pty.webrtc_ns, out+80, NULL, out+80, NULL);
        // }
        frame_size = webrtc_aec_pty.frame_size;
        break;
    case WEBRTC_AECM:
        /* Mobile AECM path: NS and AGC run on a copy of the near-end
           signal BEFORE the echo canceller. */
        /*
        if(farend)
            WebRtcAecm_BufferFarend(webrtc_aecm_pty.webrtc_aec, farend, webrtc_aecm_pty.frame_size);
        if(!WebRtcAecm_Process(webrtc_aecm_pty.webrtc_aec, nearend, NULL, out, webrtc_aecm_pty.frame_size, webrtc_aecm_pty.sndcard_delay_ms))
        {
            ret = 0;
        }
        */
        memcpy(proc_tmp_buf, nearend, webrtc_aecm_pty.frame_size*2);
        if(webrtc_aecm_pty.webrtc_ns)
        {
            WebRtcNsx_Process((NsxHandle*)webrtc_aecm_pty.webrtc_ns, proc_tmp_buf, NULL, proc_tmp_buf, NULL);
            if(webrtc_aecm_pty.frame_size == 160)
                WebRtcNsx_Process((NsxHandle*)webrtc_aecm_pty.webrtc_ns, proc_tmp_buf+80, NULL, proc_tmp_buf+80, NULL);
        }
        if(webrtc_aecm_pty.webrtc_agc)
        {
            int32_t out_c;
            uint8_t warn_status;
            WebRtcAgc_Process(webrtc_aecm_pty.webrtc_agc, proc_tmp_buf, NULL, webrtc_aecm_pty.frame_size,
                              proc_tmp_buf, NULL, 32, &out_c, 1, &warn_status);
        }
        // AEC
        if(farend)
            WebRtcAecm_BufferFarend(webrtc_aecm_pty.webrtc_aec, farend, webrtc_aecm_pty.frame_size);
        if(!WebRtcAecm_Process(webrtc_aecm_pty.webrtc_aec, proc_tmp_buf, NULL, out,
                               webrtc_aecm_pty.frame_size, webrtc_aecm_pty.sndcard_delay_ms))
        {
            ret = 0;
        }
        frame_size = webrtc_aecm_pty.frame_size;
        break;
#endif
    case SPEEX_AEC:
    default:
#ifdef OLD_SPEEX_AEC
        /* Legacy Speex API (speex_echo_cancel + speex_preprocess). */
        speex_echo_cancel((SpeexEchoState*)speex_aec_pty.speex_echo_state, nearend, farend, out, speex_aec_pty.nosie);
        if(speex_preprocess((SpeexPreprocessState*)speex_aec_pty.speex_preprocess_state, out, speex_aec_pty.nosie) == 1)
            ret = 0;
#else
        /* With a far-end frame run full cancellation; otherwise fall back
           to capture-only mode (uses the state's internal playback queue). */
        if(farend)
            speex_echo_cancellation((SpeexEchoState*)speex_aec_pty.speex_echo_state, nearend, farend, out);
        else
            speex_echo_capture((SpeexEchoState*)speex_aec_pty.speex_echo_state, nearend, out);
        // speex_preprocess_estimate_update((SpeexPreprocessState*)speex_aec_pty.speex_preprocess_state, out);
        if(speex_preprocess_run((SpeexPreprocessState*)speex_aec_pty.speex_preprocess_state, out) == 1)
            ret = 0;
#endif
        frame_size = speex_aec_pty.frame_size;
        break;
    }

    // if the output sound needed amplify
    if(output_sound_amplification != 1.0f && output_sound_amplification > 0)
    {
        for(; i<frame_size; ++i)
            out[i] = out[i]*output_sound_amplification;
    }

    return ret;
}
/* Process one captured frame: level metering, optional echo-test FFT,
 * (re)initialization of the Speex preprocessor/echo state, voice-activity
 * detection, optional positional data, and Speex encoding. */
void AudioInput::encodeAudioFrame() {
	int iArg;
	ClientPlayer *p=ClientPlayer::get(g.uiSession);
	int i;
	float sum;
	short max;

	short *psSource;

	iFrameCounter++;

	if (! bRunning) {
		return;
	}

	/* RMS of the raw mic frame, expressed in dB (floored at -96 dB). */
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psMic[i] * psMic[i]);
	dPeakMic=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f);
	if (dPeakMic < -96.0f)
		dPeakMic = -96.0f;

	/* Peak absolute sample of the mic frame. */
	max = 1;
	for (i=0;i<iFrameSize;i++)
		max = static_cast<short>(abs(psMic[i]) > max ? abs(psMic[i]) : max);
	dMaxMic = max;

	/* Echo-test mode: locate the dominant frequency bin of the capture
	   via a forward DRFT, for display/diagnostic purposes. */
	if (g.bEchoTest) {
		STACKVAR(float, fft, iFrameSize);
		STACKVAR(float, power, iFrameSize);
		float scale = 1.f / static_cast<float>(iFrameSize);
		for (i=0;i<iFrameSize;i++)
			fft[i] = static_cast<float>(psMic[i]) * scale;
		mumble_drft_forward(&fftTable, fft);
		float mp = 0.0f;
		int bin = 0;
		power[0]=power[1]=0.0f;
		for (i=2;i < iFrameSize / 2;i++) {
			power[i] = sqrtf(fft[2*i]*fft[2*i]+fft[2*i-1]*fft[2*i-1]);
			if (power[i] > mp) {
				bin = i;
				mp = power[i];
			}
		}
		/* Reject the peak if another bin comes within half its power. */
		for (i=2;i< iFrameSize / 2;i++) {
			if (power[i] * 2 > mp) {
				if (i != bin)
					bin = 0;
			}
		}
		iBestBin = bin * 2;
	}

	/* Speaker (far-end) level, only when echo cancellation is active. */
	if (iEchoChannels > 0) {
		sum=1.0f;
		for (i=0;i<iFrameSize;i++)
			sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]);
		dPeakSpeaker=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f);
		if (dPeakSpeaker < -96.0f)
			dPeakSpeaker = -96.0f;
	} else {
		dPeakSpeaker = 0.0;
	}

	QMutexLocker l(&qmSpeex);

	/* Lazily (re)build the preprocessor and echo-canceller state when
	   settings changed. */
	if (bResetProcessor) {
		if (sppPreprocess)
			speex_preprocess_state_destroy(sppPreprocess);
		if (sesEcho)
			speex_echo_state_destroy(sesEcho);

		sppPreprocess = speex_preprocess_state_init(iFrameSize, SAMPLE_RATE);

		iArg = 1;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_VAD, &iArg);
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DENOISE, &iArg);
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC, &iArg);
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DEREVERB, &iArg);

		iArg = 30000;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_TARGET, &iArg);

		/* Derive the max AGC gain (dB) from the configured minimum loudness. */
		float v = 30000.0f / static_cast<float>(g.s.iMinLoudness);
		iArg = lroundf(floorf(20.0f * log10f(v)));
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, &iArg);

		iArg = g.s.iNoiseSuppress;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

		if (iEchoChannels > 0) {
			/* Tail length of 10 frames; link the echo state into the
			   preprocessor for residual-echo suppression. */
			sesEcho = speex_echo_state_init(iFrameSize, iFrameSize*10);
			iArg = SAMPLE_RATE;
			speex_echo_ctl(sesEcho, SPEEX_SET_SAMPLING_RATE, &iArg);
			speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho);
			jitter_buffer_reset(jb);
			qWarning("AudioInput: ECHO CANCELLER ACTIVE");
		} else {
			sesEcho = NULL;
		}

		iFrames = 0;
		speex_bits_reset(&sbBits);

		bResetProcessor = false;
	}

	int iIsSpeech;

	/* Echo-cancel when enabled, then preprocess; speex_preprocess_run's
	   return value is the built-in VAD verdict. */
	if (sesEcho) {
		speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean);
		iIsSpeech=speex_preprocess_run(sppPreprocess, psClean);
		psSource = psClean;
	} else {
		iIsSpeech=speex_preprocess_run(sppPreprocess, psMic);
		psSource = psMic;
	}

	/* Level of the processed signal. */
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psSource[i] * psSource[i]);
	float micLevel = sqrtf(sum / static_cast<float>(iFrameSize));
	dPeakSignal=20.0f*log10f(micLevel / 32768.0f);
	if (dPeakSignal < -96.0f)
		dPeakSignal = -96.0f;

	spx_int32_t prob = 0;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob);
	fSpeechProb = static_cast<float>(prob) / 100.0f;

	/* Voice-activity decision: speech probability or amplitude, per settings. */
	float level = (g.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakMic / 96.0f);

	if (level > g.s.fVADmax)
		iIsSpeech = 1;
	else if (level > g.s.fVADmin && bPreviousVoice)
		iIsSpeech = 1;
	else
		iIsSpeech = 0;

	/* Voice-hold: keep transmitting briefly after speech stops. */
	if (! iIsSpeech) {
		iHoldFrames++;
		if (iHoldFrames < g.s.iVoiceHold)
			iIsSpeech=1;
	} else {
		iHoldFrames = 0;
	}

	/* Transmit-mode overrides (continuous / push-to-talk / double-push). */
	if (g.s.atTransmit == Settings::Continous)
		iIsSpeech = 1;
	else if (g.s.atTransmit == Settings::PushToTalk)
		iIsSpeech = g.s.uiDoublePush && ((g.uiDoublePush < g.s.uiDoublePush) || (g.tDoublePush.elapsed() < g.s.uiDoublePush));

	iIsSpeech = iIsSpeech || (g.iPushToTalk > 0) || (g.iAltSpeak > 0);

	/* Mute conditions override everything. */
	if (g.s.bMute || ((g.s.lmLoopMode != Settings::Local) && p && p->bMute) || g.bPushToMute) {
		iIsSpeech = 0;
	}

	if (iIsSpeech) {
		iSilentFrames = 0;
	} else {
		iSilentFrames++;
		if (iSilentFrames > 200)
			iFrameCounter = 0;
	}

	if (p)
		p->setTalking(iIsSpeech, (g.iAltSpeak > 0));

	/* Audible push-to-talk click cues. */
	if (g.s.bPushClick && (g.s.atTransmit == Settings::PushToTalk)) {
		AudioOutputPtr ao = g.ao;
		if (iIsSpeech && ! bPreviousVoice && ao)
			ao->playSine(400.0f,1200.0f,5);
		else if (ao && !iIsSpeech && bPreviousVoice && ao)
			ao->playSine(620.0f,-1200.0f,5);
	}

	/* Fully idle: reset bitrate, maybe auto-mute, and skip encoding. */
	if (! iIsSpeech && ! bPreviousVoice) {
		iBitrate = 0;
		if (g.s.iIdleTime && ! g.s.bMute && ((tIdle.elapsed() / 1000000ULL) > g.s.iIdleTime)) {
			emit doMute();
			tIdle.restart();
		}
		return;
	}

	bPreviousVoice = iIsSpeech;

	tIdle.restart();

	/* Voice-hold tail frames are sent as silence. */
	if (! iIsSpeech) {
		memset(psMic, 0, sizeof(short) * iFrameSize);
	}

	/* Embed positional audio data (in-band Speex payload) at the start of
	   a packet, when enabled and a position fix is available. */
	if (g.s.bTransmitPosition && g.p && ! g.bCenterPosition && (iFrames == 0) && g.p->fetch()) {
		QByteArray q;
		QDataStream ds(&q, QIODevice::WriteOnly);
		ds << g.p->fPosition[0];
		ds << g.p->fPosition[1];
		ds << g.p->fPosition[2];

		speex_bits_pack(&sbBits, 13, 5);
		speex_bits_pack(&sbBits, q.size(), 4);

		const unsigned char *d=reinterpret_cast<const unsigned char*>(q.data());
		for (i=0;i<q.size();i++) {
			speex_bits_pack(&sbBits, d[i], 8);
		}
	}

	speex_encode_int(esEncState, psSource, &sbBits);
	iFrames++;

	speex_encoder_ctl(esEncState, SPEEX_GET_BITRATE, &iBitrate);

	flushCheck();
}