bool pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec, pa_sample_spec *rec_ss, pa_channel_map *rec_map, pa_sample_spec *play_ss, pa_channel_map *play_map, pa_sample_spec *out_ss, pa_channel_map *out_map, uint32_t *nframes, const char *args) { int rate; uint32_t frame_size_ms, filter_size_ms; pa_modargs *ma; if (!(ma = pa_modargs_new(args, valid_modargs))) { pa_log("Failed to parse submodule arguments."); goto fail; } filter_size_ms = DEFAULT_FILTER_SIZE_MS; if (pa_modargs_get_value_u32(ma, "filter_size_ms", &filter_size_ms) < 0 || filter_size_ms < 1 || filter_size_ms > 2000) { pa_log("Invalid filter_size_ms specification"); goto fail; } frame_size_ms = DEFAULT_FRAME_SIZE_MS; if (pa_modargs_get_value_u32(ma, "frame_size_ms", &frame_size_ms) < 0 || frame_size_ms < 1 || frame_size_ms > 200) { pa_log("Invalid frame_size_ms specification"); goto fail; } pa_speex_ec_fixate_spec(rec_ss, rec_map, play_ss, play_map, out_ss, out_map); rate = out_ss->rate; *nframes = pa_echo_canceller_blocksize_power2(rate, frame_size_ms); pa_log_debug ("Using nframes %d, channels %d, rate %d", *nframes, out_ss->channels, out_ss->rate); ec->params.priv.speex.state = speex_echo_state_init_mc(*nframes, (rate * filter_size_ms) / 1000, out_ss->channels, out_ss->channels); if (!ec->params.priv.speex.state) goto fail; speex_echo_ctl(ec->params.priv.speex.state, SPEEX_ECHO_SET_SAMPLING_RATE, &rate); if (!pa_speex_ec_preprocessor_init(ec, out_ss, *nframes, ma)) goto fail; pa_modargs_free(ma); return true; fail: if (ma) pa_modargs_free(ma); if (ec->params.priv.speex.pp_state) { speex_preprocess_state_destroy(ec->params.priv.speex.pp_state); ec->params.priv.speex.pp_state = NULL; } if (ec->params.priv.speex.state) { speex_echo_state_destroy(ec->params.priv.speex.state); ec->params.priv.speex.state = NULL; } return false; }
pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec, pa_sample_spec *source_ss, pa_channel_map *source_map, pa_sample_spec *sink_ss, pa_channel_map *sink_map, uint32_t *blocksize, const char *args) { int framelen, y, rate; uint32_t frame_size_ms, filter_size_ms; pa_modargs *ma; if (!(ma = pa_modargs_new(args, valid_modargs))) { pa_log("Failed to parse submodule arguments."); goto fail; } filter_size_ms = DEFAULT_FILTER_SIZE_MS; if (pa_modargs_get_value_u32(ma, "filter_size_ms", &filter_size_ms) < 0 || filter_size_ms < 1 || filter_size_ms > 2000) { pa_log("Invalid filter_size_ms specification"); goto fail; } frame_size_ms = DEFAULT_FRAME_SIZE_MS; if (pa_modargs_get_value_u32(ma, "frame_size_ms", &frame_size_ms) < 0 || frame_size_ms < 1 || frame_size_ms > 200) { pa_log("Invalid frame_size_ms specification"); goto fail; } pa_speex_ec_fixate_spec(source_ss, source_map, sink_ss, sink_map); rate = source_ss->rate; framelen = (rate * frame_size_ms) / 1000; /* framelen should be a power of 2, round down to nearest power of two */ y = 1 << ((8 * sizeof (int)) - 2); while (y > framelen) y >>= 1; framelen = y; *blocksize = framelen * pa_frame_size (source_ss); pa_log_debug ("Using framelen %d, blocksize %u, channels %d, rate %d", framelen, *blocksize, source_ss->channels, source_ss->rate); ec->params.priv.speex.state = speex_echo_state_init_mc (framelen, (rate * filter_size_ms) / 1000, source_ss->channels, source_ss->channels); if (!ec->params.priv.speex.state) goto fail; speex_echo_ctl(ec->params.priv.speex.state, SPEEX_ECHO_SET_SAMPLING_RATE, &rate); pa_modargs_free(ma); return TRUE; fail: if (ma) pa_modargs_free(ma); return FALSE; }
void AudioInput::resetAudioProcessor() { if (!bResetProcessor) return; int iArg; if (sppPreprocess) speex_preprocess_state_destroy(sppPreprocess); if (sesEcho) speex_echo_state_destroy(sesEcho); sppPreprocess = speex_preprocess_state_init(iFrameSize, iSampleRate); iArg = 1; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_VAD, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DENOISE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DEREVERB, &iArg); iArg = 30000; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_TARGET, &iArg); float v = 30000.0f / static_cast<float>(g.s.iMinLoudness); iArg = iroundf(floorf(20.0f * log10f(v))); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, &iArg); iArg = -60; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_DECREMENT, &iArg); iArg = g.s.iNoiseSuppress; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg); if (iEchoChannels > 0) { sesEcho = speex_echo_state_init_mc(iFrameSize, iFrameSize * 10, 1, bEchoMulti ? iEchoChannels : 1); iArg = iSampleRate; speex_echo_ctl(sesEcho, SPEEX_ECHO_SET_SAMPLING_RATE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho); qWarning("AudioInput: ECHO CANCELLER ACTIVE"); } else { sesEcho = NULL; } bResetEncoder = true; bResetProcessor = false; }
pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec, pa_sample_spec *source_ss, pa_channel_map *source_map, pa_sample_spec *sink_ss, pa_channel_map *sink_map, uint32_t *nframes, const char *args) { int rate; uint32_t y, frame_size_ms, filter_size_ms; pa_modargs *ma; if (!(ma = pa_modargs_new(args, valid_modargs))) { pa_log("Failed to parse submodule arguments."); goto fail; } filter_size_ms = DEFAULT_FILTER_SIZE_MS; if (pa_modargs_get_value_u32(ma, "filter_size_ms", &filter_size_ms) < 0 || filter_size_ms < 1 || filter_size_ms > 2000) { pa_log("Invalid filter_size_ms specification"); goto fail; } frame_size_ms = DEFAULT_FRAME_SIZE_MS; if (pa_modargs_get_value_u32(ma, "frame_size_ms", &frame_size_ms) < 0 || frame_size_ms < 1 || frame_size_ms > 200) { pa_log("Invalid frame_size_ms specification"); goto fail; } pa_speex_ec_fixate_spec(source_ss, source_map, sink_ss, sink_map); rate = source_ss->rate; *nframes = (rate * frame_size_ms) / 1000; /* nframes should be a power of 2, round down to nearest power of two */ y = 1 << ((8 * sizeof (uint32_t)) - 2); while (y > *nframes) y >>= 1; *nframes = y; pa_log_debug ("Using nframes %d, channels %d, rate %d", *nframes, source_ss->channels, source_ss->rate); ec->params.priv.speex.state = speex_echo_state_init_mc (*nframes, (rate * filter_size_ms) / 1000, source_ss->channels, source_ss->channels); if (!ec->params.priv.speex.state) goto fail; speex_echo_ctl(ec->params.priv.speex.state, SPEEX_ECHO_SET_SAMPLING_RATE, &rate); if (!pa_speex_ec_preprocessor_init(ec, source_ss, *nframes, ma)) goto fail; pa_modargs_free(ma); return TRUE; fail: if (ma) pa_modargs_free(ma); if (ec->params.priv.speex.pp_state) { speex_preprocess_state_destroy(ec->params.priv.speex.pp_state); ec->params.priv.speex.pp_state = NULL; } if (ec->params.priv.speex.state) { speex_echo_state_destroy(ec->params.priv.speex.state); ec->params.priv.speex.state = NULL; } return FALSE; }
/* * Create the AEC. */ PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool, unsigned clock_rate, unsigned channel_count, unsigned samples_per_frame, unsigned tail_ms, unsigned options, void **p_echo ) { speex_ec *echo; int sampling_rate; *p_echo = NULL; echo = PJ_POOL_ZALLOC_T(pool, speex_ec); PJ_ASSERT_RETURN(echo != NULL, PJ_ENOMEM); echo->samples_per_frame = samples_per_frame; echo->options = options; #if 0 echo->state = speex_echo_state_init_mc(echo->samples_per_frame, clock_rate * tail_ms / 1000, channel_count, channel_count); #else if (channel_count != 1) { PJ_LOG(2,("echo_speex.c", "Multichannel EC is not supported by this " "echo canceller. It may not work.")); } echo->state = speex_echo_state_init(echo->samples_per_frame, clock_rate * tail_ms / 1000); #endif if (echo->state == NULL) { return PJ_ENOMEM; } /* Set sampling rate */ sampling_rate = clock_rate; speex_echo_ctl(echo->state, SPEEX_ECHO_SET_SAMPLING_RATE, &sampling_rate); echo->preprocess = speex_preprocess_state_init(echo->samples_per_frame, clock_rate); if (echo->preprocess == NULL) { speex_echo_state_destroy(echo->state); return PJ_ENOMEM; } /* Disable all preprocessing, we only want echo cancellation */ #if 0 disabled = 0; enabled = 1; speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_DENOISE, &enabled); speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_AGC, &disabled); speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_VAD, &disabled); speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_DEREVERB, &enabled); #endif /* Control echo cancellation in the preprocessor */ speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, echo->state); /* Create temporary frame for echo cancellation */ echo->tmp_frame = (pj_int16_t*) pj_pool_zalloc(pool, 2*samples_per_frame); PJ_ASSERT_RETURN(echo->tmp_frame != NULL, PJ_ENOMEM); /* Done */ *p_echo = echo; return PJ_SUCCESS; }
void AudioInput::encodeAudioFrame() { int iArg; //ClientUser *p=ClientUser::get(g.uiSession); int i; float sum; short max; short *psSource; iFrameCounter++; if (! bRunning) { return; } /*sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psMic[i] * psMic[i]); iLevel = sqrtf(sum / static_cast<float>(iFrameSize)) * 9/32768.0f; dPeakMic=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f); if (dPeakMic < -96.0f) dPeakMic = -96.0f; max = 1; for (i=0;i<iFrameSize;i++) max = static_cast<short>(abs(psMic[i]) > max ? abs(psMic[i]) : max); dMaxMic = max; if (psSpeaker && (iEchoChannels > 0)) { sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]); dPeakSpeaker=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f); if (dPeakSpeaker < -96.0f) dPeakSpeaker = -96.0f; } else { dPeakSpeaker = 0.0; }*/ MutexLocker l(&qmSpeex); bResetProcessor = false; if (bResetProcessor) { if (sppPreprocess) speex_preprocess_state_destroy(sppPreprocess); if (sesEcho) speex_echo_state_destroy(sesEcho); sppPreprocess = speex_preprocess_state_init(iFrameSize, iSampleRate); iArg = 1; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_VAD, &iArg); //speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DENOISE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DEREVERB, &iArg); iArg = 30000; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_TARGET, &iArg); float v = 30000.0f / static_cast<float>(g_struct.s.iMinLoudness); iArg = (floorf(20.0f * log10f(v))); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, &iArg); iArg = g_struct.s.iNoiseSuppress; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg); if (iEchoChannels > 0) { sesEcho = speex_echo_state_init_mc(iFrameSize, iFrameSize*10, 1, bEchoMulti ? iEchoChannels : 1); iArg = iSampleRate; speex_echo_ctl(sesEcho, SPEEX_ECHO_SET_SAMPLING_RATE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho); Trace("AudioInput: ECHO CANCELLER ACTIVE"); } else { sesEcho = NULL; } bResetProcessor = false; } int iIsSpeech=1; psSource = psMic; /* //回音消除和音质处理 if (bEcho && sesEcho && psSpeaker) { speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean); iIsSpeech=speex_preprocess_run(sppPreprocess, psClean); psSource = psClean; } else { iIsSpeech=speex_preprocess_run(sppPreprocess, psMic); psSource = psMic; }*/ /*sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psSource[i] * psSource[i]); float micLevel = sqrtf(sum / static_cast<float>(iFrameSize)); dPeakSignal=20.0f*log10f(micLevel / 32768.0f); if (dPeakSignal < -96.0f) dPeakSignal = -96.0f; spx_int32_t prob = 0; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob); fSpeechProb = static_cast<float>(prob) / 100.0f; float level = (g_struct.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakMic / 96.0f); if (level > g_struct.s.fVADmax) iIsSpeech = 1; else if (level > g_struct.s.fVADmin && bPreviousVoice) iIsSpeech = 1; else iIsSpeech = 0; if (! iIsSpeech) { iHoldFrames++; if (iHoldFrames < g_struct.s.iVoiceHold) iIsSpeech=1; } else { iHoldFrames = 0; }*/ //tIdle.restart(); /* int r = celt_encoder_ctl(ceEncoder, CELT_SET_POST_MDCT_CALLBACK(celtBack, NULL)); qWarning() << "Set Callback" << r; */ //编码 speex或者CELT unsigned char buffer[512]; int len; if (umtType != MessageHandler::UDPVoiceSpeex) { if (cCodec == NULL) { cCodec = new CELTCodec; umtType = MessageHandler::UDPVoiceCELT; ceEncoder = cCodec->encoderCreate(); } else if (cCodec && ! bPreviousVoice) { cCodec->encoder_ctl(ceEncoder, CELT_RESET_STATE); } cCodec->encoder_ctl(ceEncoder, CELT_SET_PREDICTION(0)); cCodec->encoder_ctl(ceEncoder,CELT_SET_BITRATE(iAudioQuality)); len = cCodec->encode(ceEncoder, psSource, SAMPLE_RATE / 100, buffer, 512); iBitrate = len * 100 * 8; } else { int vbr = 0; speex_encoder_ctl(esSpeex, SPEEX_GET_VBR_MAX_BITRATE, &vbr); if (vbr != iAudioQuality) { vbr = iAudioQuality; speex_encoder_ctl(esSpeex, SPEEX_SET_VBR_MAX_BITRATE, &vbr); } if (! bPreviousVoice) speex_encoder_ctl(esSpeex, SPEEX_RESET_STATE, NULL); speex_encode_int(esSpeex, psSource, &sbBits); len = speex_bits_write(&sbBits, reinterpret_cast<char *>(buffer), 127); iBitrate = len * 50 * 8; speex_bits_reset(&sbBits); } QByteArray qba; for(int i=0; i<len; i++) { qba.push_back(buffer[i]); } flushCheck(qba, false); if (! iIsSpeech) iBitrate = 0; bPreviousVoice = iIsSpeech; }
void AudioInput::encodeAudioFrame() { int iArg; //ClientUser *p=ClientUser::get(g.uiSession); int i; float sum; short max; short *psSource; iFrameCounter++; if (! bRunning) { return; } MutexLocker l(&qmSpeex); bResetProcessor = false; if (bResetProcessor) { if (sppPreprocess) speex_preprocess_state_destroy(sppPreprocess); if (sesEcho) speex_echo_state_destroy(sesEcho); sppPreprocess = speex_preprocess_state_init(iFrameSize, iSampleRate); iArg = 1; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_VAD, &iArg); //speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DENOISE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DEREVERB, &iArg); iArg = 30000; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_TARGET, &iArg); float v = 30000.0f / static_cast<float>(g_struct.s.iMinLoudness); iArg = (floorf(20.0f * log10f(v))); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, &iArg); iArg = g_struct.s.iNoiseSuppress; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg); if (iEchoChannels > 0) { sesEcho = speex_echo_state_init_mc(iFrameSize, iFrameSize*10, 1, bEchoMulti ? iEchoChannels : 1); iArg = iSampleRate; speex_echo_ctl(sesEcho, SPEEX_ECHO_SET_SAMPLING_RATE, &iArg); speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho); Trace("AudioInput: ECHO CANCELLER ACTIVE"); } else { sesEcho = NULL; } bResetProcessor = false; } int iIsSpeech=1; psSource = psMic; /* //回音消除和音质处理 if (bEcho && sesEcho && psSpeaker) { speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean); iIsSpeech=speex_preprocess_run(sppPreprocess, psClean); psSource = psClean; } else { iIsSpeech=speex_preprocess_run(sppPreprocess, psMic); psSource = psMic; }*/ /*sum=1.0f; for (i=0;i<iFrameSize;i++) sum += static_cast<float>(psSource[i] * psSource[i]); float micLevel = sqrtf(sum / static_cast<float>(iFrameSize)); dPeakSignal=20.0f*log10f(micLevel / 32768.0f); if (dPeakSignal < -96.0f) dPeakSignal = -96.0f; spx_int32_t prob = 0; speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob); fSpeechProb = static_cast<float>(prob) / 100.0f; float level = (g_struct.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakMic / 96.0f); if (level > g_struct.s.fVADmax) iIsSpeech = 1; else if (level > g_struct.s.fVADmin && bPreviousVoice) iIsSpeech = 1; else iIsSpeech = 0; if (! iIsSpeech) { iHoldFrames++; if (iHoldFrames < g_struct.s.iVoiceHold) iIsSpeech=1; } else { iHoldFrames = 0; }*/ //tIdle.restart(); /* int r = celt_encoder_ctl(ceEncoder, CELT_SET_POST_MDCT_CALLBACK(celtBack, NULL)); qWarning() << "Set Callback" << r; */ //编码 speex或者CELT unsigned char buffer[512]; int len; if (umtType == MessageHandler::UDPVoiceCELT) { if (cCodec == NULL) { cCodec = CELTCodec::instance(); ceEncoder = cCodec->encoderCreate(); } else if (cCodec && ! bPreviousVoice) { cCodec->encoder_ctl(ceEncoder, CELT_RESET_STATE); } cCodec->encoder_ctl(ceEncoder, CELT_SET_PREDICTION(0)); cCodec->encoder_ctl(ceEncoder,CELT_SET_BITRATE(iAudioQuality)); len = cCodec->encode(ceEncoder, psSource, SAMPLE_RATE / 50, buffer, 512); iBitrate = len * 50 * 8; /*//////////////////////////////////////////////////////////////////////// if (m_de_cdDecoder == NULL) { m_de_cdDecoder = cCodec->decoderCreate(); } celt_int16 fout2[2560]={0}; if (cCodec) { int len3 = cCodec->decode(m_de_cdDecoder, buffer, len, fout2, SAMPLE_RATE / 50); len3++; UINT dwDataWrote; if( FAILED(g_pWaveFile.Write( SAMPLE_RATE / 50*2*2, (BYTE*)fout2, &dwDataWrote ) )) { int a=0; a++; } else { OutputDebugString(L"plushuwav g_pWaveFile.Write 3"); } } ///////////////////////////////////////////////////////////////////////*/ } else { assert(0); } QByteArray qba; for(int i=0; i<len; i++) { qba.push_back(buffer[i]); } flushCheck(qba, false); if (! iIsSpeech) iBitrate = 0; bPreviousVoice = iIsSpeech; }
/** Creates a new echo canceller state */ EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) { return speex_echo_state_init_mc(frame_size, filter_length, 1, 1); }