// WebRTCVAD::onExecute (excerpt): the visible text stops right after the output
// loop, so the end of the enclosing `if`, any m_mutex unlock and the return
// statement are not shown here.
//
// Visible behaviour:
//  - Calls m_mutex.lock() and proceeds only when m_inbuffer holds at least
//    WINLEN samples.
//  - Builds one WINLEN-sample frame in a freshly allocated array: the first
//    WINLEN/2 samples are popped from m_inbuffer, the second WINLEN/2 are copied
//    through an iterator without being removed, so consecutive frames overlap
//    by half a window.
//  - Passes the frame to WebRtcVad_Process(handle, 16000, data, WINLEN) and
//    appends the frame pointer and the returned flag to m_filterdatabuffer /
//    m_filterflagbuffer.
//  - While m_filterdatabuffer holds more than m_bufferlen frames: vad1 becomes
//    the last positive flag among the first m_bufferlen queued flags (0 if
//    none); the oldest flag and frame are popped; m_fout.data is resized to
//    WINLEN bytes and filled either with the first WINLEN/2 samples of that
//    frame, low byte first (vad1 > 0), or with the filler pattern (i % 2, 0);
//    the frame is released with delete[], and the packet is timestamped and
//    sent with m_foutOut.write().
//
// NOTE(review): in this one-line form the first inline `//` marker comments out
// the remainder of the line; presumably the upstream file has line breaks after
// each marker -- confirm against the original source before building.
RTC::ReturnCode_t WebRTCVAD::onExecute(RTC::UniqueId ec_id) { RTC_DEBUG(("onExecute start")); m_mutex.lock(); RTC_DEBUG(("onExecute:mutex lock")); if (m_inbuffer.size() >= WINLEN) { int i; WebRtc_Word16 *data;//ADDED data = new WebRtc_Word16[WINLEN];//CHANGED std::list<short>::iterator pbuffer; // sliding window with half overlap for (i = 0; i < WINLEN/2; i++) { data[i] = m_inbuffer.front(); m_inbuffer.pop_front(); } pbuffer = m_inbuffer.begin(); for (i = WINLEN/2; i < WINLEN; i++) { data[i] = *pbuffer; pbuffer++; } WebRtc_Word16 vad = WebRtcVad_Process(handle, 16000, data, WINLEN); m_filterdatabuffer.push_back(data);//ADDED m_filterflagbuffer.push_back(vad);//ADDED while (m_filterdatabuffer.size() > m_bufferlen) {//CHANGED Begin WebRtc_Word16 vad1 = 0; std::list<WebRtc_Word16>::iterator it = m_filterflagbuffer.begin(); for (i = 0; i < m_bufferlen; i++) { WebRtc_Word16 vad2 = *it; if (vad2 > 0) { vad1 = vad2; } it++; } //RTC_INFO(("vad: %i, vad(filtered): %i", vad, vad1)); m_filterflagbuffer.pop_front(); data = m_filterdatabuffer.front(); m_filterdatabuffer.pop_front(); // output the resulting signal m_fout.data.length(WINLEN); if (vad1 > 0) { for (i = 0; i < WINLEN/2; i++) { m_fout.data[i*2] = (unsigned char)(data[i] & 0x00ff); m_fout.data[i*2+1] = (unsigned char)((data[i] & 0xff00) >> 8); } } else { for (i = 0; i < WINLEN/2; i++) { m_fout.data[i*2] = i % 2; // avoid julius zero stripping problem m_fout.data[i*2+1] = 0; } } delete [] data; setTimestamp(m_fout); m_foutOut.write(); }//CHANGED End
int32_t AudioDeviceImpl::RecordedDataIsAvailable( const void* audioSamples, const uint32_t nSamples, const uint8_t nBytesPerSample, const uint8_t nChannels, const uint32_t samplesPerSec, const uint32_t totalDelayMS, const int32_t clockDrift, const uint32_t currentMicLevel, const bool keyPressed, uint32_t& newMicLevel) { int ret; ret = WebRtcVad_Process(vad, samplesPerSec, (int16_t*)audioSamples, nSamples); if(ret == 0){ if(callback){ callback->input(NULL, 0); } return 0; } if(nBytesPerSample != sizeof(int16_t)){ log_error("invalid nBytesPerSample: %d", nBytesPerSample); return -1; } if(nChannels != 1){ log_error("invalid nChannels: %d", nChannels); return -1; } const int16_t *samples; int16_t tmp[MAX_SAMPLES_PER_10MS]; if(!resampler_in){ samples = (const int16_t*)audioSamples; if(nSamples != this->input_samples_per_10ms){ log_error("invalid nSamples: %d", nSamples); return -1; } }else{ int out_len = 0; int ret = resampler_in->Push((const int16_t*)audioSamples, nSamples, tmp, MAX_SAMPLES_PER_10MS, out_len); if(ret != 0 || out_len != this->input_samples_per_10ms){ log_error("resample error!"); return -1; } samples = tmp; } if(callback){ callback->input(samples, this->input_samples_per_10ms); } return 0; }
/*
 * Runs f_a's hpfa/hpfb filters, then lpfa/lpfb when lowpass_enabled is set,
 * and finally the saturator over `len` floats in `buf`, in place.
 */
static void filter_audio_run_zam_chain(Filter_Audio *f_a, float *buf, unsigned int len)
{
    run_filter_zam(&f_a->hpfa, buf, len);
    run_filter_zam(&f_a->hpfb, buf, len);

    if (f_a->lowpass_enabled) {
        run_filter_zam(&f_a->lpfa, buf, len);
        run_filter_zam(&f_a->lpfb, buf, len);
    }

    run_saturator_zam(buf, len);
}

/*
 * Processes `samples` int16 samples in `data` in place, one frame at a time,
 * through the stages enabled in `f_a` (VAD, gain control, echo cancellation,
 * noise suppression) followed by the ZAM filter chain.
 *
 * `samples` must be a non-zero multiple of f_a->fs / 100. When f_a->fs is not
 * 16000 each frame is converted with downsample_audio() into two 160-sample
 * buffers, processed, and written back with upsample_audio().
 *
 * Returns -1 on invalid arguments or when a processing stage reports -1,
 * 1 when voice was detected in at least one frame (always the case when the
 * VAD is disabled), and 0 otherwise.
 */
int filter_audio(Filter_Audio *f_a, int16_t *data, unsigned int samples)
{
    if (!f_a || !data) {
        return -1;
    }

    /* Samples per frame at the caller's rate. */
    const unsigned int smp = f_a->fs / 100;

    /* smp == 0 (fs < 100) would make the modulo below divide by zero. */
    if (smp == 0 || !samples || (samples % smp) != 0) {
        return -1;
    }

    unsigned int nsx_samples = smp;
    int resample = 0;
    unsigned int resampled_samples = 0;

    if (f_a->fs != 16000) {
        /* Count the work in 160-sample frames instead of caller-rate frames. */
        samples = (samples / smp) * 160;
        nsx_samples = 160;
        resample = 1;
    }

    unsigned int temp_samples = samples;
    int novoice = 1;

    /*
     * One scratch allocation laid out as:
     *   d_l   : nsx_samples int16
     *   temp  : nsx_samples int16 (used as d_h when resampling)
     *   d_f_l : nsx_samples float
     *   d_f_h : nsx_samples float
     *   d_f_u : smp float         (caller-rate frame when resampling)
     *
     * NOTE(review): the early `return -1` paths below do not release this
     * buffer; that is only correct if STACK_ALLOC is stack-backed -- confirm.
     */
    int16_t *d_l = (int16_t *)STACK_ALLOC(nsx_samples * (2 * sizeof(int16_t) + 2 * sizeof(float)) + smp * sizeof(float));
    int16_t *temp = d_l + nsx_samples;
    float *d_f_l = (float *)(temp + nsx_samples);
    float *d_f_h = d_f_l + nsx_samples;
    float *d_f_u = d_f_h + nsx_samples;

    while (temp_samples) {
        /* Stays NULL unless resampling provides a second buffer. */
        int16_t *d_h = NULL;
        memset(temp, 0, nsx_samples * sizeof(int16_t));

        if (resample) {
            d_h = temp;
            downsample_audio(f_a, d_l, d_h, data + resampled_samples, smp);
        } else {
            memcpy(d_l, data + (samples - temp_samples), nsx_samples * sizeof(int16_t));
        }

        if (f_a->vad_enabled) {
            if (WebRtcVad_Process(f_a->Vad_handle, 16000, d_l, nsx_samples) == 1) {
                novoice = 0;
            }
        } else {
            /* Without a VAD every call is reported as containing voice. */
            novoice = 0;
        }

        if (f_a->gain_enabled) {
            int32_t inMicLevel = 128, outMicLevel;

            if (WebRtcAgc_VirtualMic(f_a->gain_control, d_l, d_h, nsx_samples, inMicLevel, &outMicLevel) == -1) {
                return -1;
            }
        }

        S16ToFloatS16(d_l, nsx_samples, d_f_l);
        memset(d_f_h, 0, nsx_samples * sizeof(float));

        if (resample) {
            S16ToFloatS16(d_h, nsx_samples, d_f_h);
        }

        if (f_a->echo_enabled) {
            if (WebRtcAec_Process(f_a->echo_cancellation, d_f_l, d_f_h, d_f_l, d_f_h, nsx_samples, f_a->msInSndCardBuf, 0) == -1) {
                return -1;
            }

            if (resample) {
                FloatS16ToS16(d_f_h, nsx_samples, d_h);
            }

            FloatS16ToS16(d_f_l, nsx_samples, d_l);
        }

        if (f_a->noise_enabled) {
            if (WebRtcNsx_Process(f_a->noise_sup_x, d_l, d_h, d_l, d_h) == -1) {
                return -1;
            }
        }

        if (f_a->gain_enabled) {
            int32_t inMicLevel = 128, outMicLevel;
            uint8_t saturationWarning;

            if (WebRtcAgc_Process(f_a->gain_control, d_l, d_h, nsx_samples, d_l, d_h, inMicLevel, &outMicLevel,
                                  0, &saturationWarning) == -1) {
                return -1;
            }
        }

        if (resample) {
            /* Write the frame back at the caller's rate, then filter it there. */
            upsample_audio(f_a, data + resampled_samples, smp, d_l, d_h, nsx_samples);
            S16ToFloat(data + resampled_samples, smp, d_f_u);
            filter_audio_run_zam_chain(f_a, d_f_u, smp);
            FloatToS16(d_f_u, smp, data + resampled_samples);
            resampled_samples += smp;
        } else {
            S16ToFloat(d_l, nsx_samples, d_f_l);
            filter_audio_run_zam_chain(f_a, d_f_l, nsx_samples);
            FloatToS16(d_f_l, nsx_samples, d_l);
            memcpy(data + (samples - temp_samples), d_l, nsx_samples * sizeof(int16_t));
        }

        temp_samples -= nsx_samples;
    }

    return !novoice;
}