/*
 * The callback called by sound recorder when it has finished capturing a
 * frame.
 *
 * Pipeline (in order): clock-source update -> echo capture -> optional
 * raw dump -> optional Speex noise suppression -> optional processed dump
 * -> deliver frame downstream.
 */
static pj_status_t rec_cb(void *user_data, pjmedia_frame *frame)
{
    pjmedia_snd_port *snd_port = (pjmedia_snd_port*) user_data;
    pjmedia_port *port;

    /* Keep the capture clock source in sync with the device timestamp. */
    pjmedia_clock_src_update(&snd_port->cap_clocksrc, &frame->timestamp);

    port = snd_port->port;
    if (port == NULL)
        return PJ_SUCCESS;  /* No downstream port connected: drop the frame. */

    /* Cancel echo (skipped while the canceller is suspended). */
    if (snd_port->ec_state && !snd_port->ec_suspended) {
        pjmedia_echo_capture(snd_port->ec_state, (pj_int16_t*) frame->buf, 0);
    }

#ifdef MY_SAVE_FILE_BEFORE_SPEEX
    /* Debug dump of the raw (pre-denoise) capture data. */
    fwrite(frame->buf,1,frame->size,fd_bfspeex);
#endif

    /* Optional Speex noise suppression, done in place on the frame buffer.
     * NOTE(review): assumes frame->buf holds exactly the frame size the
     * preprocessor state was created with — TODO confirm. */
    if (pjmedia_audio_use_speex_ns == PJ_TRUE){
        speex_preprocess_run(snd_port->speex_st, frame->buf);
        //PJ_LOG(5,(THIS_FILE, "frame->size:%d",frame->size));
    }

#ifdef MY_SAVE_FILE_SEND
    /* Debug dump of the processed capture data actually sent on. */
    fwrite(frame->buf,1,frame->size,fd_save);
#endif

    /* Hand the (possibly denoised) frame to the connected media port. */
    pjmedia_port_put_frame(port, frame);

    return PJ_SUCCESS;
}
/*
 * JNI entry point: denoise every full frame in `lin` with the Speex
 * preprocessor, then AMR-encode each frame to /sdcard/amrQAQ.amr.
 *
 * @param lin    Java short array with PCM samples.
 * @param offset First sample to read from `lin`.
 * @param size   Number of samples to process; only whole frames
 *               (multiples of `frame_size`) are handled, any trailing
 *               partial frame is dropped.
 * @return number of frames processed, or -1 on failure.
 *
 * FIX: the original function was declared `jint` but had no return
 * statement at all (undefined behaviour); it also used the malloc()
 * result without a null check.
 */
JNIEXPORT jint JNICALL Java_com_ccut_shangri_audiorecorder_IMSpeexDSPAndEnc_denoiseAndEnc
        (JNIEnv *env, jobject obj, jshortArray lin, jint offset, jint size)
{
    LOGD("IMSpeexDSPAndEnc jni start");
    if (!codec_open)
        return -1;

    short *buffer = (short *) malloc(sizeof(short) * size);
    if (buffer == NULL)     /* guard against allocation failure */
        return -1;

    env->GetShortArrayRegion(lin, offset, size, buffer);

    const int frames = size / frame_size;
    for (int i = 0; i < frames; i++) {
        /* 降噪增益处理 — returns 0 when the frame is silence/noise. */
        if (speex_preprocess_run(denoise_state, (buffer + i * frame_size)) == 0) {
            LOGD("IMSpeexDSP_denoise 静音或噪音!");
        }
        handleAmrEnc("/sdcard/amrQAQ.amr", 1, buffer + i * frame_size, frame_size);
        ++count;
    }
    free(buffer);

    LOGD("count = %d", count);
    LOGD("IMSpeexDSPAndEnc jni end");
    return frames;
}
// Capture callback: buffers incoming samples, preprocesses and
// Speex-encodes every complete frame, and ships the encoded frames
// (each prefixed with a one-byte length) to the voice server.
// Returns true to keep the capture running.
bool NetworkSoundRecorder::onProcessSamples(const cpp3ds::Int16 *samples, std::size_t sampleCount)
{
	// Accumulate new capture data behind whatever was left from last call.
	m_samples.insert(m_samples.end(), samples, samples + sampleCount);

	std::vector<char> encodedSamples;
	char out[m_frameSize*2];
	int size = m_samples.size();
	int i = 0;
	while (size >= m_frameSize) {
		// Frame i of the accumulated buffer, denoised in place.
		spx_int16_t* audioFrame = &m_samples[0] + i * m_frameSize;
		speex_preprocess_run(m_speexPreprocessState, audioFrame);
		speex_bits_reset(&m_speexBits);
		speex_encode_int(m_speexState, audioFrame, &m_speexBits);
		// NOTE(review): speex_bits_write() returns int; storing it in a
		// char truncates if a frame ever encodes to >127 bytes. The wire
		// format below uses this single byte as the length prefix, so
		// confirm m_frameSize keeps encoded frames small enough.
		char bytes = speex_bits_write(&m_speexBits, out, sizeof(out));
		encodedSamples.push_back(bytes);
		encodedSamples.insert(encodedSamples.end(), out, out + bytes);
		i++;
		size -= m_frameSize;
	}
	// Keep only the unencoded tail (< one frame) for the next callback.
	std::vector<cpp3ds::Int16>(m_samples.end() - size, m_samples.end()).swap(m_samples);
	std::cout << "size: " << sampleCount * sizeof(cpp3ds::Int16) << std::endl;
	m_context->client.sendVoiceData(m_context->name.toAnsiString(), &encodedSamples[0], encodedSamples.size());
	return true;
}
/*
 * Perform echo cancellation.
 *
 * Runs the Speex canceller on one captured frame against the matching
 * playback frame, denoises the result, and writes the cleaned samples
 * back into rec_frm. `options` and `reserved` must be 0/NULL.
 */
PJ_DEF(pj_status_t) speex_aec_cancel_echo( void *state,
					   pj_int16_t *rec_frm,
					   const pj_int16_t *play_frm,
					   unsigned options,
					   void *reserved )
{
    speex_ec *ec = (speex_ec*) state;

    /* Reject obviously bad arguments up front. */
    PJ_ASSERT_RETURN(ec && rec_frm && play_frm && options==0 &&
		     reserved==NULL, PJ_EINVAL);

    /* Cancel into the scratch frame, run residual-echo/noise
     * suppression on it, then copy the result back to the caller. */
    speex_echo_cancellation(ec->state,
			    (const spx_int16_t*)rec_frm,
			    (const spx_int16_t*)play_frm,
			    (spx_int16_t*)ec->tmp_frame);
    speex_preprocess_run(ec->preprocess, (spx_int16_t*)ec->tmp_frame);
    pjmedia_copy_samples(rec_frm, ec->tmp_frame, ec->samples_per_frame);

    return PJ_SUCCESS;
}
/*
 * Drain out remaining samples if the effect generates any.
 *
 * Called by SoX after the last flow(): if a partial frame is still
 * waiting in the working buffer it is zero-padded, run through the
 * preprocessor, and then emitted (only the real samples, not the
 * padding). Returns SOX_SUCCESS while output remains, SOX_EOF when done.
 */
static int drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp)
{
  priv_t* p = (priv_t*)effp->priv;
  size_t obuf_pos = 0;
  size_t obuf_end = *osamp;
  size_t i;
  size_t end_pos;

  /* Input that hasn't been processed yet? */
  if (p->buffer_ipos != 0) {
    /* DSP only works on full frames, so fill the remaining space with 0s. */
    for (i = p->buffer_ipos; i < p->buffer_end; i++)
      p->buffer[i] = 0;
    speex_preprocess_run(p->sps, p->buffer);
    /* Truncate output to the real (unpadded) sample count. */
    p->buffer_end = p->buffer_ipos;
    p->buffer_ipos = 0;
    p->buffer_opos = 0;
  }

  /* Copy as much processed data as fits into the caller's buffer. */
  end_pos = obuf_pos + min(p->buffer_end - p->buffer_opos, obuf_end - obuf_pos);
  for (; obuf_pos < end_pos; obuf_pos++, p->buffer_opos++)
    obuf[obuf_pos] = SOX_SIGNED_16BIT_TO_SAMPLE(p->buffer[p->buffer_opos], dummy);

  *osamp = obuf_pos;
  /* More left to drain -> SUCCESS (we'll be called again); else EOF. */
  return p->buffer_opos != p->buffer_end ? SOX_SUCCESS : SOX_EOF;
}
int main() { short in[NN]; int i; SpeexPreprocessState *st; int count=0; float f; st = speex_preprocess_state_init(NN, 8000); i=1; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DENOISE, &i); i=0; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC, &i); i=8000; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC_LEVEL, &i); i=0; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB, &i); f=.0; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &f); f=.0; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &f); while (1) { int vad; fread(in, sizeof(short), NN, stdin); if (feof(stdin)) break; vad = speex_preprocess_run(st, in); /*fprintf (stderr, "%d\n", vad);*/ fwrite(in, sizeof(short), NN, stdout); count++; } speex_preprocess_state_destroy(st); return 0; }
/*
 * Run one frame of Speex echo cancellation for the PulseAudio module.
 *
 * rec:  captured (near-end) samples, interpreted as 16-bit PCM.
 * play: the far-end samples that were sent to the speaker.
 * out:  receives the echo-cancelled frame; additionally denoised in
 *       place when a preprocessor state is configured.
 */
void pa_speex_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *play, uint8_t *out)
{
    speex_echo_cancellation(ec->params.priv.speex.state, (const spx_int16_t *) rec, (const spx_int16_t *) play, (spx_int16_t *) out);

    /* preprocessor is run after AEC. This is not a mistake! */
    if (ec->params.priv.speex.pp_state)
        speex_preprocess_run(ec->params.priv.speex.pp_state, (spx_int16_t *) out);
}
/* Playback-path denoiser hook: run the Speex preprocessor in place over
 * one playback frame when a playback preprocessor state exists.
 * Always returns 0. */
static int tdav_speex_denoise_process_playback(tmedia_denoise_t* self, void* audio_frame)
{
	tdav_speex_denoise_t* den = (tdav_speex_denoise_t*)self;

	if (!den->preprocess_state_playback) {
		return 0; /* nothing configured for the playback direction */
	}
	speex_preprocess_run(den->preprocess_state_playback, audio_frame);
	return 0;
}
// Media-graph frame processor: runs the Speex preprocessor in place on
// the incoming audio buffer (when enabled and the frame size matches)
// and tags the buffer with the resulting VAD decision before passing it
// to the output. Returns TRUE when a frame was forwarded.
UtlBoolean MprSpeexPreprocess::doProcessFrame(MpBufPtr inBufs[], MpBufPtr outBufs[], int inBufsSize, int outBufsSize, UtlBoolean isEnabled, int samplesPerFrame, int samplesPerSecond)
{
   MpAudioBufPtr inputBuffer;
   spx_int32_t* echoResidue=NULL;  // currently unused; kept for the legacy echo-residue API
   bool res = false;

   // We don't need to do anything if we don't have an output.
   if (outBufsSize != 1)
      return FALSE;

   // Get incoming data
   inputBuffer.swap(inBufs[0]);

   // Default to "voice present" so an unprocessed frame is never
   // misclassified as silence.
   int isVoiceActive = 1;

   // If the object is not enabled or we don't have valid input, pass input to output
   if ( isEnabled && inputBuffer.isValid() && (inputBuffer->getSamplesNumber() == samplesPerFrame))
   {
      // This buffer will be modified in place. Make sure we're the only owner.
      res = inputBuffer.requestWrite();
      assert(res);

      // Denoise in place; the return value is the preprocessor's VAD verdict.
      isVoiceActive = speex_preprocess_run(mpPreprocessState, (spx_int16_t*)inputBuffer->getSamplesPtr());
   }

   // Annotate the frame for downstream consumers: a real VAD decision
   // when VAD is enabled, otherwise explicitly "unknown".
   if (inputBuffer.isValid())
   {
      if (m_bVadEnabled)
      {
         if (isVoiceActive)
         {
            inputBuffer->setSpeechType(MP_SPEECH_ACTIVE);
         }
         else
         {
            inputBuffer->setSpeechType(MP_SPEECH_SILENT);
         }
      }
      else
      {
         inputBuffer->setSpeechType(MP_SPEECH_UNKNOWN);
      }
   }

   // Hand the (possibly processed) buffer to the output slot.
   outBufs[0].swap(inputBuffer);
   return TRUE;
}
/*
 * Let the Echo Canceller know that a frame has been captured from the
 * microphone.
 *
 * High-passes the captured frame, cancels echo against the oldest frame
 * in the latency buffer, denoises the result in place, and refills the
 * latency buffer from the delay buffer. Returns PJ_SUCCESS, or -1 when
 * the caller passes the wrong frame size.
 */
pj_status_t pjs_echo_canceller::capture(pj_int16_t *rec_frm, unsigned size)
{
	struct frame *oldest_frm;
	pj_status_t status, rc;

	if (samples_per_frame != size) {
		PJ_LOG(1, (THIS_FILE, "WRONG SIZE ON CAPTURE %d != %d", size, samples_per_frame));
		return -1;
	}

	/* DC-removal: two cascaded high-pass stages applied in place. */
	for (unsigned i = 0; i < samples_per_frame; i++) {
		REAL f = hp00.highpass(rec_frm[i]);
		f = hp0.highpass(f);
		rec_frm[i] = round(f);
	}

	/* Scoped lock; note it is manually released around the heavy DSP
	 * calls below and re-acquired afterwards. */
	PPJ_WaitAndLock wl(*lock);

	if (!lat_ready) {
		/* Prefetching to fill in the desired latency */
		PJ_LOG(4, (THIS_FILE, "Prefetching.."));
		return PJ_SUCCESS;
	}

	/* Retrieve oldest frame from the latency buffer */
	oldest_frm = lat_buf.next;
	pj_list_erase(oldest_frm);
	lock->release();

	/* Cancel echo against the oldest reference frame; do the expensive
	 * DSP outside the lock. */
	speex_echo_cancellation(state, (const spx_int16_t*)rec_frm, (const spx_int16_t*)oldest_frm->buf, (spx_int16_t*)tmp_frame);

	/* Preprocess output */
	speex_preprocess_run(preprocess, (spx_int16_t*)tmp_frame);

	pjmedia_copy_samples(rec_frm, tmp_frame, samples_per_frame);
	status = PJ_SUCCESS;

	/* Cancel echo using this reference frame */
	lock->acquire();

	/* Move one frame from delay buffer to the latency buffer. */
	rc = pjmedia_delay_buf_get(delay_buf, oldest_frm->buf);
	if (rc != PJ_SUCCESS) {
		/* Ooops.. no frame! Use silence so the EC state stays aligned. */
		PJ_LOG(4, (THIS_FILE, "No frame from delay buffer. This will upset EC later"));
		pjmedia_zero_samples(oldest_frm->buf, samples_per_frame);
	}
	pj_list_push_back(&lat_buf, oldest_frm);

	return status;
}
/*
 * Run the Speex preprocessor in place over `buf`, one frame of
 * `mBytesPerFrame` bytes at a time.
 *
 * @param buf  PCM data (16-bit samples) to denoise in place.
 * @param size Size of `buf` in bytes.
 * @return `size` when processing was attempted, 0 when no preprocessor
 *         state exists. A trailing partial frame (and any buffer
 *         smaller than one frame) is left untouched, because Speex only
 *         accepts complete frames.
 */
int SoundPreprocessor::preprocess(void *buf, int size)
{
    if (!mpSpStat) {
        return 0;   /* preprocessor not initialised */
    }

    int8_t *pTmp = (int8_t*)buf;
    if (size > mBytesPerFrame) {
        /* Process every whole frame that fits. */
        int nSetup = size / mBytesPerFrame;
        for (int i = 0; i < nSetup; i++) {
            speex_preprocess_run(mpSpStat, (short *)pTmp);
            pTmp += mBytesPerFrame;
        }
    } else if (size == mBytesPerFrame) {
        speex_preprocess_run(mpSpStat, (short *)pTmp);
    } else {
        /* FIX: the original message printed the comparison backwards
         * ("bytesPerFrame < bufSize") for the size < mBytesPerFrame case. */
        LOGD("func : %s,,not process,,bytesPerFrame:%d > bufSize:%d",
             __FUNCTION__, mBytesPerFrame, size);
    }
    return size;
}
/*
 * JNI: echo-cancel one captured frame against its playback frame and
 * return the cleaned samples as a new Java short array.
 *
 * FIXES vs. original:
 *  - a bare `return;` in a function returning jshortArray (undefined
 *    behaviour) now releases the borrowed arrays and returns NULL;
 *  - GetShortArrayElements on the scratch array is only attempted when
 *    NewShortArray succeeded.
 */
JNIEXPORT jshortArray JNICALL Java_com_pullmi_shanghai_TalkActivity_speex_1EchoCanceller_1process
  (JNIEnv *env, jobject jobj, jshortArray input_frame, jshortArray echo_frame)
{
    /* create native shorts from java shorts */
    jint length = (*env)->GetArrayLength(env, input_frame);
    jshort *native_input_frame = (*env)->GetShortArrayElements(env, input_frame, 0);
    jshort *native_echo_frame = (*env)->GetShortArrayElements(env, echo_frame, 0);

    /* NOTE(review): SPEEX_FRAME_BYTE is a file-scope global reassigned per
     * call and used to size VLAs — confirm thread-safety with callers. */
    SPEEX_FRAME_BYTE = length;
    short ref[SPEEX_FRAME_BYTE], mic[SPEEX_FRAME_BYTE], out[SPEEX_FRAME_BYTE];
    int i;
    for (i = 0; i < SPEEX_FRAME_BYTE; ++i) {
        ref[i] = native_echo_frame[i];
        mic[i] = native_input_frame[i];
    }

    /* allocate memory for output data */
    jshortArray temp = (*env)->NewShortArray(env, length);
    jshort *native_output_frame = temp ? (*env)->GetShortArrayElements(env, temp, 0) : NULL;
    if (0 >= length || NULL == native_output_frame) {
        LOGE("create out error");
        /* Release borrowed buffers before bailing out. */
        (*env)->ReleaseShortArrayElements(env, input_frame, native_input_frame, 0);
        (*env)->ReleaseShortArrayElements(env, echo_frame, native_echo_frame, 0);
        return NULL;
    }

    /* call echo cancellation, then denoise the cleaned frame */
    speex_echo_cancellation(st, mic, ref, out);
    speex_preprocess_run(den, out);

    for (i = 0; i < length; ++i) {
        native_output_frame[i] = out[i];
    }

    /* convert native output to java layer output */
    jshortArray output_shorts = (*env)->NewShortArray(env, length);
    (*env)->SetShortArrayRegion(env, output_shorts, 0, length, native_output_frame);

    /* cleanup and return */
    (*env)->ReleaseShortArrayElements(env, input_frame, native_input_frame, 0);
    (*env)->ReleaseShortArrayElements(env, echo_frame, native_echo_frame, 0);
    (*env)->ReleaseShortArrayElements(env, temp, native_output_frame, 0);
    return output_shorts;
}
/*
 * Preprocess and Speex-encode one frame of audio.
 *
 * @param sample_buf   PCM samples; denoised in place by the preprocessor.
 * @param nsamples     Number of samples in the frame (unused here).
 * @param payload      Output buffer for the encoded bitstream.
 * @param payload_size Capacity of `payload`; must be >= MAX_PAYLOAD_SIZE.
 * @param silence      Set when either the encoder or the preprocessor's
 *                     VAD classified the frame as silence.
 * @return number of payload bytes written.
 * @throws OperationNotPerfomedException on a too-small buffer or when
 *         the encoder is not initialised.
 */
uint16 SpeexPlugin::encode(int16 *sample_buf, uint16 nsamples, uint8 *payload, uint16 payload_size, bool &silence) throw(OperationNotPerfomedException)
{
	if (payload_size < MAX_PAYLOAD_SIZE)
		throw OperationNotPerfomedException("The buffer is not large enough");
	if (!encoder)
		throw OperationNotPerfomedException("Encoder not ready");

	// TODO: echo cancellation is still missing here. A sketch using
	// speex_echo_capture() existed in earlier revisions (copy samples to
	// a spx_int16_t scratch buffer, call speex_echo_capture(), mark
	// echoCapturedLast) and should be restored once wired up.

	bool preprocessing_silence = false;
	if (preprocess) {
		preprocessing_silence = !speex_preprocess_run(preprocess, sample_buf);

		// FIX: SPEEX_PREPROCESS_GET_VAD writes a spx_int32_t. The
		// original passed a bool*, letting the library clobber adjacent
		// stack bytes (undefined behaviour).
		spx_int32_t speex_dsp_vad = 0;
		speex_preprocess_ctl(preprocess, SPEEX_PREPROCESS_GET_VAD, &speex_dsp_vad);
		// The run() return value is only meaningful with VAD enabled;
		// otherwise never report preprocessing silence.
		if (!speex_dsp_vad)
			preprocessing_silence = false;
	}

	silence = false;
	speex_bits_reset(&encoder->bits);
	// speex_encode_int() returns 0 when the encoder decided the frame is
	// silence (DTX).
	silence = speex_encode_int(encoder->state, sample_buf, &encoder->bits) == 0;
	silence = silence || preprocessing_silence;
	return speex_bits_write(&encoder->bits, (char *) payload, payload_size);
}
/* Record-path denoiser hook: optionally applies AEC output to the
 * captured frame, denoises it in place, and reports silence/noise via
 * the VAD result when VAD is enabled. Always returns 0. */
static int tdav_speex_denoise_process_record(tmedia_denoise_t* self, void* audio_frame, tsk_bool_t* silence_or_noise)
{
	tdav_speex_denoise_t* den = (tdav_speex_denoise_t*)self;
	int vad;

	if (!den->preprocess_state_record) {
		return 0;
	}

	/* With AEC active, swap the captured frame for its echo-free version. */
	if (den->echo_state && den->echo_output_frame) {
		speex_echo_capture(den->echo_state, audio_frame, den->echo_output_frame);
		memcpy(audio_frame, den->echo_output_frame, den->frame_size * sizeof(spx_int16_t));
	}

	/* Denoise in place; the return value is the VAD decision. */
	vad = speex_preprocess_run(den->preprocess_state_record, audio_frame);
	if (TMEDIA_DENOISE(den)->vad_enabled && !vad) {
		*silence_or_noise = tsk_true;
	}
	return 0;
}
/*
 * Apply the Speex preprocessor to one block of 16-bit samples.
 *
 * Returns 0 on success; -1 when no preprocessor state exists, when the
 * sample count does not match exactly one frame, or when the build has
 * no usable libspeex preprocessing API.
 */
int roardsp_speex_prep_calc161(struct roardsp_filter * filter, void * data, size_t samples) {
 struct roardsp_speex_prep * self = filter->inst;

 if ( self->preprocess == NULL )
  return -1;

 /* The DSP only works on whole frames; `samples` here is compared
  * against the frame size expressed via the filter's bit width. */
 if ( samples != ((self->frame_size * filter->bits) / 8) )
  return -1;

 /* Select the call matching the libspeex generation this was built
  * against (old two-arg API vs. the current *_run API). */
#ifdef _SPEEX_API_OLD
 speex_preprocess(self->preprocess, data, NULL);
#elif defined(_SPEEX_API_NEW)
 speex_preprocess_run(self->preprocess, data);
#else
 return -1;
#endif

 return 0;
}
// PortAudio capture callback: denoises the captured frame, pops the most
// recent playback frame from a lock-free ring (16 slots) to use as the
// echo reference, optionally echo-cancels in place, then encodes and
// emits the frame (2-byte channel-count header + encoded payload).
int sound::inputcallback(const void *input, void *output, unsigned long frameCount, const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags, void *userData)
{
	sound * psound = (sound*)userData;

	// NOTE(review): denoising runs BEFORE echo cancellation here; the
	// usual Speex recommendation is preprocessor after AEC — confirm this
	// ordering is intentional.
	if (psound->speexppstate){
		speex_preprocess_run(psound->speexppstate, (short *)input);
	}

	// CAS loop: advance `end` past `begin` competitors to claim the most
	// recently queued playback buffer as the echo reference.
	short * outputbuff = 0;
	while (1){
		int _end = psound->end.load();
		int _new = _end + 1;
		if (_end == psound->begin.load()){
			break;       // ring empty: no reference frame available
		}
		if (_new == 16){
			_new = 0;    // wrap around the 16-slot ring
		}
		if (psound->end.compare_exchange_strong(_end, _new)){
			outputbuff = (short *)psound->inputbuff[_end].buf;
			break;
		}
	}

	// Echo-cancel in place only when we actually have a reference frame.
	if (psound->isopenecho == true){
		if (outputbuff != 0){
			speex_echo_cancellation(psound->speexechostate, (short*)input, outputbuff, (short*)input);
		}
	}

	// Packet layout: [int16 channelCount][encoded audio].
	// NOTE(review): the encoder is told it has 4096 bytes but writes at
	// tmp+2, leaving only 4094 — confirm encoded() can never overrun.
	char tmp[4096];
	*((short*)tmp) = psound->inputStreamParameters.channelCount;
	int len = psound->_encode.encoded((char*)input, frameCount, tmp+2, 4096);
	psound->sigCapture((char*)tmp, len + 2);
	return paContinue;
}
/*
 * Process up to *isamp samples from ibuf and produce up to *osamp samples
 * in obuf. Write back the actual numbers of samples to *isamp and *osamp.
 * Return SOX_SUCCESS or, if error occurs, SOX_EOF.
 *
 * The working buffer `p->buffer` holds exactly one DSP frame; each loop
 * iteration drains processed samples to obuf, refills from ibuf, and runs
 * the preprocessor once a full frame has accumulated. The loop exits when
 * either side runs dry.
 */
static int flow(
    sox_effect_t* effp, const sox_sample_t* ibuf, sox_sample_t* obuf,
    size_t* isamp, size_t* osamp)
{
  priv_t* p = (priv_t*)effp->priv;
  size_t ibuf_pos = 0;
  size_t ibuf_end = *isamp;
  size_t obuf_pos = 0;
  size_t obuf_end = *osamp;
  size_t end_pos;
  SOX_SAMPLE_LOCALS;

  for (;;) {
    /* Write any processed data in working buffer to the output buffer. */
    end_pos = obuf_pos + min(p->buffer_end - p->buffer_opos, obuf_end - obuf_pos);
    for (; obuf_pos < end_pos; obuf_pos++, p->buffer_opos++)
      obuf[obuf_pos] = SOX_SIGNED_16BIT_TO_SAMPLE(p->buffer[p->buffer_opos], dummy);
    if (p->buffer_opos != p->buffer_end)
      break;
      /* Output buffer is full and we still have more processed data. */

    /* Fill working buffer from input buffer. */
    end_pos = ibuf_pos + min(p->buffer_end - p->buffer_ipos, ibuf_end - ibuf_pos);
    for (; ibuf_pos < end_pos; ibuf_pos++, p->buffer_ipos++)
      p->buffer[p->buffer_ipos] = SOX_SAMPLE_TO_SIGNED_16BIT(ibuf[ibuf_pos], effp->clips);
    if (p->buffer_ipos != p->buffer_end)
      break;
      /* Working buffer is not full and there is no more input data. */

    /* One complete frame collected: denoise it in place and make it
     * available for draining on the next iteration. */
    speex_preprocess_run(p->sps, p->buffer);
    p->buffer_ipos = 0;
    p->buffer_opos = 0;
  }

  *isamp = ibuf_pos;
  *osamp = obuf_pos;
  return SOX_SUCCESS;
}
/*
 * Perform echo cancellation to captured frame.
 *
 * Uses the playback history already fed to the canceller (via the
 * playback side of the API): a copy of the captured frame goes in, the
 * echo-cancelled result lands back in rec_frm, and the preprocessor
 * then suppresses residual echo/noise in place.
 */
PJ_DEF(pj_status_t) speex_aec_capture( void *state,
				       pj_int16_t *rec_frm,
				       unsigned options )
{
    speex_ec *ec = (speex_ec*) state;

    /* Sanity checks */
    PJ_ASSERT_RETURN(ec && rec_frm, PJ_EINVAL);
    PJ_UNUSED_ARG(options);

    /* Snapshot the captured frame, then cancel into rec_frm. */
    pjmedia_copy_samples(ec->tmp_frame, rec_frm, ec->samples_per_frame);
    speex_echo_capture(ec->state,
		       (spx_int16_t*)ec->tmp_frame,
		       (spx_int16_t*)rec_frm);

    /* Residual echo / noise suppression, in place. */
    speex_preprocess_run(ec->preprocess, (spx_int16_t*)rec_frm);

    return PJ_SUCCESS;
}
/*
 * Encode-side audio filter: denoise the sample block in place with the
 * Speex preprocessor. Always returns 0; empty blocks are passed through
 * untouched.
 */
static int encode(struct aufilt_enc_st *st, int16_t *sampv, size_t *sampc)
{
	struct preproc *pp = (struct preproc *)st;
	int vad = 1;

	if (!*sampc)
		return 0;

	/* NOTE: presence of this macro distinguishes the libspeex API
	   generation. */
#ifdef SPEEX_PREPROCESS_SET_NOISE_SUPPRESS
	/* New API */
	vad = speex_preprocess_run(pp->state, sampv);
#else
	/* Old API - not tested! */
	vad = speex_preprocess(pp->state, sampv, NULL);
#endif

	/* XXX: Handle vad result and VAD */
	(void)vad;

	return 0;
}
int main(int argc, char **argv) { FILE *echo_fd, *ref_fd, *e_fd; short echo_buf[NN], ref_buf[NN], e_buf[NN]; SpeexEchoState *st; SpeexPreprocessState *den; int sampleRate = 8000; if (argc != 4) { fprintf(stderr, "testecho mic_signal.sw speaker_signal.sw output.sw\n"); exit(1); } echo_fd = fopen(argv[2], "rb"); ref_fd = fopen(argv[1], "rb"); e_fd = fopen(argv[3], "wb"); st = speex_echo_state_init(NN, TAIL); den = speex_preprocess_state_init(NN, sampleRate); speex_echo_ctl(st, SPEEX_ECHO_SET_SAMPLING_RATE, &sampleRate); speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_ECHO_STATE, st); while (!feof(ref_fd) && !feof(echo_fd)) { fread(ref_buf, sizeof(short), NN, ref_fd); fread(echo_buf, sizeof(short), NN, echo_fd); speex_echo_cancellation(st, ref_buf, echo_buf, e_buf); speex_preprocess_run(den, e_buf); fwrite(e_buf, sizeof(short), NN, e_fd); } speex_echo_state_destroy(st); speex_preprocess_state_destroy(den); fclose(e_fd); fclose(echo_fd); fclose(ref_fd); return 0; }
/*
 * Denoise a captured PCM buffer in place, one SAMPLES_PER_RECORD frame
 * at a time.
 *
 * @param lpData  Buffer of 16-bit samples to denoise in place.
 * @param dwBytes Size of `lpData` in bytes; a trailing partial frame is
 *                left untouched (Speex only accepts complete frames).
 *
 * FIX: the original loop compared its counter against
 * SAMPLES_PER_RECORD while also incrementing by SAMPLES_PER_RECORD, so
 * it executed exactly once and ignored `dwBytes` entirely; the loop now
 * walks the whole buffer.
 */
void AudioCaptureThread::Denoise(BYTE* lpData, DWORD dwBytes)
{
	DWORD dwTotalSamples = dwBytes / sizeof(short);
	DWORD dwProcessed = 0;
	short* in = (short*)lpData;
	int i;
	SpeexPreprocessState *st;
	float f;

	// NOTE(review): creating a fresh preprocessor per call resets the
	// adaptive noise estimate every time; consider hoisting `st` into a
	// class member for better suppression quality.
	st = speex_preprocess_state_init(SAMPLES_PER_RECORD, SAMPLES_PER_SECOND);
	// Denoise on; AGC and dereverb off.
	i=1;
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DENOISE, &i);
	i=0;
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC, &i);
	i=SAMPLES_PER_SECOND;
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC_LEVEL, &i);
	i=0;
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB, &i);
	f=.0;
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &f);
	f=.0;
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &f);

	// Denoise every complete frame in the caller's buffer.
	while (dwProcessed + SAMPLES_PER_RECORD <= dwTotalSamples)
	{
		speex_preprocess_run(st, in);
		dwProcessed += SAMPLES_PER_RECORD;
		in += SAMPLES_PER_RECORD;
	}
	speex_preprocess_state_destroy(st);
}
/*
 * JNI: echo-cancel one frame (`rec` against `play`), denoise the
 * result, Speex-encode it, and copy the bitstream into `encoded`.
 * Returns the number of encoded bytes written.
 */
extern "C" JNIEXPORT int Java_com_haitou_xiaoyoupai_imservice_support_audio_Speex_echoCancellationEncode(
		JNIEnv *env, jshortArray rec, jshortArray play, jbyteArray encoded)
{
	jshort rec_buf[enc_frame_size];
	jshort play_buf[enc_frame_size];
	jshort clean_buf[enc_frame_size];
	jbyte out_bytes[enc_frame_size];

	/* Pull one frame of near-end and far-end samples from Java. */
	env->GetShortArrayRegion(rec, 0, enc_frame_size, rec_buf);
	env->GetShortArrayRegion(play, 0, enc_frame_size, play_buf);

	/* AEC first, then residual-echo/noise suppression in place. */
	speex_echo_cancellation(echoState, rec_buf, play_buf, clean_buf);
	speex_preprocess_run(den, clean_buf);

	/* Encode the cleaned frame into the shared bit buffer. */
	speex_bits_reset(&ebits);
	speex_encode_int(enc_state, clean_buf, &ebits);
	jint tot_bytes = speex_bits_write(&ebits, (char *) out_bytes, enc_frame_size);

	/* Copy the bitstream back out to the Java byte array. */
	env->SetByteArrayRegion(encoded, 0, tot_bytes, out_bytes);
	return (jint) tot_bytes;
}
/*
 * Full-duplex UDP voice chat demo: captures from ALSA, echo-cancels,
 * denoises, Speex-encodes, and sends frames over UDP while decoding
 * incoming packets through a jitter buffer for playback.
 *
 * Usage: prog <alsa-device> <remote-host> <local-port> <remote-port>
 */
int main(int argc, char *argv[])
{
   int sd, rc, n;
   int i;
   struct sockaddr_in cliAddr, remoteAddr;
   char msg[MAX_MSG];
   struct hostent *h;
   int local_port, remote_port;
   int nfds;
   struct pollfd *pfds;
   SpeexPreprocessState *preprocess;
   AlsaDevice *audio_dev;
   int tmp;

   if (argc != 5)
   {
      fprintf(stderr, "wrong options\n");
      exit(1);
   }

   h = gethostbyname(argv[2]);
   if(h==NULL)
   {
      /* NOTE(review): prints argv[1] (the audio device) but the host that
       * failed to resolve is argv[2] — likely a copy/paste slip. */
      fprintf(stderr, "%s: unknown host '%s' \n", argv[0], argv[1]);
      exit(1);
   }

   local_port = atoi(argv[3]);
   remote_port = atoi(argv[4]);

   printf("%s: sending data to '%s' (IP : %s) \n", argv[0], h->h_name, inet_ntoa(*(struct in_addr *)h->h_addr_list[0]));

   {
      remoteAddr.sin_family = h->h_addrtype;
      memcpy((char *) &remoteAddr.sin_addr.s_addr, h->h_addr_list[0], h->h_length);
      remoteAddr.sin_port = htons(remote_port);
   }
   /* socket creation */
   sd=socket(AF_INET, SOCK_DGRAM, 0);
   if(sd<0)
   {
      printf("%s: cannot open socket \n",argv[0]);
      exit(1);
   }
   /* bind any port */
   cliAddr.sin_family = AF_INET;
   cliAddr.sin_addr.s_addr = htonl(INADDR_ANY);
   cliAddr.sin_port = htons(local_port);
   rc = bind(sd, (struct sockaddr *) &cliAddr, sizeof(cliAddr));
   if(rc<0)
   {
      printf("%s: cannot bind port\n", argv[0]);
      exit(1);
   }

   /* Setup audio device */
   audio_dev = alsa_device_open(argv[1], SAMPLING_RATE, 1, FRAME_SIZE);

   /* Setup the encoder and decoder in wideband */
   void *enc_state, *dec_state;
   enc_state = speex_encoder_init(&speex_wb_mode);
   tmp = 8;
   speex_encoder_ctl(enc_state, SPEEX_SET_QUALITY, &tmp);
   tmp = 2;
   speex_encoder_ctl(enc_state, SPEEX_SET_COMPLEXITY, &tmp);
   dec_state = speex_decoder_init(&speex_wb_mode);
   tmp = 1;
   speex_decoder_ctl(dec_state, SPEEX_SET_ENH, &tmp);
   SpeexBits enc_bits, dec_bits;
   speex_bits_init(&enc_bits);
   speex_bits_init(&dec_bits);

   /* Realtime scheduling so audio deadlines are met. */
   struct sched_param param;
   /*param.sched_priority = 40; */
   param.sched_priority = sched_get_priority_min(SCHED_FIFO);
   if (sched_setscheduler(0,SCHED_FIFO,&param))
      perror("sched_setscheduler");

   int send_timestamp = 0;
   int recv_started=0;

   /* Setup all file descriptors for poll()ing */
   nfds = alsa_device_nfds(audio_dev);
   pfds = malloc(sizeof(*pfds)*(nfds+1));
   alsa_device_getfds(audio_dev, pfds, nfds);
   pfds[nfds].fd = sd;
   pfds[nfds].events = POLLIN;

   /* Setup jitter buffer using decoder */
   SpeexJitter jitter;
   speex_jitter_init(&jitter, dec_state, SAMPLING_RATE);

   /* Echo canceller with 200 ms tail length */
   SpeexEchoState *echo_state = speex_echo_state_init(FRAME_SIZE, 10*FRAME_SIZE);
   tmp = SAMPLING_RATE;
   speex_echo_ctl(echo_state, SPEEX_ECHO_SET_SAMPLING_RATE, &tmp);

   /* Setup preprocessor and associate with echo canceller for residual echo suppression */
   preprocess = speex_preprocess_state_init(FRAME_SIZE, SAMPLING_RATE);
   speex_preprocess_ctl(preprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, echo_state);

   alsa_device_start(audio_dev);

   /* Infinite loop on capture, playback and receiving packets */
   while (1)
   {
      /* Wait for either 1) capture 2) playback 3) socket data */
      poll(pfds, nfds+1, -1);
      /* Received packets */
      if (pfds[nfds].revents & POLLIN)
      {
         /*fprintf (stderr, "x");*/
         n = recv(sd, msg, MAX_MSG, 0);
         int recv_timestamp = ((int*)msg)[1];
         int payload = ((int*)msg)[0];

         /* High bit of the payload word marks non-audio packets. */
         if ((payload & 0x80000000) == 0)
         {
            /* Put content of the packet into the jitter buffer, except for the pseudo-header */
            speex_jitter_put(&jitter, msg+8, n-8, recv_timestamp);
            recv_started = 1;
         }
      }
      /* Ready to play a frame (playback) */
      if (alsa_device_playback_ready(audio_dev, pfds, nfds))
      {
         short pcm[FRAME_SIZE];
         if (recv_started)
         {
            /* Get audio from the jitter buffer */
            speex_jitter_get(&jitter, pcm, NULL);
         } else {
            /* Nothing received yet: play silence. */
            for (i=0; i<FRAME_SIZE; i++)
               pcm[i] = 0;
         }
         /* Playback the audio and reset the echo canceller if we got an underrun */
         if (alsa_device_write(audio_dev, pcm, FRAME_SIZE))
            speex_echo_state_reset(echo_state);
         /* Put frame into playback buffer */
         speex_echo_playback(echo_state, pcm);
      }
      /* Audio available from the soundcard (capture) */
      if (alsa_device_capture_ready(audio_dev, pfds, nfds))
      {
         short pcm[FRAME_SIZE], pcm2[FRAME_SIZE];
         char outpacket[MAX_MSG];

         /* Get audio from the soundcard */
         alsa_device_read(audio_dev, pcm, FRAME_SIZE);

         /* Perform echo cancellation */
         speex_echo_capture(echo_state, pcm, pcm2);
         for (i=0; i<FRAME_SIZE; i++)
            pcm[i] = pcm2[i];

         speex_bits_reset(&enc_bits);

         /* Apply noise/echo suppression */
         speex_preprocess_run(preprocess, pcm);

         /* Encode */
         speex_encode_int(enc_state, pcm, &enc_bits);
         int packetSize = speex_bits_write(&enc_bits, outpacket+8, MAX_MSG);

         /* Pseudo header: four null bytes and a 32-bit timestamp */
         ((int*)outpacket)[0] = htonl(0);
         ((int*)outpacket)[1] = send_timestamp;
         send_timestamp += FRAME_SIZE;
         rc = sendto(sd, outpacket, packetSize+8, 0, (struct sockaddr *) &remoteAddr, sizeof(remoteAddr));

         if (rc<0)
         {
            printf("cannot send audio data\n");
            close(sd);
            exit(1);
         }
      }
   }
   return 0;
}
int main(int argc, char*argv[]) { /* The Sample format to use */ static const pa_sample_spec ss = { .format = PA_SAMPLE_S16LE, .rate = 44100, .channels = 2 }; pa_simple* dev_out = 0; pa_simple* dev_in = 0; int ret = 1; int error; /* Create a new playback stream */ if (!(dev_out = pa_simple_new(NULL, "Noise Remover", PA_STREAM_PLAYBACK, NULL, "playback", &ss, NULL, NULL, &error))) { fprintf(stderr, __FILE__": pa_simple_new() failed: %dev_out\n", pa_strerror(error)); goto finish; } if (!(dev_in = pa_simple_new(NULL, "Noise Remover", PA_STREAM_RECORD, NULL, "record", &ss, NULL, NULL, &error))) { fprintf(stderr, __FILE__": pa_simple_new() failed: %dev_out\n", pa_strerror(error)); goto finish; } { int i; float f; SpeexPreprocessState* pp = speex_preprocess_state_init(BUFSIZE, ss.rate); i = 1; speex_preprocess_ctl(pp, SPEEX_PREPROCESS_SET_DENOISE, &i); i = 1; speex_preprocess_ctl(pp, SPEEX_PREPROCESS_SET_AGC, &i); f = 8000; speex_preprocess_ctl(pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &f); i = 1; speex_preprocess_ctl(pp, SPEEX_PREPROCESS_SET_DEREVERB, &i); f = 0.04; speex_preprocess_ctl(pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &f); f = 0.03; speex_preprocess_ctl(pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &f); double lowest_rms = 99999999999999; int silence_count = 0; for (;;) { int16_t buf[BUFSIZE]; /* Read some data ... */ if (pa_simple_read(dev_in, buf, sizeof(buf), &error) < 0) { fprintf(stderr, __FILE__": pa_simple_read() failed: %s\n", pa_strerror(error)); goto finish; } /* ... Use speex to de-noise ... */ double total = 0; for(int n = 0; n < sizeof(buf); n++) total += buf[n] * buf[n]; double rms = std::sqrt(total / sizeof(buf)); if(rms < lowest_rms) lowest_rms = rms; if((rms - lowest_rms) < 50) // this value will probably need adjusting for you silence_count = 0; else if(silence_count < 10) silence_count++; if(silence_count == 10) speex_preprocess_run(pp, buf); else continue; // don't write it out... /* ... 
and play it */ if (pa_simple_write(dev_out, buf, sizeof(buf), &error) < 0) { fprintf(stderr, __FILE__": pa_simple_write() failed: %dev_out\n", pa_strerror(error)); goto finish; } } /* Make sure that every single sample was played */ if (pa_simple_drain(dev_out, &error) < 0) { fprintf(stderr, __FILE__": pa_simple_drain() failed: %dev_out\n", pa_strerror(error)); goto finish; } } ret = 0; finish: if (dev_out) pa_simple_free(dev_out); if (dev_in) pa_simple_free(dev_in); return ret; }
/* inputs[0]= reference signal from far end (sent to soundcard)
 * inputs[1]= near speech & echo signal (read from soundcard)
 * outputs[0]= is a copy of inputs[0] to be sent to soundcard
 * outputs[1]= near end speech, echo removed - towards far end
 *
 * Per tick: buffers the reference stream twice (a "no-delay" copy that
 * is forwarded to the soundcard, and a delayed copy aligned with the
 * echo), then for each full captured frame runs AEC + preprocessing.
 * A flow controller trims the reference stream if it accumulates.
 */
static void speex_ec_process(MSFilter *f){
	SpeexECState *s=(SpeexECState*)f->data;
	int nbytes=s->framesize*2;   /* frame size in bytes (16-bit samples) */
	mblk_t *refm;
	uint8_t *ref,*echo;

	/* Bypass: shuttle both streams straight through untouched. */
	if (s->bypass_mode) {
		while((refm=ms_queue_get(f->inputs[0]))!=NULL){
			ms_queue_put(f->outputs[0],refm);
		}
		while((refm=ms_queue_get(f->inputs[1]))!=NULL){
			ms_queue_put(f->outputs[1],refm);
		}
		return;
	}

	/* Queue reference data only once echo frames have started flowing,
	 * so both buffers stay time-aligned. */
	if (f->inputs[0]!=NULL){
		if (s->echostarted){
			while((refm=ms_queue_get(f->inputs[0]))!=NULL){
				refm=audio_flow_controller_process(&s->afc,refm);
				if (refm){
					mblk_t *cp=dupmsg(refm);
					ms_bufferizer_put(&s->delayed_ref,cp);
					ms_bufferizer_put(&s->ref,refm);
				}
			}
		}else{
			ms_warning("Getting reference signal but no echo to synchronize on.");
			ms_queue_flush(f->inputs[0]);
		}
	}

	ms_bufferizer_put_from_queue(&s->echo,f->inputs[1]);

	ref=(uint8_t*)alloca(nbytes);
	echo=(uint8_t*)alloca(nbytes);
	while (ms_bufferizer_read(&s->echo,echo,nbytes)==nbytes){
		mblk_t *oecho=allocb(nbytes,0);
		int avail;
		int avail_samples;

		if (!s->echostarted) s->echostarted=TRUE;
		if ((avail=ms_bufferizer_get_avail(&s->delayed_ref))<((s->nominal_ref_samples*2)+nbytes)){
			/*we don't have enough to read in a reference signal buffer, inject silence instead*/
			avail=nbytes;
			refm=allocb(nbytes,0);
			memset(refm->b_wptr,0,nbytes);
			refm->b_wptr+=nbytes;
			ms_bufferizer_put(&s->delayed_ref,refm);
			ms_queue_put(f->outputs[0],dupmsg(refm));
			if (!s->using_zeroes){
				ms_warning("Not enough ref samples, using zeroes");
				s->using_zeroes=TRUE;
			}
		}else{
			if (s->using_zeroes){
				ms_message("Samples are back.");
				s->using_zeroes=FALSE;
			}
			/* read from our no-delay buffer and output */
			refm=allocb(nbytes,0);
			if (ms_bufferizer_read(&s->ref,refm->b_wptr,nbytes)==0){
				ms_fatal("Should never happen");
			}
			refm->b_wptr+=nbytes;
			ms_queue_put(f->outputs[0],refm);
		}

		/*now read a valid buffer of delayed ref samples*/
		if (ms_bufferizer_read(&s->delayed_ref,ref,nbytes)==0){
			ms_fatal("Should never happen");
		}
		avail-=nbytes;
		avail_samples=avail/2;
		/*ms_message("avail=%i",avail_samples);*/
		/* Track the per-interval minimum backlog for flow control. */
		if (avail_samples<s->min_ref_samples || s->min_ref_samples==-1){
			s->min_ref_samples=avail_samples;
		}

#ifdef EC_DUMP
		if (s->reffile)
			fwrite(ref,nbytes,1,s->reffile);
		if (s->echofile)
			fwrite(echo,nbytes,1,s->echofile);
#endif
		/* AEC then residual-echo/noise suppression on the cleaned frame. */
		speex_echo_cancellation(s->ecstate,(short*)echo,(short*)ref,(short*)oecho->b_wptr);
		speex_preprocess_run(s->den, (short*)oecho->b_wptr);
#ifdef EC_DUMP
		if (s->cleanfile)
			fwrite(oecho->b_wptr,nbytes,1,s->cleanfile);
#endif
		oecho->b_wptr+=nbytes;
		ms_queue_put(f->outputs[1],oecho);
	}

	/*verify our ref buffer does not become too big, meaning that we are receiving more samples than we are sending*/
	if ((((uint32_t)(f->ticker->time - s->flow_control_time)) >= flow_control_interval_ms) && (s->min_ref_samples != -1)) {
		int diff=s->min_ref_samples-s->nominal_ref_samples;
		if (diff>(nbytes/2)){
			int purge=diff-(nbytes/2);
			ms_warning("echo canceller: we are accumulating too much reference signal, need to throw out %i samples",purge);
			audio_flow_controller_set_target(&s->afc,purge,(flow_control_interval_ms*s->samplerate)/1000);
		}
		s->min_ref_samples=-1;
		s->flow_control_time = f->ticker->time;
	}
}
/*
 * Word-recognition experiment: captures 8 kHz audio from ALSA (taking
 * the left channel of a stereo stream), denoises it with Speex,
 * computes LPC-derived spectral features per overlapping window, and
 * uses an energy gate to segment words, which are then either trained
 * into or matched against a codebook (cb.txt).
 *
 * Relies heavily on file-scope globals (lpc, last, last2, word, wsize,
 * cb, start, size, cbsize, bin, training, report, realtwiddle,
 * imtwiddle) and compile-time constants (WINDOW, STEP, ORDER, FFT, FF2,
 * TILT, EMPH, TOL_ON, TOL_OFF, ETOPBIT, TRACE).
 */
main(int argc, char *argv[]) {
  FILE *fp;
  int fd,arg;
  snd_pcm_t *handle;
  snd_pcm_hw_params_t *hw_params;
  int rate=8000;
  float f[WINDOW],hann[WINDOW],w[WINDOW],w2[WINDOW],s0,s1=0,tot;
  float ac[ORDER+1],lcp[ORDER+1],lsp[ORDER],l[ORDER],weight[ORDER],delta,d;
  short sample,s[160],buf[2000];
  int i,j,n,b,toggle=1;
  float e,laste=0;
  int ebit=ETOPBIT, ebuff=0;
  int sound=0; // boolean start/stop
  float f2[FFT],min;
  float real[FFT],imag[FFT];
  int dummy[100000]; // NOTE(review): appears unused — possibly stack padding?
  float amp[WINDOW],pha[WINDOW];
  int frame=0;
  SpeexPreprocessState *st;

  for (i=0; i<8; i++) report[i]=0;

  // Speex denoiser over 160-sample frames at 8 kHz; only DENOISE is on.
  st = speex_preprocess_state_init(160, 8000);
  i=1;
  speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DENOISE, &i);
  // (dereverb experiments left disabled)
  // i=1; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB, &i);
  // e=.0; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &e);
  // e=.0; speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &e);

  setup_twiddles(realtwiddle,imtwiddle,FFT);

  // Clear the sample window and feature history; build a Hann window.
  for (i=0; i<WINDOW; i++) f[i]=0;
  for (i=0; i<ORDER; i++) {last[i]=0; last2[i]=0;}
  for(i=0; i<WINDOW; i++) hann[i]=0.5-0.5*cos(2.0*M_PI*i/(WINDOW-1));

  if (argc==2) training=atoi(argv[1]);
  fprintf(stderr,"training=%i\n",training);
  // exit(0);

  // In recognition mode, load the codebook from cb.txt.
  cbsize=0; start[0]=0; size[0]=0;
  if (training==0)
  if (fp=fopen("cb.txt","r")) {
     while(!feof(fp)) {
        fscanf(fp,"%i\n",&size[cbsize]);
        for (i=start[cbsize]; i<start[cbsize]+size[cbsize]; i++) {
           for (n=1; n<FF2-1; n++) fscanf(fp,"%f,",&cb[i][n]);
           fscanf(fp,"\n");
        }
        start[cbsize+1]=start[cbsize]+size[cbsize];
        cbsize++;
     }
     fclose(fp);
  }
  //for (i=0; i<cbsize; i++) printf("%i,\n",size[i]); exit(0);

  //---------------------------------
  // Raw capture is also dumped to /tmp/b.raw for offline inspection.
  fp=fopen("/tmp/b.raw","w");

  snd_pcm_open(&handle, "default", SND_PCM_STREAM_CAPTURE, 0);
  snd_pcm_hw_params_malloc(&hw_params);
  snd_pcm_hw_params_any(handle, hw_params);
  snd_pcm_hw_params_set_access(handle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED);
  snd_pcm_hw_params_set_format(handle, hw_params, SND_PCM_FORMAT_S16_LE);
  snd_pcm_hw_params_set_rate_near(handle, hw_params, &rate, 0);
  snd_pcm_hw_params_set_channels(handle, hw_params, 2);
  snd_pcm_hw_params(handle, hw_params);
  snd_pcm_hw_params_free(hw_params);
  snd_pcm_prepare(handle);

  //printf("sleep 1...\n"); sleep(1);
  printf("OK, go....\n");

  while(1) {
     // Slide the analysis window by STEP samples.
     for (i=0; i<WINDOW-STEP; i++) f[i]=f[i+STEP]; // shift samples down
     // Alternate: read+denoise 160 fresh samples one pass, reuse the
     // second half of the previous frame on the next (50% overlap).
     if (toggle) {
        //read(fd,s,160*2);
        snd_pcm_readi(handle, buf, 160);
        for (i=0; i<160; i++) s[i]=buf[i*2]; // keep left channel only
        speex_preprocess_run(st, s);
     }
     else bcopy(&s[80],s,80*2);
     toggle=!toggle;
     // Append STEP new samples with one-tap pre-emphasis; dump raw audio.
     for (i=WINDOW-STEP,j=0; i<WINDOW; i++,j++) {
        sample=s[j];
        s0=(float)sample;
        f[i]=s0-s1*EMPH; s1=s0; // 1.0 pre-emphasis
        fwrite(&sample,2,1,fp);
     }
     for (i=0; i<WINDOW; i++) w[i]=f[i];
     // remove any DC level....
     tot=0; for (i=0; i<WINDOW; i++) tot+=w[i];
     tot/=WINDOW; for (i=0; i<WINDOW; i++) w[i]-=tot;
     for (i=0; i<WINDOW; i++) w[i]*=hann[i]; // window data
     // LPC analysis: autocorrelation -> Levinson-Durbin (wld).
     autocorrelate(w,ac,WINDOW,ORDER);
     wld(&lpc[1],ac,ORDER); lpc[0]=1.0;
     // e=ac[0];
     // Residual energy of the LPC fit, used as the voice-activity gate.
     e=0; for(i=0; i<=ORDER; i++) e+=ac[i]*lpc[i];
     if (e<0) e=0;
     if (e>TOL_OFF) ebuff|=ebit; else ebuff&=~ebit; // update energy bit-buffer
     ebit>>=1; if (ebit==0) ebit=ETOPBIT; // circular shift
     // LPC spectrum via FFT of the coefficient vector.
     for (i=0; i<FFT; i++) {real[i]=0; imag[i]=0;}
     for (i=0; i<=ORDER; i++) real[i]=lpc[i];
     simple_fft(real,imag,realtwiddle,imtwiddle,FFT);
     for (i=0; i<FF2; i++) {
        b=bin[i];
        f2[i]=powf(real[b]*real[b]+imag[b]*imag[b],-0.5); // magnitude of 1/A(z)
        //f2[i]=powf(f2[i],0.333333);
        //f2[i]=powf(f2[i],1.2);
        f2[i]=logf(f2[i]);
     }
     // spectral tilt compensation...
     for (i=1; i<FF2; i++) f2[i]=f2[i]*(float)(i+TILT)/TILT;
     // fold down to 9 bins...
     /* (disabled experiment) pairwise max-fold of the upper bins:
      * e.g. if (f2[FF2-2]>f2[FF2-3]) f2[FF2-3]=f2[FF2-2]; f2[FF2-2]=0;
      * repeated for bins FF2-4/5, FF2-9/10, FF2-11/12, FF2-13/14,
      * FF2-15/16. */
     // Clamp and scale features to a fixed range.
     for (i=0; i<FF2; i++) {
        if (f2[i]>6.0) f2[i]=6.0;
        f2[i]*=100.0;
     }
     if (TRACE) { fprintf(stderr,"%.f,",e); for (i=1; i<FF2-1; i++) fprintf(stderr,"%.f,",f2[i]); fprintf(stderr,"\n");}
     // calculate frame delta....
     delta=0; for (i=1; i<FF2-1; i++) {d=f2[i]-last[i]; delta+=d*d;}
     //printf("delta=%f\n",delta);
     // Energy-gated word segmentation (skip the first ~200 warm-up frames).
     if (sound==0 && e>TOL_ON && frame>200) { // start recording...
        bcopy(last2,&word[wsize],FF2*4); wsize++;
        bcopy(last,&word[wsize],FF2*4); wsize++;
        sound=1;
        wsize=0;
        bcopy(f2,&word[wsize],FF2*4); wsize++;
        bcopy(last,last2,FF2*4); bcopy(f2,last,FF2*4);
     }
     else if (sound==1 && e>TOL_OFF) { // continue reading word...
        bcopy(f2,&word[wsize],FF2*4); wsize++;
        if (wsize>200) wsize=200; // cap word length
        bcopy(last,last2,FF2*4); bcopy(f2,last,FF2*4);
     }
     else if (sound==1 && ebuff==0) { // finished reading word
        // wsize-=8; // remove training silence (2 frame buffer)
        // Only plausible word lengths are trained/matched.
        if (wsize>4 && wsize<50) {
           if (training>0) train(); else closest();
        }
        sound=0; wsize=0;
        bcopy(last,last2,FF2*4); bcopy(f2,last,FF2*4);
     }
     //for (i=1; i<FF2-1; i++) printf("%.0f,",f2[i]); printf(" e=%f\n",e);
     laste=e;
     frame++; if (frame==37800) exit(0); // hard stop after ~fixed duration
  }
}
/**
 * Capture one buffer (SAMPLE_BUF_SIZE bytes) of sound samples, handle 3-way
 * call forwarding/mixing, run Speex preprocessing (echo cancellation and
 * denoise/VAD when HAVE_SPEEX is defined) and encode the result.
 *
 * @param sound_payload_size [out] Size of the encoded payload as returned by
 *        audio_encoder->encode().
 * @param silence [out] True when the frame was classified as silence, either
 *        by the encoder or by the Speex VAD preprocessing.
 * @return true when a frame was captured and encoded; false when no data was
 *         available yet (3-way non-main receiver) or sound capture failed.
 */
bool t_audio_rx::get_sound_samples(unsigned short &sound_payload_size, bool &silence) {
	int status;
	struct timespec sleeptimer;
	//struct timeval debug_timer;

	silence = false;

	mtx_3way.lock();
	if (is_3way && !is_main_rx_3way) {
		// We are not the main receiver in a 3-way call, so
		// get the sound samples from the local media buffer.
		// This buffer will be filled by the main receiver.
		if (!media_3way_peer_rx->get(input_sample_buf, SAMPLE_BUF_SIZE)) {
			// The mutex is unlocked before going to sleep.
			// First I had the mutex unlock after the sleep.
			// That worked fine with LinuxThreading, but it does
			// not work with NPTL. It causes a deadlock when
			// the main receiver calls post_media_peer_rx_3way
			// as NPTL does not fair scheduling. This thread
			// simply gets the lock again and the main receiver
			// dies from starvation.
			mtx_3way.unlock();

			// There is not enough data yet. Sleep for 1 ms.
			sleeptimer.tv_sec = 0;
			sleeptimer.tv_nsec = 1000000;
			nanosleep(&sleeptimer, NULL);
			return false;
		}
		mtx_3way.unlock();
	} else {
		// Don't keep the 3way mutex locked while waiting for the DSP.
		mtx_3way.unlock();

		// Get the sound samples from the DSP
		status = input_device->read(input_sample_buf, SAMPLE_BUF_SIZE);
		if (status != SAMPLE_BUF_SIZE) {
			if (!logged_capture_failure) {
				// Log this failure only once
				log_file->write_header("t_audio_rx::get_sound_samples",
						LOG_NORMAL, LOG_WARNING);
				log_file->write_raw("Audio rx line ");
				log_file->write_raw(get_line()->get_line_number()+1);
				log_file->write_raw(": sound capture failed.\n");
				log_file->write_raw("Status: ");
				log_file->write_raw(status);
				log_file->write_endl();
				log_file->write_footer();
				logged_capture_failure = true;
			}
			stop_running = true;
			return false;
		}

		// If line is muted, then fill sample buffer with silence.
		// Note that we keep reading the dsp, to prevent the DSP buffers
		// from filling up.
		if (get_line()->get_is_muted()) {
			memset(input_sample_buf, 0, SAMPLE_BUF_SIZE);
		}
	}

	// Convert buffer to a buffer of shorts as the samples are 16 bits
	short *sb = (short *)input_sample_buf;

	mtx_3way.lock();
	if (is_3way) {
		// Send the sound samples to the other receiver if we
		// are the main receiver.
		// There may be no other receiver when one of the far-ends
		// has put the call on-hold.
		if (is_main_rx_3way && peer_rx_3way) {
			peer_rx_3way->post_media_peer_rx_3way(input_sample_buf,
					SAMPLE_BUF_SIZE, audio_encoder->get_sample_rate());
		}

		// Mix the sound samples with the 3rd party
		if (media_3way_peer_tx->get(mix_buf_3way, SAMPLE_BUF_SIZE)) {
			short *mix_sb = (short *)mix_buf_3way;
			// SAMPLE_BUF_SIZE is in bytes; 16-bit samples -> /2 entries.
			for (int i = 0; i < SAMPLE_BUF_SIZE / 2; i++) {
				sb[i] = mix_linear_pcm(sb[i], mix_sb[i]);
			}
		}
	}
	mtx_3way.unlock();

	/*** PREPROCESSING & ENCODING ***/

	bool preprocessing_silence = false;

#ifdef HAVE_SPEEX
	// speex acoustic echo cancellation
	if (audio_session->get_do_echo_cancellation() &&
	    !audio_session->get_echo_captured_last())
	{
		// speex_echo_capture wants the captured frame in its own input
		// buffer; the echo-cancelled result is written back into sb.
		spx_int16_t *input_buf = new spx_int16_t[SAMPLE_BUF_SIZE/2];
		MEMMAN_NEW_ARRAY(input_buf);
		for (int i = 0; i < SAMPLE_BUF_SIZE / 2; i++) {
			input_buf[i] = sb[i];
		}
		speex_echo_capture(audio_session->get_speex_echo_state(), input_buf, sb);
		audio_session->set_echo_captured_last(true);
		MEMMAN_DELETE_ARRAY(input_buf);
		delete [] input_buf;
	}

	// preprocessing (denoise/AGC/VAD, in place on sb)
	preprocessing_silence = !speex_preprocess_run(speex_preprocess_state, sb);

	// According to the speex API documentation the return value
	// from speex_preprocess_run() is only defined when VAD is
	// enabled. So to be safe, reset the return value, if VAD is
	// disabled.
	if (!speex_dsp_vad) preprocessing_silence = false;
#endif

	// encoding
	sound_payload_size = audio_encoder->encode(sb, nsamples,
			payload, payload_size, silence);

	// recognizing silence (both from preprocessing and encoding)
	silence = silence || preprocessing_silence;

	return true;
}
void CVoiceRecorder::DoPulse(void) { m_CS.Lock(); char* pInputBuffer; char bufTempOutput[2048]; unsigned int uiTotalBufferSize = m_uiBufferSizeBytes * FRAME_OUTGOING_BUFFER_COUNT; // Only send every 100 ms if (CClientTime::GetTime() - m_ulTimeOfLastSend > 100 && m_VoiceState != VOICESTATE_AWAITING_INPUT) { m_bIsSendingVoiceData = false; unsigned int uiBytesAvailable = 0; if (m_uiOutgoingWriteIndex >= m_uiOutgoingReadIndex) uiBytesAvailable = m_uiOutgoingWriteIndex - m_uiOutgoingReadIndex; else uiBytesAvailable = m_uiOutgoingWriteIndex + (uiTotalBufferSize - m_uiOutgoingReadIndex); unsigned int uiSpeexBlockSize = m_iSpeexOutgoingFrameSampleCount * VOICE_SAMPLE_SIZE; unsigned int uiSpeexFramesAvailable = uiBytesAvailable / uiSpeexBlockSize; if (uiSpeexFramesAvailable > 0) { SpeexBits speexBits; speex_bits_init(&speexBits); while (uiSpeexFramesAvailable-- > 0) { speex_bits_reset(&speexBits); // Does the input data wrap around the buffer? Copy it first then if (m_uiOutgoingReadIndex + uiSpeexBlockSize >= uiTotalBufferSize) { unsigned t; for (t = 0; t < uiSpeexBlockSize; t++) bufTempOutput[t] = m_pOutgoingBuffer[t % uiTotalBufferSize]; pInputBuffer = bufTempOutput; } else pInputBuffer = m_pOutgoingBuffer + m_uiOutgoingReadIndex; // Run through our preprocessor (noise/echo cancelation) speex_preprocess_run(m_pSpeexPreprocState, (spx_int16_t*)pInputBuffer); // Encode our audio stream with speex speex_encode_int(m_pSpeexEncoderState, (spx_int16_t*)pInputBuffer, &speexBits); m_uiOutgoingReadIndex = (m_uiOutgoingReadIndex + uiSpeexBlockSize) % uiTotalBufferSize; m_bIsSendingVoiceData = true; unsigned int uiBytesWritten = speex_bits_write(&speexBits, bufTempOutput, 2048); g_pClientGame->GetLocalPlayer()->GetVoice()->DecodeAndBuffer(bufTempOutput, uiBytesWritten); NetBitStreamInterface* pBitStream = g_pNet->AllocateNetBitStream(); if (pBitStream) { CClientPlayer* pLocalPlayer = g_pClientGame->GetPlayerManager()->GetLocalPlayer(); if (pLocalPlayer) { pBitStream->Write((unsigned 
short)uiBytesWritten); // size of buffer / voice data pBitStream->Write((char*)bufTempOutput, uiBytesWritten); // voice data g_pNet->SendPacket(PACKET_ID_VOICE_DATA, pBitStream, PACKET_PRIORITY_LOW, PACKET_RELIABILITY_UNRELIABLE_SEQUENCED, PACKET_ORDERING_VOICE); g_pNet->DeallocateNetBitStream(pBitStream); } } } speex_bits_destroy(&speexBits); m_ulTimeOfLastSend = CClientTime::GetTime(); } } if (m_VoiceState == VOICESTATE_RECORDING_LAST_PACKET) // End of voice data (for events) { m_VoiceState = VOICESTATE_AWAITING_INPUT; NetBitStreamInterface* pBitStream = g_pNet->AllocateNetBitStream(); if (pBitStream) { CClientPlayer* pLocalPlayer = g_pClientGame->GetPlayerManager()->GetLocalPlayer(); if (pLocalPlayer) { g_pNet->SendPacket(PACKET_ID_VOICE_END, pBitStream, PACKET_PRIORITY_LOW, PACKET_RELIABILITY_UNRELIABLE_SEQUENCED, PACKET_ORDERING_VOICE); g_pNet->DeallocateNetBitStream(pBitStream); } } } m_CS.Unlock(); }
/**
 * Process one captured audio frame: compute mic/speaker level meters, run
 * Speex echo cancellation and preprocessing, apply voice-activity detection
 * with hold/hysteresis and transmit-mode overrides, then encode the frame
 * (CELT per-frame, or Opus with multi-frame buffering) and hand the packet
 * to flushCheck().  Operates entirely on member state; no parameters.
 */
void AudioInput::encodeAudioFrame() {
	int iArg;
	int i;
	float sum;
	short max;

	short *psSource;

	iFrameCounter++;

	if (! bRunning)
		return;

	// --- Microphone level meters (RMS in dBFS, clamped at -96 dB) ---
	sum=1.0f;
	max = 1;
	for (i=0;i<iFrameSize;i++) {
		sum += static_cast<float>(psMic[i] * psMic[i]);
		max = std::max(static_cast<short>(abs(psMic[i])), max);
	}
	dPeakMic = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);
	dMaxMic = max;

	if (psSpeaker && (iEchoChannels > 0)) {
		sum=1.0f;
		for (i=0;i<iFrameSize;i++)
			sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]);
		dPeakSpeaker = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);
	} else {
		dPeakSpeaker = 0.0;
	}

	QMutexLocker l(&qmSpeex);
	resetAudioProcessor();

	// Read current AGC gain, then set noise suppression relative to it so
	// the configured suppression level is kept after AGC amplification.
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_AGC_GAIN, &iArg);
	float gainValue = static_cast<float>(iArg);
	iArg = g.s.iNoiseSuppress - iArg;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

	// --- Echo cancellation + preprocessing (in place) ---
	if (sesEcho && psSpeaker) {
		speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean);
		speex_preprocess_run(sppPreprocess, psClean);
		psSource = psClean;
	} else {
		speex_preprocess_run(sppPreprocess, psMic);
		psSource = psMic;
	}

	// Level of the processed signal
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psSource[i] * psSource[i]);
	float micLevel = sqrtf(sum / static_cast<float>(iFrameSize));
	dPeakSignal = qMax(20.0f*log10f(micLevel / 32768.0f), -96.0f);

	spx_int32_t prob = 0;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob);
	fSpeechProb = static_cast<float>(prob) / 100.0f;

	// clean microphone level: peak of filtered signal attenuated by AGC gain
	dPeakCleanMic = qMax(dPeakSignal - gainValue, -96.0f);

	// --- Voice activity decision with hysteresis (fVADmax/fVADmin) ---
	float level = (g.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakCleanMic / 96.0f);

	bool bIsSpeech = false;

	if (level > g.s.fVADmax)
		bIsSpeech = true;
	else if (level > g.s.fVADmin && bPreviousVoice)
		bIsSpeech = true;

	// Hold period: keep transmitting for iVoiceHold frames after speech ends
	if (! bIsSpeech) {
		iHoldFrames++;
		if (iHoldFrames < g.s.iVoiceHold)
			bIsSpeech = true;
	} else {
		iHoldFrames = 0;
	}

	// Transmit-mode overrides (continuous / push-to-talk / double-push)
	if (g.s.atTransmit == Settings::Continuous)
		bIsSpeech = true;
	else if (g.s.atTransmit == Settings::PushToTalk)
		bIsSpeech = g.s.uiDoublePush && ((g.uiDoublePush < g.s.uiDoublePush) || (g.tDoublePush.elapsed() < g.s.uiDoublePush));

	bIsSpeech = bIsSpeech || (g.iPushToTalk > 0);

	// Mute/suppress overrides force silence
	ClientUser *p = ClientUser::get(g.uiSession);
	if (g.s.bMute || ((g.s.lmLoopMode != Settings::Local) && p && (p->bMute || p->bSuppress)) || g.bPushToMute || (g.iTarget < 0)) {
		bIsSpeech = false;
	}

	if (bIsSpeech) {
		iSilentFrames = 0;
	} else {
		iSilentFrames++;
		// After a long silence, reset the frame counter
		if (iSilentFrames > 500)
			iFrameCounter = 0;
	}

	// Update the local user's talking indicator
	if (p) {
		if (! bIsSpeech)
			p->setTalking(Settings::Passive);
		else if (g.iTarget == 0)
			p->setTalking(Settings::Talking);
		else
			p->setTalking(Settings::Shouting);
	}

	// Audio cues on speech start/stop
	if (g.s.bTxAudioCue && g.uiSession != 0) {
		AudioOutputPtr ao = g.ao;

		if (bIsSpeech && ! bPreviousVoice && ao)
			ao->playSample(g.s.qsTxAudioCueOn);
		else if (ao && !bIsSpeech && bPreviousVoice)
			ao->playSample(g.s.qsTxAudioCueOff);
	}

	// Fully idle (no speech now or in the previous frame): handle the idle
	// action (auto-deafen/mute), freeze AGC adaptation and skip encoding.
	if (! bIsSpeech && ! bPreviousVoice) {
		iBitrate = 0;

		if (g.s.iaeIdleAction != Settings::Nothing && ((tIdle.elapsed() / 1000000ULL) > g.s.iIdleTime)) {
			if (g.s.iaeIdleAction == Settings::Deafen && !g.s.bDeaf) {
				tIdle.restart();
				emit doDeaf();
			} else if (g.s.iaeIdleAction == Settings::Mute && !g.s.bMute) {
				tIdle.restart();
				emit doMute();
			}
		}

		spx_int32_t increment = 0;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_INCREMENT, &increment);
		return;
	} else {
		spx_int32_t increment = 12;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_INCREMENT, &increment);
	}

	// Speech just started: request an encoder reset
	if (bIsSpeech && !bPreviousVoice) {
		bResetEncoder = true;
	}

	tIdle.restart();

	EncodingOutputBuffer buffer;
	Q_ASSERT(buffer.size() >= static_cast<size_t>(iAudioQuality / 100 * iAudioFrames / 8));

	int len = 0;

	bool encoded = true;
	if (!selectCodec())
		return;

	if (umtType == MessageHandler::UDPVoiceCELTAlpha || umtType == MessageHandler::UDPVoiceCELTBeta) {
		// CELT: one encoded packet per frame
		len = encodeCELTFrame(psSource, buffer);
		if (len <= 0) {
			iBitrate = 0;
			qWarning() << "encodeCELTFrame failed" << iBufferedFrames << iFrameSize << len;
			return;
		}
		++iBufferedFrames;
	} else if (umtType == MessageHandler::UDPVoiceOpus) {
		// Opus: buffer frames until iAudioFrames are collected (or speech ends)
		encoded = false;
		opusBuffer.insert(opusBuffer.end(), psSource, psSource + iFrameSize);
		++iBufferedFrames;

		if (!bIsSpeech || iBufferedFrames >= iAudioFrames) {
			if (iBufferedFrames < iAudioFrames) {
				// Stuff frame to framesize if speech ends and we don't have enough audio
				// this way we are guaranteed to have a valid framecount and won't cause
				// a codec configuration switch by suddenly using a wildly different
				// framecount per packet.
				const int missingFrames = iAudioFrames - iBufferedFrames;
				opusBuffer.insert(opusBuffer.end(), iFrameSize * missingFrames, 0);
				iBufferedFrames += missingFrames;
				iFrameCounter += missingFrames;
			}

			Q_ASSERT(iBufferedFrames == iAudioFrames);

			len = encodeOpusFrame(&opusBuffer[0], iBufferedFrames * iFrameSize, buffer);
			opusBuffer.clear();
			if (len <= 0) {
				iBitrate = 0;
				qWarning() << "encodeOpusFrame failed" << iBufferedFrames << iFrameSize << len;
				iBufferedFrames = 0; // These are lost. Make sure not to mess up our sequence counter next flushCheck.
				return;
			}
			encoded = true;
		}
	}

	if (encoded) {
		flushCheck(QByteArray(reinterpret_cast<char *>(&buffer[0]), len), !bIsSpeech);
	}

	if (! bIsSpeech)
		iBitrate = 0;

	bPreviousVoice = bIsSpeech;
}
static float volume_agc_process(Volume *v, mblk_t *om){ speex_preprocess_run(v->speex_pp,(int16_t*)om->b_rptr); return 1; }