Пример #1
0
/*
 * Run one frame through the Speex echo canceller and preprocessor.
 *
 * state     speex_ec instance (passed as an opaque pointer).
 * rec_frm   captured frame; overwritten with the processed result.
 * play_frm  frame that was played back, used as the echo reference.
 * options   must be 0.
 * reserved  must be NULL.
 *
 * Returns PJ_SUCCESS, or PJ_EINVAL when the argument check fails.
 */
PJ_DEF(pj_status_t) speex_aec_cancel_echo( void *state,
					   pj_int16_t *rec_frm,
					   const pj_int16_t *play_frm,
					   unsigned options,
					   void *reserved )
{
    speex_ec *echo = (speex_ec*) state;
    spx_int16_t *scratch;

    /* Reject missing pointers and any non-default option/reserved value */
    PJ_ASSERT_RETURN(echo && rec_frm && play_frm && options==0 &&
		     reserved==NULL, PJ_EINVAL);

    /* All processing happens in the instance's temporary frame */
    scratch = (spx_int16_t*)echo->tmp_frame;

    /* Step 1: echo cancellation, rec_frm/play_frm -> scratch */
    speex_echo_cancellation(echo->state,
			    (const spx_int16_t*)rec_frm,
			    (const spx_int16_t*)play_frm,
			    scratch);

    /* Step 2: preprocessing, in place on the cancelled frame */
    speex_preprocess_run(echo->preprocess, scratch);

    /* Step 3: hand the result back to the caller through rec_frm */
    pjmedia_copy_samples(rec_frm, echo->tmp_frame, echo->samples_per_frame);

    return PJ_SUCCESS;
}
Пример #2
0
/*
 * Run the Speex echo canceller on one block and, when a preprocessor state
 * exists, run the preprocessor on the canceller's output.
 *
 * rec, play and out are byte pointers that are handed to Speex as
 * spx_int16_t sample buffers.
 */
void pa_speex_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *play, uint8_t *out) {
    spx_int16_t *cleaned = (spx_int16_t *) out;

    speex_echo_cancellation(ec->params.priv.speex.state,
                            (const spx_int16_t *) rec,
                            (const spx_int16_t *) play,
                            cleaned);

    /* Nothing more to do when no preprocessor has been set up. */
    if (!ec->params.priv.speex.pp_state)
        return;

    /* The preprocessor deliberately runs after the AEC, on the AEC output. */
    speex_preprocess_run(ec->params.priv.speex.pp_state, cleaned);
}
Пример #3
0
/*
 * Let the Echo Canceller know that a frame has been captured from
 * the microphone.
 *
 * The frame is high-pass filtered in place (hp00, then hp0). Once the
 * latency buffer is ready, the oldest frame of lat_buf is used as the
 * playback reference for Speex echo cancellation and preprocessing, and the
 * result is copied back into rec_frm. The reference frame is then refilled
 * from delay_buf and pushed to the back of lat_buf.
 *
 * rec_frm  captured samples; modified in place.
 * size     number of samples in rec_frm; must equal samples_per_frame.
 *
 * Returns -1 when size does not match samples_per_frame, PJ_SUCCESS
 * otherwise (including while the latency buffer is still prefetching).
 */
pj_status_t pjs_echo_canceller::capture(pj_int16_t *rec_frm, unsigned size) {
	struct frame *oldest_frm;
	pj_status_t status, rc;

	// NOTE(review): size is unsigned but is logged with %d, and the error
	// value is a bare -1 rather than a pj_status_t constant -- confirm.
	if(samples_per_frame!=size)
	{
		PJ_LOG(1, (THIS_FILE, "WRONG SIZE ON CAPTURE %d != %d",size,samples_per_frame));
		return -1;
	}
	// Two cascaded high-pass stages, applied sample by sample before the lock
	// is taken. The filtered value is rounded back into the 16-bit frame.
	for (unsigned i = 0; i < samples_per_frame; i++)
	{

		REAL f = hp00.highpass(rec_frm[i]);
		f = hp0.highpass(f);
		rec_frm[i] = round(f);
	}

	// NOTE(review): presumably a scoped guard that holds *lock until this
	// function returns; the explicit release()/acquire() pair below relies on
	// that -- confirm against PPJ_WaitAndLock.
	PPJ_WaitAndLock wl(*lock);
	if (!lat_ready) {
		/* Prefetching to fill in the desired latency */
		PJ_LOG(4, (THIS_FILE, "Prefetching.."));
		return PJ_SUCCESS;
	}

	/* Retrieve oldest frame from the latency buffer */
	oldest_frm = lat_buf.next;
	pj_list_erase(oldest_frm);

	// The lock is dropped while Speex runs; oldest_frm is already unlinked
	// from lat_buf at this point.
	lock->release();

    /* Cancel echo using this reference frame, output goes to tmp_frame */
    speex_echo_cancellation(state, (const spx_int16_t*)rec_frm,
			    (const spx_int16_t*)oldest_frm->buf,
			    (spx_int16_t*)tmp_frame);


    /* Preprocess output */
    speex_preprocess_run(preprocess, (spx_int16_t*)tmp_frame);
    /* Copy the processed frame back over the caller's buffer */
    pjmedia_copy_samples(rec_frm, tmp_frame, samples_per_frame);

	status = PJ_SUCCESS;
	/* Re-take the lock before touching delay_buf / lat_buf again */
	lock->acquire();

	/* Move one frame from delay buffer to the latency buffer. */
	rc = pjmedia_delay_buf_get(delay_buf, oldest_frm->buf);
	if (rc != PJ_SUCCESS) {
		/* Ooops.. no frame! Substitute silence so the list keeps its length. */
		PJ_LOG(4,
				(THIS_FILE, "No frame from delay buffer. This will upset EC later"));
		pjmedia_zero_samples(oldest_frm->buf, samples_per_frame);
	}
	pj_list_push_back(&lat_buf, oldest_frm);

	return status;
}
Пример #4
0
  /*
   * JNI entry point: run Speex echo cancellation followed by the Speex
   * preprocessor on one frame.
   *
   * input_frame  captured (microphone) samples.
   * echo_frame   played-back samples used as the echo reference; must hold
   *              at least as many samples as input_frame.
   *
   * Returns a new short[] of the same length as input_frame holding the
   * processed samples, or NULL when an argument is missing, the frame is
   * empty, echo_frame is too short, or a JNI allocation fails.
   *
   * The global SPEEX_FRAME_BYTE is set to the length of input_frame, as
   * before. The global states `st` and `den` are used for processing.
   */
  JNIEXPORT jshortArray JNICALL Java_com_pullmi_shanghai_TalkActivity_speex_1EchoCanceller_1process
(JNIEnv *env, jobject jobj, jshortArray input_frame, jshortArray echo_frame)
{
  jshortArray output_shorts = NULL;
  jshort *native_input_frame = NULL;
  jshort *native_echo_frame = NULL;
  jshort *native_output_frame = NULL;
  jint length;
  int processed = 0;

  (void) jobj;

  if (NULL == input_frame || NULL == echo_frame)
  {
    LOGE("create out error");
    return NULL;
  }

  length = (*env)->GetArrayLength(env, input_frame);
  SPEEX_FRAME_BYTE = length;

  /* Validate sizes before touching any sample so no read can run past the
     end of the shorter array. */
  if (0 >= length || (*env)->GetArrayLength(env, echo_frame) < length)
  {
    LOGE("create out error");
    return NULL;
  }

  /* Each step may fail with a pending Java exception; stop at the first
     failure because only the Release calls are safe to make afterwards. */
  native_input_frame = (*env)->GetShortArrayElements(env, input_frame, NULL);
  if (NULL == native_input_frame)
    goto cleanup;

  native_echo_frame = (*env)->GetShortArrayElements(env, echo_frame, NULL);
  if (NULL == native_echo_frame)
    goto cleanup;

  output_shorts = (*env)->NewShortArray(env, length);
  if (NULL == output_shorts)
    goto cleanup;

  native_output_frame = (*env)->GetShortArrayElements(env, output_shorts, NULL);
  if (NULL == native_output_frame)
    goto cleanup;

  /* Echo cancellation writes straight into the result array's elements,
     then the preprocessor refines them in place. */
  speex_echo_cancellation(st, native_input_frame, native_echo_frame, native_output_frame);
  speex_preprocess_run(den, native_output_frame);
  processed = 1;

cleanup:
  if (!processed)
    LOGE("create out error");

  /* The inputs were only read: JNI_ABORT releases them without copy-back. */
  if (NULL != native_input_frame)
    (*env)->ReleaseShortArrayElements(env, input_frame, native_input_frame, JNI_ABORT);
  if (NULL != native_echo_frame)
    (*env)->ReleaseShortArrayElements(env, echo_frame, native_echo_frame, JNI_ABORT);
  /* Mode 0 commits the processed samples into the Java array. */
  if (NULL != native_output_frame)
    (*env)->ReleaseShortArrayElements(env, output_shorts, native_output_frame, 0);

  return processed ? output_shorts : NULL;
}
Пример #5
0
/**
 * Cancels the echo of the buffered playback from a captured buffer, in place.
 *
 * @param aqi the <tt>AudioQualityImprovement</tt> whose echo canceller,
 * playback buffer and scratch output buffer are used
 * @param buffer the captured audio; overwritten with the echo-cancelled result
 * @param length the length of <tt>buffer</tt> in bytes
 * @return the sound pressure level in dB of the playback which has been matched
 * to the specified capture <tt>buffer</tt> for the purposes of echo
 * cancellation if echo suppression is enabled; otherwise, <tt>0</tt>.
 * <tt>0</tt> is also returned, and nothing is processed, while the playback is
 * delaying, when less playback than capture is available, or when the scratch
 * buffer cannot be grown.
 */
static float
AudioQualityImprovement_cancelEchoFromPlay
    (AudioQualityImprovement *aqi,
    void *buffer, unsigned long length)
{
    spx_uint32_t frames = length / sizeof(spx_int16_t);
    float level = 0;

    /* Nothing to match against yet, or not enough playback buffered. */
    if ((JNI_TRUE == aqi->playIsDelaying) || (aqi->playLength < frames))
        return 0;

    /* Grow the scratch buffer when it is missing or smaller than length. */
    if (!(aqi->out) || (aqi->outCapacity < length))
    {
        spx_int16_t *grown = realloc(aqi->out, length);

        if (!grown)
            return 0;
        aqi->out = grown;
        aqi->outCapacity = length;
    }

    /* Cancel into the scratch buffer, then copy the result over the input. */
    speex_echo_cancellation(aqi->echo, buffer, aqi->play, aqi->out);
    memcpy(buffer, aqi->out, length);

    /* The level is only computed when echo suppression will use it. */
    if (JNI_TRUE == aqi->suppressEcho)
    {
        level
            = AudioQualityImprovement_calculateSoundPressureLevel(
                aqi,
                aqi->play, frames);
    }

    /* Drop the playback samples that have just been consumed. */
    AudioQualityImprovement_popFromPlay(aqi, frames);

    return level;
}
Пример #6
0
// JNI entry point: cancels echo for one frame of enc_frame_size samples.
//
//   rec   captured samples (at least enc_frame_size elements are read)
//   play  played-back reference samples (same size requirement)
//   out   receives the enc_frame_size echo-cancelled samples
//
// If either input array is too short, GetShortArrayRegion leaves a Java
// exception pending; the function then returns immediately so the echo
// canceller is never fed uninitialised samples and no further JNI call is
// made while the exception is pending. The exception still reaches Java.
//
// NOTE(review): JNI passes a jobject/jclass as the second native argument;
// this signature has none, so the array arguments may be shifted by one.
// Left unchanged here -- confirm against the Java declaration.
extern "C" JNIEXPORT void Java_com_haitou_xiaoyoupai_imservice_support_audio_Speex_echoCancellation(
    JNIEnv *env, jshortArray rec, jshortArray play, jshortArray out) {

    jshort echo_buf[enc_frame_size];
    jshort ref_buf[enc_frame_size];
    jshort e_buf[enc_frame_size];

    env->GetShortArrayRegion(rec, 0, enc_frame_size, echo_buf);
    if (env->ExceptionCheck())
        return;

    env->GetShortArrayRegion(play, 0, enc_frame_size, ref_buf);
    if (env->ExceptionCheck())
        return;

    speex_echo_cancellation(echoState, echo_buf, ref_buf, e_buf);

    env->SetShortArrayRegion(out, 0, enc_frame_size, e_buf);

}
Пример #7
0
RTC::ReturnCode_t EchoCanceler::onExecute(RTC::UniqueId ec_id)
{
  RTC_DEBUG(("onExecute start"));
  if((m_indata.size() > BUFFER_MAX) || (m_outdata.size() > BUFFER_MAX)) {
    RTC_INFO(("One of buffers exceeded the maximum value. Start clear buffers."));
    BufferClr();
  }
  if (( m_indata.size() >= ECHOLEN) && (m_outdata.size() >= ECHOLEN)) {
    m_mutex.lock();
    RTC_DEBUG(("onExecute:mutex lock"));
    int i;
    short *inbuffer = new short[ECHOLEN];
    short *outbuffer = new short[ECHOLEN];
    short *result = new short[ECHOLEN];

    for ( i = 0; i < ECHOLEN; i++ ) {
      inbuffer[i] = m_indata.front();
      m_indata.pop_front();
      outbuffer[i] = m_outdata.front();
      m_outdata.pop_front();
      result[i] = 0;
    }

    m_mutex.unlock();
    RTC_DEBUG(("onExecute:mutex unlock"));
    speex_echo_cancellation(mp_sest, inbuffer, outbuffer, result);

    delete[] inbuffer;
    delete[] outbuffer;
    m_fout.data.length(ECHOLEN * 2);
    for ( i = 0; i < ECHOLEN; i++ ) {
      short val = result[i];
      m_fout.data[i*2]   = (unsigned char)(val & 0x00ff);
      m_fout.data[i*2+1] = (unsigned char)((val & 0xff00) >> 8);
    }
    delete[] result;
    setTimestamp( m_fout );
    m_foutOut.write();
    RTC_DEBUG(("onExecute:writing %d samples", m_fout.data.length() / 2));
  } else {
Пример #8
0
// Audio input callback: preprocesses the captured buffer, optionally cancels
// echo against one buffer taken from psound->inputbuff, encodes the result
// and emits it through sigCapture. Always returns paContinue.
//
//   input      captured audio; treated as 16-bit samples and modified in place
//   frameCount passed to the encoder as the input size
//   userData   the owning sound instance
//   output, timeInfo, statusFlags are not used here.
//
// NOTE(review): input is declared const but is written through (short *)
// casts by both Speex calls -- confirm the capture buffer is really writable.
int sound::inputcallback(const void *input, void *output, unsigned long frameCount, const PaStreamCallbackTimeInfo *timeInfo, PaStreamCallbackFlags statusFlags, void *userData){
	sound * psound = (sound*)userData;

	// Preprocessing is optional and runs before echo cancellation here.
	if (psound->speexppstate){
		speex_preprocess_run(psound->speexppstate, (short *)input);
	}

	// Try to claim the slot at index `end`: stop when end has caught up with
	// begin (nothing to take), otherwise advance end with a compare-exchange
	// (wrapping at 16) and retry if another thread moved it first.
	short * outputbuff = 0;
	while (1){
		int _end = psound->end.load();
		int _new = _end + 1;

		if (_end == psound->begin.load()){
			break;
		}

		if (_new == 16){
			_new = 0;
		}

		if (psound->end.compare_exchange_strong(_end, _new)){
			outputbuff = (short *)psound->inputbuff[_end].buf;
			break;
		}
	}

	// Echo cancellation runs in place: input is both the recorded signal and
	// the destination. It is skipped when no reference buffer was claimed.
	if (psound->isopenecho == true){
		if (outputbuff != 0){
			speex_echo_cancellation(psound->speexechostate, (short*)input, outputbuff, (short*)input);
		}
	}

	// Packet layout: a 2-byte channel count followed by the encoded data.
	// NOTE(review): the encoder writes at tmp+2 but is given 4096 as its last
	// argument; if that is the output capacity it is 2 bytes too large -- confirm.
	char tmp[4096];
	*((short*)tmp) = psound->inputStreamParameters.channelCount;
	int len = psound->_encode.encoded((char*)input, frameCount, tmp+2, 4096);

	psound->sigCapture((char*)tmp, len + 2);

	return paContinue;
}
Пример #9
0
/* Process one captured frame against the internally buffered playback.
 *
 * When at least one full frame of playback is buffered, the frame at the
 * front of play_buf is used as the echo reference and then removed from the
 * buffer. Otherwise a warning is emitted, the playback buffer position is
 * reset if it was non-zero, and rec is copied to out unchanged.
 */
EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
{
   int n;

   st->play_buf_started = 1;

   /* No complete playback frame available: pass the capture through. */
   if (st->play_buf_pos < st->frame_size)
   {
      speex_warning("No playback frame available (your application is buggy and/or got xruns)");
      if (st->play_buf_pos != 0)
      {
         speex_warning("internal playback buffer corruption?");
         st->play_buf_pos = 0;
      }
      for (n = 0; n < st->frame_size; n++)
         out[n] = rec[n];
      return;
   }

   /* Cancel against the oldest buffered frame... */
   speex_echo_cancellation(st, rec, st->play_buf, out);

   /* ...then shift the remaining playback samples to the front. */
   st->play_buf_pos -= st->frame_size;
   for (n = 0; n < st->play_buf_pos; n++)
      st->play_buf[n] = st->play_buf[n + st->frame_size];
}
Пример #10
0
/* Offline echo-cancellation test driver.
 *
 * Usage: testecho mic_signal.sw speaker_signal.sw output.sw
 *
 * Reads frames of NN 16-bit samples from argv[1] and argv[2], passes them to
 * speex_echo_cancellation() as the recorded and played signals respectively,
 * runs the preprocessor on the result and writes it to argv[3].
 *
 * Processing stops at the first frame that cannot be read completely from
 * both inputs, so a trailing partial frame is not processed or written.
 *
 * Returns 0 on success and 1 when a file cannot be opened or the output
 * cannot be written; exits with status 1 on a usage error.
 */
int main(int argc, char **argv)
{
   FILE *echo_fd, *ref_fd, *e_fd;
   short echo_buf[NN], ref_buf[NN], e_buf[NN];
   SpeexEchoState *st;
   SpeexPreprocessState *den;
   int sampleRate = 8000;
   int status = 0;

   if (argc != 4)
   {
      fprintf(stderr, "testecho mic_signal.sw speaker_signal.sw output.sw\n");
      exit(1);
   }
   echo_fd = fopen(argv[2], "rb");
   ref_fd  = fopen(argv[1],  "rb");
   e_fd    = fopen(argv[3], "wb");
   if (echo_fd == NULL || ref_fd == NULL || e_fd == NULL)
   {
      fprintf(stderr, "testecho: cannot open input or output file\n");
      if (echo_fd != NULL)
         fclose(echo_fd);
      if (ref_fd != NULL)
         fclose(ref_fd);
      if (e_fd != NULL)
         fclose(e_fd);
      return 1;
   }

   st = speex_echo_state_init(NN, TAIL);
   den = speex_preprocess_state_init(NN, sampleRate);
   speex_echo_ctl(st, SPEEX_ECHO_SET_SAMPLING_RATE, &sampleRate);
   speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_ECHO_STATE, st);

   /* Loop on the fread() results rather than feof(): feof() only becomes
      true after a read has already failed, which would otherwise cause one
      extra iteration on stale buffer contents. */
   for (;;)
   {
      if (fread(ref_buf, sizeof(short), NN, ref_fd) != (size_t)NN)
         break;
      if (fread(echo_buf, sizeof(short), NN, echo_fd) != (size_t)NN)
         break;
      speex_echo_cancellation(st, ref_buf, echo_buf, e_buf);
      speex_preprocess_run(den, e_buf);
      if (fwrite(e_buf, sizeof(short), NN, e_fd) != (size_t)NN)
      {
         fprintf(stderr, "testecho: write error on output file\n");
         status = 1;
         break;
      }
   }
   speex_echo_state_destroy(st);
   speex_preprocess_state_destroy(den);
   /* fclose() flushes the output; a failure there is a write error too. */
   if (fclose(e_fd) != 0 && status == 0)
   {
      fprintf(stderr, "testecho: write error on output file\n");
      status = 1;
   }
   fclose(echo_fd);
   fclose(ref_fd);
   return status;
}
Пример #11
0
// JNI entry point: cancels echo for one frame, preprocesses it, encodes it
// with Speex and stores the encoded bytes in `encoded`.
//
//   rec      captured samples (at least enc_frame_size elements are read)
//   play     played-back reference samples (same size requirement)
//   encoded  receives the encoded bytes, starting at index 0
//
// Returns the number of encoded bytes. If either input array is too short,
// GetShortArrayRegion leaves a Java exception pending; the function then
// returns 0 immediately so neither the echo canceller nor the encoder is fed
// uninitialised samples and no further JNI call is made while the exception
// is pending. The exception still reaches Java.
//
// NOTE(review): JNI passes a jobject/jclass as the second native argument;
// this signature has none, so the array arguments may be shifted by one.
// Left unchanged here -- confirm against the Java declaration.
extern "C" JNIEXPORT int Java_com_haitou_xiaoyoupai_imservice_support_audio_Speex_echoCancellationEncode(
    JNIEnv *env, jshortArray rec, jshortArray play, jbyteArray encoded) {

    jshort echo_buf[enc_frame_size];
    jshort ref_buf[enc_frame_size];
    jshort e_buf[enc_frame_size];
    jbyte output_buffer[enc_frame_size];

    env->GetShortArrayRegion(rec, 0, enc_frame_size, echo_buf);
    if (env->ExceptionCheck())
        return 0;

    env->GetShortArrayRegion(play, 0, enc_frame_size, ref_buf);
    if (env->ExceptionCheck())
        return 0;

    speex_echo_cancellation(echoState, echo_buf, ref_buf, e_buf);
    speex_preprocess_run(den, e_buf);

    speex_bits_reset(&ebits);

    speex_encode_int(enc_state, e_buf, &ebits);

    // At most enc_frame_size bytes are written, the size of output_buffer.
    jint tot_bytes = speex_bits_write(&ebits, (char *) output_buffer,
                                      enc_frame_size);
    env->SetByteArrayRegion(encoded, 0, tot_bytes, output_buffer);

    return (jint) tot_bytes;
}
Пример #12
0
/*	Filter process callback of the Speex echo canceller.
 *
 *	inputs[0]= reference signal from far end (sent to soundcard)
 *	inputs[1]= near speech & echo signal	(read from soundcard)
 *	outputs[0]=  is a copy of inputs[0] to be sent to soundcard
 *	outputs[1]=  near end speech, echo removed - towards far end
 *
 *	In bypass mode both inputs are forwarded untouched. Otherwise the
 *	reference is queued twice (s->ref for immediate output, s->delayed_ref
 *	for the canceller), the captured signal is consumed one frame of nbytes
 *	at a time, and each frame is echo-cancelled and preprocessed before being
 *	put on outputs[1]. Finally the amount of buffered reference is checked
 *	periodically and the flow controller is asked to drop samples if it grows.
*/
static void speex_ec_process(MSFilter *f){
	SpeexECState *s=(SpeexECState*)f->data;
	/*frame size in bytes (framesize*2); the buffers are handed to speex as short* below*/
	int nbytes=s->framesize*2;
	mblk_t *refm;
	uint8_t *ref,*echo;
	
	/*bypass: move every queued message from each input to the matching output*/
	if (s->bypass_mode) {
		while((refm=ms_queue_get(f->inputs[0]))!=NULL){
			ms_queue_put(f->outputs[0],refm);
		}
		while((refm=ms_queue_get(f->inputs[1]))!=NULL){
			ms_queue_put(f->outputs[1],refm);
		}
		return;
	}
	
	if (f->inputs[0]!=NULL){
		if (s->echostarted){
			/*queue the reference twice: a duplicate in delayed_ref, the original in ref.
			  The flow controller may return NULL, in which case nothing is queued.*/
			while((refm=ms_queue_get(f->inputs[0]))!=NULL){
				refm=audio_flow_controller_process(&s->afc,refm);
				if (refm){
					mblk_t *cp=dupmsg(refm);
					ms_bufferizer_put(&s->delayed_ref,cp);
					ms_bufferizer_put(&s->ref,refm);
				}
			}
		}else{
			/*no captured frame has been seen yet: the reference is discarded*/
			ms_warning("Getting reference signal but no echo to synchronize on.");
			ms_queue_flush(f->inputs[0]);
		}
	}

	ms_bufferizer_put_from_queue(&s->echo,f->inputs[1]);
	
	/*per-call scratch frames on the stack, one for the reference and one for the capture*/
	ref=(uint8_t*)alloca(nbytes);
	echo=(uint8_t*)alloca(nbytes);
	/*process as many complete captured frames as are available*/
	while (ms_bufferizer_read(&s->echo,echo,nbytes)==nbytes){
		mblk_t *oecho=allocb(nbytes,0);
		int avail;
		int avail_samples;

		if (!s->echostarted) s->echostarted=TRUE;
		if ((avail=ms_bufferizer_get_avail(&s->delayed_ref))<((s->nominal_ref_samples*2)+nbytes)){
			/*we don't have enough to read in a reference signal buffer, inject silence instead*/
			avail=nbytes;
			refm=allocb(nbytes,0);
			memset(refm->b_wptr,0,nbytes);
			refm->b_wptr+=nbytes;
			ms_bufferizer_put(&s->delayed_ref,refm);
			ms_queue_put(f->outputs[0],dupmsg(refm));
			/*warn only on the transition into the silence-injecting state*/
			if (!s->using_zeroes){
				ms_warning("Not enough ref samples, using zeroes");
				s->using_zeroes=TRUE;
			}
		}else{
			if (s->using_zeroes){
				ms_message("Samples are back.");
				s->using_zeroes=FALSE;
			}
			/* read from our no-delay buffer and output */
			refm=allocb(nbytes,0);
			if (ms_bufferizer_read(&s->ref,refm->b_wptr,nbytes)==0){
				ms_fatal("Should never happen");
			}
			refm->b_wptr+=nbytes;
			ms_queue_put(f->outputs[0],refm);
		}

		/*now read a valid buffer of delayed ref samples*/
		if (ms_bufferizer_read(&s->delayed_ref,ref,nbytes)==0){
			ms_fatal("Should never happen");
		}
		/*track the smallest amount of delayed reference left after a read, in samples;
		  -1 means no value has been recorded since the last flow-control check*/
		avail-=nbytes;
		avail_samples=avail/2;
		/*ms_message("avail=%i",avail_samples);*/
		if (avail_samples<s->min_ref_samples || s->min_ref_samples==-1){
			s->min_ref_samples=avail_samples;
		}
		
#ifdef EC_DUMP
		if (s->reffile)
			fwrite(ref,nbytes,1,s->reffile);
		if (s->echofile)
			fwrite(echo,nbytes,1,s->echofile);
#endif
		/*cancel the echo directly into the output message, then preprocess it in place*/
		speex_echo_cancellation(s->ecstate,(short*)echo,(short*)ref,(short*)oecho->b_wptr);
		speex_preprocess_run(s->den, (short*)oecho->b_wptr);
#ifdef EC_DUMP
		if (s->cleanfile)
			fwrite(oecho->b_wptr,nbytes,1,s->cleanfile);
#endif
		oecho->b_wptr+=nbytes;
		ms_queue_put(f->outputs[1],oecho);
	}
	
	/*verify our ref buffer does not become too big, meaning that we are receiving more samples than we are sending*/
	if ((((uint32_t)(f->ticker->time - s->flow_control_time)) >= flow_control_interval_ms) && (s->min_ref_samples != -1)) {
		int diff=s->min_ref_samples-s->nominal_ref_samples;
		/*tolerate up to one frame (nbytes/2 samples) of excess before purging*/
		if (diff>(nbytes/2)){
			int purge=diff-(nbytes/2);
			ms_warning("echo canceller: we are accumulating too much reference signal, need to throw out %i samples",purge);
			audio_flow_controller_set_target(&s->afc,purge,(flow_control_interval_ms*s->samplerate)/1000);
		}
		/*start a new measurement interval*/
		s->min_ref_samples=-1;
		s->flow_control_time = f->ticker->time;
	}
}
Пример #13
0
// Processes the frame of iFrameSize samples currently held in psMic:
// measures microphone/speaker levels, runs echo cancellation (when an echo
// state and speaker data exist) and the Speex preprocessor, decides whether
// the frame counts as speech, updates the user's talking state, and encodes
// the frame and passes it to flushCheck() unless both this frame and the
// previous one were non-speech.
void AudioInput::encodeAudioFrame() {
	int iArg;
	ClientUser *p=ClientUser::get(g.uiSession);
	int i;
	float sum;
	short max;

	short *psSource;

	// Counted even when not running.
	iFrameCounter++;

	if (! bRunning)
		return;

	// RMS level of the raw microphone frame in dB relative to 32768,
	// floored at -96. sum starts at 1 so the logarithm never sees zero.
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psMic[i] * psMic[i]);
	dPeakMic = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);

	// Largest absolute sample value of the raw microphone frame.
	max = 1;
	for (i=0;i<iFrameSize;i++)
		max = static_cast<short>(abs(psMic[i]) > max ? abs(psMic[i]) : max);
	dMaxMic = max;

	// Same RMS measurement for the speaker (echo reference) frame, if any.
	if (psSpeaker && (iEchoChannels > 0)) {
		sum=1.0f;
		for (i=0;i<iFrameSize;i++)
			sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]);
		dPeakSpeaker = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);
	} else {
		dPeakSpeaker = 0.0;
	}

	// qmSpeex is held for the rest of the function.
	QMutexLocker l(&qmSpeex);
	resetAudioProcessor();

	// Read the current AGC gain and subtract it from the configured noise
	// suppression value before handing that to the preprocessor.
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_AGC_GAIN, &iArg);
	float gainValue = static_cast<float>(iArg);
	iArg = g.s.iNoiseSuppress - iArg;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

	// With echo cancellation the cleaned frame (psClean) is preprocessed and
	// used from here on; otherwise psMic is preprocessed in place.
	if (sesEcho && psSpeaker) {
		speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean);
		speex_preprocess_run(sppPreprocess, psClean);
		psSource = psClean;
	} else {
		speex_preprocess_run(sppPreprocess, psMic);
		psSource = psMic;
	}

	// RMS level of the processed frame.
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psSource[i] * psSource[i]);
	float micLevel = sqrtf(sum / static_cast<float>(iFrameSize));
	dPeakSignal = qMax(20.0f*log10f(micLevel / 32768.0f), -96.0f);

	// Speech probability reported by the preprocessor, scaled from percent.
	spx_int32_t prob = 0;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob);
	fSpeechProb = static_cast<float>(prob) / 100.0f;

	// clean microphone level: peak of filtered signal attenuated by AGC gain
	dPeakCleanMic = qMax(dPeakSignal - gainValue, -96.0f);
	float level = (g.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakCleanMic / 96.0f);

	bool bIsSpeech = false;

	// Two thresholds: above fVADmax always counts as speech; between fVADmin
	// and fVADmax only if the previous frame was voice.
	if (level > g.s.fVADmax)
		bIsSpeech = true;
	else if (level > g.s.fVADmin && bPreviousVoice)
		bIsSpeech = true;

	// Keep reporting speech for up to iVoiceHold frames after it stops.
	if (! bIsSpeech) {
		iHoldFrames++;
		if (iHoldFrames < g.s.iVoiceHold)
			bIsSpeech = true;
	} else {
		iHoldFrames = 0;
	}

	// The transmit mode overrides the voice-activity decision.
	if (g.s.atTransmit == Settings::Continous)
		bIsSpeech = true;
	else if (g.s.atTransmit == Settings::PushToTalk)
		bIsSpeech = g.s.uiDoublePush && ((g.uiDoublePush < g.s.uiDoublePush) || (g.tDoublePush.elapsed() < g.s.uiDoublePush));

	bIsSpeech = bIsSpeech || (g.iPushToTalk > 0);

	// Muting, suppression, push-to-mute or a negative target silence the frame.
	if (g.s.bMute || ((g.s.lmLoopMode != Settings::Local) && p && (p->bMute || p->bSuppress)) || g.bPushToMute || (g.iTarget < 0)) {
		bIsSpeech = false;
	}

	// The frame counter is reset once more than 500 silent frames accumulate.
	if (bIsSpeech) {
		iSilentFrames = 0;
	} else {
		iSilentFrames++;
		if (iSilentFrames > 500)
			iFrameCounter = 0;
	}

	if (p) {
		if (! bIsSpeech)
			p->setTalking(Settings::Passive);
		else if (g.iTarget == 0)
			p->setTalking(Settings::Talking);
		else
			p->setTalking(Settings::Shouting);
	}

	// Audio cue on the transitions into and out of speech.
	// (The trailing "&& ao" in the second condition repeats its first test.)
	if (g.s.bTxAudioCue && g.uiSession != 0) {
		AudioOutputPtr ao = g.ao;
		if (bIsSpeech && ! bPreviousVoice && ao)
			ao->playSample(g.s.qsTxAudioCueOn);
		else if (ao && !bIsSpeech && bPreviousVoice && ao)
			ao->playSample(g.s.qsTxAudioCueOff);
	}

	// Nothing to send when neither this frame nor the previous one is speech:
	// handle idle auto-deafen, set the AGC increment to 0 and stop here.
	if (! bIsSpeech && ! bPreviousVoice) {
		iBitrate = 0;
		if (g.s.iIdleTime && ! g.s.bDeaf && ((tIdle.elapsed() / 1000000ULL) > g.s.iIdleTime)) {
			emit doDeaf();
			tIdle.restart();
		}
		spx_int32_t increment = 0;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_INCREMENT, &increment);
		return;
	} else {
		spx_int32_t increment = 12;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_INCREMENT, &increment);
	}

	tIdle.restart();
	/*
		int r = celt_encoder_ctl(ceEncoder, CELT_SET_POST_MDCT_CALLBACK(celtBack, NULL));
		qWarning() << "Set Callback" << r;
	*/

	unsigned char buffer[512];
	int len;

	// Every message type other than UDPVoiceSpeex is encoded with CELT;
	// a zero-length CELT result sends nothing.
	if (umtType != MessageHandler::UDPVoiceSpeex) {
		len = encodeCELTFrame(psSource, buffer);
		if (len == 0)
			return;
	} else {
		len = encodeSpeexFrame(psSource, buffer);
	}

	// The second argument is true when this frame is not speech.
	flushCheck(QByteArray(reinterpret_cast<const char *>(buffer), len), ! bIsSpeech);

	if (! bIsSpeech)
		iBitrate = 0;

	bPreviousVoice = bIsSpeech;
}
Пример #14
0
// Processes the frame of iFrameSize samples currently held in psMic:
// measures microphone/speaker levels, runs echo cancellation (when an echo
// state and speaker data exist) and the Speex preprocessor, decides whether
// the frame counts as speech, updates the user's talking state, and then
// encodes the frame with CELT or buffers/encodes it with Opus before passing
// the encoded data to flushCheck().
void AudioInput::encodeAudioFrame() {
	int iArg;
	int i;
	float sum;
	short max;

	short *psSource;

	// Counted even when not running.
	iFrameCounter++;

	if (! bRunning)
		return;

	// One pass over the raw microphone frame: sum of squares for the RMS
	// level and the largest absolute sample. sum starts at 1 so the
	// logarithm below never sees zero.
	sum=1.0f;
	max = 1;
	for (i=0;i<iFrameSize;i++) {
		sum += static_cast<float>(psMic[i] * psMic[i]);
		max = std::max(static_cast<short>(abs(psMic[i])), max);
	}
	dPeakMic = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);
	dMaxMic = max;

	// Same RMS measurement for the speaker (echo reference) frame, if any.
	if (psSpeaker && (iEchoChannels > 0)) {
		sum=1.0f;
		for (i=0;i<iFrameSize;i++)
			sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]);
		dPeakSpeaker = qMax(20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f), -96.0f);
	} else {
		dPeakSpeaker = 0.0;
	}

	// qmSpeex is held for the rest of the function.
	QMutexLocker l(&qmSpeex);
	resetAudioProcessor();

	// Read the current AGC gain and subtract it from the configured noise
	// suppression value before handing that to the preprocessor.
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_AGC_GAIN, &iArg);
	float gainValue = static_cast<float>(iArg);
	iArg = g.s.iNoiseSuppress - iArg;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

	// With echo cancellation the cleaned frame (psClean) is preprocessed and
	// used from here on; otherwise psMic is preprocessed in place.
	if (sesEcho && psSpeaker) {
		speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean);
		speex_preprocess_run(sppPreprocess, psClean);
		psSource = psClean;
	} else {
		speex_preprocess_run(sppPreprocess, psMic);
		psSource = psMic;
	}

	// RMS level of the processed frame.
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psSource[i] * psSource[i]);
	float micLevel = sqrtf(sum / static_cast<float>(iFrameSize));
	dPeakSignal = qMax(20.0f*log10f(micLevel / 32768.0f), -96.0f);

	// Speech probability reported by the preprocessor, scaled from percent.
	spx_int32_t prob = 0;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob);
	fSpeechProb = static_cast<float>(prob) / 100.0f;

	// clean microphone level: peak of filtered signal attenuated by AGC gain
	dPeakCleanMic = qMax(dPeakSignal - gainValue, -96.0f);
	float level = (g.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakCleanMic / 96.0f);

	bool bIsSpeech = false;

	// Two thresholds: above fVADmax always counts as speech; between fVADmin
	// and fVADmax only if the previous frame was voice.
	if (level > g.s.fVADmax)
		bIsSpeech = true;
	else if (level > g.s.fVADmin && bPreviousVoice)
		bIsSpeech = true;

	// Keep reporting speech for up to iVoiceHold frames after it stops.
	if (! bIsSpeech) {
		iHoldFrames++;
		if (iHoldFrames < g.s.iVoiceHold)
			bIsSpeech = true;
	} else {
		iHoldFrames = 0;
	}

	// The transmit mode overrides the voice-activity decision.
	if (g.s.atTransmit == Settings::Continuous)
		bIsSpeech = true;
	else if (g.s.atTransmit == Settings::PushToTalk)
		bIsSpeech = g.s.uiDoublePush && ((g.uiDoublePush < g.s.uiDoublePush) || (g.tDoublePush.elapsed() < g.s.uiDoublePush));

	bIsSpeech = bIsSpeech || (g.iPushToTalk > 0);

	// Muting, suppression, push-to-mute or a negative target silence the frame.
	ClientUser *p = ClientUser::get(g.uiSession);
	if (g.s.bMute || ((g.s.lmLoopMode != Settings::Local) && p && (p->bMute || p->bSuppress)) || g.bPushToMute || (g.iTarget < 0)) {
		bIsSpeech = false;
	}

	// The frame counter is reset once more than 500 silent frames accumulate.
	if (bIsSpeech) {
		iSilentFrames = 0;
	} else {
		iSilentFrames++;
		if (iSilentFrames > 500)
			iFrameCounter = 0;
	}

	if (p) {
		if (! bIsSpeech)
			p->setTalking(Settings::Passive);
		else if (g.iTarget == 0)
			p->setTalking(Settings::Talking);
		else
			p->setTalking(Settings::Shouting);
	}

	// Audio cue on the transitions into and out of speech.
	if (g.s.bTxAudioCue && g.uiSession != 0) {
		AudioOutputPtr ao = g.ao;
		if (bIsSpeech && ! bPreviousVoice && ao)
			ao->playSample(g.s.qsTxAudioCueOn);
		else if (ao && !bIsSpeech && bPreviousVoice)
			ao->playSample(g.s.qsTxAudioCueOff);
	}

	// Nothing to send when neither this frame nor the previous one is speech:
	// apply the configured idle action, set the AGC increment to 0 and stop.
	if (! bIsSpeech && ! bPreviousVoice) {
		iBitrate = 0;

		if (g.s.iaeIdleAction != Settings::Nothing && ((tIdle.elapsed() / 1000000ULL) > g.s.iIdleTime)) {

			if (g.s.iaeIdleAction == Settings::Deafen && !g.s.bDeaf) {
				tIdle.restart();
				emit doDeaf();
			} else if (g.s.iaeIdleAction == Settings::Mute && !g.s.bMute) {
				tIdle.restart();
				emit doMute();
			}
		}

		spx_int32_t increment = 0;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_INCREMENT, &increment);
		return;
	} else {
		spx_int32_t increment = 12;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_INCREMENT, &increment);
	}

	// Request an encoder reset at the start of each new stretch of speech.
	if (bIsSpeech && !bPreviousVoice) {
		bResetEncoder = true;
	}

	tIdle.restart();

	EncodingOutputBuffer buffer;
	Q_ASSERT(buffer.size() >= static_cast<size_t>(iAudioQuality / 100 * iAudioFrames / 8));
	
	int len = 0;

	// `encoded` says whether `buffer` holds data to flush after this call;
	// it is cleared while Opus frames are still being accumulated.
	bool encoded = true;
	if (!selectCodec())
		return;

	if (umtType == MessageHandler::UDPVoiceCELTAlpha || umtType == MessageHandler::UDPVoiceCELTBeta) {
		// CELT: every frame is encoded immediately.
		len = encodeCELTFrame(psSource, buffer);
		if (len <= 0) {
			iBitrate = 0;
			qWarning() << "encodeCELTFrame failed" << iBufferedFrames << iFrameSize << len;
			return;
		}
		++iBufferedFrames;
	} else if (umtType == MessageHandler::UDPVoiceOpus) {
		// Opus: frames are collected in opusBuffer and encoded together once
		// iAudioFrames are buffered or speech ends.
		encoded = false;
		opusBuffer.insert(opusBuffer.end(), psSource, psSource + iFrameSize);
		++iBufferedFrames;

		if (!bIsSpeech || iBufferedFrames >= iAudioFrames) {
			if (iBufferedFrames < iAudioFrames) {
				// Stuff frame to framesize if speech ends and we don't have enough audio
				// this way we are guaranteed to have a valid framecount and won't cause
				// a codec configuration switch by suddenly using a wildly different
				// framecount per packet.
				const int missingFrames = iAudioFrames - iBufferedFrames;
				opusBuffer.insert(opusBuffer.end(), iFrameSize * missingFrames, 0);
				iBufferedFrames += missingFrames;
				iFrameCounter += missingFrames;
			}
			
			Q_ASSERT(iBufferedFrames == iAudioFrames);

			len = encodeOpusFrame(&opusBuffer[0], iBufferedFrames * iFrameSize, buffer);
			opusBuffer.clear();
			if (len <= 0) {
				iBitrate = 0;
				qWarning() << "encodeOpusFrame failed" << iBufferedFrames << iFrameSize << len;
				iBufferedFrames = 0; // These are lost. Make sure not to mess up our sequence counter next flushCheck.
				return;
			}
			encoded = true;
		}
	}

	// The second argument is true when this frame is not speech.
	if (encoded) {
		flushCheck(QByteArray(reinterpret_cast<char *>(&buffer[0]), len), !bIsSpeech);
	}

	if (! bIsSpeech)
		iBitrate = 0;

	bPreviousVoice = bIsSpeech;
}
Пример #15
0
/** Deprecated entry point for echo cancellation on one frame.
 *  Forwards st, in, far_end and out unchanged to speex_echo_cancellation();
 *  the last argument (Yout) is ignored.
 */
EXPORT void speex_echo_cancel(SpeexEchoState *st,
                              const spx_int16_t *in,
                              const spx_int16_t *far_end,
                              spx_int16_t *out,
                              spx_int32_t *Yout)
{
   (void)Yout;   /* kept only for source compatibility */

   speex_echo_cancellation(st, in, far_end, out);
}
Пример #16
0
/*
 * Run the Speex echo canceller on one block.
 *
 * rec, play and out are byte pointers that are handed to Speex as
 * spx_int16_t sample buffers; the cancelled signal is written to out.
 */
void pa_speex_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *play, uint8_t *out)
{
    const spx_int16_t *recorded = (const spx_int16_t *) rec;
    const spx_int16_t *played = (const spx_int16_t *) play;
    spx_int16_t *cleaned = (spx_int16_t *) out;

    speex_echo_cancellation(ec->params.priv.speex.state, recorded, played, cleaned);
}
Пример #17
0
/*
 * Process one frame with the echo-canceller backend selected by
 * aec_core_used, then optionally amplify the result.
 *
 * farend   played-back (reference) samples; may be NULL, in which case no
 *          far-end data is buffered (WebRTC) or speex_echo_capture() is used
 *          instead of speex_echo_cancellation() (Speex).
 * nearend  captured samples.
 * out      receives the processed samples.
 *
 * Returns 0 when the backend reports success (for Speex: when the
 * preprocessor call returns 1), -1 otherwise.
 *
 * When output_sound_amplification is positive and not 1.0, every output
 * sample is multiplied by it and saturated to the int16_t range, so loud
 * frames clip instead of overflowing the conversion back to int16_t.
 */
int KotiAEC_process(const int16_t* farend, const int16_t* nearend, int16_t* out)
{
    int ret = -1, i = 0, frame_size = 0;
    switch(aec_core_used)
    {
#ifdef WEBRTC_AEC_CORE_ENABLED
    case WEBRTC_AEC:
        /* Order for this backend: AEC, then noise suppression, then AGC. */
        if(farend)
            WebRtcAec_BufferFarend(webrtc_aec_pty.webrtc_aec, farend, webrtc_aec_pty.frame_size);
        if(!WebRtcAec_Process(webrtc_aec_pty.webrtc_aec, nearend, NULL, out, NULL, webrtc_aec_pty.frame_size,
                                   webrtc_aec_pty.sndcard_delay_ms, 0))
        {
            ret = 0;
        }

        if(webrtc_aec_pty.webrtc_ns)
        {
            /* A 160-sample frame is passed to the suppressor as two halves. */
            WebRtcNsx_Process((NsxHandle*)webrtc_aec_pty.webrtc_ns, out, NULL, out, NULL);
            if(webrtc_aec_pty.frame_size == 160)
                WebRtcNsx_Process((NsxHandle*)webrtc_aec_pty.webrtc_ns, out+80, NULL, out+80, NULL);
        }

        if(webrtc_aec_pty.webrtc_agc)
        {
            int32_t out_c; uint8_t warn_status;
            WebRtcAgc_Process(webrtc_aec_pty.webrtc_agc, out, NULL, webrtc_aec_pty.frame_size, out, NULL, 32,
                              &out_c, 1, &warn_status);
        }

        frame_size = webrtc_aec_pty.frame_size;
        break;
    case WEBRTC_AECM:
        /* Order for this backend: noise suppression and AGC run on a copy of
           the near-end frame first, and the AEC runs last on that copy. */
        memcpy(proc_tmp_buf, nearend, webrtc_aecm_pty.frame_size*2);
        if(webrtc_aecm_pty.webrtc_ns)
        {
            WebRtcNsx_Process((NsxHandle*)webrtc_aecm_pty.webrtc_ns, proc_tmp_buf, NULL, proc_tmp_buf, NULL);
            if(webrtc_aecm_pty.frame_size == 160)
                WebRtcNsx_Process((NsxHandle*)webrtc_aecm_pty.webrtc_ns, proc_tmp_buf+80, NULL, proc_tmp_buf+80, NULL);
        }

        if(webrtc_aecm_pty.webrtc_agc)
        {
            int32_t out_c; uint8_t warn_status;
            WebRtcAgc_Process(webrtc_aecm_pty.webrtc_agc, proc_tmp_buf, NULL, webrtc_aecm_pty.frame_size, proc_tmp_buf, NULL, 32,
                              &out_c, 1, &warn_status);
        }

        /* AEC */
        if(farend)
            WebRtcAecm_BufferFarend(webrtc_aecm_pty.webrtc_aec, farend, webrtc_aecm_pty.frame_size);
        if(!WebRtcAecm_Process(webrtc_aecm_pty.webrtc_aec, proc_tmp_buf, NULL, out, webrtc_aecm_pty.frame_size,
                              webrtc_aecm_pty.sndcard_delay_ms))
        {
            ret = 0;
        }

        frame_size = webrtc_aecm_pty.frame_size;
        break;
#endif
    case SPEEX_AEC:
    default:
#ifdef OLD_SPEEX_AEC
        speex_echo_cancel((SpeexEchoState*)speex_aec_pty.speex_echo_state, nearend, farend, out, speex_aec_pty.nosie);
        if(speex_preprocess((SpeexPreprocessState*)speex_aec_pty.speex_preprocess_state, out, speex_aec_pty.nosie) == 1)
            ret = 0;
#else
        if(farend)
            speex_echo_cancellation((SpeexEchoState*)speex_aec_pty.speex_echo_state, nearend, farend, out);
        else
            speex_echo_capture((SpeexEchoState*)speex_aec_pty.speex_echo_state, nearend, out);
        if(speex_preprocess_run((SpeexPreprocessState*)speex_aec_pty.speex_preprocess_state, out) == 1)
            ret = 0;
#endif

        frame_size = speex_aec_pty.frame_size;
        break;
    }

    /* Optional output gain, with saturation to the 16-bit sample range. */
    if(output_sound_amplification != 1.0f && output_sound_amplification > 0)
    {
        for(i = 0; i<frame_size; ++i)
        {
            double amplified = out[i]*output_sound_amplification;

            if(amplified > 32767.0)
                amplified = 32767.0;
            else if(amplified < -32768.0)
                amplified = -32768.0;
            out[i] = (int16_t)amplified;
        }
    }

    return ret;
}
Пример #18
0
// Processes the frame of iFrameSize samples currently held in psMic:
// measures microphone/speaker levels, (re)creates the Speex preprocessor and
// echo canceller when bResetProcessor is set, runs echo cancellation and
// preprocessing, decides whether the frame counts as speech, updates the
// player's talking state, and encodes the frame into sbBits with Speex
// (optionally preceded by packed position data) before calling flushCheck().
void AudioInput::encodeAudioFrame() {
	int iArg;
	ClientPlayer *p=ClientPlayer::get(g.uiSession);
	int i;
	float sum;
	short max;

	short *psSource;

	// Counted even when not running.
	iFrameCounter++;

	if (! bRunning) {
		return;
	}

	// RMS level of the raw microphone frame in dB relative to 32768,
	// floored at -96. sum starts at 1 so the logarithm never sees zero.
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psMic[i] * psMic[i]);
	dPeakMic=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f);
	if (dPeakMic < -96.0f)
		dPeakMic = -96.0f;

	// Largest absolute sample value of the raw microphone frame.
	max = 1;
	for (i=0;i<iFrameSize;i++)
		max = static_cast<short>(abs(psMic[i]) > max ? abs(psMic[i]) : max);
	dMaxMic = max;

	// Echo test: transform the scaled frame, find the strongest bin (from
	// index 2 up), and keep it only if no other bin reaches half its power.
	// The result is stored as bin * 2 in iBestBin (0 when there is no clear
	// single peak).
	if (g.bEchoTest) {
		STACKVAR(float, fft, iFrameSize);
		STACKVAR(float, power, iFrameSize);
		float scale = 1.f / static_cast<float>(iFrameSize);
		for (i=0;i<iFrameSize;i++)
			fft[i] = static_cast<float>(psMic[i]) * scale;
		mumble_drft_forward(&fftTable, fft);
		float mp = 0.0f;
		int bin = 0;
		power[0]=power[1]=0.0f;
		for (i=2;i < iFrameSize / 2;i++) {
			power[i] = sqrtf(fft[2*i]*fft[2*i]+fft[2*i-1]*fft[2*i-1]);
			if (power[i] > mp) {
				bin = i;
				mp = power[i];
			}
		}
		for (i=2;i< iFrameSize / 2;i++) {
			if (power[i] * 2 > mp) {
				if (i != bin)
					bin = 0;
			}
		}
		iBestBin = bin * 2;
	}

	// RMS level of the speaker (echo reference) frame.
	// NOTE(review): psSpeaker is read here without a null check -- confirm it
	// is always valid when iEchoChannels > 0.
	if (iEchoChannels > 0) {
		sum=1.0f;
		for (i=0;i<iFrameSize;i++)
			sum += static_cast<float>(psSpeaker[i] * psSpeaker[i]);
		dPeakSpeaker=20.0f*log10f(sqrtf(sum / static_cast<float>(iFrameSize)) / 32768.0f);
		if (dPeakSpeaker < -96.0f)
			dPeakSpeaker = -96.0f;
	} else {
		dPeakSpeaker = 0.0;
	}

	// qmSpeex is held for the rest of the function.
	QMutexLocker l(&qmSpeex);

	// Rebuild the preprocessor (and the echo canceller, if echo channels are
	// in use) from scratch and reset the frame count and bit buffer.
	if (bResetProcessor) {
		if (sppPreprocess)
			speex_preprocess_state_destroy(sppPreprocess);
		if (sesEcho)
			speex_echo_state_destroy(sesEcho);

		sppPreprocess = speex_preprocess_state_init(iFrameSize, SAMPLE_RATE);

		// Enable VAD, denoise, AGC and dereverb.
		iArg = 1;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_VAD, &iArg);
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DENOISE, &iArg);
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC, &iArg);
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_DEREVERB, &iArg);

		iArg = 30000;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_TARGET, &iArg);

		// Maximum AGC gain: the ratio of 30000 to iMinLoudness, in dB, rounded down.
		float v = 30000.0f / static_cast<float>(g.s.iMinLoudness);
		iArg = lroundf(floorf(20.0f * log10f(v)));
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_AGC_MAX_GAIN, &iArg);

		iArg = g.s.iNoiseSuppress;
		speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &iArg);

		if (iEchoChannels > 0) {
			// Echo tail of ten frames; the echo state is also attached to the
			// preprocessor.
			sesEcho = speex_echo_state_init(iFrameSize, iFrameSize*10);
			iArg = SAMPLE_RATE;
			speex_echo_ctl(sesEcho, SPEEX_SET_SAMPLING_RATE, &iArg);
			speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_SET_ECHO_STATE, sesEcho);

			jitter_buffer_reset(jb);
			qWarning("AudioInput: ECHO CANCELLER ACTIVE");
		} else {
			sesEcho = NULL;
		}

		iFrames = 0;
		speex_bits_reset(&sbBits);

		bResetProcessor = false;
	}

	int iIsSpeech;

	// With echo cancellation the cleaned frame (psClean) is preprocessed and
	// used from here on; otherwise psMic is preprocessed in place. The value
	// returned by the preprocessor is overwritten by the threshold test below.
	if (sesEcho) {
		speex_echo_cancellation(sesEcho, psMic, psSpeaker, psClean);
		iIsSpeech=speex_preprocess_run(sppPreprocess, psClean);
		psSource = psClean;
	} else {
		iIsSpeech=speex_preprocess_run(sppPreprocess, psMic);
		psSource = psMic;
	}

	// RMS level of the processed frame.
	sum=1.0f;
	for (i=0;i<iFrameSize;i++)
		sum += static_cast<float>(psSource[i] * psSource[i]);
	float micLevel = sqrtf(sum / static_cast<float>(iFrameSize));
	dPeakSignal=20.0f*log10f(micLevel / 32768.0f);
	if (dPeakSignal < -96.0f)
		dPeakSignal = -96.0f;

	// Speech probability reported by the preprocessor, scaled from percent.
	spx_int32_t prob = 0;
	speex_preprocess_ctl(sppPreprocess, SPEEX_PREPROCESS_GET_PROB, &prob);
	fSpeechProb = static_cast<float>(prob) / 100.0f;

	// Detection level: the speech probability, or the raw microphone level
	// mapped from [-96, 0] dB onto [0, 1].
	float level = (g.s.vsVAD == Settings::SignalToNoise) ? fSpeechProb : (1.0f + dPeakMic / 96.0f);

	// Two thresholds: above fVADmax always counts as speech; between fVADmin
	// and fVADmax only if the previous frame was voice.
	if (level > g.s.fVADmax)
		iIsSpeech = 1;
	else if (level > g.s.fVADmin && bPreviousVoice)
		iIsSpeech = 1;
	else
		iIsSpeech = 0;

	// Keep reporting speech for up to iVoiceHold frames after it stops.
	if (! iIsSpeech) {
		iHoldFrames++;
		if (iHoldFrames < g.s.iVoiceHold)
			iIsSpeech=1;
	} else {
		iHoldFrames = 0;
	}

	// The transmit mode overrides the voice-activity decision.
	if (g.s.atTransmit == Settings::Continous)
		iIsSpeech = 1;
	else if (g.s.atTransmit == Settings::PushToTalk)
		iIsSpeech = g.s.uiDoublePush && ((g.uiDoublePush < g.s.uiDoublePush) || (g.tDoublePush.elapsed() < g.s.uiDoublePush));

	iIsSpeech = iIsSpeech || (g.iPushToTalk > 0) || (g.iAltSpeak > 0);

	// Muting or push-to-mute silences the frame.
	if (g.s.bMute || ((g.s.lmLoopMode != Settings::Local) && p && p->bMute) || g.bPushToMute) {
		iIsSpeech = 0;
	}

	// The frame counter is reset once more than 200 silent frames accumulate.
	if (iIsSpeech) {
		iSilentFrames = 0;
	} else {
		iSilentFrames++;
		if (iSilentFrames > 200)
			iFrameCounter = 0;
	}

	if (p)
		p->setTalking(iIsSpeech, (g.iAltSpeak > 0));

	// Push-to-talk click: a short sine sweep on the transitions into and out
	// of speech. (The trailing "&& ao" in the second condition repeats its
	// first test.)
	if (g.s.bPushClick && (g.s.atTransmit == Settings::PushToTalk)) {
		AudioOutputPtr ao = g.ao;
		if (iIsSpeech && ! bPreviousVoice && ao)
			ao->playSine(400.0f,1200.0f,5);
		else if (ao && !iIsSpeech && bPreviousVoice && ao)
			ao->playSine(620.0f,-1200.0f,5);
	}
	// Nothing to send when neither this frame nor the previous one is speech;
	// handle idle auto-mute and stop here.
	if (! iIsSpeech && ! bPreviousVoice) {
		iBitrate = 0;
		if (g.s.iIdleTime && ! g.s.bMute && ((tIdle.elapsed() / 1000000ULL) > g.s.iIdleTime)) {
			emit doMute();
			tIdle.restart();
		}
		return;
	}

	bPreviousVoice = iIsSpeech;

	tIdle.restart();

	// NOTE(review): this zeroes psMic, but psSource points at psClean when
	// sesEcho is set, so the frame encoded below is not silenced in that
	// case -- confirm intent.
	if (! iIsSpeech) {
		memset(psMic, 0, sizeof(short) * iFrameSize);
	}

	// At the first frame of a packet (iFrames == 0), optionally prepend the
	// three position floats: a 5-bit value 13, a 4-bit byte count, then the
	// serialized bytes.
	if (g.s.bTransmitPosition && g.p && ! g.bCenterPosition && (iFrames == 0) && g.p->fetch()) {
		QByteArray q;
		QDataStream ds(&q, QIODevice::WriteOnly);
		ds << g.p->fPosition[0];
		ds << g.p->fPosition[1];
		ds << g.p->fPosition[2];

		speex_bits_pack(&sbBits, 13, 5);
		speex_bits_pack(&sbBits, q.size(), 4);

		const unsigned char *d=reinterpret_cast<const unsigned char*>(q.data());
		for (i=0;i<q.size();i++) {
			speex_bits_pack(&sbBits, d[i], 8);
		}
	}

	// Append the encoded frame to the bit buffer and count it.
	speex_encode_int(esEncState, psSource, &sbBits);
	iFrames++;

	speex_encoder_ctl(esEncState, SPEEX_GET_BITRATE, &iBitrate);

	flushCheck();
}