/*
 * Measure the average signal level of a block of samples and feed it to
 * the silence detector.
 *
 * sd       Silence detector instance, passed through to
 *          pjmedia_silence_det_apply().
 * samples  The samples to measure.
 * count    Number of samples in the array.
 * p_level  Optional; when non-NULL it receives the measured average level.
 *
 * Returns whatever pjmedia_silence_det_apply() returns for the measured
 * level.
 */
PJ_DEF(pj_bool_t) pjmedia_silence_det_detect( pjmedia_silence_det *sd,
                                              const pj_int16_t samples[],
                                              pj_size_t count,
                                              pj_int32_t *p_level)
{
    /* Average signal level of this block of samples. */
    pj_uint32_t avg_level = pjmedia_calc_avg_signal(samples, count);

    /* Hand the level back to the caller only when it asked for it. */
    if (p_level) {
        *p_level = avg_level;
    }

    return pjmedia_silence_det_apply(sd, avg_level);
}
/*
 * Perform echo cancellation.
 *
 * The frame is first fed, segment by segment, to echo_supp_update() so the
 * level histories (and the learning state) are refreshed. Once a tail
 * position is known (tail_index >= 0), the recent microphone and speaker
 * levels are compared to pick a gain factor, the factor is smoothed against
 * the previous one, and the recorded frame is scaled in place.
 *
 * state     The echo suppressor instance (an echo_supp).
 * rec_frm   Recorded (microphone) frame; scaled in place.
 * play_frm  Frame played to the speaker.
 * options   Unused.
 * reserved  Unused.
 *
 * Always returns PJ_SUCCESS.
 */
PJ_DEF(pj_status_t) echo_supp_cancel_echo( void *state,
                                           pj_int16_t *rec_frm,
                                           const pj_int16_t *play_frm,
                                           unsigned options,
                                           void *reserved )
{
    echo_supp *ec = (echo_supp*) state;
    unsigned seg, N;
    unsigned idx, first, last;
    unsigned window, tail_len;
    unsigned mic_peak = 0, spk_peak = 0;
    float gain;

    PJ_UNUSED_ARG(options);
    PJ_UNUSED_ARG(reserved);

    /* Calculate number of segments. This should be okay even if
     * samples_per_frame is not a multiple of samples_per_segment, since
     * we only calculate level.
     */
    N = ec->samples_per_frame / ec->samples_per_segment;
    pj_assert(N>0);

    for (seg = 0; seg < N; ++seg) {
        unsigned offset = seg * ec->samples_per_segment;
        echo_supp_update(ec, rec_frm + offset, play_frm + offset);
    }

    /* Not ready: no tail position has been found yet. */
    if (ec->tail_index < 0)
        return PJ_SUCCESS;

    /* How many previous segments to look up, capped at the template size */
    window = SIGNAL_LOOKUP_MSEC / SEGMENT_PTIME;
    if (window > ec->templ_cnt)
        window = ec->templ_cnt;

    /* Maximum mic level over the most recent segments of the recording
     * history, to see if the local user is currently talking.
     */
    first = ec->templ_cnt - window;
    last = ec->templ_cnt;
    for (idx = first; idx < last; ++idx) {
        if (ec->rec_hist[idx] > mic_peak)
            mic_peak = ec->rec_hist[idx];
    }
    mic_peak = pjmedia_linear2ulaw(mic_peak) ^ 0xFF;

    /* Detected tail length, in number of segments */
    tail_len = (ec->tail_cnt - ec->tail_index);

    /* Maximum speaker level over the same number of segments, shifted back
     * by the tail length, to see if the remote user is currently talking.
     */
    first = ec->play_hist_cnt - window - tail_len;
    last = ec->play_hist_cnt - tail_len;
    for (idx = first; idx < last; ++idx) {
        if (ec->play_hist[idx] > spk_peak)
            spk_peak = ec->play_hist[idx];
    }
    spk_peak = pjmedia_linear2ulaw(spk_peak) ^ 0xFF;

    if (spk_peak < MIN_SIGNAL_ULAW) {
        if (mic_peak >= MIN_SIGNAL_ULAW) {
            /* Mic is talking, speaker is idle. Let mic signal pass as is. */
            gain = 1.0f;
            echo_supp_set_state(ec, ST_LOCAL_TALK, mic_peak);
        } else {
            /* Both mic and speaker seem to be idle. Also scale the mic
             * signal down with the average factor to reduce low power echo.
             */
            gain = ec->avg_factor[ec->tail_index] * 3 / 2;
            echo_supp_set_state(ec, ST_REM_SILENT, mic_peak);
        }
    } else if (mic_peak >= MIN_SIGNAL_ULAW && mic_peak > spk_peak) {
        /* Seems that both are talking. Scale the mic signal down a little
         * bit to reduce echo, while allowing both parties to talk at the
         * same time.
         */
        gain = (float)(ec->avg_factor[ec->tail_index] * 2);
        echo_supp_set_state(ec, ST_DOUBLETALK, mic_peak);
    } else {
        /* Speaker is active and the mic is either idle or not louder than
         * the speaker. Assume whatever the mic picked up is echo, so bring
         * the level down to minimum.
         */
        gain = ec->min_factor[ec->tail_index] / 2;
        echo_supp_set_state(ec, ST_REM_TALK, spk_peak);
    }

    /* Smoothen the transition: a rising gain moves halfway towards the
     * target, a falling gain moves only one twentieth of the way.
     */
    gain = (gain >= ec->last_factor)
               ? (gain + ec->last_factor) / 2
               : (gain + ec->last_factor*19) / 20;

    /* Amplify frame */
    amplify_frame(rec_frm, ec->samples_per_frame,
                  pj_ufloat_from_float(gain));
    ec->last_factor = gain;

    /* The residue is tracked only while the remote party is talking. */
    if (ec->talk_state != ST_REM_TALK)
        return PJ_SUCCESS;

    {
        unsigned out_level, recalc_cnt;

        /* Get the adjusted frame signal level */
        out_level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_frame);
        out_level = pjmedia_linear2ulaw(out_level) ^ 0xFF;

        /* Accumulate average echo residue to see the ES effectiveness */
        ec->residue = ((ec->residue * ec->running_cnt) + out_level) /
                      (ec->running_cnt + 1);
        ++ec->running_cnt;

        /* Check if we need to re-learn */
        recalc_cnt = CHECK_PERIOD * ec->clock_rate / ec->samples_per_frame;
        if (ec->running_cnt > recalc_cnt) {
            int iresidue = (int)(ec->residue*1000);

            PJ_LOG(5,(THIS_FILE, "Echo suppressor residue = %d.%03d",
                      iresidue/1000, iresidue%1000));

            if (ec->residue > MAX_RESIDUE && !ec->learning)
                echo_supp_soft_reset(ec);
            else
                ec->running_cnt = 0;

            ec->residue = 0;
        }
    }

    return PJ_SUCCESS;
}
/*
 * Classify the conversation for one candidate window of the play history.
 *
 * play_sum is the sum of the play levels in that window. When the averaged
 * play level is below MIN_SIGNAL_ULAW the state is set to ST_REM_SILENT;
 * otherwise, when the mic level sum is not below the play level sum, the
 * state is set to ST_LOCAL_TALK. In both cases non-zero is returned so the
 * caller abandons this update. Returns zero when neither applies.
 */
static int echo_supp_check_talk_state(echo_supp *ec, unsigned play_sum)
{
    unsigned ulaw = pjmedia_linear2ulaw(play_sum/ec->templ_cnt) ^ 0xFF;

    /* Remote isn't talking */
    if (ulaw < MIN_SIGNAL_ULAW) {
        echo_supp_set_state(ec, ST_REM_SILENT, ulaw);
        return 1;
    }

    /* Local user is talking */
    if (ec->sum_rec_level >= play_sum) {
        echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw);
        return 1;
    }

    return 0;
}

/*
 * Update EC state
 *
 * Pushes the level of one play segment and one mic segment into their
 * histories. While learning is active and the play history is full, it
 * then compares the mic level pattern with the play level pattern at every
 * candidate tail position, accumulates the results, and keeps track of the
 * best matching tail position together with its min/avg gain factors.
 *
 * ec        The echo suppressor state.
 * rec_frm   One segment (samples_per_segment samples) of the mic signal.
 * play_frm  One segment (samples_per_segment samples) of the play signal.
 */
static void echo_supp_update(echo_supp *ec, pj_int16_t *rec_frm,
                             const pj_int16_t *play_frm)
{
    unsigned i, j, level, play_sum;
    pj_uint16_t dropped_rec, dropped_play;
    float play_corr;
    int old_tail;

    /* Count the update, clamping the counter at 0x7FFFFFFF */
    if (++ec->update_cnt > 0x7FFFFFFF)
        ec->update_cnt = 0x7FFFFFFF;

    /* Level of the current play segment; +1 to avoid division by zero */
    level = pjmedia_calc_avg_signal(play_frm, ec->samples_per_segment);
    level += 1;

    /* Remember the oldest play level, then shift the history and append
     * the new level at the back.
     */
    dropped_play = ec->play_hist[0];
    pj_array_erase(ec->play_hist, sizeof(pj_uint16_t), ec->play_hist_cnt, 0);
    ec->play_hist[ec->play_hist_cnt-1] = (pj_uint16_t) level;

    /* Level of the current mic segment; +1 to avoid division by zero */
    level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_segment);
    level += 1;

    /* Same for the rec history */
    dropped_rec = ec->rec_hist[0];
    pj_array_erase(ec->rec_hist, sizeof(pj_uint16_t), ec->templ_cnt, 0);
    ec->rec_hist[ec->templ_cnt-1] = (pj_uint16_t) level;

    /* Can't do the calc until the play history is full, and there is
     * nothing to do once learning is done.
     */
    if (ec->update_cnt < ec->play_hist_cnt || !ec->learning)
        return;

    /* Calculate rec signal pattern */
    if (ec->sum_rec_level == 0) {
        /* Buffer has just been filled up, do full calculation */
        ec->rec_corr = 0;
        for (i=0; i < ec->templ_cnt-1; ++i) {
            float ratio = (float)ec->rec_hist[i+1] / ec->rec_hist[i];

            ec->rec_corr += ratio;
            ec->sum_rec_level += ec->rec_hist[i];
        }
        ec->sum_rec_level += ec->rec_hist[i];
    } else {
        /* Update from previous calculation: drop the contribution of the
         * entry that left the history and add the one that entered.
         */
        ec->sum_rec_level = ec->sum_rec_level - dropped_rec +
                            ec->rec_hist[ec->templ_cnt-1];
        ec->rec_corr = ec->rec_corr -
                       ((float)ec->rec_hist[0] / dropped_rec) +
                       ((float)ec->rec_hist[ec->templ_cnt-1] /
                        ec->rec_hist[ec->templ_cnt-2]);
    }

    /* Iterate through the play history and calculate the signal correlation
     * for every tail position in the play_hist. Save the result in temporary
     * array since we may bail out early if the conversation state is not
     * good to detect echo.
     */

    /*
     * First phase: do full calculation for the first position
     */
    if (ec->sum_play_level0 == 0) {
        /* Buffer has just been filled up, do full calculation */
        play_sum = 0;
        play_corr = 0;
        for (j=0; j<ec->templ_cnt-1; ++j) {
            float ratio = (float)ec->play_hist[j+1] / ec->play_hist[j];

            play_corr += ratio;
            play_sum += ec->play_hist[j];
        }
        play_sum += ec->play_hist[j];

        ec->sum_play_level0 = play_sum;
        ec->play_corr0 = play_corr;
    } else {
        /* Update from previous calculation */
        ec->sum_play_level0 = ec->sum_play_level0 - dropped_play +
                              ec->play_hist[ec->templ_cnt-1];
        ec->play_corr0 = ec->play_corr0 -
                         ((float)ec->play_hist[0] / dropped_play) +
                         ((float)ec->play_hist[ec->templ_cnt-1] /
                          ec->play_hist[ec->templ_cnt-2]);

        play_sum = ec->sum_play_level0;
        play_corr = ec->play_corr0;
    }

    ec->tmp_corr[0] = FABS(play_corr - ec->rec_corr);
    ec->tmp_factor[0] = (float)ec->sum_rec_level / play_sum;

    /* Bail out if remote isn't talking or local user is talking */
    if (echo_supp_check_talk_state(ec, play_sum))
        return;

    /*
     * Second phase: do incremental calculation for the rest of positions
     */
    for (i=1; i < ec->tail_cnt; ++i) {
        unsigned end = i + ec->templ_cnt;

        /* Slide the window one entry forward */
        play_sum -= ec->play_hist[i-1];
        play_sum += ec->play_hist[end-1];
        play_corr = play_corr -
                    ((float)ec->play_hist[i]/ec->play_hist[i-1]) +
                    ((float)ec->play_hist[end-1]/ec->play_hist[end-2]);

        /* Bail out if remote isn't talking or local user is talking */
        if (echo_supp_check_talk_state(ec, play_sum))
            return;

        /* Note: an extra bail-out on suspected doubletalk (mic level above
         * MIN_SIGNAL_ULAW) used to sit here but is disabled: not a good
         * idea if mic throws out loud echo.
         */

        /* Calculate correlation and save to temporary array */
        ec->tmp_corr[i] = FABS(play_corr - ec->rec_corr);

        /* Also calculate the gain factor between mic and speaker level */
        ec->tmp_factor[i] = (float)ec->sum_rec_level / play_sum;
        pj_assert(ec->tmp_factor[i] < 1);
    }

    /* We seem to have good signal, we can update the EC state */
    echo_supp_set_state(ec, ST_REM_TALK, MIN_SIGNAL_ULAW);

    /* Accumulate the correlation value to the history and at the same
     * time find the tail index of the best correlation.
     */
    old_tail = ec->tail_index;
    for (i=1; i<ec->tail_cnt-1; ++i) {
        float score;

        /* Accumulate correlation value for this tail position */
        ec->corr_sum[i] += ec->tmp_corr[i];

        /* Update the min and avg gain factor for this tail position */
        if (ec->tmp_factor[i] < ec->min_factor[i])
            ec->min_factor[i] = ec->tmp_factor[i];
        ec->avg_factor[i] = ((ec->avg_factor[i] * ec->tail_cnt) +
                             ec->tmp_factor[i]) /
                            (ec->tail_cnt + 1);

        /* To get the best correlation, also include the correlation
         * value of the neighbouring tail locations.
         */
        score = ec->corr_sum[i-1] + ec->corr_sum[i]*2 + ec->corr_sum[i+1];

        /* See if we have better correlation value (smaller is better) */
        if (score < ec->best_corr) {
            ec->tail_index = i;
            ec->best_corr = score;
        }
    }

    /* Report a change of the detected tail position */
    if (ec->tail_index != old_tail) {
        unsigned duration;
        int imin, iavg;

        duration = ec->update_cnt * SEGMENT_PTIME;
        imin = (int)(ec->min_factor[ec->tail_index] * 1000);
        iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);

        PJ_LOG(4,(THIS_FILE,
                  "Echo suppressor updated at t=%03d.%03ds, echo tail=%d msec"
                  ", factor min/avg=%d.%03d/%d.%03d",
                  (duration/1000), (duration%1000),
                  (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
                  imin/1000, imin%1000,
                  iavg/1000, iavg%1000));
    }

    /* Stop learning once enough calculations have been accumulated */
    ++ec->calc_cnt;
    if (ec->calc_cnt > ec->max_calc) {
        unsigned duration;
        int imin, iavg;

        ec->learning = PJ_FALSE;
        ec->running_cnt = 0;

        duration = ec->update_cnt * SEGMENT_PTIME;
        imin = (int)(ec->min_factor[ec->tail_index] * 1000);
        iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);

        PJ_LOG(4,(THIS_FILE,
                  "Echo suppressor learning done at t=%03d.%03ds, tail=%d ms"
                  ", factor min/avg=%d.%03d/%d.%03d",
                  (duration/1000), (duration%1000),
                  (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
                  imin/1000, imin%1000,
                  iavg/1000, iavg%1000));
    }
}
/* * main() */ int main(int argc, char *argv[]) { enum { NSAMPLES = 640, COUNT=100 }; pj_caching_pool cp; pjmedia_endpt *med_endpt; pj_pool_t *pool; pjmedia_port *file_port; int i; pj_status_t status; /* Verify cmd line arguments. */ if (argc != 2) { puts(""); puts(desc); return 1; } /* Must init PJLIB first: */ status = pj_init(0); PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1); /* Must create a pool factory before we can allocate any memory. */ pj_caching_pool_init(0, &cp, &pj_pool_factory_default_policy, 0); /* * Initialize media endpoint. * This will implicitly initialize PJMEDIA too. */ //status = pjmedia_endpt_create(&cp.factory, NULL, 1, &med_endpt); // charles modified status = pjmedia_endpt_create(0, &cp.factory, NULL, 1, 0, &med_endpt); PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1); /* Create memory pool for our file player */ pool = pj_pool_create( &cp.factory, /* pool factory */ "wav", /* pool name. */ 4000, /* init size */ 4000, /* increment size */ NULL /* callback on error */ ); /* Create file media port from the WAV file */ status = pjmedia_wav_player_port_create( pool, /* memory pool */ argv[1], /* file to play */ 0, /* use default ptime*/ 0, /* flags */ 0, /* default buffer */ &file_port/* returned port */ ); if (status != PJ_SUCCESS) { app_perror(THIS_FILE, "Unable to use WAV file", status); return 1; } if (file_port->info.samples_per_frame > NSAMPLES) { app_perror(THIS_FILE, "WAV clock rate is too big", PJ_EINVAL); return 1; } puts("Time\tPCMU\tLinear"); puts("------------------------"); for (i=0; i<COUNT; ++i) { pj_int16_t framebuf[NSAMPLES]; pjmedia_frame frm; pj_int32_t level32; unsigned ms; int level; frm.buf = framebuf; frm.size = sizeof(framebuf); pjmedia_port_get_frame(file_port, &frm); level32 = pjmedia_calc_avg_signal(framebuf, file_port->info.samples_per_frame); level = pjmedia_linear2ulaw(level32) ^ 0xFF; ms = i * 1000 * file_port->info.samples_per_frame / file_port->info.clock_rate; printf("%03d.%03d\t%7d\t%7d\n", ms/1000, ms%1000, level, 
level32); } puts(""); /* Destroy file port */ status = pjmedia_port_destroy( file_port ); PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1); /* Release application pool */ pj_pool_release( pool ); /* Destroy media endpoint. */ pjmedia_endpt_destroy( med_endpt ); /* Destroy pool factory */ pj_caching_pool_destroy( &cp ); /* Shutdown PJLIB */ pj_shutdown(0); /* Done. */ return 0; }