static int _ms_audio_diff_one_chunk(int16_t *s1, int16_t *s2, int nsamples, int max_shift_samples, int nchannels, double *ret, int64_t *s1_energy, ProgressContext *pctx){ int xcorr_size; int max_index_r; int max_index_l; int max_pos; ProgressContext local_pctx; int64_t er, el; xcorr_size=max_shift_samples*2; if (nchannels == 2){ float *xcorr_r = ms_new0(float, xcorr_size); float *xcorr_l = ms_new0(float, xcorr_size); double max = 0; double max_r, max_l; int i; progress_context_push(pctx, &local_pctx, 0.5); max_index_r = compute_cross_correlation(s1, nsamples, s2, xcorr_r, xcorr_size, &local_pctx, 2, &er); max_r = xcorr_r[max_index_r]; progress_context_pop(pctx, &local_pctx); progress_context_push(pctx, &local_pctx, 0.5); max_index_l = compute_cross_correlation(s1 + 1, nsamples, s2 + 1, xcorr_l, xcorr_size, &local_pctx, 2, &el); max_l = xcorr_l[max_index_l]; progress_context_pop(pctx, &local_pctx); max_pos = 0; /*sum the square of r and l xcorr signals to determine the global maximum*/ for (i = 0; i < max_shift_samples; ++i){ xcorr_r[i] = xcorr_r[i]*xcorr_r[i] + xcorr_l[i]*xcorr_l[i]; if (xcorr_r[i] > max){ max = xcorr_r[i]; max_pos = i; } } max = sqrt(max/2); ms_message("chunk - max stereo cross-correlation obtained at position [%i,%i], similarity factor=%g,%g", max_index_r-max_shift_samples,max_index_l-max_shift_samples,max_r, max_l); max_pos = max_pos - max_shift_samples; ms_message("chunk - max stereo overall cross-correlation obtained at position [%i], similarity factor=[%g]", max_pos, max); *ret = max; if (s1_energy) *s1_energy = (er + el)/2; ms_free(xcorr_r); ms_free(xcorr_l); }else{
/*
 * Compare two audio files by cross-correlation and report a similarity factor.
 *
 * file1, file2:       paths handed to file_info_new() to load each recording.
 * ret:                output; reset to 0 on entry, then set to the similarity
 *                     factor on success (left at 0 on failure).
 * max_shift_percent:  clamped to [1, 100]; the maximum shift explored is that
 *                     percentage of the shorter file's sample count.
 * func, user_data:    forwarded untouched to the cross-correlation helpers
 *                     (presumably a progress callback and its context, going by
 *                     the MSAudioDiffProgressNotify type name).
 *
 * Returns 0 on success, -1 if a file cannot be loaded, if the sampling rates or
 * channel counts differ, if a compared channel carries no energy, or if the
 * files are too short for the requested shift window.
 *
 * For stereo input each interleaved channel is correlated separately; the result
 * is the mean of the absolute normalized peaks, weighted down by the distance
 * between the two peak positions relative to the correlation window size.
 */
int ms_audio_diff(const char *file1, const char *file2, double *ret, int max_shift_percent, MSAudioDiffProgressNotify func, void *user_data){
	FileInfo *fi1, *fi2;
	int64_t *xcorr;
	int xcorr_size;
	int max_shift_samples;
	int max_index_r;
	int max_index_l;
	double max_r, max_l;
	int err = -1;

	*ret = 0;

	fi1 = file_info_new(file1);
	/* A file that cannot be loaded is a failure, exactly like for file2 below. */
	if (fi1 == NULL) return -1;
	fi2 = file_info_new(file2);
	if (fi2 == NULL){
		file_info_destroy(fi1);
		return -1;
	}

	/* From here on every exit goes through 'end' so that both FileInfo are released. */
	if (fi1->rate != fi2->rate){
		ms_error("Comparing files of different sampling rates is not supported (%d vs %d)", fi1->rate, fi2->rate);
		goto end;
	}
	if (fi1->nchannels != fi2->nchannels){
		ms_error("Comparing files with different number of channels is not supported (%d vs %d)", fi1->nchannels, fi2->nchannels);
		goto end;
	}

	file_info_compute_energy(fi1);
	file_info_compute_energy(fi2);

	if (fi1->energy_r == 0 || fi2->energy_r == 0){
		/*avoid division by zero*/
		ms_error("One of the two files is pure silence.");
		goto end;
	}
	if (fi1->nchannels == 2 && (fi1->energy_l == 0 || fi2->energy_l == 0)){
		/* The stereo path also normalizes by energy_l: avoid a division by zero there too. */
		ms_error("One of the two files has a silent channel.");
		goto end;
	}

	/* Use a 64-bit intermediate: nsamples * percent can exceed INT_MAX for long recordings. */
	max_shift_samples = (int)((int64_t)MIN(fi1->nsamples, fi2->nsamples) * MIN(MAX(1, max_shift_percent), 100) / 100);
	if (max_shift_samples <= 0){
		/* A zero-sized correlation buffer would be indexed below. */
		ms_error("Files are too short to be compared.");
		goto end;
	}
	xcorr_size = max_shift_samples * 2;
	xcorr = ms_new0(int64_t, xcorr_size);

	if (fi1->nchannels == 2){
		/* Channel offset 0 of the interleaved buffers. */
		max_index_r = compute_cross_correlation_interleaved(fi1->buffer, fi1->nsamples, fi2->buffer, fi2->nsamples, xcorr, xcorr_size, func, user_data, 0, max_shift_samples);
		max_r = xcorr[max_index_r];
		ms_message("max_r=%g", (double)max_r);
		max_r /= sqrt((double)fi1->energy_r * (double)fi2->energy_r);

		/* Channel offset 1; the same xcorr buffer is reused. */
		max_index_l = compute_cross_correlation_interleaved(fi1->buffer, fi1->nsamples, fi2->buffer, fi2->nsamples, xcorr, xcorr_size, func, user_data, 1, max_shift_samples);
		max_l = xcorr[max_index_l];
		ms_message("max_l=%g", (double)max_l);
		max_l /= sqrt((double)fi1->energy_l * (double)fi2->energy_l);

		ms_message("Max stereo cross-correlation obtained at position [%i,%i], similarity factor=%g,%g",
			   max_index_r - max_shift_samples, max_index_l - max_shift_samples, max_r, max_l);
		/* Penalize the mean peak when the two channels do not peak at the same shift. */
		*ret = 0.5 * (fabs(max_r) + fabs(max_l)) * (1 - (double)abs(max_index_r - max_index_l) / (double)xcorr_size);
	}else{
		max_index_r = compute_cross_correlation(fi1->buffer, fi1->nsamples, fi2->buffer, fi2->nsamples, xcorr, xcorr_size, func, user_data, max_shift_samples);
		max_r = xcorr[max_index_r];
		max_r /= (sqrt(fi1->energy_r) * sqrt(fi2->energy_r));
		*ret = max_r;
		ms_message("Max cross-correlation obtained at position [%i], similarity factor=%g", max_index_r - max_shift_samples, *ret);
	}
	ms_free(xcorr);
	err = 0;

end:
	file_info_destroy(fi1);
	file_info_destroy(fi2);
	return err;
}