int main (int argc, char *argv[]) { char *filename_AG = NULL; char *filename_WD = NULL; int num_WD = 0; char *filename_AGWD = NULL; char *filename_ALN = NULL; float thresholdWD = 0.00; unsigned int p0AG = 0; unsigned int p0WD = 0; switch (argc) { case 2: if (strcmp(argv [1], "--help") == 0) { welcome(); help(); return 0; } break; case 17: if (strcmp(argv [1], "--sequence") == 0 && strcmp(argv [3], "--wd") == 0 && strcmp(argv [5], "--num") == 0 && strcmp(argv [7], "--out") == 0 && strcmp(argv [9], "--aln") == 0 && strcmp(argv [11], "--th") == 0 && strcmp(argv [13], "--pwd") == 0 && strcmp(argv [15], "--pag") == 0) { filename_AG = argv[2]; filename_WD = argv[4]; assert(sscanf(argv [6], "%d", &num_WD) == 1); filename_AGWD = argv[8]; filename_ALN = argv[10]; assert(sscanf(argv [12], "%f", &thresholdWD) == 1); assert(sscanf(argv [14], "%d", &p0WD) == 1); assert(sscanf(argv [16], "%d", &p0AG) == 1); } else return -1; break; default: welcome(); help(); return 0; break; } /* The num_WD variable is not needed any more. * I keep it just for because it helps me while * reading at the log files. */ printf("Working on WD %d\n", num_WD); /* Here I create the PCM objects, crucial for * PCM data import. * pcmWD contains the WD data (extracted from the US stream) * pcmAG contains the AG sequence, as shipped by the AG500 machine from hell */ mObjectPCM<int16_t> *pcmWD; mObjectPCM<int16_t> *pcmAG; pcmWD = new mObjectPCM<int16_t>(MPCM_STEREO, MPCM_STREAM_TINY); pcmAG = new mObjectPCM<int16_t>(MPCM_MONO, MPCM_STREAM_TINY); /* Read the previoulsy described WAV files */ mAudio2PCM(filename_WD, 0, 0, pcmWD); mAudio2PCM(filename_AG, 0, 0, pcmAG); /* Free unused memory */ pcmWD->Trim(); pcmAG->Trim(); /* Here I dump the PCM informations in handy buffers * bufferWD contains the WD data, right channel of the US stream * bufferSN contains the SN (sync) data, left channel of the US stream * bufferAG contains the AG SEQ data. This buffer will be used r/w * for peak detection. The original information is gonna * be lost. In fact... * bufferAG_copy ...contains a copy of the AG SEQ data */ int16_t *bufferWD = NULL; int16_t *bufferSN = NULL; int16_t *bufferAG = NULL; int16_t *bufferAG_copy = NULL; /* A size for almost each buffer. Almost each means * that bufferWD and bufferSN have the same size samplesWD. * Also bufferAG and its copy bufferAG_copy have the same size, * but they are used for processing, so I prefere to keep two * sizes. */ unsigned int samplesWD; unsigned int samplesAG; unsigned int samplesAG_copy; /* Fill the buffers... */ pcmWD->ExportBuffers(&bufferSN, &bufferWD, samplesWD); pcmAG->ExportBuffer(&bufferAG, samplesAG); pcmAG->ExportBuffer(&bufferAG_copy, samplesAG_copy); /* ... and release the unused memory. */ delete pcmAG; delete pcmWD; free(bufferSN); /* Threshold the WD and AG buffers. * AG buffer thresholding uses a standard value * WD buffer requires the same value used by pdetect2. * The value is stored in config/lm_pdetect.cfg */ #ifdef USE_SAT_INVERT negative(bufferWD, samplesWD); negative(bufferAG, samplesAG); #endif threshold(bufferWD, samplesWD, thresholdWD); threshold(bufferAG, samplesAG, AG_TH); #ifdef DEVELOP /* Now is time to look at the thresholded signals */ mEncodePCM16("debug/th_wd.wav", bufferWD, NULL, samplesWD, 48000, 1); mEncodePCM16("debug/th_ag.wav", bufferAG, NULL, samplesAG, 16000, 1); #endif /* Remove spikes and artifacts. * clean_spurious_spikes runs twice, just because I'm used * to work on the clean_spikes code. It's a preventive bug-fix. */ unsigned int peaksWD; clean_spurious_spikes(bufferWD, samplesWD); peaksWD = clean_spikes(bufferWD, samplesWD, US_DS_PEAK, US_DS_ARTI); clean_spurious_spikes(bufferWD, samplesWD); unsigned int peaksAG; clean_spurious_spikes(bufferAG, samplesAG); //peaksAG = clean_spikes(bufferAG, samplesAG, AG_DS_PEAK, AG_DS_ARTI); peaksAG = clean_spikes(bufferAG, samplesAG, 10, AG_DS_ARTI); clean_spurious_spikes(bufferAG, samplesAG); printf("Peaks found on RAW data:\n"); printf(" WD data: %d\n", peaksWD); printf(" AG data: %d\n", peaksAG); #ifdef DEVELOP /* Now is time to look at those spiky signals */ mEncodePCM16("debug/p_wd.wav", bufferWD, NULL, samplesWD, 48000, 1); mEncodePCM16("debug/p_ag.wav", bufferAG, NULL, samplesAG, 16000, 1); #endif /* Sample-and-Hold AG signal (the spiky one) * This method works fine with spiky signals since * it samples and holds a sample value for samplesAGR * times. * Q: Why 3? * A: AG = 16kHz, WD = 48kHz. I want to compare the WD spiky * signal with the resampled version or the AG spiky signal, find * the WD peaks there, calculate the lag, crop the AG signal rescaling * bach the AGR sample values. */ unsigned int samplesAGR = 3 * samplesAG; int16_t *bufferAGR = (int16_t *)malloc(samplesAGR * sizeof(int16_t)); memset(bufferAGR, 0, samplesAGR * sizeof(int16_t)); resample_linear(bufferAG, bufferAGR, samplesAG, 3, 0); #ifdef DEVELOP /* Now is time to look at the spiky signal, resampled. */ mEncodePCM16("debug/p_agr.wav", bufferAGR, NULL, samplesAGR, 48000, 1); #endif /* Collect the peaks found on the AGR data */ WD_peaks peaks_dataAGR; memset(&peaks_dataAGR, 0, sizeof(WD_peaks)); find_peaks(bufferAGR, samplesAGR, peaks_dataAGR, 0); /* Collect the peaks found on the WD data */ WD_peaks peaks_dataWD; memset(&peaks_dataWD, 0, sizeof(WD_peaks)); find_peaks(bufferWD, samplesWD, peaks_dataWD, 0); printf("Peaks found on cleaned data:\n"); printf(" WD data: %d\n", peaks_dataWD.tot); printf(" AGR data: %d\n", peaks_dataAGR.tot); #ifdef DEVELOP printf("Peaks AGR:\n"); print_peaks(peaks_dataAGR); printf("Peaks WD:\n"); print_peaks(peaks_dataWD); #endif /* Sequence peak-consistency check. * Controls if the sequence starts with 3 +SAT peaks * and if it ends with 3 -SAT peaks. */ bool start_ok = check_sequence_start(peaks_dataAGR); bool stop_ok = check_sequence_stop(peaks_dataAGR); printf("AGR sequence peak-consistency:\n"); printf(" Start: %s\n", start_ok ? "passed" : "failed"); printf(" Stop: %s\n", stop_ok ? "passed" : "failed"); /* Two cases here: * - If we are aligning using the WD start peak, it means * that the WD stop peak is/is not distorted/broken/ghosted, * so the WD start peak will be the first one in * peaks_dataWD. * - If we are aligning using the WD stop peak, it means * that the WD start peak is broken. * Just one peak should be present in peaks_dataWD * So, p0WD_idx is alwais equal to 0! */ unsigned int p0WD_idx = 0; printf("Peaks used for the alignment:\n"); printf(" AGR: %d (value=%d, lenght=%d)\n", p0AG, peaks_dataAGR.type[p0AG], peaks_dataAGR.length[p0AG]); printf(" WD: %d (value=%d, lenght=%d)\n", p0WD, peaks_dataWD.type[p0WD_idx], peaks_dataWD.length[p0WD_idx]); /* Let's use AGR as reference and find where the WD * word is, calculating the lag between the two signals. */ unsigned int s0AGR = peaks_dataAGR.start[p0AG] - peaks_dataWD.start[p0WD_idx]; unsigned int s1AGR = s0AGR + samplesWD; /* if(p0WD == 0) assert(peaks_dataWD.tot == 2); */ if(p0WD == 1) { printf("Error: user supplied pwd does not match WD peak data\n"); assert(peaks_dataWD.tot == 1); } /* unsigned int s0AGR = 0; unsigned int s1AGR = 0; if(p0WD == 0) { s0AGR = peaks_dataAGR.start[p0AG] - peaks_dataWD.start[p0WD]; s1AGR = s0AGR + samplesWD; } else { s0AGR = peaks_dataAGR.start[p0AG] - peaks_dataWD.start[p0WD_idx]; s1AGR = s0AGR + samplesWD; } */ /* Scale the AGR samples values back to 16kHz (for AG)... */ unsigned int s0AG = s0AGR/3; unsigned int s1AG = s1AGR/3; /* ...and to 200Hz for AGAMP/POS. */ unsigned int s0AGAMP = s0AG/80; unsigned int s1AGAMP = s1AG/80; /* Done! As usual, put spam on the term... */ printf("Results, in full ALN fashion:\n"); printf(" AGR: %d:%d\n", s0AGR, s1AGR); printf(" AG: %d:%d\n", s0AG, s1AG); printf(" AGAMP: %d:%d\n", s0AGAMP, s1AGAMP); /* I'm ready to save a cropped version of the AG SEQ file, that * matches in lenght the WD word. * Here I crop... */ unsigned int samplesAGWD = s1AG - s0AG + 1; int16_t *bufferAGWD = (int16_t *)malloc(samplesAGWD * sizeof(int16_t)); /* .. and here I put ham on some audio file... */ memcpy(bufferAGWD, bufferAG_copy + s0AG, samplesAGWD * sizeof(int16_t)); mEncodePCM16(filename_AGWD, bufferAGWD, NULL, samplesAGWD, 16000, 1); /* .. and spam on some ALN file. */ FILE *file_align = fopen(filename_ALN, "w"); fprintf(file_align, "%d/%d/", s0AGR, s1AGR); fprintf(file_align, "%d/%d/", s0AG, s1AG); fprintf(file_align, "%d/%d\n", s0AGAMP, s1AGAMP); fclose(file_align); /* Cleaning up memory */ free(bufferWD); free(bufferAG); free(bufferAG_copy); free(bufferAGR); return 0; }
int main (int argc, char *argv[]) { mObjectPCM<int16_t> *pcmAG; mObjectPCM<int16_t> *pcmUS; pcmAG = new mObjectPCM<int16_t>(MPCM_MONO, MPCM_STREAM_TINY); pcmUS = new mObjectPCM<int16_t>(MPCM_MONO, MPCM_STREAM_TINY); /* Read WAV files */ mAudio2PCM(FILE_AG, 0, 0, pcmAG); mAudio2PCM(FILE_US, 0, 0, pcmUS); /* Free unused memory */ pcmAG->Trim(); pcmUS->Trim(); int16_t *bufferAG = NULL; int16_t *bufferUS = NULL; unsigned int samplesAG; unsigned int samplesUS; /* Get buffers */ pcmAG->ExportBuffer(&bufferAG, samplesAG); pcmUS->ExportBuffer(&bufferUS, samplesUS); /* Not needed any more... */ delete pcmAG; delete pcmUS; /* Threshold */ threshold(bufferUS, samplesUS, US_TH); threshold(bufferAG, samplesAG, AG_TH); /* Remove spikes and artifacts */ unsigned int peaksUS1; unsigned int peaksAG1; peaksUS1 = clean_spikes(bufferUS, samplesUS, US_DS_PEAK, US_DS_ARTI); peaksAG1 = clean_spikes(bufferAG, samplesAG, AG_DS_PEAK, AG_DS_ARTI); #ifdef DEVELOP /* Now is time to look at the signals */ mEncodePCM16("temp/agp.wav", bufferAG, NULL, samplesAG, 16000, 1); mEncodePCM16("temp/usp.wav", bufferUS, NULL, samplesUS, 48000, 1); #endif /* After some Dumb(TM) processing, I can resample * the signals, find (for each peak) its median sample * and then measure the delay between the singnals! * Using this method, US and AG speech should be well * aligned. Also, the estimated error should be less * than 12 samples at 48kHz, that means the maximum * drift in time should be around 0.00025 seconds. */ /* First of all, I set that the resapled AG signal * will last as long as the US signal. */ unsigned int samplesAGR = 3 * samplesAG; assert(!(samplesAGR > samplesUS)); if(samplesAGR < samplesUS) samplesAGR = samplesUS; /* Secondly, I alloc a buffer */ int16_t *bufferAGR = (int16_t *)malloc(samplesAGR * sizeof(int16_t)); memset(bufferAGR, 0, samplesAGR * sizeof(int16_t)); #ifdef DEBUG /* Some spamming on the term */ printf("AG samples: %d\n", samplesAG); printf("AGR samples: %d\n", samplesAGR); printf("US samples: %d\n", samplesUS); #endif /* Dumb(TM) resampling (sample and hold)! */ resample_linear(bufferAG, bufferAGR, samplesAG, 3, 0); /* for(unsigned int s = 0; s < samplesAG; s++) { if(bufferAG[s] != 0) { bufferAGR[3*s + 0] = bufferAG[s]; bufferAGR[3*s + 1] = bufferAG[s]; bufferAGR[3*s + 2] = bufferAG[s]; } } */ /* Cool! US+AG speech in a single stereo file */ mEncodePCM16("temp/syn.wav", bufferUS, bufferAGR, samplesUS, 48000, 2); unsigned int peaksUS2; unsigned int peaksAG2; peaksUS2 = median(bufferUS, samplesUS); peaksAG2 = median(bufferAGR, samplesAGR); /* Let's check media values */ mEncodePCM16("temp/med.wav", bufferUS, bufferAGR, samplesUS, 48000, 2); printf("AGR Peaks: %d-->%d\n", peaksAG1, peaksAG2); printf("US Peaks: %d-->%d\n", peaksUS1, peaksUS2); assert (peaksUS2 == peaksAG2); unsigned int firstAGR = 0; unsigned int firstUS = 0; firstUS = first(bufferUS, samplesUS); firstAGR = first(bufferAGR, samplesAGR); printf("AGR First Peak: %d\n", firstAGR); printf("US First Peak: %d\n", firstUS); printf("Delay First Peak: %d\n", firstAGR - firstUS); unsigned int *peaksAGR; unsigned int *peaksUS; peaksAGR = (unsigned int *)malloc(peaksAG2 * sizeof(unsigned int)); peaksUS = (unsigned int *)malloc(peaksUS2 * sizeof(unsigned int)); memset(peaksAGR, 0, peaksAG2 * sizeof(unsigned int)); memset(peaksUS, 0, peaksUS2 * sizeof(unsigned int)); unsigned int peaksUS3; unsigned int peaksAG3; peaksAG3 = find_peak_values(bufferAGR, samplesAGR, peaksAGR); peaksUS3 = find_peak_values(bufferUS, samplesUS, peaksUS); printf("AGR Peaks: %d\n", peaksAG3); printf("US Peaks: %d\n", peaksUS3); FILE *file_drift = fopen("temp/peaks_drift.txt", "w"); if(file_drift == NULL) return -1; for(unsigned int p = 0; p < peaksUS3; p++) fprintf(file_drift, "%.03d/%d/%d/%d\n", p, peaksUS[p], peaksAGR[p], peaksUS[p] - peaksAGR[p]); fclose(file_drift); FILE *file_dist = fopen("temp/peaks_distance.txt", "w"); if(file_dist == NULL) return -1; for(unsigned int p = 5; p < peaksUS3 - 5; p += 2) { fprintf(file_dist, "%.3d-%.3d/%d/%d/%d\n", p, p - 1, peaksUS[p] - peaksUS[p - 1], peaksAGR[p] - peaksAGR[p - 1], (peaksUS[p] - peaksUS[p - 1]) - (peaksAGR[p] - peaksAGR[p - 1])); } fclose(file_dist); /* Cleaning up memory */ free(peaksUS); free(peaksAGR); free(bufferAGR); free(bufferAG); free(bufferUS); return 0; }