void D4C(const double *x, int x_length, int fs, const double *time_axis, const double *f0, int f0_length, int fft_size, const D4COption *option, double **aperiodicity) { int fft_size_d4c = static_cast<int>(pow(2.0, 1.0 + static_cast<int>(log(4.0 * fs / world::kFloorF0 + 1) / world::kLog2))); ForwardRealFFT forward_real_fft = {0}; InitializeForwardRealFFT(fft_size_d4c, &forward_real_fft); int number_of_aperiodicities = static_cast<int>(MyMinDouble(world::kUpperLimit, fs / 2.0 - world::kFrequencyInterval) / world::kFrequencyInterval); // Since the window function is common in D4CGeneralBody(), // it is designed here to speed up. int window_length = static_cast<int>(world::kFrequencyInterval * fft_size_d4c / fs) * 2 + 1; double *window = new double[window_length]; NuttallWindow(window_length, window); double *coarse_aperiodicity = new double[number_of_aperiodicities + 2]; coarse_aperiodicity[0] = -60.0; coarse_aperiodicity[number_of_aperiodicities + 1] = 0.0; double *coarse_frequency_axis = new double[number_of_aperiodicities + 2]; for (int i = 0; i <= number_of_aperiodicities; ++i) coarse_frequency_axis[i] = static_cast<double>(i) * world::kFrequencyInterval; coarse_frequency_axis[number_of_aperiodicities + 1] = fs / 2.0; double *frequency_axis = new double[fft_size / 2 + 1]; for (int i = 0; i <= fft_size / 2; ++i) frequency_axis[i] = static_cast<double>(i) * fs / fft_size; for (int i = 0; i < f0_length; ++i) { if (f0[i] == 0) { for (int j = 0; j <= fft_size / 2; ++j) aperiodicity[i][j] = 0.0; continue; } D4CGeneralBody(x, x_length, fs, MyMaxDouble(f0[i], world::kFloorF0), fft_size_d4c, time_axis[i], number_of_aperiodicities, window, window_length, &forward_real_fft, &coarse_aperiodicity[1]); // Linear interpolation to convert the coarse aperiodicity into its // spectral representation. interp1(coarse_frequency_axis, coarse_aperiodicity, number_of_aperiodicities + 2, frequency_axis, fft_size / 2 + 1, aperiodicity[i]); for (int j = 0; j <= fft_size / 2; ++j) aperiodicity[i][j] = pow(10.0, aperiodicity[i][j] / 20.0); } DestroyForwardRealFFT(&forward_real_fft); delete[] coarse_frequency_axis; delete[] coarse_aperiodicity; delete[] window; delete[] frequency_axis; }
//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 7) { printf("command: synth FFT_length sampling_rate F0_file spectrogram_file aperiodicity_file output_waveform\n"); return -2; } int fft_size = atoi(argv[1]); int fs = atoi(argv[2]); // compute n bands from fs as in d4c.cpp:325 int number_of_aperiodicities = static_cast<int>(MyMinDouble(world::kUpperLimit, fs / 2.0 - world::kFrequencyInterval) / world::kFrequencyInterval); WorldParameters world_parameters = { 0 }; // You must set fs and frame_period before analysis/synthesis. world_parameters.fs = fs; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = 5.0; world_parameters.fft_size = fft_size; // find number of frames (doubles) in f0 file: struct stat st; if (stat(argv[3], &st) == -1) { printf("cannot read f0\n"); return -2; } int f0_length = (st.st_size / sizeof(double)); world_parameters.f0_length = f0_length; // printf("%d\n", f0_length); world_parameters.f0 = new double[f0_length]; FILE *fp; fp = fopen(argv[3], "rb"); for (int i = 0; i < f0_length; i++) { fread(&world_parameters.f0[i], sizeof(double), 1, fp); } fclose(fp); double **coarse_aperiodicities = new double *[world_parameters.f0_length]; world_parameters.aperiodicity = new double *[world_parameters.f0_length]; for (int i = 0; i < world_parameters.f0_length; ++i) { world_parameters.aperiodicity[i] = new double[fft_size / 2 + 1]; coarse_aperiodicities[i] = new double[number_of_aperiodicities]; } world_parameters.spectrogram = new double *[world_parameters.f0_length]; for (int i = 0; i < world_parameters.f0_length; ++i) { world_parameters.spectrogram[i] = new double[fft_size / 2 + 1]; } fp = fopen(argv[4], "rb"); for (int i = 0; i < f0_length; i++) { for (int j = 0; j < fft_size / 2 + 1; j++) { fread(&world_parameters.spectrogram[i][j], sizeof(double), 1, fp); } } fclose(fp); // aper fp = fopen(argv[5], "rb"); for (int i = 0; i < f0_length; i++) { for (int j = 0; j < number_of_aperiodicities; j++) { fread(&coarse_aperiodicities[i][j], sizeof(double), 1, fp); } } fclose(fp); // convert bandaps to full aperiodic spectrum by interpolation (originally in d4c extraction): // Linear interpolation to convert the coarse aperiodicity into its // spectral representation. // -- for interpolating -- double *coarse_aperiodicity = new double[number_of_aperiodicities + 2]; coarse_aperiodicity[0] = -60.0; coarse_aperiodicity[number_of_aperiodicities + 1] = 0.0; double *coarse_frequency_axis = new double[number_of_aperiodicities + 2]; for (int i = 0; i <= number_of_aperiodicities; ++i) coarse_frequency_axis[i] = static_cast<double>(i) * world::kFrequencyInterval; coarse_frequency_axis[number_of_aperiodicities + 1] = fs / 2.0; double *frequency_axis = new double[fft_size / 2 + 1]; for (int i = 0; i <= fft_size / 2; ++i) frequency_axis[i] = static_cast<double>(i) * fs / fft_size; // ---- for (int i = 0; i < f0_length; ++i) { // load band ap values for this frame into coarse_aperiodicity for (int k = 0; k < number_of_aperiodicities; ++k) { coarse_aperiodicity[k+1] = coarse_aperiodicities[i][k]; } interp1(coarse_frequency_axis, coarse_aperiodicity, number_of_aperiodicities + 2, frequency_axis, fft_size / 2 + 1, world_parameters.aperiodicity[i]); for (int j = 0; j <= fft_size / 2; ++j) world_parameters.aperiodicity[i][j] = pow(10.0, world_parameters.aperiodicity[i][j] / 20.0); } //printf("%d %d\n", world_parameters.f0_length, fs); //--------------------------------------------------------------------------- // Synthesis part //--------------------------------------------------------------------------- // The length of the output waveform int y_length = static_cast<int>((world_parameters.f0_length - 1) * FRAMEPERIOD / 1000.0 * fs) + 1; double *y = new double[y_length]; // Synthesis WaveformSynthesis(&world_parameters, fs, y_length, y); // Output wavwrite(y, y_length, fs, 16, argv[6]); delete[] y; DestroyMemory(&world_parameters); for (int i=0; i<f0_length; i++){ delete[] coarse_aperiodicities[i]; } delete[] coarse_aperiodicities; delete[] coarse_aperiodicity; delete[] frequency_axis; printf("complete %s.\n", argv[6]); return 0; }