//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 2 && argc != 3 && argc != 4 && argc != 5) { printf("error\n"); return 0; } int fs, nbit, x_length; double *x = wavread(argv[1], &fs, &nbit, &x_length); if (CheckLoadedFile(x, fs, nbit, x_length) == false) { printf("error: File not found.\n"); return 0; } // Allocate memories // The number of samples for F0 int f0_length = GetSamplesForDIO(fs, x_length, FRAMEPERIOD); double *f0 = new double[f0_length]; double *time_axis = new double[f0_length]; // FFT size for CheapTrick int fft_size = GetFFTSizeForCheapTrick(fs); double **spectrogram = new double *[f0_length]; double **aperiodicity = new double *[f0_length]; for (int i = 0; i < f0_length; ++i) { spectrogram[i] = new double[fft_size / 2 + 1]; aperiodicity[i] = new double[fft_size / 2 + 1]; } // F0 estimation F0Estimation(x, x_length, fs, f0_length, f0, time_axis); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, fs, time_axis, f0, f0_length, spectrogram); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, fs, time_axis, f0, f0_length, fft_size, aperiodicity); // Note that F0 must not be changed until all parameters are estimated. ParameterModification(argc, argv, fs, f0, f0_length, spectrogram); // The length of the output waveform int y_length = static_cast<int>((f0_length - 1) * FRAMEPERIOD / 1000.0 * fs) + 1; double *y = new double[y_length]; // Synthesis WaveformSynthesis(f0, f0_length, spectrogram, aperiodicity, fft_size, FRAMEPERIOD, fs, y_length, y); // Output wavwrite(y, y_length, fs, 16, argv[2]); printf("complete.\n"); delete[] x; delete[] time_axis; delete[] f0; delete[] y; for (int i = 0; i < f0_length; ++i) { delete[] spectrogram[i]; delete[] aperiodicity[i]; } delete[] spectrogram; delete[] aperiodicity; return 0; }
//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 2 && argc != 3 && argc != 4 && argc != 5) { printf("error\n"); return -2; } // 2016/01/28: Important modification. // Memory allocation is carried out in advanse. // This is for compatibility with C language. int x_length = GetAudioLength(argv[1]); if (x_length <= 0) { if (x_length == 0) printf("error: File not found.\n"); else printf("error: The file is not .wav format.\n"); return -1; } double *x = new double[x_length]; // wavread() must be called after GetAudioLength(). int fs, nbit; wavread(argv[1], &fs, &nbit, x); DisplayInformation(fs, nbit, x_length); //--------------------------------------------------------------------------- // Analysis part //--------------------------------------------------------------------------- // 2016/02/02 // A new struct is introduced to implement safe program. WorldParameters world_parameters = { 0 }; // You must set fs and frame_period before analysis/synthesis. world_parameters.fs = fs; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = 5.0; // F0 estimation // DIO // F0EstimationDio(x, x_length, &world_parameters); // Harvest F0EstimationHarvest(x, x_length, &world_parameters); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, &world_parameters); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, &world_parameters); // Note that F0 must not be changed until all parameters are estimated. ParameterModification(argc, argv, fs, world_parameters.f0_length, world_parameters.fft_size, world_parameters.f0, world_parameters.spectrogram); //--------------------------------------------------------------------------- // Synthesis part (2016/04/19) // There are three samples in speech synthesis // 1: Conventional synthesis // 2: Example of real-time synthesis // 3: Example of real-time synthesis (Ring buffer is efficiently used) //--------------------------------------------------------------------------- char filename[100]; // The length of the output waveform int y_length = static_cast<int>((world_parameters.f0_length - 1) * world_parameters.frame_period / 1000.0 * fs) + 1; double *y = new double[y_length]; // Synthesis 1 (conventional synthesis) for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis(&world_parameters, fs, y_length, y); sprintf(filename, "01%s", argv[2]); wavwrite(y, y_length, fs, 16, filename); // Synthesis 2 (All frames are added at the same time) for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis2(&world_parameters, fs, y_length, y); sprintf(filename, "02%s", argv[2]); wavwrite(y, y_length, fs, 16, filename); // Synthesis 3 (Ring buffer is efficiently used.) for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis3(&world_parameters, fs, y_length, y); sprintf(filename, "03%s", argv[2]); wavwrite(y, y_length, fs, 16, filename); delete[] y; delete[] x; DestroyMemory(&world_parameters); printf("complete.\n"); return 0; }
/** * Main function * */ int main(int argc, char *argv[]) { if (argc != 5) { fprintf(stderr, "%s <input_f0_file> <input_spectrum_file> <input_aperiodicity_file> <output_wav_file>", argv[0]); return EXIT_FAILURE; } // Define a default filled structures WorldParameters world_parameters; world_parameters.fs = 22050; // FIXME: hardcoded value world_parameters.f0_length = filesize(argv[1]) / sizeof(double); world_parameters.fft_size = ((filesize(argv[2]) / (sizeof(double) * world_parameters.f0_length)) - 1) * 2; // Be careful that .sp contains only first half of the spectrum std::cout << "fft size = " << world_parameters.fft_size << std::endl; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = DEFAULT_FRAME_PERIOD; //--------------------------------------------------------------------------- // Prepare memory //--------------------------------------------------------------------------- world_parameters.f0 = new double[world_parameters.f0_length]; world_parameters.spectrogram = new double*[world_parameters.f0_length]; for (int i=0;i<world_parameters.f0_length; i++) world_parameters.spectrogram[i] = new double[world_parameters.fft_size / 2 + 1]; world_parameters.aperiodicity = new double*[world_parameters.f0_length]; for (int i=0;i<world_parameters.f0_length; i++) world_parameters.aperiodicity[i] = new double[world_parameters.fft_size / 2 + 1]; //--------------------------------------------------------------------------- // Loading //--------------------------------------------------------------------------- // F0 loading std::ifstream is_f0(argv[1], std::ios::binary | std::ios::in); if ( !is_f0.is_open() ) return false; is_f0.read(reinterpret_cast<char*>(world_parameters.f0), std::streamsize(world_parameters.f0_length*sizeof(double))); // for (int i=0; i<world_parameters.f0_length; i++) // std::cout << world_parameters.f0[i] << std::endl; is_f0.close(); // Spectrogram loading std::ifstream is_spectrogram(argv[2], std::ios::binary | std::ios::in); if ( !is_spectrogram.is_open() ) return false; for (int i=0; i<world_parameters.f0_length; i++) { is_spectrogram.read(reinterpret_cast<char*>(world_parameters.spectrogram[i]), std::streamsize((world_parameters.fft_size / 2 + 1)*sizeof(double))); } is_spectrogram.close(); // Aperiodicity loading std::ifstream is_aperiodicity(argv[3], std::ios::binary | std::ios::in); if ( !is_aperiodicity.is_open() ) return false; for (int i=0; i<world_parameters.f0_length; i++) { is_aperiodicity.read(reinterpret_cast<char*>(world_parameters.aperiodicity[i]), std::streamsize((world_parameters.fft_size / 2 + 1)*sizeof(double))); } is_aperiodicity.close(); //--------------------------------------------------------------------------- // Synthesis //--------------------------------------------------------------------------- int y_length = static_cast<int>((world_parameters.f0_length - 1) * world_parameters.frame_period / 1000.0 * world_parameters.fs) + 1; double *y = new double[y_length]; for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis(&world_parameters, y); wavwrite(y, y_length, world_parameters.fs, 16, argv[4]); //--------------------------------------------------------------------------- // Cleaning part //--------------------------------------------------------------------------- delete[] y; DestroyMemory(&world_parameters); std::cout << "complete" << std::endl; return EXIT_SUCCESS; }
//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 2 && argc != 3 && argc != 4 && argc != 5) { printf("error\n"); return -2; } // 2016/01/28: Important modification. // Memory allocation is carried out in advanse. // This is for compatibility with C language. int x_length = GetAudioLength(argv[1]); if (x_length <= 0) { if (x_length == 0) printf("error: File not found.\n"); else printf("error: The file is not .wav format.\n"); return -1; } double *x = (double*) malloc(sizeof(double) * (x_length)); // wavread() must be called after GetAudioLength(). int fs, nbit; wavread(argv[1], &fs, &nbit, x); DisplayInformation(fs, nbit, x_length); //--------------------------------------------------------------------------- // Analysis part //--------------------------------------------------------------------------- // 2016/02/02 // A new struct is introduced to implement safe program. WorldParameters world_parameters = { 0 }; // You must set fs and frame_period before analysis/synthesis. world_parameters.fs = fs; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = 5.0; // F0 estimation F0Estimation(x, x_length, &world_parameters); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, &world_parameters); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, &world_parameters); // Note that F0 must not be changed until all parameters are estimated. ParameterModification(argc, argv, fs, world_parameters.f0_length, world_parameters.fft_size, world_parameters.f0, world_parameters.spectrogram); //--------------------------------------------------------------------------- // Synthesis part //--------------------------------------------------------------------------- // The length of the output waveform int y_length = (int)((world_parameters.f0_length - 1) * world_parameters.frame_period / 1000.0 * fs) + 1; double *y = (double*) malloc(sizeof(double) * (y_length)); // Synthesis WaveformSynthesis(&world_parameters, fs, y_length, y); // Output wavwrite(y, y_length, fs, 16, argv[2]); if(y != NULL) { free(y); y = NULL; } if(x != NULL) { free(x); x = NULL; } DestroyMemory(&world_parameters); printf("complete.\n"); return 0; }
//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 7) { printf("command: synth FFT_length sampling_rate F0_file spectrogram_file aperiodicity_file output_waveform\n"); return -2; } int fft_size = atoi(argv[1]); int fs = atoi(argv[2]); // compute n bands from fs as in d4c.cpp:325 int number_of_aperiodicities = static_cast<int>(MyMinDouble(world::kUpperLimit, fs / 2.0 - world::kFrequencyInterval) / world::kFrequencyInterval); WorldParameters world_parameters = { 0 }; // You must set fs and frame_period before analysis/synthesis. world_parameters.fs = fs; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = 5.0; world_parameters.fft_size = fft_size; // find number of frames (doubles) in f0 file: struct stat st; if (stat(argv[3], &st) == -1) { printf("cannot read f0\n"); return -2; } int f0_length = (st.st_size / sizeof(double)); world_parameters.f0_length = f0_length; // printf("%d\n", f0_length); world_parameters.f0 = new double[f0_length]; FILE *fp; fp = fopen(argv[3], "rb"); for (int i = 0; i < f0_length; i++) { fread(&world_parameters.f0[i], sizeof(double), 1, fp); } fclose(fp); double **coarse_aperiodicities = new double *[world_parameters.f0_length]; world_parameters.aperiodicity = new double *[world_parameters.f0_length]; for (int i = 0; i < world_parameters.f0_length; ++i) { world_parameters.aperiodicity[i] = new double[fft_size / 2 + 1]; coarse_aperiodicities[i] = new double[number_of_aperiodicities]; } world_parameters.spectrogram = new double *[world_parameters.f0_length]; for (int i = 0; i < world_parameters.f0_length; ++i) { world_parameters.spectrogram[i] = new double[fft_size / 2 + 1]; } fp = fopen(argv[4], "rb"); for (int i = 0; i < f0_length; i++) { for (int j = 0; j < fft_size / 2 + 1; j++) { fread(&world_parameters.spectrogram[i][j], sizeof(double), 1, fp); } } fclose(fp); // aper fp = fopen(argv[5], "rb"); for (int i = 0; i < f0_length; i++) { for (int j = 0; j < number_of_aperiodicities; j++) { fread(&coarse_aperiodicities[i][j], sizeof(double), 1, fp); } } fclose(fp); // convert bandaps to full aperiodic spectrum by interpolation (originally in d4c extraction): // Linear interpolation to convert the coarse aperiodicity into its // spectral representation. // -- for interpolating -- double *coarse_aperiodicity = new double[number_of_aperiodicities + 2]; coarse_aperiodicity[0] = -60.0; coarse_aperiodicity[number_of_aperiodicities + 1] = 0.0; double *coarse_frequency_axis = new double[number_of_aperiodicities + 2]; for (int i = 0; i <= number_of_aperiodicities; ++i) coarse_frequency_axis[i] = static_cast<double>(i) * world::kFrequencyInterval; coarse_frequency_axis[number_of_aperiodicities + 1] = fs / 2.0; double *frequency_axis = new double[fft_size / 2 + 1]; for (int i = 0; i <= fft_size / 2; ++i) frequency_axis[i] = static_cast<double>(i) * fs / fft_size; // ---- for (int i = 0; i < f0_length; ++i) { // load band ap values for this frame into coarse_aperiodicity for (int k = 0; k < number_of_aperiodicities; ++k) { coarse_aperiodicity[k+1] = coarse_aperiodicities[i][k]; } interp1(coarse_frequency_axis, coarse_aperiodicity, number_of_aperiodicities + 2, frequency_axis, fft_size / 2 + 1, world_parameters.aperiodicity[i]); for (int j = 0; j <= fft_size / 2; ++j) world_parameters.aperiodicity[i][j] = pow(10.0, world_parameters.aperiodicity[i][j] / 20.0); } //printf("%d %d\n", world_parameters.f0_length, fs); //--------------------------------------------------------------------------- // Synthesis part //--------------------------------------------------------------------------- // The length of the output waveform int y_length = static_cast<int>((world_parameters.f0_length - 1) * FRAMEPERIOD / 1000.0 * fs) + 1; double *y = new double[y_length]; // Synthesis WaveformSynthesis(&world_parameters, fs, y_length, y); // Output wavwrite(y, y_length, fs, 16, argv[6]); delete[] y; DestroyMemory(&world_parameters); for (int i=0; i<f0_length; i++){ delete[] coarse_aperiodicities[i]; } delete[] coarse_aperiodicities; delete[] coarse_aperiodicity; delete[] frequency_axis; printf("complete %s.\n", argv[6]); return 0; }