//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 2 && argc != 3 && argc != 4 && argc != 5) { printf("error\n"); return 0; } int fs, nbit, x_length; double *x = wavread(argv[1], &fs, &nbit, &x_length); if (CheckLoadedFile(x, fs, nbit, x_length) == false) { printf("error: File not found.\n"); return 0; } // Allocate memories // The number of samples for F0 int f0_length = GetSamplesForDIO(fs, x_length, FRAMEPERIOD); double *f0 = new double[f0_length]; double *time_axis = new double[f0_length]; // FFT size for CheapTrick int fft_size = GetFFTSizeForCheapTrick(fs); double **spectrogram = new double *[f0_length]; double **aperiodicity = new double *[f0_length]; for (int i = 0; i < f0_length; ++i) { spectrogram[i] = new double[fft_size / 2 + 1]; aperiodicity[i] = new double[fft_size / 2 + 1]; } // F0 estimation F0Estimation(x, x_length, fs, f0_length, f0, time_axis); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, fs, time_axis, f0, f0_length, spectrogram); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, fs, time_axis, f0, f0_length, fft_size, aperiodicity); // Note that F0 must not be changed until all parameters are estimated. ParameterModification(argc, argv, fs, f0, f0_length, spectrogram); // The length of the output waveform int y_length = static_cast<int>((f0_length - 1) * FRAMEPERIOD / 1000.0 * fs) + 1; double *y = new double[y_length]; // Synthesis WaveformSynthesis(f0, f0_length, spectrogram, aperiodicity, fft_size, FRAMEPERIOD, fs, y_length, y); // Output wavwrite(y, y_length, fs, 16, argv[2]); printf("complete.\n"); delete[] x; delete[] time_axis; delete[] f0; delete[] y; for (int i = 0; i < f0_length; ++i) { delete[] spectrogram[i]; delete[] aperiodicity[i]; } delete[] spectrogram; delete[] aperiodicity; return 0; }
//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 2 && argc != 3 && argc != 4 && argc != 5) { printf("error\n"); return -2; } // 2016/01/28: Important modification. // Memory allocation is carried out in advanse. // This is for compatibility with C language. int x_length = GetAudioLength(argv[1]); if (x_length <= 0) { if (x_length == 0) printf("error: File not found.\n"); else printf("error: The file is not .wav format.\n"); return -1; } double *x = (double*) malloc(sizeof(double) * (x_length)); // wavread() must be called after GetAudioLength(). int fs, nbit; wavread(argv[1], &fs, &nbit, x); DisplayInformation(fs, nbit, x_length); //--------------------------------------------------------------------------- // Analysis part //--------------------------------------------------------------------------- // 2016/02/02 // A new struct is introduced to implement safe program. WorldParameters world_parameters = { 0 }; // You must set fs and frame_period before analysis/synthesis. world_parameters.fs = fs; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = 5.0; // F0 estimation F0Estimation(x, x_length, &world_parameters); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, &world_parameters); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, &world_parameters); // Note that F0 must not be changed until all parameters are estimated. ParameterModification(argc, argv, fs, world_parameters.f0_length, world_parameters.fft_size, world_parameters.f0, world_parameters.spectrogram); //--------------------------------------------------------------------------- // Synthesis part //--------------------------------------------------------------------------- // The length of the output waveform int y_length = (int)((world_parameters.f0_length - 1) * world_parameters.frame_period / 1000.0 * fs) + 1; double *y = (double*) malloc(sizeof(double) * (y_length)); // Synthesis WaveformSynthesis(&world_parameters, fs, y_length, y); // Output wavwrite(y, y_length, fs, 16, argv[2]); if(y != NULL) { free(y); y = NULL; } if(x != NULL) { free(x); x = NULL; } DestroyMemory(&world_parameters); printf("complete.\n"); return 0; }
//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 2 && argc != 3 && argc != 4 && argc != 5) { printf("error\n"); return -2; } // 2016/01/28: Important modification. // Memory allocation is carried out in advanse. // This is for compatibility with C language. int x_length = GetAudioLength(argv[1]); if (x_length <= 0) { if (x_length == 0) printf("error: File not found.\n"); else printf("error: The file is not .wav format.\n"); return -1; } double *x = new double[x_length]; // wavread() must be called after GetAudioLength(). int fs, nbit; wavread(argv[1], &fs, &nbit, x); DisplayInformation(fs, nbit, x_length); //--------------------------------------------------------------------------- // Analysis part //--------------------------------------------------------------------------- // 2016/02/02 // A new struct is introduced to implement safe program. WorldParameters world_parameters = { 0 }; // You must set fs and frame_period before analysis/synthesis. world_parameters.fs = fs; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = 5.0; // F0 estimation // DIO // F0EstimationDio(x, x_length, &world_parameters); // Harvest F0EstimationHarvest(x, x_length, &world_parameters); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, &world_parameters); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, &world_parameters); // Note that F0 must not be changed until all parameters are estimated. ParameterModification(argc, argv, fs, world_parameters.f0_length, world_parameters.fft_size, world_parameters.f0, world_parameters.spectrogram); //--------------------------------------------------------------------------- // Synthesis part (2016/04/19) // There are three samples in speech synthesis // 1: Conventional synthesis // 2: Example of real-time synthesis // 3: Example of real-time synthesis (Ring buffer is efficiently used) //--------------------------------------------------------------------------- char filename[100]; // The length of the output waveform int y_length = static_cast<int>((world_parameters.f0_length - 1) * world_parameters.frame_period / 1000.0 * fs) + 1; double *y = new double[y_length]; // Synthesis 1 (conventional synthesis) for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis(&world_parameters, fs, y_length, y); sprintf(filename, "01%s", argv[2]); wavwrite(y, y_length, fs, 16, filename); // Synthesis 2 (All frames are added at the same time) for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis2(&world_parameters, fs, y_length, y); sprintf(filename, "02%s", argv[2]); wavwrite(y, y_length, fs, 16, filename); // Synthesis 3 (Ring buffer is efficiently used.) for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis3(&world_parameters, fs, y_length, y); sprintf(filename, "03%s", argv[2]); wavwrite(y, y_length, fs, 16, filename); delete[] y; delete[] x; DestroyMemory(&world_parameters); printf("complete.\n"); return 0; }
/** * Main function * */ int main(int argc, char *argv[]) { if (argc != 5) { std::cerr << argv[0] << "<input_wav_file> <output_f0_file> <output_spectrum_file> <output_aperiodicity_file>" << std::endl; return EXIT_FAILURE; } // 2016/01/28: Important modification. // Memory allocation is carried out in advanse. // This is for compatibility with C language. int x_length = GetAudioLength(argv[1]); if (x_length <= 0) { if (x_length == 0) std::cerr << "error: File \"" << argv[1] << "\" not found" << std::endl; else std::cerr << "error: File \"" << argv[1] << "\" is not a .wav format" << std::endl; return EXIT_FAILURE; } double *x = new double[x_length]; // wavread() must be called after GetAudioLength(). int fs, nbit; wavread(argv[1], &fs, &nbit, x); DisplayInformation(fs, nbit, x_length); // 2016/02/02 // A new struct is introduced to implement safe program. WorldParameters world_parameters; // You must set fs and frame_period before analysis/synthesis. world_parameters.fs = fs; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = 5.0; //--------------------------------------------------------------------------- // Analysis part //--------------------------------------------------------------------------- // F0 estimation F0Estimation(x, x_length, &world_parameters); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, &world_parameters); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, &world_parameters); std::cout << "fft size = " << world_parameters.fft_size << std::endl; //--------------------------------------------------------------------------- // Saving part //--------------------------------------------------------------------------- // F0 saving std::ofstream out_f0(argv[2], std::ios::out | std::ios::binary); if(!out_f0) { std::cerr << "Cannot open file: " << argv[2] << std::endl; return EXIT_FAILURE; } out_f0.write(reinterpret_cast<const char*>(world_parameters.f0), std::streamsize(world_parameters.f0_length * sizeof(double))); out_f0.close(); // Spectrogram saving std::ofstream out_spectrogram(argv[3], std::ios::out | std::ios::binary); if(!out_spectrogram) { std::cerr << "Cannot open file: " << argv[3] << std::endl; return EXIT_FAILURE; } // write the sampling frequency out_spectrogram.write(reinterpret_cast<const char*>(&world_parameters.fs), std::streamsize( sizeof(world_parameters.fs) ) ); // write the sampling frequency out_spectrogram.write(reinterpret_cast<const char*>(&world_parameters.frame_period), std::streamsize( sizeof(world_parameters.frame_period) ) ); // write the spectrogram data for (int i=0; i<world_parameters.f0_length; i++) { out_spectrogram.write(reinterpret_cast<const char*>(world_parameters.spectrogram[i]), std::streamsize((world_parameters.fft_size / 2 + 1) * sizeof(double))); } out_spectrogram.close(); // Aperiodicity saving std::ofstream out_aperiodicity(argv[4], std::ios::out | std::ios::binary); if(!out_aperiodicity) { std::cerr << "Cannot open file: " << argv[4] << std::endl; return EXIT_FAILURE; } for (int i=0; i<world_parameters.f0_length; i++) { out_aperiodicity.write(reinterpret_cast<const char*>(world_parameters.aperiodicity[i]), std::streamsize((world_parameters.fft_size / 2 + 1) * sizeof(double))); } out_aperiodicity.close(); //--------------------------------------------------------------------------- // Cleaning part //--------------------------------------------------------------------------- delete[] x; DestroyMemory(&world_parameters); std::cout << "complete" << std::endl; return EXIT_SUCCESS; }