static void SpectralEnvelopeEstimation(double *x, int x_length, WorldParameters *world_parameters) { CheapTrickOption option = {0}; InitializeCheapTrickOption(&option); // This value may be better one for HMM speech synthesis. // Default value is -0.09. option.q1 = -0.15; // Important notice (2016/02/02) // You can control a parameter used for the lowest F0 in speech. // You must not set the f0_floor to 0. // It will cause a fatal error because fft_size indicates the infinity. // You must not change the f0_floor after memory allocation. // You should check the fft_size before excucing the analysis/synthesis. // The default value (71.0) is strongly recommended. // On the other hand, setting the lowest F0 of speech is a good choice // to reduce the fft_size. option.f0_floor = 71.0; // Parameters setting and memory allocation. world_parameters->fft_size = GetFFTSizeForCheapTrick(world_parameters->fs, &option); world_parameters->spectrogram = (double **) malloc(sizeof(double *) * (world_parameters->f0_length)); for (int i = 0; i < world_parameters->f0_length; ++i) { world_parameters->spectrogram[i] = (double*) malloc(sizeof(double) * (world_parameters->fft_size / 2 + 1)); } DWORD elapsed_time = timeGetTime(); CheapTrick(x, x_length, world_parameters->fs, world_parameters->time_axis, world_parameters->f0, world_parameters->f0_length, &option, world_parameters->spectrogram); printf("CheapTrick: %d [msec]\n", timeGetTime() - elapsed_time); }
void InitializeCheapTrickOption(int fs, CheapTrickOption *option) { // q1 is the parameter used for the spectral recovery. // Since The parameter is optimized, you don't need to change the parameter. option->q1 = -0.15; // f0_floor and fs are used to determine fft_size; // We strongly recommend not to change this value unless you have enough // knowledge of the signal processing in CheapTrick. option->f0_floor = world::kFloorF0; option->fft_size = GetFFTSizeForCheapTrick(fs, option); }
void CheapTrick(double *x, int x_length, int fs, double *time_axis, double *f0, int f0_length, double **spectrogram) { int fft_size = GetFFTSizeForCheapTrick(fs); double *spectral_envelope = new double[fft_size]; ForwardRealFFT forward_real_fft = {0}; InitializeForwardRealFFT(fft_size, &forward_real_fft); InverseRealFFT inverse_real_fft = {0}; InitializeInverseRealFFT(fft_size, &inverse_real_fft); double current_f0; for (int i = 0; i < f0_length; ++i) { current_f0 = f0[i] <= world::kFloorF0 ? world::kDefaultF0 : f0[i]; CheapTrickGeneralBody(x, x_length, fs, current_f0, fft_size, time_axis[i], &forward_real_fft, &inverse_real_fft, spectral_envelope); for (int j = 0; j <= fft_size / 2; ++j) spectrogram[i][j] = spectral_envelope[j]; } DestroyForwardRealFFT(&forward_real_fft); DestroyInverseRealFFT(&inverse_real_fft); delete[] spectral_envelope; }
//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 2 && argc != 3 && argc != 4 && argc != 5) { printf("error\n"); return 0; } int fs, nbit, x_length; double *x = wavread(argv[1], &fs, &nbit, &x_length); if (CheckLoadedFile(x, fs, nbit, x_length) == false) { printf("error: File not found.\n"); return 0; } // Allocate memories // The number of samples for F0 int f0_length = GetSamplesForDIO(fs, x_length, FRAMEPERIOD); double *f0 = new double[f0_length]; double *time_axis = new double[f0_length]; // FFT size for CheapTrick int fft_size = GetFFTSizeForCheapTrick(fs); double **spectrogram = new double *[f0_length]; double **aperiodicity = new double *[f0_length]; for (int i = 0; i < f0_length; ++i) { spectrogram[i] = new double[fft_size / 2 + 1]; aperiodicity[i] = new double[fft_size / 2 + 1]; } // F0 estimation F0Estimation(x, x_length, fs, f0_length, f0, time_axis); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, fs, time_axis, f0, f0_length, spectrogram); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, fs, time_axis, f0, f0_length, fft_size, aperiodicity); // Note that F0 must not be changed until all parameters are estimated. ParameterModification(argc, argv, fs, f0, f0_length, spectrogram); // The length of the output waveform int y_length = static_cast<int>((f0_length - 1) * FRAMEPERIOD / 1000.0 * fs) + 1; double *y = new double[y_length]; // Synthesis WaveformSynthesis(f0, f0_length, spectrogram, aperiodicity, fft_size, FRAMEPERIOD, fs, y_length, y); // Output wavwrite(y, y_length, fs, 16, argv[2]); printf("complete.\n"); delete[] x; delete[] time_axis; delete[] f0; delete[] y; for (int i = 0; i < f0_length; ++i) { delete[] spectrogram[i]; delete[] aperiodicity[i]; } delete[] spectrogram; delete[] aperiodicity; return 0; }