Beispiel #1
0
//-----------------------------------------------------------------------------
// Test program.
// test.exe input.wav outout.wav f0 spec flag
// input.wav  : argv[1] Input file
// output.wav : argv[2] Output file
// f0         : argv[3] F0 scaling (a positive number)
// spec       : argv[4] Formant shift (a positive number)
//-----------------------------------------------------------------------------
int main(int argc, char *argv[]) {
    if (argc != 2 && argc != 3 && argc != 4 && argc != 5) {
        printf("error\n");
        return 0;
    }
    int fs, nbit, x_length;
    double *x = wavread(argv[1], &fs, &nbit, &x_length);

    if (CheckLoadedFile(x, fs, nbit, x_length) == false) {
        printf("error: File not found.\n");
        return 0;
    }

    // Allocate memories
    // The number of samples for F0
    int f0_length = GetSamplesForDIO(fs, x_length, FRAMEPERIOD);
    double *f0 = new double[f0_length];
    double *time_axis = new double[f0_length];

    // FFT size for CheapTrick
    int fft_size = GetFFTSizeForCheapTrick(fs);
    double **spectrogram = new double *[f0_length];
    double **aperiodicity = new double *[f0_length];
    for (int i = 0; i < f0_length; ++i) {
        spectrogram[i] = new double[fft_size / 2 + 1];
        aperiodicity[i] = new double[fft_size / 2 + 1];
    }

    // F0 estimation
    F0Estimation(x, x_length, fs, f0_length, f0, time_axis);

    // Spectral envelope estimation
    SpectralEnvelopeEstimation(x, x_length, fs, time_axis, f0, f0_length,
                               spectrogram);

    // Aperiodicity estimation by D4C
    AperiodicityEstimation(x, x_length, fs, time_axis, f0, f0_length,
                           fft_size, aperiodicity);

    // Note that F0 must not be changed until all parameters are estimated.
    ParameterModification(argc, argv, fs, f0, f0_length, spectrogram);

    // The length of the output waveform
    int y_length =
        static_cast<int>((f0_length - 1) * FRAMEPERIOD / 1000.0 * fs) + 1;
    double *y = new double[y_length];
    // Synthesis
    WaveformSynthesis(f0, f0_length, spectrogram, aperiodicity, fft_size,
                      FRAMEPERIOD, fs, y_length, y);

    // Output
    wavwrite(y, y_length, fs, 16, argv[2]);

    printf("complete.\n");

    delete[] x;
    delete[] time_axis;
    delete[] f0;
    delete[] y;
    for (int i = 0; i < f0_length; ++i) {
        delete[] spectrogram[i];
        delete[] aperiodicity[i];
    }
    delete[] spectrogram;
    delete[] aperiodicity;

    return 0;
}
Beispiel #2
0
//-----------------------------------------------------------------------------
// Test program.
// test.exe input.wav outout.wav f0 spec flag
// input.wav  : argv[1] Input file
// output.wav : argv[2] Output file
// f0         : argv[3] F0 scaling (a positive number)
// spec       : argv[4] Formant shift (a positive number)
//-----------------------------------------------------------------------------
int main(int argc, char *argv[]) {
  if (argc != 2 && argc != 3 && argc != 4 && argc != 5) {
    printf("error\n");
    return -2;
  }

  // 2016/01/28: Important modification.
  // Memory allocation is carried out in advanse.
  // This is for compatibility with C language.
  int x_length = GetAudioLength(argv[1]);
  if (x_length <= 0) {
    if (x_length == 0)
      printf("error: File not found.\n");
    else
      printf("error: The file is not .wav format.\n");
    return -1;
  }
  double *x = (double*) malloc(sizeof(double) * (x_length));
  // wavread() must be called after GetAudioLength().
  int fs, nbit;
  wavread(argv[1], &fs, &nbit, x);
  DisplayInformation(fs, nbit, x_length);

  //---------------------------------------------------------------------------
  // Analysis part
  //---------------------------------------------------------------------------
  // 2016/02/02
  // A new struct is introduced to implement safe program.
  WorldParameters world_parameters = { 0 };
  // You must set fs and frame_period before analysis/synthesis.
  world_parameters.fs = fs;

  // 5.0 ms is the default value.
  // Generally, the inverse of the lowest F0 of speech is the best.
  // However, the more elapsed time is required.
  world_parameters.frame_period = 5.0;

  // F0 estimation
  F0Estimation(x, x_length, &world_parameters);

  // Spectral envelope estimation
  SpectralEnvelopeEstimation(x, x_length, &world_parameters);

  // Aperiodicity estimation by D4C
  AperiodicityEstimation(x, x_length, &world_parameters);

  // Note that F0 must not be changed until all parameters are estimated.
  ParameterModification(argc, argv, fs, world_parameters.f0_length,
    world_parameters.fft_size, world_parameters.f0,
    world_parameters.spectrogram);

  //---------------------------------------------------------------------------
  // Synthesis part
  //---------------------------------------------------------------------------
  // The length of the output waveform
  int y_length = (int)((world_parameters.f0_length - 1) *
    world_parameters.frame_period / 1000.0 * fs) + 1;
  double *y = (double*) malloc(sizeof(double) * (y_length));
  // Synthesis
  WaveformSynthesis(&world_parameters, fs, y_length, y);

  // Output
  wavwrite(y, y_length, fs, 16, argv[2]);

  if(y != NULL) {
    free(y);
    y = NULL;
  }
  if(x != NULL) {
    free(x);
    x = NULL;
  }
  DestroyMemory(&world_parameters);

  printf("complete.\n");
  return 0;
}
Beispiel #3
0
//-----------------------------------------------------------------------------
// Test program.
// test.exe input.wav outout.wav f0 spec flag
// input.wav  : argv[1] Input file
// output.wav : argv[2] Output file
// f0         : argv[3] F0 scaling (a positive number)
// spec       : argv[4] Formant shift (a positive number)
//-----------------------------------------------------------------------------
int main(int argc, char *argv[]) {
  if (argc != 2 && argc != 3 && argc != 4 && argc != 5) {
    printf("error\n");
    return -2;
  }

  // 2016/01/28: Important modification.
  // Memory allocation is carried out in advanse.
  // This is for compatibility with C language.
  int x_length = GetAudioLength(argv[1]);
  if (x_length <= 0) {
    if (x_length == 0)
      printf("error: File not found.\n");
    else
      printf("error: The file is not .wav format.\n");
    return -1;
  }
  double *x = new double[x_length];
  // wavread() must be called after GetAudioLength().
  int fs, nbit;
  wavread(argv[1], &fs, &nbit, x);
  DisplayInformation(fs, nbit, x_length);

  //---------------------------------------------------------------------------
  // Analysis part
  //---------------------------------------------------------------------------
  // 2016/02/02
  // A new struct is introduced to implement safe program.
  WorldParameters world_parameters = { 0 };
  // You must set fs and frame_period before analysis/synthesis.
  world_parameters.fs = fs;

  // 5.0 ms is the default value.
  // Generally, the inverse of the lowest F0 of speech is the best.
  // However, the more elapsed time is required.
  world_parameters.frame_period = 5.0;

  // F0 estimation
  // DIO
  // F0EstimationDio(x, x_length, &world_parameters);
  // Harvest
  F0EstimationHarvest(x, x_length, &world_parameters);

  // Spectral envelope estimation
  SpectralEnvelopeEstimation(x, x_length, &world_parameters);

  // Aperiodicity estimation by D4C
  AperiodicityEstimation(x, x_length, &world_parameters);

  // Note that F0 must not be changed until all parameters are estimated.
  ParameterModification(argc, argv, fs, world_parameters.f0_length,
    world_parameters.fft_size, world_parameters.f0,
    world_parameters.spectrogram);

  //---------------------------------------------------------------------------
  // Synthesis part (2016/04/19)
  // There are three samples in speech synthesis
  // 1: Conventional synthesis
  // 2: Example of real-time synthesis
  // 3: Example of real-time synthesis (Ring buffer is efficiently used)
  //---------------------------------------------------------------------------
  char filename[100];
  // The length of the output waveform
  int y_length = static_cast<int>((world_parameters.f0_length - 1) *
    world_parameters.frame_period / 1000.0 * fs) + 1;
  double *y = new double[y_length];

  // Synthesis 1 (conventional synthesis)
  for (int i = 0; i < y_length; ++i) y[i] = 0.0;
  WaveformSynthesis(&world_parameters, fs, y_length, y);
  sprintf(filename, "01%s", argv[2]);
  wavwrite(y, y_length, fs, 16, filename);

  // Synthesis 2 (All frames are added at the same time)
  for (int i = 0; i < y_length; ++i) y[i] = 0.0;
  WaveformSynthesis2(&world_parameters, fs, y_length, y);
  sprintf(filename, "02%s", argv[2]);
  wavwrite(y, y_length, fs, 16, filename);

  // Synthesis 3 (Ring buffer is efficiently used.)
  for (int i = 0; i < y_length; ++i) y[i] = 0.0;
  WaveformSynthesis3(&world_parameters, fs, y_length, y);
  sprintf(filename, "03%s", argv[2]);
  wavwrite(y, y_length, fs, 16, filename);

  delete[] y;
  delete[] x;
  DestroyMemory(&world_parameters);

  printf("complete.\n");
  return 0;
}
Beispiel #4
0
/**
 * Main function
 *
 */
int main(int argc, char *argv[])
{
    if (argc != 5)
    {
        std::cerr << argv[0] << "<input_wav_file> <output_f0_file> <output_spectrum_file> <output_aperiodicity_file>" << std::endl;
        return EXIT_FAILURE;
    }

    // 2016/01/28: Important modification.
    // Memory allocation is carried out in advanse.
    // This is for compatibility with C language.
    int x_length = GetAudioLength(argv[1]);
    if (x_length <= 0) {
        if (x_length == 0)
            std::cerr << "error: File \"" << argv[1] << "\" not found"  << std::endl;
        else
            std::cerr << "error: File \"" << argv[1] << "\" is not a .wav format"  << std::endl;
        return EXIT_FAILURE;
    }
    double *x = new double[x_length];

    // wavread() must be called after GetAudioLength().
    int fs, nbit;
    wavread(argv[1], &fs, &nbit, x);
    DisplayInformation(fs, nbit, x_length);

    // 2016/02/02
    // A new struct is introduced to implement safe program.
    WorldParameters world_parameters;

    // You must set fs and frame_period before analysis/synthesis.
    world_parameters.fs = fs;

    // 5.0 ms is the default value.
    // Generally, the inverse of the lowest F0 of speech is the best.
    // However, the more elapsed time is required.
    world_parameters.frame_period = 5.0;


    //---------------------------------------------------------------------------
    // Analysis part
    //---------------------------------------------------------------------------

    // F0 estimation
    F0Estimation(x, x_length, &world_parameters);

    // Spectral envelope estimation
    SpectralEnvelopeEstimation(x, x_length, &world_parameters);

    // Aperiodicity estimation by D4C
    AperiodicityEstimation(x, x_length, &world_parameters);

    std::cout << "fft size = " << world_parameters.fft_size << std::endl;

    //---------------------------------------------------------------------------
    // Saving part
    //---------------------------------------------------------------------------

    // F0 saving
    std::ofstream out_f0(argv[2],  std::ios::out | std::ios::binary);
    if(!out_f0)
    {
        std::cerr << "Cannot open file: " << argv[2] << std::endl;
        return EXIT_FAILURE;
    }

    out_f0.write(reinterpret_cast<const char*>(world_parameters.f0),
                 std::streamsize(world_parameters.f0_length * sizeof(double)));
    out_f0.close();

    // Spectrogram saving
    std::ofstream out_spectrogram(argv[3],  std::ios::out | std::ios::binary);
    if(!out_spectrogram)
    {
        std::cerr << "Cannot open file: " << argv[3] << std::endl;
        return EXIT_FAILURE;
    }

    // write the sampling frequency
    out_spectrogram.write(reinterpret_cast<const char*>(&world_parameters.fs),
                 std::streamsize( sizeof(world_parameters.fs) ) );

    // write the sampling frequency
    out_spectrogram.write(reinterpret_cast<const char*>(&world_parameters.frame_period),
                 std::streamsize( sizeof(world_parameters.frame_period) ) );

    // write the spectrogram data
    for (int i=0; i<world_parameters.f0_length; i++)
    {
        out_spectrogram.write(reinterpret_cast<const char*>(world_parameters.spectrogram[i]),
                              std::streamsize((world_parameters.fft_size / 2 + 1) * sizeof(double)));
    }

    out_spectrogram.close();

    // Aperiodicity saving
    std::ofstream out_aperiodicity(argv[4],  std::ios::out | std::ios::binary);
    if(!out_aperiodicity)
    {
        std::cerr << "Cannot open file: " << argv[4] << std::endl;
        return EXIT_FAILURE;
    }

    for (int i=0; i<world_parameters.f0_length; i++)
    {
        out_aperiodicity.write(reinterpret_cast<const char*>(world_parameters.aperiodicity[i]),
                               std::streamsize((world_parameters.fft_size / 2 + 1) * sizeof(double)));
    }

    out_aperiodicity.close();


    //---------------------------------------------------------------------------
    // Cleaning part
    //---------------------------------------------------------------------------
    delete[] x;
    DestroyMemory(&world_parameters);

    std::cout << "complete" << std::endl;
    return EXIT_SUCCESS;
}