Esempio n. 1
0
//-----------------------------------------------------------------------------
// Test program.
// test.exe input.wav outout.wav f0 spec flag
// input.wav  : argv[1] Input file
// output.wav : argv[2] Output file
// f0         : argv[3] F0 scaling (a positive number)
// spec       : argv[4] Formant shift (a positive number)
//-----------------------------------------------------------------------------
int main(int argc, char *argv[]) {
    if (argc != 2 && argc != 3 && argc != 4 && argc != 5) {
        printf("error\n");
        return 0;
    }
    int fs, nbit, x_length;
    double *x = wavread(argv[1], &fs, &nbit, &x_length);

    if (CheckLoadedFile(x, fs, nbit, x_length) == false) {
        printf("error: File not found.\n");
        return 0;
    }

    // Allocate memories
    // The number of samples for F0
    int f0_length = GetSamplesForDIO(fs, x_length, FRAMEPERIOD);
    double *f0 = new double[f0_length];
    double *time_axis = new double[f0_length];

    // FFT size for CheapTrick
    int fft_size = GetFFTSizeForCheapTrick(fs);
    double **spectrogram = new double *[f0_length];
    double **aperiodicity = new double *[f0_length];
    for (int i = 0; i < f0_length; ++i) {
        spectrogram[i] = new double[fft_size / 2 + 1];
        aperiodicity[i] = new double[fft_size / 2 + 1];
    }

    // F0 estimation
    F0Estimation(x, x_length, fs, f0_length, f0, time_axis);

    // Spectral envelope estimation
    SpectralEnvelopeEstimation(x, x_length, fs, time_axis, f0, f0_length,
                               spectrogram);

    // Aperiodicity estimation by D4C
    AperiodicityEstimation(x, x_length, fs, time_axis, f0, f0_length,
                           fft_size, aperiodicity);

    // Note that F0 must not be changed until all parameters are estimated.
    ParameterModification(argc, argv, fs, f0, f0_length, spectrogram);

    // The length of the output waveform
    int y_length =
        static_cast<int>((f0_length - 1) * FRAMEPERIOD / 1000.0 * fs) + 1;
    double *y = new double[y_length];
    // Synthesis
    WaveformSynthesis(f0, f0_length, spectrogram, aperiodicity, fft_size,
                      FRAMEPERIOD, fs, y_length, y);

    // Output
    wavwrite(y, y_length, fs, 16, argv[2]);

    printf("complete.\n");

    delete[] x;
    delete[] time_axis;
    delete[] f0;
    delete[] y;
    for (int i = 0; i < f0_length; ++i) {
        delete[] spectrogram[i];
        delete[] aperiodicity[i];
    }
    delete[] spectrogram;
    delete[] aperiodicity;

    return 0;
}
Esempio n. 2
0
//-----------------------------------------------------------------------------
// Test program.
// test.exe input.wav outout.wav f0 spec flag
// input.wav  : argv[1] Input file
// output.wav : argv[2] Output file
// f0         : argv[3] F0 scaling (a positive number)
// spec       : argv[4] Formant shift (a positive number)
//-----------------------------------------------------------------------------
int main(int argc, char *argv[]) {
  if (argc != 2 && argc != 3 && argc != 4 && argc != 5) {
    printf("error\n");
    return -2;
  }

  // 2016/01/28: Important modification.
  // Memory allocation is carried out in advanse.
  // This is for compatibility with C language.
  int x_length = GetAudioLength(argv[1]);
  if (x_length <= 0) {
    if (x_length == 0)
      printf("error: File not found.\n");
    else
      printf("error: The file is not .wav format.\n");
    return -1;
  }
  double *x = new double[x_length];
  // wavread() must be called after GetAudioLength().
  int fs, nbit;
  wavread(argv[1], &fs, &nbit, x);
  DisplayInformation(fs, nbit, x_length);

  //---------------------------------------------------------------------------
  // Analysis part
  //---------------------------------------------------------------------------
  // 2016/02/02
  // A new struct is introduced to implement safe program.
  WorldParameters world_parameters = { 0 };
  // You must set fs and frame_period before analysis/synthesis.
  world_parameters.fs = fs;

  // 5.0 ms is the default value.
  // Generally, the inverse of the lowest F0 of speech is the best.
  // However, the more elapsed time is required.
  world_parameters.frame_period = 5.0;

  // F0 estimation
  // DIO
  // F0EstimationDio(x, x_length, &world_parameters);
  // Harvest
  F0EstimationHarvest(x, x_length, &world_parameters);

  // Spectral envelope estimation
  SpectralEnvelopeEstimation(x, x_length, &world_parameters);

  // Aperiodicity estimation by D4C
  AperiodicityEstimation(x, x_length, &world_parameters);

  // Note that F0 must not be changed until all parameters are estimated.
  ParameterModification(argc, argv, fs, world_parameters.f0_length,
    world_parameters.fft_size, world_parameters.f0,
    world_parameters.spectrogram);

  //---------------------------------------------------------------------------
  // Synthesis part (2016/04/19)
  // There are three samples in speech synthesis
  // 1: Conventional synthesis
  // 2: Example of real-time synthesis
  // 3: Example of real-time synthesis (Ring buffer is efficiently used)
  //---------------------------------------------------------------------------
  char filename[100];
  // The length of the output waveform
  int y_length = static_cast<int>((world_parameters.f0_length - 1) *
    world_parameters.frame_period / 1000.0 * fs) + 1;
  double *y = new double[y_length];

  // Synthesis 1 (conventional synthesis)
  for (int i = 0; i < y_length; ++i) y[i] = 0.0;
  WaveformSynthesis(&world_parameters, fs, y_length, y);
  sprintf(filename, "01%s", argv[2]);
  wavwrite(y, y_length, fs, 16, filename);

  // Synthesis 2 (All frames are added at the same time)
  for (int i = 0; i < y_length; ++i) y[i] = 0.0;
  WaveformSynthesis2(&world_parameters, fs, y_length, y);
  sprintf(filename, "02%s", argv[2]);
  wavwrite(y, y_length, fs, 16, filename);

  // Synthesis 3 (Ring buffer is efficiently used.)
  for (int i = 0; i < y_length; ++i) y[i] = 0.0;
  WaveformSynthesis3(&world_parameters, fs, y_length, y);
  sprintf(filename, "03%s", argv[2]);
  wavwrite(y, y_length, fs, 16, filename);

  delete[] y;
  delete[] x;
  DestroyMemory(&world_parameters);

  printf("complete.\n");
  return 0;
}
Esempio n. 3
0
/**
 * Main function
 *
 */
int main(int argc, char *argv[])
{
    if (argc != 5)
    {
        fprintf(stderr, "%s <input_f0_file> <input_spectrum_file> <input_aperiodicity_file> <output_wav_file>", argv[0]);
        return EXIT_FAILURE;
    }

    // Define a default filled structures
    WorldParameters world_parameters;
    world_parameters.fs = 22050; // FIXME: hardcoded value
    world_parameters.f0_length = filesize(argv[1]) / sizeof(double);
    world_parameters.fft_size = ((filesize(argv[2]) / (sizeof(double) * world_parameters.f0_length)) - 1) * 2; // Be careful that .sp contains only first half of the spectrum
    std::cout << "fft size = " << world_parameters.fft_size << std::endl;

    // 5.0 ms is the default value.
    // Generally, the inverse of the lowest F0 of speech is the best.
    // However, the more elapsed time is required.
    world_parameters.frame_period = DEFAULT_FRAME_PERIOD;


    //---------------------------------------------------------------------------
    // Prepare memory
    //---------------------------------------------------------------------------
    world_parameters.f0 = new double[world_parameters.f0_length];

    world_parameters.spectrogram = new double*[world_parameters.f0_length];
    for (int i=0;i<world_parameters.f0_length; i++)
        world_parameters.spectrogram[i] = new double[world_parameters.fft_size / 2 + 1];

    world_parameters.aperiodicity = new double*[world_parameters.f0_length];
    for (int i=0;i<world_parameters.f0_length; i++)
        world_parameters.aperiodicity[i] = new double[world_parameters.fft_size / 2 + 1];

    //---------------------------------------------------------------------------
    // Loading
    //---------------------------------------------------------------------------
    // F0 loading
    std::ifstream is_f0(argv[1], std::ios::binary | std::ios::in);
    if ( !is_f0.is_open() )
        return false;

    is_f0.read(reinterpret_cast<char*>(world_parameters.f0),
               std::streamsize(world_parameters.f0_length*sizeof(double)));
    // for (int i=0; i<world_parameters.f0_length; i++)
    //     std::cout << world_parameters.f0[i] << std::endl;
    is_f0.close();

    // Spectrogram loading
    std::ifstream is_spectrogram(argv[2], std::ios::binary | std::ios::in);
    if ( !is_spectrogram.is_open() )
        return false;

    for (int i=0; i<world_parameters.f0_length; i++)
    {
        is_spectrogram.read(reinterpret_cast<char*>(world_parameters.spectrogram[i]),
                            std::streamsize((world_parameters.fft_size / 2 + 1)*sizeof(double)));
    }

    is_spectrogram.close();

    // Aperiodicity loading
    std::ifstream is_aperiodicity(argv[3], std::ios::binary | std::ios::in);
    if ( !is_aperiodicity.is_open() )
        return false;

    for (int i=0; i<world_parameters.f0_length; i++)
    {
        is_aperiodicity.read(reinterpret_cast<char*>(world_parameters.aperiodicity[i]),
                             std::streamsize((world_parameters.fft_size / 2 + 1)*sizeof(double)));
    }

    is_aperiodicity.close();


    //---------------------------------------------------------------------------
    // Synthesis
    //---------------------------------------------------------------------------

    int y_length = static_cast<int>((world_parameters.f0_length - 1) *
                                    world_parameters.frame_period / 1000.0 * world_parameters.fs) + 1;
    double *y = new double[y_length];
    for (int i = 0; i < y_length; ++i) y[i] = 0.0;
    WaveformSynthesis(&world_parameters, y);
    wavwrite(y, y_length, world_parameters.fs, 16, argv[4]);

    //---------------------------------------------------------------------------
    // Cleaning part
    //---------------------------------------------------------------------------
    delete[] y;
    DestroyMemory(&world_parameters);

    std::cout << "complete" << std::endl;
    return EXIT_SUCCESS;
}
Esempio n. 4
0
//-----------------------------------------------------------------------------
// Test program.
// test.exe input.wav outout.wav f0 spec flag
// input.wav  : argv[1] Input file
// output.wav : argv[2] Output file
// f0         : argv[3] F0 scaling (a positive number)
// spec       : argv[4] Formant shift (a positive number)
//-----------------------------------------------------------------------------
int main(int argc, char *argv[]) {
  if (argc != 2 && argc != 3 && argc != 4 && argc != 5) {
    printf("error\n");
    return -2;
  }

  // 2016/01/28: Important modification.
  // Memory allocation is carried out in advanse.
  // This is for compatibility with C language.
  int x_length = GetAudioLength(argv[1]);
  if (x_length <= 0) {
    if (x_length == 0)
      printf("error: File not found.\n");
    else
      printf("error: The file is not .wav format.\n");
    return -1;
  }
  double *x = (double*) malloc(sizeof(double) * (x_length));
  // wavread() must be called after GetAudioLength().
  int fs, nbit;
  wavread(argv[1], &fs, &nbit, x);
  DisplayInformation(fs, nbit, x_length);

  //---------------------------------------------------------------------------
  // Analysis part
  //---------------------------------------------------------------------------
  // 2016/02/02
  // A new struct is introduced to implement safe program.
  WorldParameters world_parameters = { 0 };
  // You must set fs and frame_period before analysis/synthesis.
  world_parameters.fs = fs;

  // 5.0 ms is the default value.
  // Generally, the inverse of the lowest F0 of speech is the best.
  // However, the more elapsed time is required.
  world_parameters.frame_period = 5.0;

  // F0 estimation
  F0Estimation(x, x_length, &world_parameters);

  // Spectral envelope estimation
  SpectralEnvelopeEstimation(x, x_length, &world_parameters);

  // Aperiodicity estimation by D4C
  AperiodicityEstimation(x, x_length, &world_parameters);

  // Note that F0 must not be changed until all parameters are estimated.
  ParameterModification(argc, argv, fs, world_parameters.f0_length,
    world_parameters.fft_size, world_parameters.f0,
    world_parameters.spectrogram);

  //---------------------------------------------------------------------------
  // Synthesis part
  //---------------------------------------------------------------------------
  // The length of the output waveform
  int y_length = (int)((world_parameters.f0_length - 1) *
    world_parameters.frame_period / 1000.0 * fs) + 1;
  double *y = (double*) malloc(sizeof(double) * (y_length));
  // Synthesis
  WaveformSynthesis(&world_parameters, fs, y_length, y);

  // Output
  wavwrite(y, y_length, fs, 16, argv[2]);

  if(y != NULL) {
    free(y);
    y = NULL;
  }
  if(x != NULL) {
    free(x);
    x = NULL;
  }
  DestroyMemory(&world_parameters);

  printf("complete.\n");
  return 0;
}
Esempio n. 5
0
//-----------------------------------------------------------------------------
// Test program.
// test.exe input.wav outout.wav f0 spec flag
// input.wav  : argv[1] Input file
// output.wav : argv[2] Output file
// f0         : argv[3] F0 scaling (a positive number)
// spec       : argv[4] Formant shift (a positive number)
//-----------------------------------------------------------------------------
int main(int argc, char *argv[]) {
  if (argc != 7) {
    printf("command: synth FFT_length sampling_rate F0_file spectrogram_file aperiodicity_file output_waveform\n");
    return -2;
  }

  int fft_size = atoi(argv[1]);
  int fs = atoi(argv[2]);


  // compute n bands from fs as in d4c.cpp:325   
  int number_of_aperiodicities = static_cast<int>(MyMinDouble(world::kUpperLimit, fs / 2.0 -
      world::kFrequencyInterval) / world::kFrequencyInterval);

  WorldParameters world_parameters = { 0 };
  // You must set fs and frame_period before analysis/synthesis.
  world_parameters.fs = fs;

  // 5.0 ms is the default value.
  // Generally, the inverse of the lowest F0 of speech is the best.
  // However, the more elapsed time is required.
  world_parameters.frame_period = 5.0;
  world_parameters.fft_size = fft_size;


  // find number of frames (doubles) in f0 file:  
  struct stat st;
  if (stat(argv[3], &st) == -1) {
    printf("cannot read f0\n");
    return -2;
    }
  int f0_length = (st.st_size / sizeof(double));
  world_parameters.f0_length = f0_length;

//  printf("%d\n", f0_length);
  
  world_parameters.f0 = new double[f0_length];

  FILE *fp;
  fp = fopen(argv[3], "rb");
  for (int i = 0; i < f0_length; i++) {
	fread(&world_parameters.f0[i], sizeof(double), 1, fp);
  }
  fclose(fp);

  double **coarse_aperiodicities = new double *[world_parameters.f0_length];
  world_parameters.aperiodicity = new double *[world_parameters.f0_length];
  for (int i = 0; i < world_parameters.f0_length; ++i) {
    world_parameters.aperiodicity[i] = new double[fft_size / 2 + 1];
    coarse_aperiodicities[i]  = new double[number_of_aperiodicities];
  }

  world_parameters.spectrogram = new double *[world_parameters.f0_length];
  for (int i = 0; i < world_parameters.f0_length; ++i) {
    world_parameters.spectrogram[i] = new double[fft_size / 2 + 1];
  }

  fp = fopen(argv[4], "rb");
  for (int i = 0; i < f0_length; i++) {
    for (int j = 0; j < fft_size / 2 + 1; j++) {
      fread(&world_parameters.spectrogram[i][j], sizeof(double), 1, fp);
    }
  }
  fclose(fp);

  // aper
  fp = fopen(argv[5], "rb");
  for (int i = 0; i < f0_length; i++) {
    for (int j = 0; j < number_of_aperiodicities; j++) {
      fread(&coarse_aperiodicities[i][j], sizeof(double), 1, fp);
    }
  }
  fclose(fp);  



  // convert bandaps to full aperiodic spectrum by interpolation (originally in d4c extraction):
  
  // Linear interpolation to convert the coarse aperiodicity into its
  // spectral representation.
  
  // -- for interpolating --
  double *coarse_aperiodicity = new double[number_of_aperiodicities + 2];
  coarse_aperiodicity[0] = -60.0;
  coarse_aperiodicity[number_of_aperiodicities + 1] = 0.0;
  double *coarse_frequency_axis = new double[number_of_aperiodicities + 2];
  for (int i = 0; i <= number_of_aperiodicities; ++i)
    coarse_frequency_axis[i] =
      static_cast<double>(i) * world::kFrequencyInterval;
  coarse_frequency_axis[number_of_aperiodicities + 1] = fs / 2.0;

  double *frequency_axis = new double[fft_size / 2 + 1];
  for (int i = 0; i <= fft_size / 2; ++i)
    frequency_axis[i] = static_cast<double>(i) * fs / fft_size;
  // ----
  
  for (int i = 0; i < f0_length; ++i) {
    // load band ap values for this frame into  coarse_aperiodicity
    for (int k = 0; k < number_of_aperiodicities; ++k) {
        coarse_aperiodicity[k+1] = coarse_aperiodicities[i][k];
    }
    interp1(coarse_frequency_axis, coarse_aperiodicity,
      number_of_aperiodicities + 2, frequency_axis, fft_size / 2 + 1,
      world_parameters.aperiodicity[i]);
    for (int j = 0; j <= fft_size / 2; ++j)
      world_parameters.aperiodicity[i][j] = pow(10.0, world_parameters.aperiodicity[i][j] / 20.0);
  }  
  
  
  //printf("%d %d\n", world_parameters.f0_length, fs);
  
  //---------------------------------------------------------------------------
  // Synthesis part
  //---------------------------------------------------------------------------
  // The length of the output waveform
  int y_length = static_cast<int>((world_parameters.f0_length - 1) *
    FRAMEPERIOD / 1000.0 * fs) + 1;
  double *y = new double[y_length];
  // Synthesis
  WaveformSynthesis(&world_parameters, fs, y_length, y);

  // Output
  wavwrite(y, y_length, fs, 16, argv[6]);

  delete[] y;
  DestroyMemory(&world_parameters);

  for (int i=0; i<f0_length; i++){
    delete[] coarse_aperiodicities[i];
  }
  delete[] coarse_aperiodicities;
  delete[] coarse_aperiodicity;
  delete[] frequency_axis;
    
  printf("complete %s.\n", argv[6]);
  return 0;
}