Beispiel #1
0
void D4C(const double *x, int x_length, int fs, const double *time_axis,
    const double *f0, int f0_length, int fft_size, const D4COption *option,
    double **aperiodicity) {
  int fft_size_d4c = static_cast<int>(pow(2.0, 1.0 +
      static_cast<int>(log(4.0 * fs / world::kFloorF0 + 1) / world::kLog2)));

  ForwardRealFFT forward_real_fft = {0};
  InitializeForwardRealFFT(fft_size_d4c, &forward_real_fft);

  int number_of_aperiodicities =
    static_cast<int>(MyMinDouble(world::kUpperLimit, fs / 2.0 -
      world::kFrequencyInterval) / world::kFrequencyInterval);
  // Since the window function is common in D4CGeneralBody(),
  // it is designed here to speed up.
  int window_length =
    static_cast<int>(world::kFrequencyInterval * fft_size_d4c / fs) * 2 + 1;
  double *window =  new double[window_length];
  NuttallWindow(window_length, window);

  double *coarse_aperiodicity = new double[number_of_aperiodicities + 2];
  coarse_aperiodicity[0] = -60.0;
  coarse_aperiodicity[number_of_aperiodicities + 1] = 0.0;
  double *coarse_frequency_axis = new double[number_of_aperiodicities + 2];
  for (int i = 0; i <= number_of_aperiodicities; ++i)
    coarse_frequency_axis[i] =
      static_cast<double>(i) * world::kFrequencyInterval;
  coarse_frequency_axis[number_of_aperiodicities + 1] = fs / 2.0;

  double *frequency_axis = new double[fft_size / 2 + 1];
  for (int i = 0; i <= fft_size / 2; ++i)
    frequency_axis[i] = static_cast<double>(i) * fs / fft_size;
  for (int i = 0; i < f0_length; ++i) {
    if (f0[i] == 0) {
      for (int j = 0; j <= fft_size / 2; ++j) aperiodicity[i][j] = 0.0;
      continue;
    }
    D4CGeneralBody(x, x_length, fs, MyMaxDouble(f0[i], world::kFloorF0),
        fft_size_d4c, time_axis[i], number_of_aperiodicities, window,
        window_length, &forward_real_fft, &coarse_aperiodicity[1]);
    // Linear interpolation to convert the coarse aperiodicity into its
    // spectral representation.
    interp1(coarse_frequency_axis, coarse_aperiodicity,
        number_of_aperiodicities + 2, frequency_axis, fft_size / 2 + 1,
        aperiodicity[i]);
    for (int j = 0; j <= fft_size / 2; ++j)
      aperiodicity[i][j] = pow(10.0, aperiodicity[i][j] / 20.0);
  }

  DestroyForwardRealFFT(&forward_real_fft);
  delete[] coarse_frequency_axis;
  delete[] coarse_aperiodicity;
  delete[] window;
  delete[] frequency_axis;
}
Beispiel #2
0
//-----------------------------------------------------------------------------
// Test program.
// test.exe input.wav outout.wav f0 spec flag
// input.wav  : argv[1] Input file
// output.wav : argv[2] Output file
// f0         : argv[3] F0 scaling (a positive number)
// spec       : argv[4] Formant shift (a positive number)
//-----------------------------------------------------------------------------
int main(int argc, char *argv[]) {
  if (argc != 7) {
    printf("command: synth FFT_length sampling_rate F0_file spectrogram_file aperiodicity_file output_waveform\n");
    return -2;
  }

  int fft_size = atoi(argv[1]);
  int fs = atoi(argv[2]);


  // compute n bands from fs as in d4c.cpp:325   
  int number_of_aperiodicities = static_cast<int>(MyMinDouble(world::kUpperLimit, fs / 2.0 -
      world::kFrequencyInterval) / world::kFrequencyInterval);

  WorldParameters world_parameters = { 0 };
  // You must set fs and frame_period before analysis/synthesis.
  world_parameters.fs = fs;

  // 5.0 ms is the default value.
  // Generally, the inverse of the lowest F0 of speech is the best.
  // However, the more elapsed time is required.
  world_parameters.frame_period = 5.0;
  world_parameters.fft_size = fft_size;


  // find number of frames (doubles) in f0 file:  
  struct stat st;
  if (stat(argv[3], &st) == -1) {
    printf("cannot read f0\n");
    return -2;
    }
  int f0_length = (st.st_size / sizeof(double));
  world_parameters.f0_length = f0_length;

//  printf("%d\n", f0_length);
  
  world_parameters.f0 = new double[f0_length];

  FILE *fp;
  fp = fopen(argv[3], "rb");
  for (int i = 0; i < f0_length; i++) {
	fread(&world_parameters.f0[i], sizeof(double), 1, fp);
  }
  fclose(fp);

  double **coarse_aperiodicities = new double *[world_parameters.f0_length];
  world_parameters.aperiodicity = new double *[world_parameters.f0_length];
  for (int i = 0; i < world_parameters.f0_length; ++i) {
    world_parameters.aperiodicity[i] = new double[fft_size / 2 + 1];
    coarse_aperiodicities[i]  = new double[number_of_aperiodicities];
  }

  world_parameters.spectrogram = new double *[world_parameters.f0_length];
  for (int i = 0; i < world_parameters.f0_length; ++i) {
    world_parameters.spectrogram[i] = new double[fft_size / 2 + 1];
  }

  fp = fopen(argv[4], "rb");
  for (int i = 0; i < f0_length; i++) {
    for (int j = 0; j < fft_size / 2 + 1; j++) {
      fread(&world_parameters.spectrogram[i][j], sizeof(double), 1, fp);
    }
  }
  fclose(fp);

  // aper
  fp = fopen(argv[5], "rb");
  for (int i = 0; i < f0_length; i++) {
    for (int j = 0; j < number_of_aperiodicities; j++) {
      fread(&coarse_aperiodicities[i][j], sizeof(double), 1, fp);
    }
  }
  fclose(fp);  



  // convert bandaps to full aperiodic spectrum by interpolation (originally in d4c extraction):
  
  // Linear interpolation to convert the coarse aperiodicity into its
  // spectral representation.
  
  // -- for interpolating --
  double *coarse_aperiodicity = new double[number_of_aperiodicities + 2];
  coarse_aperiodicity[0] = -60.0;
  coarse_aperiodicity[number_of_aperiodicities + 1] = 0.0;
  double *coarse_frequency_axis = new double[number_of_aperiodicities + 2];
  for (int i = 0; i <= number_of_aperiodicities; ++i)
    coarse_frequency_axis[i] =
      static_cast<double>(i) * world::kFrequencyInterval;
  coarse_frequency_axis[number_of_aperiodicities + 1] = fs / 2.0;

  double *frequency_axis = new double[fft_size / 2 + 1];
  for (int i = 0; i <= fft_size / 2; ++i)
    frequency_axis[i] = static_cast<double>(i) * fs / fft_size;
  // ----
  
  for (int i = 0; i < f0_length; ++i) {
    // load band ap values for this frame into  coarse_aperiodicity
    for (int k = 0; k < number_of_aperiodicities; ++k) {
        coarse_aperiodicity[k+1] = coarse_aperiodicities[i][k];
    }
    interp1(coarse_frequency_axis, coarse_aperiodicity,
      number_of_aperiodicities + 2, frequency_axis, fft_size / 2 + 1,
      world_parameters.aperiodicity[i]);
    for (int j = 0; j <= fft_size / 2; ++j)
      world_parameters.aperiodicity[i][j] = pow(10.0, world_parameters.aperiodicity[i][j] / 20.0);
  }  
  
  
  //printf("%d %d\n", world_parameters.f0_length, fs);
  
  //---------------------------------------------------------------------------
  // Synthesis part
  //---------------------------------------------------------------------------
  // The length of the output waveform
  int y_length = static_cast<int>((world_parameters.f0_length - 1) *
    FRAMEPERIOD / 1000.0 * fs) + 1;
  double *y = new double[y_length];
  // Synthesis
  WaveformSynthesis(&world_parameters, fs, y_length, y);

  // Output
  wavwrite(y, y_length, fs, 16, argv[6]);

  delete[] y;
  DestroyMemory(&world_parameters);

  for (int i=0; i<f0_length; i++){
    delete[] coarse_aperiodicities[i];
  }
  delete[] coarse_aperiodicities;
  delete[] coarse_aperiodicity;
  delete[] frequency_axis;
    
  printf("complete %s.\n", argv[6]);
  return 0;
}