//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 2 && argc != 3 && argc != 4 && argc != 5) { printf("error\n"); return 0; } int fs, nbit, x_length; double *x = wavread(argv[1], &fs, &nbit, &x_length); if (CheckLoadedFile(x, fs, nbit, x_length) == false) { printf("error: File not found.\n"); return 0; } // Allocate memories // The number of samples for F0 int f0_length = GetSamplesForDIO(fs, x_length, FRAMEPERIOD); double *f0 = new double[f0_length]; double *time_axis = new double[f0_length]; // FFT size for CheapTrick int fft_size = GetFFTSizeForCheapTrick(fs); double **spectrogram = new double *[f0_length]; double **aperiodicity = new double *[f0_length]; for (int i = 0; i < f0_length; ++i) { spectrogram[i] = new double[fft_size / 2 + 1]; aperiodicity[i] = new double[fft_size / 2 + 1]; } // F0 estimation F0Estimation(x, x_length, fs, f0_length, f0, time_axis); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, fs, time_axis, f0, f0_length, spectrogram); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, fs, time_axis, f0, f0_length, fft_size, aperiodicity); // Note that F0 must not be changed until all parameters are estimated. 
ParameterModification(argc, argv, fs, f0, f0_length, spectrogram); // The length of the output waveform int y_length = static_cast<int>((f0_length - 1) * FRAMEPERIOD / 1000.0 * fs) + 1; double *y = new double[y_length]; // Synthesis WaveformSynthesis(f0, f0_length, spectrogram, aperiodicity, fft_size, FRAMEPERIOD, fs, y_length, y); // Output wavwrite(y, y_length, fs, 16, argv[2]); printf("complete.\n"); delete[] x; delete[] time_axis; delete[] f0; delete[] y; for (int i = 0; i < f0_length; ++i) { delete[] spectrogram[i]; delete[] aperiodicity[i]; } delete[] spectrogram; delete[] aperiodicity; return 0; }
//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 2 && argc != 3 && argc != 4 && argc != 5) { printf("error\n"); return -2; } // 2016/01/28: Important modification. // Memory allocation is carried out in advanse. // This is for compatibility with C language. int x_length = GetAudioLength(argv[1]); if (x_length <= 0) { if (x_length == 0) printf("error: File not found.\n"); else printf("error: The file is not .wav format.\n"); return -1; } double *x = new double[x_length]; // wavread() must be called after GetAudioLength(). int fs, nbit; wavread(argv[1], &fs, &nbit, x); DisplayInformation(fs, nbit, x_length); //--------------------------------------------------------------------------- // Analysis part //--------------------------------------------------------------------------- // 2016/02/02 // A new struct is introduced to implement safe program. WorldParameters world_parameters = { 0 }; // You must set fs and frame_period before analysis/synthesis. world_parameters.fs = fs; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = 5.0; // F0 estimation // DIO // F0EstimationDio(x, x_length, &world_parameters); // Harvest F0EstimationHarvest(x, x_length, &world_parameters); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, &world_parameters); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, &world_parameters); // Note that F0 must not be changed until all parameters are estimated. 
ParameterModification(argc, argv, fs, world_parameters.f0_length, world_parameters.fft_size, world_parameters.f0, world_parameters.spectrogram); //--------------------------------------------------------------------------- // Synthesis part (2016/04/19) // There are three samples in speech synthesis // 1: Conventional synthesis // 2: Example of real-time synthesis // 3: Example of real-time synthesis (Ring buffer is efficiently used) //--------------------------------------------------------------------------- char filename[100]; // The length of the output waveform int y_length = static_cast<int>((world_parameters.f0_length - 1) * world_parameters.frame_period / 1000.0 * fs) + 1; double *y = new double[y_length]; // Synthesis 1 (conventional synthesis) for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis(&world_parameters, fs, y_length, y); sprintf(filename, "01%s", argv[2]); wavwrite(y, y_length, fs, 16, filename); // Synthesis 2 (All frames are added at the same time) for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis2(&world_parameters, fs, y_length, y); sprintf(filename, "02%s", argv[2]); wavwrite(y, y_length, fs, 16, filename); // Synthesis 3 (Ring buffer is efficiently used.) for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis3(&world_parameters, fs, y_length, y); sprintf(filename, "03%s", argv[2]); wavwrite(y, y_length, fs, 16, filename); delete[] y; delete[] x; DestroyMemory(&world_parameters); printf("complete.\n"); return 0; }
int main (int argc, char **argv) { float mfcc_result[NUMFILTERBANK-1]; FILE *fptr; float Min=0.0f,Max=0.0f; int16_t *samples = NULL; uint16_t numFrame = 0; uint16_t i,j,k; int32_t Val_RGB; uint32_t idxCoeff; int tmp; char Len; Len = strlen(argv[1]); for ( i=0; i < Len-4; i++) { FileOut[i]= argv[1][i]; FileOutTxt[i]= argv[1][i]; } FileOut[Len-4] = '.'; FileOut[Len-3] = 'b'; FileOut[Len-2] = 'm'; FileOut[Len-1] = 'p'; FileOut[Len] = '\0'; FileOutTxt[Len-4] = '.'; FileOutTxt[Len-3] = 't'; FileOutTxt[Len-2] = 'x'; FileOutTxt[Len-1] = 't'; FileOutTxt[Len] = '\0'; /* Sotarage MFCC coeffience */ fptr=fopen(FileOutTxt,"w"); PreCalcFilterBank(FilterBank,fNorm, NUMBINHALF, NUMFILTERBANK); //printf("Length: %d \n", strlen(argv[1])); wavread(argv[1], &samples); printf("No. of channels: %d\n", header->num_channels); printf("Sample rate: %d\n", header->sample_rate); printf("Bit rate: %dkbps\n", header->byte_rate*8 / 1000); printf("Bits per sample: %d\n\n", header->bps); //printf("Sample 0: %d\n", samples[0]); //printf("Sample 1: %d\n", samples[1]); // Modify the header values & samples before writing the new file wavwrite("track2.wav", samples); numFrame = header->datachunk_size/(2*2*NUMBINHALF) ; //printf("Num Frame: %d \n", numFrame ); /* BMP = (RGB_data **)malloc((NUMFILTERBANK-1)*sizeof(RGB_data*)); for (i=0; i < NUMFILTERBANK-1; i++) { BMP[i] = (RGB_data *)malloc(sizeof(RGB_data)*(2*numFrame)) ; if(BMP[i] == NULL) { fprintf(stderr, "out of memory\n"); exit (0); } } */ BMP = (RGB_data *)malloc((NUMFILTERBANK-1)*sizeof(RGB_data*)*2*numFrame); //memset(BMP, 0, sizeof(BMP)); for (i = 0; i < 2*numFrame-1; i++) { for (j=0; j< 2*NUMBINHALF;j++) { V[j].Re = (float)(samples[i*NUMBINHALF + j]); //printf(" %d", samples[i*NUMBINHALF + j]); V[j].Im = (float)0.0f; } //printf("\n"); MFCC(V, FilterBank ,fNorm, mfcc_result); //printf("MFCC:"); for(idxCoeff = 0; idxCoeff < NUMFILTERBANK -1; idxCoeff++) { fprintf(fptr,"%f ", mfcc_result[idxCoeff]); //Val_RGB = 
(int)(255/(63.164356+46.877232)*(mfcc_result[idxCoeff]+10)); Val_RGB = (int)(255*(mfcc_result[idxCoeff])); //Val_RGB = tmp*tmp; //Val_RGB = Val_RGB/255; if (Val_RGB < 0) Val_RGB = 0; if (Val_RGB > 255) Val_RGB = 255; BMP[(NUMFILTERBANK - idxCoeff-2)*2*numFrame +i].g = (BYTE)(Val_RGB); BMP[(NUMFILTERBANK - idxCoeff-2)*2*numFrame +i].b = (BYTE)(Val_RGB); BMP[(NUMFILTERBANK - idxCoeff-2)*2*numFrame +i].r = (BYTE)(Val_RGB); //BMP[NUMFILTERBANK - idxCoeff-1][i].b = (BYTE)((Val_RGB & 0x00FF00)>>8); //BMP[NUMFILTERBANK - idxCoeff-1][i].r = (BYTE)((Val_RGB & 0xFF0000)>>16); //printf(" %d ", Val_RGB); //if (mfcc_result[idxCoeff] < Min) Min=mfcc_result[idxCoeff]; //if (mfcc_result[idxCoeff] > Max) Max=mfcc_result[idxCoeff]; } fprintf(fptr,"\r\n"); } //printf("Min: %f Max: %f \n", Min, Max); bmp_generator(FileOut, 2*numFrame, NUMFILTERBANK -1 ,(BYTE*) (BMP)); //bmpread("record_21.bmp"); free(header); free(samples); free(BMP); fclose(fptr); exit (0); }
int main() { //============================================================================= //STEP 1 - INITIALISATION //============================================================================= //Create input and output SIGNAL structures. SIGNAL input,output; int i=0; //Initialise SIGNAL structures. memset(&input,0,sizeof(SIGNAL)); memset(&output,0,sizeof(SIGNAL)); //============================================================================= //STEP 2 - OPEN AUDIO FILE AND READ INTO MEMORY //============================================================================= //Read input signal into memory. printf("Reading %s into memory...\n",kInputSignalLocation); wavread(kInputSignalLocation, &input); //============================================================================= //STEP 3 - PREPARE THE OUTPUT SIGNAL //============================================================================= //Set up output SIGNAL structure. Fill_SIGNAL(&output, input.frames, input.samplerate, input.channels, SF_FORMAT_WAV|SF_FORMAT_PCM_32, input.sections, input.seekable, NULL); //Allocate space for the 'output.data' buffer. 
output.data=(float *)malloc(sizeof(float)* input.frames*input.channels); //============================================================================= //STEP 4 - PROCESSING //============================================================================= //In this example we reverse the input signal //and store the results in the output signal for(i=0;i<input.frames;i++){ output.data[i]=input.data[input.frames-i]; //If the signal is stereo then a frame comprises two samples if (input.channels==2) output.data[i+1]=input.data[input.frames-i+1]; } //============================================================================= //STEP 5 - WRITE OUTPUT TO AUDIO FILE //============================================================================= //Write output file to disk printf("\n\nWriting audio to output file %s\n",kOutputSignalLocation); wavwrite(kOutputSignalLocation, &output); //============================================================================= //STEP 6 - CLEAN UP //============================================================================= free(input.data); free(output.data); return 0; }
//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 2 && argc != 3 && argc != 4 && argc != 5) { printf("error\n"); return -2; } // 2016/01/28: Important modification. // Memory allocation is carried out in advanse. // This is for compatibility with C language. int x_length = GetAudioLength(argv[1]); if (x_length <= 0) { if (x_length == 0) printf("error: File not found.\n"); else printf("error: The file is not .wav format.\n"); return -1; } double *x = (double*) malloc(sizeof(double) * (x_length)); // wavread() must be called after GetAudioLength(). int fs, nbit; wavread(argv[1], &fs, &nbit, x); DisplayInformation(fs, nbit, x_length); //--------------------------------------------------------------------------- // Analysis part //--------------------------------------------------------------------------- // 2016/02/02 // A new struct is introduced to implement safe program. WorldParameters world_parameters = { 0 }; // You must set fs and frame_period before analysis/synthesis. world_parameters.fs = fs; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = 5.0; // F0 estimation F0Estimation(x, x_length, &world_parameters); // Spectral envelope estimation SpectralEnvelopeEstimation(x, x_length, &world_parameters); // Aperiodicity estimation by D4C AperiodicityEstimation(x, x_length, &world_parameters); // Note that F0 must not be changed until all parameters are estimated. 
ParameterModification(argc, argv, fs, world_parameters.f0_length, world_parameters.fft_size, world_parameters.f0, world_parameters.spectrogram); //--------------------------------------------------------------------------- // Synthesis part //--------------------------------------------------------------------------- // The length of the output waveform int y_length = (int)((world_parameters.f0_length - 1) * world_parameters.frame_period / 1000.0 * fs) + 1; double *y = (double*) malloc(sizeof(double) * (y_length)); // Synthesis WaveformSynthesis(&world_parameters, fs, y_length, y); // Output wavwrite(y, y_length, fs, 16, argv[2]); if(y != NULL) { free(y); y = NULL; } if(x != NULL) { free(x); x = NULL; } DestroyMemory(&world_parameters); printf("complete.\n"); return 0; }
/*
 * Finish a WAV handle: when its sample count is non-zero, pass that count
 * together with samples[0] and samples[1] to wavwrite(), then close the
 * handle with wavclose().
 */
void wavwriteclose( WAV *wav )
{
    if ( wav->nsamples != 0 )
    {
        wavwrite( wav, wav->nsamples, wav->samples[0], wav->samples[1] );
    }

    wavclose( wav );
}
/** * Main function * */ int main(int argc, char *argv[]) { if (argc != 5) { fprintf(stderr, "%s <input_f0_file> <input_spectrum_file> <input_aperiodicity_file> <output_wav_file>", argv[0]); return EXIT_FAILURE; } // Define a default filled structures WorldParameters world_parameters; world_parameters.fs = 22050; // FIXME: hardcoded value world_parameters.f0_length = filesize(argv[1]) / sizeof(double); world_parameters.fft_size = ((filesize(argv[2]) / (sizeof(double) * world_parameters.f0_length)) - 1) * 2; // Be careful that .sp contains only first half of the spectrum std::cout << "fft size = " << world_parameters.fft_size << std::endl; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. world_parameters.frame_period = DEFAULT_FRAME_PERIOD; //--------------------------------------------------------------------------- // Prepare memory //--------------------------------------------------------------------------- world_parameters.f0 = new double[world_parameters.f0_length]; world_parameters.spectrogram = new double*[world_parameters.f0_length]; for (int i=0;i<world_parameters.f0_length; i++) world_parameters.spectrogram[i] = new double[world_parameters.fft_size / 2 + 1]; world_parameters.aperiodicity = new double*[world_parameters.f0_length]; for (int i=0;i<world_parameters.f0_length; i++) world_parameters.aperiodicity[i] = new double[world_parameters.fft_size / 2 + 1]; //--------------------------------------------------------------------------- // Loading //--------------------------------------------------------------------------- // F0 loading std::ifstream is_f0(argv[1], std::ios::binary | std::ios::in); if ( !is_f0.is_open() ) return false; is_f0.read(reinterpret_cast<char*>(world_parameters.f0), std::streamsize(world_parameters.f0_length*sizeof(double))); // for (int i=0; i<world_parameters.f0_length; i++) // std::cout << world_parameters.f0[i] << std::endl; 
is_f0.close(); // Spectrogram loading std::ifstream is_spectrogram(argv[2], std::ios::binary | std::ios::in); if ( !is_spectrogram.is_open() ) return false; for (int i=0; i<world_parameters.f0_length; i++) { is_spectrogram.read(reinterpret_cast<char*>(world_parameters.spectrogram[i]), std::streamsize((world_parameters.fft_size / 2 + 1)*sizeof(double))); } is_spectrogram.close(); // Aperiodicity loading std::ifstream is_aperiodicity(argv[3], std::ios::binary | std::ios::in); if ( !is_aperiodicity.is_open() ) return false; for (int i=0; i<world_parameters.f0_length; i++) { is_aperiodicity.read(reinterpret_cast<char*>(world_parameters.aperiodicity[i]), std::streamsize((world_parameters.fft_size / 2 + 1)*sizeof(double))); } is_aperiodicity.close(); //--------------------------------------------------------------------------- // Synthesis //--------------------------------------------------------------------------- int y_length = static_cast<int>((world_parameters.f0_length - 1) * world_parameters.frame_period / 1000.0 * world_parameters.fs) + 1; double *y = new double[y_length]; for (int i = 0; i < y_length; ++i) y[i] = 0.0; WaveformSynthesis(&world_parameters, y); wavwrite(y, y_length, world_parameters.fs, 16, argv[4]); //--------------------------------------------------------------------------- // Cleaning part //--------------------------------------------------------------------------- delete[] y; DestroyMemory(&world_parameters); std::cout << "complete" << std::endl; return EXIT_SUCCESS; }
//----------------------------------------------------------------------------- // Test program. // test.exe input.wav outout.wav f0 spec flag // input.wav : argv[1] Input file // output.wav : argv[2] Output file // f0 : argv[3] F0 scaling (a positive number) // spec : argv[4] Formant shift (a positive number) //----------------------------------------------------------------------------- int main(int argc, char *argv[]) { if (argc != 7) { printf("command: synth FFT_length sampling_rate F0_file spectrogram_file aperiodicity_file output_waveform\n"); return -2; } int fft_size = atoi(argv[1]); int fs = atoi(argv[2]); // compute n bands from fs as in d4c.cpp:325 int number_of_aperiodicities = static_cast<int>(MyMinDouble(world::kUpperLimit, fs / 2.0 - world::kFrequencyInterval) / world::kFrequencyInterval); WorldParameters world_parameters = { 0 }; // You must set fs and frame_period before analysis/synthesis. world_parameters.fs = fs; // 5.0 ms is the default value. // Generally, the inverse of the lowest F0 of speech is the best. // However, the more elapsed time is required. 
world_parameters.frame_period = 5.0; world_parameters.fft_size = fft_size; // find number of frames (doubles) in f0 file: struct stat st; if (stat(argv[3], &st) == -1) { printf("cannot read f0\n"); return -2; } int f0_length = (st.st_size / sizeof(double)); world_parameters.f0_length = f0_length; // printf("%d\n", f0_length); world_parameters.f0 = new double[f0_length]; FILE *fp; fp = fopen(argv[3], "rb"); for (int i = 0; i < f0_length; i++) { fread(&world_parameters.f0[i], sizeof(double), 1, fp); } fclose(fp); double **coarse_aperiodicities = new double *[world_parameters.f0_length]; world_parameters.aperiodicity = new double *[world_parameters.f0_length]; for (int i = 0; i < world_parameters.f0_length; ++i) { world_parameters.aperiodicity[i] = new double[fft_size / 2 + 1]; coarse_aperiodicities[i] = new double[number_of_aperiodicities]; } world_parameters.spectrogram = new double *[world_parameters.f0_length]; for (int i = 0; i < world_parameters.f0_length; ++i) { world_parameters.spectrogram[i] = new double[fft_size / 2 + 1]; } fp = fopen(argv[4], "rb"); for (int i = 0; i < f0_length; i++) { for (int j = 0; j < fft_size / 2 + 1; j++) { fread(&world_parameters.spectrogram[i][j], sizeof(double), 1, fp); } } fclose(fp); // aper fp = fopen(argv[5], "rb"); for (int i = 0; i < f0_length; i++) { for (int j = 0; j < number_of_aperiodicities; j++) { fread(&coarse_aperiodicities[i][j], sizeof(double), 1, fp); } } fclose(fp); // convert bandaps to full aperiodic spectrum by interpolation (originally in d4c extraction): // Linear interpolation to convert the coarse aperiodicity into its // spectral representation. 
// -- for interpolating -- double *coarse_aperiodicity = new double[number_of_aperiodicities + 2]; coarse_aperiodicity[0] = -60.0; coarse_aperiodicity[number_of_aperiodicities + 1] = 0.0; double *coarse_frequency_axis = new double[number_of_aperiodicities + 2]; for (int i = 0; i <= number_of_aperiodicities; ++i) coarse_frequency_axis[i] = static_cast<double>(i) * world::kFrequencyInterval; coarse_frequency_axis[number_of_aperiodicities + 1] = fs / 2.0; double *frequency_axis = new double[fft_size / 2 + 1]; for (int i = 0; i <= fft_size / 2; ++i) frequency_axis[i] = static_cast<double>(i) * fs / fft_size; // ---- for (int i = 0; i < f0_length; ++i) { // load band ap values for this frame into coarse_aperiodicity for (int k = 0; k < number_of_aperiodicities; ++k) { coarse_aperiodicity[k+1] = coarse_aperiodicities[i][k]; } interp1(coarse_frequency_axis, coarse_aperiodicity, number_of_aperiodicities + 2, frequency_axis, fft_size / 2 + 1, world_parameters.aperiodicity[i]); for (int j = 0; j <= fft_size / 2; ++j) world_parameters.aperiodicity[i][j] = pow(10.0, world_parameters.aperiodicity[i][j] / 20.0); } //printf("%d %d\n", world_parameters.f0_length, fs); //--------------------------------------------------------------------------- // Synthesis part //--------------------------------------------------------------------------- // The length of the output waveform int y_length = static_cast<int>((world_parameters.f0_length - 1) * FRAMEPERIOD / 1000.0 * fs) + 1; double *y = new double[y_length]; // Synthesis WaveformSynthesis(&world_parameters, fs, y_length, y); // Output wavwrite(y, y_length, fs, 16, argv[6]); delete[] y; DestroyMemory(&world_parameters); for (int i=0; i<f0_length; i++){ delete[] coarse_aperiodicities[i]; } delete[] coarse_aperiodicities; delete[] coarse_aperiodicity; delete[] frequency_axis; printf("complete %s.\n", argv[6]); return 0; }