/*
 * Program entry point: build the network from the command-line arguments,
 * load the sample set and keep training until the loss settles.
 *
 * The network (nn) and the sample set (samples) are not declared in this
 * function; they are set up by build_neural_net_from_cmds() and
 * get_samples_from_file() respectively.
 * NOTE(review): 3000 and 784 are presumably the sample count and the number
 * of values per sample - confirm against get_samples_from_file().
 *
 * Training stops as soon as the squared difference between two consecutive
 * loss values is no longer greater than epsilon.
 */
int main(int argc, char **argv)
{
    const float epsilon = .1;
    const float lambda = .00000001;
    float prev_loss;
    float curr_loss;

    build_neural_net_from_cmds(argc, argv);
    samples = get_samples_from_file("training_data/mnist_test.csv", 3000, 784);

    /* report the loss of the untrained network */
    prev_loss = calculate_loss(nn, samples);
    printf("%f\n", prev_loss);

    /* offset the second value so the convergence test has something to compare */
    curr_loss = prev_loss + 1;

    for (;;) {
        /* converged: the loss moved by no more than sqrt(epsilon) */
        if (!((prev_loss - curr_loss) * (prev_loss - curr_loss) > epsilon)) {
            break;
        }

        prev_loss = curr_loss;
        train_neural_net(nn, samples, lambda);
        curr_loss = calculate_loss(nn, samples);

        /* one progress line per pass: loss, then fraction predicted correctly */
        printf("%f, %f\n", curr_loss, calculate_percent_predicted_correctly(nn, samples));
    }
}
// transition between phonemes void Synthesizer::phoneme_transition() { int ch = phonemes[phoneme_idx]; int next = phonemes[phoneme_idx + 1]; //printf("generating %d : %d..\n", phoneme_idx, ch); std::stringstream filename; filename << std::setfill('0') << std::setw(2) << ch << std::setw(2) << next; std::cout << filename.str() << std::endl; samples = get_samples_from_file("untitled");//filename.str()); duration = samples.size() / Fs; }
/* TODO: write version to pick up all prominent notes */ struct note get_note_from_file(const char * const filename, double secs_to_sample) { int sample_rate; int num_channels; long sample_num_of_highest_magnitude; long samples_returned; long num_samples; double * samples; double * mono_samples; double * hannd_samples; double * fft_magnitudes; struct note invalid_note; fftw_complex * fft_samples; /* initialize as invalid note for error checking purposes */ invalid_note.semitone = UNKNOWN_SEMITONE; invalid_note.octave = INVALID_OCTAVE; invalid_note.cents = INVALID_CENTS ; if (NULL == filename) { fprintf(stderr, "filename is null\n"); return invalid_note; } if (0.0 >= secs_to_sample) { fprintf(stderr, "secs_to_sample is less than zero\n"); return invalid_note; } /* get the sample rate and number of channels in the file */ get_sound_file_metadata(filename, &sample_rate, &num_channels); if (-1 == sample_rate || -1 == num_channels) { fprintf(stderr, "could not retrieve sound file metadata; does the file exist?\n"); return invalid_note; } /* get the number of samples we'll be working with */ num_samples = secs_to_sample * sample_rate; /* * Get the samples from the file. * * If the the file is gone or we don't get the requested number of * samples back, we return an invalid note. 
*/ samples = get_samples_from_file(filename, num_samples, &samples_returned); if (NULL == samples || num_samples != samples_returned) { fprintf(stderr, "could not access file or retrieve requested number of samples from file\n"); FREE_SAFELY(samples); return invalid_note; } /* combine all the channels into one */ mono_samples = combine_channels(samples, num_samples, num_channels); FREE_SAFELY(samples); /* apply the Hanning function to our window of samples */ if (NULL == (hannd_samples = apply_hann_function(mono_samples, num_samples))) { fprintf(stderr, "could not apply hanning function; chances are there is a bigger problem\n"); FREE_SAFELY(mono_samples); FREE_SAFELY(hannd_samples); return invalid_note; } FREE_SAFELY(mono_samples); /* get the Fast Fourier Transform of our samples */ if (NULL == (fft_samples = get_fft(hannd_samples, num_samples))) { fprintf(stderr, "could not calculate fft\n"); FREE_SAFELY(hannd_samples); FREE_SAFELY(fft_samples); return invalid_note; } FREE_SAFELY(hannd_samples); /* * The FFT output array is all complex numbers. We need to calculate * the magnitude of each output value in order to reveal the frequencies * that we care about. */ fft_magnitudes = get_fft_magnitudes(fft_samples, num_samples); fftw_free(fft_samples); fft_samples = NULL; /* get the FFT sample index that has the highest magnitude */ sample_num_of_highest_magnitude = get_index_of_maximum(fft_magnitudes, num_samples); FREE_SAFELY(fft_magnitudes); /* * Finally, get the note. * * The frequency is simply the sample number in our FFT divided by the * number of seconds that we sampled. This is because we have * num_samples samples in our FFT and secs_to_sample = num_samples / * sample_rate. It also makes sense that the denominator here would be * in seconds since hertz = seconds^-1. */ return get_exact_note(sample_num_of_highest_magnitude / secs_to_sample); }