void setup() {
  // Label the three input axes and register the input stream.
  stream.setLabelsForAllDimensions({"x", "y", "z"});
  useInputStream(stream);

  // DTW classifier: no input scaling, null rejection enabled with
  // coefficient null_rej. Trim low-activity data from the start/end of
  // training samples (threshold 0.1, at most 75% of a sample trimmed).
  DTW dtw(false, true, null_rej);
  dtw.enableTrimTrainingData(true, 0.1, 75);

  pipeline.setClassifier(dtw);
  pipeline.addPostProcessingModule(ClassLabelTimeoutFilter(timeout));
  usePipeline(pipeline);

  registerTuneable(
      null_rej, 0.1, 5.0, "Variability",
      "How different from the training data a new gesture can be and "
      "still be considered the same gesture. The higher the number, the "
      "more different it can be.",
      [](double new_null_rej) {
        pipeline.getClassifier()->setNullRejectionCoeff(new_null_rej);
        pipeline.getClassifier()->recomputeNullRejectionThresholds();
      });

  registerTuneable(
      timeout, 1, 3000, "Timeout",
      "How long (in milliseconds) to wait after recognizing a "
      "gesture before recognizing another one.",
      [](double new_timeout) {
        ClassLabelTimeoutFilter* filter =
            dynamic_cast<ClassLabelTimeoutFilter*>(
                pipeline.getPostProcessingModule(0));
        assert(filter != nullptr);
        filter->setTimeoutDuration(new_timeout);
      });
}
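// A rough sketch of what the "Variability" tuneable controls. In GRT, each
// class gets a null-rejection threshold derived from its training data; for
// DTW this is approximately the mean training distance plus null_rej
// standard deviations (the exact formula is internal to GRT):
//
//   threshold_k ~= trainingMu_k + null_rej * trainingSigma_k
//
// A prediction whose distance exceeds its class threshold is reported as the
// null gesture (class 0). So null_rej = 0.1 accepts only near-exact
// repetitions of the training gestures, while null_rej = 5.0 accepts matches
// up to roughly five standard deviations from the class mean.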
void setup() {
  stream.setLabelsForAllDimensions({"audio"});

  // FFT over kFftWindowSize samples with kFftHopSize hop; compute magnitude
  // only (no phase).
  pipeline.addFeatureExtractionModule(
      FFT(kFftWindowSize, kFftHopSize, DIM,
          FFT::HAMMING_WINDOW, true, false));

  MFCC::Options options;
  options.sample_rate = kSampleRate;
  options.fft_size = kFftWindowSize / 2;
  options.start_freq = 300;
  options.end_freq = 3700;
  options.num_tri_filter = 26;
  options.num_cepstral_coeff = 12;
  options.lifter_param = 22;
  options.use_vad = true;
  options.noise_level = noise_level;
  pipeline.addFeatureExtractionModule(MFCC(options));

  pipeline.setClassifier(SVM());  // alternative: GMM(16, true, false, 1, 100, 0.001)

  // In post-processing, we wait for n predictions. If m out of those n
  // predictions are from the same class, we declare that class the result:
  //
  //   n = (duration * sample_rate) / frame_size
  //       where duration    = post_duration (converted from ms to seconds)
  //             sample_rate = kSampleRate
  //             frame_size  = kFftHopSize
  //   m = n * post_ratio
  //
  // Multiply before dividing so an integer post_duration in milliseconds
  // isn't truncated to zero seconds.
  int num_predictions = post_duration * kSampleRate / 1000 / kFftHopSize;
  pipeline.addPostProcessingModule(
      ClassLabelFilter(num_predictions * post_ratio, num_predictions));

  auto ratio_updater = [](double new_ratio) {
    ClassLabelFilter* filter =
        dynamic_cast<ClassLabelFilter*>(pipeline.getPostProcessingModule(0));
    // Recalculate num_predictions, as post_duration might have changed.
    int num_predictions = post_duration * kSampleRate / 1000 / kFftHopSize;
    filter->setMinimumCount(new_ratio * num_predictions);
  };

  auto duration_updater = [](int new_duration) {
    ClassLabelFilter* filter =
        dynamic_cast<ClassLabelFilter*>(pipeline.getPostProcessingModule(0));
    // new_duration has already been stored in post_duration by the tuneable
    // framework; recompute num_predictions from it.
    int num_predictions = post_duration * kSampleRate / 1000 / kFftHopSize;
    filter->setBufferSize(num_predictions);
  };

  auto noise_updater = [](int new_noise_level) {
    MFCC* mfcc = dynamic_cast<MFCC*>(pipeline.getFeatureExtractionModule(1));
    mfcc->setNoiseLevel(new_noise_level);
  };

  registerTuneable(noise_level, 0, 20, "Noise Level",
                   "The threshold for the system to distinguish between "
                   "ambient noise and speech/sound",
                   noise_updater);

  registerTuneable(post_duration, 0, 2000, "Duration",
                   "Time (in ms) that is considered as a whole "
                   "for smoothing the prediction",
                   duration_updater);

  registerTuneable(post_ratio, 0.0f, 1.0f, "Ratio",
                   "The portion of time in duration that "
                   "should be from the same class",
                   ratio_updater);

  useInputStream(stream);
  useOutputStream(oStream);
  usePipeline(pipeline);
  useLeaveOneOutScoring(false);
  setGUIBufferSize(kSampleRate);
}
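// Worked example of the smoothing math above, using hypothetical values
// (kSampleRate = 16000 Hz, kFftHopSize = 512, post_duration = 500 ms and
// post_ratio = 0.7 are illustrative assumptions, not the project's actual
// constants):
//
//   num_predictions = 500 * 16000 / 1000 / 512 = 15   (integer division)
//   minimum count   = 0.7 * 15                 = 10   (truncated to int)
//
// The ClassLabelFilter then buffers the 15 most recent per-frame
// predictions and only reports a class once at least 10 of them agree;
// otherwise it outputs the null class.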