void setup() { stream.setLabelsForAllDimensions({"x", "y", "z"}); useInputStream(stream); DTW dtw(false, true, null_rej); dtw.enableTrimTrainingData(true, 0.1, 75); pipeline.setClassifier(dtw); pipeline.addPostProcessingModule(ClassLabelTimeoutFilter(timeout)); usePipeline(pipeline); registerTuneable( null_rej, 0.1, 5.0, "Variability", "How different from the training data a new gesture can be and " "still be considered the same gesture. The higher the number, the " "more different it can be.", [](double new_null_rej) { pipeline.getClassifier()->setNullRejectionCoeff(new_null_rej); pipeline.getClassifier()->recomputeNullRejectionThresholds(); }); registerTuneable( timeout, 1, 3000, "Timeout", "How long (in milliseconds) to wait after recognizing a " "gesture before recognizing another one.", [](double new_timeout) { ClassLabelTimeoutFilter* filter = dynamic_cast<ClassLabelTimeoutFilter*>( pipeline.getPostProcessingModule(0)); assert(filter != nullptr); filter->setTimeoutDuration(new_timeout); }); }
#include "GRT.h"
using namespace GRT;
using namespace std;

int main (int argc, const char * argv[])
{
    //Create a new gesture recognition pipeline
    GestureRecognitionPipeline pipeline;

    //Add an ANBC module
    pipeline.setClassifier( ANBC() );

    //Add a ClassLabelFilter as a post processing module with a minCount of 5 and a buffer size of 10
    pipeline.addPostProcessingModule( ClassLabelFilter(5,10) );

    //Load some training data to train and test the classifier
    ClassificationData trainingData;
    ClassificationData testData;

    if( !trainingData.loadDatasetFromFile("ClassLabelFilterTrainingData.txt") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }

    if( !testData.loadDatasetFromFile("ClassLabelFilterTestData.txt") ){
        cout << "Failed to load test data!\n";
        return EXIT_FAILURE;
    }

    //Train the classifier
    if( !pipeline.train( trainingData ) ){
        cout << "Failed to train classifier!\n";
        return EXIT_FAILURE;
    }

    //Use the test dataset to demonstrate the output of the ClassLabelFilter
    for(UINT i=0; i<testData.getNumSamples(); i++){
        VectorDouble inputVector = testData[i].getSample();

        if( !pipeline.predict( inputVector ) ){
            cout << "Failed to perform prediction for test sample: " << i << "\n";
            return EXIT_FAILURE;
        }

        //Get the predicted class label (this will be the processed class label)
        UINT predictedClassLabel = pipeline.getPredictedClassLabel();

        //Get the unprocessed class label (i.e. the direct output of the classifier)
        UINT unprocessedClassLabel = pipeline.getUnProcessedPredictedClassLabel();

        //Also print the results to the screen
        cout << "Processed Class Label: \t" << predictedClassLabel << "\tUnprocessed Class Label: \t" << unprocessedClassLabel << endl;
    }

    return EXIT_SUCCESS;
}
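// To see what ClassLabelFilter(5,10) does to raw predictions, the filter can
// also be driven directly with a stream of labels. This is a minimal
// standalone sketch; the label sequence is made up for illustration, and it
// assumes ClassLabelFilter::filter(UINT) returns the filtered class label.
#include <iostream>
#include "GRT.h"
using namespace GRT;
using namespace std;

int main()
{
    //minimumCount = 5, bufferSize = 10: a label is only reported if it occurs
    //at least 5 times in the last 10 predictions, otherwise the filter outputs 0
    ClassLabelFilter filter(5,10);

    //Hypothetical raw classifier output containing a few spurious labels
    UINT rawLabels[] = {2, 2, 3, 2, 2, 2, 1, 2, 2, 2};

    for(UINT raw : rawLabels){
        UINT filtered = filter.filter( raw );
        cout << "Raw: " << raw << "\tFiltered: " << filtered << endl;
    }

    return EXIT_SUCCESS;
}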
bool GRT_Recognizer::initPipeline(string trainingdatafile, int dimension)
{
    //Initialize the training and info variables
    // infoText = "";
    // trainingClassLabel = 1;
    // noOfHands = 2;
    // noOfTrackedHands = 0;

    //The input to the training data will be the R[x y z]L[x y z] from the right and left hand,
    //so we set the number of dimensions to 6
    LabelledTimeSeriesClassificationData trainingData;
    //trainingData.setNumDimensions(6);
    trainingData.loadDatasetFromFile(trainingdatafile);

    //Initialize the DTW classifier
    DTW dtw;

    //Turn on null rejection, this lets the classifier output the predicted class label of 0 when the likelihood of a gesture is low
    dtw.enableNullRejection( true );

    //Set the null rejection coefficient to 2, this controls the thresholds for the automatic null rejection
    //You can increase this value if you find that your real-time gestures are not being recognized
    //If you are getting too many false positives then you should decrease this value
    dtw.setNullRejectionCoeff(2);

    //Turn on the automatic data trimming, this will remove any sections of non-movement from the start and end of the training samples
    dtw.enableTrimTrainingData(true, 0.1, 90);

    //Offset the timeseries data by the first sample, this makes your gestures (more) invariant to the location where the gesture is performed
    dtw.setOffsetTimeseriesUsingFirstSample(true);

    //Add the classifier to the pipeline (after we do this, we don't need the DTW classifier anymore)
    pipeline.setClassifier( dtw );

    //pipeline.addPreProcessingModule(MovingAverageFilter(5, dimension));
    //pipeline.addFeatureExtractionModule(FFT(16, 1, dimension));

    /*ClassLabelFilter myFilter = ClassLabelFilter();
    myFilter.setBufferSize(6);
    myFilter.setBufferSize(2);*/

    pipeline.addPostProcessingModule(ClassLabelChangeFilter());
    pipeline.train(trainingData);

    return true;
}
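// Once initPipeline() has trained the pipeline, each new frame of hand
// coordinates can be pushed through it for real-time recognition. This is a
// sketch, not code from the project: recognizeFrame() is a hypothetical
// method, the 6-dimensional layout R[x y z]L[x y z] follows the comment
// above, and the result relies on the ClassLabelChangeFilter reporting a
// label only when the prediction changes (otherwise it outputs 0).
UINT GRT_Recognizer::recognizeFrame(const VectorDouble &inputVector)
{
    //inputVector is expected to hold R[x y z]L[x y z] for the current frame
    if( !pipeline.predict( inputVector ) ) return 0;

    //0 means "no new gesture": either null rejection fired or the
    //ClassLabelChangeFilter already reported this gesture on an earlier frame
    return pipeline.getPredictedClassLabel();
}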
void setup() { stream.setLabelsForAllDimensions({"audio"}); pipeline.addFeatureExtractionModule( FFT(kFftWindowSize, kFftHopSize, DIM, FFT::HAMMING_WINDOW, true, false)); MFCC::Options options; options.sample_rate = kSampleRate; options.fft_size = kFftWindowSize / 2; options.start_freq = 300; options.end_freq = 3700; options.num_tri_filter = 26; options.num_cepstral_coeff = 12; options.lifter_param = 22; options.use_vad = true; options.noise_level = noise_level; pipeline.addFeatureExtractionModule(MFCC(options)); pipeline.setClassifier(SVM()); // GMM(16, true, false, 1, 100, 0.001)); // In post processing, we wait #n predicitons. If m out of n predictions are // from the same class, we declare the class as the right one. // // n = (duration * sample_rate) / frame_size // where duration = post_duration // sample_rate = kSampleRate // frame_size = kFftHopSize // m = n * post_ratio int num_predictions = post_duration / 1000 * kSampleRate / kFftHopSize; pipeline.addPostProcessingModule( ClassLabelFilter(num_predictions * post_ratio, num_predictions)); auto ratio_updater = [](double new_ratio) { ClassLabelFilter* filter = dynamic_cast<ClassLabelFilter*>(pipeline.getPostProcessingModule(0)); // Recalculate num_predictions as post_duration might have been changed int num_predictions = post_duration / 1000 * kSampleRate / kFftHopSize; filter->setMinimumCount(new_ratio * num_predictions); }; auto duration_updater = [](int new_duration) { ClassLabelFilter* filter = dynamic_cast<ClassLabelFilter*>(pipeline.getPostProcessingModule(0)); // Recalculate num_predictions as post_duration might have been changed int num_predictions = post_duration / 1000 * kSampleRate / kFftHopSize; filter->setBufferSize(num_predictions); }; auto noise_updater = [](int new_noise_level) { MFCC *mfcc = dynamic_cast<MFCC*>(pipeline.getFeatureExtractionModule(1)); mfcc->setNoiseLevel(new_noise_level); }; registerTuneable(noise_level, 0, 20, "Noise Level", "The threshold for the system to distinguish between " "ambient noise and speech/sound", noise_updater); registerTuneable(post_duration, 0, 2000, "Duration", "Time (in ms) that is considered as a whole " "for smoothing the prediction", duration_updater); registerTuneable(post_ratio, 0.0f, 1.0f, "Ratio", "The portion of time in duration that " "should be from the same class", ratio_updater); useInputStream(stream); useOutputStream(oStream); usePipeline(pipeline); useLeaveOneOutScoring(false); setGUIBufferSize(kSampleRate); }