int main (int argc, const char * argv[]) { //Create a new gesture recognition pipeline GestureRecognitionPipeline pipeline; //Add an ANBC module pipeline.setClassifier( ANBC() ); //Add a ClassLabelChangeFilter as a post processing module pipeline.addPostProcessingModule( ClassLabelChangeFilter() ); //Load some training data to train and test the classifier ClassificationData trainingData; ClassificationData testData; if( !trainingData.load("ClassLabelChangeFilterTrainingData.grt") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } if( !testData.load("ClassLabelChangeFilterTestData.grt") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } //Train the classifier if( !pipeline.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Use the test dataset to demonstrate the output of the ClassLabelChangeFilter for(UINT i=0; i<testData.getNumSamples(); i++){ VectorFloat inputVector = testData[i].getSample(); if( !pipeline.predict( inputVector ) ){ cout << "Failed to perform prediction for test sampel: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label (this will be the processed class label) UINT predictedClassLabel = pipeline.getPredictedClassLabel(); //Get the unprocessed class label (i.e. the direct output of the classifier) UINT unprocessedClassLabel = pipeline.getUnProcessedPredictedClassLabel(); //Also print the results to the screen cout << "Processed Class Label: \t" << predictedClassLabel << "\tUnprocessed Class Label: \t" << unprocessedClassLabel << endl; } return EXIT_SUCCESS; }
// Tests the learning algorithm on a basic dataset TEST(BAG, TrainBasicDataset) { BAG bag; //Check the module is not trained EXPECT_TRUE( !bag.getTrained() ); //Generate a basic dataset const UINT numSamples = 10000; const UINT numClasses = 10; const UINT numDimensions = 100; ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 ); ClassificationData trainingData; EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) ); ClassificationData testData = trainingData.split( 50 ); //Add an adaptive naive bayes classifier to the BAG ensemble bag.addClassifierToEnsemble( ANBC() ); //Add a MinDist classifier to the BAG ensemble, using two clusters MinDist min_dist_two_clusters; min_dist_two_clusters.setNumClusters(2); bag.addClassifierToEnsemble( min_dist_two_clusters ); //Add a MinDist classifier to the BAG ensemble, using five clusters MinDist min_dist_five_clusters; min_dist_five_clusters.setNumClusters(5); bag.addClassifierToEnsemble( min_dist_five_clusters ); //Train the classifier EXPECT_TRUE( bag.train( trainingData ) ); EXPECT_TRUE( bag.getTrained() ); EXPECT_TRUE( bag.print() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( bag.predict( testData[i].getSample() ) ); } EXPECT_TRUE( bag.save( "bag_model.grt" ) ); bag.clear(); EXPECT_TRUE( !bag.getTrained() ); EXPECT_TRUE( bag.load( "bag_model.grt" ) ); EXPECT_TRUE( bag.getTrained() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( bag.predict( testData[i].getSample() ) ); } }
int main (int argc, const char * argv[]) { //Load the example data ClassificationData data; if( !data.load("WiiAccShakeData.grt") ){ cout << "ERROR: Failed to load data from file!\n"; return EXIT_FAILURE; } //The variables used to initialize the MovementIndex feature extraction UINT windowSize = 10; UINT numDimensions = data.getNumDimensions(); //Create a new instance of the MovementIndex feature extraction MovementIndex movementIndex(windowSize,numDimensions); //Loop over the accelerometer data, at each time sample (i) compute the features using the new sample and then write the results to a file for(UINT i=0; i<data.getNumSamples(); i++){ //Compute the features using this new sample movementIndex.computeFeatures( data[i].getSample() ); //Write the data cout << "InputVector: "; for(UINT j=0; j<data.getNumDimensions(); j++){ cout << data[i].getSample()[j] << "\t"; } //Get the latest feature vector VectorFloat featureVector = movementIndex.getFeatureVector(); //Write the features cout << "FeatureVector: "; for(UINT j=0; j<featureVector.size(); j++){ cout << featureVector[j]; if( j != featureVector.size()-1 ) cout << "\t"; } cout << endl; } //Save the MovementIndex settings to a file movementIndex.save("MovementIndexSettings.grt"); //You can then load the settings again if you need them movementIndex.load("MovementIndexSettings.grt"); return EXIT_SUCCESS; }
// Tests the learning algorithm on a basic dataset TEST(KNN, TrainBasicDataset) { KNN knn; //Check the module is not trained EXPECT_TRUE( !knn.getTrained() ); //Generate a basic dataset const UINT numSamples = 1000; const UINT numClasses = 10; const UINT numDimensions = 10; ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 ); ClassificationData trainingData; EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) ); ClassificationData testData = trainingData.split( 50 ); //Train the classifier EXPECT_TRUE( knn.train( trainingData ) ); EXPECT_TRUE( knn.getTrained() ); EXPECT_TRUE( knn.print() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( knn.predict( testData[i].getSample() ) ); } EXPECT_TRUE( knn.save( "knn_model.grt" ) ); knn.clear(); EXPECT_TRUE( !knn.getTrained() ); EXPECT_TRUE( knn.load( "knn_model.grt" ) ); EXPECT_TRUE( knn.getTrained() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( knn.predict( testData[i].getSample() ) ); } }
int main (int argc, const char * argv[]) { //Create a new KMeans instance KMeans kmeans; kmeans.setComputeTheta( true ); kmeans.setMinChange( 1.0e-10 ); kmeans.setMinNumEpochs( 10 ); kmeans.setMaxNumEpochs( 10000 ); //There are a number of ways of training the KMeans algorithm, depending on what you need the KMeans for //These are: //- with labelled training data (in the ClassificationData format) //- with unlablled training data (in the UnlabelledData format) //- with unlabelled training data (in a simple MatrixDouble format) //This example shows you how to train the algorithm with ClassificationData //Load some training data to train the KMeans algorithm ClassificationData trainingData; if( !trainingData.load("LabelledClusterData.csv") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } //Train the KMeans algorithm - K will automatically be set to the number of classes in the training dataset if( !kmeans.train( trainingData ) ){ cout << "Failed to train model!\n"; return EXIT_FAILURE; } //Get the K clusters from the KMeans instance and print them cout << "\nClusters:\n"; MatrixFloat clusters = kmeans.getClusters(); for(unsigned int k=0; k<clusters.getNumRows(); k++){ for(unsigned int n=0; n<clusters.getNumCols(); n++){ cout << clusters[k][n] << "\t"; }cout << endl; } return EXIT_SUCCESS; }
int main (int argc, const char * argv[]) { //Create a new AdaBoost instance AdaBoost adaBoost; //Set the weak classifier you want to use adaBoost.setWeakClassifier( DecisionStump() ); //Load some training data to train the classifier ClassificationData trainingData; if( !trainingData.load("AdaBoostTrainingData.grt") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } //Use 20% of the training dataset to create a test dataset ClassificationData testData = trainingData.partition( 80 ); //Train the classifier if( !adaBoost.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Save the model to a file if( !adaBoost.save("AdaBoostModel.grt") ){ cout << "Failed to save the classifier model!\n"; return EXIT_FAILURE; } //Load the model from a file if( !adaBoost.load("AdaBoostModel.grt") ){ cout << "Failed to load the classifier model!\n"; return EXIT_FAILURE; } //Use the test dataset to test the AdaBoost model double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ //Get the i'th test sample UINT classLabel = testData[i].getClassLabel(); vector< double > inputVector = testData[i].getSample(); //Perform a prediction using the classifier if( !adaBoost.predict( inputVector ) ){ cout << "Failed to perform prediction for test sampel: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label UINT predictedClassLabel = adaBoost.getPredictedClassLabel(); double maximumLikelhood = adaBoost.getMaximumLikelihood(); vector< double > classLikelihoods = adaBoost.getClassLikelihoods(); vector< double > classDistances = adaBoost.getClassDistances(); //Update the accuracy if( classLabel == predictedClassLabel ) accuracy++; cout << "TestSample: " << i << " ClassLabel: " << classLabel; cout << " PredictedClassLabel: " << predictedClassLabel << " Likelihood: " << maximumLikelhood; cout << endl; } cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl; return EXIT_SUCCESS; }
int main (int argc, const char * argv[]) { //Create a new Softmax instance Softmax softmax; //Load some training data to train the classifier ClassificationData trainingData; if( !trainingData.load("SoftmaxTrainingData.grt") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } //Use 20% of the training dataset to create a test dataset ClassificationData testData = trainingData.partition( 80 ); //Train the classifier if( !softmax.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Save the Softmax model to a file if( !softmax.save("SoftmaxModel.grt") ){ cout << "Failed to save the classifier model!\n"; return EXIT_FAILURE; } //Load the Softmax model from a file if( !softmax.load("SoftmaxModel.grt") ){ cout << "Failed to load the classifier model!\n"; return EXIT_FAILURE; } //Use the test dataset to test the softmax model double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ //Get the i'th test sample UINT classLabel = testData[i].getClassLabel(); vector< double > inputVector = testData[i].getSample(); //Perform a prediction using the classifier if( !softmax.predict( inputVector ) ){ cout << "Failed to perform prediction for test sample: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label UINT predictedClassLabel = softmax.getPredictedClassLabel(); vector< double > classLikelihoods = softmax.getClassLikelihoods(); vector< double > classDistances = softmax.getClassDistances(); //Update the accuracy if( classLabel == predictedClassLabel ) accuracy++; cout << "TestSample: " << i << " ClassLabel: " << classLabel << " PredictedClassLabel: " << predictedClassLabel << endl; } cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl; return EXIT_SUCCESS; }
int main (int argc, const char * argv[]) { //Parse the data filename from the argument list if( argc != 2 ){ cout << "Error: failed to parse data filename from command line. You should run this example with one argument pointing to the data filename!\n"; return EXIT_FAILURE; } const string filename = argv[1]; //Create a new Softmax instance Softmax softmax; //Load some training data to train the classifier ClassificationData trainingData; if( !trainingData.load( filename ) ){ cout << "Failed to load training data: " << filename << endl; return EXIT_FAILURE; } //Use 20% of the training dataset to create a test dataset ClassificationData testData = trainingData.split( 80 ); //Train the classifier if( !softmax.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Save the Softmax model to a file if( !softmax.save("SoftmaxModel.grt") ){ cout << "Failed to save the classifier model!\n"; return EXIT_FAILURE; } //Load the Softmax model from a file if( !softmax.load("SoftmaxModel.grt") ){ cout << "Failed to load the classifier model!\n"; return EXIT_FAILURE; } //Use the test dataset to test the softmax model double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ //Get the i'th test sample UINT classLabel = testData[i].getClassLabel(); VectorFloat inputVector = testData[i].getSample(); //Perform a prediction using the classifier if( !softmax.predict( inputVector ) ){ cout << "Failed to perform prediction for test sample: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label UINT predictedClassLabel = softmax.getPredictedClassLabel(); VectorFloat classLikelihoods = softmax.getClassLikelihoods(); VectorFloat classDistances = softmax.getClassDistances(); //Update the accuracy if( classLabel == predictedClassLabel ) accuracy++; cout << "TestSample: " << i << " ClassLabel: " << classLabel << " PredictedClassLabel: " << predictedClassLabel << endl; } cout << "Test Accuracy: " << 
accuracy/double(testData.getNumSamples())*100.0 << "%" << endl; return EXIT_SUCCESS; }
// Trains a RandomForests classifier inside a GestureRecognitionPipeline using the options
// held by the command line parser, then saves the pipeline to the model filename.
// @param parser: the parser holding the command line options; "filename" is required
// @return false if the filename option is missing, the data cannot be loaded or training
// fails, otherwise true. A failure to save the model is only logged as a warning.
bool train( CommandLineParser &parser ){

    //The training dataset filename is mandatory
    string datasetPath = "";
    if( !parser.get("filename",datasetPath) ){
        errorLog << "Failed to parse filename from command line! You can set the filename using the -f." << endl;
        printUsage();
        return false;
    }

    //Read the remaining options, each one keeps its initial value below if the parser does not set it
    string modelPath = "";
    unsigned int treeCount = 0;
    unsigned int depthLimit = 0;
    unsigned int nodeSizeLimit = 0;
    unsigned int splitCount = 0;
    bool dropFeatures = false;
    double bootstrapRatio = 0.0;
    parser.get("model-filename",modelPath);
    parser.get("forest-size",treeCount);
    parser.get("max-depth",depthLimit);
    parser.get("min-node-size",nodeSizeLimit);
    parser.get("num-splits",splitCount);
    parser.get("remove-features",dropFeatures);
    parser.get("bootstrap-weight",bootstrapRatio);

    //Load the data that will be used to train the classifier
    ClassificationData dataset;
    infoLog << "- Loading Training Data..." << endl;
    if( !dataset.load( datasetPath ) ){
        errorLog << "Failed to load training data!\n";
        return false;
    }

    //Log some stats about the dataset
    const unsigned int dimensionCount = dataset.getNumDimensions();
    Vector< ClassTracker > classStats = dataset.getClassTracker();
    infoLog << "- Num training samples: " << dataset.getNumSamples() << endl;
    infoLog << "- Num dimensions: " << dimensionCount << endl;
    infoLog << "- Num classes: " << dataset.getNumClasses() << endl;
    infoLog << "- Class stats: " << endl;
    unsigned int classIndex = 0;
    while( classIndex < classStats.getSize() ){
        infoLog << "- class " << classStats[classIndex].classLabel << " number of samples: " << classStats[classIndex].counter << endl;
        ++classIndex;
    }

    //Create a new RandomForests instance
    RandomForests randomForest;

    //Pick the decision tree node used by each tree in the forest
    string nodeKind = "cluster-node"; //TODO: make this a command line option in the future
    if( nodeKind == "cluster-node" ){
        randomForest.setDecisionTreeNode( DecisionTreeClusterNode() );
    }
    else if( nodeKind == "threshold-node" ){
        randomForest.setTrainingMode( Tree::BEST_RANDOM_SPLIT );
        randomForest.setDecisionTreeNode( DecisionTreeThresholdNode() );
    }

    //Apply the forest settings: number of trees, maximum tree depth, minimum samples per node,
    //random splits per node, feature removal at each split and the bootstrap weight
    randomForest.setForestSize( treeCount );
    randomForest.setMaxDepth( depthLimit );
    randomForest.setMinNumSamplesPerNode( nodeSizeLimit );
    randomForest.setNumRandomSplits( splitCount );
    randomForest.setRemoveFeaturesAtEachSplit( dropFeatures );
    randomForest.setBootstrappedDatasetWeight( bootstrapRatio );

    //Wrap the classifier in a pipeline
    GestureRecognitionPipeline forestPipeline;
    forestPipeline.setClassifier( randomForest );

    //Train the classifier
    infoLog << "- Training model..." << endl;
    const bool trained = forestPipeline.train( dataset );
    if( !trained ){
        errorLog << "Failed to train classifier!" << endl;
        return false;
    }

    infoLog << "- Model trained!" << endl;
    infoLog << "- Training time: " << (forestPipeline.getTrainingTime() * 0.001) / 60.0 << " (minutes)" << endl;

    //Save the pipeline, a failure here is logged but does not fail the training
    infoLog << "- Saving model to: " << modelPath << endl;
    const bool saved = forestPipeline.save( modelPath );
    if( !saved ){
        warningLog << "Failed to save model to file: " << modelPath << endl;
    }

    return true;
}
int main (int argc, const char * argv[]) { //Parse the data filename from the argument list, you should pass in the data path to the iris data set in the GRT data folder if( argc != 2 ){ cout << "Error: failed to parse data filename from command line. You should run this example with one argument pointing to the data filename!\n"; return EXIT_FAILURE; } const string filename = argv[1]; //We are going to use the Iris dataset, you can find more about the orginal dataset at: http://en.wikipedia.org/wiki/Iris_flower_data_set //Create a new instance of ClassificationData to hold the training data ClassificationData trainingData; //Load the training dataset from a file, the file should be in the same directory as this program if( !trainingData.load( filename ) ){ cout << "Failed to load Iris data from file!\n"; return EXIT_FAILURE; } //Print some basic stats about the dataset we have loaded trainingData.printStats(); //Partition the training dataset into a training dataset and test dataset //We will use 60% of the data to train the algorithm and 40% of the data to test it //The true parameter flags that we want to use stratified sampling, which means there //should be an equal class distribution between the training and test datasets ClassificationData testData = trainingData.split( 60, true ); //Setup the gesture recognition pipeline GestureRecognitionPipeline pipeline; //Add a KNN classification algorithm as the main classifier with a K value of 10 pipeline << KNN(10); //Train the KNN algorithm using the training dataset if( !pipeline.train( trainingData ) ){ cout << "Failed to train the pipeline!\n"; return EXIT_FAILURE; } //Test the KNN model using the test dataset if( !pipeline.test( testData ) ){ cout << "Failed to test the pipeline!\n"; return EXIT_FAILURE; } //Print some metrics about how successful the classification was //Print the accuracy cout << "The classification accuracy was: " << pipeline.getTestAccuracy() << "%\n" << endl; //Print the precision for each 
class for(UINT k=0; k<pipeline.getNumClassesInModel(); k++){ UINT classLabel = pipeline.getClassLabels()[k]; double classPrecision = pipeline.getTestPrecision( classLabel ); cout << "The precision for class " << classLabel << " was " << classPrecision << endl; } cout << endl; //Print the recall for each class for(UINT k=0; k<pipeline.getNumClassesInModel(); k++){ UINT classLabel = pipeline.getClassLabels()[k]; double classRecall = pipeline.getTestRecall( classLabel ); cout << "The recall for class " << classLabel << " was " << classRecall << endl; } cout << endl; //Print the confusion matrix MatrixFloat confusionMatrix = pipeline.getTestConfusionMatrix(); cout << "Confusion Matrix: \n"; for(UINT i=0; i<confusionMatrix.getNumRows(); i++){ for(UINT j=0; j<confusionMatrix.getNumCols(); j++){ cout << confusionMatrix[i][j] << "\t"; } cout << endl; } cout << endl; return EXIT_SUCCESS; }
int main (int argc, const char * argv[]) { //Parse the data filename from the argument list if( argc != 2 ){ cout << "Error: failed to parse data filename from command line. You should run this example with one argument pointing to the data filename!\n"; return EXIT_FAILURE; } const string filename = argv[1]; //Create a new KNN classifier with a K value of 10 KNN knn(10); knn.setNullRejectionCoeff( 10 ); knn.enableScaling( true ); knn.enableNullRejection( true ); //Train the classifier with some training data ClassificationData trainingData; if( !trainingData.load( filename ) ){ cout << "Failed to load training data: " << filename << endl; return EXIT_FAILURE; } //Use 20% of the training dataset to create a test dataset ClassificationData testData = trainingData.partition( 80 ); //Train the classifier if( !knn.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Save the knn model to a file if( !knn.save("KNNModel.grt") ){ cout << "Failed to save the classifier model!\n"; return EXIT_FAILURE; } //Load the knn model from a file if( !knn.load("KNNModel.grt") ){ cout << "Failed to load the classifier model!\n"; return EXIT_FAILURE; } //Use the test dataset to test the KNN model double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ //Get the i'th test sample UINT classLabel = testData[i].getClassLabel(); VectorFloat inputVector = testData[i].getSample(); //Perform a prediction using the classifier bool predictSuccess = knn.predict( inputVector ); if( !predictSuccess ){ cout << "Failed to perform prediction for test sampel: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label UINT predictedClassLabel = knn.getPredictedClassLabel(); VectorFloat classLikelihoods = knn.getClassLikelihoods(); VectorFloat classDistances = knn.getClassDistances(); //Update the accuracy if( classLabel == predictedClassLabel ) accuracy++; cout << "TestSample: " << i << " ClassLabel: " << classLabel << " PredictedClassLabel: " 
<< predictedClassLabel << endl; } cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl; return EXIT_SUCCESS; }
// Loads a trained pipeline and a dataset, tests the pipeline on the dataset, logs the test
// time and accuracy, and passes the pipeline and results filename to saveResults.
// @param parser: the parser holding the command line options; "model-filename" and
// "dataset-filename" are required, "results-filename" is requested with the value "results.txt"
// @return false if a required option is missing, the model or data cannot be loaded or the
// test fails, otherwise the value returned by saveResults
bool test( CommandLineParser &parser ){

    infoLog << "Testing model..." << endl;

    string datasetFilename = "";
    string modelFilename = "";
    string resultsFilename = "";

    //Get the model filename
    if( !parser.get("model-filename",modelFilename) ){
        errorLog << "Failed to parse model filename from command line! You can set the model filename using the -m." << endl;
        printUsage();
        return false;
    }

    //Get the dataset filename
    if( !parser.get("dataset-filename",datasetFilename) ){
        errorLog << "Failed to parse dataset filename from command line! You can set the dataset filename using the -f." << endl;
        printUsage();
        return false;
    }

    //Get the results filename
    parser.get("results-filename",resultsFilename,string("results.txt"));

    //Load the pipeline from a file
    GestureRecognitionPipeline pipeline;

    infoLog << "- Loading model..." << endl;

    if( !pipeline.load( modelFilename ) ){
        errorLog << "Failed to load model from file: " << modelFilename << endl;
        printUsage();
        return false;
    }

    infoLog << "- Model loaded!" << endl;

    //Load the data to test the classifier
    ClassificationData data;

    //This data is used for testing, so label it as test data in the log
    infoLog << "- Loading Test Data..." << endl;
    if( !data.load( datasetFilename ) ){
        errorLog << "Failed to load data!\n";
        return false;
    }

    const unsigned int N = data.getNumDimensions();
    infoLog << "- Num test samples: " << data.getNumSamples() << endl;
    infoLog << "- Num dimensions: " << N << endl;
    infoLog << "- Num classes: " << data.getNumClasses() << endl;

    //Test the classifier
    if( !pipeline.test( data ) ){
        errorLog << "Failed to test pipeline!" << endl;
        return false;
    }

    //The test time is divided by 1000 before being logged as seconds
    infoLog << "- Test complete in " << pipeline.getTestTime()/1000.0 << " seconds with an accuracy of: " << pipeline.getTestAccuracy() << endl;

    return saveResults( pipeline, resultsFilename );
}
int main (int argc, const char * argv[]) { //Create a new instance of the ClassificationData ClassificationData trainingData; //Set the dimensionality of the data (you need to do this before you can add any samples) trainingData.setNumDimensions( 3 ); //You can also give the dataset a name (the name should have no spaces) trainingData.setDatasetName("DummyData"); //You can also add some info text about the data trainingData.setInfoText("This data contains some dummy data"); //Here you would grab some data from your sensor and label it with the corresponding gesture it belongs to UINT gestureLabel = 1; VectorFloat sample(3); //For now we will just add some random data Random random; for(UINT i=0; i<100; i++){ sample[0] = random.getRandomNumberUniform(-1.0,1.0); sample[1] = random.getRandomNumberUniform(-1.0,1.0); sample[2] = random.getRandomNumberUniform(-1.0,1.0); //Add the sample to the training data trainingData.addSample( gestureLabel, sample ); } //After recording your training data you can then save it to a file if( !trainingData.save( "TrainingData.grt" ) ){ cout << "ERROR: Failed to save dataset to file!\n"; return EXIT_FAILURE; } //This can then be loaded later if( !trainingData.load( "TrainingData.grt" ) ){ cout << "ERROR: Failed to load dataset from file!\n"; return EXIT_FAILURE; } //You can also save and load the training data to a CSV file //Each row will contain a sample, with the first column containing the class label and the remaining columns containing the data if( !trainingData.save( "TrainingData.csv" ) ){ cout << "ERROR: Failed to save dataset to csv file!\n"; return EXIT_FAILURE; } //The data structure will automatically detect the csv extension and parse the file accordingly if( !trainingData.load( "TrainingData.csv" ) ){ cout << "ERROR: Failed to load dataset from csv file!\n"; return EXIT_FAILURE; } //This is how you can get some stats from the training data string datasetName = trainingData.getDatasetName(); string infoText = 
trainingData.getInfoText(); UINT numSamples = trainingData.getNumSamples(); UINT numDimensions = trainingData.getNumDimensions(); UINT numClasses = trainingData.getNumClasses(); cout << "Dataset Name: " << datasetName << endl; cout << "InfoText: " << infoText << endl; cout << "NumberOfSamples: " << numSamples << endl; cout << "NumberOfDimensions: " << numDimensions << endl; cout << "NumberOfClasses: " << numClasses << endl; //You can also get the minimum and maximum ranges of the data Vector< MinMax > ranges = trainingData.getRanges(); cout << "The ranges of the dataset are: \n"; for(UINT j=0; j<ranges.size(); j++){ cout << "Dimension: " << j << " Min: " << ranges[j].minValue << " Max: " << ranges[j].maxValue << endl; } //If you want to partition the dataset into a training dataset and a test dataset then you can use the partition function //A value of 80 means that 80% of the original data will remain in the training dataset and 20% will be returned as the test dataset ClassificationData testData = trainingData.partition( 80 ); //If you have multiple datasets that you want to merge together then use the merge function if( !trainingData.merge( testData ) ){ cout << "ERROR: Failed to save merge datasets!\n"; return EXIT_FAILURE; } //If you want to run K-Fold cross validation using the dataset then you should first spilt the dataset into K-Folds //A value of 10 splits the dataset into 10 folds and the true parameter signals that stratified sampling should be used if( !trainingData.spiltDataIntoKFolds( 10, true ) ){ cout << "ERROR: Failed to spiltDataIntoKFolds!\n"; return EXIT_FAILURE; } //After you have called the spilt function you can then get the training and test sets for each fold for(UINT foldIndex=0; foldIndex<10; foldIndex++){ ClassificationData foldTrainingData = trainingData.getTrainingFoldData( foldIndex ); ClassificationData foldTestingData = trainingData.getTestFoldData( foldIndex ); } //If need you can clear any training data that you have recorded 
trainingData.clear(); return EXIT_SUCCESS; }
// Trains a Softmax classifier inside a GestureRecognitionPipeline using the options held by
// the command line parser, saves the pipeline, and optionally writes a training log.
// @param parser: the parser holding the command line options; "filename" is required
// @return false if the filename option is missing, the data cannot be loaded, training fails
// or a requested training log file cannot be opened, otherwise true. A failure to save the
// model is only logged as a warning.
bool train( CommandLineParser &parser ){

    infoLog << "Training regression model..." << endl;

    //The training dataset filename is mandatory
    string datasetPath = "";
    if( !parser.get("filename",datasetPath) ){
        errorLog << "Failed to parse filename from command line! You can set the filename using the -f." << endl;
        printHelp();
        return false;
    }

    //Get the model filename
    string modelPath = "";
    parser.get("model-filename",modelPath);

    //Load the data that will be used to train the model
    ClassificationData dataset;
    infoLog << "- Loading Training Data..." << endl;
    if( !dataset.load( datasetPath ) ){
        errorLog << "Failed to load training data!\n";
        return false;
    }

    //Log some stats about the dataset
    const unsigned int inputDimensionCount = dataset.getNumDimensions();
    const unsigned int classCount = dataset.getNumClasses();
    infoLog << "- Num training samples: " << dataset.getNumSamples() << endl;
    infoLog << "- Num input dimensions: " << inputDimensionCount << endl;
    infoLog << "- Num classes: " << classCount << endl;

    //Read the classifier settings, each one keeps its initial value below if the parser does not set it
    float rate = 0;
    float changeThreshold = 0;
    unsigned int epochLimit = 0;
    unsigned int samplesPerBatch = 0;
    parser.get( "learning-rate", rate );
    parser.get( "min-change", changeThreshold );
    parser.get( "max-epoch", epochLimit );
    parser.get( "batch-size", samplesPerBatch );

    infoLog << "Softmax settings: learning-rate: " << rate << " min-change: " << changeThreshold << " max-epoch: " << epochLimit << " batch-size: " << samplesPerBatch << endl;

    //Create the softmax instance, scaling is always enabled
    bool useScaling = true;
    Softmax softmaxClassifier(useScaling,rate,changeThreshold,epochLimit,samplesPerBatch);

    //Create the pipeline that holds the classifier
    GestureRecognitionPipeline softmaxPipeline;
    softmaxPipeline << softmaxClassifier;

    //Train the classifier
    infoLog << "- Training model...\n";
    const bool trained = softmaxPipeline.train( dataset );
    if( !trained ){
        errorLog << "Failed to train model!" << endl;
        return false;
    }

    infoLog << "- Model trained!" << endl;

    //Save the pipeline, a failure here is logged but does not fail the training
    infoLog << "- Saving model to: " << modelPath << endl;
    if( !softmaxPipeline.save( modelPath ) ){
        warningLog << "Failed to save model to file: " << modelPath << endl;
    }else{
        infoLog << "- Model saved." << endl;
    }

    infoLog << "- TrainingTime: " << softmaxPipeline.getTrainingTime() << endl;

    //The training log is optional, without a non-empty log filename there is nothing left to do
    string logPath = "";
    const bool logRequested = parser.get( "log-filename", logPath ) && logPath.length() > 0;
    if( !logRequested ){
        return true;
    }

    infoLog << "Writing training log to: " << logPath << endl;

    fstream logStream( logPath.c_str(), fstream::out );
    if( !logStream.is_open() ){
        errorLog << "Failed to open training log file: " << logPath << endl;
        return false;
    }

    //Write one line per training result: the training iteration and the accuracy, tab separated
    Vector< TrainingResult > results = softmaxPipeline.getTrainingResults();
    UINT resultIndex = 0;
    while( resultIndex < results.getSize() ){
        logStream << results[resultIndex].getTrainingIteration() << "\t" << results[resultIndex].getAccuracy() << endl;
        ++resultIndex;
    }

    logStream.close();

    return true;
}
int main(int argc, const char * argv[]) { //Parse the data filename from the argument list if( argc != 2 ){ cout << "Error: failed to parse data filename from command line. You should run this example with one argument pointing to the data filename!\n"; return EXIT_FAILURE; } const string filename = argv[1]; //Create a new DecisionTree instance DecisionTree dTree; //Set the node that the DecisionTree will use - different nodes may result in different decision boundaries //and some nodes may provide better accuracy than others on specific classification tasks //The current node options are: //- DecisionTreeClusterNode //- DecisionTreeThresholdNode dTree.setDecisionTreeNode( DecisionTreeClusterNode() ); //Set the number of steps that will be used to choose the best splitting values //More steps will give you a better model, but will take longer to train dTree.setNumSplittingSteps( 1000 ); //Set the maximum depth of the tree dTree.setMaxDepth( 10 ); //Set the minimum number of samples allowed per node dTree.setMinNumSamplesPerNode( 10 ); //Load some training data to train the classifier ClassificationData trainingData; if( !trainingData.load( filename ) ){ cout << "Failed to load training data: " << filename << endl; return EXIT_FAILURE; } //Use 20% of the training dataset to create a test dataset ClassificationData testData = trainingData.split( 80 ); //Train the classifier if( !dTree.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Print the tree dTree.print(); //Save the model to a file if( !dTree.save("DecisionTreeModel.grt") ){ cout << "Failed to save the classifier model!\n"; return EXIT_FAILURE; } //Load the model from a file if( !dTree.load("DecisionTreeModel.grt") ){ cout << "Failed to load the classifier model!\n"; return EXIT_FAILURE; } //Test the accuracy of the model on the test data double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ //Get the i'th test sample UINT classLabel = 
testData[i].getClassLabel(); VectorDouble inputVector = testData[i].getSample(); //Perform a prediction using the classifier bool predictSuccess = dTree.predict( inputVector ); if( !predictSuccess ){ cout << "Failed to perform prediction for test sampel: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label UINT predictedClassLabel = dTree.getPredictedClassLabel(); VectorDouble classLikelihoods = dTree.getClassLikelihoods(); VectorDouble classDistances = dTree.getClassDistances(); //Update the accuracy if( classLabel == predictedClassLabel ) accuracy++; cout << "TestSample: " << i << " ClassLabel: " << classLabel << " PredictedClassLabel: " << predictedClassLabel << endl; } cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl; return EXIT_SUCCESS; }
int main(int argc, char * argv[]) { if( argc < 3 ){ errorLog << "Not enough input arguments!" << endl; printUsage(); return EXIT_FAILURE; } const string inputDirectory = argv[1]; const string outputFilename = argv[2]; //Parse the data directory for files vector< string > filenames; infoLog << "- Parsing data directory: " << inputDirectory << endl; if( !Util::parseDirectory( inputDirectory, ".csv", filenames ) ){ errorLog << "Failed to parse input directory: " << inputDirectory << endl; return EXIT_FAILURE; } if( filenames.size() == 0 ){ errorLog << "Failed to find any files in the input directory: " << inputDirectory << endl; return EXIT_FAILURE; } ClassificationData data; unsigned int numFiles = (unsigned int)filenames.size(); bool dataLoaded = false; for(unsigned int i=0; i<numFiles; i++){ //Load the data infoLog << "- Loading data " << i+1 << " of " << numFiles << endl; ClassificationData tmp; if( tmp.load( filenames[i] ) ){ if( i==0 ){ data.setNumDimensions( tmp.getNumDimensions() ); } dataLoaded = true; infoLog << "- Data loaded. Number of samples: " << tmp.getNumSamples() << endl; data.merge( tmp ); }else{ warningLog << "- Failed to load data!" << endl; } } if( dataLoaded ){ infoLog << "- Merged data to generate new dataset with " << data.getNumSamples() << " samples" << endl; //Save the new datasets infoLog << "- Saving main dataset to file: " << outputFilename << endl; if( !data.save( outputFilename ) ){ errorLog << "Failed to save output data: " << outputFilename << endl; return EXIT_FAILURE; } }else{ warningLog << "- Failed to load any data!" << endl; return EXIT_FAILURE; } return EXIT_SUCCESS; }
int main(int argc, const char * argv[]) { //Parse the data filename from the argument list if( argc != 2 ){ cout << "Error: failed to parse data filename from command line. You should run this example with one argument pointing to the data filename!\n"; return EXIT_FAILURE; } const string filename = argv[1]; //Create a new RandomForests instance RandomForests forest; //Set the number of trees in the forest forest.setForestSize( 10 ); //Set the number of random candidate splits that will be used to choose the best splitting values //More steps will give you a better model, but will take longer to train forest.setNumRandomSplits( 100 ); //Set the maximum depth of the tree forest.setMaxDepth( 10 ); //Set the minimum number of samples allowed per node forest.setMinNumSamplesPerNode( 10 ); //Load some training data to train the classifier ClassificationData trainingData; cout << "Loading Training Data\n"; if( !trainingData.load( filename ) ){ cout << "Failed to load training data: " << filename << endl; return EXIT_FAILURE; } //Use 20% of the training dataset to create a test dataset ClassificationData testData = trainingData.partition( 80 ); //Train the classifier if( !forest.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Print the forest forest.print(); //Save the model to a file if( !forest.save("RandomForestsModel.grt") ){ cout << "Failed to save the classifier model!\n"; return EXIT_FAILURE; } //Load the model from a file if( !forest.load("RandomForestsModel.grt") ){ cout << "Failed to load the classifier model!\n"; return EXIT_FAILURE; } //Test the accuracy of the model on the test data double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ //Get the i'th test sample UINT classLabel = testData[i].getClassLabel(); VectorDouble inputVector = testData[i].getSample(); //Perform a prediction using the classifier bool predictSuccess = forest.predict( inputVector ); if( !predictSuccess ){ cout << "Failed to 
perform prediction for test sampel: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label UINT predictedClassLabel = forest.getPredictedClassLabel(); VectorDouble classLikelihoods = forest.getClassLikelihoods(); VectorDouble classDistances = forest.getClassDistances(); //Update the accuracy if( classLabel == predictedClassLabel ) accuracy++; cout << "TestSample: " << i << " ClassLabel: " << classLabel << " PredictedClassLabel: " << predictedClassLabel << endl; } cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl; return EXIT_SUCCESS; }