//-------------------------------------------------------------- void ofApp::setup(){ ofSetFrameRate(60); //Setup the FFT FFT fft; fft.init(FFT_WINDOW_SIZE,FFT_HOP_SIZE,1,FFT::RECTANGULAR_WINDOW,true,false,DATA_TYPE_MATRIX); //Setup the classifier RandomForests forest; forest.setForestSize( 10 ); forest.setNumRandomSplits( 100 ); forest.setMaxDepth( 10 ); forest.setMinNumSamplesPerNode( 10 ); //Add the feature extraction and classifier to the pipeline pipeline.addFeatureExtractionModule( fft ); pipeline.setClassifier( forest ); trainingClassLabel = 1; record = false; processAudio = true; trainingData.setNumDimensions( 1 ); //We are only going to use the data from one microphone channel, so the dimensions are 1 trainingSample.resize( AUDIO_BUFFER_SIZE, 1 ); //We will set the training matrix to match the audio buffer size //Setup the audio card ofSoundStreamSetup(2, 1, this, AUDIO_SAMPLE_RATE, AUDIO_BUFFER_SIZE, 4); }
bool RandomForests::deepCopyFrom(const Classifier *classifier){ if( classifier == NULL ) return false; if( this->getClassifierType() == classifier->getClassifierType() ){ RandomForests *ptr = (RandomForests*)classifier; //Clear this tree this->clear(); if( ptr->getTrained() ){ //Deep copy the forest for(UINT i=0; i<ptr->forest.size(); i++){ this->forest.push_back( ptr->forest[i]->deepCopyTree() ); } } this->forestSize = ptr->forestSize; this->numRandomSplits = ptr->numRandomSplits; this->minNumSamplesPerNode = ptr->minNumSamplesPerNode; this->maxDepth = ptr->maxDepth; //Copy the base classifier variables return copyBaseVariables( classifier ); } return false; }
bool RandomForests::combineModels( const RandomForests &forest ){ if( !getTrained() ){ errorLog << "combineModels( const RandomForests &forest ) - This instance has not been trained!" << endl; return false; } if( !forest.getTrained() ){ errorLog << "combineModels( const RandomForests &forest ) - This external forest instance has not been trained!" << endl; return false; } if( this->getNumInputDimensions() != forest.getNumInputDimensions() ) { errorLog << "combineModels( const RandomForests &forest ) - The number of input dimensions of the external forest ("; errorLog << forest.getNumInputDimensions() << ") does not match the number of input dimensions of this instance ("; errorLog << this->getNumInputDimensions() << ")!" << endl; return false; } //Add the trees in the other forest to this model DecisionTreeNode *node; for(UINT i=0; i<forest.getForestSize(); i++){ node = forest.getTree(i); if( node ){ this->forest.push_back( node->deepCopy() ); forestSize++; } } return true; }
bool RandomForests::deepCopyFrom(const Classifier *classifier){ if( classifier == NULL ) return false; if( this->getClassifierType() == classifier->getClassifierType() ){ RandomForests *ptr = (RandomForests*)classifier; //Clear this tree this->clear(); if( copyBaseVariables( classifier ) ){ //Deep copy the main node if( this->decisionTreeNode != NULL ){ delete decisionTreeNode; decisionTreeNode = NULL; } this->decisionTreeNode = ptr->deepCopyDecisionTreeNode(); if( ptr->getTrained() ){ //Deep copy the forest this->forest.reserve( ptr->forest.size() ); for(size_t i=0; i<ptr->forest.size(); i++){ this->forest.push_back( ptr->forest[i]->deepCopy() ); } } this->forestSize = ptr->forestSize; this->numRandomSplits = ptr->numRandomSplits; this->minNumSamplesPerNode = ptr->minNumSamplesPerNode; this->maxDepth = ptr->maxDepth; this->removeFeaturesAtEachSpilt = ptr->removeFeaturesAtEachSpilt; this->bootstrappedDatasetWeight = ptr->bootstrappedDatasetWeight; this->trainingMode = ptr->trainingMode; return true; } errorLog << "deepCopyFrom(const Classifier *classifier) - Failed to copy base variables!" << endl; } return false; }
//Loads a trained RandomForests pipeline and writes its feature weights to a results file.
//Reads the "model-filename", "filename" and "combine-weights" options from the parser.
//If combine-weights is set, the forest-level weights are written one value per line. Otherwise one
//row of normalised weights per tree is written via MatrixDouble::save.
//@param parser: the command line parser holding the options
//@return true if the weights were computed and written, false otherwise
bool computeFeatureWeights( CommandLineParser &parser ){

    infoLog << "Computing feature weights..." << endl;

    string resultsFilename = "";
    string modelFilename = "";
    bool combineWeights = false;

    //Get the model filename
    if( !parser.get("model-filename",modelFilename) ){
        errorLog << "Failed to parse filename from command line! You can set the model filename using the --model." << endl;
        printUsage();
        return false;
    }

    //Get the results filename
    if( !parser.get("filename",resultsFilename) ){
        errorLog << "Failed to parse results filename from command line! You can set the results filename using the -f." << endl;
        printUsage();
        return false;
    }

    //Get the combine weights flag, the result of this lookup is not checked
    parser.get("combine-weights",combineWeights);

    //Load the model
    GestureRecognitionPipeline pipeline;
    if( !pipeline.load( modelFilename ) ){
        errorLog << "Failed to load model from file: " << modelFilename << endl;
        printUsage();
        return false;
    }

    //Make sure the pipeline contains a random forest model and that it is trained
    RandomForests *forest = pipeline.getClassifier< RandomForests >();

    if( !forest ){
        errorLog << "Model loaded, but the pipeline does not contain a RandomForests classifier!" << endl;
        printUsage();
        return false;
    }

    if( !forest->getTrained() ){
        errorLog << "Model loaded, but the RandomForests classifier is not trained!" << endl;
        printUsage();
        return false;
    }

    //Compute the feature weights
    if( combineWeights ){

        VectorFloat weights = forest->getFeatureWeights();
        if( weights.getSize() == 0 ){
            errorLog << "Failed to compute feature weights!" << endl;
            printUsage();
            return false;
        }

        //Save the results to a file, failing loudly if the file can not be opened or written
        fstream file;
        file.open( resultsFilename.c_str(), fstream::out );
        if( !file.is_open() ){
            errorLog << "Failed to open results file: " << resultsFilename << endl;
            return false;
        }

        const unsigned int N = weights.getSize();
        for(unsigned int i=0; i<N; i++){
            file << weights[i] << endl;
        }

        const bool writeOK = file.good();
        file.close();
        if( !writeOK ){
            errorLog << "Failed to write results to file: " << resultsFilename << endl;
            return false;
        }

    }else{

        const unsigned int K = forest->getForestSize();
        const unsigned int N = forest->getNumInputDimensions();
        VectorFloat tmp( N, 0.0 );
        MatrixDouble weights(K,N);

        for(unsigned int i=0; i<K; i++){

            DecisionTreeNode *tree = forest->getTree(i);
            if( !tree ){
                errorLog << "Failed to get tree " << i << " from the forest!" << endl;
                return false;
            }

            tree->computeFeatureWeights( tmp );

            //Normalise the weights of this tree so they sum to one. A tree whose weights sum to zero
            //is left unscaled, dividing by the sum would fill the row with inf/NaN values.
            const double sum = Util::sum( tmp );
            const double norm = (sum != 0.0) ? 1.0 / sum : 1.0;

            for(unsigned int j=0; j<N; j++){
                tmp[j] *= norm;
                weights[i][j] = tmp[j];
                tmp[j] = 0; //Reset the buffer for the next tree
            }
        }

        //Save the results to a file
        //NOTE(review): assumes MatrixDouble::save returns a bool success flag - confirm against the header
        if( !weights.save( resultsFilename ) ){
            errorLog << "Failed to save results to file: " << resultsFilename << endl;
            return false;
        }
    }

    return true;
}
bool combineModels( CommandLineParser &parser ){ infoLog << "Combining models..." << endl; string directoryPath = ""; string modelFilename = ""; if( !parser.get("data-dir",directoryPath) ){ errorLog << "Failed to parse data-directory from command line! You can set the data-directory using the --data-dir option." << endl; printUsage(); return false; } //Get the filename if( !parser.get("model-filename",modelFilename) ){ errorLog << "Failed to parse filename from command line! You can set the model filename using the --model." << endl; printUsage(); return false; } Vector< string > files; infoLog << "- Parsing data directory: " << directoryPath << endl; //Parse the directory to get all the csv files if( !Util::parseDirectory( directoryPath, ".grt", files ) ){ errorLog << "Failed to parse data directory!" << endl; return false; } RandomForests forest; //Used to validate the random forest type GestureRecognitionPipeline *mainPipeline = NULL; // Points to the first valid pipeline that all the models will be merged to Vector< GestureRecognitionPipeline* > pipelineBuffer; //Stores the pipeline for each file that is loaded unsigned int inputVectorSize = 0; //Set to zero to mark we haven't loaded any models yet const unsigned int numFiles = files.getSize(); bool mainPipelineSet = false; bool combineModelsSuccessful = false; pipelineBuffer.reserve( numFiles ); //Loop over the files, load them, and add valid random forest pipelines to the pipelineBuffer so they can be combined with the mainPipeline for(unsigned int i=0; i<numFiles; i++){ infoLog << "- Loading model " << files[i] << ". File " << i+1 << " of " << numFiles << endl; GestureRecognitionPipeline *pipeline = new GestureRecognitionPipeline; if( pipeline->load( files[i] ) ){ infoLog << "- Pipeline loaded. 
Number of input dimensions: " << pipeline->getInputVectorDimensionsSize() << endl; if( pipelineBuffer.size() == 0 ){ inputVectorSize = pipeline->getInputVectorDimensionsSize(); } if( pipeline->getInputVectorDimensionsSize() != inputVectorSize ){ warningLog << "- Pipeline " << i+1 << " input vector size does not match the size of the first pipeline!" << endl; }else{ Classifier *classifier = pipeline->getClassifier(); if( classifier ){ if( classifier->getClassifierType() == forest.getClassifierType() ){ //Validate the classifier is a random forest if( !mainPipelineSet ){ mainPipelineSet = true; mainPipeline = pipeline; }else pipelineBuffer.push_back( pipeline ); }else{ warningLog << "- Pipeline " << i+1 << " does not contain a random forest classifer! Classifier type: " << classifier->getClassifierType() << endl; } } } }else{ warningLog << "- WARNING: Failed to load model from file: " << files[i] << endl; } } if( mainPipelineSet ){ //Combine the random forest models with the main pipeline model const unsigned int numPipelines = pipelineBuffer.getSize(); RandomForests *mainForest = mainPipeline->getClassifier< RandomForests >(); for(unsigned int i=0; i<numPipelines; i++){ infoLog << "- Combing model " << i+1 << " of " << numPipelines << " with main model..." << endl; RandomForests *f = pipelineBuffer[i]->getClassifier< RandomForests >(); if( !mainForest->combineModels( *f ) ){ warningLog << "- WARNING: Failed to combine model " << i+1 << " with the main model!" << endl; } } if( mainPipeline->getTrained() ){ infoLog << "- Saving combined pipeline to file..." << endl; combineModelsSuccessful = mainPipeline->save( modelFilename ); } }else{ errorLog << "Failed to combined models, no models were loaded!" << endl; } //Cleanup the pipeline buffer for(unsigned int i=0; i<pipelineBuffer.getSize(); i++){ delete pipelineBuffer[i]; pipelineBuffer[i] = NULL; } return combineModelsSuccessful; }
//Trains a RandomForests classifier on a dataset loaded from file and saves the trained pipeline.
//The dataset filename ("filename") is required. The results of the lookups for the model filename
//and the forest parameters are not checked.
//@param parser: the command line parser holding the options
//@return false if the filename is missing, the data can not be loaded or training fails. A failure
//        to save the model only logs a warning and still returns true.
bool train( CommandLineParser &parser ){

    string datasetPath = "";
    string modelPath = "";
    unsigned int numTrees = 0;
    unsigned int treeDepthLimit = 0;
    unsigned int minSamplesPerNode = 0;
    unsigned int randomSplitCount = 0;
    bool dropFeaturesAtSplit = false;
    double bootstrapDatasetWeight = 0.0;

    //The dataset filename is the only mandatory option
    const bool haveDatasetPath = parser.get("filename",datasetPath);
    if( !haveDatasetPath ){
        errorLog << "Failed to parse filename from command line! You can set the filename using the -f." << endl;
        printUsage();
        return false;
    }

    //Read the remaining options: model filename, forest size, max depth, min node size,
    //number of random splits, remove features flag and bootstrap weight
    parser.get("model-filename",modelPath);
    parser.get("forest-size",numTrees);
    parser.get("max-depth",treeDepthLimit);
    parser.get("min-node-size",minSamplesPerNode);
    parser.get("num-splits",randomSplitCount);
    parser.get("remove-features",dropFeaturesAtSplit);
    parser.get("bootstrap-weight",bootstrapDatasetWeight);

    //Load the dataset used to train the classifier
    ClassificationData trainingData;

    infoLog << "- Loading Training Data..." << endl;
    const bool dataLoaded = trainingData.load( datasetPath );
    if( !dataLoaded ){
        errorLog << "Failed to load training data!\n";
        return false;
    }

    //Report the dataset statistics
    const unsigned int numDimensions = trainingData.getNumDimensions();
    Vector< ClassTracker > classStats = trainingData.getClassTracker();
    infoLog << "- Num training samples: " << trainingData.getNumSamples() << endl;
    infoLog << "- Num dimensions: " << numDimensions << endl;
    infoLog << "- Num classes: " << trainingData.getNumClasses() << endl;
    infoLog << "- Class stats: " << endl;
    for(unsigned int k=0; k<classStats.getSize(); k++){
        infoLog << "- class " << classStats[k].classLabel << " number of samples: " << classStats[k].counter << endl;
    }

    //Configure the random forest
    RandomForests randomForest;

    //Pick the decision tree node used by every tree in the forest
    const string nodeType = "cluster-node"; //TODO: make this a command line option in the future
    if( nodeType == "cluster-node" ){
        randomForest.setDecisionTreeNode( DecisionTreeClusterNode() );
    }else if( nodeType == "threshold-node" ){
        randomForest.setTrainingMode( Tree::BEST_RANDOM_SPLIT );
        randomForest.setDecisionTreeNode( DecisionTreeThresholdNode() );
    }

    randomForest.setForestSize( numTrees );                             //Number of trees in the forest
    randomForest.setMaxDepth( treeDepthLimit );                         //Maximum depth of each tree
    randomForest.setMinNumSamplesPerNode( minSamplesPerNode );          //Minimum number of samples allowed per node
    randomForest.setNumRandomSplits( randomSplitCount );                //Number of random splits used per node
    randomForest.setRemoveFeaturesAtEachSplit( dropFeaturesAtSplit );   //Whether selected features are removed at each node
    randomForest.setBootstrappedDatasetWeight( bootstrapDatasetWeight );//Bootstrap weight

    //Wrap the classifier in a pipeline and train it
    GestureRecognitionPipeline pipeline;
    pipeline.setClassifier( randomForest );

    infoLog << "- Training model..." << endl;

    const bool trained = pipeline.train( trainingData );
    if( !trained ){
        errorLog << "Failed to train classifier!" << endl;
        return false;
    }

    infoLog << "- Model trained!" << endl;
    infoLog << "- Training time: " << (pipeline.getTrainingTime() * 0.001) / 60.0 << " (minutes)" << endl;

    //Save the pipeline, a failure here is only reported as a warning
    infoLog << "- Saving model to: " << modelPath << endl;
    const bool saved = pipeline.save( modelPath );
    if( !saved ){
        warningLog << "Failed to save model to file: " << modelPath << endl;
    }

    return true;
}
int main(int argc, const char * argv[]) { //Parse the data filename from the argument list if( argc != 2 ){ cout << "Error: failed to parse data filename from command line. You should run this example with one argument pointing to the data filename!\n"; return EXIT_FAILURE; } const string filename = argv[1]; //Create a new RandomForests instance RandomForests forest; //Set the number of trees in the forest forest.setForestSize( 10 ); //Set the number of random candidate splits that will be used to choose the best splitting values //More steps will give you a better model, but will take longer to train forest.setNumRandomSplits( 100 ); //Set the maximum depth of the tree forest.setMaxDepth( 10 ); //Set the minimum number of samples allowed per node forest.setMinNumSamplesPerNode( 10 ); //Load some training data to train the classifier ClassificationData trainingData; cout << "Loading Training Data\n"; if( !trainingData.load( filename ) ){ cout << "Failed to load training data: " << filename << endl; return EXIT_FAILURE; } //Use 20% of the training dataset to create a test dataset ClassificationData testData = trainingData.partition( 80 ); //Train the classifier if( !forest.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Print the forest forest.print(); //Save the model to a file if( !forest.save("RandomForestsModel.grt") ){ cout << "Failed to save the classifier model!\n"; return EXIT_FAILURE; } //Load the model from a file if( !forest.load("RandomForestsModel.grt") ){ cout << "Failed to load the classifier model!\n"; return EXIT_FAILURE; } //Test the accuracy of the model on the test data double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ //Get the i'th test sample UINT classLabel = testData[i].getClassLabel(); VectorDouble inputVector = testData[i].getSample(); //Perform a prediction using the classifier bool predictSuccess = forest.predict( inputVector ); if( !predictSuccess ){ cout << "Failed to 
perform prediction for test sampel: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label UINT predictedClassLabel = forest.getPredictedClassLabel(); VectorDouble classLikelihoods = forest.getClassLikelihoods(); VectorDouble classDistances = forest.getClassDistances(); //Update the accuracy if( classLabel == predictedClassLabel ) accuracy++; cout << "TestSample: " << i << " ClassLabel: " << classLabel << " PredictedClassLabel: " << predictedClassLabel << endl; } cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl; return EXIT_SUCCESS; }