bool ClassificationData::merge(const ClassificationData &labelledData){ if( labelledData.getNumDimensions() != numDimensions ){ errorLog << "merge(const ClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl; return false; } //The dataset has changed so flag that any previous cross validation setup will now not work crossValidationSetup = false; crossValidationIndexs.clear(); //Reserve the memory reserve( getNumSamples() + labelledData.getNumSamples() ); //Add the data from the labelledData to this instance for(UINT i=0; i<labelledData.getNumSamples(); i++){ addSample(labelledData[i].getClassLabel(), labelledData[i].getSample()); } //Set the class names from the dataset vector< ClassTracker > classTracker = labelledData.getClassTracker(); for(UINT i=0; i<classTracker.size(); i++){ setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel); } //Sort the class labels sortClassLabels(); return true; }
// Tests the learning algorithm on a basic dataset TEST(BAG, TrainBasicDataset) { BAG bag; //Check the module is not trained EXPECT_TRUE( !bag.getTrained() ); //Generate a basic dataset const UINT numSamples = 10000; const UINT numClasses = 10; const UINT numDimensions = 100; ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 ); ClassificationData trainingData; EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) ); ClassificationData testData = trainingData.split( 50 ); //Add an adaptive naive bayes classifier to the BAG ensemble bag.addClassifierToEnsemble( ANBC() ); //Add a MinDist classifier to the BAG ensemble, using two clusters MinDist min_dist_two_clusters; min_dist_two_clusters.setNumClusters(2); bag.addClassifierToEnsemble( min_dist_two_clusters ); //Add a MinDist classifier to the BAG ensemble, using five clusters MinDist min_dist_five_clusters; min_dist_five_clusters.setNumClusters(5); bag.addClassifierToEnsemble( min_dist_five_clusters ); //Train the classifier EXPECT_TRUE( bag.train( trainingData ) ); EXPECT_TRUE( bag.getTrained() ); EXPECT_TRUE( bag.print() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( bag.predict( testData[i].getSample() ) ); } EXPECT_TRUE( bag.save( "bag_model.grt" ) ); bag.clear(); EXPECT_TRUE( !bag.getTrained() ); EXPECT_TRUE( bag.load( "bag_model.grt" ) ); EXPECT_TRUE( bag.getTrained() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( bag.predict( testData[i].getSample() ) ); } }
bool SVM::convertClassificationDataToLIBSVMFormat(ClassificationData &trainingData){

    //Repackages the GRT training data into the raw svm_problem structure that LIBSVM expects.
    //Side effects: sets numInputDimensions, ranges, prob.l/prob.x/prob.y and problemSet.
    //The allocated nodes are owned by this class and released via deleteProblemSet().

    //clear any previous problems
    deleteProblemSet();

    const UINT numTrainingExamples = trainingData.getNumSamples();
    numInputDimensions = trainingData.getNumDimensions();

    //Compute the ranges encase the data should be scaled
    ranges = trainingData.getRanges();

    //Init the memory
    prob.l = numTrainingExamples;
    prob.x = new svm_node*[numTrainingExamples];
    prob.y = new double[numTrainingExamples];
    problemSet = true;

    for(UINT i=0; i<numTrainingExamples; i++){
        //Set the class ID
        prob.y[i] = trainingData[i].getClassLabel();

        //Assign the memory for this training example, note that a dummy node is needed at the end of the vector
        prob.x[i] = new svm_node[numInputDimensions+1];
        for(UINT j=0; j<numInputDimensions; j++){
            prob.x[i][j].index = j+1; //LIBSVM feature indices are 1-based
            prob.x[i][j].value = trainingData[i].getSample()[j];
        }
        //The terminating sentinel node: LIBSVM stops reading a sample when index == -1
        prob.x[i][numInputDimensions].index = -1; //Assign the final node value
        prob.x[i][numInputDimensions].value = 0;
    }

    return true;
}
bool SVM::train_(ClassificationData &trainingData){

    //Start from a clean slate: discard any previously trained model
    clear();

    //A model cannot be trained without data
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }

    //Repackage the labelled data into the structures LIBSVM understands
    if( !convertClassificationDataToLIBSVMFormat(trainingData) ){
        errorLog << "train_(ClassificationData &trainingData) - Failed To Convert Labelled Classification Data To LIBSVM Format!" << endl;
        return false;
    }

    //When auto gamma is enabled, default gamma to the reciprocal of the input dimensionality
    if( useAutoGamma ){
        param.gamma = 1.0/numInputDimensions;
    }

    //Run the underlying LIBSVM training routine
    if( !trainSVM() ){
        errorLog << "train_(ClassificationData &trainingData) - Failed To Train SVM Model!" << endl;
        return false;
    }

    return true;
}
bool BAG::train_(ClassificationData &trainingData){

    //Wipe any previously trained ensemble models
    clear();

    const unsigned int numSamples = trainingData.getNumSamples();
    const unsigned int numDims = trainingData.getNumDimensions();
    const unsigned int numTrainingClasses = trainingData.getNumClasses();

    if( numSamples == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }

    numInputDimensions = numDims;
    numClasses = numTrainingClasses;
    classLabels.resize(numTrainingClasses);
    ranges = trainingData.getRanges();

    //Scale the training data between 0 and 1 if scaling is enabled
    if( useScaling ){
        trainingData.scale(0, 1);
    }

    UINT ensembleSize = (UINT)ensemble.size();

    if( ensembleSize == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The ensemble size is zero! You need to add some classifiers to the ensemble first." << endl;
        return false;
    }

    //Every slot in the ensemble must hold a valid classifier before any training starts
    for(UINT k=0; k<ensembleSize; k++){
        if( ensemble[k] == NULL ){
            errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << k << " has not been set!" << endl;
            return false;
        }
    }

    //Train each ensemble member on its own bootstrapped resample of the training data
    for(UINT k=0; k<ensembleSize; k++){
        ClassificationData boostedDataset = trainingData.getBootstrappedDataset();

        trainingLog << "Training ensemble " << k+1 << ". Ensemble type: " << ensemble[k]->getClassType() << endl;

        if( !ensemble[k]->train( boostedDataset ) ){
            errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << k << " failed training!" << endl;
            return false;
        }
    }

    //Record the class labels seen in the training data
    classLabels = trainingData.getClassLabels();

    //Flag that the model has been trained
    trained = true;

    return trained;
}
bool SwipeDetector::train_(ClassificationData &trainingData)
{
    //Remove any previously trained model
    clear();

    const unsigned int numSamples = trainingData.getNumSamples();
    const unsigned int numDims = trainingData.getNumDimensions();

    if( numSamples == 0 ) {
        errorLog << "train_(trainingData &labelledTrainingData) - Training data has zero samples!" << std::endl;
        return false;
    }

    numInputDimensions = numDims;
    numClasses = 2; //This is always 2 for swipe detection [1 == swipe detected, everything else means no swipe detected]
    classLabels.resize( 2 );
    classLabels[0] = 1; //Swipe
    classLabels[1] = 2; //No Swipe
    nullRejectionThresholds.resize(2,0);
    ranges = trainingData.getRanges();

    //Scale the training data between 0 and 1 if scaling is enabled
    if( useScaling ) {
        trainingData.scale(0, 1);
    }

    //We currently have no way to automatically train the swipe detection, user needs to manually set thresholds, so just flag the model is trained
    trained = true;

    return true;
}
bool ANBC::setWeights(const ClassificationData &weightsData){
    //Reject an empty weights dataset
    if( weightsData.getNumSamples() == 0 ){
        return false;
    }
    //Store the weights and flag that they have been supplied
    weightsDataSet = true;
    this->weightsData = weightsData;
    return true;
}
// Tests the learning algorithm on a basic dataset TEST(KNN, TrainBasicDataset) { KNN knn; //Check the module is not trained EXPECT_TRUE( !knn.getTrained() ); //Generate a basic dataset const UINT numSamples = 1000; const UINT numClasses = 10; const UINT numDimensions = 10; ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 ); ClassificationData trainingData; EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) ); ClassificationData testData = trainingData.split( 50 ); //Train the classifier EXPECT_TRUE( knn.train( trainingData ) ); EXPECT_TRUE( knn.getTrained() ); EXPECT_TRUE( knn.print() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( knn.predict( testData[i].getSample() ) ); } EXPECT_TRUE( knn.save( "knn_model.grt" ) ); knn.clear(); EXPECT_TRUE( !knn.getTrained() ); EXPECT_TRUE( knn.load( "knn_model.grt" ) ); EXPECT_TRUE( knn.getTrained() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( knn.predict( testData[i].getSample() ) ); } }
bool HierarchicalClustering::train_(ClassificationData &trainingData){

    //There must be at least one sample to cluster
    if( trainingData.getNumSamples() == 0 ){
        return false;
    }

    //Flatten the labelled samples into an M x N matrix (the class labels are ignored)
    M = trainingData.getNumSamples();
    N = trainingData.getNumDimensions();
    MatrixFloat data(M,N);
    for(UINT row=0; row<M; row++){
        for(UINT col=0; col<N; col++){
            data[row][col] = trainingData[row][col];
        }
    }

    //Delegate the clustering to the matrix-based overload
    return train_( data );
}
int main (int argc, const char * argv[]) { //Create a new gesture recognition pipeline GestureRecognitionPipeline pipeline; //Add an ANBC module pipeline.setClassifier( ANBC() ); //Add a ClassLabelFilter as a post processing module with a minCount of 5 and a buffer size of 10 pipeline.addPostProcessingModule( ClassLabelFilter(5,10) ); //Load some training data to train and test the classifier ClassificationData trainingData; ClassificationData testData; if( !trainingData.loadDatasetFromFile("ClassLabelFilterTrainingData.txt") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } if( !testData.loadDatasetFromFile("ClassLabelFilterTestData.txt") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } //Train the classifier if( !pipeline.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Use the test dataset to demonstrate the output of the ClassLabelFilter for(UINT i=0; i<testData.getNumSamples(); i++){ VectorDouble inputVector = testData[i].getSample(); if( !pipeline.predict( inputVector ) ){ cout << "Failed to perform prediction for test sampel: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label (this will be the processed class label) UINT predictedClassLabel = pipeline.getPredictedClassLabel(); //Get the unprocessed class label (i.e. the direct output of the classifier) UINT unprocessedClassLabel = pipeline.getUnProcessedPredictedClassLabel(); //Also print the results to the screen cout << "Processed Class Label: \t" << predictedClassLabel << "\tUnprocessed Class Label: \t" << unprocessedClassLabel << endl; } return EXIT_SUCCESS; }
int main (int argc, const char * argv[]) { //Load the example data ClassificationData data; if( !data.loadDatasetFromFile("WiiAccShakeData.txt") ){ cout << "ERROR: Failed to load data from file!\n"; return EXIT_FAILURE; } //The variables used to initialize the zero crossing counter feature extraction UINT searchWindowSize = 20; double deadZoneThreshold = 0.01; UINT numDimensions = data.getNumDimensions(); UINT featureMode = ZeroCrossingCounter::INDEPENDANT_FEATURE_MODE; //This could also be ZeroCrossingCounter::COMBINED_FEATURE_MODE //Create a new instance of the ZeroCrossingCounter feature extraction ZeroCrossingCounter zeroCrossingCounter(searchWindowSize,deadZoneThreshold,numDimensions,featureMode); //Loop over the accelerometer data, at each time sample (i) compute the features using the new sample and then write the results to a file for(UINT i=0; i<data.getNumSamples(); i++){ //Compute the features using this new sample zeroCrossingCounter.computeFeatures( data[i].getSample() ); //Write the data to the file cout << "InputVector: "; for(UINT j=0; j<data.getNumDimensions(); j++){ cout << data[i].getSample()[j] << "\t"; } //Get the latest feature vector VectorDouble featureVector = zeroCrossingCounter.getFeatureVector(); //Write the features to the file cout << "FeatureVector: "; for(UINT j=0; j<featureVector.size(); j++){ cout << featureVector[j]; if( j != featureVector.size()-1 ) cout << "\t"; } cout << endl; } //Save the zero crossing counter settings to a file zeroCrossingCounter.saveModelToFile("ZeroCrossingCounterSettings.txt"); //You can then load the settings again if you need them zeroCrossingCounter.loadModelFromFile("ZeroCrossingCounterSettings.txt"); return EXIT_SUCCESS; }
bool DecisionTreeClusterNode::computeSplit( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){ const UINT M = trainingData.getNumSamples(); const UINT N = features.getSize(); const UINT K = classLabels.getSize(); if( N == 0 ) return false; if( K == 0 ) return false; minError = grt_numeric_limits< Float >::max(); Random random; UINT bestFeatureIndex = 0; Float bestThreshold = 0; Float error = 0; Vector< UINT > groupIndex(M); Vector< MinMax > ranges = trainingData.getRanges(); MatrixDouble data(M,1); //This will store our temporary data for each dimension //Randomly select which features we want to use UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps; Vector< UINT > randomFeatures = random.getRandomSubset( 0, N, numRandomFeatures ); //Loop over each random feature and try and find the best split point for(UINT n=0; n<numRandomFeatures; n++){ featureIndex = features[ randomFeatures[n] ]; //Use the data in this feature dimension to create a sum dataset for(UINT i=0; i<M; i++){ data[i][0] = trainingData[i][featureIndex]; } if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){ //Store the best threshold and feature index if( error < minError ){ minError = error; bestThreshold = threshold; bestFeatureIndex = featureIndex; } } } //Set the best feature index that will be returned to the DecisionTree that called this function featureIndex = bestFeatureIndex; //Store the node size, feature index, best threshold and class probabilities for this node set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) ); return true; }
bool KMeans::train_(ClassificationData &trainingData){

    //An empty dataset cannot be clustered
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The training data is empty!" << std::endl;
        return false;
    }

    //Set the numClusters as the number of classes in the training data
    numClusters = trainingData.getNumClasses();

    //Copy the labelled samples into an unlabelled training matrix
    UINT numRows = trainingData.getNumSamples();
    UINT numCols = trainingData.getNumDimensions();
    MatrixFloat data(numRows,numCols);
    for(UINT row=0; row<numRows; row++){
        for(UINT col=0; col<numCols; col++){
            data[row][col] = trainingData[row][col];
        }
    }

    //Run the K-Means algorithm on the matrix data
    return train_( data );
}
int main (int argc, const char * argv[]) { //Load the example data ClassificationData data; if( !data.load("WiiAccShakeData.grt") ){ cout << "ERROR: Failed to load data from file!\n"; return EXIT_FAILURE; } //The variables used to initialize the MovementIndex feature extraction UINT windowSize = 10; UINT numDimensions = data.getNumDimensions(); //Create a new instance of the MovementIndex feature extraction MovementIndex movementIndex(windowSize,numDimensions); //Loop over the accelerometer data, at each time sample (i) compute the features using the new sample and then write the results to a file for(UINT i=0; i<data.getNumSamples(); i++){ //Compute the features using this new sample movementIndex.computeFeatures( data[i].getSample() ); //Write the data cout << "InputVector: "; for(UINT j=0; j<data.getNumDimensions(); j++){ cout << data[i].getSample()[j] << "\t"; } //Get the latest feature vector VectorFloat featureVector = movementIndex.getFeatureVector(); //Write the features cout << "FeatureVector: "; for(UINT j=0; j<featureVector.size(); j++){ cout << featureVector[j]; if( j != featureVector.size()-1 ) cout << "\t"; } cout << endl; } //Save the MovementIndex settings to a file movementIndex.save("MovementIndexSettings.grt"); //You can then load the settings again if you need them movementIndex.load("MovementIndexSettings.grt"); return EXIT_SUCCESS; }
bool Softmax::train_(ClassificationData &trainingData){

    //Trains a one-vs-all softmax regression model for every class in the training data.

    //Clear any previous model
    clear();

    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();

    if( M == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << std::endl;
        return false;
    }

    numInputDimensions = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    //Perf fix: fetch the class tracker once; the old code copied the whole
    //tracker vector on every loop iteration via getClassTracker()[k]
    const vector< ClassTracker > classTracker = trainingData.getClassTracker();

    //Train a regression model for each class in the training data
    for(UINT k=0; k<numClasses; k++){

        //Set the class label
        classLabels[k] = classTracker[k].classLabel;

        //Train the model
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << "train(ClassificationData labelledTrainingData) - Failed to train model for class: " << classLabels[k] << std::endl;
            return false;
        }
    }

    //Flag that the algorithm has been trained
    trained = true;
    return trained;
}
int main(int argc, char * argv[]) { if( argc < 3 ){ errorLog << "Not enough input arguments!" << endl; printUsage(); return EXIT_FAILURE; } const string inputDirectory = argv[1]; const string outputFilename = argv[2]; //Parse the data directory for files vector< string > filenames; infoLog << "- Parsing data directory: " << inputDirectory << endl; if( !Util::parseDirectory( inputDirectory, ".csv", filenames ) ){ errorLog << "Failed to parse input directory: " << inputDirectory << endl; return EXIT_FAILURE; } if( filenames.size() == 0 ){ errorLog << "Failed to find any files in the input directory: " << inputDirectory << endl; return EXIT_FAILURE; } ClassificationData data; unsigned int numFiles = (unsigned int)filenames.size(); bool dataLoaded = false; for(unsigned int i=0; i<numFiles; i++){ //Load the data infoLog << "- Loading data " << i+1 << " of " << numFiles << endl; ClassificationData tmp; if( tmp.load( filenames[i] ) ){ if( i==0 ){ data.setNumDimensions( tmp.getNumDimensions() ); } dataLoaded = true; infoLog << "- Data loaded. Number of samples: " << tmp.getNumSamples() << endl; data.merge( tmp ); }else{ warningLog << "- Failed to load data!" << endl; } } if( dataLoaded ){ infoLog << "- Merged data to generate new dataset with " << data.getNumSamples() << " samples" << endl; //Save the new datasets infoLog << "- Saving main dataset to file: " << outputFilename << endl; if( !data.save( outputFilename ) ){ errorLog << "Failed to save output data: " << outputFilename << endl; return EXIT_FAILURE; } }else{ warningLog << "- Failed to load any data!" << endl; return EXIT_FAILURE; } return EXIT_SUCCESS; }
int main (int argc, const char * argv[]) { //Create a new Softmax instance Softmax softmax; //Load some training data to train the classifier ClassificationData trainingData; if( !trainingData.loadDatasetFromFile("SoftmaxTrainingData.txt") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } //Use 20% of the training dataset to create a test dataset ClassificationData testData = trainingData.partition( 80 ); //Train the classifier if( !softmax.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Save the Softmax model to a file if( !softmax.saveModelToFile("SoftmaxModel.txt") ){ cout << "Failed to save the classifier model!\n"; return EXIT_FAILURE; } //Load the Softmax model from a file if( !softmax.loadModelFromFile("SoftmaxModel.txt") ){ cout << "Failed to load the classifier model!\n"; return EXIT_FAILURE; } //Use the test dataset to test the softmax model double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ //Get the i'th test sample UINT classLabel = testData[i].getClassLabel(); vector< double > inputVector = testData[i].getSample(); //Perform a prediction using the classifier if( !softmax.predict( inputVector ) ){ cout << "Failed to perform prediction for test sample: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label UINT predictedClassLabel = softmax.getPredictedClassLabel(); vector< double > classLikelihoods = softmax.getClassLikelihoods(); vector< double > classDistances = softmax.getClassDistances(); //Update the accuracy if( classLabel == predictedClassLabel ) accuracy++; cout << "TestSample: " << i << " ClassLabel: " << classLabel << " PredictedClassLabel: " << predictedClassLabel << endl; } cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl; return EXIT_SUCCESS; }
bool DecisionTreeClusterNode::computeError( const ClassificationData &trainingData, MatrixFloat &data, const Vector< UINT > &classLabels, Vector< MinMax > ranges, Vector< UINT > groupIndex, const UINT featureIndex, Float &threshold, Float &error ){

    //Scores a candidate split for featureIndex: a 2-cluster KMeans is trained on the 1-D
    //feature data, the threshold is set to the midpoint between the two cluster centers,
    //and the split is scored via the weighted gini index of the resulting partition.
    //Outputs: threshold and error (both by reference); returns false if KMeans fails.
    //NOTE(review): ranges and groupIndex are taken by value, so the caller's copies are
    //not updated and ranges is not read here — confirm whether these should be references.

    error = 0;
    threshold = 0;

    const UINT M = trainingData.getNumSamples();
    const UINT K = (UINT)classLabels.size();

    Float giniIndexL = 0;
    Float giniIndexR = 0;
    Float weightL = 0;
    Float weightR = 0;
    VectorFloat groupCounter(2,0);
    MatrixFloat classProbabilities(K,2);

    //Use this data to train a KMeans cluster with 2 clusters
    KMeans kmeans;
    kmeans.setNumClusters( 2 );
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-5 );
    kmeans.setMinNumEpochs( 1 );
    kmeans.setMaxNumEpochs( 100 );

    //Disable the logging to clean things up
    kmeans.setTrainingLoggingEnabled( false );

    if( !kmeans.train_( data ) ){
        errorLog << __GRT_LOG__ << " Failed to train KMeans model for feature: " << featureIndex << std::endl;
        return false;
    }

    //Set the split threshold as the mid point between the two clusters
    const MatrixFloat &clusters = kmeans.getClusters();
    threshold = 0;
    for(UINT i=0; i<clusters.getNumRows(); i++){
        threshold += clusters[i][0];
    }
    threshold /= clusters.getNumRows();

    //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
    groupCounter[0] = groupCounter[1] = 0;
    classProbabilities.setAllValues(0);
    for(UINT i=0; i<M; i++){
        groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
        groupCounter[ groupIndex[i] ]++;
        //Count class occurrences per side; converted to probabilities below
        classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
    }

    //Compute the class probabilities for the lhs group and rhs group
    for(UINT k=0; k<K; k++){
        classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
        classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
    }

    //Compute the Gini index for the lhs and rhs groups
    giniIndexL = giniIndexR = 0;
    for(UINT k=0; k<K; k++){
        giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
        giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
    }

    //Weight each side's gini index by the fraction of samples that fell into it
    weightL = groupCounter[0]/M;
    weightR = groupCounter[1]/M;
    error = (giniIndexL*weightL) + (giniIndexR*weightR);

    return true;
}
bool MinDist::train_(ClassificationData &labelledTrainingData){

    //Trains one MinDist model per class: each class gets its own set of cluster centers
    //plus a null-rejection threshold derived from nullRejectionCoeff.

    //Clear any previous models
    clear();

    const unsigned int M = labelledTrainingData.getNumSamples();
    const unsigned int N = labelledTrainingData.getNumDimensions();
    const unsigned int K = labelledTrainingData.getNumClasses();

    if( M == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }

    //Each class model is clustered, so there must be more samples than clusters
    if( M <= numClusters ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - There are not enough training samples for the number of clusters. Either reduce the number of clusters or increase the number of training samples!" << endl;
        return false;
    }

    numInputDimensions = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    nullRejectionThresholds.resize(K);
    ranges = labelledTrainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        labelledTrainingData.scale(0, 1);
    }

    //Train each of the models
    for(UINT k=0; k<numClasses; k++){

        //Get the class label for the kth class
        UINT classLabel = labelledTrainingData.getClassTracker()[k].classLabel;

        //Set the kth class label
        classLabels[k] = classLabel;

        //Get all the training data for this class
        ClassificationData classData = labelledTrainingData.getClassData(classLabel);
        MatrixDouble data(classData.getNumSamples(),N);

        //Copy the training data into a matrix
        for(UINT i=0; i<data.getNumRows(); i++){
            for(UINT j=0; j<data.getNumCols(); j++){
                data[i][j] = classData[i][j];
            }
        }

        //Train the model for this class
        models[k].setGamma( nullRejectionCoeff );
        if( !models[k].train(classLabel,data,numClusters) ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to train model for class: " << classLabel;
            errorLog << ". This is might be because this class does not have enough training samples! You should reduce the number of clusters or increase the number of training samples for this class." << endl;
            //Discard any partially trained models so the classifier is left in a clean state
            models.clear();
            return false;
        }

        //Set the null rejection threshold
        nullRejectionThresholds[k] = models[k].getRejectionThreshold();

    }

    trained = true;
    return true;
}
int main (int argc, const char * argv[]) { //Create a new AdaBoost instance AdaBoost adaBoost; //Set the weak classifier you want to use adaBoost.setWeakClassifier( DecisionStump() ); //Load some training data to train the classifier ClassificationData trainingData; if( !trainingData.load("AdaBoostTrainingData.grt") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } //Use 20% of the training dataset to create a test dataset ClassificationData testData = trainingData.partition( 80 ); //Train the classifier if( !adaBoost.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Save the model to a file if( !adaBoost.save("AdaBoostModel.grt") ){ cout << "Failed to save the classifier model!\n"; return EXIT_FAILURE; } //Load the model from a file if( !adaBoost.load("AdaBoostModel.grt") ){ cout << "Failed to load the classifier model!\n"; return EXIT_FAILURE; } //Use the test dataset to test the AdaBoost model double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ //Get the i'th test sample UINT classLabel = testData[i].getClassLabel(); vector< double > inputVector = testData[i].getSample(); //Perform a prediction using the classifier if( !adaBoost.predict( inputVector ) ){ cout << "Failed to perform prediction for test sampel: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label UINT predictedClassLabel = adaBoost.getPredictedClassLabel(); double maximumLikelhood = adaBoost.getMaximumLikelihood(); vector< double > classLikelihoods = adaBoost.getClassLikelihoods(); vector< double > classDistances = adaBoost.getClassDistances(); //Update the accuracy if( classLabel == predictedClassLabel ) accuracy++; cout << "TestSample: " << i << " ClassLabel: " << classLabel; cout << " PredictedClassLabel: " << predictedClassLabel << " Likelihood: " << maximumLikelhood; cout << endl; } cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl; return EXIT_SUCCESS; }
void prediction_axis_data(){ // Training and test data ClassificationData trainingData; ClassificationData testData; string file_path = "../../../data/"; string class_name = "5"; if( !trainingData.loadDatasetFromFile(file_path + "train/grt/" + class_name + ".txt") ){ std::cout <<"Failed to load training data!\n"; } if( !testData.loadDatasetFromFile(file_path + "test/grt/" + class_name + ".txt") ){ std::cout <<"Failed to load training data!\n"; } // Pipeline setup ANBC anbc; anbc.setNullRejectionCoeff(1); anbc.enableScaling(true); anbc.enableNullRejection(true); GestureRecognitionPipeline pipeline; pipeline.setClassifier(anbc); // Train the pipeline if( !pipeline.train( trainingData ) ){ std::cout << "Failed to train classifier!\n"; } // File stream ofstream outputFileStream(class_name + ".csv"); // Evaluation double accuracy = 0; outputFileStream << "actualClass,predictedClass,maximumLikelihood,lZ,lY,lZ,rZ,rY,rZ \n"; for(UINT i=0; i<testData.getNumSamples(); i++){ UINT actualClassLabel = testData[i].getClassLabel(); vector< double > inputVector = testData[i].getSample(); if( !pipeline.predict( inputVector )){ std::cout << "Failed to perform prediction for test sampel: " << i <<"\n"; } UINT predictedClassLabel = pipeline.getPredictedClassLabel(); double maximumLikelihood = pipeline.getMaximumLikelihood(); outputFileStream << actualClassLabel << "," << predictedClassLabel << "," << maximumLikelihood << "," << inputVector[0] << "," << inputVector[1] << "," << inputVector[2] << "," << inputVector[3] << "," << inputVector[4] << "," << inputVector[5] << "\n"; if( actualClassLabel == predictedClassLabel) accuracy++; } std::cout << "Test Accuracy testHandsUp : " << accuracy/double(testData.getNumSamples())*100.0 << " %\n"; }
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){

    //Trains a single one-vs-all logistic model for classLabel using stochastic gradient
    //descent: samples matching classLabel become targets of 1.0, all others 0.0.
    //Stops when the epoch error delta falls below minChange or maxNumEpochs is reached.

    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    UINT N = data.getNumDimensions();
    UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    Vector< UINT > randomTrainingOrder(M);

    //Init the model
    model.init( classLabel,  N );

    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }

    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    //NOTE(review): std::random_shuffle was deprecated in C++14 and removed in C++17 —
    //should be replaced with std::shuffle + a <random> engine when the build moves to C++17
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());

    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){

        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        for(UINT m=0; m<M; m++){

            //Select the random sample
            UINT i = randomTrainingOrder[m];

            //Compute the error, given the current weights
            error = y[i] - model.compute( data[i].getSample() );
            errorSum += error;

            //Update the weights
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate  * error * data[i][j];
            }
            model.w0 += learningRate  * error;
        }

        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;

        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }

        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }

        trainingLog << "Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
    }

    return true;
}
// Trains a one-vs-all AdaBoost (M1-style) model per class. For each class the
// data is relabelled POSITIVE/NEGATIVE, then boosting repeatedly selects the
// weak classifier with the smallest weighted error, weights it by alpha, and
// re-weights the misclassified samples. Returns false if there are too few
// samples or no weak classifiers have been registered.
bool AdaBoost::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    if( trainingData.getNumSamples() <= 1 ){
        errorLog << "train_(ClassificationData &trainingData) - There are not enough training samples to train a model! Number of samples: " << trainingData.getNumSamples() << endl;
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    const UINT M = trainingData.getNumSamples();
    const UINT POSITIVE_LABEL = WEAK_CLASSIFIER_POSITIVE_CLASS_LABEL;
    const UINT NEGATIVE_LABEL = WEAK_CLASSIFIER_NEGATIVE_CLASS_LABEL;
    double alpha = 0;
    const double beta = 0.001; //Margin below 0.5 error at which boosting stops (weak learner no better than chance)
    double epsilon = 0;
    TrainingResult trainingResult;
    
    const UINT K = (UINT)weakClassifiers.size();
    if( K == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - No weakClassifiers have been set. You need to set at least one weak classifier first." << endl;
        return false;
    }
    
    classLabels.resize(numClasses);
    models.resize(numClasses);
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        trainingData.scale(ranges,0,1);
    }
    
    //Create the weights vector (one weight per training sample)
    VectorDouble weights(M);
    
    //Create the error matrix (rows: weak classifiers, cols: samples; 1 = misclassified)
    MatrixDouble errorMatrix(K,M);
    
    for(UINT classIter=0; classIter<numClasses; classIter++){
        
        //Get the class label for the current class
        classLabels[classIter] = trainingData.getClassLabels()[classIter];
        
        //Set the class label of the current model
        models[ classIter ].setClassLabel( classLabels[classIter] );
        
        //Setup the labels for this class, POSITIVE_LABEL == 1, NEGATIVE_LABEL == 2
        ClassificationData classData;
        classData.setNumDimensions(trainingData.getNumDimensions());
        for(UINT i=0; i<M; i++){
            UINT label = trainingData[i].getClassLabel()==classLabels[classIter] ? POSITIVE_LABEL : NEGATIVE_LABEL;
            VectorDouble trainingSample = trainingData[i].getSample();
            classData.addSample(label,trainingSample);
        }
        
        //Setup the initial training sample weights (uniform)
        std::fill(weights.begin(),weights.end(),1.0/M);
        
        //Run the boosting loop
        bool keepBoosting = true;
        UINT t = 0;
        
        while( keepBoosting ){
            
            //Pick the classifier from the family of classifiers that minimizes the total error
            UINT bestClassifierIndex = 0;
            double minError = numeric_limits<double>::max();
            for(UINT k=0; k<K; k++){
                //Get the k'th possible classifier
                WeakClassifier *weakLearner = weakClassifiers[k];
                
                //Train the current classifier on the weighted samples
                if( !weakLearner->train(classData,weights) ){
                    errorLog << "Failed to train weakLearner!" << endl;
                    return false;
                }
                
                //Compute the weighted error for this clasifier
                double e = 0;
                double positiveLabel = weakLearner->getPositiveClassLabel();
                double numCorrect = 0;
                double numIncorrect = 0;
                for(UINT i=0; i<M; i++){
                    //Only penalize errors
                    double prediction = weakLearner->predict( classData[i].getSample() );
                    if( (prediction == positiveLabel && classData[i].getClassLabel() != POSITIVE_LABEL) ||  //False positive
                        (prediction != positiveLabel && classData[i].getClassLabel() == POSITIVE_LABEL) ){  //False negative
                        e += weights[i]; //Increase the error proportional to the weight of the example
                        errorMatrix[k][i] = 1; //Flag that there was an error
                        numIncorrect++;
                    }else{
                        errorMatrix[k][i] = 0; //Flag that there was no error
                        numCorrect++;
                    }
                }
                
                trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Classifier: " << k << " WeightedError: " << e << " NumCorrect: " << numCorrect/M << " NumIncorrect: " <<numIncorrect/M << endl;
                
                if( e < minError ){
                    minError = e;
                    bestClassifierIndex = k;
                }
            }
            
            epsilon = minError;
            
            //Set alpha, using the M1 weight value, small weights (close to 0) will receive a strong weight in the final classifier
            alpha = 0.5 * log( (1.0-epsilon)/epsilon );
            
            trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Best Classifier Index: " << bestClassifierIndex << " MinError: " << minError << " Alpha: " << alpha << endl;
            
            //Stop boosting if alpha blew up (epsilon was 0 or 1) or the weak learner is no better than chance
            if( isinf(alpha) ){ keepBoosting = false; trainingLog << "Alpha is INF. Stopping boosting for current class" << endl; }
            if( 0.5 - epsilon <= beta ){ keepBoosting = false; trainingLog << "Epsilon <= Beta. Stopping boosting for current class" << endl; }
            if( ++t >= numBoostingIterations ) keepBoosting = false;
            
            trainingResult.setClassificationResult(t, minError, this);
            trainingResults.push_back(trainingResult);
            trainingResultsObserverManager.notifyObservers( trainingResult );
            
            if( keepBoosting ){
                
                //Add the best weak classifier to the committee
                models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                
                //Update the weights for the next boosting iteration
                double reWeight = (1.0 - epsilon) / epsilon;
                double oldSum = 0;
                double newSum = 0;
                for(UINT i=0; i<M; i++){
                    oldSum += weights[i];
                    //Only update the weights that resulted in an incorrect prediction
                    if( errorMatrix[bestClassifierIndex][i] == 1 ) weights[i] *= reWeight;
                    newSum += weights[i];
                }
                
                //Normalize all the weights
                //This results to increasing the weights of the samples that were incorrectly labelled
                //While decreasing the weights of the samples that were correctly classified
                reWeight = oldSum/newSum;
                for(UINT i=0; i<M; i++){
                    weights[i] *= reWeight;
                }
                
            }else{
                trainingLog << "Stopping boosting training at iteration : " << t-1 << " with an error of " << epsilon << endl;
                if( t-1 == 0 ){
                    //Add the best weak classifier to the committee (we have to add it as this is the first iteration)
                    if( isinf(alpha) ){ alpha = 1; } //If alpha is infinite then the first classifier got everything correct
                    models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                }
            }
            
        }
    }
    
    //Normalize the weights of each per-class committee
    for(UINT k=0; k<numClasses; k++){
        models[k].normalizeWeights();
    }
    
    //Flag that the model has been trained
    trained = true;
    
    //Setup the data for prediction
    predictedClassLabel = 0;
    maxLikelihood = 0;
    classLikelihoods.resize(numClasses);
    classDistances.resize(numClasses);
    
    return true;
}
// Trains one Gaussian model per class for the Adaptive Naive Bayes Classifier.
// If a weights dataset has been supplied its dimension count must match the
// training data, and each class must have a matching weights sample; otherwise
// all feature weights default to 1. On any per-class training failure the
// models are cleared and false is returned (with diagnostic logging).
bool ANBC::train_(ClassificationData &labelledTrainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = labelledTrainingData.getNumSamples();
    const unsigned int N = labelledTrainingData.getNumDimensions();
    const unsigned int K = labelledTrainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( weightsDataSet ){
        //The weights data must describe the same feature space as the training data
        if( weightsData.getNumDimensions() != N ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - The number of dimensions in the weights data (" << weightsData.getNumDimensions() << ") is not equal to the number of dimensions of the training data (" << N << ")" << endl;
            return false;
        }
    }
    
    numInputDimensions = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = labelledTrainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        labelledTrainingData.scale(0, 1);
    }
    
    //Train each of the models
    for(UINT k=0; k<numClasses; k++){
        
        //Get the class label for the kth class
        UINT classLabel = labelledTrainingData.getClassTracker()[k].classLabel;
        
        //Set the kth class label
        classLabels[k] = classLabel;
        
        //Get the weights for this class (linear search over the weights samples by class label)
        VectorDouble weights(numInputDimensions);
        if( weightsDataSet ){
            bool weightsFound = false;
            for(UINT i=0; i<weightsData.getNumSamples(); i++){
                if( weightsData[i].getClassLabel() == classLabel ){
                    weights = weightsData[i].getSample();
                    weightsFound = true;
                    break;
                }
            }
            
            if( !weightsFound ){
                errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to find the weights for class " << classLabel << endl;
                return false;
            }
        }else{
            //If the weights data has not been set then all the weights are 1
            for(UINT j=0; j<numInputDimensions; j++) weights[j] = 1.0;
        }
        
        //Get all the training data for this class
        ClassificationData classData = labelledTrainingData.getClassData(classLabel);
        MatrixDouble data(classData.getNumSamples(),N);
        
        //Copy the training data into a matrix
        for(UINT i=0; i<data.getNumRows(); i++){
            for(UINT j=0; j<data.getNumCols(); j++){
                data[i][j] = classData[i][j];
            }
        }
        
        //Train the model for this class
        models[k].gamma = nullRejectionCoeff;
        if( !models[k].train(classLabel,data,weights) ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to train model for class: " << classLabel << endl;
            
            //Try and work out why the training failed
            if( models[k].N == 0 ){
                errorLog << "train_(ClassificationData &labelledTrainingData) - N == 0!" << endl;
                models.clear();
                return false;
            }
            for(UINT j=0; j<numInputDimensions; j++){
                //A zero mean in any column suggests degenerate training data for this class
                if( models[k].mu[j] == 0 ){
                    errorLog << "train_(ClassificationData &labelledTrainingData) - The mean of column " << j+1 << " is zero! Check the training data" << endl;
                    models.clear();
                    return false;
                }
            }
            models.clear();
            return false;
        }
        
    }
    
    //Store the null rejection thresholds
    nullRejectionThresholds.resize(numClasses);
    for(UINT k=0; k<numClasses; k++) {
        nullRejectionThresholds[k] = models[k].threshold;
    }
    
    //Flag that the models have been trained
    trained = true;
    return trained;
}
// Trains a one-vs-all logistic model for the given class label using
// mini-batch stochastic gradient descent (batch size = batchSize member).
// The final (possibly smaller) batch covers the remaining M-m samples.
// Per-epoch results are appended to trainingResults. Stops when the change
// in summed batch error drops to minChange or maxNumEpochs is reached.
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    const UINT N = data.getNumDimensions();
    const UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    VectorFloat batchMean(N);
    Vector< UINT > randomTrainingOrder(M);
    Vector< VectorFloat > batchData(batchSize,VectorFloat(N));
    
    //Init the model
    model.init( classLabel, N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Clear any previous training results
    trainingResults.clear();
    trainingResults.reserve( maxNumEpochs );
    TrainingResult epochResult;
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        UINT m=0;
        while( m < M ){
            //Get the batch data for this update (the tail batch may be smaller than batchSize)
            UINT roundSize = m+batchSize < M ? batchSize : M-m;
            batchMean.fill(0.0);
            for(UINT i=0; i<roundSize; i++){
                for(UINT j=0; j<N; j++){
                    batchData[i][j] = data[ randomTrainingOrder[m+i] ][j];
                    batchMean[j] += batchData[i][j];
                }
            }
            
            //batchMean now holds the per-dimension mean of this batch
            for(UINT j=0; j<N; j++) batchMean[j] /= roundSize;
            
            //Compute the error on this batch, given the current weights
            error = 0.0;
            for(UINT i=0; i<roundSize; i++){
                error += y[ randomTrainingOrder[m+i] ] - model.compute( batchData[i] );
            }
            error /= roundSize;
            errorSum += error;
            
            //Update the weights using the mean batch error and the batch mean input
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate * error * batchMean[j];
            }
            model.w0 += learningRate * error;
            
            m += roundSize;
        }
        
        //Compute the change in error between epochs
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Class: " << classLabel << " Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
        
        //Record this epoch's result
        epochResult.setClassificationResult( iter, errorSum, this );
        trainingResults.push_back( epochResult );
    }
    
    return true;
}
void metrics_subset_data(){ ANBC anbc; anbc.enableScaling(true); anbc.enableNullRejection(true); MinDist minDist; minDist.setNumClusters(4); minDist.enableScaling(true); minDist.enableNullRejection(true); // ofstream opRecall("anbc-recall-nr-0-10.csv"); // opRecall <<"nrCoeff,class0,class1,class2,class3,class4,class5\n"; // // ofstream opInstanceRes("anbc-prediction-nr-2.csv"); // opInstanceRes <<"actualClass,predictedClass,maximumLikelihood,lZ,lY,lZ,rZ,rY,rZ\n"; // // ofstream opMetrics("anbc-precision-recall-fmeasure-nr-2.csv"); // opMetrics <<"class1,class2,class3,class4,class5\n"; // // ofstream opConfusion("anbc-confusion-nr-2.csv"); // opConfusion <<"class0,class1,class2,class3,class4,class5\n"; ofstream opRecall("mindist-recall-nr-0-10.csv"); opRecall <<"nrCoeff,class0,class1,class2,class3,class4,class5\n"; ofstream opInstanceRes("mindist-prediction-nr-2.csv"); opInstanceRes <<"actualClass,predictedClass,maximumLikelihood,lZ,lY,lZ,rZ,rY,rZ\n"; ofstream opMetrics("mindist-precision-recall-fmeasure-nr-2.csv"); opMetrics <<"class1,class2,class3,class4,class5\n"; ofstream opConfusion("mindist-confusion-nr-2.csv"); opConfusion <<"class0,class1,class2,class3,class4,class5\n"; // Training and test data ClassificationData trainingData; ClassificationData testData; ClassificationData nullGestureData; string file_path = "../../../data/"; if( !trainingData.loadDatasetFromFile(file_path + "train/grt/hri-training-dataset.txt") ){ std::cout <<"Failed to load training data!\n"; } if( !nullGestureData.loadDatasetFromFile(file_path + "test/grt/0.txt") ){ std::cout <<"Failed to load null gesture data!\n"; } testData = trainingData.partition(90); testData.sortClassLabels(); // testData.saveDatasetToFile("anbc-validation-subset.txt"); testData.saveDatasetToFile("mindist-validation-subset.txt"); for(double nullRejectionCoeff = 0; nullRejectionCoeff <= 10; nullRejectionCoeff=nullRejectionCoeff+0.2){ // anbc.setNullRejectionCoeff(nullRejectionCoeff); // GestureRecognitionPipeline 
pipeline; // pipeline.setClassifier(anbc); minDist.setNullRejectionCoeff(nullRejectionCoeff); GestureRecognitionPipeline pipeline; pipeline.setClassifier(minDist); pipeline.train(trainingData); pipeline.test(testData); TestResult testRes = pipeline.getTestResults(); opRecall << nullRejectionCoeff << ","; //null rejection prediction double accuracy = 0; for(UINT i=0; i<nullGestureData.getNumSamples(); i++){ vector< double > inputVector = nullGestureData[i].getSample(); if( !pipeline.predict( inputVector )){ std::cout << "Failed to perform prediction for test sampel: " << i <<"\n"; } UINT predictedClassLabel = pipeline.getPredictedClassLabel(); if(predictedClassLabel == 0 ) accuracy++; } opRecall << accuracy/double(nullGestureData.getNumSamples()) << ","; // other classes prediction for(int cl = 0; cl < testRes.recall.size(); cl++ ){ opRecall << testRes.recall[cl]; if(cl < testRes.recall.size() - 1){ opRecall << ","; } } opRecall<< endl; // Calculate instance prediction, precision, recall, fmeasure and confusion matrix for nullRejection 2.0 if(AreDoubleSame(nullRejectionCoeff, 2.0)) { //instance prediction for(UINT i=0; i<testData.getNumSamples(); i++){ UINT actualClassLabel = testData[i].getClassLabel(); vector< double > inputVector = testData[i].getSample(); if( !pipeline.predict( inputVector )){ std::cout << "Failed to perform prediction for test sampel: " << i <<"\n"; } UINT predictedClassLabel = pipeline.getPredictedClassLabel(); double maximumLikelihood = pipeline.getMaximumLikelihood(); opInstanceRes << actualClassLabel << "," << predictedClassLabel << "," << maximumLikelihood << "," << inputVector[0] << "," << inputVector[1] << "," << inputVector[2] << "," << inputVector[3] << "," << inputVector[4] << "," << inputVector[5] << "\n"; } //precision, recall, fmeasure for(int cl = 0; cl < testRes.precision.size(); cl++ ){ opMetrics << testRes.precision[cl]; if(cl < testRes.precision.size() - 1){ opMetrics << ","; } } opMetrics<< endl; for(int cl = 0; cl < 
testRes.recall.size(); cl++ ){ opMetrics << testRes.recall[cl]; if(cl < testRes.recall.size() - 1){ opMetrics << ","; } } opMetrics<< endl; for(int cl = 0; cl < testRes.fMeasure.size(); cl++ ){ opMetrics << testRes.fMeasure[cl]; if(cl < testRes.fMeasure.size() - 1){ opMetrics << ","; } } opMetrics<< endl; //confusion matrix MatrixDouble matrix = testRes.confusionMatrix; for(UINT i=0; i<matrix.getNumRows(); i++){ for(UINT j=0; j<matrix.getNumCols(); j++){ opConfusion << matrix[i][j]; if(j < matrix.getNumCols() - 1){ opConfusion << ","; } } opConfusion << endl; } opConfusion << endl; } } cout << "Done\n"; }
// Trains one softmax (one-vs-all regression) model per class. Optionally
// scales the data to [0,1] and carves off a validation split before training.
// After training, accuracy is computed on the (already scaled) training and
// validation sets with scaling temporarily disabled, then the original
// scaling state is restored. Returns the trained flag.
bool Softmax::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << __GRT_LOG__ << " Training data has zero samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = K;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    ClassificationData validationData;
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Split off the validation set AFTER scaling so both sets share the same ranges
    if( useValidationSet ){
        validationData = trainingData.split( 100-validationSetSize );
    }
    
    //Train a regression model for each class in the training data
    for(UINT k=0; k<numClasses; k++){
        
        //Set the class label
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
        
        //Train the model
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << __GRT_LOG__ << " Failed to train model for class: " << classLabels[k] << std::endl;
            return false;
        }
    }
    
    //Flag that the models have been trained
    trained = true;
    converged = true;
    
    //Compute the final training stats
    trainingSetAccuracy = 0;
    validationSetAccuracy = 0;
    
    //If scaling was on, then the data will already be scaled, so turn it off temporially so we can test the model accuracy
    bool scalingState = useScaling;
    useScaling = false;
    if( !computeAccuracy( trainingData, trainingSetAccuracy ) ){
        trained = false;
        converged = false;
        errorLog << __GRT_LOG__ << " Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
        return false;
    }
    
    if( useValidationSet ){
        if( !computeAccuracy( validationData, validationSetAccuracy ) ){
            trained = false;
            converged = false;
            errorLog << __GRT_LOG__ << " Failed to compute validation set accuracy! Failed to fully train model!" << std::endl;
            return false;
        }
        
    }
    
    trainingLog << "Training set accuracy: " << trainingSetAccuracy << std::endl;
    
    if( useValidationSet ){
        trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
    }
    
    //Reset the scaling state for future prediction
    useScaling = scalingState;
    
    return trained;
}
void metrics_separate_data(){ // Training and test data ClassificationData trainingData; ClassificationData testData; string file_path = "../../../data/"; if( !trainingData.loadDatasetFromFile(file_path + "train/grt/12345.txt") ){ std::cout <<"Failed to load training data!\n"; } ANBC anbc; anbc.enableScaling(true); anbc.enableNullRejection(true); SVM svm(SVM::RBF_KERNEL); svm.enableScaling(true); svm.enableNullRejection(true); MinDist minDist; minDist.setNumClusters(4); minDist.enableScaling(true); minDist.enableNullRejection(true); ofstream outputFileStream("accuracy-mindist.csv"); outputFileStream << "classLabel,nullRejectionCoeff,accuracy, \n"; for(int class_name = 1; class_name<=5; class_name++){ if( !testData.loadDatasetFromFile(file_path + "test/grt/" + to_string(class_name) + ".txt") ){ std::cout <<"Failed to load training data!\n"; } for(double nullRejectionCoeff = 0; nullRejectionCoeff <= 10; nullRejectionCoeff=nullRejectionCoeff+0.2){ // anbc.setNullRejectionCoeff(nullRejectionCoeff); // svm.setNullRejectionCoeff(nullRejectionCoeff); minDist.setNullRejectionCoeff(nullRejectionCoeff); GestureRecognitionPipeline pipeline; // pipeline.setClassifier(anbc); // pipeline.setClassifier(svm); pipeline.setClassifier(minDist); // Train the pipeline if( !pipeline.train( trainingData ) ){ std::cout << "Failed to train classifier!\n"; } // Evaluation double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ UINT actualClassLabel = testData[i].getClassLabel(); vector< double > inputVector = testData[i].getSample(); if( !pipeline.predict( inputVector )){ std::cout << "Failed to perform prediction for test sampel: " << i <<"\n"; } UINT predictedClassLabel = pipeline.getPredictedClassLabel(); if( actualClassLabel == predictedClassLabel) accuracy++; } outputFileStream << class_name << ',' << nullRejectionCoeff << ',' << accuracy/double(testData.getNumSamples())*100.0 << '\n'; cout<< "Done" << endl; } } //---------------------- Null Gesture Test 
-----------------// int class_name = 0; if( !testData.loadDatasetFromFile(file_path + "test/grt/" + to_string(class_name) + ".txt") ){ std::cout <<"Failed to load training data!\n"; } for(double nullRejectionCoeff = 0; nullRejectionCoeff <= 10; nullRejectionCoeff=nullRejectionCoeff+0.2){ // anbc.setNullRejectionCoeff(nullRejectionCoeff); // svm.setNullRejectionCoeff(nullRejectionCoeff); minDist.setNullRejectionCoeff(nullRejectionCoeff); GestureRecognitionPipeline pipeline; // pipeline.setClassifier(anbc); // pipeline.setClassifier(svm); pipeline.setClassifier(minDist); // Train the pipeline if( !pipeline.train( trainingData ) ){ std::cout << "Failed to train classifier!\n"; } // Evaluation double accuracy = 0; for(UINT i=0; i<testData.getNumSamples(); i++){ vector< double > inputVector = testData[i].getSample(); if( !pipeline.predict( inputVector )){ std::cout << "Failed to perform prediction for test sampel: " << i <<"\n"; } UINT predictedClassLabel = pipeline.getPredictedClassLabel(); if(predictedClassLabel == 0 ) accuracy++; } outputFileStream << class_name << ',' << nullRejectionCoeff << ',' << accuracy/double(testData.getNumSamples())*100.0 << '\n'; cout<< "Done" << endl; } }
// NOTE: this classifier is a stub — train() logs a warning and immediately
// returns false. Everything after the first return is unreachable scaffolding
// retained for the future implementation, and the bulk of the LDA algorithm
// (group stats, pooled covariance, discriminant coefficients) is kept in the
// block comment below.
bool LDA::train(ClassificationData trainingData){
    
    errorLog << "SORRY - this module is still under development and can't be used yet!" << std::endl;
    return false;
    
    //NOTE(review): all code below this point is currently unreachable
    
    //Reset any previous model
    numInputDimensions = 0;
    numClasses = 0;
    models.clear();
    classLabels.clear();
    trained = false;
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledClassificationData trainingData) - There is no training data to train the model!" << std::endl;
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    
    //Calculate the between scatter matrix
    MatrixFloat SB = computeBetweenClassScatterMatrix( trainingData );
    
    //Calculate the within scatter matrix
    MatrixFloat SW = computeWithinClassScatterMatrix( trainingData );
    
    /*
    //Counters and stat containers
    vector< UINT > groupLabels(numClasses);
    VectorDouble groupCounters(numClasses);
    VectorDouble priorProb(numClasses);
    MatrixFloat groupMeans(numClasses,numFeatures);
    MatrixFloat pCov(numFeatures,numFeatures);
    MatrixFloat pCovInv(numFeatures,numFeatures);
    MatrixFloat modelCoeff(numClasses,numFeatures+1);
    
    pCov.setAllValues(0);
    modelCoeff.setAllValues(0);
    
    //Set the class labels and counters
    for(UINT k=0; k<numClasses; k++){
        groupLabels[k] = trainingData.getClassTracker()[k].classLabel;
        groupCounters[k] = trainingData.getClassTracker()[k].counter;
    }
    
    //Loop over the classes to compute the group stats
    for(UINT k=0; k<numClasses; k++){
        LabelledClassificationData classData = trainingData.getClassData( groupLabels[k] );
        MatrixFloat cov(numFeatures,numFeatures);
        
        //Compute class mu
        for(UINT j=0; j<numFeatures; j++){
            groupMeans[k][j] = 0;
            for(UINT i=0; i<classData.getNumSamples(); i++){
                groupMeans[k][j] += classData[i][j];
            }
            groupMeans[k][j] /= Float(classData.getNumSamples());
        }
        
        //Compute the class covariance
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                cov[m][n] = 0;
                for(UINT i=0; i<classData.getNumSamples(); i++){
                    cov[m][n] += (classData[i][m]-groupMeans[k][m]) * (classData[i][n]-groupMeans[k][n]);
                }
                cov[m][n] /= Float(classData.getNumSamples()-1);
            }
        }
        
        debugLog << "Group Cov:\n";
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                debugLog << cov[m][n] << "\t";
            }debugLog << "\n";
        }debugLog << std::endl;
        
        //Set the prior probability for this class (which is just 1/numClasses)
        priorProb[k] = 1.0/Float(numClasses);
        
        //Update the main covariance matrix
        Float weight = ((classData.getNumSamples() - 1) / Float(trainingData.getNumSamples() - numClasses) );
        debugLog << "Weight: " << weight << std::endl;
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                pCov[m][n] += weight * cov[m][n];
            }
        }
    }
    
    for(UINT k=0; k<numClasses; k++){
        debugLog << "GroupMu: " << groupLabels[k] << "\t";
        for(UINT j=0; j<numFeatures; j++){
            debugLog << groupMeans[k][j] << "\t";
        }debugLog << std::endl;
    }
    
    debugLog << "pCov:\n";
    for(UINT m=0; m<numFeatures; m++){
        for(UINT n=0; n<numFeatures; n++){
            debugLog << pCov[m][n] << "\t";
        }debugLog << "\n";
    }debugLog << std::endl;
    
    //Invert the pCov matrix
    LUDecomposition matrixInverter(pCov);
    if( !matrixInverter.inverse(pCovInv) ){
        errorLog << "Failed to invert pCov Matrix!" << std::endl;
        return false;
    }
    
    //Loop over classes to calculate linear discriminant coefficients
    Float sum = 0;
    vector< Float > temp(numFeatures);
    for(UINT k=0; k<numClasses; k++){
        //Compute the temporary vector
        for(UINT j=0; j<numFeatures; j++){
            temp[j] = 0;
            for(UINT m=0; m<numFeatures; m++){
                temp[j] += groupMeans[k][m] * pCovInv[m][j];
            }
        }
        
        //Compute the model coefficients
        sum = 0;
        for(UINT j=0; j<numFeatures; j++){
            sum += temp[j]*groupMeans[k][j];
        }
        modelCoeff[k][0] = -0.5 * sum + log( priorProb[k] );
        
        for(UINT j=0; j<numFeatures; j++){
            modelCoeff[k][j+1] = temp[j];
        }
    }
    
    //Setup the models for realtime prediction
    models.resize(numClasses);
    classLabels.resize(numClasses);
    
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = groupLabels[k];
        models[k].classLabel = groupLabels[k];
        models[k].priorProb = priorProb[k];
        models[k].weights = modelCoeff.getRowVector(k);
    }
    
    //Flag that the models were successfully trained
    trained = true;
    */
    
    return true;
}
// Trains the random forest: grows forestSize decision trees, each on a
// balanced bootstrapped sample whose size is a bootstrappedDatasetWeight
// fraction of the training data. Scaling (if enabled) is applied once up
// front, so the individual trees train with scaling disabled. Returns false
// on invalid input, an out-of-range bootstrap weight, or any tree failure.
bool RandomForests::train_(ClassificationData &trainingData){
    
    //Discard any previously trained forest
    clear();
    
    const unsigned int numSamples = trainingData.getNumSamples();
    const unsigned int numDimensions = trainingData.getNumDimensions();
    const unsigned int numClassesInData = trainingData.getNumClasses();
    
    if( numSamples == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( bootstrappedDatasetWeight <= 0.0 || bootstrappedDatasetWeight > 1.0 ){
        errorLog << "train_(ClassificationData &trainingData) - Bootstrapped Dataset Weight must be [> 0.0 and <= 1.0]" << endl;
        return false;
    }
    
    numInputDimensions = numDimensions;
    numClasses = numClassesInData;
    classLabels = trainingData.getClassLabels();
    ranges = trainingData.getRanges();
    
    //Scale the training data between 0 and 1 if scaling is enabled
    if( useScaling ){
        trainingData.scale(0, 1);
    }
    
    //Flag that the main algorithm has been trained encase we need to trigger any callbacks
    trained = true;
    
    //Grow the forest one tree at a time
    forest.reserve( forestSize );
    
    for(UINT i=0; i<forestSize; i++){
        
        //Draw a balanced bootstrapped dataset for this tree
        const UINT datasetSize = (UINT)(trainingData.getNumSamples() * bootstrappedDatasetWeight);
        ClassificationData bootstrapData = trainingData.getBootstrappedDataset( datasetSize, true );
        
        //Configure the tree; scaling stays off because the data is already scaled above
        DecisionTree tree;
        tree.setDecisionTreeNode( *decisionTreeNode );
        tree.enableScaling( false );
        tree.setTrainingMode( trainingMode );
        tree.setNumSplittingSteps( numRandomSplits );
        tree.setMinNumSamplesPerNode( minNumSamplesPerNode );
        tree.setMaxDepth( maxDepth );
        tree.enableNullRejection( useNullRejection );
        tree.setRemoveFeaturesAtEachSpilt( removeFeaturesAtEachSpilt );
        
        trainingLog << "Training forest " << i+1 << "/" << forestSize << "..." << endl;
        
        //Train this tree on its bootstrap sample
        if( !tree.train( bootstrapData ) ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to train tree at forest index: " << i << endl;
            clear();
            return false;
        }
        
        //Store a deep copy of the trained tree in the forest
        forest.push_back( tree.deepCopyTree() );
    }
    
    return true;
}