bool SwipeDetector::train_(ClassificationData &trainingData) {
    
    //Remove any existing model before training
    clear();
    
    const unsigned int numSamples = trainingData.getNumSamples();
    const unsigned int numDimensions = trainingData.getNumDimensions();
    
    //Reject an empty dataset
    if( numSamples == 0 ) {
        errorLog << "train_(trainingData &labelledTrainingData) - Training data has zero samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = numDimensions;
    numClasses = 2; //Swipe detection is always binary [1 == swipe detected, everything else means no swipe detected]
    
    classLabels.resize( 2 );
    classLabels[0] = 1; //Swipe
    classLabels[1] = 2; //No Swipe
    nullRejectionThresholds.resize(2,0);
    
    ranges = trainingData.getRanges();
    
    //If scaling is enabled, normalize the training data to the range [0 1]
    if( useScaling ) trainingData.scale(0, 1);
    
    //There is currently no automatic training for swipe detection; the user must
    //set the thresholds manually, so simply flag the model as trained
    trained = true;
    
    return true;
}
int main(void){ cout << "ClassificationData Test" << endl; ClassificationData cdata; // load data file that in Nick Gillian Format if(cdata.loadDatasetFromFile("irisNG.txt")){ cout << "error loading csv file" << endl; } cdata.printStats(); cout << "convert dataset to csv" << endl; //convert it to CSV. the first column indicate the class cdata.saveDatasetToCSVFile("irisCSVFromNG.txt"); //obviously we can load the data from CSV that we generated //note that class names are now lost cdata.loadDatasetFromCSVFile("irisCSVFromNG.txt"); cdata.printStats(); //try to load a CSV file that includes strings //cdata.loadDatasetFromCSVFile("irisCSV.txt", 4); //commented out because we get error while loading //load CSV file without strings but the classes are stored is the 5th column cdata.loadDatasetFromCSVFile("irisCSVNoText.txt", 4); cdata.printStats(); cdata.loadDatasetFromCSVFile("TestCSV.txt"); cdata.printStats(); return 0; }
bool SVM::convertClassificationDataToLIBSVMFormat(ClassificationData &trainingData){
    
    //Release any previously allocated problem set
    deleteProblemSet();
    
    const UINT numSamples = trainingData.getNumSamples();
    numInputDimensions = trainingData.getNumDimensions();
    
    //Compute the ranges in case the data needs to be scaled later
    ranges = trainingData.getRanges();
    
    //Allocate the LIBSVM problem structure
    prob.l = numSamples;
    prob.x = new svm_node*[numSamples];
    prob.y = new double[numSamples];
    problemSet = true;
    
    for(UINT i=0; i<numSamples; i++){
        
        //The regression target for LIBSVM is the sample's class label
        prob.y[i] = trainingData[i].getClassLabel();
        
        //LIBSVM expects a terminating dummy node at the end of each sample, hence the +1
        prob.x[i] = new svm_node[numInputDimensions+1];
        for(UINT j=0; j<numInputDimensions; j++){
            prob.x[i][j].index = j+1; //LIBSVM feature indices start at 1
            prob.x[i][j].value = trainingData[i].getSample()[j];
        }
        
        //Terminate this sample's node list with the sentinel index of -1
        prob.x[i][numInputDimensions].index = -1;
        prob.x[i][numInputDimensions].value = 0;
    }
    
    return true;
}
// Rebuilds the label distributions stored at every leaf node of every tree in the
// forest by streaming the training data through the trees, label by label.
// NOTE(review): verbosityLevel is currently unused in this function.
void Forest::RefineLeafNodes(ClassificationData& data, int verbosityLevel)
{
	// reset label distributions of all leaf nodes in the forest
	for(int t=0; t < nTrees; ++t)
		trees[t].ClearLeafNodes();

	// refine for each label
	for(unsigned int i=0; i<labels.size(); i++)
	{
		int nPoints = 0;

		// load training data in chunks so the whole dataset never has to fit in memory
		while((nPoints = data.LoadChunkForLabel(labels[i], MAX_DATAPOINTS_TO_LOAD)) > 0)
		{
			#pragma omp parallel
			{
				// each tree is refined independently, so trees can be processed
				// in parallel; nowait is safe because no code follows the loop
				// inside the parallel region
				#pragma omp for nowait
				for(int t=0; t < nTrees; ++t)
				{
					trees[t].RefineLeafNodes(data, nPoints, i);
				}
			}
		}
	}

	// normalize distributions (account for inbalanced amount of available data per label)
	for(int t=0; t < nTrees; ++t)
	{
		trees[t].UpdateLeafNodes(labels, data.GetCountPerLabel());
	}
}
bool ClassificationData::merge(const ClassificationData &labelledData){ if( labelledData.getNumDimensions() != numDimensions ){ errorLog << "merge(const ClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl; return false; } //The dataset has changed so flag that any previous cross validation setup will now not work crossValidationSetup = false; crossValidationIndexs.clear(); //Reserve the memory reserve( getNumSamples() + labelledData.getNumSamples() ); //Add the data from the labelledData to this instance for(UINT i=0; i<labelledData.getNumSamples(); i++){ addSample(labelledData[i].getClassLabel(), labelledData[i].getSample()); } //Set the class names from the dataset vector< ClassTracker > classTracker = labelledData.getClassTracker(); for(UINT i=0; i<classTracker.size(); i++){ setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel); } //Sort the class labels sortClassLabels(); return true; }
int main (int argc, const char * argv[]) { //Create a new gesture recognition pipeline GestureRecognitionPipeline pipeline; //Add an ANBC module pipeline.setClassifier( ANBC() ); //Add a ClassLabelFilter as a post processing module with a minCount of 5 and a buffer size of 10 pipeline.addPostProcessingModule( ClassLabelFilter(5,10) ); //Load some training data to train and test the classifier ClassificationData trainingData; ClassificationData testData; if( !trainingData.loadDatasetFromFile("ClassLabelFilterTrainingData.txt") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } if( !testData.loadDatasetFromFile("ClassLabelFilterTestData.txt") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } //Train the classifier if( !pipeline.train( trainingData ) ){ cout << "Failed to train classifier!\n"; return EXIT_FAILURE; } //Use the test dataset to demonstrate the output of the ClassLabelFilter for(UINT i=0; i<testData.getNumSamples(); i++){ VectorDouble inputVector = testData[i].getSample(); if( !pipeline.predict( inputVector ) ){ cout << "Failed to perform prediction for test sampel: " << i <<"\n"; return EXIT_FAILURE; } //Get the predicted class label (this will be the processed class label) UINT predictedClassLabel = pipeline.getPredictedClassLabel(); //Get the unprocessed class label (i.e. the direct output of the classifier) UINT unprocessedClassLabel = pipeline.getUnProcessedPredictedClassLabel(); //Also print the results to the screen cout << "Processed Class Label: \t" << predictedClassLabel << "\tUnprocessed Class Label: \t" << unprocessedClassLabel << endl; } return EXIT_SUCCESS; }
//Returns a new dataset of numSamples samples drawn (with replacement) from this dataset.
//If numSamples is 0 the new dataset gets the same size as this one. If balanceDataset is
//true, samples are drawn so each class contributes roughly numSamples/K samples.
ClassificationData ClassificationData::getBootstrappedDataset(UINT numSamples,bool balanceDataset) const{
    
    Random rand;
    ClassificationData newDataset;
    newDataset.setNumDimensions( getNumDimensions() );
    newDataset.setAllowNullGestureClass( allowNullGestureClass );
    newDataset.setExternalRanges( externalRanges, useExternalRanges );
    
    //Default to the size of the current dataset
    if( numSamples == 0 ) numSamples = totalNumSamples;
    
    newDataset.reserve( numSamples );
    
    const UINT K = getNumClasses();
    
    //Add all the class labels to the new dataset to ensure the dataset has a list of all the labels
    for(UINT k=0; k<K; k++){
        newDataset.addClass( classTracker[k].classLabel );
    }
    
    if( balanceDataset ){
        //Group the sample indices by class so we can draw evenly from every class
        std::vector< std::vector< UINT > > classIndexs( K );
        for(UINT i=0; i<totalNumSamples; i++){
            classIndexs[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
        }
        
        //Number of samples to draw from each class (numSamples split evenly across the K classes)
        UINT numSamplesPerClass = (UINT)floor( numSamples / double(K) );
        
        //Draw numSamplesPerClass samples from each class in turn, advancing to the
        //next class once the per-class quota is met (the last class absorbs any remainder)
        UINT classIndex = 0;
        UINT classCounter = 0;
        UINT randomIndex = 0;
        for(UINT i=0; i<numSamples; i++){
            //NOTE(review): assumes getRandomNumberInt's upper bound is exclusive — TODO confirm,
            //otherwise this could index one past the end of classIndexs[ classIndex ]
            randomIndex = rand.getRandomNumberInt(0, (UINT)classIndexs[ classIndex ].size() );
            randomIndex = classIndexs[ classIndex ][ randomIndex ];
            newDataset.addSample(data[ randomIndex ].getClassLabel(), data[ randomIndex ].getSample());
            if( classCounter++ >= numSamplesPerClass && classIndex+1 < K ){
                classCounter = 0;
                classIndex++;
            }
        }
        
    }else{
        //Randomly select the training samples to add to the new data set
        UINT randomIndex;
        for(UINT i=0; i<numSamples; i++){
            randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
            newDataset.addSample( data[randomIndex].getClassLabel(), data[randomIndex].getSample() );
        }
    }
    
    //Sort the class labels so they are in order
    newDataset.sortClassLabels();
    
    return newDataset;
}
int main (int argc, const char * argv[])
{
    //Load the example data
    ClassificationData data;
    
    if( !data.loadDatasetFromFile("WiiAccShakeData.txt") ){
        cout << "ERROR: Failed to load data from file!\n";
        return EXIT_FAILURE;
    }
    
    //The variables used to initialize the zero crossing counter feature extraction
    UINT searchWindowSize = 20;
    double deadZoneThreshold = 0.01;
    UINT numDimensions = data.getNumDimensions();
    UINT featureMode = ZeroCrossingCounter::INDEPENDANT_FEATURE_MODE; //This could also be ZeroCrossingCounter::COMBINED_FEATURE_MODE
    
    //Create a new instance of the ZeroCrossingCounter feature extraction
    ZeroCrossingCounter zeroCrossingCounter(searchWindowSize,deadZoneThreshold,numDimensions,featureMode);
    
    //Loop over the accelerometer data, at each time sample (i) compute the features
    //using the new sample and print the results to standard output
    for(UINT i=0; i<data.getNumSamples(); i++){
        
        //Compute the features using this new sample
        zeroCrossingCounter.computeFeatures( data[i].getSample() );
        
        //Print the input sample
        cout << "InputVector: ";
        for(UINT j=0; j<data.getNumDimensions(); j++){
            cout << data[i].getSample()[j] << "\t";
        }
        
        //Get the latest feature vector
        VectorDouble featureVector = zeroCrossingCounter.getFeatureVector();
        
        //Print the features, tab separated
        cout << "FeatureVector: ";
        for(UINT j=0; j<featureVector.size(); j++){
            cout << featureVector[j];
            if( j != featureVector.size()-1 ) cout << "\t";
        }
        cout << endl;
    }
    
    //Save the zero crossing counter settings to a file
    zeroCrossingCounter.saveModelToFile("ZeroCrossingCounterSettings.txt");
    
    //You can then load the settings again if you need them
    zeroCrossingCounter.loadModelFromFile("ZeroCrossingCounterSettings.txt");
    
    return EXIT_SUCCESS;
}
bool DecisionTreeClusterNode::computeSplit( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){ const UINT M = trainingData.getNumSamples(); const UINT N = features.getSize(); const UINT K = classLabels.getSize(); if( N == 0 ) return false; if( K == 0 ) return false; minError = grt_numeric_limits< Float >::max(); Random random; UINT bestFeatureIndex = 0; Float bestThreshold = 0; Float error = 0; Vector< UINT > groupIndex(M); Vector< MinMax > ranges = trainingData.getRanges(); MatrixDouble data(M,1); //This will store our temporary data for each dimension //Randomly select which features we want to use UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps; Vector< UINT > randomFeatures = random.getRandomSubset( 0, N, numRandomFeatures ); //Loop over each random feature and try and find the best split point for(UINT n=0; n<numRandomFeatures; n++){ featureIndex = features[ randomFeatures[n] ]; //Use the data in this feature dimension to create a sum dataset for(UINT i=0; i<M; i++){ data[i][0] = trainingData[i][featureIndex]; } if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){ //Store the best threshold and feature index if( error < minError ){ minError = error; bestThreshold = threshold; bestFeatureIndex = featureIndex; } } } //Set the best feature index that will be returned to the DecisionTree that called this function featureIndex = bestFeatureIndex; //Store the node size, feature index, best threshold and class probabilities for this node set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) ); return true; }
//Returns a new dataset built by sampling (with replacement) from this dataset.
//Passing numSamples == 0 produces a bootstrap of the same size as this dataset.
ClassificationData ClassificationData::getBootstrappedDataset(UINT numSamples) const{
    
    Random rng;
    ClassificationData bootstrap;
    bootstrap.setNumDimensions( getNumDimensions() );
    bootstrap.setAllowNullGestureClass( allowNullGestureClass );
    bootstrap.setExternalRanges( externalRanges, useExternalRanges );
    
    //Default to the size of this dataset
    if( numSamples == 0 ) numSamples = totalNumSamples;
    
    bootstrap.reserve( numSamples );
    
    //Register every class label up front so the new dataset knows about all of them
    for(UINT k=0; k<getNumClasses(); k++){
        bootstrap.addClass( classTracker[k].classLabel );
    }
    
    //Draw the samples with replacement
    for(UINT i=0; i<numSamples; i++){
        const UINT idx = rng.getRandomNumberInt(0, totalNumSamples);
        bootstrap.addSample( data[idx].getClassLabel(), data[idx].getSample() );
    }
    
    //Keep the class labels ordered
    bootstrap.sortClassLabels();
    
    return bootstrap;
}
int main (int argc, const char * argv[]) { //Load the example data ClassificationData data; if( !data.load("WiiAccShakeData.grt") ){ cout << "ERROR: Failed to load data from file!\n"; return EXIT_FAILURE; } //The variables used to initialize the MovementIndex feature extraction UINT windowSize = 10; UINT numDimensions = data.getNumDimensions(); //Create a new instance of the MovementIndex feature extraction MovementIndex movementIndex(windowSize,numDimensions); //Loop over the accelerometer data, at each time sample (i) compute the features using the new sample and then write the results to a file for(UINT i=0; i<data.getNumSamples(); i++){ //Compute the features using this new sample movementIndex.computeFeatures( data[i].getSample() ); //Write the data cout << "InputVector: "; for(UINT j=0; j<data.getNumDimensions(); j++){ cout << data[i].getSample()[j] << "\t"; } //Get the latest feature vector VectorFloat featureVector = movementIndex.getFeatureVector(); //Write the features cout << "FeatureVector: "; for(UINT j=0; j<featureVector.size(); j++){ cout << featureVector[j]; if( j != featureVector.size()-1 ) cout << "\t"; } cout << endl; } //Save the MovementIndex settings to a file movementIndex.save("MovementIndexSettings.grt"); //You can then load the settings again if you need them movementIndex.load("MovementIndexSettings.grt"); return EXIT_SUCCESS; }
bool SVM::train_(ClassificationData &trainingData){
    
    //Remove any existing model
    clear();
    
    //Reject an empty dataset
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    //Convert the labelled classification data into the LIBSVM data format
    if( !convertClassificationDataToLIBSVMFormat(trainingData) ){
        errorLog << "train_(ClassificationData &trainingData) - Failed To Convert Labelled Classification Data To LIBSVM Format!" << endl;
        return false;
    }
    
    //With auto gamma enabled, gamma defaults to 1/numInputDimensions
    if( useAutoGamma ) param.gamma = 1.0/numInputDimensions;
    
    //Train the model
    if( !trainSVM() ){
        errorLog << "train_(ClassificationData &trainingData) - Failed To Train SVM Model!" << endl;
        return false;
    }
    
    return true;
}
//Returns the test data for the given cross-validation fold. Returns an empty
//dataset if cross validation has not been set up or foldIndex is out of range.
ClassificationData ClassificationData::getTestFoldData(const UINT foldIndex) const{
    
    ClassificationData testData;
    testData.setNumDimensions( numDimensions );
    testData.setAllowNullGestureClass( allowNullGestureClass );
    
    if( !crossValidationSetup ) return testData;
    
    if( foldIndex >= kFoldValue ) return testData;
    
    //Add the class labels to make sure they all exist
    //BUG FIX: iterate over the number of classes, not the number of samples —
    //classTracker only has getNumClasses() entries, so the old getNumSamples()
    //bound read past the end of classTracker
    for(UINT k=0; k<getNumClasses(); k++){
        testData.addClass( classTracker[k].classLabel, classTracker[k].className );
    }
    
    testData.reserve( (UINT)crossValidationIndexs[ foldIndex ].size() );
    
    //Add the data to the test fold
    UINT index = 0;
    for(UINT i=0; i<crossValidationIndexs[ foldIndex ].size(); i++){
        index = crossValidationIndexs[ foldIndex ][i];
        testData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
    }
    
    //Sort the class labels
    testData.sortClassLabels();
    
    return testData;
}
bool BAG::train_(ClassificationData &trainingData){
    
    //Wipe any previously trained models
    clear();
    
    const unsigned int numSamples = trainingData.getNumSamples();
    const unsigned int numDimensions = trainingData.getNumDimensions();
    const unsigned int numTrainingClasses = trainingData.getNumClasses();
    
    if( numSamples == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    numInputDimensions = numDimensions;
    numClasses = numTrainingClasses;
    classLabels.resize(numTrainingClasses);
    ranges = trainingData.getRanges();
    
    //Normalize the training data to [0 1] if scaling is enabled
    if( useScaling ){
        trainingData.scale(0, 1);
    }
    
    const UINT ensembleSize = (UINT)ensemble.size();
    
    //The ensemble must contain at least one classifier
    if( ensembleSize == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The ensemble size is zero! You need to add some classifiers to the ensemble first." << endl;
        return false;
    }
    
    //Every slot in the ensemble must hold a valid classifier
    for(UINT i=0; i<ensembleSize; i++){
        if( ensemble[i] == NULL ){
            errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " has not been set!" << endl;
            return false;
        }
    }
    
    //Train each ensemble member on its own bootstrapped resample of the training data
    for(UINT i=0; i<ensembleSize; i++){
        
        ClassificationData boostedDataset = trainingData.getBootstrappedDataset();
        
        trainingLog << "Training ensemble " << i+1 << ". Ensemble type: " << ensemble[i]->getClassType() << endl;
        
        if( !ensemble[i]->train( boostedDataset ) ){
            errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " failed training!" << endl;
            return false;
        }
    }
    
    //Store the class labels from the training data
    classLabels = trainingData.getClassLabels();
    
    //Flag that the model has been trained
    trained = true;
    return trained;
}
bool ANBC::setWeights(const ClassificationData &weightsData){
    
    //Reject an empty weights dataset
    if( weightsData.getNumSamples() == 0 ) return false;
    
    weightsDataSet = true;
    this->weightsData = weightsData;
    return true;
}
bool HierarchicalClustering::train_(ClassificationData &trainingData){
    
    //An empty dataset cannot be clustered
    if( trainingData.getNumSamples() == 0 ){
        return false;
    }
    
    //Copy the labelled samples into a plain matrix (the labels are discarded)
    M = trainingData.getNumSamples();
    N = trainingData.getNumDimensions();
    MatrixFloat matrix(M,N);
    for(UINT row=0; row<M; row++){
        for(UINT col=0; col<N; col++){
            matrix[row][col] = trainingData[row][col];
        }
    }
    
    //Delegate to the matrix-based training
    return train_( matrix );
}
// For each available point, traverse the tree to its resulting leaf and
// increment that leaf's absolute label distribution for the given label index.
void Tree::RefineLeafNodes(ClassificationData& data, int nPoints, int labelIdx)
{
	for(int i=0; i<nPoints; ++i)
	{
		const int leafIdx = GetResultingLeafNode(data.GetFeatures(i));
		nodes[leafIdx].AddToAbsLabelDistribution(labelIdx);
	}
}
int main (int argc, const char * argv[]) { //Create a new KMeans instance KMeans kmeans; kmeans.setComputeTheta( true ); kmeans.setMinChange( 1.0e-10 ); kmeans.setMinNumEpochs( 10 ); kmeans.setMaxNumEpochs( 10000 ); //There are a number of ways of training the KMeans algorithm, depending on what you need the KMeans for //These are: //- with labelled training data (in the ClassificationData format) //- with unlablled training data (in the UnlabelledData format) //- with unlabelled training data (in a simple MatrixDouble format) //This example shows you how to train the algorithm with ClassificationData //Load some training data to train the KMeans algorithm ClassificationData trainingData; if( !trainingData.load("LabelledClusterData.csv") ){ cout << "Failed to load training data!\n"; return EXIT_FAILURE; } //Train the KMeans algorithm - K will automatically be set to the number of classes in the training dataset if( !kmeans.train( trainingData ) ){ cout << "Failed to train model!\n"; return EXIT_FAILURE; } //Get the K clusters from the KMeans instance and print them cout << "\nClusters:\n"; MatrixFloat clusters = kmeans.getClusters(); for(unsigned int k=0; k<clusters.getNumRows(); k++){ for(unsigned int n=0; n<clusters.getNumCols(); n++){ cout << clusters[k][n] << "\t"; }cout << endl; } return EXIT_SUCCESS; }
//Flattens the tracked time series into a ClassificationData set, one sample per
//time-series row. Null-gesture segments are skipped unless includeNullGestures is true.
ClassificationData TimeSeriesClassificationDataStream::getClassificationData( const bool includeNullGestures ) const {
    
    ClassificationData result;
    result.setNumDimensions( getNumDimensions() );
    result.setAllowNullGestureClass( includeNullGestures );
    
    for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){
        
        const UINT classLabel = timeSeriesPositionTracker[i].getClassLabel();
        
        //Skip null-gesture segments unless the caller asked for them
        if( !includeNullGestures && classLabel == GRT_DEFAULT_NULL_CLASS_LABEL ) continue;
        
        //Add every row of this segment as an individual sample
        MatrixDouble segment = getTimeSeriesData( timeSeriesPositionTracker[i] );
        for(UINT row=0; row<segment.getNumRows(); row++){
            result.addSample( classLabel, segment.getRowVector(row) );
        }
    }
    
    return result;
}
int main (int argc, const char * argv[]) { GestureRecognitionPipeline pipeline; ANBC anbc; ClassificationData trainingData; trainingData.loadDatasetFromFile("training-data.txt") pipeline.setClassifier(anbc); pipeline.train(trainingData); VectorDouble inputVector(SAMPLE_DIMENSION) = getDataFromSensor(); pipeline.predict(inputVector); UINT predictedClassLabel = pipeline.getPredictedClassLabel(); double maxLikelihood = pipeline.getMaximumLikelihood(); printf("predictedClassLabel : %d , MaximumLikelihood : %f \n", predictedClassLabel, maxLikelihood); return EXIT_SUCCESS; }
//Computes the between-class scatter matrix SB: the sum over all classes of the
//outer product of (class mean - total mean), weighted by the class sample count.
MatrixFloat LDA::computeBetweenClassScatterMatrix( ClassificationData &data ){
    
    MatrixFloat sb(numInputDimensions,numInputDimensions);
    sb.setAllValues( 0 );
    
    MatrixFloat classMean = data.getClassMean();
    VectorDouble totalMean = data.getMean();
    
    for(UINT k=0; k<numClasses; k++){
        
        UINT numSamplesInClass = data.getClassTracker()[k].counter;
        
        for(UINT m=0; m<numInputDimensions; m++){
            const Float diffM = classMean[k][m]-totalMean[m];
            for(UINT n=0; n<numInputDimensions; n++){
                sb[m][n] += diffM * (classMean[k][n]-totalMean[n]) * Float(numSamplesInClass);
            }
        }
    }
    
    return sb;
}
//Computes the within-class scatter matrix SW by summing the covariance matrix
//of each class in the dataset.
MatrixFloat LDA::computeWithinClassScatterMatrix( ClassificationData &data ){
    
    MatrixFloat sw(numInputDimensions,numInputDimensions);
    sw.setAllValues( 0 );
    
    for(UINT k=0; k<numClasses; k++){
        
        //Covariance of class k's samples
        ClassificationData classData = data.getClassData( data.getClassTracker()[k].classLabel );
        MatrixFloat classCov = classData.getCovarianceMatrix();
        
        //Accumulate it into the running scatter matrix
        for(UINT row=0; row<numInputDimensions; row++){
            for(UINT col=0; col<numInputDimensions; col++){
                sw[row][col] += classCov[row][col];
            }
        }
    }
    
    return sw;
}
//Returns the training data for the given cross-validation fold, i.e. all samples
//in every fold EXCEPT foldIndex. Returns an empty dataset on error.
ClassificationData ClassificationData::getTrainingFoldData(const UINT foldIndex) const{
    
    ClassificationData trainingData;
    trainingData.setNumDimensions( numDimensions );
    trainingData.setAllowNullGestureClass( allowNullGestureClass );
    
    if( !crossValidationSetup ){
        errorLog << "getTrainingFoldData(const UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return trainingData;
    }
    
    if( foldIndex >= kFoldValue ) return trainingData;
    
    //Add the class labels to make sure they all exist
    //BUG FIX: iterate over the number of classes, not the number of samples —
    //classTracker only has getNumClasses() entries, so the old getNumSamples()
    //bound read past the end of classTracker
    for(UINT k=0; k<getNumClasses(); k++){
        trainingData.addClass( classTracker[k].classLabel, classTracker[k].className );
    }
    
    //Add the data to the training set, this will consist of all the data that is NOT in the foldIndex
    UINT index = 0;
    for(UINT k=0; k<kFoldValue; k++){
        if( k != foldIndex ){
            for(UINT i=0; i<crossValidationIndexs[k].size(); i++){
                index = crossValidationIndexs[k][i];
                trainingData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
            }
        }
    }
    
    //Sort the class labels
    trainingData.sortClassLabels();
    
    return trainingData;
}
//Returns a new dataset containing only the samples that belong to classLabel.
ClassificationData ClassificationData::getClassData(const UINT classLabel) const{
    
    ClassificationData subset;
    subset.setNumDimensions( this->numDimensions );
    subset.setAllowNullGestureClass( allowNullGestureClass );
    
    //Reserve exactly as many samples as this class holds, if the class is known
    for(UINT i=0; i<classTracker.size(); i++){
        if( classTracker[i].classLabel == classLabel ){
            subset.reserve( classTracker[i].counter );
            break;
        }
    }
    
    //Copy over every sample whose label matches
    for(UINT i=0; i<totalNumSamples; i++){
        if( data[i].getClassLabel() == classLabel ){
            subset.addSample(classLabel, data[i].getSample());
        }
    }
    
    return subset;
}
bool KMeans::train_(ClassificationData &trainingData){
    
    //Reject an empty dataset
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The training data is empty!" << std::endl;
        return false;
    }
    
    //Use one cluster per class in the labelled data
    numClusters = trainingData.getNumClasses();
    
    //Strip the labels and pack the samples into a matrix
    const UINT numSamples = trainingData.getNumSamples();
    const UINT numDims = trainingData.getNumDimensions();
    MatrixFloat samples(numSamples,numDims);
    for(UINT i=0; i<numSamples; i++){
        for(UINT j=0; j<numDims; j++){
            samples[i][j] = trainingData[i][j];
        }
    }
    
    //Hand off to the matrix-based K-Means training
    return train_( samples );
}
bool ClassificationData::generateGaussDataset( const std::string filename, const UINT numSamples, const UINT numClasses, const UINT numDimensions, const double range, const double sigma ){ Random random; //Generate a simple model that will be used to generate the main dataset MatrixDouble model(numClasses,numDimensions); for(UINT k=0; k<numClasses; k++){ for(UINT j=0; j<numDimensions; j++){ model[k][j] = random.getRandomNumberUniform(-range,range); } } //Use the model above to generate the main dataset ClassificationData data; data.setNumDimensions( numDimensions ); for(UINT i=0; i<numSamples; i++){ //Randomly select which class this sample belongs to UINT k = random.getRandomNumberInt( 0, numClasses ); //Generate a sample using the model (+ some Gaussian noise) vector< double > sample( numDimensions ); for(UINT j=0; j<numDimensions; j++){ sample[j] = model[k][j] + random.getRandomNumberGauss(0,sigma); } //By default in the GRT, the class label should not be 0, so add 1 UINT classLabel = k + 1; //Add the labeled sample to the dataset data.addSample( classLabel, sample ); } //Save the dataset to a CSV file return data.save( filename ); }
// Tests the learning algorithm on a basic dataset TEST(BAG, TrainBasicDataset) { BAG bag; //Check the module is not trained EXPECT_TRUE( !bag.getTrained() ); //Generate a basic dataset const UINT numSamples = 10000; const UINT numClasses = 10; const UINT numDimensions = 100; ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 ); ClassificationData trainingData; EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) ); ClassificationData testData = trainingData.split( 50 ); //Add an adaptive naive bayes classifier to the BAG ensemble bag.addClassifierToEnsemble( ANBC() ); //Add a MinDist classifier to the BAG ensemble, using two clusters MinDist min_dist_two_clusters; min_dist_two_clusters.setNumClusters(2); bag.addClassifierToEnsemble( min_dist_two_clusters ); //Add a MinDist classifier to the BAG ensemble, using five clusters MinDist min_dist_five_clusters; min_dist_five_clusters.setNumClusters(5); bag.addClassifierToEnsemble( min_dist_five_clusters ); //Train the classifier EXPECT_TRUE( bag.train( trainingData ) ); EXPECT_TRUE( bag.getTrained() ); EXPECT_TRUE( bag.print() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( bag.predict( testData[i].getSample() ) ); } EXPECT_TRUE( bag.save( "bag_model.grt" ) ); bag.clear(); EXPECT_TRUE( !bag.getTrained() ); EXPECT_TRUE( bag.load( "bag_model.grt" ) ); EXPECT_TRUE( bag.getTrained() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( bag.predict( testData[i].getSample() ) ); } }
// Tests the learning algorithm on a basic dataset TEST(KNN, TrainBasicDataset) { KNN knn; //Check the module is not trained EXPECT_TRUE( !knn.getTrained() ); //Generate a basic dataset const UINT numSamples = 1000; const UINT numClasses = 10; const UINT numDimensions = 10; ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 ); ClassificationData trainingData; EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) ); ClassificationData testData = trainingData.split( 50 ); //Train the classifier EXPECT_TRUE( knn.train( trainingData ) ); EXPECT_TRUE( knn.getTrained() ); EXPECT_TRUE( knn.print() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( knn.predict( testData[i].getSample() ) ); } EXPECT_TRUE( knn.save( "knn_model.grt" ) ); knn.clear(); EXPECT_TRUE( !knn.getTrained() ); EXPECT_TRUE( knn.load( "knn_model.grt" ) ); EXPECT_TRUE( knn.getTrained() ); for(UINT i=0; i<testData.getNumSamples(); i++){ EXPECT_TRUE( knn.predict( testData[i].getSample() ) ); } }
bool Softmax::train_(ClassificationData &trainingData){
    
    //Discard any previously trained model
    clear();
    
    const unsigned int numSamples = trainingData.getNumSamples();
    const unsigned int numDimensions = trainingData.getNumDimensions();
    const unsigned int numTrainingClasses = trainingData.getNumClasses();
    
    if( numSamples == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = numDimensions;
    numClasses = numTrainingClasses;
    models.resize(numTrainingClasses);
    classLabels.resize(numTrainingClasses);
    ranges = trainingData.getRanges();
    
    //Normalize the training data to [0 1] if scaling is enabled
    if( useScaling ){
        trainingData.scale(0, 1);
    }
    
    //Train one one-vs-all regression model per class in the training data
    for(UINT k=0; k<numClasses; k++){
        
        //Set the class label
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
        
        //Train the model
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << "train(ClassificationData labelledTrainingData) - Failed to train model for class: " << classLabels[k] << std::endl;
            return false;
        }
    }
    
    //Flag that the algorithm has been trained
    trained = true;
    return trained;
}
//NOTE: this module is unfinished. The function always logs a warning and returns
//false; everything after the early return below is unreachable scaffolding kept
//for future development (the bulk of the draft implementation is commented out).
bool LDA::train(ClassificationData trainingData){
    
    errorLog << "SORRY - this module is still under development and can't be used yet!" << std::endl;
    return false;
    
    //---- everything below here is currently unreachable ----
    
    //Reset any previous model
    numInputDimensions = 0;
    numClasses = 0;
    models.clear();
    classLabels.clear();
    trained = false;
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledClassificationData trainingData) - There is no training data to train the model!" << std::endl;
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    
    //Calculate the between scatter matrix
    MatrixFloat SB = computeBetweenClassScatterMatrix( trainingData );
    
    //Calculate the within scatter matrix
    MatrixFloat SW = computeWithinClassScatterMatrix( trainingData );
    
    /*
    //Counters and stat containers
    vector< UINT > groupLabels(numClasses);
    VectorDouble groupCounters(numClasses);
    VectorDouble priorProb(numClasses);
    MatrixFloat groupMeans(numClasses,numFeatures);
    MatrixFloat pCov(numFeatures,numFeatures);
    MatrixFloat pCovInv(numFeatures,numFeatures);
    MatrixFloat modelCoeff(numClasses,numFeatures+1);
    
    pCov.setAllValues(0);
    modelCoeff.setAllValues(0);
    
    //Set the class labels and counters
    for(UINT k=0; k<numClasses; k++){
        groupLabels[k] = trainingData.getClassTracker()[k].classLabel;
        groupCounters[k] = trainingData.getClassTracker()[k].counter;
    }
    
    //Loop over the classes to compute the group stats
    for(UINT k=0; k<numClasses; k++){
        LabelledClassificationData classData = trainingData.getClassData( groupLabels[k] );
        MatrixFloat cov(numFeatures,numFeatures);
        
        //Compute class mu
        for(UINT j=0; j<numFeatures; j++){
            groupMeans[k][j] = 0;
            for(UINT i=0; i<classData.getNumSamples(); i++){
                groupMeans[k][j] += classData[i][j];
            }
            groupMeans[k][j] /= Float(classData.getNumSamples());
        }
        
        //Compute the class covariance
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                cov[m][n] = 0;
                for(UINT i=0; i<classData.getNumSamples(); i++){
                    cov[m][n] += (classData[i][m]-groupMeans[k][m]) * (classData[i][n]-groupMeans[k][n]);
                }
                cov[m][n] /= Float(classData.getNumSamples()-1);
            }
        }
        
        debugLog << "Group Cov:\n";
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                debugLog << cov[m][n] << "\t";
            }debugLog << "\n";
        }debugLog << std::endl;
        
        //Set the prior probability for this class (which is just 1/numClasses)
        priorProb[k] = 1.0/Float(numClasses);
        
        //Update the main covariance matrix
        Float weight = ((classData.getNumSamples() - 1) / Float(trainingData.getNumSamples() - numClasses) );
        debugLog << "Weight: " << weight << std::endl;
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                pCov[m][n] += weight * cov[m][n];
            }
        }
    }
    
    for(UINT k=0; k<numClasses; k++){
        debugLog << "GroupMu: " << groupLabels[k] << "\t";
        for(UINT j=0; j<numFeatures; j++){
            debugLog << groupMeans[k][j] << "\t";
        }debugLog << std::endl;
    }
    
    debugLog << "pCov:\n";
    for(UINT m=0; m<numFeatures; m++){
        for(UINT n=0; n<numFeatures; n++){
            debugLog << pCov[m][n] << "\t";
        }debugLog << "\n";
    }debugLog << std::endl;
    
    //Invert the pCov matrix
    LUDecomposition matrixInverter(pCov);
    if( !matrixInverter.inverse(pCovInv) ){
        errorLog << "Failed to invert pCov Matrix!" << std::endl;
        return false;
    }
    
    //Loop over classes to calculate linear discriminant coefficients
    Float sum = 0;
    vector< Float > temp(numFeatures);
    for(UINT k=0; k<numClasses; k++){
        //Compute the temporary vector
        for(UINT j=0; j<numFeatures; j++){
            temp[j] = 0;
            for(UINT m=0; m<numFeatures; m++){
                temp[j] += groupMeans[k][m] * pCovInv[m][j];
            }
        }
        
        //Compute the model coefficients
        sum = 0;
        for(UINT j=0; j<numFeatures; j++){
            sum += temp[j]*groupMeans[k][j];
        }
        modelCoeff[k][0] = -0.5 * sum + log( priorProb[k] );
        for(UINT j=0; j<numFeatures; j++){
            modelCoeff[k][j+1] = temp[j];
        }
    }
    
    //Setup the models for realtime prediction
    models.resize(numClasses);
    classLabels.resize(numClasses);
    
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = groupLabels[k];
        models[k].classLabel = groupLabels[k];
        models[k].priorProb = priorProb[k];
        models[k].weights = modelCoeff.getRowVector(k);
    }
    
    //Flag that the models were successfully trained
    trained = true;
    */
    
    return true;
}