ClassificationData ClassificationData::getBootstrappedDataset(UINT numSamples, bool balanceDataset) const{

    Random rand;
    ClassificationData newDataset;
    newDataset.setNumDimensions( getNumDimensions() );
    newDataset.setAllowNullGestureClass( allowNullGestureClass );
    newDataset.setExternalRanges( externalRanges, useExternalRanges );

    if( numSamples == 0 ) numSamples = totalNumSamples;

    newDataset.reserve( numSamples );

    const UINT K = getNumClasses();

    //Add all the class labels to the new dataset to ensure the dataset has a list of all the labels
    for(UINT k=0; k<K; k++){
        newDataset.addClass( classTracker[k].classLabel );
    }

    if( balanceDataset ){
        //Group the sample indexes by class
        std::vector< std::vector< UINT > > classIndexs( K );
        for(UINT i=0; i<totalNumSamples; i++){
            classIndexs[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
        }

        //Compute the number of samples to draw from each class
        const UINT numSamplesPerClass = (UINT)floor( numSamples / double(K) );

        //Randomly select the training samples from each class
        UINT classIndex = 0;
        UINT classCounter = 0;
        UINT randomIndex = 0;
        for(UINT i=0; i<numSamples; i++){
            randomIndex = rand.getRandomNumberInt(0, (UINT)classIndexs[ classIndex ].size() );
            randomIndex = classIndexs[ classIndex ][ randomIndex ];
            newDataset.addSample( data[ randomIndex ].getClassLabel(), data[ randomIndex ].getSample() );

            //Move on to the next class once numSamplesPerClass samples have been drawn
            if( ++classCounter >= numSamplesPerClass && classIndex+1 < K ){
                classCounter = 0;
                classIndex++;
            }
        }

    }else{
        //Randomly select the training samples to add to the new dataset
        UINT randomIndex;
        for(UINT i=0; i<numSamples; i++){
            randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
            newDataset.addSample( data[randomIndex].getClassLabel(), data[randomIndex].getSample() );
        }
    }

    //Sort the class labels so they are in order
    newDataset.sortClassLabels();

    return newDataset;
}
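A minimal usage sketch (not from the GRT sources) for the bootstrap function above, assuming the GRT umbrella header GRT.h and a previously populated dataset; the function name bootstrapExample is illustrative:

#include "GRT.h"
using namespace GRT;

void bootstrapExample( const ClassificationData &dataset ){
    //Passing numSamples==0 keeps the original dataset size; true requests per-class balancing
    ClassificationData bootstrap = dataset.getBootstrappedDataset( 0, true );
    cout << "Bootstrap size: " << bootstrap.getNumSamples();
    cout << " classes: " << bootstrap.getNumClasses() << endl;
}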
ClassificationData ClassificationData::getTestFoldData(const UINT foldIndex) const{

    ClassificationData testData;
    testData.setNumDimensions( numDimensions );
    testData.setAllowNullGestureClass( allowNullGestureClass );

    if( !crossValidationSetup ) return testData;

    if( foldIndex >= kFoldValue ) return testData;

    //Add the class labels to make sure they all exist
    for(UINT k=0; k<getNumClasses(); k++){
        testData.addClass( classTracker[k].classLabel, classTracker[k].className );
    }

    testData.reserve( (UINT)crossValidationIndexs[ foldIndex ].size() );

    //Add the data to the test fold
    UINT index = 0;
    for(UINT i=0; i<crossValidationIndexs[ foldIndex ].size(); i++){
        index = crossValidationIndexs[ foldIndex ][i];
        testData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
    }

    //Sort the class labels
    testData.sortClassLabels();

    return testData;
}
ClassificationData ClassificationData::getTrainingFoldData(const UINT foldIndex) const{

    ClassificationData trainingData;
    trainingData.setNumDimensions( numDimensions );
    trainingData.setAllowNullGestureClass( allowNullGestureClass );

    if( !crossValidationSetup ){
        errorLog << "getTrainingFoldData(const UINT foldIndex) - Cross validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return trainingData;
    }

    if( foldIndex >= kFoldValue ) return trainingData;

    //Add the class labels to make sure they all exist
    for(UINT k=0; k<getNumClasses(); k++){
        trainingData.addClass( classTracker[k].classLabel, classTracker[k].className );
    }

    //Add the data to the training set, this will consist of all the data that is NOT in the foldIndex fold
    UINT index = 0;
    for(UINT k=0; k<kFoldValue; k++){
        if( k != foldIndex ){
            for(UINT i=0; i<crossValidationIndexs[k].size(); i++){
                index = crossValidationIndexs[k][i];
                trainingData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
            }
        }
    }

    //Sort the class labels
    trainingData.sortClassLabels();

    return trainingData;
}
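The two fold accessors above only return useful data after spiltDataIntoKFolds has been called. Below is a minimal cross-validation sketch under that assumption; GRT's KNN classifier is used purely as an example model, and crossValidationExample is an illustrative name, not part of the library:

#include "GRT.h"
using namespace GRT;

double crossValidationExample( ClassificationData &dataset, const UINT K = 10 ){
    //Split the dataset into K folds using stratified sampling
    if( !dataset.spiltDataIntoKFolds( K, true ) ) return 0;

    UINT numCorrect = 0;
    UINT numTotal = 0;
    for(UINT foldIndex=0; foldIndex<K; foldIndex++){
        ClassificationData trainData = dataset.getTrainingFoldData( foldIndex );
        ClassificationData testData = dataset.getTestFoldData( foldIndex );

        //Train a fresh model on the training fold and score it on the held-out fold
        KNN knn;
        if( !knn.train( trainData ) ) return 0;
        for(UINT i=0; i<testData.getNumSamples(); i++){
            if( knn.predict( testData[i].getSample() ) &&
                knn.getPredictedClassLabel() == testData[i].getClassLabel() ) numCorrect++;
            numTotal++;
        }
    }

    //Return the overall cross-validation accuracy as a percentage
    return numTotal > 0 ? double(numCorrect)/double(numTotal)*100.0 : 0;
}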
ClassificationData ClassificationData::getBootstrappedDataset(UINT numSamples) const{

    Random rand;
    ClassificationData newDataset;
    newDataset.setNumDimensions( getNumDimensions() );
    newDataset.setAllowNullGestureClass( allowNullGestureClass );
    newDataset.setExternalRanges( externalRanges, useExternalRanges );

    if( numSamples == 0 ) numSamples = totalNumSamples;

    newDataset.reserve( numSamples );

    //Add all the class labels to the new dataset to ensure the dataset has a list of all the labels
    for(UINT k=0; k<getNumClasses(); k++){
        newDataset.addClass( classTracker[k].classLabel );
    }

    //Randomly select the training samples to add to the new dataset
    UINT randomIndex;
    for(UINT i=0; i<numSamples; i++){
        randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
        newDataset.addSample( data[randomIndex].getClassLabel(), data[randomIndex].getSample() );
    }

    //Sort the class labels so they are in order
    newDataset.sortClassLabels();

    return newDataset;
}
ClassificationData TimeSeriesClassificationDataStream::getClassificationData( const bool includeNullGestures ) const {

    ClassificationData classificationData;
    classificationData.setNumDimensions( getNumDimensions() );
    classificationData.setAllowNullGestureClass( includeNullGestures );

    bool addSample = false;
    for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){
        addSample = includeNullGestures ? true : timeSeriesPositionTracker[i].getClassLabel() != GRT_DEFAULT_NULL_CLASS_LABEL;
        if( addSample ){
            MatrixDouble dataSegment = getTimeSeriesData( timeSeriesPositionTracker[i] );
            for(UINT j=0; j<dataSegment.getNumRows(); j++){
                classificationData.addSample( timeSeriesPositionTracker[i].getClassLabel(), dataSegment.getRowVector(j) );
            }
        }
    }

    return classificationData;
}
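A short sketch of how the conversion above might be used, assuming a previously recorded stream; it relies only on the function defined above plus the GRT.h header, and conversionExample is an illustrative name:

#include "GRT.h"
using namespace GRT;

void conversionExample( const TimeSeriesClassificationDataStream &stream ){
    //Flatten the labelled stream into per-frame classification samples, dropping null gestures
    ClassificationData frames = stream.getClassificationData( false );
    cout << "Converted " << frames.getNumSamples() << " frames across ";
    cout << frames.getNumClasses() << " classes" << endl;
}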
ClassificationData ClassificationData::getClassData(const UINT classLabel) const{

    ClassificationData classData;
    classData.setNumDimensions( this->numDimensions );
    classData.setAllowNullGestureClass( allowNullGestureClass );

    //Reserve the memory for the class data
    for(UINT i=0; i<classTracker.size(); i++){
        if( classTracker[i].classLabel == classLabel ){
            classData.reserve( classTracker[i].counter );
            break;
        }
    }

    for(UINT i=0; i<totalNumSamples; i++){
        if( data[i].getClassLabel() == classLabel ){
            classData.addSample( classLabel, data[i].getSample() );
        }
    }

    return classData;
}
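A minimal sketch pairing getClassData with GRT's getClassLabels accessor (an assumption here, though it appears in the AdaBoost code below) to report per-class sample counts; classStatsExample is an illustrative name:

#include "GRT.h"
using namespace GRT;

void classStatsExample( const ClassificationData &dataset ){
    //Extract each class into its own dataset and report its size
    vector< UINT > classLabels = dataset.getClassLabels();
    for(UINT k=0; k<classLabels.size(); k++){
        ClassificationData classData = dataset.getClassData( classLabels[k] );
        cout << "Class: " << classLabels[k];
        cout << " NumSamples: " << classData.getNumSamples() << endl;
    }
}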
bool ClassificationData::generateGaussDataset( const std::string filename, const UINT numSamples, const UINT numClasses, const UINT numDimensions, const double range, const double sigma ){

    Random random;

    //Generate a simple model that will be used to generate the main dataset
    MatrixDouble model(numClasses,numDimensions);
    for(UINT k=0; k<numClasses; k++){
        for(UINT j=0; j<numDimensions; j++){
            model[k][j] = random.getRandomNumberUniform(-range,range);
        }
    }

    //Use the model above to generate the main dataset
    ClassificationData data;
    data.setNumDimensions( numDimensions );

    for(UINT i=0; i<numSamples; i++){

        //Randomly select which class this sample belongs to
        UINT k = random.getRandomNumberInt( 0, numClasses );

        //Generate a sample using the model (+ some Gaussian noise)
        vector< double > sample( numDimensions );
        for(UINT j=0; j<numDimensions; j++){
            sample[j] = model[k][j] + random.getRandomNumberGauss(0,sigma);
        }

        //By default in the GRT, the class label should not be 0, so add 1
        UINT classLabel = k + 1;

        //Add the labeled sample to the dataset
        data.addSample( classLabel, sample );
    }

    //Save the dataset to a CSV file
    return data.save( filename );
}
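A quick sketch of the generator above, assuming it is exposed as a static member function (as in GRT) and reusing the CSV load call shown in the example further below; the filename and gaussDatasetExample name are illustrative:

#include "GRT.h"
using namespace GRT;

int gaussDatasetExample(){
    //Generate 1000 samples drawn from 5 Gaussian clusters in 3 dimensions
    if( !ClassificationData::generateGaussDataset( "GaussData.csv", 1000, 5, 3, 10.0, 1.0 ) ) return EXIT_FAILURE;

    //Load the synthetic dataset back so it can be used for training or testing
    ClassificationData data;
    if( !data.loadDatasetFromCSVFile( "GaussData.csv" ) ) return EXIT_FAILURE;
    cout << "Loaded " << data.getNumSamples() << " samples" << endl;
    return EXIT_SUCCESS;
}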
bool AdaBoost::train_(ClassificationData &trainingData){

    //Clear any previous model
    clear();

    if( trainingData.getNumSamples() <= 1 ){
        errorLog << "train_(ClassificationData &trainingData) - There are not enough training samples to train a model! Number of samples: " << trainingData.getNumSamples() << endl;
        return false;
    }

    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    const UINT M = trainingData.getNumSamples();
    const UINT POSITIVE_LABEL = WEAK_CLASSIFIER_POSITIVE_CLASS_LABEL;
    const UINT NEGATIVE_LABEL = WEAK_CLASSIFIER_NEGATIVE_CLASS_LABEL;
    double alpha = 0;
    const double beta = 0.001;
    double epsilon = 0;
    TrainingResult trainingResult;

    const UINT K = (UINT)weakClassifiers.size();
    if( K == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - No weakClassifiers have been set. You need to set at least one weak classifier first." << endl;
        return false;
    }

    classLabels.resize(numClasses);
    models.resize(numClasses);
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ){
        trainingData.scale(ranges,0,1);
    }

    //Create the weights vector
    VectorDouble weights(M);

    //Create the error matrix
    MatrixDouble errorMatrix(K,M);

    for(UINT classIter=0; classIter<numClasses; classIter++){

        //Get the class label for the current class
        classLabels[classIter] = trainingData.getClassLabels()[classIter];

        //Set the class label of the current model
        models[ classIter ].setClassLabel( classLabels[classIter] );

        //Setup the labels for this class, POSITIVE_LABEL == 1, NEGATIVE_LABEL == 2
        ClassificationData classData;
        classData.setNumDimensions(trainingData.getNumDimensions());
        for(UINT i=0; i<M; i++){
            UINT label = trainingData[i].getClassLabel()==classLabels[classIter] ? POSITIVE_LABEL : NEGATIVE_LABEL;
            VectorDouble trainingSample = trainingData[i].getSample();
            classData.addSample(label,trainingSample);
        }

        //Setup the initial training sample weights
        std::fill(weights.begin(),weights.end(),1.0/M);

        //Run the boosting loop
        bool keepBoosting = true;
        UINT t = 0;

        while( keepBoosting ){

            //Pick the classifier from the family of classifiers that minimizes the total error
            UINT bestClassifierIndex = 0;
            double minError = numeric_limits<double>::max();
            for(UINT k=0; k<K; k++){

                //Get the k'th possible classifier
                WeakClassifier *weakLearner = weakClassifiers[k];

                //Train the current classifier
                if( !weakLearner->train(classData,weights) ){
                    errorLog << "Failed to train weakLearner!" << endl;
                    return false;
                }

                //Compute the weighted error for this classifier
                double e = 0;
                double positiveLabel = weakLearner->getPositiveClassLabel();
                double numCorrect = 0;
                double numIncorrect = 0;
                for(UINT i=0; i<M; i++){
                    //Only penalize errors
                    double prediction = weakLearner->predict( classData[i].getSample() );
                    if( (prediction == positiveLabel && classData[i].getClassLabel() != POSITIVE_LABEL) ||  //False positive
                        (prediction != positiveLabel && classData[i].getClassLabel() == POSITIVE_LABEL) ){  //False negative
                        e += weights[i]; //Increase the error proportional to the weight of the example
                        errorMatrix[k][i] = 1; //Flag that there was an error
                        numIncorrect++;
                    }else{
                        errorMatrix[k][i] = 0; //Flag that there was no error
                        numCorrect++;
                    }
                }

                trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Classifier: " << k << " WeightedError: " << e << " NumCorrect: " << numCorrect/M << " NumIncorrect: " << numIncorrect/M << endl;

                if( e < minError ){
                    minError = e;
                    bestClassifierIndex = k;
                }
            }

            epsilon = minError;

            //Set alpha, using the M1 weight value, small weights (close to 0) will receive a strong weight in the final classifier
            alpha = 0.5 * log( (1.0-epsilon)/epsilon );

            trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Best Classifier Index: " << bestClassifierIndex << " MinError: " << minError << " Alpha: " << alpha << endl;

            if( isinf(alpha) ){ keepBoosting = false; trainingLog << "Alpha is INF. Stopping boosting for current class" << endl; }
            if( 0.5 - epsilon <= beta ){ keepBoosting = false; trainingLog << "Epsilon <= Beta. Stopping boosting for current class" << endl; }
            if( ++t >= numBoostingIterations ) keepBoosting = false;

            trainingResult.setClassificationResult(t, minError, this);
            trainingResults.push_back(trainingResult);
            trainingResultsObserverManager.notifyObservers( trainingResult );

            if( keepBoosting ){

                //Add the best weak classifier to the committee
                models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );

                //Update the weights for the next boosting iteration
                double reWeight = (1.0 - epsilon) / epsilon;
                double oldSum = 0;
                double newSum = 0;
                for(UINT i=0; i<M; i++){
                    oldSum += weights[i];
                    //Only update the weights that resulted in an incorrect prediction
                    if( errorMatrix[bestClassifierIndex][i] == 1 ) weights[i] *= reWeight;
                    newSum += weights[i];
                }

                //Normalize all the weights
                //This increases the weights of the samples that were incorrectly labelled
                //while decreasing the weights of the samples that were correctly classified
                reWeight = oldSum/newSum;
                for(UINT i=0; i<M; i++){
                    weights[i] *= reWeight;
                }

            }else{
                trainingLog << "Stopping boosting training at iteration: " << t-1 << " with an error of " << epsilon << endl;
                if( t-1 == 0 ){
                    //Add the best weak classifier to the committee (we have to add it as this is the first iteration)
                    if( isinf(alpha) ){ alpha = 1; } //If alpha is infinite then the first classifier got everything correct
                    models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                }
            }
        }
    }

    //Normalize the weights
    for(UINT k=0; k<numClasses; k++){
        models[k].normalizeWeights();
    }

    //Flag that the model has been trained
    trained = true;

    //Setup the data for prediction
    predictedClassLabel = 0;
    maxLikelihood = 0;
    classLikelihoods.resize(numClasses);
    classDistances.resize(numClasses);

    return true;
}
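A minimal end-to-end training sketch for the AdaBoost implementation above. It assumes GRT's DecisionStump as the weak classifier (set via setWeakClassifier, both assumptions about the surrounding GRT API) and reuses the Gaussian dataset generator from earlier in this section; the parameter values and adaBoostExample name are illustrative:

#include "GRT.h"
using namespace GRT;

int adaBoostExample(){
    //Build a small synthetic training set using the Gaussian generator above
    if( !ClassificationData::generateGaussDataset( "AdaBoostData.csv", 1000, 3, 2, 10.0, 1.0 ) ) return EXIT_FAILURE;
    ClassificationData trainingData;
    if( !trainingData.loadDatasetFromCSVFile( "AdaBoostData.csv" ) ) return EXIT_FAILURE;

    //Hold out 20% of the data for testing
    ClassificationData testData = trainingData.partition( 80 );

    //Register a weak classifier, then train the boosted model (train calls train_ internally)
    AdaBoost adaBoost;
    adaBoost.setWeakClassifier( DecisionStump() );
    if( !adaBoost.train( trainingData ) ) return EXIT_FAILURE;

    //Score the trained model on the held-out test data
    UINT numCorrect = 0;
    for(UINT i=0; i<testData.getNumSamples(); i++){
        adaBoost.predict( testData[i].getSample() );
        if( adaBoost.getPredictedClassLabel() == testData[i].getClassLabel() ) numCorrect++;
    }
    cout << "Accuracy: " << double(numCorrect)/double(testData.getNumSamples())*100.0 << "%" << endl;
    return EXIT_SUCCESS;
}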
#include "GRT.h"
using namespace GRT;
using namespace std;

int main (int argc, const char * argv[])
{
    //Create a new instance of the ClassificationData
    ClassificationData trainingData;

    //Set the dimensionality of the data (you need to do this before you can add any samples)
    trainingData.setNumDimensions( 3 );

    //You can also give the dataset a name (the name should have no spaces)
    trainingData.setDatasetName("DummyData");

    //You can also add some info text about the data
    trainingData.setInfoText("This data contains some dummy data");

    //Here you would grab some data from your sensor and label it with the corresponding gesture it belongs to
    UINT gestureLabel = 1;
    VectorDouble sample(3);

    //For now we will just add some random data
    Random random;
    for(UINT i=0; i<100; i++){
        sample[0] = random.getRandomNumberUniform(-1.0,1.0);
        sample[1] = random.getRandomNumberUniform(-1.0,1.0);
        sample[2] = random.getRandomNumberUniform(-1.0,1.0);

        //Add the sample to the training data
        trainingData.addSample( gestureLabel, sample );
    }

    //After recording your training data you can then save it to a file
    if( !trainingData.saveDatasetToFile( "TrainingData.txt" ) ){
        cout << "ERROR: Failed to save dataset to file!\n";
        return EXIT_FAILURE;
    }

    //This can then be loaded later
    if( !trainingData.loadDatasetFromFile( "TrainingData.txt" ) ){
        cout << "ERROR: Failed to load dataset from file!\n";
        return EXIT_FAILURE;
    }

    //You can also save and load the training data to a CSV file
    //Each row will contain a sample, with the first column containing the class label and the remaining columns containing the data
    if( !trainingData.saveDatasetToCSVFile( "TrainingData.csv" ) ){
        cout << "ERROR: Failed to save dataset to csv file!\n";
        return EXIT_FAILURE;
    }

    if( !trainingData.loadDatasetFromCSVFile( "TrainingData.csv" ) ){
        cout << "ERROR: Failed to load dataset from csv file!\n";
        return EXIT_FAILURE;
    }

    //This is how you can get some stats from the training data
    string datasetName = trainingData.getDatasetName();
    string infoText = trainingData.getInfoText();
    UINT numSamples = trainingData.getNumSamples();
    UINT numDimensions = trainingData.getNumDimensions();
    UINT numClasses = trainingData.getNumClasses();

    cout << "Dataset Name: " << datasetName << endl;
    cout << "InfoText: " << infoText << endl;
    cout << "NumberOfSamples: " << numSamples << endl;
    cout << "NumberOfDimensions: " << numDimensions << endl;
    cout << "NumberOfClasses: " << numClasses << endl;

    //You can also get the minimum and maximum ranges of the data
    vector< MinMax > ranges = trainingData.getRanges();

    cout << "The ranges of the dataset are: \n";
    for(UINT j=0; j<ranges.size(); j++){
        cout << "Dimension: " << j << " Min: " << ranges[j].minValue << " Max: " << ranges[j].maxValue << endl;
    }

    //If you want to partition the dataset into a training dataset and a test dataset then you can use the partition function
    //A value of 80 means that 80% of the original data will remain in the training dataset and 20% will be returned as the test dataset
    ClassificationData testData = trainingData.partition( 80 );

    //If you have multiple datasets that you want to merge together then use the merge function
    if( !trainingData.merge( testData ) ){
        cout << "ERROR: Failed to merge datasets!\n";
        return EXIT_FAILURE;
    }

    //If you want to run K-fold cross validation using the dataset then you should first split the dataset into K folds
    //A value of 10 splits the dataset into 10 folds and the true parameter signals that stratified sampling should be used
    if( !trainingData.spiltDataIntoKFolds( 10, true ) ){
        cout << "ERROR: Failed to spiltDataIntoKFolds!\n";
        return EXIT_FAILURE;
    }

    //After you have called the split function you can then get the training and test sets for each fold
    for(UINT foldIndex=0; foldIndex<10; foldIndex++){
        ClassificationData foldTrainingData = trainingData.getTrainingFoldData( foldIndex );
        ClassificationData foldTestingData = trainingData.getTestFoldData( foldIndex );
    }

    //If needed you can clear any training data that you have recorded
    trainingData.clear();

    return EXIT_SUCCESS;
}