예제 #1
0
//Builds a new dataset by drawing samples (with replacement) from this dataset.
//
//numSamples:     the number of samples to draw; a value of 0 means "draw totalNumSamples samples".
//balanceDataset: if true, samples are drawn class by class so every class that owns samples contributes
//                numSamples / numPopulatedClasses samples (the last class also absorbs any remainder);
//                if false, samples are drawn from the whole dataset without regard to class.
//
//Returns the new dataset. The class labels of this dataset are always registered in the result, but no
//samples are added if this dataset is empty (there is nothing to draw from).
ClassificationData ClassificationData::getBootstrappedDataset(UINT numSamples,bool balanceDataset) const{
    
    Random rand;
    ClassificationData newDataset;
    newDataset.setNumDimensions( getNumDimensions() );
    newDataset.setAllowNullGestureClass( allowNullGestureClass );
    newDataset.setExternalRanges( externalRanges, useExternalRanges );
    
    if( numSamples == 0 ) numSamples = totalNumSamples;
    
    newDataset.reserve( numSamples );

    const UINT K = getNumClasses(); 
    
    //Add all the class labels to the new dataset to ensure the dataset has a list of all the labels
    for(UINT k=0; k<K; k++){
        newDataset.addClass( classTracker[k].classLabel );
    }

    //An empty dataset has nothing to sample from, drawing from it would index data[] out of range
    const UINT numToDraw = totalNumSamples > 0 ? numSamples : 0;

    if( balanceDataset ){
        //Group the sample indices by class
        std::vector< std::vector< UINT > > classIndexs( K );
        for(UINT i=0; i<totalNumSamples; i++){
            const UINT classIndex = getClassLabelIndexValue( data[i].getClassLabel() );
            if( classIndex < K ) classIndexs[ classIndex ].push_back( i );
        }

        //Only classes that actually own samples can be drawn from (a class can be registered without samples)
        std::vector< UINT > populatedClasses;
        for(UINT k=0; k<K; k++){
            if( !classIndexs[k].empty() ) populatedClasses.push_back( k );
        }
        const UINT numPopulatedClasses = (UINT)populatedClasses.size();

        if( numPopulatedClasses > 0 ){
            //The number of samples each class should contribute (integer division rounds down)
            const UINT numSamplesPerClass = numToDraw / numPopulatedClasses;

            //Randomly select the training samples from each class in turn
            UINT classSlot = 0;
            UINT classCounter = 0;
            for(UINT i=0; i<numToDraw; i++){
                const std::vector< UINT > &candidates = classIndexs[ populatedClasses[ classSlot ] ];
                const UINT randomIndex = candidates[ rand.getRandomNumberInt(0, (UINT)candidates.size() ) ];
                newDataset.addSample(data[ randomIndex ].getClassLabel(), data[ randomIndex ].getSample());

                //Pre-increment so a class is left after exactly numSamplesPerClass draws; the last class
                //is never left, so it picks up whatever remains
                if( ++classCounter >= numSamplesPerClass && classSlot+1 < numPopulatedClasses ){
                    classCounter = 0;
                    classSlot++;
                }
            }
        }

    }else{
        //Randomly select the training samples to add to the new data set
        for(UINT i=0; i<numToDraw; i++){
            const UINT randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
            newDataset.addSample( data[randomIndex].getClassLabel(), data[randomIndex].getSample() );
        }
    }

    //Sort the class labels so they are in order
    newDataset.sortClassLabels();
    
    return newDataset;
}
예제 #2
0
//Returns the samples that belong to the test fold with the given index.
//
//foldIndex: the index of the fold to return, must be less than kFoldValue.
//
//Returns a dataset holding the samples referenced by crossValidationIndexs[ foldIndex ], with every class
//label of this dataset registered. An empty dataset (dimensions and null-gesture setting only) is returned
//if cross validation has not been set up or foldIndex is out of range.
ClassificationData ClassificationData::getTestFoldData(const UINT foldIndex) const{
    
    ClassificationData testData;
    testData.setNumDimensions( numDimensions );
    testData.setAllowNullGestureClass( allowNullGestureClass );

    if( !crossValidationSetup ) return testData;

    if( foldIndex >= kFoldValue ) return testData;

    //Add the class labels to make sure they all exist.
    //The loop is bounded by the size of classTracker (one entry per class), not by the number of samples
    const UINT numTrackedClasses = (UINT)classTracker.size();
    for(UINT k=0; k<numTrackedClasses; k++){
        testData.addClass( classTracker[k].classLabel, classTracker[k].className );
    }
    
    testData.reserve( (UINT)crossValidationIndexs[ foldIndex ].size() );

    //Add the data to the test fold
    UINT index = 0;
    for(UINT i=0; i<crossValidationIndexs[ foldIndex ].size(); i++){

        index = crossValidationIndexs[ foldIndex ][i];
        testData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
    }
    
    //Sort the class labels
    testData.sortClassLabels();

    return testData;
}
예제 #3
0
//Returns the training data for the given fold, i.e. every sample that is NOT in fold foldIndex.
//
//foldIndex: the index of the fold to leave out, must be less than kFoldValue.
//
//Returns a dataset holding the samples of all the other folds, with every class label of this dataset
//registered. An empty dataset (dimensions and null-gesture setting only) is returned if cross validation
//has not been set up (an error is logged in that case) or foldIndex is out of range.
ClassificationData ClassificationData::getTrainingFoldData(const UINT foldIndex) const{
   
    ClassificationData trainingData;
    trainingData.setNumDimensions( numDimensions );
    trainingData.setAllowNullGestureClass( allowNullGestureClass );

    if( !crossValidationSetup ){
        errorLog << "getTrainingFoldData(const UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
       return trainingData;
    }

    if( foldIndex >= kFoldValue ) return trainingData;

    //Add the class labels to make sure they all exist.
    //The loop is bounded by the size of classTracker (one entry per class), not by the number of samples
    const UINT numTrackedClasses = (UINT)classTracker.size();
    for(UINT k=0; k<numTrackedClasses; k++){
        trainingData.addClass( classTracker[k].classLabel, classTracker[k].className );
    }

    //Reserve the memory for all the samples outside of the test fold
    UINT numTrainingSamples = 0;
    for(UINT k=0; k<kFoldValue; k++){
        if( k != foldIndex ) numTrainingSamples += (UINT)crossValidationIndexs[k].size();
    }
    trainingData.reserve( numTrainingSamples );

    //Add the data to the training set, this will consist of all the data that is NOT in the foldIndex
    UINT index = 0;
    for(UINT k=0; k<kFoldValue; k++){
        if( k != foldIndex ){
            for(UINT i=0; i<crossValidationIndexs[k].size(); i++){

                index = crossValidationIndexs[k][i];
                trainingData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
            }
        }
    }

    //Sort the class labels
    trainingData.sortClassLabels();

    return trainingData;
}
예제 #4
0
//Builds a new dataset by drawing samples (with replacement) from this dataset.
//
//numSamples: the number of samples to draw; a value of 0 means "draw totalNumSamples samples".
//
//Returns the new dataset. The class labels of this dataset are always registered in the result, but no
//samples are added if this dataset is empty (there is nothing to draw from).
ClassificationData ClassificationData::getBootstrappedDataset(UINT numSamples) const{
    
    Random rand;
    ClassificationData newDataset;
    newDataset.setNumDimensions( getNumDimensions() );
    newDataset.setAllowNullGestureClass( allowNullGestureClass );
    newDataset.setExternalRanges( externalRanges, useExternalRanges );
    
    if( numSamples == 0 ) numSamples = totalNumSamples;
    
    newDataset.reserve( numSamples );
    
    //Add all the class labels to the new dataset to ensure the dataset has a list of all the labels
    const UINT numClasses = getNumClasses();
    for(UINT k=0; k<numClasses; k++){
        newDataset.addClass( classTracker[k].classLabel );
    }
    
    //Randomly select the training samples to add to the new data set.
    //An empty dataset is skipped, as any random index would read past the end of data[]
    if( totalNumSamples > 0 ){
        for(UINT i=0; i<numSamples; i++){
            const UINT randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
            newDataset.addSample(data[randomIndex].getClassLabel(), data[randomIndex].getSample());
        }
    }

    //Sort the class labels so they are in order
    newDataset.sortClassLabels();
    
    return newDataset;
}
//Flattens the labelled time series segments of this stream into a ClassificationData instance.
//Every row of every tracked segment becomes one sample carrying the segment's class label.
//
//includeNullGestures: if false, segments labelled GRT_DEFAULT_NULL_CLASS_LABEL are left out; the flag is
//                     also forwarded to setAllowNullGestureClass on the returned dataset.
//
//Returns the flattened dataset.
ClassificationData TimeSeriesClassificationDataStream::getClassificationData( const bool includeNullGestures ) const {

    ClassificationData flattened;
    flattened.setNumDimensions( getNumDimensions() );
    flattened.setAllowNullGestureClass( includeNullGestures );

    const UINT numSegments = (UINT)timeSeriesPositionTracker.size();
    for(UINT s=0; s<numSegments; s++){

        //Skip the null-gesture segments unless the caller asked for them
        if( !includeNullGestures && timeSeriesPositionTracker[s].getClassLabel() == GRT_DEFAULT_NULL_CLASS_LABEL ){
            continue;
        }

        //Copy each row of the segment across as an individual sample
        MatrixDouble segment = getTimeSeriesData( timeSeriesPositionTracker[s] );
        for(UINT row=0; row<segment.getNumRows(); row++){
            flattened.addSample( timeSeriesPositionTracker[s].getClassLabel(), segment.getRowVector(row) );
        }
    }

    return flattened;
}
예제 #6
0
//Extracts all the samples of a single class.
//
//classLabel: the label of the class to extract.
//
//Returns a dataset containing a copy of every sample whose label equals classLabel; the dataset has no
//samples if the label does not occur in this dataset.
ClassificationData ClassificationData::getClassData(const UINT classLabel) const{

    ClassificationData subset;
    subset.setNumDimensions( this->numDimensions );
    subset.setAllowNullGestureClass( allowNullGestureClass );

    //Find the tracker entry for the class (if any) and use its counter to reserve the memory up front
    UINT trackerIndex = 0;
    while( trackerIndex < classTracker.size() && classTracker[ trackerIndex ].classLabel != classLabel ){
        trackerIndex++;
    }
    if( trackerIndex < classTracker.size() ){
        subset.reserve( classTracker[ trackerIndex ].counter );
    }

    //Copy across the samples that carry the requested label
    for(UINT n=0; n<totalNumSamples; n++){
        if( data[n].getClassLabel() != classLabel ) continue;
        subset.addSample( classLabel, data[n].getSample() );
    }

    return subset;
}
예제 #7
0
//Generates a synthetic labelled dataset and saves it to a file.
//A random centre is drawn for each class (uniformly in [-range range] per dimension), then each sample is
//generated by picking a random class and adding Gaussian noise (mean 0, given sigma) to that class centre.
//
//filename:      the file the dataset is saved to (passed to ClassificationData::save)
//numSamples:    the number of samples to generate
//numClasses:    the number of classes, the class labels run from 1 to numClasses
//numDimensions: the number of dimensions of each sample
//range:         the class centres are drawn from [-range range]
//sigma:         the sigma passed to the Gaussian noise generator
//
//Returns the result of saving the dataset, or false if samples were requested but numClasses is 0.
bool ClassificationData::generateGaussDataset( const std::string filename, const UINT numSamples, const UINT numClasses, const UINT numDimensions, const double range, const double sigma ){
    
    //Samples can not be generated without at least one class (the model below would have no rows to index)
    if( numSamples > 0 && numClasses == 0 ){
        return false;
    }
    
    Random random;
    
    //Generate a simple model that will be used to generate the main dataset
    MatrixDouble model(numClasses,numDimensions);
    for(UINT k=0; k<numClasses; k++){
        for(UINT j=0; j<numDimensions; j++){
            model[k][j] = random.getRandomNumberUniform(-range,range);
        }
    }
    
    //Use the model above to generate the main dataset
    ClassificationData data;
    data.setNumDimensions( numDimensions );
    
    //The sample buffer is reused for every sample, each element is overwritten before it is added
    vector< double > sample( numDimensions );
    
    for(UINT i=0; i<numSamples; i++){
        
        //Randomly select which class this sample belongs to
        const UINT k = random.getRandomNumberInt( 0, numClasses );
        
        //Generate a sample using the model (+ some Gaussian noise)
        for(UINT j=0; j<numDimensions; j++){
            sample[j] = model[k][j] + random.getRandomNumberGauss(0,sigma);
        }
        
        //By default in the GRT, the class label should not be 0, so add 1
        const UINT classLabel = k + 1;
        
        //Add the labeled sample to the dataset
        data.addSample( classLabel, sample );
    }
    
    //Save the dataset to the file
    return data.save( filename );
}
예제 #8
0
//Trains one boosted committee of weak classifiers per class (each class against all the others).
//
//For every class the training data is relabelled as POSITIVE_LABEL (sample belongs to the class) or
//NEGATIVE_LABEL (any other class), the sample weights are reset to 1/M and the boosting loop is run:
//each iteration trains every weak classifier on the weighted data, picks the one with the lowest weighted
//error (epsilon), computes alpha = 0.5 * log((1-epsilon)/epsilon) and, if boosting continues, adds that
//classifier to the class model and increases the weights of the samples it got wrong.
//
//trainingData: the labelled training data. It is passed by non-const reference and trainingData.scale(...)
//              is called on it when useScaling is set, so the caller's dataset is presumably modified.
//
//Returns true if a model was trained. Returns false if there are fewer than two training samples, if no
//weak classifiers have been set, or if any weak classifier fails to train.
bool AdaBoost::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    if( trainingData.getNumSamples() <= 1 ){
        errorLog << "train_(ClassificationData &trainingData) - There are not enough training samples to train a model! Number of samples: " << trainingData.getNumSamples()  << endl;
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    const UINT M = trainingData.getNumSamples();
    const UINT POSITIVE_LABEL = WEAK_CLASSIFIER_POSITIVE_CLASS_LABEL;
    const UINT NEGATIVE_LABEL = WEAK_CLASSIFIER_NEGATIVE_CLASS_LABEL;
    double alpha = 0;
    //Boosting stops for a class once the best weighted error is within beta of 0.5 (or above it)
    const double beta = 0.001;
    double epsilon = 0;
    TrainingResult trainingResult;
    
    const UINT K = (UINT)weakClassifiers.size();
    if( K == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - No weakClassifiers have been set. You need to set at least one weak classifier first." << endl;
        return false;
    }

    classLabels.resize(numClasses);
    models.resize(numClasses);
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ){
        trainingData.scale(ranges,0,1);
    }
    
    //Create the weights vector (one weight per training sample)
    VectorDouble weights(M);
    
    //Create the error matrix, errorMatrix[k][i] is 1 if weak classifier k misclassified sample i in the
    //current boosting iteration and 0 otherwise
    MatrixDouble errorMatrix(K,M);
    
    for(UINT classIter=0; classIter<numClasses; classIter++){
        
        //Get the class label for the current class
        classLabels[classIter] = trainingData.getClassLabels()[classIter];
        
        //Set the class label of the current model
        models[ classIter ].setClassLabel( classLabels[classIter] );
        
        //Setup the labels for this class, POSITIVE_LABEL == 1, NEGATIVE_LABEL == 2
        //Samples of the current class become the positive examples, all the other samples become negative
        ClassificationData classData;
        classData.setNumDimensions(trainingData.getNumDimensions());
        for(UINT i=0; i<M; i++){
            UINT label = trainingData[i].getClassLabel()==classLabels[classIter] ? POSITIVE_LABEL : NEGATIVE_LABEL;
            VectorDouble trainingSample = trainingData[i].getSample();
            classData.addSample(label,trainingSample);
        }
        
        //Setup the initial training sample weights (uniform, summing to 1)
        std::fill(weights.begin(),weights.end(),1.0/M);
        
        //Run the boosting loop
        bool keepBoosting = true;
        UINT t = 0;
        
        while( keepBoosting ){
            
            //Pick the classifier from the family of classifiers that minimizes the total error
            UINT bestClassifierIndex = 0;
            double minError = numeric_limits<double>::max();
            for(UINT k=0; k<K; k++){
                //Get the k'th possible classifier
                WeakClassifier *weakLearner = weakClassifiers[k];
                
                //Train the current classifier
                if( !weakLearner->train(classData,weights) ){
                    errorLog << "Failed to train weakLearner!" << endl;
                    return false;
                }
                
                //Compute the weighted error for this classifier
                double e = 0;
                double positiveLabel = weakLearner->getPositiveClassLabel();
                double numCorrect = 0;
                double numIncorrect = 0;
                for(UINT i=0; i<M; i++){
                    //Only penalize errors
                    double prediction = weakLearner->predict( classData[i].getSample() );
                    
                    if( (prediction == positiveLabel && classData[i].getClassLabel() != POSITIVE_LABEL) ||        //False positive
                        (prediction != positiveLabel && classData[i].getClassLabel() == POSITIVE_LABEL) ){       //False negative
                        e += weights[i]; //Increase the error proportional to the weight of the example
                        errorMatrix[k][i] = 1; //Flag that there was an error
                        numIncorrect++;
                    }else{
                        errorMatrix[k][i] = 0; //Flag that there was no error
                        numCorrect++;
                    }
                }
                
                //Note that NumCorrect and NumIncorrect are logged as fractions of M, not as counts
                trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Classifier: " << k << " WeightedError: " << e << " NumCorrect: " << numCorrect/M << " NumIncorrect: " <<numIncorrect/M << endl;
                
                //Keep track of the classifier with the lowest weighted error (ties keep the earliest one)
                if( e < minError ){
                    minError = e;
                    bestClassifierIndex = k;
                }
                
            }
  
            epsilon = minError;
            
            //Set alpha, using the M1 weight value: a small error (epsilon close to 0) gives a large alpha,
            //an epsilon of exactly 0 gives an infinite alpha (handled below)
            alpha = 0.5 * log( (1.0-epsilon)/epsilon );
            
            trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Best Classifier Index: " << bestClassifierIndex << " MinError: " << minError << " Alpha: " << alpha << endl;
            
            //Check the stopping conditions. The second check stops when 0.5 - epsilon <= beta, i.e. when the
            //best weighted error is no longer at least beta below 0.5 (the logged message words this differently)
            if( isinf(alpha) ){ keepBoosting = false; trainingLog << "Alpha is INF. Stopping boosting for current class" << endl; }
            if( 0.5 - epsilon <= beta ){ keepBoosting = false; trainingLog << "Epsilon <= Beta. Stopping boosting for current class" << endl; }
            if( ++t >= numBoostingIterations ) keepBoosting = false;

            //Store the result of this iteration and pass it to any observers (t has already been incremented)
            trainingResult.setClassificationResult(t, minError, this);
            trainingResults.push_back(trainingResult);
            trainingResultsObserverManager.notifyObservers( trainingResult );
            
            if( keepBoosting ){
                
                //Add the best weak classifier to the committee
                models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                
                //Update the weights for the next boosting iteration
                //epsilon can not be 0 here, as that gives an infinite alpha which stops the boosting above
                double reWeight = (1.0 - epsilon) / epsilon;
                double oldSum = 0;
                double newSum = 0;
                for(UINT i=0; i<M; i++){
                    oldSum += weights[i];
                    //Only update the weights that resulted in an incorrect prediction
                    if( errorMatrix[bestClassifierIndex][i] == 1 ) weights[i] *= reWeight;
                    newSum += weights[i];
                }
                
                //Normalize all the weights so they sum to the same total as before the update
                //This results in increasing the weights of the samples that were incorrectly labelled
                //While decreasing the weights of the samples that were correctly classified
                reWeight = oldSum/newSum;
                for(UINT i=0; i<M; i++){
                    weights[i] *= reWeight;
                }
                
            }else{
                //t was incremented above, so t-1 is the index of the iteration that just finished
                trainingLog << "Stopping boosting training at iteration : " << t-1 << " with an error of " << epsilon << endl;
                //The classifier selected in the stopping iteration is only added if it was the first iteration,
                //otherwise the committee keeps just the classifiers added in the earlier iterations
                if( t-1 == 0 ){
                    //Add the best weak classifier to the committee (we have to add it as this is the first iteration)
                    if( isinf(alpha) ){ alpha = 1; } //If alpha is infinite then the first classifier got everything correct
                    models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                }
            }
            
        }
    }
    
    //Normalize the weights of each class model
    for(UINT k=0; k<numClasses; k++){
        models[k].normalizeWeights();
    }
    
    //Flag that the model has been trained
    trained = true;
    
    //Setup the data for prediction
    predictedClassLabel = 0;
    maxLikelihood = 0;
    classLikelihoods.resize(numClasses);
    classDistances.resize(numClasses);
    
    return true;
}
예제 #9
0
int main (int argc, const char * argv[])
{

    //Create a new instance of the ClassificationData
    ClassificationData trainingData;
    
    //Set the dimensionality of the data (you need to do this before you can add any samples)
    trainingData.setNumDimensions( 3 );
    
    //You can also give the dataset a name (the name should have no spaces)
    trainingData.setDatasetName("DummyData");
    
    //You can also add some info text about the data
    trainingData.setInfoText("This data contains some dummy data");
    
    //Here you would grab some data from your sensor and label it with the corresponding gesture it belongs to
    UINT gestureLabel = 1;
    VectorDouble sample(3);
    
    //For now we will just add some random data
    Random random;
    for(UINT i=0; i<100; i++){
        sample[0] = random.getRandomNumberUniform(-1.0,1.0);
        sample[1] = random.getRandomNumberUniform(-1.0,1.0); 
        sample[2] = random.getRandomNumberUniform(-1.0,1.0); 
        
        //Add the sample to the training data
        trainingData.addSample( gestureLabel, sample );
    }
    
    //After recording your training data you can then save it to a file
    if( !trainingData.saveDatasetToFile( "TrainingData.txt" ) ){
		cout << "ERROR: Failed to save dataset to file!\n";
		return EXIT_FAILURE;
	}
    
    //This can then be loaded later
    if( !trainingData.loadDatasetFromFile( "TrainingData.txt" ) ){
		cout << "ERROR: Failed to load dataset from file!\n";
		return EXIT_FAILURE;
	}
    
    //You can also save and load the training data to a CSV file
    //Each row will contain a sample, with the first column containing the class label and the remaining columns containing the data
    if( !trainingData.saveDatasetToCSVFile( "TrainingData.csv" ) ){
		cout << "ERROR: Failed to save dataset to csv file!\n";
		return EXIT_FAILURE;
	}
	
    if( !trainingData.loadDatasetFromCSVFile( "TrainingData.csv" ) ){
		cout << "ERROR: Failed to load dataset from csv file!\n";
		return EXIT_FAILURE;
	}
    
    //This is how you can get some stats from the training data
    string datasetName = trainingData.getDatasetName();
    string infoText = trainingData.getInfoText();
    UINT numSamples = trainingData.getNumSamples();
    UINT numDimensions = trainingData.getNumDimensions();
    UINT numClasses = trainingData.getNumClasses();
    
    cout << "Dataset Name: " << datasetName << endl;
    cout << "InfoText: " << infoText << endl;
    cout << "NumberOfSamples: " << numSamples << endl;
    cout << "NumberOfDimensions: " << numDimensions << endl;
    cout << "NumberOfClasses: " << numClasses << endl;
    
    //You can also get the minimum and maximum ranges of the data
    vector< MinMax > ranges = trainingData.getRanges();
    
    cout << "The ranges of the dataset are: \n";
    for(UINT j=0; j<ranges.size(); j++){
        cout << "Dimension: " << j << " Min: " << ranges[j].minValue << " Max: " << ranges[j].maxValue << endl;
    }
    
    //If you want to partition the dataset into a training dataset and a test dataset then you can use the partition function
    //A value of 80 means that 80% of the original data will remain in the training dataset and 20% will be returned as the test dataset
    ClassificationData testData = trainingData.partition( 80 );
    
    //If you have multiple datasets that you want to merge together then use the merge function
    if( !trainingData.merge( testData ) ){
		cout << "ERROR: Failed to save merge datasets!\n";
		return EXIT_FAILURE;
	}
    
    //If you want to run K-Fold cross validation using the dataset then you should first spilt the dataset into K-Folds
    //A value of 10 splits the dataset into 10 folds and the true parameter signals that stratified sampling should be used
    if( !trainingData.spiltDataIntoKFolds( 10, true ) ){
		cout << "ERROR: Failed to spiltDataIntoKFolds!\n";
		return EXIT_FAILURE;
	}
    
    //After you have called the spilt function you can then get the training and test sets for each fold
    for(UINT foldIndex=0; foldIndex<10; foldIndex++){
        ClassificationData foldTrainingData = trainingData.getTrainingFoldData( foldIndex );
        ClassificationData foldTestingData = trainingData.getTestFoldData( foldIndex );
    }
    
    //If need you can clear any training data that you have recorded
    trainingData.clear();
    
    return EXIT_SUCCESS;
}