bool LabelledClassificationData::merge(LabelledClassificationData &labelledData){

    if( labelledData.getNumDimensions() != numDimensions ){
        errorLog << "merge(LabelledClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl;
        return false;
    }

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();

    //Add the data from the labelledData to this instance
    for(UINT i=0; i<labelledData.getNumSamples(); i++){
        addSample(labelledData[i].getClassLabel(), labelledData[i].getSample());
    }

    //Set the class names from the dataset
    vector< ClassTracker > classTracker = labelledData.getClassTracker();
    for(UINT i=0; i<classTracker.size(); i++){
        setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel);
    }

	sortClassLabels();

    return true;
}
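
//A minimal usage sketch for merge(), assuming two datasets recorded in separate
//sessions with matching dimensionality; the file names here are illustrative
//assumptions, not from the original source.
bool mergeRecordingSessions(){
    LabelledClassificationData sessionOne, sessionTwo;
    if( !sessionOne.loadDatasetFromFile("SessionOne.txt") ) return false;
    if( !sessionTwo.loadDatasetFromFile("SessionTwo.txt") ) return false;
    //merge() returns false if the dimensions of the two datasets do not match
    return sessionOne.merge( sessionTwo );
}
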
LabelledClassificationData LabelledClassificationData::getTrainingFoldData(UINT foldIndex){
   
    LabelledClassificationData trainingData;
    trainingData.setNumDimensions( numDimensions );
    trainingData.setAllowNullGestureClass( allowNullGestureClass );

    if( !crossValidationSetup ){
        errorLog << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
       return trainingData;
    }

    if( foldIndex >= kFoldValue ) return trainingData;

    //Add the data to the training set, this will consist of all the data that is NOT in the foldIndex
    UINT index = 0;
    for(UINT k=0; k<kFoldValue; k++){
        if( k != foldIndex ){
            for(UINT i=0; i<crossValidationIndexs[k].size(); i++){

                index = crossValidationIndexs[k][i];
                trainingData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
            }
        }
    }

	trainingData.sortClassLabels();

    return trainingData;
}
LabelledClassificationData LabelledClassificationData::getClassData(UINT classLabel) const{
    
    LabelledClassificationData classData;
    classData.setNumDimensions( this->numDimensions );
    classData.setAllowNullGestureClass( allowNullGestureClass );

    for(UINT i=0; i<totalNumSamples; i++){
        if( data[i].getClassLabel() == classLabel ){
            classData.addSample(classLabel, data[i].getSample());
        }
    }

    return classData;
}
LabelledClassificationData LabelledClassificationData::getBootstrappedDataset(UINT numSamples){
    
    Random rand;
    LabelledClassificationData newDataset;
    newDataset.setNumDimensions( getNumDimensions() );
    newDataset.setAllowNullGestureClass( allowNullGestureClass );
    
    if( numSamples == 0 ) numSamples = totalNumSamples;
    
    for(UINT i=0; i<numSamples; i++){
        UINT randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
        newDataset.addSample(data[randomIndex].getClassLabel(), data[randomIndex].getSample());
    }
    
    return newDataset;
}
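
//A hedged usage note: the bootstrap draws samples with replacement, so the new
//dataset will usually contain duplicates and omit roughly a third of the source
//samples. Passing 0 keeps the original sample count, as the trainers below do.
LabelledClassificationData bootstrapExample( LabelledClassificationData &data ){
    return data.getBootstrappedDataset( 0 ); //same size as data, sampled with replacement
}
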
bool ANBC::setWeights(LabelledClassificationData weightsData) {

    if( weightsData.getNumSamples() > 0 ) {
        weightsDataSet = true;
        this->weightsData = weightsData;
        return true;
    }
    return false;
}
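
//A sketch of supplying per-dimension weights to ANBC, inferred from how
//ANBC::train() reads weightsData further below: one sample per class label,
//where the sample vector holds one weight per input dimension. The values here
//are illustrative assumptions.
void anbcWeightsExample( ANBC &anbc ){
    LabelledClassificationData weightsData;
    weightsData.setNumDimensions( 3 );
    vector< double > weightsForClassOne( 3, 1.0 );
    weightsForClassOne[2] = 0.0; //e.g. ignore the third dimension for class 1
    weightsData.addSample( 1, weightsForClassOne );
    anbc.setWeights( weightsData );
}
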
LabelledClassificationData LabelledClassificationData::getTestFoldData(UINT foldIndex){
    
    LabelledClassificationData testData;
    testData.setNumDimensions( numDimensions );
    testData.setAllowNullGestureClass( allowNullGestureClass );

    if( !crossValidationSetup ) return testData;

    if( foldIndex >= kFoldValue ) return testData;

    //Add the data to the test set, this will consist of all the data in the foldIndex fold
    UINT index = 0;
    for(UINT i=0; i<crossValidationIndexs[ foldIndex ].size(); i++){

        index = crossValidationIndexs[ foldIndex ][i];
        testData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
    }

    return testData;
}
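
//A sketch of a typical k-fold loop over the two fold accessors above, assuming
//a pipeline has already been configured; spiltDataIntoKFolds (the spelling used
//in the error message above) must be called before requesting any fold.
void crossValidationSketch( LabelledClassificationData &data, GestureRecognitionPipeline &pipeline ){
    const UINT K = 10;
    if( !data.spiltDataIntoKFolds( K, true ) ) return; //true requests stratified sampling
    for(UINT foldIndex=0; foldIndex<K; foldIndex++){
        LabelledClassificationData trainingFold = data.getTrainingFoldData( foldIndex );
        LabelledClassificationData testFold = data.getTestFoldData( foldIndex );
        pipeline.train( trainingFold );
        pipeline.test( testFold );
    }
}
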
bool RandomForests::train(LabelledClassificationData trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train(LabelledClassificationData labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    classLabels = trainingData.getClassLabels();
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Train the random forest
    forestSize = 10;
    Random random;
    
    DecisionTree tree;
    tree.enableScaling( false ); //We have already scaled the training data so we do not need to scale it again
    tree.setTrainingMode( DecisionTree::BEST_RANDOM_SPLIT );
    tree.setNumSplittingSteps( numRandomSplits );
    tree.setMinNumSamplesPerNode( minNumSamplesPerNode );
    tree.setMaxDepth( maxDepth );
    
    for(UINT i=0; i<forestSize; i++){
        LabelledClassificationData data = trainingData.getBootstrappedDataset();
        
        if( !tree.train( data ) ){
            errorLog << "train(LabelledClassificationData labelledTrainingData) - Failed to train tree at forest index: " << i << endl;
            return false;
        }
        
        //Deep copy the tree into the forest
        forest.push_back( tree.deepCopyTree() );
    }
    
    //Flag that the algorithm has been trained
    trained = true;
    return trained;
}
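
//Each tree above is trained on its own bootstrap resample with BEST_RANDOM_SPLIT,
//which is what decorrelates the trees; at prediction time a random forest
//typically combines the individual tree outputs by voting, though the predict
//method is not shown in this excerpt.
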
bool BAG::train(LabelledClassificationData trainingData){
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    trained = false;
    classLabels.clear();
    
    if( M == 0 ){
        errorLog << "train(LabelledClassificationData trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    numFeatures = N;
    numClasses = K;
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    
    UINT ensembleSize = (UINT)ensemble.size();
    
    if( ensembleSize == 0 ){
        errorLog << "train(LabelledClassificationData trainingData) - The ensemble size is zero! You need to add some classifiers to the ensemble first." << endl;
        return false;
    }
    
    for(UINT i=0; i<ensembleSize; i++){
        if( ensemble[i] == NULL ){
            errorLog << "train(LabelledClassificationData trainingData) - The classifier at ensemble index " << i << " has not been set!" << endl;
            return false;
        }
    }

    //Train the ensemble
    for(UINT i=0; i<ensembleSize; i++){
        LabelledClassificationData boostedDataset = trainingData.getBootstrappedDataset();
        
        //Train the classifier with the bootstrapped dataset
        if( !ensemble[i]->train( boostedDataset ) ){
            errorLog << "train(LabelledClassificationData trainingData) - The classifier at ensemble index " << i << " failed training!" << endl;
            return false;
        }
    }
    
    //Set the class labels
    classLabels = trainingData.getClassLabels();
    
    //Flag that the algorithm has been trained
    trained = true;
    return trained;
}
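
//A hedged usage sketch: BAG requires the ensemble to be populated before train()
//is called (see the ensembleSize check above). addClassifierToEnsemble is an
//assumption about the API; substitute whatever method your version uses to
//register classifiers in the ensemble.
bool bagExample( LabelledClassificationData &trainingData ){
    BAG bag;
    bag.addClassifierToEnsemble( KNN(10) );
    bag.addClassifierToEnsemble( MinDist() );
    return bag.train( trainingData );
}
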
bool Softmax::train(LabelledClassificationData trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train(LabelledClassificationData labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    numFeatures = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Train a regression model for each class in the training data
    for(UINT k=0; k<numClasses; k++){
        
        //Set the class label
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
        
        //Train the model
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << "train(LabelledClassificationData labelledTrainingData) - Failed to train model for class: " << classLabels[k] << endl;
            return false;
        }
    }
    
    //Flag that the algorithm has been trained
    trained = true;
    return trained;
}
int main (int argc, const char * argv[])
{

    //We are going to use the Iris dataset, you can find more about the original dataset at: http://en.wikipedia.org/wiki/Iris_flower_data_set
    
    //Create a new instance of LabelledClassificationData to hold the training data
    LabelledClassificationData trainingData;
    
    //Load the training dataset from a file, the file should be in the same directory as this program
    if( !trainingData.loadDatasetFromFile("IrisData.txt") ){
        cout << "Failed to load Iris data from file!\n";
        return EXIT_FAILURE;
    }
    
    //Print some basic stats about the dataset we have loaded
    trainingData.printStats();
    
    //Partition the training dataset into a training dataset and test dataset
    //We will use 60% of the data to train the algorithm and 40% of the data to test it
    //The true parameter flags that we want to use stratified sampling, which means there 
    //should be an equal class distribution between the training and test datasets
    LabelledClassificationData testData = trainingData.partition( 60, true );
    
    //Setup the gesture recognition pipeline
    GestureRecognitionPipeline pipeline;
    
    //Add a KNN classification algorithm as the main classifier with a K value of 10
    pipeline.setClassifier( KNN(10) );
    
    //Train the KNN algorithm using the training dataset
    if( !pipeline.train( trainingData ) ){
        cout << "Failed to train the pipeline!\n";
        return EXIT_FAILURE;
    }
    
    //Test the KNN model using the test dataset
    if( !pipeline.test( testData ) ){
        cout << "Failed to test the pipeline!\n";
        return EXIT_FAILURE;
    }
    
    //Print some metrics about how successful the classification was
    //Print the accuracy
    cout << "The classification accuracy was: " << pipeline.getTestAccuracy() << "%\n" << endl;
    
    //Print the precision for each class
    for(UINT k=0; k<pipeline.getNumClassesInModel(); k++){
        UINT classLabel = pipeline.getClassLabels()[k];
        double classPrecision = pipeline.getTestPrecision( classLabel );
        cout << "The precision for class " << classLabel << " was " << classPrecision << endl;
    }
    cout << endl;
    
    //Print the recall for each class
    for(UINT k=0; k<pipeline.getNumClassesInModel(); k++){
        UINT classLabel = pipeline.getClassLabels()[k];
        double classRecall = pipeline.getTestRecall( classLabel );
        cout << "The recall for class " << classLabel << " was " << classRecall << endl;
    }
    cout << endl;
    
    //Print the confusion matrix
    Matrix< double > confusionMatrix = pipeline.getTestConfusionMatrix();
    cout << "Confusion Matrix: \n";
    for(UINT i=0; i<confusionMatrix.getNumRows(); i++){
        for(UINT j=0; j<confusionMatrix.getNumCols(); j++){
            cout << confusionMatrix[i][j] << "\t";
        }
        cout << endl;
        
    }
    cout << endl;

    return EXIT_SUCCESS;
}
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,LabelledClassificationData &data){
    
    double error = 0;
    double errorSum = 0;
    double lastErrorSum = 0;
    double delta = 0;
    UINT N = data.getNumDimensions();
    UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorDouble y(M);
    vector< UINT > randomTrainingOrder(M);
    
    //Init the model
    model.init( classLabel,  N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0.0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for the stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            error = y[i] - model.compute( data[i].getSample() );
            errorSum += error;
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate  * error * data[i][j];
            }
            model.w0 += learningRate  * error;
        }
        
        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumIterations ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << endl;
    }
    
    return true;
}
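
//For reference, the loop above is one-vs-all logistic-style stochastic gradient
//descent: each sample x moves the weights by learningRate * (y - f(x)) * x,
//where f is whatever sigmoid-like output model.compute() produces, and training
//stops once the change in the epoch error sum falls below minChange or
//maxNumIterations is reached. (This reading is inferred from the code above.)
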
bool KNN::train(LabelledClassificationData &trainingData){

	if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    UINT index = 0;
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data and use 80% to train the algorithm and 20% to test it
        LabelledClassificationData trainingSet(trainingData);
        LabelledClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{

            //Compute the classification error
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){

                vector< double > sample = testSet[i].getSample();

                if( !predict( sample ) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }

                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }

            accuracy = accuracy / double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );
			
			trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }

            index++;
        }

    }

    if( bestAccuracy > 0 ){
        //Sort the training log by value
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the value changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value so we want to get the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

		trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index, this should give us the best accuracy with the minimum K value
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}
bool KNN::train_(LabelledClassificationData &trainingData,UINT K){

    //Clear any previous models
    clear();

    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }

    //Set the dimensionality of the input data
    this->K = K;
    this->numFeatures = trainingData.getNumDimensions();
    this->numClasses = trainingData.getNumClasses();

    //TODO: In the future we need to build a kd-tree from the training data to allow better realtime prediction
    this->trainingData = trainingData;

    if( useScaling ){
        ranges = this->trainingData.getRanges();
        this->trainingData.scale(ranges, 0, 1);
    }

    //Set the class labels
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
    }

    //Flag that the algorithm has been trained so we can compute the rejection thresholds
    trained = true;
    
    //If null rejection is enabled then compute the null rejection thresholds
    if( useNullRejection ){

        //Set the null rejection to false so we can compute the values for it (it will be restored to its original value later)
        bool tempUseNullRejection = useNullRejection;
        useNullRejection = false;
        rejectionThresholds.clear();

        //Compute the rejection thresholds for each class
        VectorDouble counter(numClasses,0);
        trainingMu.resize( numClasses, 0 );
        trainingSigma.resize( numClasses, 0 );
        rejectionThresholds.resize( numClasses, 0 );

        //Compute Mu for each of the classes
        const unsigned int numTrainingExamples = trainingData.getNumSamples();
        vector< IndexedDouble > predictionResults( numTrainingExamples );
        for(UINT i=0; i<numTrainingExamples; i++){
            predict( trainingData[i].getSample(), K);

            UINT classLabelIndex = 0;
            for(UINT k=0; k<numClasses; k++){
                if( predictedClassLabel == classLabels[k] ){
                    classLabelIndex = k;
                    break;
                }
            }

            predictionResults[ i ].index = classLabelIndex;
            predictionResults[ i ].value = classDistances[ classLabelIndex ];

            trainingMu[ classLabelIndex ] += predictionResults[ i ].value;
            counter[ classLabelIndex ]++;
        }

        for(UINT j=0; j<numClasses; j++){
            trainingMu[j] /= counter[j];
        }

        //Compute Sigma for each of the classes
        for(UINT i=0; i<numTrainingExamples; i++){
            trainingSigma[predictionResults[i].index] += SQR(predictionResults[i].value - trainingMu[predictionResults[i].index]);
        }

        for(UINT j=0; j<numClasses; j++){
            double count = counter[j];
            if( count > 1 ){
                trainingSigma[ j ] = sqrt( trainingSigma[j] / (count-1) );
            }else{
                trainingSigma[ j ] = 1.0;
            }
        }

        //Check to see if any of the mu or sigma values are zero or NaN
        bool errorFound = false;
        for(UINT j=0; j<numClasses; j++){
            if( trainingMu[j] == 0 ){
                warningLog << "TrainingMu[ " << j << " ] is zero for a K value of " << K << endl;
            }
            if( trainingSigma[j] == 0 ){
                warningLog << "TrainingSigma[ " << j << " ] is zero for a K value of " << K << endl;
            }
            if( isnan( trainingMu[j] ) ){
                errorLog << "TrainingMu[ " << j << " ] is NAN for a K value of " << K << endl;
                errorFound = true;
            }
            if( isnan( trainingSigma[j] ) ){
                errorLog << "TrainingSigma[ " << j << " ] is NAN for a K value of " << K << endl;
                errorFound = true;
            }
        }

        if( errorFound ){
            trained = false;
            return false;
        }

        //Recompute the rejection thresholds
        recomputeNullRejectionThresholds();

        //Restore the actual state of the null rejection
        useNullRejection = tempUseNullRejection;
        
    }else{
        //Resize the rejection thresholds but set the values to 0
        rejectionThresholds.clear();
        rejectionThresholds.resize( numClasses, 0 );
    }

    return true;
}
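
//Note on the statistics computed above: trainingMu and trainingSigma summarise
//the per-class distances of the training samples, and
//recomputeNullRejectionThresholds() presumably combines them (for example as
//mu + nullRejectionCoeff * sigma) to reject predictions that sit too far from
//every class; the exact formula is not shown in this excerpt.
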
bool DecisionStump::train(LabelledClassificationData &trainingData, VectorDouble &weights){
    
    trained = false;
    numInputDimensions = trainingData.getNumDimensions();
    
    //There should only be two classes in the dataset, the positive class (classLabel==1) and the negative class (classLabel==2)
    if( trainingData.getNumClasses() != 2 ){
        errorLog << "train(LabelledClassificationData &trainingData, VectorDouble &weights) - There should only be 2 classes in the training data, but there are : " << trainingData.getNumClasses() << endl;
        return false;
    }
    
    //There should be one weight for every training sample
    if( trainingData.getNumSamples() != weights.size() ){
        errorLog << "train(LabelledClassificationData &trainingData, VectorDouble &weights) - There number of examples in the training data (" << trainingData.getNumSamples() << ") does not match the lenght of the weights vector (" << weights.size() << ")" << endl;
        return false;
    }
    
    //Search every input dimension and threshold step to find the best decision stump
    const UINT M = trainingData.getNumSamples();
    UINT bestFeatureIndex = 0;
    vector< MinMax > ranges = trainingData.getRanges();
    double minError = numeric_limits<double>::max();
    double minRange = 0;
    double maxRange = 0;
    double step = 0;
    double threshold = 0;
    double bestThreshold = 0;
    
    for(UINT n=0; n<numInputDimensions; n++){
        minRange = ranges[n].minValue;
        maxRange = ranges[n].maxValue;
        step = (maxRange-minRange)/double(numSteps);
        threshold = minRange;
        while( threshold <= maxRange ){
            
            //Compute the error using the current threshold on the current input dimension
            //We need to check both sides of the threshold
            double rhsError = 0;
            double lhsError = 0;
            for(UINT i=0; i<M; i++){
                bool positiveClass = trainingData[ i ].getClassLabel() == WEAK_CLASSIFIER_POSITIVE_CLASS_LABEL;
                bool rhs = trainingData[ i ][ n ] >= threshold;
                bool lhs = trainingData[ i ][ n ] <= threshold;
                if( (rhs && !positiveClass) || (!rhs && positiveClass) ) rhsError += weights[ i ];
                if( (lhs && !positiveClass) || (!lhs && positiveClass) ) lhsError += weights[ i ];
            }
            
            //Check to see if either the rhsError or lhsError beats the minError, if so then store the results
            if( rhsError < minError ){
                minError = rhsError;
                bestFeatureIndex = n;
                bestThreshold = threshold;
                direction = 1; //1 means rhs
            }
            if( lhsError < minError ){
                minError = lhsError;
                bestFeatureIndex = n;
                bestThreshold = threshold;
                direction = 0; //0 means lhs
            }
            
            //Update the threshold
            threshold += step;
        }
    }
    
    decisionFeatureIndex = bestFeatureIndex;
    decisionValue = bestThreshold;
    trained = true;
    
    //cout << "Best Feature Index: " << decisionFeatureIndex << " Value: " << decisionValue << " Direction: " << direction << " Error: " << minError << endl;
    return true;
}
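
//A hedged sketch of how a trained stump would classify a sample, mirroring the
//direction convention used in train() above (1 means the positive class sits on
//the right-hand side of the threshold). The actual predict method is not shown
//in this excerpt, and the class labels 1 and 2 follow the comment in train().
UINT stumpClassify( const vector< double > &x, UINT decisionFeatureIndex, double decisionValue, UINT direction ){
    bool rhs = x[ decisionFeatureIndex ] >= decisionValue;
    if( rhs == (direction == 1) ) return 1; //positive class
    return 2; //negative class
}
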
bool GMM::train(LabelledClassificationData trainingData){
    
    //Clear any old models
    models.clear();
    trained = false;
    numFeatures = 0;
    numClasses = 0;
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledClassificationData &trainingData) - Training data is empty!" << endl;
        return false;
    }
    
    //Set the number of features and number of classes and resize the models buffer
    numFeatures = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    models.resize(numClasses);
    
    if( numFeatures >= 6 ){
        warningLog << "train(LabelledClassificationData &trainingData) - The number of features in your training data is high (" << numFeatures << ").  The GMMClassifier does not work well with high dimensional data, you might get better results from one of the other classifiers." << endl;
    }
    
    //Get the ranges of the training data if the training data is going to be scaled
    if( useScaling ){
        ranges = trainingData.getRanges();
    }

    //Fit a Mixture Model to each class (independently)
    for(UINT k=0; k<numClasses; k++){
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        LabelledClassificationData classData = trainingData.getClassData( classLabel );
        
        //Scale the training data if needed
        if( useScaling ){
            if( !classData.scale(ranges,GMM_MIN_SCALE_VALUE, GMM_MAX_SCALE_VALUE) ){
                errorLog << "train(LabelledClassificationData &trainingData) - Failed to scale training data!" << endl;
                return false;

            }
        }
        
        //Convert the labelled data to unlabelled data
        UnlabelledClassificationData unlabelledData = classData.reformatAsUnlabelledClassificationData();
        
        //Train the Mixture Model for this class
        GaussianMixtureModels gaussianMixtureModel;
        gaussianMixtureModel.setMinChange( minChange );
        gaussianMixtureModel.setMaxIter( maxIter );
        if( !gaussianMixtureModel.train(unlabelledData, numMixtureModels) ){
            errorLog << "train(LabelledClassificationData &trainingData) - Failed to train Mixture Model for class " << classLabel << endl;
            return false;
        }
        
        //Setup the model container
        models[k].resize( numMixtureModels );
        models[k].setClassLabel( classLabel );
        
        //Store the mixture model in the container
        for(UINT j=0; j<numMixtureModels; j++){
            models[k][j].mu = gaussianMixtureModel.getMu().getRowVector(j);
            models[k][j].sigma = gaussianMixtureModel.getSigma()[j];
            
            //Compute the determinant and invSigma for the realtime prediction
            LUDecomposition ludcmp(models[k][j].sigma);
            if( !ludcmp.inverse( models[k][j].invSigma ) ){
                models.clear();
                errorLog << "train(LabelledClassificationData &trainingData) - Failed to invert Matrix for class " << classLabel << "!" << endl;
                return false;
            }
            models[k][j].det = ludcmp.det();
        }
        
        //Compute the normalization factor
        models[k].recomputeNormalizationFactor();
        
        //Compute the rejection thresholds
        double mu = 0;
        double sigma = 0;
        VectorDouble predictionResults(classData.getNumSamples(),0);
        for(UINT i=0; i<classData.getNumSamples(); i++){
            vector< double > sample = classData[i].getSample();
            predictionResults[i] = models[k].computeMixtureLikelihood( sample );
            mu += predictionResults[i];
        }
        
        //Update mu
        mu /= double( classData.getNumSamples() );
        
        //Calculate the standard deviation
        for(UINT i=0; i<classData.getNumSamples(); i++) 
            sigma += SQR( (predictionResults[i]-mu) );
        sigma = sqrt( sigma / (double(classData.getNumSamples())-1.0) );
        
        //Set the models training mu and sigma 
        models[k].setTrainingMuAndSigma(mu,sigma);
        
        if( !models[k].recomputeNullRejectionThreshold(nullRejectionCoeff) && useNullRejection ){
            warningLog << "train(LabelledClassificationData &trainingData) - Failed to recompute rejection threshold for class " << classLabel << " - the nullRjectionCoeff value is too high!" << endl;
        }
        
        //cout << "Training Mu: " << mu << " TrainingSigma: " << sigma << " RejectionThreshold: " << models[k].getNullRejectionThreshold() << endl;
        //models[k].printModelValues();
    }
    
    //Reset the class labels
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = models[k].getClassLabel();
    }
    
    //Resize the rejection thresholds
    nullRejectionThresholds.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        nullRejectionThresholds[k] = models[k].getNullRejectionThreshold();
    }
    
    //Flag that the models have been trained
    trained = true;
    
    return true;
}
int main (int argc, const char * argv[])
{

    //Create a new KNN classifier with a K value of 10
    KNN knn(10);
    knn.setNullRejectionCoeff( 10 );
    knn.enableScaling( true );
    knn.enableNullRejection( true );
    
    //Train the classifier with some training data
    LabelledClassificationData trainingData;
    
    if( !trainingData.loadDatasetFromFile("KNNTrainingData.txt") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }
    
    //Use 20% of the training dataset to create a test dataset
    LabelledClassificationData testData = trainingData.partition( 80 );
    
    //Train the classifier
    bool trainSuccess = knn.train( trainingData );
    
    if( !trainSuccess ){
        cout << "Failed to train classifier!\n";
        return EXIT_FAILURE;
    }
    
    //Save the knn model to a file
    bool saveSuccess = knn.saveModelToFile("KNNModel.txt");
    
    if( !saveSuccess ){
        cout << "Failed to save the classifier model!\n";
        return EXIT_FAILURE;
    }
    
    //Load the knn model from a file
    bool loadSuccess = knn.loadModelFromFile("KNNModel.txt");
    
    if( !loadSuccess ){
        cout << "Failed to load the classifier model!\n";
        return EXIT_FAILURE;
    }
    
    //Use the test dataset to test the KNN model
    double accuracy = 0;
    for(UINT i=0; i<testData.getNumSamples(); i++){
        //Get the i'th test sample
        UINT classLabel = testData[i].getClassLabel();
        vector< double > inputVector = testData[i].getSample();
        
        //Perform a prediction using the classifier
        bool predictSuccess = knn.predict( inputVector );
        
        if( !predictSuccess ){
            cout << "Failed to perform prediction for test sampel: " << i <<"\n";
            return EXIT_FAILURE;
        }
        
        //Get the predicted class label
        UINT predictedClassLabel = knn.getPredictedClassLabel();
        vector< double > classLikelihoods = knn.getClassLikelihoods();
        vector< double > classDistances = knn.getClassDistances();
        
        //Update the accuracy
        if( classLabel == predictedClassLabel ) accuracy++;
        
        cout << "TestSample: " << i <<  " ClassLabel: " << classLabel << " PredictedClassLabel: " << predictedClassLabel << endl;
    }
    
    cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl;
    
    return EXIT_SUCCESS;

}
bool MinDist::train(LabelledClassificationData &labelledTrainingData,double gamma){
    
    const unsigned int M = labelledTrainingData.getNumSamples();
    const unsigned int N = labelledTrainingData.getNumDimensions();
    const unsigned int K = labelledTrainingData.getNumClasses();
    trained = false;
    models.clear();
    classLabels.clear();
    
    if( M == 0 ){
        errorLog << "train(LabelledClassificationData &labelledTrainingData,double gamma) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( M <= numClusters ){
        errorLog << "train(LabelledClassificationData &labelledTrainingData,double gamma) - There are not enough training samples for the number of clusters. Either reduce the number of clusters or increase the number of training samples!" << endl;
        return false;
    }

    numFeatures = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = labelledTrainingData.getRanges();
    
    //Train each of the models
	for(UINT k=0; k<numClasses; k++){
        
        //Get the class label for the kth class
        UINT classLabel = labelledTrainingData.getClassTracker()[k].classLabel;
        
        //Set the kth class label
        classLabels[k] = classLabel;
        
        //Get all the training data for this class
        LabelledClassificationData classData = labelledTrainingData.getClassData(classLabel);
        MatrixDouble data(classData.getNumSamples(),N);
        
        //Copy the training data into a matrix, scaling the training data if needed
        for(UINT i=0; i<data.getNumRows(); i++){
            for(UINT j=0; j<data.getNumCols(); j++){
                if( useScaling ){
                    data[i][j] = scale(classData[i][j],ranges[j].minValue,ranges[j].maxValue,0,1);
                }else data[i][j] = classData[i][j];
            }
        }
        
        //Train the model for this class
		models[k].setGamma( gamma );
		if( !models[k].train(classLabel,data,numClusters) ){
            errorLog << "train(LabelledClassificationData &labelledTrainingData,double gamma) - Failed to train model for class: " << classLabel;
            errorLog << ". This is might be because this class does not have enough training samples! You should reduce the number of clusters or increase the number of training samples for this class." << endl;
            models.clear();
            return false;
        }
        
	}
    
    trained = true;
    return true;
}
bool ANBC::train(LabelledClassificationData &labelledTrainingData,double gamma) {

    const unsigned int M = labelledTrainingData.getNumSamples();
    const unsigned int N = labelledTrainingData.getNumDimensions();
    const unsigned int K = labelledTrainingData.getNumClasses();
    trained = false;
    models.clear();
    classLabels.clear();

    if( M == 0 ) {
        errorLog << "train(LabelledClassificationData &labelledTrainingData,double gamma) - Training data has zero samples!" << endl;
        return false;
    }

    if( weightsDataSet ) {
        if( weightsData.getNumDimensions() != N ) {
            errorLog << "train(LabelledClassificationData &labelledTrainingData,double gamma) - The number of dimensions in the weights data (" << weightsData.getNumDimensions() << ") is not equal to the number of dimensions of the training data (" << N << ")" << endl;
            return false;
        }
    }

    numFeatures = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = labelledTrainingData.getRanges();

    //Train each of the models
    for(UINT k=0; k<numClasses; k++) {

        //Get the class label for the kth class
        UINT classLabel = labelledTrainingData.getClassTracker()[k].classLabel;

        //Set the kth class label
        classLabels[k] = classLabel;

        //Get the weights for this class
        VectorDouble weights(numFeatures);
        if( weightsDataSet ) {
            bool weightsFound = false;
            for(UINT i=0; i<weightsData.getNumSamples(); i++) {
                if( weightsData[i].getClassLabel() == classLabel ) {
                    weights = weightsData[i].getSample();
                    weightsFound = true;
                    break;
                }
            }

            if( !weightsFound ) {
                errorLog << "train(LabelledClassificationData &labelledTrainingData,double gamma) - Failed to find the weights for class " << classLabel << endl;
                return false;
            }
        } else {
            //If the weights data has not been set then all the weights are 1
            for(UINT j=0; j<numFeatures; j++) weights[j] = 1.0;
        }

        //Get all the training data for this class
        LabelledClassificationData classData = labelledTrainingData.getClassData(classLabel);
        MatrixDouble data(classData.getNumSamples(),N);

        //Copy the training data into a matrix, scaling the training data if needed
        for(UINT i=0; i<data.getNumRows(); i++) {
            for(UINT j=0; j<data.getNumCols(); j++) {
                if( useScaling ) {
                    data[i][j] = scale(classData[i][j],ranges[j].minValue,ranges[j].maxValue,MIN_SCALE_VALUE,MAX_SCALE_VALUE);
                } else data[i][j] = classData[i][j];
            }
        }

        //Train the model for this class
        models[k].gamma = gamma;
        if( !models[k].train(classLabel,data,weights) ) {
            errorLog << "train(LabelledClassificationData &labelledTrainingData,double gamma) - Failed to train model for class: " << classLabel << endl;

            //Try and work out why the training failed
            if( models[k].N == 0 ) {
                errorLog << "train(LabelledClassificationData &labelledTrainingData,double gamma) - N == 0!" << endl;
                models.clear();
                return false;
            }
            for(UINT j=0; j<numFeatures; j++) {
                if( models[k].mu[j] == 0 ) {
                    errorLog << "train(LabelledClassificationData &labelledTrainingData,double gamma) - The mean of column " << j+1 << " is zero! Check the training data" << endl;
                    models.clear();
                    return false;
                }
            }
            models.clear();
            return false;
        }

    }

    //Store the null rejection thresholds
    nullRejectionThresholds.resize(numClasses);
    for(UINT k=0; k<numClasses; k++) {
        nullRejectionThresholds[k] = models[k].threshold;
    }

    //Flag that the models have been trained
    trained = true;
    return trained;

}
bool KMeansQuantizer::train(LabelledClassificationData &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train( data );
}
int main (int argc, const char * argv[])
{
    //Load some training data from a file
    LabelledClassificationData trainingData;
    
    if( !trainingData.loadDatasetFromFile("HelloWorldTrainingData.txt") ){
        cout << "ERROR: Failed to load training data from file\n";
        return EXIT_FAILURE;
    }
    
    cout << "Data Loaded\n";
    
    //Print out some stats about the training data
    trainingData.printStats();
    
    //Partition the training data into a training dataset and a test dataset. 80 means that 80% 
    //of the data will be used for the training data and 20% will be returned as the test dataset
    LabelledClassificationData testData = trainingData.partition(80);
    
    //Create a new Gesture Recognition Pipeline using an Adaptive Naive Bayes Classifier
    GestureRecognitionPipeline pipeline;
    pipeline.setClassifier( ANBC() );
    
    //Train the pipeline using the training data
    if( !pipeline.train( trainingData ) ){
        cout << "ERROR: Failed to train the pipeline!\n";
        return EXIT_FAILURE;
    }
    
    //Test the pipeline using the test data
    if( !pipeline.test( testData ) ){
        cout << "ERROR: Failed to test the pipeline!\n";
        return EXIT_FAILURE;
    }
    
    //Print some stats about the testing
    cout << "Test Accuracy: " << pipeline.getTestAccuracy() << endl;
    
    cout << "Precision: ";
    for(UINT k=0; k<pipeline.getNumClassesInModel(); k++){
        UINT classLabel = pipeline.getClassLabels()[k];
        cout << "\t" << pipeline.getTestPrecision(classLabel);
    }cout << endl;
    
    cout << "Recall: ";
    for(UINT k=0; k<pipeline.getNumClassesInModel(); k++){
        UINT classLabel = pipeline.getClassLabels()[k];
        cout << "\t" << pipeline.getTestRecall(classLabel);
    }cout << endl;
    
    cout << "FMeasure: ";
    for(UINT k=0; k<pipeline.getNumClassesInModel(); k++){
        UINT classLabel = pipeline.getClassLabels()[k];
        cout << "\t" << pipeline.getTestFMeasure(classLabel);
    }cout << endl;
    
    Matrix< double > confusionMatrix = pipeline.getTestConfusionMatrix();
    cout << "ConfusionMatrix: \n";
    for(UINT i=0; i<confusionMatrix.getNumRows(); i++){
        for(UINT j=0; j<confusionMatrix.getNumCols(); j++){
            cout << confusionMatrix[i][j] << "\t";
        }cout << endl;
    }
    
    return EXIT_SUCCESS;
}