예제 #1
0
파일: LDA.cpp 프로젝트: CV-IP/grt
/**
 Builds the within-class scatter matrix for the given dataset.

 For each of the numClasses entries in the dataset's class tracker, the samples
 for that class are extracted and the matrix returned by getCovarianceMatrix()
 is added, element by element, to a running total.

 @param data: the labelled dataset the per-class matrices are computed from
 @return a numInputDimensions x numInputDimensions matrix holding the summed per-class matrices
*/
MatrixFloat LDA::computeWithinClassScatterMatrix( ClassificationData &data ){

	const UINT dims = numInputDimensions;

	//Running total, starts out as an all-zero square matrix
	MatrixFloat withinScatter(dims,dims);
	withinScatter.setAllValues( 0 );

	for(UINT classIndex=0; classIndex<numClasses; classIndex++){

		//Pull out the samples that belong to this class and get their matrix
		const UINT label = data.getClassTracker()[classIndex].classLabel;
		ClassificationData samplesForClass = data.getClassData( label );
		MatrixFloat classScatter = samplesForClass.getCovarianceMatrix();

		//Accumulate it into the running total
		for(UINT row=0; row<dims; row++){
			for(UINT col=0; col<dims; col++){
				withinScatter[row][col] += classScatter[row][col];
			}
		}
	}

	return withinScatter;
}
예제 #2
0
파일: MinDist.cpp 프로젝트: ios4u/grt
/**
 Trains one MinDist model per class in the labelled training data.

 The previous model is cleared first. Training is refused when the dataset is
 empty or when the total number of samples is not greater than numClusters.
 When useScaling is set the training data is scaled to [0 1] in place (the
 ranges are recorded beforehand). If any per-class model fails to train, the
 models buffer is cleared and false is returned.

 @param labelledTrainingData: the labelled training data, scaled in place if useScaling is true
 @return true if a model was trained for every class, false otherwise
*/
bool MinDist::train_(ClassificationData &labelledTrainingData){

    //Start from a clean slate
    clear();

    const unsigned int sampleCount = labelledTrainingData.getNumSamples();
    const unsigned int featureCount = labelledTrainingData.getNumDimensions();
    const unsigned int classCount = labelledTrainingData.getNumClasses();

    if( sampleCount == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }

    if( sampleCount <= numClusters ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - There are not enough training samples for the number of clusters. Either reduce the number of clusters or increase the number of training samples!" << endl;
        return false;
    }

    //Size the model state for this dataset
    numInputDimensions = featureCount;
    numClasses = classCount;
    models.resize(classCount);
    classLabels.resize(classCount);
    nullRejectionThresholds.resize(classCount);

    //The ranges have to be captured before the data is (optionally) scaled
    ranges = labelledTrainingData.getRanges();
    if( useScaling ){
        labelledTrainingData.scale(0, 1);
    }

    //Build one model per class
    for(UINT classIndex=0; classIndex<numClasses; classIndex++){

        //Look up and store the label of this class
        UINT classLabel = labelledTrainingData.getClassTracker()[classIndex].classLabel;
        classLabels[classIndex] = classLabel;

        //Copy the samples of this class into a plain matrix
        ClassificationData classData = labelledTrainingData.getClassData(classLabel);
        MatrixDouble data(classData.getNumSamples(),featureCount);
        const UINT rows = data.getNumRows();
        const UINT cols = data.getNumCols();
        for(UINT r=0; r<rows; r++){
            for(UINT c=0; c<cols; c++){
                data[r][c] = classData[r][c];
            }
        }

        //Train the model for this class
        models[classIndex].setGamma( nullRejectionCoeff );
        const bool modelTrained = models[classIndex].train(classLabel,data,numClusters);
        if( !modelTrained ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to train model for class: " << classLabel;
            errorLog << ". This is might be because this class does not have enough training samples! You should reduce the number of clusters or increase the number of training samples for this class." << endl;
            models.clear();
            return false;
        }

        //Keep the rejection threshold computed by the model
        nullRejectionThresholds[classIndex] = models[classIndex].getRejectionThreshold();
    }

    trained = true;
    return true;
}
예제 #3
0
파일: Softmax.cpp 프로젝트: nickgillian/grt
/**
 Trains one softmax model per class and then measures the training (and,
 optionally, validation) accuracy.

 The previous model is cleared first. When useScaling is set the training data
 is scaled to [0 1] in place (the ranges are recorded beforehand). When
 useValidationSet is set, validationData is taken from
 trainingData.split( 100-validationSetSize ) before the models are trained.

 While the accuracy is being computed useScaling is switched off, because the
 data passed to computeAccuracy() has already been scaled. The original value
 is restored on every exit path after that point, including the failure paths,
 so a failed accuracy computation does not leave scaling disabled.

 @param trainingData: the labelled training data, modified in place by the scaling and split steps
 @return true if every model was trained and the accuracy stats were computed, false otherwise
*/
bool Softmax::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << __GRT_LOG__ << " Training data has zero samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = K;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    ClassificationData validationData;
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    if( useValidationSet ){
        validationData = trainingData.split( 100-validationSetSize );
    }
    
    //Train a regression model for each class in the training data
    for(UINT k=0; k<numClasses; k++){
        
        //Set the class label
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
        
        //Train the model
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << __GRT_LOG__ << " Failed to train model for class: " << classLabels[k] << std::endl;
            return false;
        }
    }

    //Flag that the models have been trained
    trained = true;
    converged = true;

    //Compute the final training stats
    trainingSetAccuracy = 0;
    validationSetAccuracy = 0;

    //If scaling was on, then the data will already be scaled, so turn it off temporarily so we can test the model accuracy
    const bool scalingState = useScaling;
    useScaling = false;
    if( !computeAccuracy( trainingData, trainingSetAccuracy ) ){
        //Put the scaling flag back before bailing out, otherwise scaling stays disabled
        useScaling = scalingState;
        trained = false;
        converged = false;
        errorLog << __GRT_LOG__ << " Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
        return false;
    }
    
    if( useValidationSet ){
        if( !computeAccuracy( validationData, validationSetAccuracy ) ){
            //Put the scaling flag back before bailing out, otherwise scaling stays disabled
            useScaling = scalingState;
            trained = false;
            converged = false;
            errorLog << __GRT_LOG__ << " Failed to compute validation set accuracy! Failed to fully train model!" << std::endl;
            return false;
        }
    }

    //Reset the scaling state for future prediction
    useScaling = scalingState;

    trainingLog << "Training set accuracy: " << trainingSetAccuracy << std::endl;

    if( useValidationSet ){
        trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
    }

    return trained;
}
예제 #4
0
파일: ANBC.cpp 프로젝트: jdelfes/grt
/**
 Trains one ANBC model per class in the labelled training data.

 The previous model is cleared first. Training is refused when the dataset is
 empty, or when weights data has been set and its dimensionality differs from
 that of the training data. When useScaling is set the training data is scaled
 to [0 1] in place (the ranges are recorded beforehand).

 Each class is trained with a weights vector: the first entry in weightsData
 whose label matches the class when weights data has been set (training fails
 if there is none), otherwise a vector of ones. If a model fails to train, a
 diagnostic is logged, the models buffer is cleared and false is returned.

 @param labelledTrainingData: the labelled training data, scaled in place if useScaling is true
 @return true if a model was trained for every class, false otherwise
*/
bool ANBC::train_(ClassificationData &labelledTrainingData){

    //Start from a clean slate
    clear();

    const unsigned int sampleCount = labelledTrainingData.getNumSamples();
    const unsigned int featureCount = labelledTrainingData.getNumDimensions();
    const unsigned int classCount = labelledTrainingData.getNumClasses();

    if( sampleCount == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }

    //The weights (if any) must have one value per input dimension
    if( weightsDataSet && weightsData.getNumDimensions() != featureCount ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - The number of dimensions in the weights data (" << weightsData.getNumDimensions() << ") is not equal to the number of dimensions of the training data (" << featureCount << ")" << endl;
        return false;
    }

    //Size the model state for this dataset
    numInputDimensions = featureCount;
    numClasses = classCount;
    models.resize(classCount);
    classLabels.resize(classCount);

    //The ranges have to be captured before the data is (optionally) scaled
    ranges = labelledTrainingData.getRanges();
    if( useScaling ){
        labelledTrainingData.scale(0, 1);
    }

    //Build one model per class
    for(UINT classIndex=0; classIndex<numClasses; classIndex++){

        //Look up and store the label of this class
        UINT classLabel = labelledTrainingData.getClassTracker()[classIndex].classLabel;
        classLabels[classIndex] = classLabel;

        //Work out the weights for this class
        VectorDouble weights(numInputDimensions);
        if( !weightsDataSet ){
            //No weights data has been set, so every dimension gets a weight of 1
            for(UINT d=0; d<numInputDimensions; d++) weights[d] = 1.0;
        }else{
            //Use the first weights entry whose label matches this class
            bool located = false;
            const UINT numWeightRows = weightsData.getNumSamples();
            for(UINT w=0; w<numWeightRows && !located; w++){
                if( weightsData[w].getClassLabel() == classLabel ){
                    weights = weightsData[w].getSample();
                    located = true;
                }
            }

            if( !located ){
                errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to find the weights for class " << classLabel << endl;
                return false;
            }
        }

        //Copy the samples of this class into a plain matrix
        ClassificationData classData = labelledTrainingData.getClassData(classLabel);
        MatrixDouble data(classData.getNumSamples(),featureCount);
        const UINT rows = data.getNumRows();
        const UINT cols = data.getNumCols();
        for(UINT r=0; r<rows; r++){
            for(UINT c=0; c<cols; c++){
                data[r][c] = classData[r][c];
            }
        }

        //Train the model for this class
        models[classIndex].gamma = nullRejectionCoeff;
        if( models[classIndex].train(classLabel,data,weights) ){
            continue;
        }

        //Training failed: log the failure, then try to report a likely cause
        errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to train model for class: " << classLabel << endl;

        if( models[classIndex].N == 0 ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - N == 0!" << endl;
        }else{
            //Only the first column with a zero mean is reported
            for(UINT d=0; d<numInputDimensions; d++){
                if( models[classIndex].mu[d] == 0 ){
                    errorLog << "train_(ClassificationData &labelledTrainingData) - The mean of column " << d+1 << " is zero! Check the training data" << endl;
                    break;
                }
            }
        }

        models.clear();
        return false;
    }

    //Collect the rejection threshold computed by each model
    nullRejectionThresholds.resize(numClasses);
    for(UINT classIndex=0; classIndex<numClasses; classIndex++){
        nullRejectionThresholds[classIndex] = models[classIndex].threshold;
    }

    //Flag that the models have been trained
    trained = true;
    return trained;

}
예제 #5
0
파일: GMM.cpp 프로젝트: BryanBo-Cao/grt
/**
 Trains one Gaussian mixture model per class in the training data.

 The previous models are cleared first. The training data is always scaled in
 place to [GMM_MIN_SCALE_VALUE GMM_MAX_SCALE_VALUE] (the ranges are recorded
 beforehand). For each class a GaussianMixtureModels instance with
 numMixtureModels clusters is trained on that class's samples; the mean,
 covariance, inverse covariance and determinant of each mixture component are
 stored in models[k]. The mixture likelihood of every training sample of the
 class is then averaged to get the training mu used for the null rejection
 threshold.

 @param trainingData: the labelled training data, scaled in place
 @return true if a mixture model was trained for every class, false otherwise
*/
bool GMM::train_(ClassificationData &trainingData){
    
    //Clear any old models
	clear();
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data is empty!" << std::endl;
        return false;
    }
    
    //Set the number of features and number of classes and resize the models buffer
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    models.resize(numClasses);
    
    //Warn (but carry on) when there are 6 or more input dimensions
    if( numInputDimensions >= 6 ){
        warningLog << "train_(ClassificationData &trainingData) - The number of features in your training data is high (" << numInputDimensions << ").  The GMMClassifier does not work well with high dimensional data, you might get better results from one of the other classifiers." << std::endl;
    }
    
    //Record the ranges before scaling. Note the scaling below is unconditional, it is not gated on a scaling flag
	ranges = trainingData.getRanges();
    if( !trainingData.scale(GMM_MIN_SCALE_VALUE, GMM_MAX_SCALE_VALUE) ){
        errorLog << "train_(ClassificationData &trainingData) - Failed to scale training data!" << std::endl;
        return false;
    }

    //Fit a Mixture Model to each class (independently)
    for(UINT k=0; k<numClasses; k++){
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        ClassificationData classData = trainingData.getClassData( classLabel );
        
        //Train the Mixture Model for this class
        GaussianMixtureModels gaussianMixtureModel;
        gaussianMixtureModel.setNumClusters( numMixtureModels );
        gaussianMixtureModel.setMinChange( minChange );
        gaussianMixtureModel.setMaxNumEpochs( maxIter );
        
        //NOTE(review): unlike the matrix inversion failure below, this path returns without clearing the models buffer
        if( !gaussianMixtureModel.train( classData.getDataAsMatrixFloat() ) ){
            errorLog << "train_(ClassificationData &trainingData) - Failed to train Mixture Model for class " << classLabel << std::endl;
            return false;
        }
        
        //Setup the model container
        models[k].resize( numMixtureModels );
        models[k].setClassLabel( classLabel );
        
        //Store the mixture model in the container
        for(UINT j=0; j<numMixtureModels; j++){
            models[k][j].mu = gaussianMixtureModel.getMu().getRowVector(j);
            models[k][j].sigma = gaussianMixtureModel.getSigma()[j];
            
            //Compute the determinant and invSigma for the realtime prediction
            LUDecomposition ludcmp( models[k][j].sigma );
            if( !ludcmp.inverse( models[k][j].invSigma ) ){
                models.clear();
                errorLog << "train_(ClassificationData &trainingData) - Failed to invert Matrix for class " << classLabel << "!" << std::endl;
                return false;
            }
            models[k][j].det = ludcmp.det();
        }
        
        //Compute the normalize factor
        models[k].recomputeNormalizationFactor();
        
        //Compute the rejection thresholds: first sum the mixture likelihood of every training sample of this class
        Float mu = 0;
        Float sigma = 0;
        VectorFloat predictionResults(classData.getNumSamples(),0);
        for(UINT i=0; i<classData.getNumSamples(); i++){
            VectorFloat sample = classData[i].getSample();
            predictionResults[i] = models[k].computeMixtureLikelihood( sample );
            mu += predictionResults[i];
        }
        
        //Update mu (turn the sum into the mean likelihood)
        mu /= Float( classData.getNumSamples() );
        
        //Calculate the standard deviation (the divisor is zero when the class has a single sample)
        for(UINT i=0; i<classData.getNumSamples(); i++) 
            sigma += grt_sqr( (predictionResults[i]-mu) );
        sigma = grt_sqrt( sigma / (Float(classData.getNumSamples())-1.0) );
        //NOTE(review): the standard deviation computed above is discarded here, sigma is always 0.2.
        //This looks like a tuning/debug leftover - confirm whether the fixed value is intended
        sigma = 0.2;
        
        //Set the models training mu and sigma 
        models[k].setTrainingMuAndSigma(mu,sigma);
        
        //A failed threshold recompute is only reported when null rejection is enabled, and it does not abort the training
        if( !models[k].recomputeNullRejectionThreshold(nullRejectionCoeff) && useNullRejection ){
            warningLog << "train_(ClassificationData &trainingData) - Failed to recompute rejection threshold for class " << classLabel << " - the nullRjectionCoeff value is too high!" << std::endl;
        }
        
        //cout << "Training Mu: " << mu << " TrainingSigma: " << sigma << " RejectionThreshold: " << models[k].getNullRejectionThreshold() << std::endl;
        //models[k].printModelValues();
    }
    
    //Reset the class labels (copied back from the trained models)
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = models[k].getClassLabel();
    }
    
    //Resize the rejection thresholds and copy them from the trained models
    nullRejectionThresholds.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        nullRejectionThresholds[k] = models[k].getNullRejectionThreshold();
    }
    
    //Flag that the models have been trained
    trained = true;
    
    return true;
}
예제 #6
0
/**
 Trains the KNN classifier, optionally searching for the best K value first.

 The previous model is cleared, the ranges are recorded, the training data is
 scaled to [0 1] in place when useScaling is set, and a copy of the data is
 stored in this->trainingData.

 When searchForBestKValue is false the model is trained directly with K.
 Otherwise every k in [minKSearchValue maxKSearchValue] is evaluated: a copy of
 the training data is partitioned with partition(80,true), a model is trained
 on the training part and scored on the test part. The smallest k that reaches
 the highest accuracy is then used to train the final model on all of the
 training data. A k whose model fails to train, or whose test partition is
 empty, is skipped.

 @param trainingData: the labelled training data, scaled in place if useScaling is true
 @return true if the final model was trained; false if the data is empty, a test prediction
         fails, or no k in the search range gives an accuracy above zero
*/
bool KNN::train_(ClassificationData &trainingData){
    
    //Clear any previous models
    clear();
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    //Get the ranges of the data
    ranges = trainingData.getRanges();
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Store the number of features, classes and the training data
    this->numInputDimensions = trainingData.getNumDimensions();
    this->numClasses = trainingData.getNumClasses();
    
    //TODO: In the future need to build a kdtree from the training data to allow better realtime prediction
    this->trainingData = trainingData;
    
    //Set the class labels
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
    }

    //If we do not need to search for the best K value, then call the sub training function and return the result
    if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    //If we have got this far then we are going to search for the best K value.
    //k is visited in ascending order and bestK is only replaced on a strict improvement,
    //so bestK always ends up as the smallest k that reaches the highest accuracy
    UINT bestK = 0;
    double bestAccuracy = 0;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data and use 80% to train the algorithm and 20% to test it
        ClassificationData trainingSet(trainingData);
        ClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
            continue;
        }

        //An empty test set would give an accuracy of 0/0 (NaN), so there is nothing to score for this k
        const UINT numTestSamples = testSet.getNumSamples();
        if( numTestSamples == 0 ){
            errorLog << "The test set is empty, unable to compute the accuracy for a k value of " << k << endl;
            continue;
        }

        //Count the number of correctly classified test samples
        double numCorrect = 0;
        for(UINT i=0; i<numTestSamples; i++){

            VectorDouble sample = testSet[i].getSample();

            if( !predict( sample , k) ){
                errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                return false;
            }

            if( testSet[i].getClassLabel() == predictedClassLabel ){
                numCorrect++;
            }
        }

        const double accuracy = numCorrect / double( numTestSamples ) * 100.0;

        trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

        if( accuracy > bestAccuracy ){
            bestAccuracy = accuracy;
            bestK = k;
        }
    }

    if( bestAccuracy > 0 ){
        trainingLog << "Best K Value: " << bestK << "\tAccuracy:\t" << bestAccuracy << endl;

        //We now need to train the model again to make sure all the training metrics are computed correctly
        return train_(trainingData,bestK);
    }

    errorLog << "Failed to find a k value with an accuracy above zero in the range [" << minKSearchValue << " " << maxKSearchValue << "]" << endl;
    return false;
}
예제 #7
0
파일: MinDist.cpp 프로젝트: nickgillian/grt
/**
 Trains one MinDist model per class and then measures the training (and,
 optionally, validation) accuracy.

 The previous model is cleared first. Training is refused when the dataset is
 empty or when the total number of samples is not greater than numClusters.
 When useScaling is set the training data is scaled to [0 1] in place (the
 ranges are recorded beforehand). When useValidationSet is set, validationData
 is taken from trainingData.split( 100-validationSetSize ) before the models
 are trained. If any per-class model fails to train, the models buffer is
 cleared and false is returned.

 While the accuracy is being computed useScaling is switched off, because the
 data passed to computeAccuracy() has already been scaled. The original value
 is restored on every exit path after that point, including the failure paths,
 so a failed accuracy computation does not leave scaling disabled.

 @param trainingData: the labelled training data, modified in place by the scaling and split steps
 @return true if every model was trained and the accuracy stats were computed, false otherwise
*/
bool MinDist::train_(ClassificationData &trainingData){
    
    //Clear any previous models
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << __GRT_LOG__ << " Training data has zero samples!" << std::endl;
        return false;
    }
    
    if( M <= numClusters ){
        errorLog << __GRT_LOG__ << " There are not enough training samples for the number of clusters. Either reduce the number of clusters or increase the number of training samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = K;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    nullRejectionThresholds.resize(K);
    ranges = trainingData.getRanges();
    ClassificationData validationData;
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    if( useValidationSet ){
        validationData = trainingData.split( 100-validationSetSize );
    }
    
    //Train each of the models
    for(UINT k=0; k<numClasses; k++){
        
        //Get the class label for the kth class
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        
        trainingLog << "Training model for class: " << classLabel << std::endl;

        //Pass the logging state onto the kmeans algorithm
        models[k].setTrainingLoggingEnabled( this->getTrainingLoggingEnabled() );
        
        //Set the kth class label
        classLabels[k] = classLabel;
        
        //Get all the training data for this class
        ClassificationData classData = trainingData.getClassData(classLabel);
        MatrixFloat data(classData.getNumSamples(),N);
        
        //Copy the training data into a matrix
        for(UINT i=0; i<data.getNumRows(); i++){
            for(UINT j=0; j<data.getNumCols(); j++){
                data[i][j] = classData[i][j];
            }
        }
        
        //Train the model for this class
        models[k].setGamma( nullRejectionCoeff );
        if( !models[k].train(classLabel,data,numClusters,minChange,maxNumEpochs) ){
            errorLog << __GRT_LOG__ << " Failed to train model for class: " << classLabel;
            errorLog << ". This is might be because this class does not have enough training samples! You should reduce the number of clusters or increase the number of training samples for this class." << std::endl;
            models.clear();
            return false;
        }
            
        //Set the null rejection threshold
        nullRejectionThresholds[k] = models[k].getRejectionThreshold();
    }

    //Flag that the models have been trained
    trained = true;
    converged = true;

    //Compute the final training stats
    trainingSetAccuracy = 0;
    validationSetAccuracy = 0;

    //If scaling was on, then the data will already be scaled, so turn it off temporarily so we can test the model accuracy
    const bool scalingState = useScaling;
    useScaling = false;
    if( !computeAccuracy( trainingData, trainingSetAccuracy ) ){
        //Put the scaling flag back before bailing out, otherwise scaling stays disabled
        useScaling = scalingState;
        trained = false;
        converged = false;
        errorLog << __GRT_LOG__ << " Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
        return false;
    }
    
    if( useValidationSet ){
        if( !computeAccuracy( validationData, validationSetAccuracy ) ){
            //Put the scaling flag back before bailing out, otherwise scaling stays disabled
            useScaling = scalingState;
            trained = false;
            converged = false;
            errorLog << __GRT_LOG__ << " Failed to compute validation set accuracy! Failed to fully train model!" << std::endl;
            return false;
        }
    }

    //Reset the scaling state for future prediction
    useScaling = scalingState;

    trainingLog << "Training set accuracy: " << trainingSetAccuracy << std::endl;

    if( useValidationSet ){
        trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
    }

    return trained;
}
예제 #8
0
/**
 Trains a RandomForests classifier from the options held by the command line
 parser and saves the resulting pipeline.

 The "filename" option is required: usage is printed and false is returned when
 it cannot be read. The "model-filename", "forest-size", "max-depth",
 "min-node-size", "num-splits", "remove-features" and "bootstrap-weight"
 options are read without checking the result of parser.get().

 A failure to save the trained pipeline is only logged as a warning; the
 function still returns true in that case.

 @param parser: the command line parser the options are read from
 @return false if the filename is missing, the dataset fails to load or the training fails, true otherwise
*/
bool train( CommandLineParser &parser ){

    //The dataset filename is the only option that must be present
    string datasetPath;
    if( !parser.get("filename",datasetPath) ){
        errorLog << "Failed to parse filename from command line! You can set the filename using the -f." << endl;
        printUsage();
        return false;
    }

    //Where the trained model gets written
    string modelPath;
    parser.get("model-filename",modelPath);

    //Number of trees in the forest
    unsigned int numTrees = 0;
    parser.get("forest-size",numTrees);

    //Maximum depth of each tree
    unsigned int treeDepthLimit = 0;
    parser.get("max-depth",treeDepthLimit);

    //Minimum number of samples allowed per node
    unsigned int minSamplesPerNode = 0;
    parser.get("min-node-size",minSamplesPerNode);

    //Number of random splits used per node
    unsigned int randomSplitCount = 0;
    parser.get("num-splits",randomSplitCount);

    //Whether selected features are removed at each split
    bool dropFeaturesAtSplit = false;
    parser.get("remove-features",dropFeaturesAtSplit);

    //Weight of the bootstrapped dataset
    double bootstrapFraction = 0.0;
    parser.get("bootstrap-weight",bootstrapFraction);

    //Load the dataset the classifier will be trained on
    ClassificationData trainingData;

    infoLog << "- Loading Training Data..." << endl;
    if( !trainingData.load( datasetPath ) ){
        errorLog << "Failed to load training data!\n";
        return false;
    }

    //Print a short summary of what was loaded
    const unsigned int numDimensions = trainingData.getNumDimensions();
    Vector< ClassTracker > classStats = trainingData.getClassTracker();
    infoLog << "- Num training samples: " << trainingData.getNumSamples() << endl;
    infoLog << "- Num dimensions: " << numDimensions << endl;
    infoLog << "- Num classes: " << trainingData.getNumClasses() << endl;
    infoLog << "- Class stats: " << endl;
    for(unsigned int c=0; c<classStats.getSize(); c++){
        infoLog << "- class " << classStats[c].classLabel << " number of samples: " << classStats[c].counter << endl;
    }

    //Create the forest and pick the node type used by each of its trees
    RandomForests forest;

    const string nodeType = "cluster-node"; //TODO: make this a command line option in the future
    if( nodeType == "cluster-node" ){
        forest.setDecisionTreeNode( DecisionTreeClusterNode() );
    }else if( nodeType == "threshold-node" ){
        forest.setTrainingMode( Tree::BEST_RANDOM_SPLIT );
        forest.setDecisionTreeNode( DecisionTreeThresholdNode() );
    }

    //Apply the options read from the command line
    forest.setForestSize( numTrees );
    forest.setMaxDepth( treeDepthLimit );
    forest.setMinNumSamplesPerNode( minSamplesPerNode );
    forest.setNumRandomSplits( randomSplitCount );
    forest.setRemoveFeaturesAtEachSplit( dropFeaturesAtSplit );
    forest.setBootstrappedDatasetWeight( bootstrapFraction );

    //Wrap the classifier in a pipeline
    GestureRecognitionPipeline pipeline;
    pipeline.setClassifier( forest );

    infoLog << "- Training model..." << endl;

    if( !pipeline.train( trainingData ) ){
        errorLog << "Failed to train classifier!" << endl;
        return false;
    }

    infoLog << "- Model trained!" << endl;
    infoLog << "- Training time: " << (pipeline.getTrainingTime() * 0.001) / 60.0 << " (minutes)" << endl;
    infoLog << "- Saving model to: " << modelPath << endl;

    //A failed save is reported but does not change the return value
    if( !pipeline.save( modelPath ) ){
        warningLog << "Failed to save model to file: " << modelPath << endl;
    }

    return true;
}