Example #1
bool ClassificationData::merge(const ClassificationData &labelledData){

    if( labelledData.getNumDimensions() != numDimensions ){
        errorLog << "merge(const ClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl;
        return false;
    }

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();
    
    //Reserve the memory
    reserve( getNumSamples() + labelledData.getNumSamples() );

    //Add the data from the labelledData to this instance
    for(UINT i=0; i<labelledData.getNumSamples(); i++){
        addSample(labelledData[i].getClassLabel(), labelledData[i].getSample());
    }

    //Set the class names from the dataset
    vector< ClassTracker > classTracker = labelledData.getClassTracker();
    for(UINT i=0; i<classTracker.size(); i++){
        setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel);
    }

    //Sort the class labels
    sortClassLabels();

    return true;
}
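A minimal usage sketch for merge(), assuming GRT is installed and its umbrella header is available as <GRT/GRT.h>; the filenames here are hypothetical. merge() grows the calling dataset in place and fails if the dimension counts differ:

#include <GRT/GRT.h>
using namespace GRT;
using namespace std;

int main(){
    //Load two datasets recorded with the same number of dimensions (hypothetical filenames)
    ClassificationData datasetA, datasetB;
    if( !datasetA.load( "datasetA.grt" ) || !datasetB.load( "datasetB.grt" ) ){
        cout << "Failed to load the datasets!\n";
        return EXIT_FAILURE;
    }
    //merge() returns false if the dimension counts do not match
    if( !datasetA.merge( datasetB ) ){
        cout << "Failed to merge the datasets!\n";
        return EXIT_FAILURE;
    }
    return datasetA.save( "merged.grt" ) ? EXIT_SUCCESS : EXIT_FAILURE;
}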
Example #2
File: BAGTest.cpp Project: sgrignard/grt
// Tests the learning algorithm on a basic dataset
TEST(BAG, TrainBasicDataset) {
  
  BAG bag;

  //Check the module is not trained
  EXPECT_TRUE( !bag.getTrained() );

  //Generate a basic dataset
  const UINT numSamples = 10000;
  const UINT numClasses = 10;
  const UINT numDimensions = 100;
  ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 );
  ClassificationData trainingData;
  EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) );

  ClassificationData testData = trainingData.split( 50 );

  //Add an adaptive naive bayes classifier to the BAG ensemble
  bag.addClassifierToEnsemble( ANBC() );
  
  //Add a MinDist classifier to the BAG ensemble, using two clusters
  MinDist min_dist_two_clusters;
  min_dist_two_clusters.setNumClusters(2);
  bag.addClassifierToEnsemble( min_dist_two_clusters );
  
  //Add a MinDist classifier to the BAG ensemble, using five clusters
  MinDist min_dist_five_clusters;
  min_dist_five_clusters.setNumClusters(5);
  bag.addClassifierToEnsemble( min_dist_five_clusters );

  //Train the classifier
  EXPECT_TRUE( bag.train( trainingData ) );

  EXPECT_TRUE( bag.getTrained() );

  EXPECT_TRUE( bag.print() );

  for(UINT i=0; i<testData.getNumSamples(); i++){
    EXPECT_TRUE( bag.predict( testData[i].getSample() ) );
  }

  EXPECT_TRUE( bag.save( "bag_model.grt" ) );

  bag.clear();
  EXPECT_TRUE( !bag.getTrained() );

  EXPECT_TRUE( bag.load( "bag_model.grt" ) );

  EXPECT_TRUE( bag.getTrained() );

  for(UINT i=0; i<testData.getNumSamples(); i++){
    EXPECT_TRUE( bag.predict( testData[i].getSample() ) );
  }


}
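After each successful predict() call above, the standard GRT Classifier accessors can be queried (the same accessors appear in Examples #17 and #20 below); a short sketch of what the test loop body could additionally do, reusing the bag, testData, and i variables from the test above:

  //Sketch (assumes the test loop above): inspect the prediction made for testData[i]
  UINT predictedClassLabel = bag.getPredictedClassLabel();
  VectorFloat classLikelihoods = bag.getClassLikelihoods();
  cout << "TestSample: " << i << " PredictedClassLabel: " << predictedClassLabel << endl;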
Example #3
bool SVM::convertClassificationDataToLIBSVMFormat(ClassificationData &trainingData){
    
    //clear any previous problems
    deleteProblemSet();
    
    const UINT numTrainingExamples = trainingData.getNumSamples();
    numInputDimensions = trainingData.getNumDimensions();
    
    //Compute the ranges in case the data should be scaled
    ranges = trainingData.getRanges();
    
    //Init the memory
    prob.l = numTrainingExamples;
    prob.x = new svm_node*[numTrainingExamples];
    prob.y = new double[numTrainingExamples];
    problemSet = true;
    
    for(UINT i=0; i<numTrainingExamples; i++){
        //Set the class ID
        prob.y[i] = trainingData[i].getClassLabel();
        
        //Assign the memory for this training example, note that a dummy node is needed at the end of the vector
        prob.x[i] = new svm_node[numInputDimensions+1];
        for(UINT j=0; j<numInputDimensions; j++){
            prob.x[i][j].index = j+1;
            prob.x[i][j].value = trainingData[i].getSample()[j];
        }
        prob.x[i][numInputDimensions].index = -1; //Assign the final node value
        prob.x[i][numInputDimensions].value = 0;
    }
    
    return true;
}
Example #4
bool SVM::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    //Convert the labelled classification data into the LIBSVM data format
    if( !convertClassificationDataToLIBSVMFormat(trainingData) ){
        errorLog << "train_(ClassificationData &trainingData) - Failed To Convert Labelled Classification Data To LIBSVM Format!" << endl;
        return false;
    }
    
    if( useAutoGamma ) param.gamma = 1.0/numInputDimensions;
    
    //Train the model
    bool trainingResult = trainSVM();
    
    if( !trainingResult ){
        errorLog << "train_(ClassificationData &trainingData) - Failed To Train SVM Model!" << endl;
        return false;
    }
    
    return true;
}
Example #5
File: BAG.cpp Project: eboix/Myo-Gesture
bool BAG::train_(ClassificationData &trainingData){
    
    //Clear any previous models
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    UINT ensembleSize = (UINT)ensemble.size();
    
    if( ensembleSize == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The ensemble size is zero! You need to add some classifiers to the ensemble first." << endl;
        return false;
    }
    
    for(UINT i=0; i<ensembleSize; i++){
        if( ensemble[i] == NULL ){
            errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " has not been set!" << endl;
            return false;
        }
    }

    //Train the ensemble
    for(UINT i=0; i<ensembleSize; i++){
        ClassificationData boostedDataset = trainingData.getBootstrappedDataset();
        
        trainingLog << "Training ensemble " << i+1 << ". Ensemble type: " << ensemble[i]->getClassType() << endl;
        
        //Train the classifier with the bootstrapped dataset
        if( !ensemble[i]->train( boostedDataset ) ){
            errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " failed training!" << endl;
            return false;
        }
    }
    
    //Set the class labels
    classLabels = trainingData.getClassLabels();
    
    //Flag that the model has been trained
    trained = true;
    
    return trained;
}
Example #6
bool SwipeDetector::train_(ClassificationData &trainingData) {

    //Clear any previous models
    clear();

    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();

    if( M == 0 ) {
        errorLog << "train_(trainingData &labelledTrainingData) - Training data has zero samples!" << std::endl;
        return false;
    }

    numInputDimensions = N;
    numClasses = 2; //This is always 2 for swipe detection [1 == swipe detected, everything else means no swipe detected]
    classLabels.resize( 2 );
    classLabels[0] = 1; //Swipe
    classLabels[1] = 2; //No Swipe
    nullRejectionThresholds.resize(2,0);
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ) {
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    //We currently have no way to automatically train the swipe detection, user needs to manually set thresholds, so just flag the model is trained
    trained = true;

    return true;
}
Example #7
File: ANBC.cpp Project: jdelfes/grt
bool ANBC::setWeights(const ClassificationData &weightsData){
    
    if( weightsData.getNumSamples() > 0 ){
        weightsDataSet = true;
        this->weightsData = weightsData;
        return true;
    }
    return false;
}
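A hedged sketch of how the weights dataset can be constructed; as ANBC::train_() in Example #24 shows, each sample's class label selects the class it applies to and the sample vector holds one weight per input dimension (all values and the header path here are assumptions):

#include <GRT/GRT.h>
using namespace GRT;

int main(){
    ANBC anbc;
    //One weights sample per class: the label picks the class, the vector holds per-dimension weights
    ClassificationData weightsData;
    weightsData.setNumDimensions( 3 );
    VectorDouble weights(3);
    weights[0] = 1.0; weights[1] = 1.0; weights[2] = 0.0; //Effectively ignore the third dimension
    weightsData.addSample( 1, weights ); //Weights for the class with label 1
    return anbc.setWeights( weightsData ) ? 0 : 1;
}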
Example #8
File: KNNTest.cpp Project: sgrignard/grt
// Tests the learning algorithm on a basic dataset
TEST(KNN, TrainBasicDataset) {
  
  KNN knn;

  //Check the module is not trained
  EXPECT_TRUE( !knn.getTrained() );

  //Generate a basic dataset
  const UINT numSamples = 1000;
  const UINT numClasses = 10;
  const UINT numDimensions = 10;
  ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 );
  ClassificationData trainingData;
  EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) );

  ClassificationData testData = trainingData.split( 50 );

  //Train the classifier
  EXPECT_TRUE( knn.train( trainingData ) );

  EXPECT_TRUE( knn.getTrained() );

  EXPECT_TRUE( knn.print() );

  for(UINT i=0; i<testData.getNumSamples(); i++){
    EXPECT_TRUE( knn.predict( testData[i].getSample() ) );
  }

  EXPECT_TRUE( knn.save( "knn_model.grt" ) );

  knn.clear();

  EXPECT_TRUE( !knn.getTrained() );

  EXPECT_TRUE( knn.load( "knn_model.grt" ) );

  EXPECT_TRUE( knn.getTrained() );

  for(UINT i=0; i<testData.getNumSamples(); i++){
    EXPECT_TRUE( knn.predict( testData[i].getSample() ) );
  }


}
Example #9
bool HierarchicalClustering::train_(ClassificationData &trainingData){
	
	if( trainingData.getNumSamples() == 0 ){
		return false;
	}

    //Convert the labelled training data to a training matrix
	M = trainingData.getNumSamples();
    N = trainingData.getNumDimensions();

    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

	return train_( data );
}
Example #10
int main (int argc, const char * argv[])
{ 
    //Create a new gesture recognition pipeline
    GestureRecognitionPipeline pipeline;
    
    //Add an ANBC module
    pipeline.setClassifier( ANBC() );
    
    //Add a ClassLabelFilter as a post processing module with a minCount of 5 and a buffer size of 10
    pipeline.addPostProcessingModule( ClassLabelFilter(5,10) );
    
    //Load some training data to train and test the classifier
    ClassificationData trainingData;
    ClassificationData testData;
    
    if( !trainingData.loadDatasetFromFile("ClassLabelFilterTrainingData.txt") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }
    
    if( !testData.loadDatasetFromFile("ClassLabelFilterTestData.txt") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }
    
    //Train the classifier
    if( !pipeline.train( trainingData ) ){
        cout << "Failed to train classifier!\n";
        return EXIT_FAILURE;
    }
    
    //Use the test dataset to demonstrate the output of the ClassLabelFilter    
    for(UINT i=0; i<testData.getNumSamples(); i++){
        VectorDouble inputVector = testData[i].getSample();
        
        if( !pipeline.predict( inputVector ) ){
            cout << "Failed to perform prediction for test sampel: " << i <<"\n";
            return EXIT_FAILURE;
        }
        
        //Get the predicted class label (this will be the processed class label)
        UINT predictedClassLabel = pipeline.getPredictedClassLabel();
        
        //Get the unprocessed class label (i.e. the direct output of the classifier)
        UINT unprocessedClassLabel = pipeline.getUnProcessedPredictedClassLabel();
        
        //Also print the results to the screen
        cout << "Processed Class Label: \t" << predictedClassLabel << "\tUnprocessed Class Label: \t" << unprocessedClassLabel << endl;

    }
    
    return EXIT_SUCCESS;
}
Example #11
int main (int argc, const char * argv[])
{
    
    //Load the example data
    ClassificationData data;
    
    if( !data.loadDatasetFromFile("WiiAccShakeData.txt") ){
        cout << "ERROR: Failed to load data from file!\n";
        return EXIT_FAILURE;
    }

    //The variables used to initialize the zero crossing counter feature extraction
    UINT searchWindowSize = 20;
    double deadZoneThreshold = 0.01;
    UINT numDimensions = data.getNumDimensions();
    UINT featureMode = ZeroCrossingCounter::INDEPENDANT_FEATURE_MODE; //This could also be ZeroCrossingCounter::COMBINED_FEATURE_MODE
    
    //Create a new instance of the ZeroCrossingCounter feature extraction
    ZeroCrossingCounter zeroCrossingCounter(searchWindowSize,deadZoneThreshold,numDimensions,featureMode);
    
    //Loop over the accelerometer data, at each time sample (i) compute the features using the new sample and then write the results to a file
    for(UINT i=0; i<data.getNumSamples(); i++){
        
        //Compute the features using this new sample
        zeroCrossingCounter.computeFeatures( data[i].getSample() );
        
        //Write the data to the file
        cout << "InputVector: ";
        for(UINT j=0; j<data.getNumDimensions(); j++){
           cout << data[i].getSample()[j] << "\t";
        }
        
        //Get the latest feature vector
        VectorDouble featureVector = zeroCrossingCounter.getFeatureVector();
        
        //Write the features to the file
        cout << "FeatureVector: ";
        for(UINT j=0; j<featureVector.size(); j++){
            cout << featureVector[j];
            if( j != featureVector.size()-1 ) cout << "\t";
        }
        cout << endl;
    }
    
    //Save the zero crossing counter settings to a file
    zeroCrossingCounter.saveModelToFile("ZeroCrossingCounterSettings.txt");
    
    //You can then load the settings again if you need them
    zeroCrossingCounter.loadModelFromFile("ZeroCrossingCounterSettings.txt");
    
    return EXIT_SUCCESS;
}
Example #12
bool DecisionTreeClusterNode::computeSplit( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){

    const UINT M = trainingData.getNumSamples();
    const UINT N = features.getSize();
    const UINT K = classLabels.getSize();

    if( N == 0 ) return false;
    if( K == 0 ) return false;

    minError = grt_numeric_limits< Float >::max();
    Random random;
    UINT bestFeatureIndex = 0;
    Float bestThreshold = 0;
    Float threshold = 0;
    Float error = 0;
    Vector< UINT > groupIndex(M);
    Vector< MinMax > ranges = trainingData.getRanges();
    MatrixFloat data(M,1); //This will store our temporary data for each dimension

    //Randomly select which features we want to use
    UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
    Vector< UINT > randomFeatures = random.getRandomSubset( 0, N, numRandomFeatures );

    //Loop over each random feature and try and find the best split point
    for(UINT n=0; n<numRandomFeatures; n++){

        featureIndex = features[ randomFeatures[n] ];

        //Use the data in this feature dimension to create a sum dataset
        for(UINT i=0; i<M; i++){
            data[i][0] = trainingData[i][featureIndex];
        }

        if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){
            //Store the best threshold and feature index
            if( error < minError ){
                minError = error;
                bestThreshold = threshold;
                bestFeatureIndex = featureIndex;
            }
        }
    }

    //Set the best feature index that will be returned to the DecisionTree that called this function
    featureIndex = bestFeatureIndex;

    //Store the node size, feature index, best threshold and class probabilities for this node
    set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) );

    return true;
}
Example #13
File: KMeans.cpp Project: BryanBo-Cao/grt
bool KMeans::train_(ClassificationData &trainingData){
	
	if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The training data is empty!" << std::endl;
		return false;
	}
	
	//Set the numClusters as the number of classes in the training data
	numClusters = trainingData.getNumClasses();

    //Convert the labelled training data to a training matrix
	UINT M = trainingData.getNumSamples();
    UINT N = trainingData.getNumDimensions();
    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

    //Run the K-Means algorithm
    return train_( data );
}
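The MatrixFloat overload called on the last line above can also be used directly when the data has no labels; a small sketch with hypothetical values (the header path is an assumption):

#include <GRT/GRT.h>
using namespace GRT;

int main(){
    //Four 1-dimensional samples forming two obvious clusters (hypothetical values)
    MatrixFloat data(4,1);
    data[0][0] = 0.1; data[1][0] = 0.2; data[2][0] = 0.9; data[3][0] = 1.0;
    KMeans kmeans;
    kmeans.setNumClusters( 2 );
    return kmeans.train_( data ) ? 0 : 1;
}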
Example #14
int main (int argc, const char * argv[])
{
    //Load the example data
    ClassificationData data;
    
    if( !data.load("WiiAccShakeData.grt") ){
        cout << "ERROR: Failed to load data from file!\n";
        return EXIT_FAILURE;
    }

    //The variables used to initialize the MovementIndex feature extraction
    UINT windowSize = 10;
    UINT numDimensions = data.getNumDimensions();

    //Create a new instance of the MovementIndex feature extraction
    MovementIndex movementIndex(windowSize,numDimensions);
    
    //Loop over the accelerometer data, at each time sample (i) compute the features using the new sample and then write the results to a file
    for(UINT i=0; i<data.getNumSamples(); i++){
        
        //Compute the features using this new sample
        movementIndex.computeFeatures( data[i].getSample() );
        
        //Write the data
        cout << "InputVector: ";
        for(UINT j=0; j<data.getNumDimensions(); j++){
           cout << data[i].getSample()[j] << "\t";
        }
        
        //Get the latest feature vector
        VectorFloat featureVector = movementIndex.getFeatureVector();
        
        //Write the features
        cout << "FeatureVector: ";
        for(UINT j=0; j<featureVector.size(); j++){
            cout << featureVector[j];
            if( j != featureVector.size()-1 ) cout << "\t";
        }
        cout << endl;
    }
    
    //Save the MovementIndex settings to a file
    movementIndex.save("MovementIndexSettings.grt");
    
    //You can then load the settings again if you need them
    movementIndex.load("MovementIndexSettings.grt");
    
    return EXIT_SUCCESS;
}
Example #15
File: Softmax.cpp Project: BryanBo-Cao/grt
bool Softmax::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Train a regression model for each class in the training data
    for(UINT k=0; k<numClasses; k++){
        
        //Set the class label
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
        
        //Train the model
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << "train(ClassificationData labelledTrainingData) - Failed to train model for class: " << classLabels[k] << std::endl;
            return false;
        }
    }
    
    //Flag that the algorithm has been trained
    trained = true;
    return trained;
}
Example #16
int main(int argc, char * argv[])
{

    if( argc < 3 ){
        errorLog << "Not enough input arguments!" << endl;
        printUsage();
        return EXIT_FAILURE;
    }

    const string inputDirectory = argv[1];
    const string outputFilename = argv[2];

    //Parse the data directory for files
    vector< string > filenames;
    infoLog << "- Parsing data directory: " << inputDirectory << endl;

    if( !Util::parseDirectory( inputDirectory, ".csv", filenames ) ){
        errorLog << "Failed to parse input directory: " << inputDirectory << endl;
        return EXIT_FAILURE; 
    }

    if( filenames.size() == 0 ){
        errorLog << "Failed to find any files in the input directory: " << inputDirectory << endl;
        return EXIT_FAILURE; 
    }

    ClassificationData data;
    unsigned int numFiles = (unsigned int)filenames.size();
    bool dataLoaded = false;
    for(unsigned int i=0; i<numFiles; i++){
        //Load the data
        infoLog << "- Loading data " << i+1 << " of " << numFiles << endl;
        
        ClassificationData tmp;
        if( tmp.load( filenames[i] ) ){
            if( i==0 ){
                data.setNumDimensions( tmp.getNumDimensions() );
            }
            dataLoaded = true;

            infoLog << "- Data loaded.  Number of samples: " << tmp.getNumSamples() << endl;

            data.merge( tmp );
        }else{
            warningLog << "- Failed to load data!" << endl;
        }
    }

    if( dataLoaded ){

        infoLog << "- Merged data to generate new dataset with " << data.getNumSamples() << " samples" << endl;

        //Save the new datasets
        infoLog << "- Saving main dataset to file: " << outputFilename << endl;
        if( !data.save( outputFilename ) ){
            errorLog << "Failed to save output data: " << outputFilename << endl;
            return EXIT_FAILURE; 
        }

    }else{
        warningLog << "- Failed to load any data!" << endl;
        return EXIT_FAILURE; 
    } 

    return EXIT_SUCCESS;
}
Example #17
int main (int argc, const char * argv[])
{
    
    //Create a new Softmax instance
    Softmax softmax;
    
    //Load some training data to train the classifier
    ClassificationData trainingData;
    
    if( !trainingData.loadDatasetFromFile("SoftmaxTrainingData.txt") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }
    
    //Use 20% of the training dataset to create a test dataset
    ClassificationData testData = trainingData.partition( 80 );
    
    //Train the classifier
    if( !softmax.train( trainingData ) ){
        cout << "Failed to train classifier!\n";
        return EXIT_FAILURE;
    }
    
    //Save the Softmax model to a file
    if( !softmax.saveModelToFile("SoftmaxModel.txt") ){
        cout << "Failed to save the classifier model!\n";
        return EXIT_FAILURE;
    }
    
    //Load the Softmax model from a file
    if( !softmax.loadModelFromFile("SoftmaxModel.txt") ){
        cout << "Failed to load the classifier model!\n";
        return EXIT_FAILURE;
    }
    
    //Use the test dataset to test the softmax model
    double accuracy = 0;
    for(UINT i=0; i<testData.getNumSamples(); i++){
        //Get the i'th test sample
        UINT classLabel = testData[i].getClassLabel();
        vector< double > inputVector = testData[i].getSample();
        
        //Perform a prediction using the classifier
        if( !softmax.predict( inputVector ) ){
            cout << "Failed to perform prediction for test sample: " << i <<"\n";
            return EXIT_FAILURE;
        }
        
        //Get the predicted class label
        UINT predictedClassLabel = softmax.getPredictedClassLabel();
        vector< double > classLikelihoods = softmax.getClassLikelihoods();
        vector< double > classDistances = softmax.getClassDistances();
        
        //Update the accuracy
        if( classLabel == predictedClassLabel ) accuracy++;
        
        cout << "TestSample: " << i <<  " ClassLabel: " << classLabel << " PredictedClassLabel: " << predictedClassLabel << endl;
    }
    
    cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl;
    
    return EXIT_SUCCESS;
}
Example #18
bool DecisionTreeClusterNode::computeError( const ClassificationData &trainingData, MatrixFloat &data, const Vector< UINT > &classLabels, Vector< MinMax > ranges, Vector< UINT > groupIndex, const UINT featureIndex, Float &threshold, Float &error ){

    error = 0;
    threshold = 0;

    const UINT M = trainingData.getNumSamples();
    const UINT K = (UINT)classLabels.size();

    Float giniIndexL = 0;
    Float giniIndexR = 0;
    Float weightL = 0;
    Float weightR = 0;
    VectorFloat groupCounter(2,0);
    MatrixFloat classProbabilities(K,2);

    //Use this data to train a KMeans cluster with 2 clusters
    KMeans kmeans;
    kmeans.setNumClusters( 2 );
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-5 );
    kmeans.setMinNumEpochs( 1 );
    kmeans.setMaxNumEpochs( 100 );

    //Disable the logging to clean things up
    kmeans.setTrainingLoggingEnabled( false );

    if( !kmeans.train_( data ) ){
        errorLog << __GRT_LOG__ << " Failed to train KMeans model for feature: " << featureIndex << std::endl;
        return false;
    }

    //Set the split threshold as the mid point between the two clusters
    const MatrixFloat &clusters = kmeans.getClusters();
    threshold = 0;
    for(UINT i=0; i<clusters.getNumRows(); i++){
        threshold += clusters[i][0];
    }
    threshold /= clusters.getNumRows();

    //Iterate over each sample and work out if it should be in the lhs (0) or rhs (1) group based on the current threshold
    groupCounter[0] = groupCounter[1] = 0;
    classProbabilities.setAllValues(0);
    for(UINT i=0; i<M; i++){
        groupIndex[i] = trainingData[ i ][ featureIndex ] >= threshold ? 1 : 0;
        groupCounter[ groupIndex[i] ]++;
        classProbabilities[ getClassLabelIndexValue(trainingData[i].getClassLabel(),classLabels) ][ groupIndex[i] ]++;
    }

    //Compute the class probabilities for the lhs group and rhs group
    for(UINT k=0; k<K; k++){
        classProbabilities[k][0] = groupCounter[0]>0 ? classProbabilities[k][0]/groupCounter[0] : 0;
        classProbabilities[k][1] = groupCounter[1]>0 ? classProbabilities[k][1]/groupCounter[1] : 0;
    }

    //Compute the Gini index for the lhs and rhs groups
    giniIndexL = giniIndexR = 0;
    for(UINT k=0; k<K; k++){
        giniIndexL += classProbabilities[k][0] * (1.0-classProbabilities[k][0]);
        giniIndexR += classProbabilities[k][1] * (1.0-classProbabilities[k][1]);
    }
    weightL = groupCounter[0]/M;
    weightR = groupCounter[1]/M;
    error = (giniIndexL*weightL) + (giniIndexR*weightR);

    return true;
}
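For reference, the error computed above is the size-weighted Gini impurity of the two groups; a self-contained numeric sketch of the same formula, using hypothetical class counts:

#include <iostream>
#include <vector>

int main(){
    //Hypothetical class counts for the lhs (0) and rhs (1) groups of a split
    std::vector<double> lhs = {8, 2}; //10 samples
    std::vector<double> rhs = {1, 9}; //10 samples
    const double nL = 10, nR = 10, M = nL + nR;
    double giniL = 0, giniR = 0;
    for(size_t k=0; k<lhs.size(); k++){
        const double pL = lhs[k]/nL;
        const double pR = rhs[k]/nR;
        giniL += pL*(1.0-pL);
        giniR += pR*(1.0-pR);
    }
    //Weight each group's impurity by its share of the samples
    const double error = giniL*(nL/M) + giniR*(nR/M);
    std::cout << "Split error: " << error << std::endl; //0.25 for these counts
    return 0;
}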
Example #19
File: MinDist.cpp Project: ios4u/grt
bool MinDist::train_(ClassificationData &labelledTrainingData){
    
    //Clear any previous models
    clear();
    
    const unsigned int M = labelledTrainingData.getNumSamples();
    const unsigned int N = labelledTrainingData.getNumDimensions();
    const unsigned int K = labelledTrainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( M <= numClusters ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - There are not enough training samples for the number of clusters. Either reduce the number of clusters or increase the number of training samples!" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    nullRejectionThresholds.resize(K);
    ranges = labelledTrainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        labelledTrainingData.scale(0, 1);
    }
    
    //Train each of the models
    for(UINT k=0; k<numClasses; k++){
        
        //Get the class label for the kth class
        UINT classLabel = labelledTrainingData.getClassTracker()[k].classLabel;
        
        //Set the kth class label
        classLabels[k] = classLabel;
        
        //Get all the training data for this class
        ClassificationData classData = labelledTrainingData.getClassData(classLabel);
        MatrixDouble data(classData.getNumSamples(),N);
        
        //Copy the training data into a matrix
        for(UINT i=0; i<data.getNumRows(); i++){
            for(UINT j=0; j<data.getNumCols(); j++){
                data[i][j] = classData[i][j];
            }
        }
        
        //Train the model for this class
        models[k].setGamma( nullRejectionCoeff );
        if( !models[k].train(classLabel,data,numClusters) ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to train model for class: " << classLabel;
            errorLog << ". This is might be because this class does not have enough training samples! You should reduce the number of clusters or increase the number of training samples for this class." << endl;
            models.clear();
            return false;
        }
        
        //Set the null rejection threshold
        nullRejectionThresholds[k] = models[k].getRejectionThreshold();
        
    }
    
    trained = true;
    return true;
}
Example #20
int main (int argc, const char * argv[])
{
    //Create a new AdaBoost instance
    AdaBoost adaBoost;

    //Set the weak classifier you want to use
    adaBoost.setWeakClassifier( DecisionStump() );

    //Load some training data to train the classifier
    ClassificationData trainingData;

    if( !trainingData.load("AdaBoostTrainingData.grt") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }

    //Use 20% of the training dataset to create a test dataset
    ClassificationData testData = trainingData.partition( 80 );

    //Train the classifier
    if( !adaBoost.train( trainingData ) ){
        cout << "Failed to train classifier!\n";
        return EXIT_FAILURE;
    }

    //Save the model to a file
    if( !adaBoost.save("AdaBoostModel.grt") ){
        cout << "Failed to save the classifier model!\n";
        return EXIT_FAILURE;
    }

    //Load the model from a file
    if( !adaBoost.load("AdaBoostModel.grt") ){
        cout << "Failed to load the classifier model!\n";
        return EXIT_FAILURE;
    }

    //Use the test dataset to test the AdaBoost model
    double accuracy = 0;
    for(UINT i=0; i<testData.getNumSamples(); i++){
        //Get the i'th test sample
        UINT classLabel = testData[i].getClassLabel();
        vector< double > inputVector = testData[i].getSample();

        //Perform a prediction using the classifier
        if( !adaBoost.predict( inputVector ) ){
            cout << "Failed to perform prediction for test sampel: " << i <<"\n";
            return EXIT_FAILURE;
        }

        //Get the predicted class label
        UINT predictedClassLabel = adaBoost.getPredictedClassLabel();
        double maximumLikelihood = adaBoost.getMaximumLikelihood();
        vector< double > classLikelihoods = adaBoost.getClassLikelihoods();
        vector< double > classDistances = adaBoost.getClassDistances();

        //Update the accuracy
        if( classLabel == predictedClassLabel ) accuracy++;

        cout << "TestSample: " << i <<  " ClassLabel: " << classLabel;
        cout << " PredictedClassLabel: " << predictedClassLabel << " Likelihood: " << maximumLikelhood;
        cout << endl;
    }

    cout << "Test Accuracy: " << accuracy/double(testData.getNumSamples())*100.0 << "%" << endl;

    return EXIT_SUCCESS;
}
Example #21
void prediction_axis_data(){
    
    // Training and test data
    ClassificationData trainingData;
    ClassificationData testData;
    
    string file_path = "../../../data/";
    string class_name = "5";
    
    if( !trainingData.loadDatasetFromFile(file_path +  "train/grt/" + class_name + ".txt") ){
        std::cout <<"Failed to load training data!\n";
    }
    
    if( !testData.loadDatasetFromFile(file_path +  "test/grt/" + class_name + ".txt") ){
        std::cout <<"Failed to load training data!\n";
    }
    
    
    // Pipeline setup
    ANBC anbc;
    anbc.setNullRejectionCoeff(1);
    anbc.enableScaling(true);
    anbc.enableNullRejection(true);
    
    GestureRecognitionPipeline pipeline;
    pipeline.setClassifier(anbc);
    
    
    // Train the pipeline
    if( !pipeline.train( trainingData ) ){
        std::cout << "Failed to train classifier!\n";
    }
    
    
    // File stream
    ofstream outputFileStream(class_name + ".csv");
    
    
    // Evaluation
    double accuracy = 0;
    
    outputFileStream << "actualClass,predictedClass,maximumLikelihood,lZ,lY,lZ,rZ,rY,rZ \n";
    
    for(UINT i=0; i<testData.getNumSamples(); i++){
        
        UINT actualClassLabel = testData[i].getClassLabel();
        vector< double > inputVector = testData[i].getSample();
        
        if( !pipeline.predict( inputVector )){
            std::cout << "Failed to perform prediction for test sampel: " << i <<"\n";
        }
        
        UINT predictedClassLabel = pipeline.getPredictedClassLabel();
        double maximumLikelihood = pipeline.getMaximumLikelihood();
        
        outputFileStream << actualClassLabel << "," << predictedClassLabel << "," << maximumLikelihood << ","
        << inputVector[0] << "," << inputVector[1] << ","  << inputVector[2] << ","  << inputVector[3] << ","  << inputVector[4] << ","  << inputVector[5] << "\n";
        
        if( actualClassLabel == predictedClassLabel) accuracy++;
        
    }
    
    std::cout << "Test Accuracy testHandsUp : " << accuracy/double(testData.getNumSamples())*100.0 << " %\n";
    
}
Example #22
File: Softmax.cpp Project: BryanBo-Cao/grt
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    UINT N = data.getNumDimensions();
    UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    Vector< UINT > randomTrainingOrder(M);
    
    //Init the model
    model.init( classLabel,  N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            error = y[i] - model.compute( data[i].getSample() );
            errorSum += error;
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate  * error * data[i][j];
            }
            model.w0 += learningRate  * error;
        }
        
        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
    }
    
    return true;
}
Example #23
bool AdaBoost::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    if( trainingData.getNumSamples() <= 1 ){
        errorLog << "train_(ClassificationData &trainingData) - There are not enough training samples to train a model! Number of samples: " << trainingData.getNumSamples()  << endl;
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    const UINT M = trainingData.getNumSamples();
    const UINT POSITIVE_LABEL = WEAK_CLASSIFIER_POSITIVE_CLASS_LABEL;
    const UINT NEGATIVE_LABEL = WEAK_CLASSIFIER_NEGATIVE_CLASS_LABEL;
    double alpha = 0;
    const double beta = 0.001;
    double epsilon = 0;
    TrainingResult trainingResult;
    
    const UINT K = (UINT)weakClassifiers.size();
    if( K == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - No weakClassifiers have been set. You need to set at least one weak classifier first." << endl;
        return false;
    }

    classLabels.resize(numClasses);
    models.resize(numClasses);
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ){
        trainingData.scale(ranges,0,1);
    }
    
    //Create the weights vector
    VectorDouble weights(M);
    
    //Create the error matrix
    MatrixDouble errorMatrix(K,M);
    
    for(UINT classIter=0; classIter<numClasses; classIter++){
        
        //Get the class label for the current class
        classLabels[classIter] = trainingData.getClassLabels()[classIter];
        
        //Set the class label of the current model
        models[ classIter ].setClassLabel( classLabels[classIter] );
        
        //Setup the labels for this class, POSITIVE_LABEL == 1, NEGATIVE_LABEL == 2
        ClassificationData classData;
        classData.setNumDimensions(trainingData.getNumDimensions());
        for(UINT i=0; i<M; i++){
            UINT label = trainingData[i].getClassLabel()==classLabels[classIter] ? POSITIVE_LABEL : NEGATIVE_LABEL;
            VectorDouble trainingSample = trainingData[i].getSample();
            classData.addSample(label,trainingSample);
        }
        
        //Setup the initial training sample weights
        std::fill(weights.begin(),weights.end(),1.0/M);
        
        //Run the boosting loop
        bool keepBoosting = true;
        UINT t = 0;
        
        while( keepBoosting ){
            
            //Pick the classifier from the family of classifiers that minimizes the total error
            UINT bestClassifierIndex = 0;
            double minError = numeric_limits<double>::max();
            for(UINT k=0; k<K; k++){
                //Get the k'th possible classifier
                WeakClassifier *weakLearner = weakClassifiers[k];
                
                //Train the current classifier
                if( !weakLearner->train(classData,weights) ){
                    errorLog << "Failed to train weakLearner!" << endl;
                    return false;
                }
                
                //Compute the weighted error for this classifier
                double e = 0;
                double positiveLabel = weakLearner->getPositiveClassLabel();
                double numCorrect = 0;
                double numIncorrect = 0;
                for(UINT i=0; i<M; i++){
                    //Only penalize errors
                    double prediction = weakLearner->predict( classData[i].getSample() );
                    
                    if( (prediction == positiveLabel && classData[i].getClassLabel() != POSITIVE_LABEL) ||        //False positive
                        (prediction != positiveLabel && classData[i].getClassLabel() == POSITIVE_LABEL) ){       //False negative
                        e += weights[i]; //Increase the error proportional to the weight of the example
                        errorMatrix[k][i] = 1; //Flag that there was an error
                        numIncorrect++;
                    }else{
                        errorMatrix[k][i] = 0; //Flag that there was no error
                        numCorrect++;
                    }
                }
                
                trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Classifier: " << k << " WeightedError: " << e << " NumCorrect: " << numCorrect/M << " NumIncorrect: " <<numIncorrect/M << endl;
                
                if( e < minError ){
                    minError = e;
                    bestClassifierIndex = k;
                }
                
            }
  
            epsilon = minError;
            
            //Set alpha using the M1 weight value; classifiers with a small weighted error (close to 0) receive a strong weight in the final classifier
            alpha = 0.5 * log( (1.0-epsilon)/epsilon );
            
            trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Best Classifier Index: " << bestClassifierIndex << " MinError: " << minError << " Alpha: " << alpha << endl;
            
            if( isinf(alpha) ){ keepBoosting = false; trainingLog << "Alpha is INF. Stopping boosting for current class" << endl; }
            if( 0.5 - epsilon <= beta ){ keepBoosting = false; trainingLog << "Epsilon <= Beta. Stopping boosting for current class" << endl; }
            if( ++t >= numBoostingIterations ) keepBoosting = false;

            trainingResult.setClassificationResult(t, minError, this);
            trainingResults.push_back(trainingResult);
            trainingResultsObserverManager.notifyObservers( trainingResult );
            
            if( keepBoosting ){
                
                //Add the best weak classifier to the committee
                models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                
                //Update the weights for the next boosting iteration
                double reWeight = (1.0 - epsilon) / epsilon;
                double oldSum = 0;
                double newSum = 0;
                for(UINT i=0; i<M; i++){
                    oldSum += weights[i];
                    //Only update the weights that resulted in an incorrect prediction
                    if( errorMatrix[bestClassifierIndex][i] == 1 ) weights[i] *= reWeight;
                    newSum += weights[i];
                }
                
                //Normalize all the weights
                //This results in increasing the weights of the samples that were incorrectly labelled
                //while decreasing the weights of the samples that were correctly classified
                reWeight = oldSum/newSum;
                for(UINT i=0; i<M; i++){
                    weights[i] *= reWeight;
                }
                
            }else{
                trainingLog << "Stopping boosting training at iteration : " << t-1 << " with an error of " << epsilon << endl;
                if( t-1 == 0 ){
                    //Add the best weak classifier to the committee (we have to add it as this is the first iteration)
                    if( isinf(alpha) ){ alpha = 1; } //If alpha is infinite then the first classifier got everything correct
                    models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                }
            }
            
        }
    }
    
    //Normalize the weights
    for(UINT k=0; k<numClasses; k++){
        models[k].normalizeWeights();
    }
    
    //Flag that the model has been trained
    trained = true;
    
    //Setup the data for prediction
    predictedClassLabel = 0;
    maxLikelihood = 0;
    classLikelihoods.resize(numClasses);
    classDistances.resize(numClasses);
    
    return true;
}
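The alpha value above follows the standard AdaBoost.M1 weighting, alpha = 0.5 * log((1-epsilon)/epsilon); a tiny self-contained check of how the weighted error maps to the committee weight:

#include <cmath>
#include <iostream>

int main(){
    //Weak learners with a small weighted error get a large vote in the final committee
    for( double epsilon : {0.1, 0.3, 0.5} ){
        const double alpha = 0.5 * std::log( (1.0-epsilon)/epsilon );
        std::cout << "epsilon: " << epsilon << " alpha: " << alpha << std::endl;
    }
    return 0; //epsilon 0.1 -> ~1.099, 0.3 -> ~0.424, 0.5 -> 0 (no better than chance)
}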
Example #24
File: ANBC.cpp Project: jdelfes/grt
bool ANBC::train_(ClassificationData &labelledTrainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = labelledTrainingData.getNumSamples();
    const unsigned int N = labelledTrainingData.getNumDimensions();
    const unsigned int K = labelledTrainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( weightsDataSet ){
        if( weightsData.getNumDimensions() != N ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - The number of dimensions in the weights data (" << weightsData.getNumDimensions() << ") is not equal to the number of dimensions of the training data (" << N << ")" << endl;
            return false;
        }
    }
    
    numInputDimensions = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = labelledTrainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        labelledTrainingData.scale(0, 1);
    }
    
    //Train each of the models
    for(UINT k=0; k<numClasses; k++){
        
        //Get the class label for the kth class
        UINT classLabel = labelledTrainingData.getClassTracker()[k].classLabel;
        
        //Set the kth class label
        classLabels[k] = classLabel;
        
        //Get the weights for this class
        VectorDouble weights(numInputDimensions);
        if( weightsDataSet ){
            bool weightsFound = false;
            for(UINT i=0; i<weightsData.getNumSamples(); i++){
                if( weightsData[i].getClassLabel() == classLabel ){
                    weights = weightsData[i].getSample();
                    weightsFound = true;
                    break;
                }
            }
            
            if( !weightsFound ){
                errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to find the weights for class " << classLabel << endl;
                return false;
            }
        }else{
            //If the weights data has not been set then all the weights are 1
            for(UINT j=0; j<numInputDimensions; j++) weights[j] = 1.0;
        }
        
        //Get all the training data for this class
        ClassificationData classData = labelledTrainingData.getClassData(classLabel);
        MatrixDouble data(classData.getNumSamples(),N);
        
        //Copy the training data into a matrix
        for(UINT i=0; i<data.getNumRows(); i++){
            for(UINT j=0; j<data.getNumCols(); j++){
                data[i][j] = classData[i][j];
            }
        }
        
        //Train the model for this class
        models[k].gamma = nullRejectionCoeff;
        if( !models[k].train(classLabel,data,weights) ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to train model for class: " << classLabel << endl;
            
            //Try and work out why the training failed
            if( models[k].N == 0 ){
                errorLog << "train_(ClassificationData &labelledTrainingData) - N == 0!" << endl;
                models.clear();
                return false;
            }
            for(UINT j=0; j<numInputDimensions; j++){
                if( models[k].mu[j] == 0 ){
                    errorLog << "train_(ClassificationData &labelledTrainingData) - The mean of column " << j+1 << " is zero! Check the training data" << endl;
                    models.clear();
                    return false;
                }
            }
            models.clear();
            return false;
        }
        
    }
    
    //Store the null rejection thresholds
    nullRejectionThresholds.resize(numClasses);
    for(UINT k=0; k<numClasses; k++) {
        nullRejectionThresholds[k] = models[k].threshold;
    }
    
    //Flag that the models have been trained
    trained = true;
    return trained;
    
}
Example #25
File: Softmax.cpp Project: nickgillian/grt
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    const UINT N = data.getNumDimensions();
    const UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    VectorFloat batchMean(N);
    Vector< UINT > randomTrainingOrder(M);
    Vector< VectorFloat > batchData(batchSize,VectorFloat(N));
    
    //Init the model
    model.init( classLabel,  N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Clear any previous training results
    trainingResults.clear();
    trainingResults.reserve( maxNumEpochs );
    TrainingResult epochResult;

    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){

        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        UINT m=0;
        while( m < M ){
          //Get the batch data for this update
          UINT roundSize = m+batchSize < M ? batchSize : M-m;
          batchMean.fill(0.0);
          for(UINT i=0; i<roundSize; i++){
            for(UINT j=0; j<N; j++){
              batchData[i][j] = data[ randomTrainingOrder[m+i] ][j];
              batchMean[j] += batchData[i][j];
            }
          }

          for(UINT j=0; j<N; j++) batchMean[j] /= roundSize;

          //Compute the error on this batch, given the current weights
          error = 0.0;
          for(UINT i=0; i<roundSize; i++){
            error += y[ randomTrainingOrder[m+i] ] - model.compute( batchData[i] );
          }
          error /= roundSize;
          errorSum += error;

          //Update the weights
          for(UINT j=0; j<N; j++){
            model.w[j] += learningRate  * error * batchMean[j];
          }
          model.w0 += learningRate  * error;

          m += roundSize;
        }

        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;

        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Class: " << classLabel << " Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
        epochResult.setClassificationResult( iter, errorSum, this );
        trainingResults.push_back( epochResult );
    }
    
    return true;
}
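In equation form, the batch update implemented above is (with learning rate $\eta$, effective batch size $B$, i.e. roundSize, and model output $f$):

$$\Delta w_j = \eta\,\bar{e}\,\bar{x}_j, \qquad \Delta w_0 = \eta\,\bar{e}, \qquad \bar{e} = \frac{1}{B}\sum_{i \in \text{batch}} \big( y_i - f(\mathbf{x}_i) \big), \qquad \bar{x}_j = \frac{1}{B}\sum_{i \in \text{batch}} x_{ij}$$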
Example #26
void metrics_subset_data(){
    
    
    ANBC anbc;
    anbc.enableScaling(true);
    anbc.enableNullRejection(true);
    
    MinDist minDist;
    minDist.setNumClusters(4);
    minDist.enableScaling(true);
    minDist.enableNullRejection(true);
    
    //    ofstream opRecall("anbc-recall-nr-0-10.csv");
    //    opRecall <<"nrCoeff,class0,class1,class2,class3,class4,class5\n";
    //
    //    ofstream opInstanceRes("anbc-prediction-nr-2.csv");
    //    opInstanceRes <<"actualClass,predictedClass,maximumLikelihood,lX,lY,lZ,rX,rY,rZ\n";
    //
    //    ofstream opMetrics("anbc-precision-recall-fmeasure-nr-2.csv");
    //    opMetrics <<"class1,class2,class3,class4,class5\n";
    //
    //    ofstream opConfusion("anbc-confusion-nr-2.csv");
    //    opConfusion <<"class0,class1,class2,class3,class4,class5\n";
    
    
    ofstream opRecall("mindist-recall-nr-0-10.csv");
    opRecall <<"nrCoeff,class0,class1,class2,class3,class4,class5\n";
    
    ofstream opInstanceRes("mindist-prediction-nr-2.csv");
    opInstanceRes <<"actualClass,predictedClass,maximumLikelihood,lZ,lY,lZ,rZ,rY,rZ\n";
    
    ofstream opMetrics("mindist-precision-recall-fmeasure-nr-2.csv");
    opMetrics <<"class1,class2,class3,class4,class5\n";
    
    ofstream opConfusion("mindist-confusion-nr-2.csv");
    opConfusion <<"class0,class1,class2,class3,class4,class5\n";
    
    // Training and test data
    ClassificationData trainingData;
    ClassificationData testData;
    ClassificationData nullGestureData;
    
    string file_path = "../../../data/";
    
    if( !trainingData.loadDatasetFromFile(file_path +  "train/grt/hri-training-dataset.txt") ){
        std::cout <<"Failed to load training data!\n";
    }
    
    if( !nullGestureData.loadDatasetFromFile(file_path +  "test/grt/0.txt") ){
        std::cout <<"Failed to load null gesture data!\n";
    }
    
    
    testData = trainingData.partition(90);
    testData.sortClassLabels();
//    testData.saveDatasetToFile("anbc-validation-subset.txt");
    testData.saveDatasetToFile("mindist-validation-subset.txt");
    
    
    for(double nullRejectionCoeff = 0; nullRejectionCoeff <= 10; nullRejectionCoeff=nullRejectionCoeff+0.2){
        
        //        anbc.setNullRejectionCoeff(nullRejectionCoeff);
        //        GestureRecognitionPipeline pipeline;
        //        pipeline.setClassifier(anbc);
        
        minDist.setNullRejectionCoeff(nullRejectionCoeff);
        GestureRecognitionPipeline pipeline;
        pipeline.setClassifier(minDist);
        
        pipeline.train(trainingData);
        
        pipeline.test(testData);
        TestResult testRes = pipeline.getTestResults();
        
        opRecall << nullRejectionCoeff << ",";
        
        
        //null rejection prediction
        double accuracy = 0;
        for(UINT i=0; i<nullGestureData.getNumSamples(); i++){
            
            vector< double > inputVector = nullGestureData[i].getSample();
            
            if( !pipeline.predict( inputVector )){
                std::cout << "Failed to perform prediction for test sampel: " << i <<"\n";
            }
            
            UINT predictedClassLabel = pipeline.getPredictedClassLabel();
            if(predictedClassLabel == 0 ) accuracy++;
        }
        
        opRecall << accuracy/double(nullGestureData.getNumSamples()) << ",";
        
        
        // other classes prediction
        for(int cl = 0; cl < testRes.recall.size(); cl++ ){
            opRecall << testRes.recall[cl];
            if(cl < testRes.recall.size() - 1){
                opRecall << ",";
            }
        }
        
        opRecall<< endl;
        
        
        // Calculate instance prediction, precision, recall, fmeasure and confusion matrix for nullRejection 2.0
        if(AreDoubleSame(nullRejectionCoeff, 2.0))
        {
            //instance prediction
            for(UINT i=0; i<testData.getNumSamples(); i++){
                
                UINT actualClassLabel = testData[i].getClassLabel();
                vector< double > inputVector = testData[i].getSample();
                
                if( !pipeline.predict( inputVector )){
                    std::cout << "Failed to perform prediction for test sampel: " << i <<"\n";
                }
                
                UINT predictedClassLabel = pipeline.getPredictedClassLabel();
                double maximumLikelihood = pipeline.getMaximumLikelihood();
                
                opInstanceRes << actualClassLabel << "," << predictedClassLabel << "," << maximumLikelihood << ","
                << inputVector[0] << "," << inputVector[1] << ","  << inputVector[2] << ","  << inputVector[3] << ","  << inputVector[4] << ","  << inputVector[5] << "\n";
                
            }
            
            //precision, recall, fmeasure
            for(UINT cl = 0; cl < testRes.precision.size(); cl++ ){
                opMetrics << testRes.precision[cl];
                if(cl < testRes.precision.size() - 1){
                    opMetrics << ",";
                }
            }
            opMetrics << endl;
            
            for(UINT cl = 0; cl < testRes.recall.size(); cl++ ){
                opMetrics << testRes.recall[cl];
                if(cl < testRes.recall.size() - 1){
                    opMetrics << ",";
                }
            }
            opMetrics << endl;
            
            for(UINT cl = 0; cl < testRes.fMeasure.size(); cl++ ){
                opMetrics << testRes.fMeasure[cl];
                if(cl < testRes.fMeasure.size() - 1){
                    opMetrics << ",";
                }
            }
            opMetrics << endl;
            
            //confusion matrix
            MatrixDouble matrix = testRes.confusionMatrix;
            for(UINT i=0; i<matrix.getNumRows(); i++){
                for(UINT j=0; j<matrix.getNumCols(); j++){
                    opConfusion << matrix[i][j];
                    
                    if(j < matrix.getNumCols() - 1){
                        opConfusion << ",";
                    }
                    
                }
                opConfusion << endl;
            }
            opConfusion << endl;
            
        }
        
        
        
    }
    
    cout << "Done\n";
}
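The coefficient sweep above advances nullRejectionCoeff in 0.2 steps and then tests it against 2.0 with AreDoubleSame, a helper that is not shown in this excerpt. A minimal sketch of such a helper, assuming a fixed epsilon tolerance is acceptable here; an exact == comparison would be unreliable after repeated floating-point addition:

#include <cmath>

//Hypothetical helper (not part of GRT): compares two doubles within a small
//tolerance so that the accumulated 0.2 steps above still match 2.0.
bool AreDoubleSame(double a, double b, double epsilon = 1e-9){
    return std::fabs(a - b) < epsilon;
}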
Example #27
File: Softmax.cpp Project: nickgillian/grt
bool Softmax::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << __GRT_LOG__ << " Training data has zero samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = K;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    ClassificationData validationData;
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    if( useValidationSet ){
        validationData = trainingData.split( 100-validationSetSize );
    }
    
    //Train a regression model for each class in the training data
    for(UINT k=0; k<numClasses; k++){
        
        //Set the class label
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
        
        //Train the model
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << __GRT_LOG__ << " Failed to train model for class: " << classLabels[k] << std::endl;
            return false;
        }
    }

    //Flag that the models have been trained
    trained = true;
    converged = true;

    //Compute the final training stats
    trainingSetAccuracy = 0;
    validationSetAccuracy = 0;

    //If scaling was on, then the data will already be scaled, so turn it off temporarily so we can test the model accuracy
    bool scalingState = useScaling;
    useScaling = false;
    if( !computeAccuracy( trainingData, trainingSetAccuracy ) ){
        trained = false;
        converged = false;
        errorLog << __GRT_LOG__ << " Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
        return false;
    }
    
    if( useValidationSet ){
        if( !computeAccuracy( validationData, validationSetAccuracy ) ){
            trained = false;
            converged = false;
            errorLog << __GRT_LOG__ << " Failed to compute validation set accuracy! Failed to fully train model!" << std::endl;
            return false;
        }
    }

    trainingLog << "Training set accuracy: " << trainingSetAccuracy << std::endl;

    if( useValidationSet ){
        trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
    }

    //Reset the scaling state for future prediction
    useScaling = scalingState;

    return trained;
}
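A minimal usage sketch for the trainer above, assuming the standard GRT MLBase setters drive the scaling and validation-split branches in train_; the dataset filename is a placeholder:

#include <GRT/GRT.h>

//Sketch only: "softmax_data.csv" is a hypothetical dataset path.
bool trainSoftmaxExample(){
    GRT::ClassificationData data;
    if( !data.load( "softmax_data.csv" ) ) return false;

    GRT::Softmax softmax;
    softmax.enableScaling( true );        //Exercises the scale(0,1) branch above
    softmax.setUseValidationSet( true );  //Exercises the validationData split above
    softmax.setValidationSetSize( 20 );   //Hold out 20% of the training data

    return softmax.train( data );         //train() dispatches to train_()
}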
void metrics_separate_data(){
    
    // Training and test data
    ClassificationData trainingData;
    ClassificationData testData;
    
    string file_path = "../../../data/";
    
    if( !trainingData.loadDatasetFromFile(file_path +  "train/grt/12345.txt") ){
        std::cout <<"Failed to load training data!\n";
    }
    
    ANBC anbc;
    anbc.enableScaling(true);
    anbc.enableNullRejection(true);
    
    SVM svm(SVM::RBF_KERNEL);
    svm.enableScaling(true);
    svm.enableNullRejection(true);
    
    MinDist minDist;
    minDist.setNumClusters(4);
    minDist.enableScaling(true);
    minDist.enableNullRejection(true);
    
    ofstream outputFileStream("accuracy-mindist.csv");
    outputFileStream << "classLabel,nullRejectionCoeff,accuracy\n";
    
    
    for(int class_name = 1; class_name<=5; class_name++){
        
        if( !testData.loadDatasetFromFile(file_path +  "test/grt/" + to_string(class_name)  + ".txt") ){
            std::cout << "Failed to load test data!\n";
        }
        
        
        
        for(double nullRejectionCoeff = 0; nullRejectionCoeff <= 10; nullRejectionCoeff += 0.2){
            //            anbc.setNullRejectionCoeff(nullRejectionCoeff);
            //            svm.setNullRejectionCoeff(nullRejectionCoeff);
            minDist.setNullRejectionCoeff(nullRejectionCoeff);
            
            GestureRecognitionPipeline pipeline;
            //            pipeline.setClassifier(anbc);
            //            pipeline.setClassifier(svm);
            pipeline.setClassifier(minDist);
            
            
            // Train the pipeline
            if( !pipeline.train( trainingData ) ){
                std::cout << "Failed to train classifier!\n";
            }
            
            
            // Evaluation
            double accuracy = 0;
            for(UINT i=0; i<testData.getNumSamples(); i++){
                
                UINT actualClassLabel = testData[i].getClassLabel();
                vector< double > inputVector = testData[i].getSample();
                
                if( !pipeline.predict( inputVector )){
                    std::cout << "Failed to perform prediction for test sample: " << i << "\n";
                }
                
                UINT predictedClassLabel = pipeline.getPredictedClassLabel();
                if( actualClassLabel == predictedClassLabel) accuracy++;
            }
            
            outputFileStream << class_name << ',' << nullRejectionCoeff << ',' << accuracy/double(testData.getNumSamples())*100.0 << '\n';
            
            cout << "Done" << endl;
        }
        
        
    }
    
    
    //---------------------- Null Gesture Test -----------------//
    int class_name = 0;
    
    if( !testData.loadDatasetFromFile(file_path +  "test/grt/" + to_string(class_name)  + ".txt") ){
        std::cout << "Failed to load test data!\n";
    }
    
    
    for(double nullRejectionCoeff = 0; nullRejectionCoeff <= 10; nullRejectionCoeff += 0.2){
        //            anbc.setNullRejectionCoeff(nullRejectionCoeff);
        //            svm.setNullRejectionCoeff(nullRejectionCoeff);
        minDist.setNullRejectionCoeff(nullRejectionCoeff);
        
        GestureRecognitionPipeline pipeline;
        //            pipeline.setClassifier(anbc);
        //            pipeline.setClassifier(svm);
        pipeline.setClassifier(minDist);
        
        
        // Train the pipeline
        if( !pipeline.train( trainingData ) ){
            std::cout << "Failed to train classifier!\n";
        }
        
        
        // Evaluation
        double accuracy = 0;
        for(UINT i=0; i<testData.getNumSamples(); i++){
            
            vector< double > inputVector = testData[i].getSample();
            
            if( !pipeline.predict( inputVector )){
            std::cout << "Failed to perform prediction for test sample: " << i << "\n";
            }
            
            UINT predictedClassLabel = pipeline.getPredictedClassLabel();
            if(predictedClassLabel == 0 ) accuracy++;
        }
        
        outputFileStream << class_name << ',' << nullRejectionCoeff << ',' << accuracy/double(testData.getNumSamples())*100.0 << '\n';
        
        cout << "Done" << endl;
        
        
    }
    
}
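Both evaluation loops above rely on GRT's convention that, once null rejection is enabled, a predicted class label of 0 denotes a rejected (null) sample. A small sketch of that check in isolation; the helper name is illustrative:

#include <vector>
#include <GRT/GRT.h>

//Hypothetical helper: returns true if the pipeline rejects the sample.
//GRT reserves class label 0 for the null/rejection class.
bool isRejected( GRT::GestureRecognitionPipeline &pipeline,
                 const std::vector< double > &inputVector ){
    if( !pipeline.predict( inputVector ) ) return false; //Prediction call itself failed
    return pipeline.getPredictedClassLabel() == 0;
}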
Example #29
File: LDA.cpp Project: CV-IP/grt
bool LDA::train(ClassificationData trainingData){
    
    errorLog << "SORRY - this module is still under development and can't be used yet!" << std::endl;
    return false;
    
    //Reset any previous model
    numInputDimensions = 0;
    numClasses = 0;
    models.clear();
    classLabels.clear();
    trained = false;
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(ClassificationData trainingData) - There is no training data to train the model!" << std::endl;
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();

	//Calculate the between scatter matrix
	MatrixFloat SB = computeBetweenClassScatterMatrix( trainingData );
	
	//Calculate the within scatter matrix
	MatrixFloat SW = computeWithinClassScatterMatrix( trainingData );


   /*

    
    //Counters and stat containers
    vector< UINT > groupLabels(numClasses);
    VectorDouble groupCounters(numClasses);
    VectorDouble priorProb(numClasses);
    MatrixFloat groupMeans(numClasses,numFeatures);
    MatrixFloat pCov(numFeatures,numFeatures);
    MatrixFloat pCovInv(numFeatures,numFeatures);
    MatrixFloat modelCoeff(numClasses,numFeatures+1);
    
    pCov.setAllValues(0);
    modelCoeff.setAllValues(0);
    
    //Set the class labels and counters
    for(UINT k=0; k<numClasses; k++){
        groupLabels[k] = trainingData.getClassTracker()[k].classLabel;
        groupCounters[k] = trainingData.getClassTracker()[k].counter;
    }
    
    //Loop over the classes to compute the group stats
    for(UINT k=0; k<numClasses; k++){
        LabelledClassificationData classData = trainingData.getClassData( groupLabels[k] );
        MatrixFloat cov(numFeatures,numFeatures);
        
        //Compute class mu
        for(UINT j=0; j<numFeatures; j++){
            groupMeans[k][j] = 0;
            for(UINT i=0; i<classData.getNumSamples(); i++){
                groupMeans[k][j] += classData[i][j];
            }
            groupMeans[k][j] /= Float(classData.getNumSamples());
        }
        
        //Compute the class covariance
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                cov[m][n] = 0;
                for(UINT i=0; i<classData.getNumSamples(); i++){
                    cov[m][n] += (classData[i][m]-groupMeans[k][m]) * (classData[i][n]-groupMeans[k][n]);
                }
                cov[m][n] /= Float(classData.getNumSamples()-1);
            }
        }
        
        debugLog << "Group Cov:\n";
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                debugLog << cov[m][n] << "\t";
            }debugLog << "\n";
        }debugLog << std::endl;
        
        //Set the prior probability for this class (which is just 1/numClasses)
        priorProb[k] = 1.0/Float(numClasses);
        
        //Update the main covariance matrix
        Float weight = ((classData.getNumSamples() - 1) / Float(trainingData.getNumSamples() - numClasses) );
        debugLog << "Weight: " << weight << std::endl;
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                pCov[m][n] += weight * cov[m][n];
            }
        }
    }
    
    for(UINT k=0; k<numClasses; k++){
        debugLog << "GroupMu: " << groupLabels[k] << "\t";
        for(UINT j=0; j<numFeatures; j++){
            debugLog << groupMeans[k][j] << "\t";
        }debugLog << std::endl;
    }
    
    debugLog << "pCov:\n";
    for(UINT m=0; m<numFeatures; m++){
        for(UINT n=0; n<numFeatures; n++){
            debugLog << pCov[m][n] << "\t";
        }debugLog << "\n";
    }debugLog << std::endl;
    
    //Invert the pCov matrix
    LUDecomposition matrixInverter(pCov);
    if( !matrixInverter.inverse(pCovInv) ){
        errorLog << "Failed to invert pCov Matrix!" << std::endl;
        return false;
    }
    
    //Loop over classes to calculate linear discriminant coefficients
    Float sum = 0;
    vector< Float > temp(numFeatures);
    for(UINT k=0; k<numClasses; k++){
        //Compute the temporary vector
        for(UINT j=0; j<numFeatures; j++){
            temp[j] = 0;
            for(UINT m=0; m<numFeatures; m++){
                    temp[j] += groupMeans[k][m] * pCovInv[m][j];
            }
        }
        
        //Compute the model coefficients
        sum = 0;
        for(UINT j=0; j<numFeatures; j++){
            sum += temp[j]*groupMeans[k][j];
        }
        modelCoeff[k][0] = -0.5 * sum + log( priorProb[k] );
        
        for(UINT j=0; j<numFeatures; j++){
            modelCoeff[k][j+1] = temp[j];
        }
    }
    
    //Setup the models for realtime prediction
    models.resize(numClasses);
    classLabels.resize(numClasses);
    
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = groupLabels[k];
        models[k].classLabel = groupLabels[k];
        models[k].priorProb = priorProb[k];
        models[k].weights = modelCoeff.getRowVector(k);
    }
    
    //Flag that the models were successfully trained
    trained = true;
    */
    
    return true;
}
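Although the trainer is disabled, the commented-out block shows the intended model layout: for each class k, modelCoeff[k][0] holds the bias -0.5 * mu_k' * inv(Sigma) * mu_k + log(priorProb[k]) and modelCoeff[k][1..N] holds the weights inv(Sigma) * mu_k. A hedged sketch of how prediction would score a new sample against that layout; the function and container types are illustrative, not the final GRT API:

#include <vector>

//Sketch: evaluate each linear discriminant g_k(x) = bias_k + w_k . x and
//return the index of the highest-scoring class. coeffs[k][0] is the bias,
//coeffs[k][1..N] the weights, matching the commented-out layout above.
unsigned int predictLDA( const std::vector< std::vector<double> > &coeffs,
                         const std::vector<double> &x ){
    unsigned int bestClass = 0;
    double bestScore = -1.0e308;
    for(unsigned int k=0; k<coeffs.size(); k++){
        double score = coeffs[k][0];
        for(unsigned int j=0; j<x.size(); j++){
            score += coeffs[k][j+1] * x[j];
        }
        if( score > bestScore ){ bestScore = score; bestClass = k; }
    }
    return bestClass;
}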
Example #30
bool RandomForests::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }

    if( bootstrappedDatasetWeight <= 0.0 || bootstrappedDatasetWeight > 1.0 ){
        errorLog << "train_(ClassificationData &trainingData) - Bootstrapped Dataset Weight must be [> 0.0 and <= 1.0]" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    classLabels = trainingData.getClassLabels();
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Flag that the main algorithm has been trained in case we need to trigger any callbacks
    trained = true;
    
    //Train the random forest
    forest.reserve( forestSize );
    for(UINT i=0; i<forestSize; i++){
        
        //Get a balanced bootstrapped dataset
        UINT datasetSize = (UINT)(trainingData.getNumSamples() * bootstrappedDatasetWeight);
        ClassificationData data = trainingData.getBootstrappedDataset( datasetSize, true );
 
        DecisionTree tree;
        tree.setDecisionTreeNode( *decisionTreeNode );
        tree.enableScaling( false ); //We have already scaled the training data so we do not need to scale it again
        tree.setTrainingMode( trainingMode );
        tree.setNumSplittingSteps( numRandomSplits );
        tree.setMinNumSamplesPerNode( minNumSamplesPerNode );
        tree.setMaxDepth( maxDepth );
        tree.enableNullRejection( useNullRejection );
        tree.setRemoveFeaturesAtEachSpilt( removeFeaturesAtEachSpilt );

        trainingLog << "Training forest " << i+1 << "/" << forestSize << "..." << endl;
        
        //Train this tree
        if( !tree.train( data ) ){
            errorLog << "train_(ClassificationData &trainingData) - Failed to train tree at forest index: " << i << endl;
            clear();
            return false;
        }
        
        //Deep copy the tree into the forest
        forest.push_back( tree.deepCopyTree() );
    }

    return true;
}
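train_ only grows the forest; at prediction time the per-tree outputs still have to be combined. A simplified sketch of one common aggregation, a plain majority vote over the trees; GRT's actual predict_ may instead average per-tree class likelihoods, so treat this as illustrative only:

#include <vector>

//Sketch: majority vote across an ensemble. treeVotes[i] is the class index
//(0..numClasses-1) predicted by tree i for a single input sample.
unsigned int majorityVote( const std::vector<unsigned int> &treeVotes,
                           const unsigned int numClasses ){
    std::vector<unsigned int> counts( numClasses, 0 );
    for(size_t i=0; i<treeVotes.size(); i++){
        if( treeVotes[i] < numClasses ) counts[ treeVotes[i] ]++;
    }
    unsigned int bestClass = 0;
    for(unsigned int k=1; k<numClasses; k++){
        if( counts[k] > counts[bestClass] ) bestClass = k;
    }
    return bestClass;
}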