Example #1
bool SwipeDetector::train_(ClassificationData &trainingData) {

    //Clear any previous models
    clear();

    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();

    if( M == 0 ) {
        errorLog << "train_(trainingData &labelledTrainingData) - Training data has zero samples!" << std::endl;
        return false;
    }

    numInputDimensions = N;
    numClasses = 2; //This is always 2 for swipe detection [1 == swipe detected, everything else means no swipe detected]
    classLabels.resize( 2 );
    classLabels[0] = 1; //Swipe
    classLabels[1] = 2; //No Swipe
    nullRejectionThresholds.resize(2,0);
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ) {
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    //There is currently no way to automatically train the swipe detection; the user needs to manually set the thresholds, so just flag that the model is trained
    trained = true;

    return true;
}
Example #2
int main(void){
	cout << "ClassificationData Test" << endl;
	ClassificationData cdata;

	// load a data file that is in the Nick Gillian (GRT) format
	if( !cdata.loadDatasetFromFile("irisNG.txt") ){
	 	cout << "error loading file" << endl;
	}

	cdata.printStats();

	cout << "convert dataset to csv" << endl;

	//convert it to CSV; the first column indicates the class
	cdata.saveDatasetToCSVFile("irisCSVFromNG.txt");

	//we can of course load the data back from the CSV file we just generated
	//note that the class names are now lost
	cdata.loadDatasetFromCSVFile("irisCSVFromNG.txt");

	cdata.printStats();

	//try to load a CSV file that includes strings
	//cdata.loadDatasetFromCSVFile("irisCSV.txt", 4);
	//commented out because loading it produces an error

	//load a CSV file without strings, where the class labels are stored in the 5th column
	cdata.loadDatasetFromCSVFile("irisCSVNoText.txt", 4);
	cdata.printStats();

	cdata.loadDatasetFromCSVFile("TestCSV.txt");
	cdata.printStats();

	return 0;
}
Example #3
bool SVM::convertClassificationDataToLIBSVMFormat(ClassificationData &trainingData){
    
    //clear any previous problems
    deleteProblemSet();
    
    const UINT numTrainingExamples = trainingData.getNumSamples();
    numInputDimensions = trainingData.getNumDimensions();
    
    //Compute the ranges in case the data should be scaled
    ranges = trainingData.getRanges();
    
    //Init the memory
    prob.l = numTrainingExamples;
    prob.x = new svm_node*[numTrainingExamples];
    prob.y = new double[numTrainingExamples];
    problemSet = true;
    
    for(UINT i=0; i<numTrainingExamples; i++){
        //Set the class ID
        prob.y[i] = trainingData[i].getClassLabel();
        
        //Assign the memory for this training example, note that a dummy node is needed at the end of the vector
        prob.x[i] = new svm_node[numInputDimensions+1];
        for(UINT j=0; j<numInputDimensions; j++){
            prob.x[i][j].index = j+1;
            prob.x[i][j].value = trainingData[i].getSample()[j];
        }
        prob.x[i][numInputDimensions].index = -1; //Assign the final node value
        prob.x[i][numInputDimensions].value = 0;
    }
    
    return true;
}
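For illustration, here is a self-contained sketch of the node layout the loop above produces for a single sample. The svm_node struct below is a minimal stand-in mirroring the one LIBSVM declares in svm.h, and the sample values are invented:

#include <cstdio>

//Minimal stand-in for LIBSVM's svm_node (the real definition lives in svm.h)
struct svm_node{ int index; double value; };

int main(){
    //One 3-dimensional sample, laid out as the conversion loop above produces it:
    //feature indices are 1-based and a dummy node with index == -1 terminates the sample
    double sample[3] = { 0.2, 0.5, 0.9 };
    svm_node x[4];
    for(int j=0; j<3; j++){
        x[j].index = j+1;
        x[j].value = sample[j];
    }
    x[3].index = -1; //The terminator node LIBSVM expects
    x[3].value = 0;

    for(int j=0; x[j].index != -1; j++) printf("%d:%g ", x[j].index, x[j].value);
    printf("\n");
    return 0;
}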
Example #4
void Forest::RefineLeafNodes(ClassificationData& data, int verbosityLevel)
{
  // reset label distributions of all leaf nodes in the forest
  for(int t=0; t < nTrees; ++t)
	trees[t].ClearLeafNodes();	  
  
  // refine for each label
  for(unsigned int i=0; i<labels.size(); i++)
  {
	int nPoints = 0;
	
	// load training data in chunks
	while((nPoints = data.LoadChunkForLabel(labels[i], MAX_DATAPOINTS_TO_LOAD)) > 0)
	{
	  #pragma omp parallel
	  {		  
		#pragma omp for nowait
		for(int t=0; t < nTrees; ++t)
		{
		  trees[t].RefineLeafNodes(data, nPoints, i);	  
		}
	  }
	}
  }
  
  // normalize distributions (account for imbalanced amounts of available data per label)
  for(int t=0; t < nTrees; ++t)
  {
	trees[t].UpdateLeafNodes(labels, data.GetCountPerLabel());
  }
}
Example #5
bool ClassificationData::merge(const ClassificationData &labelledData){

    if( labelledData.getNumDimensions() != numDimensions ){
        errorLog << "merge(const ClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl;
        return false;
    }

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();
    
    //Reserve the memory
    reserve( getNumSamples() + labelledData.getNumSamples() );

    //Add the data from the labelledData to this instance
    for(UINT i=0; i<labelledData.getNumSamples(); i++){
        addSample(labelledData[i].getClassLabel(), labelledData[i].getSample());
    }

    //Set the class names from the dataset
    vector< ClassTracker > classTracker = labelledData.getClassTracker();
    for(UINT i=0; i<classTracker.size(); i++){
        setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel);
    }

    //Sort the class labels
    sortClassLabels();

    return true;
}
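A minimal usage sketch for merge (the dataset names, dimensionality, and samples are illustrative). Both datasets must have the same number of dimensions, or the call fails as shown above:

ClassificationData dataA, dataB;
dataA.setNumDimensions( 3 );
dataB.setNumDimensions( 3 );
//...add samples to both datasets...
if( !dataA.merge( dataB ) ){
    cout << "Failed to merge datasets!" << endl;
}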
Example #6
int main (int argc, const char * argv[])
{ 
    //Create a new gesture recognition pipeline
    GestureRecognitionPipeline pipeline;
    
    //Add an ANBC module
    pipeline.setClassifier( ANBC() );
    
    //Add a ClassLabelFilter as a post processing module with a minCount of 5 and a buffer size of 10
    pipeline.addPostProcessingModule( ClassLabelFilter(5,10) );
    
    //Load some training data to train and test the classifier
    ClassificationData trainingData;
    ClassificationData testData;
    
    if( !trainingData.loadDatasetFromFile("ClassLabelFilterTrainingData.txt") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }
    
    if( !testData.loadDatasetFromFile("ClassLabelFilterTestData.txt") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }
    
    //Train the classifier
    if( !pipeline.train( trainingData ) ){
        cout << "Failed to train classifier!\n";
        return EXIT_FAILURE;
    }
    
    //Use the test dataset to demonstrate the output of the ClassLabelFilter    
    for(UINT i=0; i<testData.getNumSamples(); i++){
        VectorDouble inputVector = testData[i].getSample();
        
        if( !pipeline.predict( inputVector ) ){
            cout << "Failed to perform prediction for test sampel: " << i <<"\n";
            return EXIT_FAILURE;
        }
        
        //Get the predicted class label (this will be the processed class label)
        UINT predictedClassLabel = pipeline.getPredictedClassLabel();
        
        //Get the unprocessed class label (i.e. the direct output of the classifier)
        UINT unprocessedClassLabel = pipeline.getUnProcessedPredictedClassLabel();
        
        //Also print the results to the screen
        cout << "Processed Class Label: \t" << predictedClassLabel << "\tUnprocessed Class Label: \t" << unprocessedClassLabel << endl;

    }
    
    return EXIT_SUCCESS;
}
Example #7
ClassificationData ClassificationData::getBootstrappedDataset(UINT numSamples,bool balanceDataset) const{
    
    Random rand;
    ClassificationData newDataset;
    newDataset.setNumDimensions( getNumDimensions() );
    newDataset.setAllowNullGestureClass( allowNullGestureClass );
    newDataset.setExternalRanges( externalRanges, useExternalRanges );
    
    if( numSamples == 0 ) numSamples = totalNumSamples;
    
    newDataset.reserve( numSamples );

    const UINT K = getNumClasses(); 
    
    //Add all the class labels to the new dataset to ensure the dataset has a list of all the labels
    for(UINT k=0; k<K; k++){
        newDataset.addClass( classTracker[k].classLabel );
    }

    if( balanceDataset ){
        //Group the class indexs
        std::vector< std::vector< UINT > > classIndexs( K );
        for(UINT i=0; i<totalNumSamples; i++){
            classIndexs[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
        }

        //Compute the target number of samples per class
        UINT numSamplesPerClass = (UINT)floor( numSamples / double(K) );

        //Randomly select the training samples from each class
        UINT classIndex = 0;
        UINT classCounter = 0;
        UINT randomIndex = 0;
        for(UINT i=0; i<numSamples; i++){
            randomIndex = rand.getRandomNumberInt(0, (UINT)classIndexs[ classIndex ].size() );
            randomIndex = classIndexs[ classIndex ][ randomIndex ];
            newDataset.addSample(data[ randomIndex ].getClassLabel(), data[ randomIndex ].getSample());
            if( classCounter++ >= numSamplesPerClass && classIndex+1 < K ){
                classCounter = 0;
                classIndex++;
            }
        }

    }else{
        //Randomly select the training samples to add to the new data set
        UINT randomIndex;
        for(UINT i=0; i<numSamples; i++){
            randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
            newDataset.addSample( data[randomIndex].getClassLabel(), data[randomIndex].getSample() );
        }
    }

    //Sort the class labels so they are in order
    newDataset.sortClassLabels();
    
    return newDataset;
}
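A usage sketch, assuming data is an already-loaded ClassificationData instance. Passing 0 keeps the new dataset the same size as the source (per the check above), and true requests class balancing:

ClassificationData bootstrapped = data.getBootstrappedDataset( 0, true );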
Example #8
int main (int argc, const char * argv[])
{
    
    //Load the example data
    ClassificationData data;
    
    if( !data.loadDatasetFromFile("WiiAccShakeData.txt") ){
        cout << "ERROR: Failed to load data from file!\n";
        return EXIT_FAILURE;
    }

    //The variables used to initialize the zero crossing counter feature extraction
    UINT searchWindowSize = 20;
    double deadZoneThreshold = 0.01;
    UINT numDimensions = data.getNumDimensions();
    UINT featureMode = ZeroCrossingCounter::INDEPENDANT_FEATURE_MODE; //This could also be ZeroCrossingCounter::COMBINED_FEATURE_MODE
    
    //Create a new instance of the ZeroCrossingCounter feature extraction
    ZeroCrossingCounter zeroCrossingCounter(searchWindowSize,deadZoneThreshold,numDimensions,featureMode);
    
    //Loop over the accelerometer data; at each time sample (i) compute the features using the new sample and then print the results
    for(UINT i=0; i<data.getNumSamples(); i++){
        
        //Compute the features using this new sample
        zeroCrossingCounter.computeFeatures( data[i].getSample() );
        
        //Print the input data
        cout << "InputVector: ";
        for(UINT j=0; j<data.getNumDimensions(); j++){
           cout << data[i].getSample()[j] << "\t";
        }
        
        //Get the latest feature vector
        VectorDouble featureVector = zeroCrossingCounter.getFeatureVector();
        
        //Print the features
        cout << "FeatureVector: ";
        for(UINT j=0; j<featureVector.size(); j++){
            cout << featureVector[j];
            if( j != featureVector.size()-1 ) cout << "\t";
        }
        cout << endl;
    }
    
    //Save the zero crossing counter settings to a file
    zeroCrossingCounter.saveModelToFile("ZeroCrossingCounterSettings.txt");
    
    //You can then load the settings again if you need them
    zeroCrossingCounter.loadModelFromFile("ZeroCrossingCounterSettings.txt");
    
    return EXIT_SUCCESS;
}
Example #9
bool DecisionTreeClusterNode::computeSplit( const UINT &numSplittingSteps, const ClassificationData &trainingData, const Vector< UINT > &features, const Vector< UINT > &classLabels, UINT &featureIndex, Float &minError ){

    const UINT M = trainingData.getNumSamples();
    const UINT N = features.getSize();
    const UINT K = classLabels.getSize();

    if( N == 0 ) return false;
    if( K == 0 ) return false;

    minError = grt_numeric_limits< Float >::max();
    Random random;
    UINT bestFeatureIndex = 0;
    Float bestThreshold = 0;
    Float threshold = 0;
    Float error = 0;
    Vector< UINT > groupIndex(M);
    Vector< MinMax > ranges = trainingData.getRanges();
    MatrixDouble data(M,1); //This will store our temporary data for each dimension

    //Randomly select which features we want to use
    UINT numRandomFeatures = numSplittingSteps > N ? N : numSplittingSteps;
    Vector< UINT > randomFeatures = random.getRandomSubset( 0, N, numRandomFeatures );

    //Loop over each random feature and try and find the best split point
    for(UINT n=0; n<numRandomFeatures; n++){

        featureIndex = features[ randomFeatures[n] ];

        //Copy the data for this feature dimension into the temporary dataset
        for(UINT i=0; i<M; i++){
            data[i][0] = trainingData[i][featureIndex];
        }

        if( computeError( trainingData, data, classLabels, ranges, groupIndex, featureIndex, threshold, error ) ){
            //Store the best threshold and feature index
            if( error < minError ){
                minError = error;
                bestThreshold = threshold;
                bestFeatureIndex = featureIndex;
            }
        }
    }

    //Set the best feature index that will be returned to the DecisionTree that called this function
    featureIndex = bestFeatureIndex;

    //Store the node size, feature index, best threshold and class probabilities for this node
    set( M, featureIndex, bestThreshold, trainingData.getClassProbabilities(classLabels) );

    return true;
}
Example #10
ClassificationData ClassificationData::getBootstrappedDataset(UINT numSamples) const{
    
    Random rand;
    ClassificationData newDataset;
    newDataset.setNumDimensions( getNumDimensions() );
    newDataset.setAllowNullGestureClass( allowNullGestureClass );
    newDataset.setExternalRanges( externalRanges, useExternalRanges );
    
    if( numSamples == 0 ) numSamples = totalNumSamples;
    
    newDataset.reserve( numSamples );
    
    //Add all the class labels to the new dataset to ensure the dataset has a list of all the labels
    for(UINT k=0; k<getNumClasses(); k++){
        newDataset.addClass( classTracker[k].classLabel );
    }
    
    //Randomly select the training samples to add to the new data set
    UINT randomIndex;
    for(UINT i=0; i<numSamples; i++){
        randomIndex = rand.getRandomNumberInt(0, totalNumSamples);
        newDataset.addSample(data[randomIndex].getClassLabel(), data[randomIndex].getSample());
    }

    //Sort the class labels so they are in order
    newDataset.sortClassLabels();
    
    return newDataset;
}
Example #11
int main (int argc, const char * argv[])
{
    //Load the example data
    ClassificationData data;
    
    if( !data.load("WiiAccShakeData.grt") ){
        cout << "ERROR: Failed to load data from file!\n";
        return EXIT_FAILURE;
    }

    //The variables used to initialize the MovementIndex feature extraction
    UINT windowSize = 10;
    UINT numDimensions = data.getNumDimensions();

    //Create a new instance of the MovementIndex feature extraction
    MovementIndex movementIndex(windowSize,numDimensions);
    
    //Loop over the accelerometer data; at each time sample (i) compute the features using the new sample and then print the results
    for(UINT i=0; i<data.getNumSamples(); i++){
        
        //Compute the features using this new sample
        movementIndex.computeFeatures( data[i].getSample() );
        
        //Write the data
        cout << "InputVector: ";
        for(UINT j=0; j<data.getNumDimensions(); j++){
           cout << data[i].getSample()[j] << "\t";
        }
        
        //Get the latest feature vector
        VectorFloat featureVector = movementIndex.getFeatureVector();
        
        //Write the features
        cout << "FeatureVector: ";
        for(UINT j=0; j<featureVector.size(); j++){
            cout << featureVector[j];
            if( j != featureVector.size()-1 ) cout << "\t";
        }
        cout << endl;
    }
    
    //Save the MovementIndex settings to a file
    movementIndex.save("MovementIndexSettings.grt");
    
    //You can then load the settings again if you need them
    movementIndex.load("MovementIndexSettings.grt");
    
    return EXIT_SUCCESS;
}
Example #12
bool SVM::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    //Convert the labelled classification data into the LIBSVM data format
    if( !convertClassificationDataToLIBSVMFormat(trainingData) ){
        errorLog << "train_(ClassificationData &trainingData) - Failed To Convert Labelled Classification Data To LIBSVM Format!" << endl;
        return false;
    }
    
    if( useAutoGamma ) param.gamma = 1.0/numInputDimensions;
    
    //Train the model
    bool trainingResult = trainSVM();
    
    if( !trainingResult ){
        errorLog << "train_(ClassificationData &trainingData) - Failed To Train SVM Model!" << endl;
        return false;
    }
    
    return true;
}
Example #13
ClassificationData ClassificationData::getTestFoldData(const UINT foldIndex) const{
    
    ClassificationData testData;
    testData.setNumDimensions( numDimensions );
    testData.setAllowNullGestureClass( allowNullGestureClass );

    if( !crossValidationSetup ) return testData;

    if( foldIndex >= kFoldValue ) return testData;

    //Add the class labels to make sure they all exist
    for(UINT k=0; k<getNumClasses(); k++){
        testData.addClass( classTracker[k].classLabel, classTracker[k].className );
    }
    
    testData.reserve( (UINT)crossValidationIndexs[ foldIndex ].size() );

    //Add the data to the test fold
    UINT index = 0;
    for(UINT i=0; i<crossValidationIndexs[ foldIndex ].size(); i++){

        index = crossValidationIndexs[ foldIndex ][i];
        testData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
    }

    //Sort the class labels
    testData.sortClassLabels();

    return testData;
}
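A sketch of a typical cross-validation loop built on this accessor and its counterpart getTrainingFoldData (shown in a later example). The filename is hypothetical; note that the k-fold setup function really is spelled spiltDataIntoKFolds in this codebase:

ClassificationData data;
if( data.load( "my_dataset.grt" ) ){
    const UINT K = 10;
    data.spiltDataIntoKFolds( K, true ); //true => stratified sampling
    for(UINT fold=0; fold<K; fold++){
        ClassificationData trainSet = data.getTrainingFoldData( fold );
        ClassificationData testSet = data.getTestFoldData( fold );
        //...train and evaluate a model on this fold...
    }
}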
Example #14
bool BAG::train_(ClassificationData &trainingData){
    
    //Clear any previous models
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    UINT ensembleSize = (UINT)ensemble.size();
    
    if( ensembleSize == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The ensemble size is zero! You need to add some classifiers to the ensemble first." << endl;
        return false;
    }
    
    for(UINT i=0; i<ensembleSize; i++){
        if( ensemble[i] == NULL ){
            errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " has not been set!" << endl;
            return false;
        }
    }

    //Train the ensemble
    for(UINT i=0; i<ensembleSize; i++){
        ClassificationData bootstrappedDataset = trainingData.getBootstrappedDataset();
        
        trainingLog << "Training ensemble " << i+1 << ". Ensemble type: " << ensemble[i]->getClassType() << endl;
        
        //Train the classifier with the bootstrapped dataset
        if( !ensemble[i]->train( bootstrappedDataset ) ){
            errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " failed training!" << endl;
            return false;
        }
    }
    
    //Set the class labels
    classLabels = trainingData.getClassLabels();
    
    //Flag that the model has been trained
    trained = true;
    
    return trained;
}
Example #15
File: ANBC.cpp Project: jdelfes/grt
bool ANBC::setWeights(const ClassificationData &weightsData){
    
    if( weightsData.getNumSamples() > 0 ){
        weightsDataSet = true;
        this->weightsData = weightsData;
        return true;
    }
    return false;
}
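A minimal usage sketch, assuming the weights dataset holds one N-dimensional weight vector per class label, which ANBC uses to scale each dimension's contribution for that class:

ANBC anbc;
ClassificationData weightsData;
weightsData.setNumDimensions( 3 );
VectorDouble weights(3);
weights[0] = 1.0; weights[1] = 1.0; weights[2] = 0.0; //e.g. ignore the third dimension for class 1
weightsData.addSample( 1, weights );
anbc.setWeights( weightsData );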
Example #16
bool HierarchicalClustering::train_(ClassificationData &trainingData){
	
    if( trainingData.getNumSamples() == 0 ){
        return false;
    }

    //Convert the labelled training data to a training matrix
    M = trainingData.getNumSamples();
    N = trainingData.getNumDimensions();

    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

    return train_( data );
}
Example #17
File: tree.cpp Project: Cerarus/v4r
void Tree::RefineLeafNodes(ClassificationData& data, int nPoints, int labelIdx)
{
  // for all available points in data, traverse through tree and add one point to
  // label distribution of resulting leaf node
  for(int i=0; i<nPoints; ++i)
  {
	int idx = GetResultingLeafNode(data.GetFeatures(i));
	nodes[idx].AddToAbsLabelDistribution(labelIdx);	
  }  
}
Example #18
int main (int argc, const char * argv[])
{
    //Create a new KMeans instance
    KMeans kmeans;
    kmeans.setComputeTheta( true );
    kmeans.setMinChange( 1.0e-10 );
    kmeans.setMinNumEpochs( 10 );
	kmeans.setMaxNumEpochs( 10000 );

	//There are a number of ways of training the KMeans algorithm, depending on what you need the KMeans for
	//These are:
	//- with labelled training data (in the ClassificationData format)
	//- with unlabelled training data (in the UnlabelledData format)
	//- with unlabelled training data (in a simple MatrixDouble format)
	
	//This example shows you how to train the algorithm with ClassificationData
	
	//Load some training data to train the KMeans algorithm
    ClassificationData trainingData;
    
    if( !trainingData.load("LabelledClusterData.csv") ){
        cout << "Failed to load training data!\n";
        return EXIT_FAILURE;
    }
	
    //Train the KMeans algorithm - K will automatically be set to the number of classes in the training dataset
    if( !kmeans.train( trainingData ) ){
        cout << "Failed to train model!\n";
        return EXIT_FAILURE;
    }
	
	//Get the K clusters from the KMeans instance and print them
	cout << "\nClusters:\n";
	MatrixFloat clusters = kmeans.getClusters();
    for(unsigned int k=0; k<clusters.getNumRows(); k++){
		for(unsigned int n=0; n<clusters.getNumCols(); n++){
			cout << clusters[k][n] << "\t";
		}cout << endl;
	}
	
    return EXIT_SUCCESS;
}
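The comments above also mention training from a raw matrix; here is a minimal sketch of that route, assuming the samples are stored one per row in a MatrixFloat and that setNumClusters is available from the Clusterer base class (K can no longer be inferred from class labels):

KMeans kmeans;
kmeans.setNumClusters( 5 ); //K must be set explicitly for unlabelled data
MatrixFloat matrixData;
//...fill matrixData with one sample per row...
if( !kmeans.train_( matrixData ) ){
    cout << "Failed to train model!\n";
}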
Example #19
ClassificationData TimeSeriesClassificationDataStream::getClassificationData( const bool includeNullGestures ) const {
    
    ClassificationData classificationData;
    
    classificationData.setNumDimensions( getNumDimensions() );
    classificationData.setAllowNullGestureClass( includeNullGestures );

    bool addSample = false;
    for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){
        addSample = includeNullGestures ? true : timeSeriesPositionTracker[i].getClassLabel() != GRT_DEFAULT_NULL_CLASS_LABEL;
        if( addSample ){
            MatrixDouble dataSegment = getTimeSeriesData( timeSeriesPositionTracker[i] );
            for(UINT j=0; j<dataSegment.getNumRows(); j++){
                classificationData.addSample(timeSeriesPositionTracker[i].getClassLabel(), dataSegment.getRowVector(j) );
            }
        }
    }
    
    return classificationData;
}
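A usage sketch for this conversion (the filename is hypothetical, and it assumes the stream class exposes the same load convenience as the other GRT data structures). Every row of every non-null time series becomes an individual classification sample:

TimeSeriesClassificationDataStream stream;
if( stream.load( "stream_data.grt" ) ){
    //Convert the stream, dropping any null-gesture segments
    ClassificationData classificationData = stream.getClassificationData( false );
}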
Example #20
int main (int argc, const char * argv[])
{    
    GestureRecognitionPipeline pipeline;    
    ANBC anbc;    
    ClassificationData trainingData;
  
    trainingData.loadDatasetFromFile("training-data.txt") 
    pipeline.setClassifier(anbc);
    pipeline.train(trainingData);
    
    VectorDouble inputVector(SAMPLE_DIMENSION) = getDataFromSensor();

    pipeline.predict(inputVector);
    
    UINT predictedClassLabel = pipeline.getPredictedClassLabel();
    double maxLikelihood =  pipeline.getMaximumLikelihood();
    printf("predictedClassLabel : %d , MaximumLikelihood : %f \n", predictedClassLabel, maxLikelihood);
   
    return EXIT_SUCCESS;
}
Example #21
File: LDA.cpp Project: CV-IP/grt
MatrixFloat LDA::computeBetweenClassScatterMatrix( ClassificationData &data ){
	
	MatrixFloat sb(numInputDimensions,numInputDimensions);
	MatrixFloat classMean = data.getClassMean();
	VectorDouble totalMean = data.getMean();
	sb.setAllValues( 0 );
	
	for(UINT k=0; k<numClasses; k++){
		
		UINT numSamplesInClass = data.getClassTracker()[k].counter;
	
		for(UINT m=0; m<numInputDimensions; m++){
			for(UINT n=0; n<numInputDimensions; n++){
				sb[m][n] += (classMean[k][m]-totalMean[m]) * (classMean[k][n]-totalMean[n]) * Float(numSamplesInClass);
			}
		}
	}
	
	return sb;
}
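For reference, the loop above computes the standard between-class scatter matrix S_B = sum_k N_k * (mu_k - mu)(mu_k - mu)^T entry by entry, where mu_k is the mean of class k, mu is the global mean, and N_k is the number of samples in class k.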
Example #22
File: LDA.cpp Project: CV-IP/grt
MatrixFloat LDA::computeWithinClassScatterMatrix( ClassificationData &data ){
	
	MatrixFloat sw(numInputDimensions,numInputDimensions);
	sw.setAllValues( 0 );
	
	for(UINT k=0; k<numClasses; k++){
		
		//Compute the scatter matrix for class k
		ClassificationData classData = data.getClassData( data.getClassTracker()[k].classLabel );
		MatrixFloat scatterMatrix = classData.getCovarianceMatrix();
		
		//Add this to the main scatter matrix
		for(UINT m=0; m<numInputDimensions; m++){
			for(UINT n=0; n<numInputDimensions; n++){
				sw[m][n] += scatterMatrix[m][n];
			}
		}
	}
	
	return sw;
}
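For reference, the classic within-class scatter matrix is S_W = sum_k sum_{i in class k} (x_i - mu_k)(x_i - mu_k)^T. Assuming getCovarianceMatrix() normalizes by N_k - 1 (the conventional sample covariance), the implementation above actually sums per-class covariance matrices, which differs from the raw scatter only by a per-class scale factor of 1/(N_k - 1).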
Example #23
ClassificationData ClassificationData::getTrainingFoldData(const UINT foldIndex) const{
   
    ClassificationData trainingData;
    trainingData.setNumDimensions( numDimensions );
    trainingData.setAllowNullGestureClass( allowNullGestureClass );

    if( !crossValidationSetup ){
        errorLog << "getTrainingFoldData(const UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
       return trainingData;
    }

    if( foldIndex >= kFoldValue ) return trainingData;

    //Add the class labels to make sure they all exist
    for(UINT k=0; k<getNumClasses(); k++){
        trainingData.addClass( classTracker[k].classLabel, classTracker[k].className );
    }

    //Add the data to the training set, this will consist of all the data that is NOT in the foldIndex
    UINT index = 0;
    for(UINT k=0; k<kFoldValue; k++){
        if( k != foldIndex ){
            for(UINT i=0; i<crossValidationIndexs[k].size(); i++){

                index = crossValidationIndexs[k][i];
                trainingData.addSample( data[ index ].getClassLabel(), data[ index ].getSample() );
            }
        }
    }

    //Sort the class labels
    trainingData.sortClassLabels();

    return trainingData;
}
Example #24
ClassificationData ClassificationData::getClassData(const UINT classLabel) const{
    
    ClassificationData classData;
    classData.setNumDimensions( this->numDimensions );
    classData.setAllowNullGestureClass( allowNullGestureClass );
    
    //Reserve the memory for the class data
    for(UINT i=0; i<classTracker.size(); i++){
        if( classTracker[i].classLabel == classLabel ){
            classData.reserve( classTracker[i].counter );
            break;
        }
    }

    for(UINT i=0; i<totalNumSamples; i++){
        if( data[i].getClassLabel() == classLabel ){
            classData.addSample(classLabel, data[i].getSample());
        }
    }

    return classData;
}
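A one-line usage sketch, assuming data is a loaded ClassificationData instance:

ClassificationData classOneData = data.getClassData( 1 ); //all samples with class label 1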
Example #25
bool KMeans::train_(ClassificationData &trainingData){
	
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The training data is empty!" << std::endl;
        return false;
    }
    
    //Set the numClusters as the number of classes in the training data
    numClusters = trainingData.getNumClasses();

    //Convert the labelled training data to a training matrix
    UINT M = trainingData.getNumSamples();
    UINT N = trainingData.getNumDimensions();
    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

    //Run the K-Means algorithm
    return train_( data );
}
Example #26
bool ClassificationData::generateGaussDataset( const std::string filename, const UINT numSamples, const UINT numClasses, const UINT numDimensions, const double range, const double sigma ){
    
    Random random;
    
    //Generate a simple model that will be used to generate the main dataset
    MatrixDouble model(numClasses,numDimensions);
    for(UINT k=0; k<numClasses; k++){
        for(UINT j=0; j<numDimensions; j++){
            model[k][j] = random.getRandomNumberUniform(-range,range);
        }
    }
    
    //Use the model above to generate the main dataset
    ClassificationData data;
    data.setNumDimensions( numDimensions );
    
    for(UINT i=0; i<numSamples; i++){
        
        //Randomly select which class this sample belongs to
        UINT k = random.getRandomNumberInt( 0, numClasses );
        
        //Generate a sample using the model (+ some Gaussian noise)
        vector< double > sample( numDimensions );
        for(UINT j=0; j<numDimensions; j++){
            sample[j] = model[k][j] + random.getRandomNumberGauss(0,sigma);
        }
        
        //By default in the GRT, the class label should not be 0, so add 1
        UINT classLabel = k + 1;
        
        //Add the labeled sample to the dataset
        data.addSample( classLabel, sample );
    }
    
    //Save the dataset to a CSV file
    return data.save( filename );
}
Example #27
// Tests the learning algorithm on a basic dataset
TEST(BAG, TrainBasicDataset) {
  
  BAG bag;

  //Check the module is not trained
  EXPECT_TRUE( !bag.getTrained() );

  //Generate a basic dataset
  const UINT numSamples = 10000;
  const UINT numClasses = 10;
  const UINT numDimensions = 100;
  ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 );
  ClassificationData trainingData;
  EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) );

  ClassificationData testData = trainingData.split( 50 );

  //Add an adaptive naive bayes classifier to the BAG ensemble
  bag.addClassifierToEnsemble( ANBC() );
  
  //Add a MinDist classifier to the BAG ensemble, using two clusters
  MinDist min_dist_two_clusters;
  min_dist_two_clusters.setNumClusters(2);
  bag.addClassifierToEnsemble( min_dist_two_clusters );
  
  //Add a MinDist classifier to the BAG ensemble, using five clusters
  MinDist min_dist_five_clusters;
  min_dist_five_clusters.setNumClusters(5);
  bag.addClassifierToEnsemble( min_dist_five_clusters );

  //Train the classifier
  EXPECT_TRUE( bag.train( trainingData ) );

  EXPECT_TRUE( bag.getTrained() );

  EXPECT_TRUE( bag.print() );

  for(UINT i=0; i<testData.getNumSamples(); i++){
    EXPECT_TRUE( bag.predict( testData[i].getSample() ) );
  }

  EXPECT_TRUE( bag.save( "bag_model.grt" ) );

  bag.clear();
  EXPECT_TRUE( !bag.getTrained() );

  EXPECT_TRUE( bag.load( "bag_model.grt" ) );

  EXPECT_TRUE( bag.getTrained() );

  for(UINT i=0; i<testData.getNumSamples(); i++){
    EXPECT_TRUE( bag.predict( testData[i].getSample() ) );
  }


}
Example #28
// Tests the learning algorithm on a basic dataset
TEST(KNN, TrainBasicDataset) {
  
  KNN knn;

  //Check the module is not trained
  EXPECT_TRUE( !knn.getTrained() );

  //Generate a basic dataset
  const UINT numSamples = 1000;
  const UINT numClasses = 10;
  const UINT numDimensions = 10;
  ClassificationData::generateGaussDataset( "gauss_data.csv", numSamples, numClasses, numDimensions, 10, 1 );
  ClassificationData trainingData;
  EXPECT_TRUE( trainingData.load( "gauss_data.csv" ) );

  ClassificationData testData = trainingData.split( 50 );

  //Train the classifier
  EXPECT_TRUE( knn.train( trainingData ) );

  EXPECT_TRUE( knn.getTrained() );

  EXPECT_TRUE( knn.print() );

  for(UINT i=0; i<testData.getNumSamples(); i++){
    EXPECT_TRUE( knn.predict( testData[i].getSample() ) );
  }

  EXPECT_TRUE( knn.save( "knn_model.grt" ) );

  knn.clear();

  EXPECT_TRUE( !knn.getTrained() );

  EXPECT_TRUE( knn.load( "knn_model.grt" ) );

  EXPECT_TRUE( knn.getTrained() );

  for(UINT i=0; i<testData.getNumSamples(); i++){
    EXPECT_TRUE( knn.predict( testData[i].getSample() ) );
  }


}
Example #29
bool Softmax::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Train a regression model for each class in the training data
    for(UINT k=0; k<numClasses; k++){
        
        //Set the class label
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
        
        //Train the model
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << "train(ClassificationData labelledTrainingData) - Failed to train model for class: " << classLabels[k] << std::endl;
            return false;
        }
    }
    
    //Flag that the algorithm has been trained
    trained = true;
    return trained;
}
Example #30
File: LDA.cpp Project: CV-IP/grt
bool LDA::train(ClassificationData trainingData){
    
    errorLog << "SORRY - this module is still under development and can't be used yet!" << std::endl;
    return false;
    
    //Reset any previous model
    numInputDimensions = 0;
    numClasses = 0;
    models.clear();
    classLabels.clear();
    trained = false;
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledClassificationData trainingData) - There is no training data to train the model!" << std::endl;
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();

	//Calculate the between scatter matrix
	MatrixFloat SB = computeBetweenClassScatterMatrix( trainingData );
	
	//Calculate the within scatter matrix
	MatrixFloat SW = computeWithinClassScatterMatrix( trainingData );


   /*

    
    //Counters and stat containers
    vector< UINT > groupLabels(numClasses);
    VectorDouble groupCounters(numClasses);
    VectorDouble priorProb(numClasses);
    MatrixFloat groupMeans(numClasses,numFeatures);
    MatrixFloat pCov(numFeatures,numFeatures);
    MatrixFloat pCovInv(numFeatures,numFeatures);
    MatrixFloat modelCoeff(numClasses,numFeatures+1);
    
    pCov.setAllValues(0);
    modelCoeff.setAllValues(0);
    
    //Set the class labels and counters
    for(UINT k=0; k<numClasses; k++){
        groupLabels[k] = trainingData.getClassTracker()[k].classLabel;
        groupCounters[k] = trainingData.getClassTracker()[k].counter;
    }
    
    //Loop over the classes to compute the group stats
    for(UINT k=0; k<numClasses; k++){
        LabelledClassificationData classData = trainingData.getClassData( groupLabels[k] );
        MatrixFloat cov(numFeatures,numFeatures);
        
        //Compute class mu
        for(UINT j=0; j<numFeatures; j++){
            groupMeans[k][j] = 0;
            for(UINT i=0; i<classData.getNumSamples(); i++){
                groupMeans[k][j] += classData[i][j];
            }
            groupMeans[k][j] /= Float(classData.getNumSamples());
        }
        
        //Compute the class covariance
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                cov[m][n] = 0;
                for(UINT i=0; i<classData.getNumSamples(); i++){
                    cov[m][n] += (classData[i][m]-groupMeans[k][m]) * (classData[i][n]-groupMeans[k][n]);
                }
                cov[m][n] /= Float(classData.getNumSamples()-1);
            }
        }
        
        debugLog << "Group Cov:\n";
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                debugLog << cov[m][n] << "\t";
            }debugLog << "\n";
        }debugLog << std::endl;
        
        //Set the prior probability for this class (which is just 1/numClasses)
        priorProb[k] = 1.0/Float(numClasses);
        
        //Update the main covariance matrix
        Float weight = ((classData.getNumSamples() - 1) / Float(trainingData.getNumSamples() - numClasses) );
        debugLog << "Weight: " << weight << std::endl;
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                pCov[m][n] += weight * cov[m][n];
            }
        }
    }
    
    for(UINT k=0; k<numClasses; k++){
        debugLog << "GroupMu: " << groupLabels[k] << "\t";
        for(UINT j=0; j<numFeatures; j++){
            debugLog << groupMeans[k][j] << "\t";
        }debugLog << std::endl;
    }
    
    debugLog << "pCov:\n";
    for(UINT m=0; m<numFeatures; m++){
        for(UINT n=0; n<numFeatures; n++){
            debugLog << pCov[m][n] << "\t";
        }debugLog << "\n";
    }debugLog << std::endl;
    
    //Invert the pCov matrix
    LUDecomposition matrixInverter(pCov);
    if( !matrixInverter.inverse(pCovInv) ){
        errorLog << "Failed to invert pCov Matrix!" << std::endl;
        return false;
    }
    
    //Loop over classes to calculate linear discriminant coefficients
    Float sum = 0;
    vector< Float > temp(numFeatures);
    for(UINT k=0; k<numClasses; k++){
        //Compute the temporary vector
        for(UINT j=0; j<numFeatures; j++){
            temp[j] = 0;
            for(UINT m=0; m<numFeatures; m++){
                    temp[j] += groupMeans[k][m] * pCovInv[m][j];
            }
        }
        
        //Compute the model coefficients
        sum = 0;
        for(UINT j=0; j<numFeatures; j++){
            sum += temp[j]*groupMeans[k][j];
        }
        modelCoeff[k][0] = -0.5 * sum + log( priorProb[k] );
        
        for(UINT j=0; j<numFeatures; j++){
            modelCoeff[k][j+1] = temp[j];
        }
    }
    
    //Setup the models for realtime prediction
    models.resize(numClasses);
    classLabels.resize(numClasses);
    
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = groupLabels[k];
        models[k].classLabel = groupLabels[k];
        models[k].priorProb = priorProb[k];
        models[k].weights = modelCoeff.getRowVector(k);
    }
    
    //Flag that the models were successfully trained
    trained = true;
    */
    
    return true;
}