int main (int argc, const char * argv[])
{
    
    //Load the example data
    ClassificationData data;
    
    if( !data.loadDatasetFromFile("WiiAccShakeData.txt") ){
        cout << "ERROR: Failed to load data from file!\n";
        return EXIT_FAILURE;
    }

    //The variables used to initialize the zero crossing counter feature extraction
    UINT searchWindowSize = 20;
    double deadZoneThreshold = 0.01;
    UINT numDimensions = data.getNumDimensions();
    UINT featureMode = ZeroCrossingCounter::INDEPENDANT_FEATURE_MODE; //This could also be ZeroCrossingCounter::COMBINED_FEATURE_MODE
    
    //Create a new instance of the ZeroCrossingCounter feature extraction
    ZeroCrossingCounter zeroCrossingCounter(searchWindowSize,deadZoneThreshold,numDimensions,featureMode);
    
    //Loop over the accelerometer data, at each time sample (i) compute the features using the new sample and then write the results to a file
    for(UINT i=0; i<data.getNumSamples(); i++){
        
        //Compute the features using this new sample
        zeroCrossingCounter.computeFeatures( data[i].getSample() );
        
        //Write the data to the file
        cout << "InputVector: ";
        for(UINT j=0; j<data.getNumDimensions(); j++){
           cout << data[i].getSample()[j] << "\t";
        }
        
        //Get the latest feature vector
        VectorDouble featureVector = zeroCrossingCounter.getFeatureVector();
        
        //Write the features to the file
        cout << "FeatureVector: ";
        for(UINT j=0; j<featureVector.size(); j++){
            cout << featureVector[j];
            if( j != featureVector.size()-1 ) cout << "\t";
        }
        cout << endl;
    }
    
    //Save the zero crossing counter settings to a file
    zeroCrossingCounter.saveModelToFile("ZeroCrossingCounterSettings.txt");
    
    //You can then load the settings again if you need them
    zeroCrossingCounter.loadModelFromFile("ZeroCrossingCounterSettings.txt");
    
    return EXIT_SUCCESS;
}
Esempio n. 2
0
int main (int argc, const char * argv[])
{
    //Load the example data
    ClassificationData data;
    
    if( !data.load("WiiAccShakeData.grt") ){
        cout << "ERROR: Failed to load data from file!\n";
        return EXIT_FAILURE;
    }

    //The variables used to initialize the MovementIndex feature extraction
    UINT windowSize = 10;
    UINT numDimensions = data.getNumDimensions();

    //Create a new instance of the MovementIndex feature extraction
    MovementIndex movementIndex(windowSize,numDimensions);
    
    //Loop over the accelerometer data, at each time sample (i) compute the features using the new sample and then write the results to a file
    for(UINT i=0; i<data.getNumSamples(); i++){
        
        //Compute the features using this new sample
        movementIndex.computeFeatures( data[i].getSample() );
        
        //Write the data
        cout << "InputVector: ";
        for(UINT j=0; j<data.getNumDimensions(); j++){
           cout << data[i].getSample()[j] << "\t";
        }
        
        //Get the latest feature vector
        VectorFloat featureVector = movementIndex.getFeatureVector();
        
        //Write the features
        cout << "FeatureVector: ";
        for(UINT j=0; j<featureVector.size(); j++){
            cout << featureVector[j];
            if( j != featureVector.size()-1 ) cout << "\t";
        }
        cout << endl;
    }
    
    //Save the MovementIndex settings to a file
    movementIndex.save("MovementIndexSettings.grt");
    
    //You can then load the settings again if you need them
    movementIndex.load("MovementIndexSettings.grt");
    
    return EXIT_SUCCESS;
}
Esempio n. 3
0
bool BAG::train_(ClassificationData &trainingData){
    
    //Clear any previous models
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    UINT ensembleSize = (UINT)ensemble.size();
    
    if( ensembleSize == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The ensemble size is zero! You need to add some classifiers to the ensemble first." << endl;
        return false;
    }
    
    for(UINT i=0; i<ensembleSize; i++){
        if( ensemble[i] == NULL ){
            errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " has not been set!" << endl;
            return false;
        }
    }

    //Train the ensemble
    for(UINT i=0; i<ensembleSize; i++){
        ClassificationData boostedDataset = trainingData.getBootstrappedDataset();
        
        trainingLog << "Training ensemble " << i+1 << ". Ensemble type: " << ensemble[i]->getClassType() << endl;
        
        //Train the classifier with the bootstrapped dataset
        if( !ensemble[i]->train( boostedDataset ) ){
            errorLog << "train_(ClassificationData &trainingData) - The classifier at ensemble index " << i << " failed training!" << endl;
            return false;
        }
    }
    
    //Set the class labels
    classLabels = trainingData.getClassLabels();
    
    //Flag that the model has been trained
    trained = true;
    
    return trained;
}
Esempio n. 4
0
bool ClassificationData::merge(const ClassificationData &labelledData){

    if( labelledData.getNumDimensions() != numDimensions ){
        errorLog << "merge(const ClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl;
        return false;
    }

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();
    
    //Reserve the memory
    reserve( getNumSamples() + labelledData.getNumSamples() );

    //Add the data from the labelledData to this instance
    for(UINT i=0; i<labelledData.getNumSamples(); i++){
        addSample(labelledData[i].getClassLabel(), labelledData[i].getSample());
    }

    //Set the class names from the dataset
    vector< ClassTracker > classTracker = labelledData.getClassTracker();
    for(UINT i=0; i<classTracker.size(); i++){
        setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel);
    }

    //Sort the class labels
    sortClassLabels();

    return true;
}
Esempio n. 5
0
bool SwipeDetector::train_(ClassificationData &trainingData) {

    //Clear any previous models
    clear();

    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();

    if( M == 0 ) {
        errorLog << "train_(trainingData &labelledTrainingData) - Training data has zero samples!" << std::endl;
        return false;
    }

    numInputDimensions = N;
    numClasses = 2; //This is always 2 for swipe detection [1 == swipe detected, everything else means no swipe detected]
    classLabels.resize( 2 );
    classLabels[0] = 1; //Swipe
    classLabels[1] = 2; //No Swipe
    nullRejectionThresholds.resize(2,0);
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ) {
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    //We currently have no way to automatically train the swipe detection, user needs to manually set thresholds, so just flag the model is trained
    trained = true;

    return true;
}
bool SVM::convertClassificationDataToLIBSVMFormat(ClassificationData &trainingData){
    
    //clear any previous problems
    deleteProblemSet();
    
    const UINT numTrainingExamples = trainingData.getNumSamples();
    numInputDimensions = trainingData.getNumDimensions();
    
    //Compute the ranges encase the data should be scaled
    ranges = trainingData.getRanges();
    
    //Init the memory
    prob.l = numTrainingExamples;
    prob.x = new svm_node*[numTrainingExamples];
    prob.y = new double[numTrainingExamples];
    problemSet = true;
    
    for(UINT i=0; i<numTrainingExamples; i++){
        //Set the class ID
        prob.y[i] = trainingData[i].getClassLabel();
        
        //Assign the memory for this training example, note that a dummy node is needed at the end of the vector
        prob.x[i] = new svm_node[numInputDimensions+1];
        for(UINT j=0; j<numInputDimensions; j++){
            prob.x[i][j].index = j+1;
            prob.x[i][j].value = trainingData[i].getSample()[j];
        }
        prob.x[i][numInputDimensions].index = -1; //Assign the final node value
        prob.x[i][numInputDimensions].value = 0;
    }
    
    return true;
}
Esempio n. 7
0
bool Softmax::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Train a regression model for each class in the training data
    for(UINT k=0; k<numClasses; k++){
        
        //Set the class label
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
        
        //Train the model
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << "train(ClassificationData labelledTrainingData) - Failed to train model for class: " << classLabels[k] << std::endl;
            return false;
        }
    }
    
    //Flag that the algorithm has been trained
    trained = true;
    return trained;
}
Esempio n. 8
0
bool HierarchicalClustering::train_(ClassificationData &trainingData){
	
	if( trainingData.getNumSamples() == 0 ){
		return false;
	}

    //Convert the labelled training data to a training matrix
	M = trainingData.getNumSamples();
    N = trainingData.getNumDimensions();

    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

	return train_( data );
}
Esempio n. 9
0
bool KMeans::train_(ClassificationData &trainingData){
	
	if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The training data is empty!" << std::endl;
		return false;
	}
	
	//Set the numClusters as the number of classes in the training data
	numClusters = trainingData.getNumClasses();

    //Convert the labelled training data to a training matrix
	UINT M = trainingData.getNumSamples();
    UINT N = trainingData.getNumDimensions();
    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

    //Run the K-Means algorithm
    return train_( data );
}
Esempio n. 10
0
bool MinDist::train_(ClassificationData &labelledTrainingData){
    
    //Clear any previous models
    clear();
    
    const unsigned int M = labelledTrainingData.getNumSamples();
    const unsigned int N = labelledTrainingData.getNumDimensions();
    const unsigned int K = labelledTrainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( M <= numClusters ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - There are not enough training samples for the number of clusters. Either reduce the number of clusters or increase the number of training samples!" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    nullRejectionThresholds.resize(K);
    ranges = labelledTrainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        labelledTrainingData.scale(0, 1);
    }
    
    //Train each of the models
    for(UINT k=0; k<numClasses; k++){
        
        //Get the class label for the kth class
        UINT classLabel = labelledTrainingData.getClassTracker()[k].classLabel;
        
        //Set the kth class label
        classLabels[k] = classLabel;
        
        //Get all the training data for this class
        ClassificationData classData = labelledTrainingData.getClassData(classLabel);
        MatrixDouble data(classData.getNumSamples(),N);
        
        //Copy the training data into a matrix
        for(UINT i=0; i<data.getNumRows(); i++){
            for(UINT j=0; j<data.getNumCols(); j++){
                data[i][j] = classData[i][j];
            }
        }
        
        //Train the model for this class
        models[k].setGamma( nullRejectionCoeff );
        if( !models[k].train(classLabel,data,numClusters) ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to train model for class: " << classLabel;
            errorLog << ". This is might be because this class does not have enough training samples! You should reduce the number of clusters or increase the number of training samples for this class." << endl;
            models.clear();
            return false;
        }
        
        //Set the null rejection threshold
        nullRejectionThresholds[k] = models[k].getRejectionThreshold();
        
    }
    
    trained = true;
    return true;
}
Esempio n. 11
0
int main(int argc, char * argv[])
{

    if( argc < 3 ){
        errorLog << "Not enough input arguments!" << endl;
        printUsage();
        return EXIT_FAILURE;
    }

    const string inputDirectory = argv[1];
    const string outputFilename = argv[2];

    //Parse the data directory for files
    vector< string > filenames;
    infoLog << "- Parsing data directory: " << inputDirectory << endl;

    if( !Util::parseDirectory( inputDirectory, ".csv", filenames ) ){
        errorLog << "Failed to parse input directory: " << inputDirectory << endl;
        return EXIT_FAILURE; 
    }

    if( filenames.size() == 0 ){
        errorLog << "Failed to find any files in the input directory: " << inputDirectory << endl;
        return EXIT_FAILURE; 
    }

    ClassificationData data;
    unsigned int numFiles = (unsigned int)filenames.size();
    bool dataLoaded = false;
    for(unsigned int i=0; i<numFiles; i++){
        //Load the data
        infoLog << "- Loading data " << i+1 << " of " << numFiles << endl;
        
        ClassificationData tmp;
        if( tmp.load( filenames[i] ) ){
            if( i==0 ){
                data.setNumDimensions( tmp.getNumDimensions() );
            }
            dataLoaded = true;

            infoLog << "- Data loaded.  Number of samples: " << tmp.getNumSamples() << endl;

            data.merge( tmp );
        }else{
            warningLog << "- Failed to load data!" << endl;
        }
    }

    if( dataLoaded ){

        infoLog << "- Merged data to generate new dataset with " << data.getNumSamples() << " samples" << endl;

        //Save the new datasets
        infoLog << "- Saving main dataset to file: " << outputFilename << endl;
        if( !data.save( outputFilename ) ){
            errorLog << "Failed to save output data: " << outputFilename << endl;
            return EXIT_FAILURE; 
        }

    }else{
        warningLog << "- Failed to load any data!" << endl;
        return EXIT_FAILURE; 
    } 

    return EXIT_SUCCESS;
}
Esempio n. 12
0
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    UINT N = data.getNumDimensions();
    UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    Vector< UINT > randomTrainingOrder(M);
    
    //Init the model
    model.init( classLabel,  N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){
        
        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        for(UINT m=0; m<M; m++){
            
            //Select the random sample
            UINT i = randomTrainingOrder[m];
            
            //Compute the error, given the current weights
            error = y[i] - model.compute( data[i].getSample() );
            errorSum += error;
            
            //Update the weights
            for(UINT j=0; j<N; j++){
                model.w[j] += learningRate  * error * data[i][j];
            }
            model.w0 += learningRate  * error;
        }
        
        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;
        
        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
    }
    
    return true;
}
int main (int argc, const char * argv[])
{

    //Create a new instance of the ClassificationData
    ClassificationData trainingData;
    
    //Set the dimensionality of the data (you need to do this before you can add any samples)
    trainingData.setNumDimensions( 3 );
    
    //You can also give the dataset a name (the name should have no spaces)
    trainingData.setDatasetName("DummyData");
    
    //You can also add some info text about the data
    trainingData.setInfoText("This data contains some dummy data");
    
    //Here you would grab some data from your sensor and label it with the corresponding gesture it belongs to
    UINT gestureLabel = 1;
    VectorDouble sample(3);
    
    //For now we will just add some random data
    Random random;
    for(UINT i=0; i<100; i++){
        sample[0] = random.getRandomNumberUniform(-1.0,1.0);
        sample[1] = random.getRandomNumberUniform(-1.0,1.0); 
        sample[2] = random.getRandomNumberUniform(-1.0,1.0); 
        
        //Add the sample to the training data
        trainingData.addSample( gestureLabel, sample );
    }
    
    //After recording your training data you can then save it to a file
    if( !trainingData.saveDatasetToFile( "TrainingData.txt" ) ){
		cout << "ERROR: Failed to save dataset to file!\n";
		return EXIT_FAILURE;
	}
    
    //This can then be loaded later
    if( !trainingData.loadDatasetFromFile( "TrainingData.txt" ) ){
		cout << "ERROR: Failed to load dataset from file!\n";
		return EXIT_FAILURE;
	}
    
    //You can also save and load the training data to a CSV file
    //Each row will contain a sample, with the first column containing the class label and the remaining columns containing the data
    if( !trainingData.saveDatasetToCSVFile( "TrainingData.csv" ) ){
		cout << "ERROR: Failed to save dataset to csv file!\n";
		return EXIT_FAILURE;
	}
	
    if( !trainingData.loadDatasetFromCSVFile( "TrainingData.csv" ) ){
		cout << "ERROR: Failed to load dataset from csv file!\n";
		return EXIT_FAILURE;
	}
    
    //This is how you can get some stats from the training data
    string datasetName = trainingData.getDatasetName();
    string infoText = trainingData.getInfoText();
    UINT numSamples = trainingData.getNumSamples();
    UINT numDimensions = trainingData.getNumDimensions();
    UINT numClasses = trainingData.getNumClasses();
    
    cout << "Dataset Name: " << datasetName << endl;
    cout << "InfoText: " << infoText << endl;
    cout << "NumberOfSamples: " << numSamples << endl;
    cout << "NumberOfDimensions: " << numDimensions << endl;
    cout << "NumberOfClasses: " << numClasses << endl;
    
    //You can also get the minimum and maximum ranges of the data
    vector< MinMax > ranges = trainingData.getRanges();
    
    cout << "The ranges of the dataset are: \n";
    for(UINT j=0; j<ranges.size(); j++){
        cout << "Dimension: " << j << " Min: " << ranges[j].minValue << " Max: " << ranges[j].maxValue << endl;
    }
    
    //If you want to partition the dataset into a training dataset and a test dataset then you can use the partition function
    //A value of 80 means that 80% of the original data will remain in the training dataset and 20% will be returned as the test dataset
    ClassificationData testData = trainingData.partition( 80 );
    
    //If you have multiple datasets that you want to merge together then use the merge function
    if( !trainingData.merge( testData ) ){
		cout << "ERROR: Failed to save merge datasets!\n";
		return EXIT_FAILURE;
	}
    
    //If you want to run K-Fold cross validation using the dataset then you should first spilt the dataset into K-Folds
    //A value of 10 splits the dataset into 10 folds and the true parameter signals that stratified sampling should be used
    if( !trainingData.spiltDataIntoKFolds( 10, true ) ){
		cout << "ERROR: Failed to spiltDataIntoKFolds!\n";
		return EXIT_FAILURE;
	}
    
    //After you have called the spilt function you can then get the training and test sets for each fold
    for(UINT foldIndex=0; foldIndex<10; foldIndex++){
        ClassificationData foldTrainingData = trainingData.getTrainingFoldData( foldIndex );
        ClassificationData foldTestingData = trainingData.getTestFoldData( foldIndex );
    }
    
    //If need you can clear any training data that you have recorded
    trainingData.clear();
    
    return EXIT_SUCCESS;
}
Esempio n. 14
0
File: ANBC.cpp Progetto: jdelfes/grt
bool ANBC::train_(ClassificationData &labelledTrainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = labelledTrainingData.getNumSamples();
    const unsigned int N = labelledTrainingData.getNumDimensions();
    const unsigned int K = labelledTrainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    if( weightsDataSet ){
        if( weightsData.getNumDimensions() != N ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - The number of dimensions in the weights data (" << weightsData.getNumDimensions() << ") is not equal to the number of dimensions of the training data (" << N << ")" << endl;
            return false;
        }
    }
    
    numInputDimensions = N;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = labelledTrainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        labelledTrainingData.scale(0, 1);
    }
    
    //Train each of the models
    for(UINT k=0; k<numClasses; k++){
        
        //Get the class label for the kth class
        UINT classLabel = labelledTrainingData.getClassTracker()[k].classLabel;
        
        //Set the kth class label
        classLabels[k] = classLabel;
        
        //Get the weights for this class
        VectorDouble weights(numInputDimensions);
        if( weightsDataSet ){
            bool weightsFound = false;
            for(UINT i=0; i<weightsData.getNumSamples(); i++){
                if( weightsData[i].getClassLabel() == classLabel ){
                    weights = weightsData[i].getSample();
                    weightsFound = true;
                    break;
                }
            }
            
            if( !weightsFound ){
                errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to find the weights for class " << classLabel << endl;
                return false;
            }
        }else{
            //If the weights data has not been set then all the weights are 1
            for(UINT j=0; j<numInputDimensions; j++) weights[j] = 1.0;
        }
        
        //Get all the training data for this class
        ClassificationData classData = labelledTrainingData.getClassData(classLabel);
        MatrixDouble data(classData.getNumSamples(),N);
        
        //Copy the training data into a matrix
        for(UINT i=0; i<data.getNumRows(); i++){
            for(UINT j=0; j<data.getNumCols(); j++){
                data[i][j] = classData[i][j];
            }
        }
        
        //Train the model for this class
        models[k].gamma = nullRejectionCoeff;
        if( !models[k].train(classLabel,data,weights) ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to train model for class: " << classLabel << endl;
            
            //Try and work out why the training failed
            if( models[k].N == 0 ){
                errorLog << "train_(ClassificationData &labelledTrainingData) - N == 0!" << endl;
                models.clear();
                return false;
            }
            for(UINT j=0; j<numInputDimensions; j++){
                if( models[k].mu[j] == 0 ){
                    errorLog << "train_(ClassificationData &labelledTrainingData) - The mean of column " << j+1 << " is zero! Check the training data" << endl;
                    models.clear();
                    return false;
                }
            }
            models.clear();
            return false;
        }
        
    }
    
    //Store the null rejection thresholds
    nullRejectionThresholds.resize(numClasses);
    for(UINT k=0; k<numClasses; k++) {
        nullRejectionThresholds[k] = models[k].threshold;
    }
    
    //Flag that the models have been trained
    trained = true;
    return trained;
    
}
Esempio n. 15
0
bool AdaBoost::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    if( trainingData.getNumSamples() <= 1 ){
        errorLog << "train_(ClassificationData &trainingData) - There are not enough training samples to train a model! Number of samples: " << trainingData.getNumSamples()  << endl;
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    const UINT M = trainingData.getNumSamples();
    const UINT POSITIVE_LABEL = WEAK_CLASSIFIER_POSITIVE_CLASS_LABEL;
    const UINT NEGATIVE_LABEL = WEAK_CLASSIFIER_NEGATIVE_CLASS_LABEL;
    double alpha = 0;
    const double beta = 0.001;
    double epsilon = 0;
    TrainingResult trainingResult;
    
    const UINT K = (UINT)weakClassifiers.size();
    if( K == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - No weakClassifiers have been set. You need to set at least one weak classifier first." << endl;
        return false;
    }

    classLabels.resize(numClasses);
    models.resize(numClasses);
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ){
        trainingData.scale(ranges,0,1);
    }
    
    //Create the weights vector
    VectorDouble weights(M);
    
    //Create the error matrix
    MatrixDouble errorMatrix(K,M);
    
    for(UINT classIter=0; classIter<numClasses; classIter++){
        
        //Get the class label for the current class
        classLabels[classIter] = trainingData.getClassLabels()[classIter];
        
        //Set the class label of the current model
        models[ classIter ].setClassLabel( classLabels[classIter] );
        
        //Setup the labels for this class, POSITIVE_LABEL == 1, NEGATIVE_LABEL == 2
        ClassificationData classData;
        classData.setNumDimensions(trainingData.getNumDimensions());
        for(UINT i=0; i<M; i++){
            UINT label = trainingData[i].getClassLabel()==classLabels[classIter] ? POSITIVE_LABEL : NEGATIVE_LABEL;
            VectorDouble trainingSample = trainingData[i].getSample();
            classData.addSample(label,trainingSample);
        }
        
        //Setup the initial training sample weights
        std::fill(weights.begin(),weights.end(),1.0/M);
        
        //Run the boosting loop
        bool keepBoosting = true;
        UINT t = 0;
        
        while( keepBoosting ){
            
            //Pick the classifier from the family of classifiers that minimizes the total error
            UINT bestClassifierIndex = 0;
            double minError = numeric_limits<double>::max();
            for(UINT k=0; k<K; k++){
                //Get the k'th possible classifier
                WeakClassifier *weakLearner = weakClassifiers[k];
                
                //Train the current classifier
                if( !weakLearner->train(classData,weights) ){
                    errorLog << "Failed to train weakLearner!" << endl;
                    return false;
                }
                
                //Compute the weighted error for this clasifier
                double e = 0;
                double positiveLabel = weakLearner->getPositiveClassLabel();
                double numCorrect = 0;
                double numIncorrect = 0;
                for(UINT i=0; i<M; i++){
                    //Only penalize errors
                    double prediction = weakLearner->predict( classData[i].getSample() );
                    
                    if( (prediction == positiveLabel && classData[i].getClassLabel() != POSITIVE_LABEL) ||        //False positive
                        (prediction != positiveLabel && classData[i].getClassLabel() == POSITIVE_LABEL) ){       //False negative
                        e += weights[i]; //Increase the error proportional to the weight of the example
                        errorMatrix[k][i] = 1; //Flag that there was an error
                        numIncorrect++;
                    }else{
                        errorMatrix[k][i] = 0; //Flag that there was no error
                        numCorrect++;
                    }
                }
                
                trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Classifier: " << k << " WeightedError: " << e << " NumCorrect: " << numCorrect/M << " NumIncorrect: " <<numIncorrect/M << endl;
                
                if( e < minError ){
                    minError = e;
                    bestClassifierIndex = k;
                }
                
            }
  
            epsilon = minError;
            
            //Set alpha, using the M1 weight value, small weights (close to 0) will receive a strong weight in the final classifier
            alpha = 0.5 * log( (1.0-epsilon)/epsilon );
            
            trainingLog << "PositiveClass: " << classLabels[classIter] << " Boosting Iter: " << t << " Best Classifier Index: " << bestClassifierIndex << " MinError: " << minError << " Alpha: " << alpha << endl;
            
            if( isinf(alpha) ){ keepBoosting = false; trainingLog << "Alpha is INF. Stopping boosting for current class" << endl; }
            if( 0.5 - epsilon <= beta ){ keepBoosting = false; trainingLog << "Epsilon <= Beta. Stopping boosting for current class" << endl; }
            if( ++t >= numBoostingIterations ) keepBoosting = false;

            trainingResult.setClassificationResult(t, minError, this);
            trainingResults.push_back(trainingResult);
            trainingResultsObserverManager.notifyObservers( trainingResult );
            
            if( keepBoosting ){
                
                //Add the best weak classifier to the committee
                models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                
                //Update the weights for the next boosting iteration
                double reWeight = (1.0 - epsilon) / epsilon;
                double oldSum = 0;
                double newSum = 0;
                for(UINT i=0; i<M; i++){
                    oldSum += weights[i];
                    //Only update the weights that resulted in an incorrect prediction
                    if( errorMatrix[bestClassifierIndex][i] == 1 ) weights[i] *= reWeight;
                    newSum += weights[i];
                }
                
                //Normalize all the weights
                //This results to increasing the weights of the samples that were incorrectly labelled
                //While decreasing the weights of the samples that were correctly classified
                reWeight = oldSum/newSum;
                for(UINT i=0; i<M; i++){
                    weights[i] *= reWeight;
                }
                
            }else{
                trainingLog << "Stopping boosting training at iteration : " << t-1 << " with an error of " << epsilon << endl;
                if( t-1 == 0 ){
                    //Add the best weak classifier to the committee (we have to add it as this is the first iteration)
                    if( isinf(alpha) ){ alpha = 1; } //If alpha is infinite then the first classifier got everything correct
                    models[ classIter ].addClassifierToCommitee( weakClassifiers[bestClassifierIndex], alpha );
                }
            }
            
        }
    }
    
    //Normalize the weights
    for(UINT k=0; k<numClasses; k++){
        models[k].normalizeWeights();
    }
    
    //Flag that the model has been trained
    trained = true;
    
    //Setup the data for prediction
    predictedClassLabel = 0;
    maxLikelihood = 0;
    classLikelihoods.resize(numClasses);
    classDistances.resize(numClasses);
    
    return true;
}
Esempio n. 16
0
bool test( CommandLineParser &parser ){

    infoLog << "Testing model..." << endl;

    string datasetFilename = "";
    string modelFilename = "";
    string resultsFilename = "";

    //Get the model filename
    if( !parser.get("model-filename",modelFilename) ){
        errorLog << "Failed to parse model filename from command line! You can set the model filename using the -m." << endl;
        printUsage();
        return false;
    }

    //Get the filename
    if( !parser.get("dataset-filename",datasetFilename) ){
        errorLog << "Failed to parse dataset filename from command line! You can set the dataset filename using the -f." << endl;
        printUsage();
        return false;
    }

    //Get the model filename
    parser.get("results-filename",resultsFilename,string("results.txt"));

    //Load the pipeline from a file
    GestureRecognitionPipeline pipeline;

    infoLog << "- Loading model..." << endl;

    if( !pipeline.load( modelFilename ) ){
        errorLog << "Failed to load model from file: " << modelFilename << endl;
        printUsage();
        return false;
    }

    infoLog << "- Model loaded!" << endl;

    //Load the data to test the classifier
    ClassificationData data;

    infoLog << "- Loading Training Data..." << endl;
    if( !data.load( datasetFilename ) ){
        errorLog << "Failed to load data!\n";
        return false;
    }

    const unsigned int N = data.getNumDimensions();
    infoLog << "- Num training samples: " << data.getNumSamples() << endl;
    infoLog << "- Num dimensions: " << N << endl;
    infoLog << "- Num classes: " << data.getNumClasses() << endl;

    //Test the classifier
    if( !pipeline.test( data ) ){
        errorLog << "Failed to test pipeline!" << endl;
        return false;
    }

    infoLog << "- Test complete in " << pipeline.getTestTime()/1000.0 << " seconds with and accuracy of: " << pipeline.getTestAccuracy() << endl;

    return saveResults( pipeline, resultsFilename );
}
Esempio n. 17
0
bool RandomForests::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }

    if( bootstrappedDatasetWeight <= 0.0 || bootstrappedDatasetWeight > 1.0 ){
        errorLog << "train_(ClassificationData &trainingData) - Bootstrapped Dataset Weight must be [> 0.0 and <= 1.0]" << endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    classLabels = trainingData.getClassLabels();
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Flag that the main algorithm has been trained encase we need to trigger any callbacks
    trained = true;
    
    //Train the random forest
    forest.reserve( forestSize );
    for(UINT i=0; i<forestSize; i++){
        
        //Get a balanced bootstrapped dataset
        UINT datasetSize = (UINT)(trainingData.getNumSamples() * bootstrappedDatasetWeight);
        ClassificationData data = trainingData.getBootstrappedDataset( datasetSize, true );
 
        DecisionTree tree;
        tree.setDecisionTreeNode( *decisionTreeNode );
        tree.enableScaling( false ); //We have already scaled the training data so we do not need to scale it again
        tree.setTrainingMode( trainingMode );
        tree.setNumSplittingSteps( numRandomSplits );
        tree.setMinNumSamplesPerNode( minNumSamplesPerNode );
        tree.setMaxDepth( maxDepth );
        tree.enableNullRejection( useNullRejection );
        tree.setRemoveFeaturesAtEachSpilt( removeFeaturesAtEachSpilt );

        trainingLog << "Training forest " << i+1 << "/" << forestSize << "..." << endl;
        
        //Train this tree
        if( !tree.train( data ) ){
            errorLog << "train_(ClassificationData &labelledTrainingData) - Failed to train tree at forest index: " << i << endl;
            clear();
            return false;
        }
        
        //Deep copy the tree into the forest
        forest.push_back( tree.deepCopyTree() );
    }

    return true;
}
Esempio n. 18
0
bool MinDist::train_(ClassificationData &trainingData){
    
    //Clear any previous models
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << __GRT_LOG__ << " Training data has zero samples!" << std::endl;
        return false;
    }
    
    if( M <= numClusters ){
        errorLog << __GRT_LOG__ << " There are not enough training samples for the number of clusters. Either reduce the number of clusters or increase the number of training samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = K;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    nullRejectionThresholds.resize(K);
    ranges = trainingData.getRanges();
    ClassificationData validationData;
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    if( useValidationSet ){
        validationData = trainingData.split( 100-validationSetSize );
    }
    
    //Train each of the models
    for(UINT k=0; k<numClasses; k++){
        
        trainingLog << "Training model for class: " << trainingData.getClassTracker()[k].classLabel << std::endl;

        //Pass the logging state onto the kmeans algorithm
        models[k].setTrainingLoggingEnabled( this->getTrainingLoggingEnabled() );
            
        //Get the class label for the kth class
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        
        //Set the kth class label
        classLabels[k] = classLabel;
        
        //Get all the training data for this class
        ClassificationData classData = trainingData.getClassData(classLabel);
        MatrixFloat data(classData.getNumSamples(),N);
        
        //Copy the training data into a matrix
        for(UINT i=0; i<data.getNumRows(); i++){
            for(UINT j=0; j<data.getNumCols(); j++){
                data[i][j] = classData[i][j];
            }
        }
        
        //Train the model for this class
        models[k].setGamma( nullRejectionCoeff );
        if( !models[k].train(classLabel,data,numClusters,minChange,maxNumEpochs) ){
            errorLog << __GRT_LOG__ << " Failed to train model for class: " << classLabel;
            errorLog << ". This is might be because this class does not have enough training samples! You should reduce the number of clusters or increase the number of training samples for this class." << std::endl;
            models.clear();
            return false;
        }
            
        //Set the null rejection threshold
        nullRejectionThresholds[k] = models[k].getRejectionThreshold();
    }

    //Flag that the models have been trained
    trained = true;
    converged = true;

    //Compute the final training stats
    trainingSetAccuracy = 0;
    validationSetAccuracy = 0;

    //If scaling was on, then the data will already be scaled, so turn it off temporially so we can test the model accuracy
    bool scalingState = useScaling;
    useScaling = false;
    if( !computeAccuracy( trainingData, trainingSetAccuracy ) ){
        trained = false;
        converged = false;
        errorLog << __GRT_LOG__ << " Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
        return false;
    }
    
    if( useValidationSet ){
        if( !computeAccuracy( validationData, validationSetAccuracy ) ){
            trained = false;
            converged = false;
            errorLog << __GRT_LOG__ << " Failed to compute validation set accuracy! Failed to fully train model!" << std::endl;
            return false;
        }
        
    }

    trainingLog << "Training set accuracy: " << trainingSetAccuracy << std::endl;

    if( useValidationSet ){
        trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
    }

    //Reset the scaling state for future prediction
    useScaling = scalingState;

    return trained;
}
Esempio n. 19
0
bool Softmax::trainSoftmaxModel(UINT classLabel,SoftmaxModel &model,ClassificationData &data){
    
    Float error = 0;
    Float errorSum = 0;
    Float lastErrorSum = 0;
    Float delta = 0;
    const UINT N = data.getNumDimensions();
    const UINT M = data.getNumSamples();
    UINT iter = 0;
    bool keepTraining = true;
    Random random;
    VectorFloat y(M);
    VectorFloat batchMean(N);
    Vector< UINT > randomTrainingOrder(M);
    Vector< VectorFloat > batchData(batchSize,VectorFloat(N));
    
    //Init the model
    model.init( classLabel,  N );
    
    //Setup the target vector, the input data is relabelled as positive samples (with label 1.0) and negative samples (with label 0.0)
    for(UINT i=0; i<M; i++){
        y[i] = data[i].getClassLabel()==classLabel ? 1.0 : 0;
    }
    
    //In most cases, the training data is grouped into classes (100 samples for class 1, followed by 100 samples for class 2, etc.)
    //This can cause a problem for stochastic gradient descent algorithm. To avoid this issue, we randomly shuffle the order of the
    //training samples. This random order is then used at each epoch.
    for(UINT i=0; i<M; i++){
        randomTrainingOrder[i] = i;
    }
    std::random_shuffle(randomTrainingOrder.begin(), randomTrainingOrder.end());
    
    //Clear any previous training results
    trainingResults.clear();
    trainingResults.reserve( maxNumEpochs );
    TrainingResult epochResult;

    //Run the main stochastic gradient descent training algorithm
    while( keepTraining ){

        //Run one epoch of training using stochastic gradient descent
        errorSum = 0;
        UINT m=0;
        while( m < M ){
          //Get the batch data for this update
          UINT roundSize = m+batchSize < M ? batchSize : M-m;
          batchMean.fill(0.0);
          for(UINT i=0; i<roundSize; i++){
            for(UINT j=0; j<N; j++){
              batchData[i][j] = data[ randomTrainingOrder[m+i] ][j];
              batchMean[j] += batchData[i][j];
            }
          }

          for(UINT j=0; j<N; j++) batchMean[j] /= roundSize;

          //Compute the error on this batch, given the current weights
          error = 0.0;
          for(UINT i=0; i<roundSize; i++){
            error += y[ randomTrainingOrder[m+i] ] - model.compute( batchData[i] );
          }
          error /= roundSize;
          errorSum += error;

          //Update the weights
          for(UINT j=0; j<N; j++){
            model.w[j] += learningRate  * error * batchMean[j];
          }
          model.w0 += learningRate  * error;

          m += roundSize;
        }

        //Compute the error
        delta = fabs( errorSum-lastErrorSum );
        lastErrorSum = errorSum;

        //Check to see if we should stop
        if( delta <= minChange ){
            keepTraining = false;
        }
        
        if( ++iter >= maxNumEpochs ){
            keepTraining = false;
        }
        
        trainingLog << "Class: " << classLabel << " Epoch: " << iter << " TotalError: " << errorSum << " Delta: " << delta << std::endl;
        epochResult.setClassificationResult( iter, errorSum, this );
        trainingResults.push_back( epochResult );
    }
    
    return true;
}
Esempio n. 20
0
bool GMM::train_(ClassificationData &trainingData){
    
    //Clear any old models
	clear();
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data is empty!" << std::endl;
        return false;
    }
    
    //Set the number of features and number of classes and resize the models buffer
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    models.resize(numClasses);
    
    if( numInputDimensions >= 6 ){
        warningLog << "train_(ClassificationData &trainingData) - The number of features in your training data is high (" << numInputDimensions << ").  The GMMClassifier does not work well with high dimensional data, you might get better results from one of the other classifiers." << std::endl;
    }
    
    //Get the ranges of the training data if the training data is going to be scaled
	ranges = trainingData.getRanges();
    if( !trainingData.scale(GMM_MIN_SCALE_VALUE, GMM_MAX_SCALE_VALUE) ){
        errorLog << "train_(ClassificationData &trainingData) - Failed to scale training data!" << std::endl;
        return false;
    }

    //Fit a Mixture Model to each class (independently)
    for(UINT k=0; k<numClasses; k++){
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        ClassificationData classData = trainingData.getClassData( classLabel );
        
        //Train the Mixture Model for this class
        GaussianMixtureModels gaussianMixtureModel;
        gaussianMixtureModel.setNumClusters( numMixtureModels );
        gaussianMixtureModel.setMinChange( minChange );
        gaussianMixtureModel.setMaxNumEpochs( maxIter );
        
        if( !gaussianMixtureModel.train( classData.getDataAsMatrixFloat() ) ){
            errorLog << "train_(ClassificationData &trainingData) - Failed to train Mixture Model for class " << classLabel << std::endl;
            return false;
        }
        
        //Setup the model container
        models[k].resize( numMixtureModels );
        models[k].setClassLabel( classLabel );
        
        //Store the mixture model in the container
        for(UINT j=0; j<numMixtureModels; j++){
            models[k][j].mu = gaussianMixtureModel.getMu().getRowVector(j);
            models[k][j].sigma = gaussianMixtureModel.getSigma()[j];
            
            //Compute the determinant and invSigma for the realtime prediction
            LUDecomposition ludcmp( models[k][j].sigma );
            if( !ludcmp.inverse( models[k][j].invSigma ) ){
                models.clear();
                errorLog << "train_(ClassificationData &trainingData) - Failed to invert Matrix for class " << classLabel << "!" << std::endl;
                return false;
            }
            models[k][j].det = ludcmp.det();
        }
        
        //Compute the normalize factor
        models[k].recomputeNormalizationFactor();
        
        //Compute the rejection thresholds
        Float mu = 0;
        Float sigma = 0;
        VectorFloat predictionResults(classData.getNumSamples(),0);
        for(UINT i=0; i<classData.getNumSamples(); i++){
            VectorFloat sample = classData[i].getSample();
            predictionResults[i] = models[k].computeMixtureLikelihood( sample );
            mu += predictionResults[i];
        }
        
        //Update mu
        mu /= Float( classData.getNumSamples() );
        
        //Calculate the standard deviation
        for(UINT i=0; i<classData.getNumSamples(); i++) 
            sigma += grt_sqr( (predictionResults[i]-mu) );
        sigma = grt_sqrt( sigma / (Float(classData.getNumSamples())-1.0) );
        sigma = 0.2;
        
        //Set the models training mu and sigma 
        models[k].setTrainingMuAndSigma(mu,sigma);
        
        if( !models[k].recomputeNullRejectionThreshold(nullRejectionCoeff) && useNullRejection ){
            warningLog << "train_(ClassificationData &trainingData) - Failed to recompute rejection threshold for class " << classLabel << " - the nullRjectionCoeff value is too high!" << std::endl;
        }
        
        //cout << "Training Mu: " << mu << " TrainingSigma: " << sigma << " RejectionThreshold: " << models[k].getNullRejectionThreshold() << std::endl;
        //models[k].printModelValues();
    }
    
    //Reset the class labels
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = models[k].getClassLabel();
    }
    
    //Resize the rejection thresholds
    nullRejectionThresholds.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        nullRejectionThresholds[k] = models[k].getNullRejectionThreshold();
    }
    
    //Flag that the models have been trained
    trained = true;
    
    return true;
}
Esempio n. 21
0
bool KNN::train_(ClassificationData &trainingData){
    
    //Clear any previous models
    clear();
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }
    
    //Get the ranges of the data
    ranges = trainingData.getRanges();
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }
    
    //Store the number of features, classes and the training data
    this->numInputDimensions = trainingData.getNumDimensions();
    this->numClasses = trainingData.getNumClasses();
    
    //TODO: In the future need to build a kdtree from the training data to allow better realtime prediction
    this->trainingData = trainingData;
    
    //Set the class labels
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
    }

    //If we do not need to search for the best K value, then call the sub training function and return the result
	if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    //If we have got this far then we are going to search for the best K value
    UINT index = 0;
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly spilt the data and use 80% to train the algorithm and 20% to test it
        ClassificationData trainingSet(trainingData);
        ClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{

            //Compute the classification error
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){

                VectorDouble sample = testSet[i].getSample();

                if( !predict( sample , k) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }

                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }

            accuracy = accuracy /double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );
			
			trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }

            index++;
        }

    }

    if( bestAccuracy > 0 ){
        //Sort the training log by value
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the value changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value so we want to get the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

		trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index, this should give us the best accuracy with the minimum K value
        //We now need to train the model again to make sure all the training metrics are computed correctly
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}
Esempio n. 22
0
bool RandomForests::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << std::endl;
        return false;
    }

    if( bootstrappedDatasetWeight <= 0.0 || bootstrappedDatasetWeight > 1.0 ){
        errorLog << "train_(ClassificationData &trainingData) - Bootstrapped Dataset Weight must be [> 0.0 and <= 1.0]" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numClasses = K;
    classLabels = trainingData.getClassLabels();
    ranges = trainingData.getRanges();
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    if( useValidationSet ){
        validationSetAccuracy = 0;
        validationSetPrecision.resize( useNullRejection ? K+1 : K, 0 );
        validationSetRecall.resize( useNullRejection ? K+1 : K, 0 );
    }
    
    //Flag that the main algorithm has been trained encase we need to trigger any callbacks
    trained = true;
    
    //Train the random forest
    forest.reserve( forestSize );

    for(UINT i=0; i<forestSize; i++){
        
        //Get a balanced bootstrapped dataset
        UINT datasetSize = (UINT)(trainingData.getNumSamples() * bootstrappedDatasetWeight);
        ClassificationData data = trainingData.getBootstrappedDataset( datasetSize, true );

        Timer timer;
        timer.start();
 
        DecisionTree tree;
        tree.setDecisionTreeNode( *decisionTreeNode );
        tree.enableScaling( false ); //We have already scaled the training data so we do not need to scale it again
        tree.setUseValidationSet( useValidationSet );
        tree.setValidationSetSize( validationSetSize );
        tree.setTrainingMode( trainingMode );
        tree.setNumSplittingSteps( numRandomSplits );
        tree.setMinNumSamplesPerNode( minNumSamplesPerNode );
        tree.setMaxDepth( maxDepth );
        tree.enableNullRejection( useNullRejection );
        tree.setRemoveFeaturesAtEachSpilt( removeFeaturesAtEachSpilt );

        trainingLog << "Training decision tree " << i+1 << "/" << forestSize << "..." << std::endl;
        
        //Train this tree
        if( !tree.train_( data ) ){
            errorLog << "train_(ClassificationData &trainingData) - Failed to train tree at forest index: " << i << std::endl;
            clear();
            return false;
        }

        Float computeTime = timer.getMilliSeconds();
        trainingLog << "Decision tree trained in " << (computeTime*0.001)/60.0 << " minutes" << std::endl;

        if( useValidationSet ){
            Float forestNorm = 1.0 / forestSize;
            validationSetAccuracy += tree.getValidationSetAccuracy();
            VectorFloat precision = tree.getValidationSetPrecision();
            VectorFloat recall = tree.getValidationSetRecall();

            grt_assert( precision.getSize() == validationSetPrecision.getSize() );
            grt_assert( recall.getSize() == validationSetRecall.getSize() );

            for(UINT i=0; i<validationSetPrecision.getSize(); i++){
                validationSetPrecision[i] += precision[i] * forestNorm;
            }

            for(UINT i=0; i<validationSetRecall.getSize(); i++){
                validationSetRecall[i] += recall[i] * forestNorm;
            }

        }
        
        //Deep copy the tree into the forest
        forest.push_back( tree.deepCopyTree() );
    }

    if( useValidationSet ){
        validationSetAccuracy /= forestSize;
        trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;

        trainingLog << "Validation set precision: ";
        for(UINT i=0; i<validationSetPrecision.getSize(); i++){
            trainingLog << validationSetPrecision[i] << " ";
        }
        trainingLog << std::endl;

        trainingLog << "Validation set recall: ";
        for(UINT i=0; i<validationSetRecall.getSize(); i++){
            trainingLog << validationSetRecall[i] << " ";
        }
        trainingLog << std::endl;
    }

    return true;
}
Esempio n. 23
0
bool Softmax::train_(ClassificationData &trainingData){
    
    //Clear any previous model
    clear();
    
    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    
    if( M == 0 ){
        errorLog << __GRT_LOG__ << " Training data has zero samples!" << std::endl;
        return false;
    }
    
    numInputDimensions = N;
    numOutputDimensions = K;
    numClasses = K;
    models.resize(K);
    classLabels.resize(K);
    ranges = trainingData.getRanges();
    ClassificationData validationData;
    
    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    if( useValidationSet ){
        validationData = trainingData.split( 100-validationSetSize );
    }
    
    //Train a regression model for each class in the training data
    for(UINT k=0; k<numClasses; k++){
        
        //Set the class label
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
        
        //Train the model
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << __GRT_LOG__ << " Failed to train model for class: " << classLabels[k] << std::endl;
                return false;
        }
    }

    //Flag that the models have been trained
    trained = true;
    converged = true;

    //Compute the final training stats
    trainingSetAccuracy = 0;
    validationSetAccuracy = 0;

    //If scaling was on, then the data will already be scaled, so turn it off temporially so we can test the model accuracy
    bool scalingState = useScaling;
    useScaling = false;
    if( !computeAccuracy( trainingData, trainingSetAccuracy ) ){
        trained = false;
        converged = false;
        errorLog << __GRT_LOG__ << " Failed to compute training set accuracy! Failed to fully train model!" << std::endl;
        return false;
    }
    
    if( useValidationSet ){
        if( !computeAccuracy( validationData, validationSetAccuracy ) ){
            trained = false;
            converged = false;
            errorLog << __GRT_LOG__ << " Failed to compute validation set accuracy! Failed to fully train model!" << std::endl;
            return false;
        }
    }

    trainingLog << "Training set accuracy: " << trainingSetAccuracy << std::endl;

    if( useValidationSet ){
        trainingLog << "Validation set accuracy: " << validationSetAccuracy << std::endl;
    }

    //Reset the scaling state for future prediction
    useScaling = scalingState;

    return trained;
}
Esempio n. 24
0
bool train( CommandLineParser &parser ){

    infoLog << "Training regression model..." << endl;

    string trainDatasetFilename = "";
    string modelFilename = "";

    //Get the filename
    if( !parser.get("filename",trainDatasetFilename) ){
        errorLog << "Failed to parse filename from command line! You can set the filename using the -f." << endl;
        printHelp();
        return false;
    }

    //Get the model filename
    parser.get("model-filename",modelFilename);

    //Load the training data to train the model
    ClassificationData trainingData;

    infoLog << "- Loading Training Data..." << endl;
    if( !trainingData.load( trainDatasetFilename ) ){
        errorLog << "Failed to load training data!\n";
        return false;
    }

    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();
    infoLog << "- Num training samples: " << trainingData.getNumSamples() << endl;
    infoLog << "- Num input dimensions: " << N << endl;
    infoLog << "- Num classes: " << K << endl;
    
    float learningRate = 0;
    float minChange = 0;
    unsigned int maxEpoch = 0;
    unsigned int batchSize = 0;

    parser.get( "learning-rate", learningRate );
    parser.get( "min-change", minChange );
    parser.get( "max-epoch", maxEpoch );
    parser.get( "batch-size", batchSize );

    infoLog << "Softmax settings: learning-rate: " << learningRate << " min-change: " << minChange << " max-epoch: " << maxEpoch << " batch-size: " << batchSize << endl;

    //Create a new softmax instance
    bool enableScaling = true;
    Softmax classifier(enableScaling,learningRate,minChange,maxEpoch,batchSize);

    //Create a new pipeline that will hold the classifier
    GestureRecognitionPipeline pipeline;

    //Add the classifier to the pipeline
    pipeline << classifier;

    infoLog << "- Training model...\n";

    //Train the classifier
    if( !pipeline.train( trainingData ) ){
        errorLog << "Failed to train model!" << endl;
        return false;
    }

    infoLog << "- Model trained!" << endl;

    infoLog << "- Saving model to: " << modelFilename << endl;

    //Save the pipeline
    if( pipeline.save( modelFilename ) ){
        infoLog << "- Model saved." << endl;
    }else warningLog << "Failed to save model to file: " << modelFilename << endl;

    infoLog << "- TrainingTime: " << pipeline.getTrainingTime() << endl;
    
    string logFilename = "";
    if( parser.get( "log-filename", logFilename ) && logFilename.length() > 0 ){
      infoLog << "Writing training log to: " << logFilename << endl;

      fstream logFile( logFilename.c_str(), fstream::out );

      if( !logFile.is_open() ){
        errorLog << "Failed to open training log file: " << logFilename << endl;
        return false;
      }

      Vector< TrainingResult > trainingResults = pipeline.getTrainingResults();

      for(UINT i=0; i<trainingResults.getSize(); i++){
        logFile << trainingResults[i].getTrainingIteration() << "\t" << trainingResults[i].getAccuracy() << endl;
      }

      logFile.close();
    }

    return true;
}
Esempio n. 25
0
File: LDA.cpp Progetto: CV-IP/grt
bool LDA::train(ClassificationData trainingData){
    
    errorLog << "SORRY - this module is still under development and can't be used yet!" << std::endl;
    return false;
    
    //Reset any previous model
    numInputDimensions = 0;
    numClasses = 0;
    models.clear();
    classLabels.clear();
    trained = false;
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledClassificationData trainingData) - There is no training data to train the model!" << std::endl;
        return false;
    }
    
    numInputDimensions = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();

	//Calculate the between scatter matrix
	MatrixFloat SB = computeBetweenClassScatterMatrix( trainingData );
	
	//Calculate the within scatter matrix
	MatrixFloat SW = computeWithinClassScatterMatrix( trainingData );


   /*

    
    //Counters and stat containers
    vector< UINT > groupLabels(numClasses);
    VectorDouble groupCounters(numClasses);
    VectorDouble priorProb(numClasses);
    MatrixFloat groupMeans(numClasses,numFeatures);
    MatrixFloat pCov(numFeatures,numFeatures);
    MatrixFloat pCovInv(numFeatures,numFeatures);
    MatrixFloat modelCoeff(numClasses,numFeatures+1);
    
    pCov.setAllValues(0);
    modelCoeff.setAllValues(0);
    
    //Set the class labels and counters
    for(UINT k=0; k<numClasses; k++){
        groupLabels[k] = trainingData.getClassTracker()[k].classLabel;
        groupCounters[k] = trainingData.getClassTracker()[k].counter;
    }
    
    //Loop over the classes to compute the group stats
    for(UINT k=0; k<numClasses; k++){
        LabelledClassificationData classData = trainingData.getClassData( groupLabels[k] );
        MatrixFloat cov(numFeatures,numFeatures);
        
        //Compute class mu
        for(UINT j=0; j<numFeatures; j++){
            groupMeans[k][j] = 0;
            for(UINT i=0; i<classData.getNumSamples(); i++){
                groupMeans[k][j] += classData[i][j];
            }
            groupMeans[k][j] /= Float(classData.getNumSamples());
        }
        
        //Compute the class covariance
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                cov[m][n] = 0;
                for(UINT i=0; i<classData.getNumSamples(); i++){
                    cov[m][n] += (classData[i][m]-groupMeans[k][m]) * (classData[i][n]-groupMeans[k][n]);
                }
                cov[m][n] /= Float(classData.getNumSamples()-1);
            }
        }
        
        debugLog << "Group Cov:\n";
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                debugLog << cov[m][n] << "\t";
            }debugLog << "\n";
        }debugLog << std::endl;
        
        //Set the prior probability for this class (which is just 1/numClasses)
        priorProb[k] = 1.0/Float(numClasses);
        
        //Update the main covariance matrix
        Float weight = ((classData.getNumSamples() - 1) / Float(trainingData.getNumSamples() - numClasses) );
        debugLog << "Weight: " << weight << std::endl;
        for(UINT m=0; m<numFeatures; m++){
            for(UINT n=0; n<numFeatures; n++){
                pCov[m][n] += weight * cov[m][n];
            }
        }
    }
    
    for(UINT k=0; k<numClasses; k++){
        debugLog << "GroupMu: " << groupLabels[k] << "\t";
        for(UINT j=0; j<numFeatures; j++){
            debugLog << groupMeans[k][j] << "\t";
        }debugLog << std::endl;
    }
    
    debugLog << "pCov:\n";
    for(UINT m=0; m<numFeatures; m++){
        for(UINT n=0; n<numFeatures; n++){
            debugLog << pCov[m][n] << "\t";
        }debugLog << "\n";
    }debugLog << std::endl;
    
    //Invert the pCov matrix
    LUDecomposition matrixInverter(pCov);
    if( !matrixInverter.inverse(pCovInv) ){
        errorLog << "Failed to invert pCov Matrix!" << std::endl;
        return false;
    }
    
    //Loop over classes to calculate linear discriminant coefficients
    Float sum = 0;
    vector< Float > temp(numFeatures);
    for(UINT k=0; k<numClasses; k++){
        //Compute the temporary vector
        for(UINT j=0; j<numFeatures; j++){
            temp[j] = 0;
            for(UINT m=0; m<numFeatures; m++){
                    temp[j] += groupMeans[k][m] * pCovInv[m][j];
            }
        }
        
        //Compute the model coefficients
        sum = 0;
        for(UINT j=0; j<numFeatures; j++){
            sum += temp[j]*groupMeans[k][j];
        }
        modelCoeff[k][0] = -0.5 * sum + log( priorProb[k] );
        
        for(UINT j=0; j<numFeatures; j++){
            modelCoeff[k][j+1] = temp[j];
        }
    }
    
    //Setup the models for realtime prediction
    models.resize(numClasses);
    classLabels.resize(numClasses);
    
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = groupLabels[k];
        models[k].classLabel = groupLabels[k];
        models[k].priorProb = priorProb[k];
        models[k].weights = modelCoeff.getRowVector(k);
    }
    
    //Flag that the models were successfully trained
    trained = true;
    */
    
    return true;
}
Esempio n. 26
0
bool train( CommandLineParser &parser ){

    string trainDatasetFilename = "";
    string modelFilename = "";
    unsigned int forestSize = 0;
    unsigned int maxDepth = 0;
    unsigned int minNodeSize = 0;
    unsigned int numSplits = 0;
    bool removeFeatures = false;
    double bootstrapWeight = 0.0;

    //Get the filename
    if( !parser.get("filename",trainDatasetFilename) ){
        errorLog << "Failed to parse filename from command line! You can set the filename using the -f." << endl;
        printUsage();
        return false;
    }

    //Get the model filename
    parser.get("model-filename",modelFilename);

    //Get the forest size
    parser.get("forest-size",forestSize);

    //Get the max depth
    parser.get("max-depth",maxDepth);

    //Get the min node size
    parser.get("min-node-size",minNodeSize);

    //Get the number of random splits
    parser.get("num-splits",numSplits);
    
    //Get the remove features
    parser.get("remove-features",removeFeatures);
   
    //Get the bootstrap weight 
    parser.get("bootstrap-weight",bootstrapWeight);

    //Load some training data to train the classifier
    ClassificationData trainingData;

    infoLog << "- Loading Training Data..." << endl;
    if( !trainingData.load( trainDatasetFilename ) ){
        errorLog << "Failed to load training data!\n";
        return false;
    }

    const unsigned int N = trainingData.getNumDimensions();
    Vector< ClassTracker > tracker = trainingData.getClassTracker();
    infoLog << "- Num training samples: " << trainingData.getNumSamples() << endl;
    infoLog << "- Num dimensions: " << N << endl;
    infoLog << "- Num classes: " << trainingData.getNumClasses() << endl;
    infoLog << "- Class stats: " << endl;
    for(unsigned int i=0; i<tracker.getSize(); i++){
        infoLog << "- class " << tracker[i].classLabel << " number of samples: " << tracker[i].counter << endl;
    }
    
    //Create a new RandomForests instance
    RandomForests forest;

    //Set the decision tree node that will be used for each tree in the forest
    string nodeType = "cluster-node"; //TODO: make this a command line option in the future
    if( nodeType == "cluster-node" ){
        forest.setDecisionTreeNode( DecisionTreeClusterNode() );
    }
    if( nodeType == "threshold-node" ){
        forest.setTrainingMode( Tree::BEST_RANDOM_SPLIT );
        forest.setDecisionTreeNode( DecisionTreeThresholdNode() );
    }

    //Set the number of trees in the forest
    forest.setForestSize( forestSize );

    //Set the maximum depth of the tree
    forest.setMaxDepth( maxDepth );

    //Set the minimum number of samples allowed per node
    forest.setMinNumSamplesPerNode( minNodeSize );

    //Set the number of random splits used per node
    forest.setNumRandomSplits( numSplits );

    //Set if selected features should be removed at each node
    forest.setRemoveFeaturesAtEachSplit( removeFeatures );

    //Set the bootstrap weight
    forest.setBootstrappedDatasetWeight( bootstrapWeight );

    //Add the classifier to a pipeline
    GestureRecognitionPipeline pipeline;
    pipeline.setClassifier( forest );

    infoLog << "- Training model..." << endl;

    //Train the classifier
    if( !pipeline.train( trainingData ) ){
        errorLog << "Failed to train classifier!" << endl;
        return false;
    }

    infoLog << "- Model trained!" << endl;
    infoLog << "- Training time: " << (pipeline.getTrainingTime() * 0.001) / 60.0 << " (minutes)" << endl;
    infoLog << "- Saving model to: " << modelFilename << endl;

    //Save the pipeline
    if( !pipeline.save( modelFilename ) ){
        warningLog << "Failed to save model to file: " << modelFilename << endl;
    } 

    return true;
}