Ejemplo n.º 1
0
bool ClassificationData::merge(const ClassificationData &otherData){

    if( otherData.getNumDimensions() != numDimensions ){
        errorLog << "merge(const ClassificationData &labelledData) - The number of dimensions in the labelledData (" << otherData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << std::endl;
        return false;
    }

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();

    const UINT M = otherData.getNumSamples();
    
    //Reserve the memory
    reserve( getNumSamples() + M );

    //Add the data from the labelledData to this instance
    
    for(UINT i=0; i<M; i++){
        addSample(otherData[i].getClassLabel(), otherData[i].getSample());
    }

    //Set the class names from the dataset
    Vector< ClassTracker > classTracker = otherData.getClassTracker();
    for(UINT i=0; i<classTracker.getSize(); i++){
        setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel);
    }

    //Sort the class labels
    sortClassLabels();

    return true;
}
bool LabelledClassificationData::loadDatasetFromCSVFile(string filename,UINT classLabelColumnIndex){

    numDimensions = 0;
    datasetName = "NOT_SET";
    infoText = "";

    //Clear any previous data
    clear();

    //Parse the CSV file
    FileParser parser;
    
    if( !parser.parseCSVFile(filename,true) ){
        errorLog << "loadDatasetFromCSVFile(string filename) - Failed to parse CSV file!" << endl;
        return false;
    }
    
    if( !parser.getConsistentColumnSize() ){
        errorLog << "loadDatasetFromCSVFile(string filename) - The CSV file does not have a consistent number of columns!" << endl;
        return false;
    }
    
    if( parser.getColumnSize() <= 1 ){
        errorLog << "loadDatasetFromCSVFile(string filename) - The CSV file does not have enough columns! It should contain at least two columns!" << endl;
        return false;
    }
    
    //Set the number of dimensions
    numDimensions = parser.getColumnSize()-1;
    UINT classLabel = 0;
    UINT j = 0;
    UINT n = 0;
    VectorDouble sample(numDimensions);
    for(UINT i=0; i<parser.getRowSize(); i++){
        //Get the class label
        classLabel = Util::stringToInt( parser[i][classLabelColumnIndex] );
        
        //Get the sample data
        j=0;
        n=0;
        while( j != numDimensions ){
            if( n != classLabelColumnIndex ){
                sample[j++] = Util::stringToDouble( parser[i][n] );
            }
            n++;
        }
        
        //Add the labelled sample to the dataset
        if( !addSample(classLabel, sample) ){
            warningLog << "loadDatasetFromCSVFile(string filename) - Could not add sample " << i << " to the dataset!" << endl;
        }
    }

	sortClassLabels();
    
    return true;
}
Ejemplo n.º 3
0
bool ClassificationData::addSample(const UINT classLabel,const VectorFloat &sample){
    
	if( sample.getSize() != numDimensions ){
        if( totalNumSamples == 0 ){
            warningLog << "addSample(const UINT classLabel, VectorFloat &sample) - the size of the new sample (" << sample.getSize() << ") does not match the number of dimensions of the dataset (" << numDimensions << "), setting dimensionality to: " << numDimensions << std::endl;
            numDimensions = sample.getSize();
        }else{
            errorLog << "addSample(const UINT classLabel, VectorFloat &sample) - the size of the new sample (" << sample.getSize() << ") does not match the number of dimensions of the dataset (" << numDimensions << ")" << std::endl;
            return false;
        }
    }

    //The class label must be greater than zero (as zero is used for the null rejection class label
    if( classLabel == GRT_DEFAULT_NULL_CLASS_LABEL && !allowNullGestureClass ){
        errorLog << "addSample(const UINT classLabel, VectorFloat &sample) - the class label can not be 0!" << std::endl;
        return false;
    }

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();

	ClassificationSample newSample(classLabel,sample);
	data.push_back( newSample );
	totalNumSamples++;

	if( classTracker.getSize() == 0 ){
		ClassTracker tracker(classLabel,1);
		classTracker.push_back(tracker);
	}else{
		bool labelFound = false;
		for(UINT i=0; i<classTracker.getSize(); i++){
			if( classLabel == classTracker[i].classLabel ){
				classTracker[i].counter++;
				labelFound = true;
				break;
			}
		}
		if( !labelFound ){
			ClassTracker tracker(classLabel,1);
			classTracker.push_back(tracker);
		}
	}

    //Update the class labels
    sortClassLabels();

	return true;
}
Ejemplo n.º 4
0
bool ClassificationData::relabelAllSamplesWithClassLabel(const UINT oldClassLabel,const UINT newClassLabel){
    bool oldClassLabelFound = false;
    bool newClassLabelAllReadyExists = false;
    UINT indexOfOldClassLabel = 0;
    UINT indexOfNewClassLabel = 0;

    //Find out how many training examples we need to relabel
    for(UINT i=0; i<classTracker.getSize(); i++){
        if( classTracker[i].classLabel == oldClassLabel ){
            indexOfOldClassLabel = i;
            oldClassLabelFound = true;
        }
        if( classTracker[i].classLabel == newClassLabel ){
            indexOfNewClassLabel = i;
            newClassLabelAllReadyExists = true;
        }
    }

    //If the old class label was not found then we can't do anything
    if( !oldClassLabelFound ){
        return false;
    }

    //Relabel the old class labels
    for(UINT i=0; i<totalNumSamples; i++){
        if( data[i].getClassLabel() == oldClassLabel ){
            data[i].setClassLabel(newClassLabel);
        }
    }

    //Update the class tracler
    if( newClassLabelAllReadyExists ){
        //Add the old sample count to the new sample count
        classTracker[ indexOfNewClassLabel ].counter += classTracker[ indexOfOldClassLabel ].counter;
    }else{
        //Create a new class tracker
        classTracker.push_back( ClassTracker(newClassLabel,classTracker[ indexOfOldClassLabel ].counter,classTracker[ indexOfOldClassLabel ].className) );
    }

    //Erase the old class tracker
    classTracker.erase( classTracker.begin() + indexOfOldClassLabel );

    //Sort the class labels
    sortClassLabels();

    return true;
}
Ejemplo n.º 5
0
bool ClassificationData::addClass(const UINT classLabel,const std::string className){
    
    //Check to make sure the class label does not exist
    for(UINT i=0; i<classTracker.size(); i++){
        if( classTracker[i].classLabel == classLabel ){
            return false;
        }
    }
    
    //Add the class label to the class tracker
    classTracker.push_back( ClassTracker(classLabel,0,className) );
    
    //Sort the class labels
    sortClassLabels();
    
    return true;
}
Ejemplo n.º 6
0
bool ClassificationData::addClass(const UINT classLabel,const std::string className){
    
    //Check to make sure the class label does not exist
    for(size_t i=0; i<classTracker.getSize(); i++){
        if( classTracker[i].classLabel == classLabel ){
            warningLog << "addClass(const UINT classLabel,const std::string className) - Failed to add class, it already exists! Class label: " << classLabel << std::endl;
            return false;
        }
    }
    
    //Add the class label to the class tracker
    classTracker.push_back( ClassTracker(classLabel,0,className) );
    
    //Sort the class labels
    sortClassLabels();
    
    return true;
}
Ejemplo n.º 7
0
ClassificationData ClassificationData::split(const UINT trainingSizePercentage,const bool useStratifiedSampling){

    //Partitions the dataset into a training dataset (which is kept by this instance of the ClassificationData) and
	//a testing/validation dataset (which is return as a new instance of the ClassificationData).  The trainingSizePercentage
	//therefore sets the size of the data which remains in this instance and the remaining percentage of data is then added to
	//the testing/validation dataset

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();

    ClassificationData trainingSet(numDimensions);
    ClassificationData testSet(numDimensions);
    trainingSet.setAllowNullGestureClass( allowNullGestureClass );
    testSet.setAllowNullGestureClass( allowNullGestureClass );

	//Create the random partion indexs
	Random random;
    UINT randomIndex = 0;
    UINT K = getNumClasses();

    if( useStratifiedSampling ){
        //Break the data into seperate classes
        Vector< Vector< UINT > > classData( K );

        //Add the indexs to their respective classes
        for(UINT i=0; i<totalNumSamples; i++){
            classData[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
        }

        //Randomize the order of the indexs in each of the class index buffers
        for(UINT k=0; k<K; k++){
            std::random_shuffle(classData[k].begin(), classData[k].end());
        }
        
        //Reserve the memory
        UINT numTrainingSamples = 0;
        UINT numTestSamples = 0;
        
        for(UINT k=0; k<K; k++){
            UINT numTrainingExamples = (UINT) floor( Float(classData[k].size()) / 100.0 * Float(trainingSizePercentage) );
            UINT numTestExamples = ((UINT)classData[k].size())-numTrainingExamples;
            numTrainingSamples += numTrainingExamples;
            numTestSamples += numTestExamples;
        }
        
        trainingSet.reserve( numTrainingSamples );
        testSet.reserve( numTestSamples );

        //Loop over each class and add the data to the trainingSet and testSet
        for(UINT k=0; k<K; k++){
            UINT numTrainingExamples = (UINT) floor( Float(classData[k].getSize()) / 100.0 * Float(trainingSizePercentage) );

            //Add the data to the training and test sets
            for(UINT i=0; i<numTrainingExamples; i++){
                trainingSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
            }
            for(UINT i=numTrainingExamples; i<classData[k].getSize(); i++){
                testSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
            }
        }
    }else{

        const UINT numTrainingExamples = (UINT) floor( Float(totalNumSamples) / 100.0 * Float(trainingSizePercentage) );

        //Create the random partion indexs
        UINT randomIndex = 0;
        Vector< UINT > indexs( totalNumSamples );
        for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
        std::random_shuffle(indexs.begin(), indexs.end());
        
        //Reserve the memory
        trainingSet.reserve( numTrainingExamples );
        testSet.reserve( totalNumSamples-numTrainingExamples );

        //Add the data to the training and test sets
        for(UINT i=0; i<numTrainingExamples; i++){
            trainingSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
        }
        for(UINT i=numTrainingExamples; i<totalNumSamples; i++){
            testSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
        }
    }

    //Overwrite the training data in this instance with the training data of the trainingSet
    *this = trainingSet;

    //Sort the class labels in this dataset
    sortClassLabels();

    //Sort the class labels of the test dataset
    testSet.sortClassLabels();

	return testSet;
}
Ejemplo n.º 8
0
bool ClassificationData::loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex){

    numDimensions = 0;
    datasetName = "NOT_SET";
    infoText = "";

    //Clear any previous data
    clear();

    //Parse the CSV file
    FileParser parser;

    Timer timer;

    timer.start();
    
    if( !parser.parseCSVFile(filename,true) ){
        errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Failed to parse CSV file!" << std::endl;
        return false;
    }
    
    if( !parser.getConsistentColumnSize() ){
        errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndexe) - The CSV file does not have a consistent number of columns!" << std::endl;
        return false;
    }
    
    if( parser.getColumnSize() <= 1 ){
        errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - The CSV file does not have enough columns! It should contain at least two columns!" << std::endl;
        return false;
    }
    
    //Set the number of dimensions
    numDimensions = parser.getColumnSize()-1;

    timer.start();

    //Reserve the memory for the data
    data.resize( parser.getRowSize(), ClassificationSample(numDimensions) );

    timer.start();
   
    //Loop over the samples and add them to the data set
    UINT classLabel = 0;
    UINT j = 0;
    UINT n = 0;
    totalNumSamples = parser.getRowSize();
    for(UINT i=0; i<totalNumSamples; i++){
        //Get the class label
        classLabel = grt_from_str< UINT >( parser[i][classLabelColumnIndex] );
        
        //Set the class label
        data[i].setClassLabel( classLabel );
        
        //Get the sample data
        j=0;
        n=0;
        while( j != numDimensions ){
            if( n != classLabelColumnIndex ){
                data[i][j++] = grt_from_str< Float >( parser[i][n] );
            }
            n++;
        }
        
        //Update the class tracker
        if( classTracker.size() == 0 ){
            ClassTracker tracker(classLabel,1);
            classTracker.push_back(tracker);
        }else{
            bool labelFound = false;
            const size_t numClasses = classTracker.size();
            for(size_t i=0; i<numClasses; i++){
                if( classLabel == classTracker[i].classLabel ){
                    classTracker[i].counter++;
                    labelFound = true;
                    break;
                }
            }
            if( !labelFound ){
                ClassTracker tracker(classLabel,1);
                classTracker.push_back(tracker);
            }
        }
    }

    //Sort the class labels
    sortClassLabels();
    
    return true;
}
Ejemplo n.º 9
0
bool ClassificationData::loadDatasetFromFile(const std::string &filename){

	std::fstream file;
	file.open(filename.c_str(), std::ios::in);
	UINT numClasses = 0;
	clear();

	if( !file.is_open() ){
        errorLog << "loadDatasetFromFile(const std::string &filename) - could not open file!" << std::endl;
		return false;
	}

	std::string word;

	//Check to make sure this is a file with the Training File Format
	file >> word;
	if(word != "GRT_LABELLED_CLASSIFICATION_DATA_FILE_V1.0"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - could not find file header!" << std::endl;
		file.close();
		return false;
	}

    //Get the name of the dataset
	file >> word;
	if(word != "DatasetName:"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find DatasetName header!" << std::endl;
        errorLog << word << std::endl;
		file.close();
		return false;
	}
	file >> datasetName;

    file >> word;
	if(word != "InfoText:"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find InfoText header!" << std::endl;
		file.close();
		return false;
	}

    //Load the info text
    file >> word;
    infoText = "";
    while( word != "NumDimensions:" ){
        infoText += word + " ";
        file >> word;
    }

	//Get the number of dimensions in the training data
	if( word != "NumDimensions:" ){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find NumDimensions header!" << std::endl;
		file.close();
		return false;
	}
	file >> numDimensions;

	//Get the total number of training examples in the training data
	file >> word;
	if( word != "TotalNumTrainingExamples:" && word != "TotalNumExamples:" ){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find TotalNumTrainingExamples header!" << std::endl;
		file.close();
		return false;
	}
	file >> totalNumSamples;

	//Get the total number of classes in the training data
	file >> word;
	if(word != "NumberOfClasses:"){
        errorLog << "loadDatasetFromFile(string filename) - failed to find NumberOfClasses header!" << std::endl;
		file.close();
		return false;
	}
	file >> numClasses;

	//Resize the class counter buffer and load the counters
	classTracker.resize(numClasses);

	//Get the total number of classes in the training data
	file >> word;
	if(word != "ClassIDsAndCounters:"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find ClassIDsAndCounters header!" << std::endl;
		file.close();
		return false;
	}

	for(UINT i=0; i<classTracker.getSize(); i++){
		file >> classTracker[i].classLabel;
		file >> classTracker[i].counter;
        file >> classTracker[i].className;
	}

    //Check if the dataset should be scaled using external ranges
	file >> word;
	if(word != "UseExternalRanges:"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find UseExternalRanges header!" << std::endl;
		file.close();
		return false;
	}
    file >> useExternalRanges;

    //If we are using external ranges then load them
    if( useExternalRanges ){
        externalRanges.resize(numDimensions);
        for(UINT i=0; i<externalRanges.getSize(); i++){
            file >> externalRanges[i].minValue;
            file >> externalRanges[i].maxValue;
        }
    }

	//Get the main training data
	file >> word;
	if( word != "LabelledTrainingData:" && word != "Data:"){
        errorLog << "loadDatasetFromFile(const std::string &filename) - failed to find LabelledTrainingData header!" << std::endl;
		file.close();
		return false;
	}

	ClassificationSample tempSample( numDimensions );
	data.resize( totalNumSamples, tempSample );

	for(UINT i=0; i<totalNumSamples; i++){
        UINT classLabel = 0;
        VectorFloat sample(numDimensions,0);
		file >> classLabel;
		for(UINT j=0; j<numDimensions; j++){
			file >> sample[j];
		}
        data[i].set(classLabel, sample);
	}

	file.close();
	
    //Sort the class labels
    sortClassLabels();
	
	return true;
}
LabelledClassificationData LabelledClassificationData::partition(UINT trainingSizePercentage,bool useStratifiedSampling){

    //Partitions the dataset into a training dataset (which is kept by this instance of the LabelledClassificationData) and
	//a testing/validation dataset (which is return as a new instance of the LabelledClassificationData).  The trainingSizePercentage
	//therefore sets the size of the data which remains in this instance and the remaining percentage of data is then added to
	//the testing/validation dataset

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();

    LabelledClassificationData trainingSet(numDimensions);
    LabelledClassificationData testSet(numDimensions);
    trainingSet.setAllowNullGestureClass( allowNullGestureClass );
    testSet.setAllowNullGestureClass( allowNullGestureClass );
    vector< UINT > indexs( totalNumSamples );

	//Create the random partion indexs
	Random random;
    UINT randomIndex = 0;

    if( useStratifiedSampling ){
        //Break the data into seperate classes
        vector< vector< UINT > > classData( getNumClasses() );

        //Add the indexs to their respective classes
        for(UINT i=0; i<totalNumSamples; i++){
            classData[ getClassLabelIndexValue( data[i].getClassLabel() ) ].push_back( i );
        }

        //Randomize the order of the indexs in each of the class index buffers
        for(UINT k=0; k<getNumClasses(); k++){
            UINT numSamples = (UINT)classData[k].size();
            for(UINT x=0; x<numSamples; x++){
                //Pick a random index
                randomIndex = random.getRandomNumberInt(0,numSamples);

                //Swap the indexs
                SWAP(classData[k][ x ], classData[k][ randomIndex ]);
            }
        }

        //Loop over each class and add the data to the trainingSet and testSet
        for(UINT k=0; k<getNumClasses(); k++){
            UINT numTrainingExamples = (UINT) floor( double(classData[k].size()) / 100.0 * double(trainingSizePercentage) );

            //Add the data to the training and test sets
            for(UINT i=0; i<numTrainingExamples; i++){
                trainingSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
            }
            for(UINT i=numTrainingExamples; i<classData[k].size(); i++){
                testSet.addSample( data[ classData[k][i] ].getClassLabel(), data[ classData[k][i] ].getSample() );
            }
        }
    }else{

        const UINT numTrainingExamples = (UINT) floor( double(totalNumSamples) / 100.0 * double(trainingSizePercentage) );
        //Create the random partion indexs
        Random random;
        UINT randomIndex = 0;
        for(UINT i=0; i<totalNumSamples; i++) indexs[i] = i;
        for(UINT x=0; x<totalNumSamples; x++){
            //Pick a random index
            randomIndex = random.getRandomNumberInt(0,totalNumSamples);

            //Swap the indexs
            SWAP(indexs[ x ],indexs[ randomIndex ]);
        }

        //Add the data to the training and test sets
        for(UINT i=0; i<numTrainingExamples; i++){
            trainingSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
        }
        for(UINT i=numTrainingExamples; i<totalNumSamples; i++){
            testSet.addSample( data[ indexs[i] ].getClassLabel(), data[ indexs[i] ].getSample() );
        }
    }

    //Overwrite the training data in this instance with the training data of the trainingSet
    *this = trainingSet;

	sortClassLabels();
    testSet.sortClassLabels();

	return testSet;
}