bool TimeSeriesClassificationData::loadDatasetFromCSVFile(const string &filename){
    
    numDimensions = 0;
    datasetName = "NOT_SET";
    infoText = "";
    
    //Clear any previous data
    clear();
    
    //Parse the CSV file
    FileParser parser;
    
    if( !parser.parseCSVFile(filename,true) ){
        errorLog << "loadDatasetFromCSVFile(const string &filename) - Failed to parse CSV file!" << endl;
        return false;
    }
    
    if( !parser.getConsistentColumnSize() ){
        errorLog << "loadDatasetFromCSVFile(const string &filename) - The CSV file does not have a consistent number of columns!" << endl;
        return false;
    }
    
    if( parser.getColumnSize() <= 2 ){
        errorLog << "loadDatasetFromCSVFile(const string &filename) - The CSV file does not have enough columns! It should contain at least three columns!" << endl;
        return false;
    }
    
    //Set the number of dimensions
    numDimensions = parser.getColumnSize()-2;
    
    //Reserve the memory for the data
    data.reserve( parser.getRowSize() );
    
    UINT sampleCounter = 0;
    UINT lastSampleCounter = 0;
    UINT classLabel = 0;
    UINT j = 0;
    UINT n = 0;
    VectorDouble sample(numDimensions);
    MatrixDouble timeseries;
    for(UINT i=0; i<parser.getRowSize(); i++){
        
        sampleCounter = Util::stringToInt( parser[i][0] );
        
        //Check to see if a new timeseries has started, if so then add the previous time series as a sample and start recording the new time series
        if( sampleCounter != lastSampleCounter && i != 0 ){
            //Add the labelled sample to the dataset
            if( !addSample(classLabel, timeseries) ){
                warningLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - Could not add sample " << i << " to the dataset!" << endl;
            }
            timeseries.clear();
        }
        lastSampleCounter = sampleCounter;
        
        //Get the class label
        classLabel = Util::stringToInt( parser[i][1] );
        
        //Get the sample data
        j=0;
        n=2;
        while( j != numDimensions ){
            sample[j++] = Util::stringToDouble( parser[i][n] );
            n++;
        }
        
        //Add the sample to the timeseries
        timeseries.push_back( sample );
    }
	if ( timeseries.getSize() > 0 )
        //Add the labelled sample to the dataset
        if( !addSample(classLabel, timeseries) ){
            warningLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - Could not add sample " << parser.getRowSize()-1 << " to the dataset!" << endl;
        }
    
    return true;
}
// Example #2
// 0
int main(int argc, const char * argv[]){
    
    //Load the training data
    TimeSeriesClassificationData trainingData;
    
    if( !trainingData.loadDatasetFromFile("HMMTrainingData.grt") ){
        cout << "ERROR: Failed to load training data!\n";
        return false;
    }
    
    //Remove 20% of the training data to use as test data
    TimeSeriesClassificationData testData = trainingData.partition( 80 );
    
    //The input to the HMM must be a quantized discrete value
    //We therefore use a KMeansQuantizer to covert the N-dimensional continuous data into 1-dimensional discrete data
    const UINT NUM_SYMBOLS = 10;
    KMeansQuantizer quantizer( NUM_SYMBOLS );
    
    //Train the quantizer using the training data
    if( !quantizer.train( trainingData ) ){
        cout << "ERROR: Failed to train quantizer!\n";
        return false;
    }
    
    //Quantize the training data
    TimeSeriesClassificationData quantizedTrainingData( 1 );
    
    for(UINT i=0; i<trainingData.getNumSamples(); i++){
        
        UINT classLabel = trainingData[i].getClassLabel();
        MatrixDouble quantizedSample;
        
        for(UINT j=0; j<trainingData[i].getLength(); j++){
            quantizer.quantize( trainingData[i].getData().getRowVector(j) );
            
            quantizedSample.push_back( quantizer.getFeatureVector() );
        }
        
        if( !quantizedTrainingData.addSample(classLabel, quantizedSample) ){
            cout << "ERROR: Failed to quantize training data!\n";
            return false;
        }
        
    }
    
    //Create a new HMM instance
    HMM hmm;
    
    //Set the number of states in each model
    hmm.setNumStates( 4 );
    
    //Set the number of symbols in each model, this must match the number of symbols in the quantizer
    hmm.setNumSymbols( NUM_SYMBOLS );
    
    //Set the HMM model type to LEFTRIGHT with a delta of 1
    hmm.setModelType( HiddenMarkovModel::LEFTRIGHT );
    hmm.setDelta( 1 );
    
    //Set the training parameters
    hmm.setMinImprovement( 1.0e-5 );
    hmm.setMaxNumIterations( 100 );
    hmm.setNumRandomTrainingIterations( 20 );
    
    //Train the HMM model
    if( !hmm.train( quantizedTrainingData ) ){
        cout << "ERROR: Failed to train the HMM model!\n";
        return false;
    }
    
    //Save the HMM model to a file
    if( !hmm.save( "HMMModel.grt" ) ){
        cout << "ERROR: Failed to save the model to a file!\n";
        return false;
    }
    
    //Load the HMM model from a file
    if( !hmm.load( "HMMModel.grt" ) ){
        cout << "ERROR: Failed to load the model from a file!\n";
        return false;
    }
    
    //Quantize the test data
    TimeSeriesClassificationData quantizedTestData( 1 );
    
    for(UINT i=0; i<testData.getNumSamples(); i++){
        
        UINT classLabel = testData[i].getClassLabel();
        MatrixDouble quantizedSample;
        
        for(UINT j=0; j<testData[i].getLength(); j++){
            quantizer.quantize( testData[i].getData().getRowVector(j) );
            
            quantizedSample.push_back( quantizer.getFeatureVector() );
        }
        
        if( !quantizedTestData.addSample(classLabel, quantizedSample) ){
            cout << "ERROR: Failed to quantize training data!\n";
            return false;
        }
    }
    
    //Compute the accuracy of the HMM models using the test data
    double numCorrect = 0;
    double numTests = 0;
    for(UINT i=0; i<quantizedTestData.getNumSamples(); i++){
        
        UINT classLabel = quantizedTestData[i].getClassLabel();
        hmm.predict( quantizedTestData[i].getData() );
        
        if( classLabel == hmm.getPredictedClassLabel() ) numCorrect++;
        numTests++;
        
        VectorDouble classLikelihoods = hmm.getClassLikelihoods();
        VectorDouble classDistances = hmm.getClassDistances();
        
        cout << "ClassLabel: " << classLabel;
        cout << " PredictedClassLabel: " << hmm.getPredictedClassLabel();
        cout << " MaxLikelihood: " << hmm.getMaximumLikelihood();
        
        cout << "  ClassLikelihoods: ";
        for(UINT k=0; k<classLikelihoods.size(); k++){
            cout << classLikelihoods[k] << "\t";
        }
        
        cout << "ClassDistances: ";
        for(UINT k=0; k<classDistances.size(); k++){
            cout << classDistances[k] << "\t";
        }
        cout << endl;
    }
    
    cout << "Test Accuracy: " << numCorrect/numTests*100.0 << endl;
    
    return true;
}
int main (int argc, const char * argv[])
{
    //Create a new instance of the TimeSeriesClassificationData
    TimeSeriesClassificationData trainingData;
    
    //Set the dimensionality of the data (you need to do this before you can add any samples)
    trainingData.setNumDimensions( 3 );
    
    //You can also give the dataset a name (the name should have no spaces)
    trainingData.setDatasetName("DummyData");
    
    //You can also add some info text about the data
    trainingData.setInfoText("This data contains some dummy timeseries data");
    
    //Here you would record a time series, when you have finished recording the time series then add the training sample to the training data
    UINT gestureLabel = 1;
    MatrixDouble trainingSample;
    
    //For now we will just add 10 x 20 random walk data timeseries
    Random random;
    for(UINT k=0; k<10; k++){//For the number of classes
        gestureLabel = k+1;
        
        //Get the init random walk position for this gesture
        VectorDouble startPos( trainingData.getNumDimensions() );
        for(UINT j=0; j<startPos.size(); j++){
            startPos[j] = random.getRandomNumberUniform(-1.0,1.0);
        }
                
        //Generate the 20 time series
        for(UINT x=0; x<20; x++){
            
            //Clear any previous timeseries
            trainingSample.clear();
            
            //Generate the random walk
            UINT randomWalkLength = random.getRandomNumberInt(90, 110);
            VectorDouble sample = startPos;
            for(UINT i=0; i<randomWalkLength; i++){
                for(UINT j=0; j<startPos.size(); j++){
                    sample[j] += random.getRandomNumberUniform(-0.1,0.1);
                }
                
                //Add the sample to the training sample
                trainingSample.push_back( sample );
            }
            
            //Add the training sample to the dataset
            trainingData.addSample( gestureLabel, trainingSample );
            
        }
    }
    
    //After recording your training data you can then save it to a file
    if( !trainingData.saveDatasetToFile( "TrainingData.txt" ) ){
	    cout << "Failed to save dataset to file!\n";
	    return EXIT_FAILURE;
	}
    
    //This can then be loaded later
    if( !trainingData.loadDatasetFromFile( "TrainingData.txt" ) ){
		cout << "Failed to load dataset from file!\n";
		return EXIT_FAILURE;
	}
    
    //This is how you can get some stats from the training data
    string datasetName = trainingData.getDatasetName();
    string infoText = trainingData.getInfoText();
    UINT numSamples = trainingData.getNumSamples();
    UINT numDimensions = trainingData.getNumDimensions();
    UINT numClasses = trainingData.getNumClasses();
    
    cout << "Dataset Name: " << datasetName << endl;
    cout << "InfoText: " << infoText << endl;
    cout << "NumberOfSamples: " << numSamples << endl;
    cout << "NumberOfDimensions: " << numDimensions << endl;
    cout << "NumberOfClasses: " << numClasses << endl;
    
    //You can also get the minimum and maximum ranges of the data
    vector< MinMax > ranges = trainingData.getRanges();
    
    cout << "The ranges of the dataset are: \n";
    for(UINT j=0; j<ranges.size(); j++){
        cout << "Dimension: " << j << " Min: " << ranges[j].minValue << " Max: " << ranges[j].maxValue << endl;
    }
    
    //If you want to partition the dataset into a training dataset and a test dataset then you can use the partition function
    //A value of 80 means that 80% of the original data will remain in the training dataset and 20% will be returned as the test dataset
    TimeSeriesClassificationData testData = trainingData.partition( 80 );
    
    //If you have multiple datasets that you want to merge together then use the merge function
    if( !trainingData.merge( testData ) ){
		cout << "Failed to merge datasets!\n";
		return EXIT_FAILURE;
	}
    
    //If you want to run K-Fold cross validation using the dataset then you should first spilt the dataset into K-Folds
    //A value of 10 splits the dataset into 10 folds and the true parameter signals that stratified sampling should be used
    if( !trainingData.spiltDataIntoKFolds( 10, true ) ){
		cout << "Failed to spiltDataIntoKFolds!\n";
		return EXIT_FAILURE;
	}
    
    //After you have called the spilt function you can then get the training and test sets for each fold
    for(UINT foldIndex=0; foldIndex<10; foldIndex++){
        TimeSeriesClassificationData foldTrainingData = trainingData.getTrainingFoldData( foldIndex );
        TimeSeriesClassificationData foldTestingData = trainingData.getTestFoldData( foldIndex );
    }
    
    //If need you can clear any training data that you have recorded
    trainingData.clear();
    
    return EXIT_SUCCESS;
}
// Example #4
// 0
int main() {
    vector<string> gestures(0,"");
    GetFilesInDirectory(gestures, "rawdata");
    CreateDirectory("processed", NULL);
    sort(gestures.begin(), gestures.end());
    data = vector<vector<vector<double > > >(gestures.size(), vector<vector<double > >(0,vector<double>(0,0)));
    for(size_t i = 0; i < gestures.size(); i++) {
        ifstream fin(gestures[i]);
        int n; fin >> n;
       // cerr << gestures[i] << endl;
       // cerr << n << endl;
        data[i] = vector<vector<double> >(n, vector<double>(NUMPARAM, 0));
        for(int j = 0; j < n; j++) {
            for(int k = 0; k < NUMPARAM; k++) {
                fin >> data[i][j][k];
            }
        }
        fin.close();
    }


    //Create a new instance of the TimeSeriesClassificationDataStream
    TimeSeriesClassificationData trainingData;

    // ax, ay, az
    trainingData.setNumDimensions(3);
    trainingData.setDatasetName("processed\\GestureTrainingData.txt");
    ofstream labelfile("processed\\GestureTrainingDataLabels.txt");
    UINT currLabel = 1;
    Random random;
    map<string, int> gesturenames;
    for(size_t overall = 0; overall < gestures.size(); overall++) {

        string nam = gestures[overall].substr(8,gestures[overall].find_first_of('_')-8);
        if(gesturenames.count(nam)) currLabel = gesturenames[nam];
        else {
            currLabel = gesturenames.size()+1;
            gesturenames[nam] = currLabel;
            labelfile << currLabel << " " << nam << endl;
        }
        MatrixDouble trainingSample;
        VectorDouble currVec( trainingData.getNumDimensions() );
        for(size_t k = 1; k < data[overall].size(); k++) {
            for(UINT j=0; j<currVec.size(); j++){
                currVec[j] = data[overall][k][j];
            }
            trainingSample.push_back(currVec);
        }
        trainingData.addSample(currLabel, trainingSample);

    }
    for(size_t i = 0; i < gestures.size(); i++) {
        MatrixDouble trainingSample;
        VectorDouble currVec(trainingData.getNumDimensions());
        for(UINT j = 0; j < currVec.size(); j++) {
            currVec[j] = random.getRandomNumberUniform(-1.0, 1.0);
        }
        for(size_t k = 0; k < 100; k++) {
            trainingSample.push_back(currVec);
        }
        trainingData.addSample(0, trainingSample);
    }

    //After recording your training data you can then save it to a file
    if( !trainingData.save( "processed\\TrainingData.grt" ) ){
        cout << "ERROR: Failed to save dataset to file!\n";
        return EXIT_FAILURE;
    }

    //This can then be loaded later
    if( !trainingData.load( "processed\\TrainingData.grt" ) ){
        cout << "ERROR: Failed to load dataset from file!\n";
        return EXIT_FAILURE;
    }

    //This is how you can get some stats from the training data
    string datasetName = trainingData.getDatasetName();
    string infoText = trainingData.getInfoText();
    UINT numSamples = trainingData.getNumSamples();
    UINT numDimensions = trainingData.getNumDimensions();
    UINT numClasses = trainingData.getNumClasses();

    cout << "Dataset Name: " << datasetName << endl;
    cout << "InfoText: " << infoText << endl;
    cout << "NumberOfSamples: " << numSamples << endl;
    cout << "NumberOfDimensions: " << numDimensions << endl;
    cout << "NumberOfClasses: " << numClasses << endl;

    //You can also get the minimum and maximum ranges of the data
    vector< MinMax > ranges = trainingData.getRanges();

    cout << "The ranges of the dataset are: \n";
    for(UINT j=0; j<ranges.size(); j++){
        cout << "Dimension: " << j << " Min: " << ranges[j].minValue << " Max: " << ranges[j].maxValue << endl;
    }

    DTW dtw;

    if( !dtw.train( trainingData ) ){
        cerr << "Failed to train classifier!\n";
        exit(EXIT_FAILURE);
    }
    dtw.enableNullRejection(true);
    dtw.setNullRejectionCoeff(4);
    dtw.enableTrimTrainingData(true, 0.1, 90);
    //Save the DTW model to a file
    if( !dtw.saveModelToFile("processed\\DTWModel.txt") ){
        cerr << "Failed to save the classifier model!\n";
        exit(EXIT_FAILURE);
    }

    trainingData.clear();

    return EXIT_SUCCESS;
}
/**
 MatrixDouble example: fills a 100x2 matrix with increasing values, appends one extra row, prints the matrix,
 one row, one column, the mean, standard deviation, covariance matrix and ranges, and finally saves the matrix
 to a CSV file and loads it back.

 @return EXIT_SUCCESS if the matrix could be saved and loaded, EXIT_FAILURE otherwise
*/
int main (int argc, const char * argv[])
{
    //Create an empty matrix double
    MatrixDouble matrix;
    
    //Resize the matrix
    matrix.resize( 100, 2 );
    
    //Set all the values in the matrix to zero
    matrix.setAllValues( 0 );
    
    //Loop over the data and set each element to the next value of an increasing counter (0, 1, 2, ...)
    UINT counter = 0;
    for(UINT i=0; i<matrix.getNumRows(); i++){
        for(UINT j=0; j<matrix.getNumCols(); j++){
            matrix[i][j] = counter++;
        }
    }
    
    //Add a new row at the very end of the matrix
    VectorDouble newRow(2);
    newRow[0] = 1000;
    newRow[1] = 2000;
    matrix.push_back( newRow );
    
    //Print the values
    cout << "Matrix Data: \n";
    for(UINT i=0; i<matrix.getNumRows(); i++){
        for(UINT j=0; j<matrix.getNumCols(); j++){
            cout << matrix[i][j] << "\t";
        }
        cout << endl;
    }
    cout << endl;
    
    //Get the second row as a vector
    VectorDouble rowVector = matrix.getRowVector( 1 );
    
    cout << "Row Vector Data: \n";
    for(UINT i=0; i<rowVector.size(); i++){
        cout << rowVector[i] << "\t";
    }
    cout << endl;
    
    //Get the second column as a vector
    VectorDouble colVector = matrix.getColVector( 1 );
    
    cout << "Column Vector Data: \n";
    for(UINT i=0; i<colVector.size(); i++){
        cout << colVector[i] << "\n";
    }
    cout << endl;
    
    //Get the mean of each column
    VectorDouble mean = matrix.getMean();
    
    cout << "Mean: \n";
    for(UINT i=0; i<mean.size(); i++){
        cout << mean[i] << "\n";
    }
    cout << endl;
    
    //Get the Standard Deviation of each column
    VectorDouble stdDev = matrix.getStdDev();
    
    cout << "StdDev: \n";
    for(UINT i=0; i<stdDev.size(); i++){
        cout << stdDev[i] << "\n";
    }
    cout << endl;
    
    //Get the covariance matrix
    MatrixDouble cov = matrix.getCovarianceMatrix();
    
    cout << "Covariance Matrix: \n";
    for(UINT i=0; i<cov.getNumRows(); i++){
        for(UINT j=0; j<cov.getNumCols(); j++){
            cout << cov[i][j] << "\t";
        }
        cout << endl;
    }
    cout << endl;
    
    //Get the minimum and maximum values of the matrix data
    vector< MinMax > ranges = matrix.getRanges();
    
    cout << "Ranges: \n";
    for(UINT i=0; i<ranges.size(); i++){
        cout << "i: " << i << "\tMinValue: " << ranges[i].minValue << "\tMaxValue:" << ranges[i].maxValue << "\n";
    }
    cout << endl;
    
    //Save the matrix data to a csv file
    if( !matrix.save( "data.csv" ) ){
        cout << "ERROR: Failed to save the matrix to data.csv!\n";
        return EXIT_FAILURE;
    }
    
    //Load the matrix data from a csv file
    if( !matrix.load( "data.csv" ) ){
        cout << "ERROR: Failed to load the matrix from data.csv!\n";
        return EXIT_FAILURE;
    }
    
    return EXIT_SUCCESS;
}