bool TimeSeriesClassificationData::loadDatasetFromCSVFile(const string &filename){ numDimensions = 0; datasetName = "NOT_SET"; infoText = ""; //Clear any previous data clear(); //Parse the CSV file FileParser parser; if( !parser.parseCSVFile(filename,true) ){ errorLog << "loadDatasetFromCSVFile(const string &filename) - Failed to parse CSV file!" << endl; return false; } if( !parser.getConsistentColumnSize() ){ errorLog << "loadDatasetFromCSVFile(const string &filename) - The CSV file does not have a consistent number of columns!" << endl; return false; } if( parser.getColumnSize() <= 2 ){ errorLog << "loadDatasetFromCSVFile(const string &filename) - The CSV file does not have enough columns! It should contain at least three columns!" << endl; return false; } //Set the number of dimensions numDimensions = parser.getColumnSize()-2; //Reserve the memory for the data data.reserve( parser.getRowSize() ); UINT sampleCounter = 0; UINT lastSampleCounter = 0; UINT classLabel = 0; UINT j = 0; UINT n = 0; VectorDouble sample(numDimensions); MatrixDouble timeseries; for(UINT i=0; i<parser.getRowSize(); i++){ sampleCounter = Util::stringToInt( parser[i][0] ); //Check to see if a new timeseries has started, if so then add the previous time series as a sample and start recording the new time series if( sampleCounter != lastSampleCounter && i != 0 ){ //Add the labelled sample to the dataset if( !addSample(classLabel, timeseries) ){ warningLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - Could not add sample " << i << " to the dataset!" << endl; } timeseries.clear(); } lastSampleCounter = sampleCounter; //Get the class label classLabel = Util::stringToInt( parser[i][1] ); //Get the sample data j=0; n=2; while( j != numDimensions ){ sample[j++] = Util::stringToDouble( parser[i][n] ); n++; } //Add the sample to the timeseries timeseries.push_back( sample ); } if ( timeseries.getSize() > 0 ) //Add the labelled sample to the dataset if( !addSample(classLabel, timeseries) ){ warningLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - Could not add sample " << parser.getRowSize()-1 << " to the dataset!" << endl; } return true; }
int main (int argc, const char * argv[]) { //Create a new instance of the TimeSeriesClassificationData TimeSeriesClassificationData trainingData; //Set the dimensionality of the data (you need to do this before you can add any samples) trainingData.setNumDimensions( 3 ); //You can also give the dataset a name (the name should have no spaces) trainingData.setDatasetName("DummyData"); //You can also add some info text about the data trainingData.setInfoText("This data contains some dummy timeseries data"); //Here you would record a time series, when you have finished recording the time series then add the training sample to the training data UINT gestureLabel = 1; MatrixDouble trainingSample; //For now we will just add 10 x 20 random walk data timeseries Random random; for(UINT k=0; k<10; k++){//For the number of classes gestureLabel = k+1; //Get the init random walk position for this gesture VectorDouble startPos( trainingData.getNumDimensions() ); for(UINT j=0; j<startPos.size(); j++){ startPos[j] = random.getRandomNumberUniform(-1.0,1.0); } //Generate the 20 time series for(UINT x=0; x<20; x++){ //Clear any previous timeseries trainingSample.clear(); //Generate the random walk UINT randomWalkLength = random.getRandomNumberInt(90, 110); VectorDouble sample = startPos; for(UINT i=0; i<randomWalkLength; i++){ for(UINT j=0; j<startPos.size(); j++){ sample[j] += random.getRandomNumberUniform(-0.1,0.1); } //Add the sample to the training sample trainingSample.push_back( sample ); } //Add the training sample to the dataset trainingData.addSample( gestureLabel, trainingSample ); } } //After recording your training data you can then save it to a file if( !trainingData.saveDatasetToFile( "TrainingData.txt" ) ){ cout << "Failed to save dataset to file!\n"; return EXIT_FAILURE; } //This can then be loaded later if( !trainingData.loadDatasetFromFile( "TrainingData.txt" ) ){ cout << "Failed to load dataset from file!\n"; return EXIT_FAILURE; } //This is how you can get some stats from the training data string datasetName = trainingData.getDatasetName(); string infoText = trainingData.getInfoText(); UINT numSamples = trainingData.getNumSamples(); UINT numDimensions = trainingData.getNumDimensions(); UINT numClasses = trainingData.getNumClasses(); cout << "Dataset Name: " << datasetName << endl; cout << "InfoText: " << infoText << endl; cout << "NumberOfSamples: " << numSamples << endl; cout << "NumberOfDimensions: " << numDimensions << endl; cout << "NumberOfClasses: " << numClasses << endl; //You can also get the minimum and maximum ranges of the data vector< MinMax > ranges = trainingData.getRanges(); cout << "The ranges of the dataset are: \n"; for(UINT j=0; j<ranges.size(); j++){ cout << "Dimension: " << j << " Min: " << ranges[j].minValue << " Max: " << ranges[j].maxValue << endl; } //If you want to partition the dataset into a training dataset and a test dataset then you can use the partition function //A value of 80 means that 80% of the original data will remain in the training dataset and 20% will be returned as the test dataset TimeSeriesClassificationData testData = trainingData.partition( 80 ); //If you have multiple datasets that you want to merge together then use the merge function if( !trainingData.merge( testData ) ){ cout << "Failed to merge datasets!\n"; return EXIT_FAILURE; } //If you want to run K-Fold cross validation using the dataset then you should first spilt the dataset into K-Folds //A value of 10 splits the dataset into 10 folds and the true parameter signals that stratified sampling should be used if( !trainingData.spiltDataIntoKFolds( 10, true ) ){ cout << "Failed to spiltDataIntoKFolds!\n"; return EXIT_FAILURE; } //After you have called the spilt function you can then get the training and test sets for each fold for(UINT foldIndex=0; foldIndex<10; foldIndex++){ TimeSeriesClassificationData foldTrainingData = trainingData.getTrainingFoldData( foldIndex ); TimeSeriesClassificationData foldTestingData = trainingData.getTestFoldData( foldIndex ); } //If need you can clear any training data that you have recorded trainingData.clear(); return EXIT_SUCCESS; }