/**
 Loads a labelled classification dataset from a CSV file, replacing any data currently held.

 Each row of the CSV file becomes one sample: the value in column classLabelColumnIndex is
 read as the class label and every other column, in file order, is read as one dimension of
 the sample. The CSV file must therefore contain at least two columns and every row must have
 the same number of columns.

 @param filename the path of the CSV file to load
 @param classLabelColumnIndex the zero-based index of the column that holds the class label
 @return true if the file was parsed and passed validation, false otherwise. Note that rows
 rejected by addSample(...) only generate a warning, they do not make the function return false
*/
bool LabelledClassificationData::loadDatasetFromCSVFile(string filename,UINT classLabelColumnIndex){

    numDimensions = 0;
    datasetName = "NOT_SET";
    infoText = "";

    //Clear any previous data
    clear();

    //Parse the CSV file
    FileParser parser;

    if( !parser.parseCSVFile(filename,true) ){
        errorLog << "loadDatasetFromCSVFile(string filename) - Failed to parse CSV file!" << endl;
        return false;
    }

    if( !parser.getConsistentColumnSize() ){
        errorLog << "loadDatasetFromCSVFile(string filename) - The CSV file does not have a consistent number of columns!" << endl;
        return false;
    }

    const UINT numColumns = parser.getColumnSize();

    if( numColumns <= 1 ){
        errorLog << "loadDatasetFromCSVFile(string filename) - The CSV file does not have enough columns! It should contain at least two columns!" << endl;
        return false;
    }

    //The class label column must exist, otherwise every row lookup below would read out of range
    if( classLabelColumnIndex >= numColumns ){
        errorLog << "loadDatasetFromCSVFile(string filename) - The class label column index (" << classLabelColumnIndex << ") is out of bounds! The CSV file only has " << numColumns << " columns!" << endl;
        return false;
    }

    //Set the number of dimensions (every column except the class label column)
    numDimensions = numColumns-1;

    const UINT numRows = parser.getRowSize();
    VectorDouble sample(numDimensions);
    for(UINT i=0; i<numRows; i++){

        //Get the class label
        const UINT classLabel = Util::stringToInt( parser[i][classLabelColumnIndex] );

        //Get the sample data, skipping over the class label column
        UINT j = 0;
        for(UINT n=0; n<numColumns; n++){
            if( n != classLabelColumnIndex ){
                sample[j++] = Util::stringToDouble( parser[i][n] );
            }
        }

        //Add the labelled sample to the dataset
        if( !addSample(classLabel, sample) ){
            warningLog << "loadDatasetFromCSVFile(string filename) - Could not add sample " << i << " to the dataset!" << endl;
        }
    }

    sortClassLabels();

    return true;
}
bool MatrixDouble::loadFromCSVFile(const string &filename){ //Parse the CSV file FileParser parser; if( !parser.parseCSVFile(filename,true) ){ errorLog << "loadFromCSVFile(const string &filename) - Failed to parse CSV file!" << endl; return false; } if( !parser.getConsistentColumnSize() ){ errorLog << "loadFromCSVFile(const string &filename) - The CSV file does not have a consistent number of columns!" << endl; return false; } const UINT rows = parser.getRowSize(); const UINT cols = parser.getColumnSize(); //Resize the data resize(rows, cols); for(UINT i=0; i<rows; i++){ //Get the input vector for(UINT j=0; j<cols; j++){ dataPtr[i][j] = Util::stringToDouble( parser[i][j] ); } } return true; }
bool LabelledRegressionData::loadDatasetFromCSVFile(const string &filename,const UINT numInputDimensions,const UINT numTargetDimensions){ fstream file; string value; clear(); datasetName = "NOT_SET"; infoText = ""; //Clear any previous data clear(); //Parse the CSV file FileParser parser; if( !parser.parseCSVFile(filename,true) ){ errorLog << "loadDatasetFromCSVFile(...) - Failed to parse CSV file!" << endl; return false; } if( !parser.getConsistentColumnSize() ){ errorLog << "loadDatasetFromCSVFile(...) - The CSV file does not have a consistent number of columns!" << endl; return false; } if( parser.getColumnSize() != numInputDimensions+numTargetDimensions ){ errorLog << "loadDatasetFromCSVFile(...) - The number of columns in the CSV file (" << parser.getColumnSize() << ")"; errorLog << " does not match the number of input dimensions plus the number of target dimensions (" << numInputDimensions+numTargetDimensions << ")" << endl; return false; } //Setup the labelled classification data setInputAndTargetDimensions(numInputDimensions, numTargetDimensions); UINT n = 0; VectorDouble inputVector(numInputDimensions); VectorDouble targetVector(numTargetDimensions); for(UINT i=0; i<parser.getRowSize(); i++){ //Reset n n = 0; //Get the input vector for(UINT j=0; j<numInputDimensions; j++){ inputVector[j] = Util::stringToDouble( parser[i][n++] ); } //Get the target vector for(UINT j=0; j<numTargetDimensions; j++){ targetVector[j] = Util::stringToDouble( parser[i][n++] ); } //Add the labelled sample to the dataset if( !addSample(inputVector, targetVector) ){ warningLog << "loadDatasetFromCSVFile(string filename) - Could not add sample " << i << " to the dataset!" << endl; } } return true; }
/**
 Loads an unlabelled dataset from a CSV file, replacing any data currently held.

 Each row of the CSV file becomes one sample and every column is read as one dimension of
 that sample. Every row must have the same number of columns.

 @param filename the path of the CSV file to load
 @return true if the file was parsed and has a consistent column count, false otherwise. Note
 that rows rejected by addSample(...) only generate a warning, they do not make the function return false
*/
bool UnlabelledClassificationData::loadDatasetFromCSVFile(string filename)
{
    datasetName = "NOT_SET";
    infoText = "";

    //Clear any previous data
    clear();

    //Parse the CSV file
    FileParser parser;

    if( !parser.parseCSVFile(filename,true) )
    {
        errorLog << "loadDatasetFromCSVFile(string filename) - Failed to parse CSV file!" << endl;
        return false;
    }

    if( !parser.getConsistentColumnSize() )
    {
        errorLog << "loadDatasetFromCSVFile(string filename) - The CSV file does not have a consistent number of columns!" << endl;
        return false;
    }

    //Setup the unlabelled data, every column in the file is one dimension of a sample
    numDimensions = parser.getColumnSize();

    const UINT numRows = parser.getRowSize();
    VectorDouble sample(numDimensions);
    for(UINT i=0; i<numRows; i++)
    {
        //Get the sample data
        for(UINT j=0; j<numDimensions; j++)
        {
            sample[j] = Util::stringToDouble( parser[i][j] );
        }

        //Add the sample to the dataset
        if( !addSample(sample) )
        {
            warningLog << "loadDatasetFromCSVFile(string filename) - Could not add sample " << i << " to the dataset!" << endl;
        }
    }

    return true;
}
bool ClassificationData::loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex){ numDimensions = 0; datasetName = "NOT_SET"; infoText = ""; //Clear any previous data clear(); //Parse the CSV file FileParser parser; if( !parser.parseCSVFile(filename,true) ){ errorLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - Failed to parse CSV file!" << endl; return false; } if( !parser.getConsistentColumnSize() ){ errorLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndexe) - The CSV file does not have a consistent number of columns!" << endl; return false; } if( parser.getColumnSize() <= 1 ){ errorLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - The CSV file does not have enough columns! It should contain at least two columns!" << endl; return false; } //Set the number of dimensions numDimensions = parser.getColumnSize()-1; //Reserve the memory for the data data.resize( parser.getRowSize(), ClassificationSample(numDimensions) ); //Loop over the samples and add them to the data set UINT classLabel = 0; UINT j = 0; UINT n = 0; totalNumSamples = parser.getRowSize(); for(UINT i=0; i<totalNumSamples; i++){ //Get the class label classLabel = Util::stringToInt( parser[i][classLabelColumnIndex] ); //Set the class label data[i].setClassLabel(classLabel); //Get the sample data j=0; n=0; while( j != numDimensions ){ if( n != classLabelColumnIndex ){ data[i][j++] = Util::stringToDouble( parser[i][n] ); } n++; } //Update the class tracker if( classTracker.size() == 0 ){ ClassTracker tracker(classLabel,1); classTracker.push_back(tracker); }else{ bool labelFound = false; const size_t numClasses = classTracker.size(); for(size_t i=0; i<numClasses; i++){ if( classLabel == classTracker[i].classLabel ){ classTracker[i].counter++; labelFound = true; break; } } if( !labelFound ){ ClassTracker tracker(classLabel,1); classTracker.push_back(tracker); } } } //Sort 
the class labels sortClassLabels(); return true; }
bool TimeSeriesClassificationData::loadDatasetFromCSVFile(const std::string &filename){ numDimensions = 0; datasetName = "NOT_SET"; infoText = ""; //Clear any previous data clear(); //Parse the CSV file FileParser parser; if( !parser.parseCSVFile(filename,true) ){ errorLog << "loadDatasetFromCSVFile(const std::string &filename) - Failed to parse CSV file!" << std::endl; return false; } if( !parser.getConsistentColumnSize() ){ errorLog << "loadDatasetFromCSVFile(const std::string &filename) - The CSV file does not have a consistent number of columns!" << std::endl; return false; } if( parser.getColumnSize() <= 2 ){ errorLog << "loadDatasetFromCSVFile(const std::string &filename) - The CSV file does not have enough columns! It should contain at least three columns!" << std::endl; return false; } //Set the number of dimensions numDimensions = parser.getColumnSize()-2; //Reserve the memory for the data data.reserve( parser.getRowSize() ); UINT sampleCounter = 0; UINT lastSampleCounter = 0; UINT classLabel = 0; UINT j = 0; UINT n = 0; VectorFloat sample(numDimensions); MatrixFloat timeseries; for(UINT i=0; i<parser.getRowSize(); i++){ sampleCounter = grt_from_str< UINT >( parser[i][0] ); //Check to see if a new timeseries has started, if so then add the previous time series as a sample and start recording the new time series if( sampleCounter != lastSampleCounter && i != 0 ){ //Add the labelled sample to the dataset if( !addSample(classLabel, timeseries) ){ warningLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Could not add sample " << i << " to the dataset!" 
<< std::endl; } timeseries.clear(); } lastSampleCounter = sampleCounter; //Get the class label classLabel = grt_from_str< UINT >( parser[i][1] ); //Get the sample data j=0; n=2; while( j != numDimensions ){ sample[j++] = grt_from_str< Float >( parser[i][n] ); n++; } //Add the sample to the timeseries timeseries.push_back( sample ); } if ( timeseries.getSize() > 0 ) //Add the labelled sample to the dataset if( !addSample(classLabel, timeseries) ){ warningLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Could not add sample " << parser.getRowSize()-1 << " to the dataset!" << std::endl; } return true; }