bool TimeSeriesClassificationDataStream::loadDatasetFromFile(const string &filename){ std::fstream file; file.open(filename.c_str(), std::ios::in); UINT numClasses = 0; UINT numTrackingPoints = 0; clear(); if( !file.is_open() ){ errorLog<< "loadDatasetFromFile(string fileName) - Failed to open file!" << endl; return false; } string word; //Check to make sure this is a file with the Training File Format file >> word; if(word != "GRT_LABELLED_CONTINUOUS_TIME_SERIES_CLASSIFICATION_FILE_V1.0"){ file.close(); errorLog<< "loadDatasetFromFile(string fileName) - Failed to find file header!" << endl; return false; } //Get the name of the dataset file >> word; if(word != "DatasetName:"){ errorLog << "loadDatasetFromFile(string filename) - failed to find DatasetName!" << endl; file.close(); return false; } file >> datasetName; file >> word; if(word != "InfoText:"){ errorLog << "loadDatasetFromFile(string filename) - failed to find InfoText!" << endl; file.close(); return false; } //Load the info text file >> word; infoText = ""; while( word != "NumDimensions:" ){ infoText += word + " "; file >> word; } //Get the number of dimensions in the training data if(word != "NumDimensions:"){ errorLog<< "loadDatasetFromFile(string fileName) - Failed to find NumDimensions!" << endl; file.close(); return false; } file >> numDimensions; //Get the total number of training examples in the training data file >> word; if(word != "TotalNumSamples:"){ errorLog<< "loadDatasetFromFile(string fileName) - Failed to find TotalNumSamples!" << endl; file.close(); return false; } file >> totalNumSamples; //Get the total number of classes in the training data file >> word; if(word != "NumberOfClasses:"){ errorLog<< "loadDatasetFromFile(string fileName) - Failed to find NumberOfClasses!" 
<< endl; file.close(); return false; } file >> numClasses; //Resize the class counter buffer and load the counters classTracker.resize(numClasses); //Get the total number of classes in the training data file >> word; if(word != "ClassIDsAndCounters:"){ errorLog<< "loadDatasetFromFile(string fileName) - Failed to find ClassIDsAndCounters!" << endl; file.close(); return false; } for(UINT i=0; i<classTracker.size(); i++){ file >> classTracker[i].classLabel; file >> classTracker[i].counter; } //Get the NumberOfPositionTrackers file >> word; if(word != "NumberOfPositionTrackers:"){ errorLog<< "loadDatasetFromFile(string fileName) - Failed to find NumberOfPositionTrackers!" << endl; file.close(); return false; } file >> numTrackingPoints; timeSeriesPositionTracker.resize( numTrackingPoints ); //Get the TimeSeriesPositionTrackers file >> word; if(word != "TimeSeriesPositionTrackers:"){ errorLog<< "loadDatasetFromFile(string fileName) - Failed to find TimeSeriesPositionTrackers!" << endl; file.close(); return false; } for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){ UINT classLabel; UINT startIndex; UINT endIndex; file >> classLabel; file >> startIndex; file >> endIndex; timeSeriesPositionTracker[i].setTracker(startIndex,endIndex,classLabel); } //Check if the dataset should be scaled using external ranges file >> word; if(word != "UseExternalRanges:"){ errorLog << "loadDatasetFromFile(string filename) - failed to find DatasetName!" << endl; file.close(); return false; } file >> useExternalRanges; //If we are using external ranges then load them if( useExternalRanges ){ externalRanges.resize(numDimensions); for(UINT i=0; i<externalRanges.size(); i++){ file >> externalRanges[i].minValue; file >> externalRanges[i].maxValue; } } //Get the main time series data file >> word; if(word != "LabelledContinuousTimeSeriesClassificationData:"){ errorLog<< "loadDatasetFromFile(string fileName) - Failed to find LabelledContinuousTimeSeriesClassificationData!" 
<< endl; file.close(); return false; } //Reset the memory data.resize( totalNumSamples, ClassificationSample() ); //Load each sample for(UINT i=0; i<totalNumSamples; i++){ UINT classLabel = 0; vector<double> sample(numDimensions); file >> classLabel; for(UINT j=0; j<numDimensions; j++){ file >> sample[j]; } data[i].set(classLabel,sample); } file.close(); return true; }
bool ClassificationData::loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex){ numDimensions = 0; datasetName = "NOT_SET"; infoText = ""; //Clear any previous data clear(); //Parse the CSV file FileParser parser; Timer timer; timer.start(); if( !parser.parseCSVFile(filename,true) ){ errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Failed to parse CSV file!" << std::endl; return false; } if( !parser.getConsistentColumnSize() ){ errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndexe) - The CSV file does not have a consistent number of columns!" << std::endl; return false; } if( parser.getColumnSize() <= 1 ){ errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - The CSV file does not have enough columns! It should contain at least two columns!" << std::endl; return false; } //Set the number of dimensions numDimensions = parser.getColumnSize()-1; timer.start(); //Reserve the memory for the data data.resize( parser.getRowSize(), ClassificationSample(numDimensions) ); timer.start(); //Loop over the samples and add them to the data set UINT classLabel = 0; UINT j = 0; UINT n = 0; totalNumSamples = parser.getRowSize(); for(UINT i=0; i<totalNumSamples; i++){ //Get the class label classLabel = grt_from_str< UINT >( parser[i][classLabelColumnIndex] ); //Set the class label data[i].setClassLabel( classLabel ); //Get the sample data j=0; n=0; while( j != numDimensions ){ if( n != classLabelColumnIndex ){ data[i][j++] = grt_from_str< Float >( parser[i][n] ); } n++; } //Update the class tracker if( classTracker.size() == 0 ){ ClassTracker tracker(classLabel,1); classTracker.push_back(tracker); }else{ bool labelFound = false; const size_t numClasses = classTracker.size(); for(size_t i=0; i<numClasses; i++){ if( classLabel == classTracker[i].classLabel ){ classTracker[i].counter++; labelFound = true; break; } } 
if( !labelFound ){ ClassTracker tracker(classLabel,1); classTracker.push_back(tracker); } } } //Sort the class labels sortClassLabels(); return true; }
//Returns the sample at the current playback position and then advances the playback index by one.
//The position is taken modulo totalNumSamples, so playback wraps back to the first sample.
//A default-constructed ClassificationSample is returned when the dataset holds no samples,
//in which case the playback index is left untouched.
ClassificationSample TimeSeriesClassificationDataStream::getNextSample(){

    if( totalNumSamples != 0 ){
        //Work out the position first, then step the playback index forward
        const UINT current = playbackIndex % totalNumSamples;
        playbackIndex++;
        return data[ current ];
    }

    return ClassificationSample();
}