/**
 Loads a dataset from a whitespace-delimited text file that uses the
 GRT_LABELLED_CONTINUOUS_TIME_SERIES_CLASSIFICATION_FILE_V1.0 layout.

 The file is read as a sequence of keyword tokens, each followed by its value(s), in this order:
 header, DatasetName:, InfoText:, NumDimensions:, TotalNumSamples:, NumberOfClasses:,
 ClassIDsAndCounters:, NumberOfPositionTrackers:, TimeSeriesPositionTrackers:, UseExternalRanges:
 (followed by one min/max pair per dimension when enabled) and finally
 LabelledContinuousTimeSeriesClassificationData: followed by one "classLabel v1 ... vN" record per sample.

 clear() is called before the file is opened, and the members are filled in as the file is parsed,
 so when this function returns false the instance may be left partially populated.

 @param filename: the path of the file to load
 @return true if the whole file was parsed successfully, false otherwise (an error is logged)
*/
bool TimeSeriesClassificationDataStream::loadDatasetFromFile(const string &filename){

	//The fstream destructor closes the file, so the early returns below do not need to close it explicitly
	std::fstream file;
	file.open(filename.c_str(), std::ios::in);
	UINT numClasses = 0;
	UINT numTrackingPoints = 0;
	clear();

	if( !file.is_open() ){
		errorLog<< "loadDatasetFromFile(string fileName) - Failed to open file!" << endl;
		return false;
	}

	string word;

	//Check to make sure this is a file with the Training File Format
	file >> word;
	if(word != "GRT_LABELLED_CONTINUOUS_TIME_SERIES_CLASSIFICATION_FILE_V1.0"){
		errorLog<< "loadDatasetFromFile(string fileName) - Failed to find file header!" << endl;
		return false;
	}

	//Get the name of the dataset
	file >> word;
	if(word != "DatasetName:"){
		errorLog << "loadDatasetFromFile(string filename) - failed to find DatasetName!" << endl;
		return false;
	}
	file >> datasetName;

	file >> word;
	if(word != "InfoText:"){
		errorLog << "loadDatasetFromFile(string filename) - failed to find InfoText!" << endl;
		return false;
	}

	//Load the info text: every token up to the NumDimensions: keyword belongs to it.
	//The stream state is part of the loop condition, otherwise a file that ends before
	//NumDimensions: would leave word unchanged and the loop would never terminate.
	infoText = "";
	while( (file >> word) && word != "NumDimensions:" ){
		infoText += word + " ";
	}

	//Get the number of dimensions in the training data
	if( file.fail() || word != "NumDimensions:" ){
		errorLog<< "loadDatasetFromFile(string fileName) - Failed to find NumDimensions!" << endl;
		return false;
	}
	file >> numDimensions;

	//Get the total number of training examples in the training data
	file >> word;
	if(word != "TotalNumSamples:"){
		errorLog<< "loadDatasetFromFile(string fileName) - Failed to find TotalNumSamples!" << endl;
		return false;
	}
	file >> totalNumSamples;

	//Get the total number of classes in the training data
	file >> word;
	if(word != "NumberOfClasses:"){
		errorLog<< "loadDatasetFromFile(string fileName) - Failed to find NumberOfClasses!" << endl;
		return false;
	}
	file >> numClasses;

	//Resize the class counter buffer and load the counters
	classTracker.resize(numClasses);

	//Get the class labels and the number of samples recorded for each class
	file >> word;
	if(word != "ClassIDsAndCounters:"){
		errorLog<< "loadDatasetFromFile(string fileName) - Failed to find ClassIDsAndCounters!" << endl;
		return false;
	}

	for(UINT i=0; i<classTracker.size(); i++){
		file >> classTracker[i].classLabel;
		file >> classTracker[i].counter;
	}

	//Get the NumberOfPositionTrackers
	file >> word;
	if(word != "NumberOfPositionTrackers:"){
		errorLog<< "loadDatasetFromFile(string fileName) - Failed to find NumberOfPositionTrackers!" << endl;
		return false;
	}
	file >> numTrackingPoints;
	timeSeriesPositionTracker.resize( numTrackingPoints );

	//Get the TimeSeriesPositionTrackers
	file >> word;
	if(word != "TimeSeriesPositionTrackers:"){
		errorLog<< "loadDatasetFromFile(string fileName) - Failed to find TimeSeriesPositionTrackers!" << endl;
		return false;
	}

	for(UINT i=0; i<timeSeriesPositionTracker.size(); i++){
		//Initialized so that a failed read never hands indeterminate values to setTracker
		UINT classLabel = 0;
		UINT startIndex = 0;
		UINT endIndex = 0;
		file >> classLabel;
		file >> startIndex;
		file >> endIndex;
		timeSeriesPositionTracker[i].setTracker(startIndex,endIndex,classLabel);
	}

	//Check if the dataset should be scaled using external ranges
	file >> word;
	if(word != "UseExternalRanges:"){
		errorLog << "loadDatasetFromFile(string filename) - failed to find UseExternalRanges!" << endl;
		return false;
	}
	file >> useExternalRanges;

	//If we are using external ranges then load them (one min/max pair per dimension)
	if( useExternalRanges ){
		externalRanges.resize(numDimensions);
		for(UINT i=0; i<externalRanges.size(); i++){
			file >> externalRanges[i].minValue;
			file >> externalRanges[i].maxValue;
		}
	}

	//Get the main time series data
	file >> word;
	if(word != "LabelledContinuousTimeSeriesClassificationData:"){
		errorLog<< "loadDatasetFromFile(string fileName) - Failed to find LabelledContinuousTimeSeriesClassificationData!" << endl;
		return false;
	}

	//Reset the memory
	data.resize( totalNumSamples, ClassificationSample() );

	//Load each sample
	for(UINT i=0; i<totalNumSamples; i++){
		UINT classLabel = 0;
		vector<double> sample(numDimensions);

		file >> classLabel;
		for(UINT j=0; j<numDimensions; j++){
			file >> sample[j];
		}

		//No keyword check follows the sample data, so a truncated or malformed record has to be
		//detected here, otherwise it would be silently stored as zeros and reported as a success
		if( file.fail() ){
			errorLog<< "loadDatasetFromFile(string fileName) - Failed to read sample " << i << ", the file is truncated or contains an invalid value!" << endl;
			return false;
		}

		data[i].set(classLabel,sample);
	}

	return true;
}
// Example #2
// 0
/**
 Loads a dataset from a CSV file in which every row is one sample: the column at
 classLabelColumnIndex holds the class label and all the remaining columns, in file order,
 hold the sample values.

 Any existing data is cleared first. The number of dimensions is set to the number of columns
 minus one, the class tracker is rebuilt from the labels found in the file and the class labels
 are sorted once every row has been added.

 @param filename: the path of the CSV file to load
 @param classLabelColumnIndex: the zero-based index of the column that contains the class label, must be less than the number of columns in the file
 @return true if the file was parsed and loaded, false otherwise (an error is logged)
*/
bool ClassificationData::loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex){

    numDimensions = 0;
    datasetName = "NOT_SET";
    infoText = "";

    //Clear any previous data
    clear();

    //Parse the CSV file
    FileParser parser;

    if( !parser.parseCSVFile(filename,true) ){
        errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Failed to parse CSV file!" << std::endl;
        return false;
    }

    if( !parser.getConsistentColumnSize() ){
        errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndexe) - The CSV file does not have a consistent number of columns!" << std::endl;
        return false;
    }

    if( parser.getColumnSize() <= 1 ){
        errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - The CSV file does not have enough columns! It should contain at least two columns!" << std::endl;
        return false;
    }

    //The label column is indexed directly for every row below, so reject an index that is outside the file
    if( classLabelColumnIndex >= parser.getColumnSize() ){
        errorLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - The class label column index is out of bounds!" << std::endl;
        return false;
    }

    //Set the number of dimensions (every column except the class label column)
    numDimensions = parser.getColumnSize()-1;

    //Reserve the memory for the data
    data.resize( parser.getRowSize(), ClassificationSample(numDimensions) );
    totalNumSamples = parser.getRowSize();

    //Loop over the samples and add them to the data set
    for(UINT i=0; i<totalNumSamples; i++){
        //Get and set the class label
        const UINT classLabel = grt_from_str< UINT >( parser[i][classLabelColumnIndex] );
        data[i].setClassLabel( classLabel );

        //Get the sample data: walk the columns in order, skipping the label column,
        //until all numDimensions values have been written
        UINT j = 0;
        for(UINT n=0; j != numDimensions; n++){
            if( n != classLabelColumnIndex ){
                data[i][j] = grt_from_str< Float >( parser[i][n] );
                j++;
            }
        }

        //Update the class tracker: increment the counter of a known label, otherwise add a new entry
        bool labelFound = false;
        const size_t numClasses = classTracker.size();
        for(size_t k=0; k<numClasses; k++){
            if( classLabel == classTracker[k].classLabel ){
                classTracker[k].counter++;
                labelFound = true;
                break;
            }
        }
        if( !labelFound ){
            classTracker.push_back( ClassTracker(classLabel,1) );
        }
    }

    //Sort the class labels
    sortClassLabels();

    return true;
}
/**
 Returns the sample at the current playback position and advances the playback index by one.
 The position is the playback index modulo totalNumSamples, so playback wraps back to the first
 sample after the last one. A default-constructed sample is returned when totalNumSamples is zero,
 in which case the playback index is left untouched.

 @return the sample at the current playback position, or an empty ClassificationSample if there are no samples
*/
ClassificationSample TimeSeriesClassificationDataStream::getNextSample(){
    //Nothing to play back (this also guards the modulo below against a division by zero)
    if( totalNumSamples == 0 ){
        return ClassificationSample();
    }

    //Work out where the pre-increment playback index lands in the data, then move it forward
    const UINT wrappedPosition = playbackIndex % totalNumSamples;
    playbackIndex++;

    return data[ wrappedPosition ];
}