예제 #1
0
/**
 Loads the contents of a CSV file into this matrix.

 The file is parsed with FileParser and must have the same number of columns in every row.
 The matrix is resized to the row and column counts reported by the parser, and every cell
 is converted with Util::stringToDouble.

 @param filename: the path of the CSV file to load
 @return true if the file was parsed and copied into the matrix, false if parsing failed,
         the column count was inconsistent, or the matrix could not be resized
*/
bool MatrixDouble::loadFromCSVFile(const string &filename){
    
    //Parse the CSV file
    FileParser parser;
    
    if( !parser.parseCSVFile(filename,true) ){
        errorLog << "loadFromCSVFile(const string &filename) - Failed to parse CSV file!" << endl;
        return false;
    }
    
    if( !parser.getConsistentColumnSize() ){
        errorLog << "loadFromCSVFile(const string &filename) - The CSV file does not have a consistent number of columns!" << endl;
        return false;
    }
    
    const UINT rows = parser.getRowSize();
    const UINT cols = parser.getColumnSize();
    
    //Resize the matrix, stopping here on failure so the copy below never writes through an invalid dataPtr
    if( !resize(rows, cols) ){
        errorLog << "loadFromCSVFile(const string &filename) - Failed to resize the matrix to " << rows << " x " << cols << "!" << endl;
        return false;
    }
    
    //Copy every parsed cell into the matrix
    for(UINT i=0; i<rows; i++){
        for(UINT j=0; j<cols; j++){
            dataPtr[i][j] = Util::stringToDouble( parser[i][j] );
        }
    }
    
    return true;
}
bool LabelledRegressionData::loadDatasetFromCSVFile(const string &filename,const UINT numInputDimensions,const UINT numTargetDimensions){

    fstream file;
    string value;
    clear();
    datasetName = "NOT_SET";
    infoText = "";

    //Clear any previous data
    clear();
    
    //Parse the CSV file
    FileParser parser;
    
    if( !parser.parseCSVFile(filename,true) ){
        errorLog << "loadDatasetFromCSVFile(...) - Failed to parse CSV file!" << endl;
        return false;
    }
    
    if( !parser.getConsistentColumnSize() ){
        errorLog << "loadDatasetFromCSVFile(...) - The CSV file does not have a consistent number of columns!" << endl;
        return false;
    }
    
    if( parser.getColumnSize() != numInputDimensions+numTargetDimensions ){
        errorLog << "loadDatasetFromCSVFile(...) - The number of columns in the CSV file (" << parser.getColumnSize() << ")";
        errorLog << " does not match the number of input dimensions plus the number of target dimensions (" << numInputDimensions+numTargetDimensions << ")" << endl;
        return false;
    }
    
    //Setup the labelled classification data
    setInputAndTargetDimensions(numInputDimensions, numTargetDimensions);
    
    UINT n = 0;
    VectorDouble inputVector(numInputDimensions);
    VectorDouble targetVector(numTargetDimensions);
    for(UINT i=0; i<parser.getRowSize(); i++){
        
        //Reset n
        n = 0;
        
        //Get the input vector
        for(UINT j=0; j<numInputDimensions; j++){
            inputVector[j] = Util::stringToDouble( parser[i][n++] );
        }
        
        //Get the target vector
        for(UINT j=0; j<numTargetDimensions; j++){
            targetVector[j] = Util::stringToDouble( parser[i][n++] );
        }
        
        //Add the labelled sample to the dataset
        if( !addSample(inputVector, targetVector) ){
            warningLog << "loadDatasetFromCSVFile(string filename) - Could not add sample " << i << " to the dataset!" << endl;
        }
    }
    
    return true;
}
/**
 Loads a labelled classification dataset from a CSV file.

 One column of every row holds the class label; all remaining columns, in their original
 order, form the sample vector. Rows that addSample rejects are reported with a warning
 and skipped. The class labels are sorted once all rows have been processed.

 @param filename: the path of the CSV file to load
 @param classLabelColumnIndex: the zero-based index of the column holding the class label; must be less than the number of columns in the file
 @return true if the file was parsed and its layout was valid, false otherwise
*/
bool LabelledClassificationData::loadDatasetFromCSVFile(string filename,UINT classLabelColumnIndex){

    numDimensions = 0;
    datasetName = "NOT_SET";
    infoText = "";

    //Clear any previous data
    clear();

    //Parse the CSV file
    FileParser parser;
    
    if( !parser.parseCSVFile(filename,true) ){
        errorLog << "loadDatasetFromCSVFile(string filename) - Failed to parse CSV file!" << endl;
        return false;
    }
    
    if( !parser.getConsistentColumnSize() ){
        errorLog << "loadDatasetFromCSVFile(string filename) - The CSV file does not have a consistent number of columns!" << endl;
        return false;
    }
    
    if( parser.getColumnSize() <= 1 ){
        errorLog << "loadDatasetFromCSVFile(string filename) - The CSV file does not have enough columns! It should contain at least two columns!" << endl;
        return false;
    }
    
    //Reject a label column that does not exist, otherwise every row would be indexed out of range below
    const UINT numColumns = parser.getColumnSize();
    if( classLabelColumnIndex >= numColumns ){
        errorLog << "loadDatasetFromCSVFile(string filename) - The class label column index (" << classLabelColumnIndex << ") is out of bounds! The CSV file only has " << numColumns << " columns!" << endl;
        return false;
    }
    
    //Every column except the class label column is a sample dimension
    numDimensions = numColumns-1;
    
    const UINT numRows = parser.getRowSize();
    VectorDouble sample(numDimensions);
    for(UINT i=0; i<numRows; i++){
        //Get the class label
        const UINT classLabel = Util::stringToInt( parser[i][classLabelColumnIndex] );
        
        //Get the sample data, skipping over the class label column
        UINT j = 0;
        for(UINT n=0; n<numColumns; n++){
            if( n != classLabelColumnIndex ){
                sample[j++] = Util::stringToDouble( parser[i][n] );
            }
        }
        
        //Add the labelled sample to the dataset
        if( !addSample(classLabel, sample) ){
            warningLog << "loadDatasetFromCSVFile(string filename) - Could not add sample " << i << " to the dataset!" << endl;
        }
    }

    sortClassLabels();
    
    return true;
}
/**
 Loads an unlabelled dataset from a CSV file.

 Every column of every row is treated as one dimension of a sample, so the number of
 dimensions is set to the number of columns reported by the parser. Rows that addSample
 rejects are reported with a warning and skipped.

 @param filename: the path of the CSV file to load
 @return true if the file was parsed and had a consistent number of columns, false otherwise
*/
bool UnlabelledClassificationData::loadDatasetFromCSVFile(string filename) {

    datasetName = "NOT_SET";
    infoText = "";

    //Clear any previous data
    clear();

    //Parse the CSV file
    FileParser parser;

    if( !parser.parseCSVFile(filename,true) ) {
        errorLog << "loadDatasetFromCSVFile(string filename) - Failed to parse CSV file!" << endl;
        return false;
    }

    if( !parser.getConsistentColumnSize() ) {
        errorLog << "loadDatasetFromCSVFile(string filename) - The CSV file does not have a consistent number of columns!" << endl;
        return false;
    }

    //Every column is a sample dimension, there is no label column
    numDimensions = parser.getColumnSize();

    const UINT numRows = parser.getRowSize();
    VectorDouble sample(numDimensions);
    for(UINT i=0; i<numRows; i++) {

        //Convert the current row into a sample vector
        for(UINT j=0; j<numDimensions; j++) {
            sample[j] = Util::stringToDouble( parser[i][j] );
        }

        //Add the sample to the dataset
        if( !addSample(sample) ) {
            warningLog << "loadDatasetFromCSVFile(string filename) - Could not add sample " << i << " to the dataset!" << endl;
        }
    }

    return true;
}
예제 #5
0
bool ClassificationData::loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex){

    numDimensions = 0;
    datasetName = "NOT_SET";
    infoText = "";

    //Clear any previous data
    clear();

    //Parse the CSV file
    FileParser parser;
    
    if( !parser.parseCSVFile(filename,true) ){
        errorLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - Failed to parse CSV file!" << endl;
        return false;
    }
    
    if( !parser.getConsistentColumnSize() ){
        errorLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndexe) - The CSV file does not have a consistent number of columns!" << endl;
        return false;
    }
    
    if( parser.getColumnSize() <= 1 ){
        errorLog << "loadDatasetFromCSVFile(const string &filename,const UINT classLabelColumnIndex) - The CSV file does not have enough columns! It should contain at least two columns!" << endl;
        return false;
    }
    
    //Set the number of dimensions
    numDimensions = parser.getColumnSize()-1;

    //Reserve the memory for the data
    data.resize( parser.getRowSize(), ClassificationSample(numDimensions) );
   
    //Loop over the samples and add them to the data set
    UINT classLabel = 0;
    UINT j = 0;
    UINT n = 0;
    totalNumSamples = parser.getRowSize();
    for(UINT i=0; i<totalNumSamples; i++){
        //Get the class label
        classLabel = Util::stringToInt( parser[i][classLabelColumnIndex] );
        
        //Set the class label
        data[i].setClassLabel(classLabel);
        
        //Get the sample data
        j=0;
        n=0;
        while( j != numDimensions ){
            if( n != classLabelColumnIndex ){
                data[i][j++] = Util::stringToDouble( parser[i][n] );
            }
            n++;
        }
        
        //Update the class tracker
        if( classTracker.size() == 0 ){
            ClassTracker tracker(classLabel,1);
            classTracker.push_back(tracker);
        }else{
            bool labelFound = false;
            const size_t numClasses = classTracker.size();
            for(size_t i=0; i<numClasses; i++){
                if( classLabel == classTracker[i].classLabel ){
                    classTracker[i].counter++;
                    labelFound = true;
                    break;
                }
            }
            if( !labelFound ){
                ClassTracker tracker(classLabel,1);
                classTracker.push_back(tracker);
            }
        }
    }

    //Sort the class labels
    sortClassLabels();
    
    return true;
}
bool TimeSeriesClassificationData::loadDatasetFromCSVFile(const std::string &filename){
    
    numDimensions = 0;
    datasetName = "NOT_SET";
    infoText = "";
    
    //Clear any previous data
    clear();
    
    //Parse the CSV file
    FileParser parser;
    
    if( !parser.parseCSVFile(filename,true) ){
        errorLog << "loadDatasetFromCSVFile(const std::string &filename) - Failed to parse CSV file!" << std::endl;
        return false;
    }
    
    if( !parser.getConsistentColumnSize() ){
        errorLog << "loadDatasetFromCSVFile(const std::string &filename) - The CSV file does not have a consistent number of columns!" << std::endl;
        return false;
    }
    
    if( parser.getColumnSize() <= 2 ){
        errorLog << "loadDatasetFromCSVFile(const std::string &filename) - The CSV file does not have enough columns! It should contain at least three columns!" << std::endl;
        return false;
    }
    
    //Set the number of dimensions
    numDimensions = parser.getColumnSize()-2;
    
    //Reserve the memory for the data
    data.reserve( parser.getRowSize() );
    
    UINT sampleCounter = 0;
    UINT lastSampleCounter = 0;
    UINT classLabel = 0;
    UINT j = 0;
    UINT n = 0;
    VectorFloat sample(numDimensions);
    MatrixFloat timeseries;
    for(UINT i=0; i<parser.getRowSize(); i++){
        
        sampleCounter = grt_from_str< UINT >( parser[i][0] );
        
        //Check to see if a new timeseries has started, if so then add the previous time series as a sample and start recording the new time series
        if( sampleCounter != lastSampleCounter && i != 0 ){
            //Add the labelled sample to the dataset
            if( !addSample(classLabel, timeseries) ){
                warningLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Could not add sample " << i << " to the dataset!" << std::endl;
            }
            timeseries.clear();
        }
        lastSampleCounter = sampleCounter;
        
        //Get the class label
        classLabel = grt_from_str< UINT >( parser[i][1] );
        
        //Get the sample data
        j=0;
        n=2;
        while( j != numDimensions ){
            sample[j++] = grt_from_str< Float >( parser[i][n] );
            n++;
        }
        
        //Add the sample to the timeseries
        timeseries.push_back( sample );
    }
	if ( timeseries.getSize() > 0 )
        //Add the labelled sample to the dataset
        if( !addSample(classLabel, timeseries) ){
            warningLog << "loadDatasetFromCSVFile(const std::string &filename,const UINT classLabelColumnIndex) - Could not add sample " << parser.getRowSize()-1 << " to the dataset!" << std::endl;
        }
    
    return true;
}