/* * Create file with the position of each line */ void DataSet::create_position_file(const string& file) { cout << endl; cout << "Trying to create file with all line positions ..." << endl; int pos = file.find("."); string file_tmp = file.substr(0, pos); string x_filename = file_tmp + ".pos_data"; string y_filename = file_tmp + ".pos_labels"; ofstream x_num_file(x_filename.c_str(), ios::binary); ofstream y_num_file(y_filename.c_str(), ios::binary); /* Try to open files */ ifstream xfp(x_filename_.c_str(), ios::binary); if (!xfp) { cout << "Could not open input file " << x_filename_ << endl; exit(EXIT_FAILURE); } ifstream yfp(y_filename_.c_str(), ios::binary); if (!yfp) { cout << "Could not open input file " << y_filename_ << endl; exit(EXIT_FAILURE); } /* Reading the header (first line of file)*/ int tmp; xfp >> num_samples_; xfp >> feature_dim_; yfp >> tmp; if (tmp != num_samples_) { cout << "Number of samples in data and labels file is different" << endl; exit(EXIT_FAILURE); } yfp >> tmp; x_num_file << xfp.tellg(); x_num_file << "\n"; y_num_file << yfp.tellg(); y_num_file << "\n"; /* Going through complete files */ for (int n_samp = 0; n_samp < num_samples_; n_samp++) { Sample sample; sample.x = arma::fvec(feature_dim_); yfp >> sample.y; y_num_file << yfp.tellg(); y_num_file << "\n"; for (int n_feat = 0; n_feat < feature_dim_; n_feat++) { xfp >> sample.x(n_feat); } x_num_file << xfp.tellg(); x_num_file << "\n"; } xfp.close(); yfp.close(); x_num_file.close(); y_num_file.close(); }
/* * Load complete dataset into memory */ void DataSet::load_complete_dataset(const string& x_filename, const string& y_filename) { /* Try to open files */ ifstream xfp(x_filename.c_str(), ios::binary); if (!xfp) { cout << "Could not open input file " << x_filename << endl; exit(EXIT_FAILURE); } ifstream yfp(y_filename.c_str(), ios::binary); if (!yfp) { cout << "Could not open input file " << y_filename << endl; exit(EXIT_FAILURE); } cout << endl; cout << "Loading data file: " << x_filename << " ... " << endl; cout << "Loading data file: " << y_filename << " ... " << endl; /* Reading the header (first line of file)*/ int tmp; long int tmp_samples; xfp >> tmp_samples; num_samples_ = tmp_samples; xfp >> feature_dim_; yfp >> tmp; if (tmp != tmp_samples) { cout << "Number of samples in data and labels file is different" << endl; exit(EXIT_FAILURE); } yfp >> tmp; /* Delete list with data points */ if (!add_points_) samples_.clear(); set<int> labels; /* Going through complete files */ for (int n_samp = 0; n_samp < num_samples_; n_samp++) { Sample sample; sample.x = arma::fvec(feature_dim_); yfp >> sample.y; labels.insert(sample.y); for (int n_feat = 0; n_feat < feature_dim_; n_feat++) { xfp >> sample.x(n_feat); } samples_.push_back(sample); } xfp.close(); yfp.close(); num_classes_ = labels.size(); if (random_) { srand(init_seed()); random_shuffle(samples_.begin(), samples_.end()); } }
void DataSet::load(const string& x_filename, const string& y_filename) { ifstream xfp(x_filename.c_str(), ios::binary); if (!xfp) { cout << "Could not open input file " << x_filename << endl; exit(EXIT_FAILURE); } ifstream yfp(y_filename.c_str(), ios::binary); if (!yfp) { cout << "Could not open input file " << y_filename << endl; exit(EXIT_FAILURE); } cout << "Loading data file: " << x_filename << " ... " << endl; // Reading the header int tmp; xfp >> m_numSamples; xfp >> m_numFeatures; yfp >> tmp; if (tmp != m_numSamples) { cout << "Number of samples in data and labels file is different" << endl; exit(EXIT_FAILURE); } yfp >> tmp; m_samples.clear(); set<int> labels; for (int nSamp = 0; nSamp < m_numSamples; nSamp++) { Sample sample; sample.x = VectorXd(m_numFeatures); sample.id = nSamp; sample.w = 1.0; yfp >> sample.y; labels.insert(sample.y); for (int nFeat = 0; nFeat < m_numFeatures; nFeat++) { xfp >> sample.x(nFeat); } m_samples.push_back(sample); // push sample into dataset } xfp.close(); yfp.close(); m_numClasses = labels.size(); // Find the data range findFeatRange(); cout << "Loaded " << m_numSamples << " samples with " << m_numFeatures; cout << " features and " << m_numClasses << " classes." << endl; }