/*
 * Create file with the position of each line 
 */
void DataSet::create_position_file(const string& file) {
    cout << endl;
    cout << "Trying to create file with all line positions ..." << endl;

    int pos = file.find(".");
    string file_tmp = file.substr(0, pos);
    string x_filename = file_tmp + ".pos_data";
    string y_filename = file_tmp + ".pos_labels";

    ofstream x_num_file(x_filename.c_str(), ios::binary);
    ofstream y_num_file(y_filename.c_str(), ios::binary);
    
    /* Try to open files */
    ifstream xfp(x_filename_.c_str(), ios::binary);
    if (!xfp) {
        cout << "Could not open input file " << x_filename_ << endl;
        exit(EXIT_FAILURE);
    }
    ifstream yfp(y_filename_.c_str(), ios::binary);
    if (!yfp) {
        cout << "Could not open input file " << y_filename_ << endl;
        exit(EXIT_FAILURE);
    }

    /* Reading the header (first line of file)*/
    int tmp;
    xfp >> num_samples_;
    xfp >> feature_dim_;
    yfp >> tmp;
    if (tmp != num_samples_) {
        cout << "Number of samples in data and labels file is different" << endl;
        exit(EXIT_FAILURE);
    }
    yfp >> tmp;

    x_num_file << xfp.tellg();
    x_num_file << "\n";
    y_num_file << yfp.tellg();
    y_num_file << "\n";
    /* Going through complete files */
    for (int n_samp = 0; n_samp < num_samples_; n_samp++) {
        Sample sample;
        sample.x = arma::fvec(feature_dim_);
        yfp >> sample.y;
        y_num_file << yfp.tellg();
        y_num_file << "\n";
        for (int n_feat = 0; n_feat < feature_dim_; n_feat++) {
            xfp >> sample.x(n_feat);
        }
        x_num_file << xfp.tellg();
        x_num_file << "\n";
    }
    xfp.close();
    yfp.close();
    
    x_num_file.close();
    y_num_file.close();

}
/*
 * Load complete dataset into memory
 *
 * x_filename: data file; its header holds the number of samples followed by
 *             the feature dimension, then feature_dim_ values per sample.
 * y_filename: labels file; its header holds the number of samples followed by
 *             one more value that is read and discarded, then one label per
 *             sample.
 *
 * Sets num_samples_ and feature_dim_ from the data file header and
 * num_classes_ to the number of distinct labels read from this labels file.
 * samples_ is cleared first unless add_points_ is set, in which case the new
 * samples are appended. If random_ is set, samples_ is shuffled after
 * seeding rand() with init_seed().
 *
 * The process is terminated with EXIT_FAILURE when a file cannot be opened,
 * the sample counts of the two headers differ, or a value cannot be read.
 */
void DataSet::load_complete_dataset(const string& x_filename,
        const string& y_filename) {
    /* Try to open files */
    ifstream xfp(x_filename.c_str(), ios::binary);
    if (!xfp) {
        cout << "Could not open input file " << x_filename << endl;
        exit(EXIT_FAILURE);
    }
    ifstream yfp(y_filename.c_str(), ios::binary);
    if (!yfp) {
        cout << "Could not open input file " << y_filename << endl;
        exit(EXIT_FAILURE);
    }
    cout << endl;
    cout << "Loading data file: " << x_filename << " ... " << endl;
    cout << "Loading data file: " << y_filename << " ... " << endl;

    /* Reading the header (first line of file) */
    int tmp;
    long int tmp_samples;
    xfp >> tmp_samples;
    num_samples_ = tmp_samples;
    xfp >> feature_dim_;
    yfp >> tmp;
    if (!xfp || !yfp) {
        cout << "Could not read header of input files" << endl;
        exit(EXIT_FAILURE);
    }
    if (tmp != tmp_samples) {
        cout << "Number of samples in data and labels file is different" << endl;
        exit(EXIT_FAILURE);
    }
    /* Second header value of the labels file is read and discarded */
    yfp >> tmp;
    if (!yfp) {
        cout << "Could not read header of input files" << endl;
        exit(EXIT_FAILURE);
    }

    /* Delete list with data points */
    if (!add_points_)
        samples_.clear();
    set<int> labels;
    /* Going through complete files */
    for (int n_samp = 0; n_samp < num_samples_; n_samp++) {
        Sample sample;
        sample.x = arma::fvec(feature_dim_);
        /* Stop instead of storing a sample with unread values */
        if (!(yfp >> sample.y)) {
            cout << "Could not read label of sample " << n_samp << endl;
            exit(EXIT_FAILURE);
        }
        labels.insert(sample.y);
        for (int n_feat = 0; n_feat < feature_dim_; n_feat++) {
            if (!(xfp >> sample.x(n_feat))) {
                cout << "Could not read features of sample " << n_samp << endl;
                exit(EXIT_FAILURE);
            }
        }
        samples_.push_back(sample);
    }
    xfp.close();
    yfp.close();
    num_classes_ = labels.size();

    if (random_) {
        srand(init_seed());
        random_shuffle(samples_.begin(), samples_.end());
    }
}
// Load a dataset from a data file and a labels file.
//
// x_filename: data file; its header holds the number of samples followed by
//             the number of features, then m_numFeatures values per sample.
// y_filename: labels file; its header holds the number of samples followed by
//             one more value that is read and discarded, then one label per
//             sample.
//
// Replaces the contents of m_samples. Every sample gets its index in the file
// as id and a weight of 1.0. Sets m_numSamples, m_numFeatures and
// m_numClasses (number of distinct labels), then calls findFeatRange().
//
// The process is terminated with EXIT_FAILURE when a file cannot be opened,
// the sample counts of the two headers differ, or a value cannot be read.
void DataSet::load(const string& x_filename, const string& y_filename) {
    ifstream xfp(x_filename.c_str(), ios::binary);
    if (!xfp) {
        cout << "Could not open input file " << x_filename << endl;
        exit(EXIT_FAILURE);
    }
    ifstream yfp(y_filename.c_str(), ios::binary);
    if (!yfp) {
        cout << "Could not open input file " << y_filename << endl;
        exit(EXIT_FAILURE);
    }
    cout << "Loading data file: " << x_filename << " ... " << endl;

    // Reading the header
    int tmp;
    xfp >> m_numSamples;
    xfp >> m_numFeatures;
    yfp >> tmp;
    if (!xfp || !yfp) {
        cout << "Could not read header of input files" << endl;
        exit(EXIT_FAILURE);
    }
    if (tmp != m_numSamples) {
        cout << "Number of samples in data and labels file is different" << endl;
        exit(EXIT_FAILURE);
    }
    // Second header value of the labels file is read and discarded
    yfp >> tmp;
    if (!yfp) {
        cout << "Could not read header of input files" << endl;
        exit(EXIT_FAILURE);
    }

    m_samples.clear();
    set<int> labels;
    for (int nSamp = 0; nSamp < m_numSamples; nSamp++) {
        Sample sample;
        sample.x = VectorXd(m_numFeatures);
        sample.id = nSamp;
        sample.w = 1.0;
        // Stop instead of storing a sample with unread values
        if (!(yfp >> sample.y)) {
            cout << "Could not read label of sample " << nSamp << endl;
            exit(EXIT_FAILURE);
        }
        labels.insert(sample.y);
        for (int nFeat = 0; nFeat < m_numFeatures; nFeat++) {
            if (!(xfp >> sample.x(nFeat))) {
                cout << "Could not read features of sample " << nSamp << endl;
                exit(EXIT_FAILURE);
            }
        }
        m_samples.push_back(sample); // push sample into dataset
    }
    xfp.close();
    yfp.close();
    m_numClasses = labels.size();

    // Find the data range
    findFeatRange();

    cout << "Loaded " << m_numSamples << " samples with " << m_numFeatures;
    cout << " features and " << m_numClasses << " classes." << endl;
}