/**
 * Partition the sample indices of a data set into several groups.
 *
 * An index list 0 .. dataset.samples()-1 is built, optionally shuffled with
 * std::random_shuffle, and cut into numberOfGroups consecutive slices. Every
 * slice but the last has the same length; the last slice runs to the end of
 * the index list and therefore absorbs any remainder. One DataSetView per
 * slice is appended to groups (existing entries in groups are kept).
 *
 * @param groups         output container the views are appended to
 * @param dataset        data set the views refer to
 * @param numberOfGroups number of views to create, checked to be > 1
 * @param shuffling      shuffle the index list before slicing
 */
void split(std::vector<DataSetView>& groups, DataSet& dataset,
           int numberOfGroups, bool shuffling)
{
  OPENANN_CHECK(numberOfGroups > 1);

  std::vector<int> order;
  order.reserve(dataset.samples());
  groups.reserve(numberOfGroups);

  // Identity ordering: order[k] == k.
  for(int idx = 0; idx < dataset.samples(); ++idx)
    order.push_back(idx);

  // NOTE(review): if samples() returns an integer type, the division
  // truncates before 0.5 is added, so this is the truncated quotient rather
  // than a rounded one. Kept as is: the slicing below relies on
  // (numberOfGroups - 1) * sliceLength not exceeding the number of indices.
  int sliceLength = std::floor(dataset.samples() / numberOfGroups + 0.5);

  if(shuffling)
    std::random_shuffle(order.begin(), order.end());

  for(int g = 0; g < numberOfGroups; ++g)
  {
    std::vector<int>::iterator first = order.begin() + g * sliceLength;
    // The final group takes everything that is left.
    std::vector<int>::iterator last =
        (g == numberOfGroups - 1) ? order.end() : first + sliceLength;
    groups.push_back(DataSetView(dataset, first, last));
  }
}
/**
 * Split the sample indices of a data set into two groups by a ratio.
 *
 * An index list 0 .. dataset.samples()-1 is built and optionally shuffled
 * with std::random_shuffle. The first ceil(ratio * dataset.samples()) indices
 * form the first view, the remaining indices form the second view. Both views
 * are appended to groups in that order (existing entries are kept).
 *
 * @param groups    output container the two views are appended to
 * @param dataset   data set the views refer to
 * @param ratio     share of the samples that goes into the first view,
 *                  checked with OPENANN_CHECK_WITHIN against 0.0 and 1.0
 * @param shuffling shuffle the index list before cutting it
 */
void split(std::vector<DataSetView>& groups, DataSet& dataset, double ratio,
           bool shuffling)
{
  OPENANN_CHECK_WITHIN(ratio, 0.0, 1.0);

  std::vector<int> order;
  order.reserve(dataset.samples());
  groups.reserve(2);

  // Identity ordering: order[k] == k.
  for(int idx = 0; idx < dataset.samples(); ++idx)
    order.push_back(idx);

  // Size of the first group, rounded up.
  int firstSize = std::ceil(ratio * dataset.samples());

  if(shuffling)
    std::random_shuffle(order.begin(), order.end());

  // Position at which the index list is divided.
  std::vector<int>::iterator cut = order.begin() + firstSize;
  groups.push_back(DataSetView(dataset, order.begin(), cut));
  groups.push_back(DataSetView(dataset, cut, order.end()));
}
/**
 * Build a view on a randomly chosen subset of a data set.
 *
 * The number of drawn indices is ceil(dataSet.samples() * fraction).
 *
 * @param dataSet     data set the returned view refers to
 * @param fraction    size of the subset relative to dataSet.samples()
 * @param replacement if true, every index is drawn independently through
 *                    RandomNumberGenerator::generateIndex, so an index may
 *                    occur more than once; otherwise the index list is
 *                    produced by RandomNumberGenerator::generateIndices and
 *                    its leading part is used
 * @return view constructed from the first `samples` entries of the index list
 */
DataSetView sample(DataSet& dataSet, double fraction, bool replacement)
{
  std::vector<int> indices;
  int samples = std::ceil(dataSet.samples() * fraction);
  indices.reserve(samples);
  RandomNumberGenerator rng;
  if(replacement)
  {
    // reserve() only allocates capacity and leaves size() at 0, so the
    // elements have to be appended; assigning through operator[] here would
    // write outside the vector's valid range.
    for(int n = 0; n < samples; n++)
      indices.push_back(rng.generateIndex(dataSet.samples()));
  }
  else
  {
    // NOTE(review): presumably fills `indices` with a random ordering of
    // 0 .. samples()-1; the slice below then requires
    // samples <= indices.size(), i.e. fraction <= 1 - confirm against
    // RandomNumberGenerator.
    rng.generateIndices(dataSet.samples(), indices, false);
  }
  return DataSetView(dataSet, indices.begin(), indices.begin() + samples);
}