示例#1
0
/**
 * Partition a data set into several index-based views.
 *
 * An index list 0, 1, ..., dataset.samples() - 1 is built, optionally
 * permuted with std::random_shuffle, and cut into numberOfGroups consecutive
 * slices. One DataSetView per slice is appended to groups. Every slice but
 * the last has the same length; the last one runs to the end of the index
 * list and therefore takes whatever is left over.
 *
 * @param groups container that receives the newly created views
 * @param dataset data set the views are created for
 * @param numberOfGroups number of views to create; the condition
 *                       numberOfGroups > 1 is passed to OPENANN_CHECK
 * @param shuffling when true, the index list is shuffled before it is cut
 */
void split(std::vector<DataSetView>& groups, DataSet& dataset,
           int numberOfGroups, bool shuffling)
{
  OPENANN_CHECK(numberOfGroups > 1);

  groups.reserve(numberOfGroups);

  // Identity index list: position k holds sample index k.
  std::vector<int> order;
  order.reserve(dataset.samples());
  int next = 0;
  while(next < dataset.samples())
  {
    order.push_back(next);
    ++next;
  }

  // NOTE(review): if samples() returns an integer type, the division below
  // truncates before 0.5 is added, so floor(x + 0.5) does not actually round
  // to nearest - confirm whether plain truncation is the intended behavior.
  const int sliceLength = std::floor(dataset.samples() / numberOfGroups + 0.5);

  if(shuffling)
    std::random_shuffle(order.begin(), order.end());

  std::vector<int>::iterator sliceBegin = order.begin();
  for(int g = 0; g < numberOfGroups; ++g)
  {
    const bool isLastGroup = (g == numberOfGroups - 1);
    // The last group is not limited to sliceLength; it runs to the end.
    std::vector<int>::iterator sliceEnd =
        isLastGroup ? order.end() : sliceBegin + sliceLength;
    groups.push_back(DataSetView(dataset, sliceBegin, sliceEnd));
    sliceBegin = sliceEnd;
  }
}
示例#2
0
/**
 * Divide a data set into two index-based views according to a ratio.
 *
 * An index list 0, 1, ..., dataset.samples() - 1 is built and optionally
 * permuted with std::random_shuffle. The first ceil(ratio * samples) indices
 * form the first view and the remaining indices form the second view; both
 * views are appended to groups in that order.
 *
 * @param groups container that receives the two newly created views
 * @param dataset data set the views are created for
 * @param ratio share of the samples assigned to the first view; passed to
 *              OPENANN_CHECK_WITHIN together with the bounds 0.0 and 1.0
 * @param shuffling when true, the index list is shuffled before it is cut
 */
void split(std::vector<DataSetView>& groups, DataSet& dataset, double ratio,
           bool shuffling)
{
  OPENANN_CHECK_WITHIN(ratio, 0.0, 1.0);

  groups.reserve(2);

  // Identity index list: position k holds sample index k.
  std::vector<int> order;
  order.reserve(dataset.samples());
  int next = 0;
  while(next < dataset.samples())
  {
    order.push_back(next);
    ++next;
  }

  // Size of the first part, rounded up.
  const int firstPartSize = std::ceil(ratio * dataset.samples());

  if(shuffling)
    std::random_shuffle(order.begin(), order.end());

  // Single split point shared by both views.
  std::vector<int>::iterator cut = order.begin() + firstPartSize;
  groups.push_back(DataSetView(dataset, order.begin(), cut));
  groups.push_back(DataSetView(dataset, cut, order.end()));
}
示例#3
0
/**
 * Draw a random selection of samples from a data set as an index-based view.
 *
 * The number of drawn indices is ceil(dataSet.samples() * fraction).
 *
 * With replacement, each index is drawn independently through
 * RandomNumberGenerator::generateIndex(dataSet.samples()), so the same index
 * may occur more than once.
 *
 * Without replacement, the index list is produced by
 * RandomNumberGenerator::generateIndices(dataSet.samples(), indices, false)
 * and the first ceil(dataSet.samples() * fraction) entries are used.
 * NOTE(review): this presumes generateIndices leaves at least that many
 * entries in the list (i.e. fraction <= 1) - confirm against its definition.
 *
 * @param dataSet data set to draw from
 * @param fraction number of drawn samples relative to dataSet.samples()
 * @param replacement true to draw with replacement, false to draw without
 * @return view on dataSet built from the drawn indices
 */
DataSetView sample(DataSet& dataSet, double fraction, bool replacement)
{
  std::vector<int> indices;
  int samples = std::ceil(dataSet.samples() * fraction);
  indices.reserve(samples);

  RandomNumberGenerator rng;
  if(replacement)
  {
    // reserve() only allocates capacity and leaves size() at 0, so the
    // drawn indices have to be appended; assigning through operator[] would
    // write to elements that do not exist (undefined behavior) and the
    // range returned below would lie past end().
    for(int n = 0; n < samples; n++)
      indices.push_back(rng.generateIndex(dataSet.samples()));
  }
  else
    rng.generateIndices(dataSet.samples(), indices, false);

  return DataSetView(dataSet, indices.begin(), indices.begin() + samples);
}