Example #1
0
int64_t MultiDataProvider::getNextBatchInternal(int64_t size,
                                                DataBatch* batch) {
  batch->clear();
  for (size_t i = 0; i < subDataProviders_.size(); ++i) {
    // calc size according to data ratio
    int64_t subSize =
        (int64_t)(1.0 * size * config_.sub_data_configs(i).data_ratio() /
                  totalDataRatio_);
    DataBatch subBatch;
    int64_t realSize =
        subDataProviders_[i]->getNextBatchInternal(subSize, &subBatch);
    if (realSize == 0) {
      // current subDataProvider has no data
      if (!isTestMode()) {
        // in train mode
        if (config_.sub_data_configs(i).is_main_data()) {
          // is main data provider. then return 0
          batch->clear();
          return 0;
        } else {
          // not main data provider, reset current subDataProvider and try again
          subDataProviders_[i]->reset();
          subBatch.clear();
          realSize =
              subDataProviders_[i]->getNextBatchInternal(subSize, &subBatch);
          CHECK_GT(realSize, 0);
        }
      } else {
        // in test mode, make an empty argument
        Argument emptyArgu;
        std::vector<Argument> argus;
        argus.push_back(emptyArgu);
        batch->appendArguments(argus, 0, -1);
        continue;
      }
    }
    batch->appendArguments(subBatch.getStreams(), subBatch.getSize(), i);
  }
  return batch->getSize();
}