int64_t MultiDataProvider::getNextBatchInternal(int64_t size, DataBatch* batch) { batch->clear(); for (size_t i = 0; i < subDataProviders_.size(); ++i) { // calc size according to data ratio int64_t subSize = (int64_t)(1.0 * size * config_.sub_data_configs(i).data_ratio() / totalDataRatio_); DataBatch subBatch; int64_t realSize = subDataProviders_[i]->getNextBatchInternal(subSize, &subBatch); if (realSize == 0) { // current subDataProvider has no data if (!isTestMode()) { // in train mode if (config_.sub_data_configs(i).is_main_data()) { // is main data provider. then return 0 batch->clear(); return 0; } else { // not main data provider, reset current subDataProvider and try again subDataProviders_[i]->reset(); subBatch.clear(); realSize = subDataProviders_[i]->getNextBatchInternal(subSize, &subBatch); CHECK_GT(realSize, 0); } } else { // in test mode, make an empty argument Argument emptyArgu; std::vector<Argument> argus; argus.push_back(emptyArgu); batch->appendArguments(argus, 0, -1); continue; } } batch->appendArguments(subBatch.getStreams(), subBatch.getSize(), i); } return batch->getSize(); }