// Advance the underlying iterator and copy the next batch into `val`.
// Returns true when a batch was loaded, false once the source is exhausted.
inline bool LoadNext(DataBatch &val) {
  if (!base_->Next()) return false;
  val.CopyFromDense(base_->Value());
  return true;
}
void testProtoSequenceDataProvider(int* numPerSlotType, bool async, bool useGpu) { mkDir(kTestDir); DataBatch data; prepareData(&data, numPerSlotType, /* iid */ true, useGpu); writeData(data, useGpu, /* dataCompression */ false); DataConfig config; config.set_type("proto_sequence"); config.set_files(kProtoFileList); config.set_async_load_data(async); unique_ptr<DataProvider> dataProvider(DataProvider::create(config, useGpu)); dataProvider->setSkipShuffle(); EXPECT_EQ(data.getSize(), dataProvider->getSize()); int64_t batchSize = 10; DataBatch batch; vector<Argument>& args1 = data.getStreams(); ICpuGpuVectorPtr sequenceStartPositions1 = args1[0].sequenceStartPositions; dataProvider->reset(); size_t args1Offset = 0; while (dataProvider->getNextBatch(batchSize, &batch) > 0) { CHECK_EQ(data.getNumStreams(), batch.getNumStreams()); vector<Argument>& args2 = batch.getStreams(); ICpuGpuVectorPtr sequenceStartPositions2 = args2[0].sequenceStartPositions; for (auto& arg : args1) { // args1 should not has sequence EXPECT_EQ(true, !arg.sequenceStartPositions); } for (auto& arg : args2) { // args2 should has sequence EXPECT_NE(true, !arg.sequenceStartPositions); } size_t numSeqs = batch.getNumSequences(); checkSampleSequence(args1, args2, args1Offset, numSeqs, useGpu); args1Offset += numSeqs; } EXPECT_EQ(args1Offset, (size_t)data.getNumSequences()); rmDir(kTestDir); }
void writeData(const DataBatch& batch, bool useGpu, bool dataCompression) { DataHeader header; const vector<Argument>& arguments = batch.getStreams(); for (auto& argument : arguments) { SlotDef* slotDef = header.add_slot_defs(); slotDef->set_type(getSlotType(argument)); slotDef->set_dim(getSlotDim(argument)); } VLOG(1) << "header=" << header.DebugString(); int64_t totalSeqs = batch.getNumSequences(); int64_t seq = 0; ICpuGpuVectorPtr sequenceStartPositions = arguments[0].sequenceStartPositions; int64_t numWritten = 0; vector<string> curProtoFiles = dataCompression ? protoFilesCompressed : protoFiles; for (size_t i = 0; i < curProtoFiles.size(); ++i) { int64_t numSeqs = totalSeqs * (i + 1) / curProtoFiles.size() - totalSeqs * i / curProtoFiles.size(); ofstream os(curProtoFiles[i]); CHECK(os) << "Fail to open " << curProtoFiles[i]; unique_ptr<ProtoWriter> writer(new ProtoWriter(&os, dataCompression)); CHECK(writer->write(header)); for (int j = 0; j < numSeqs; ++j, ++seq) { int64_t begin = seq; int64_t end = seq + 1; if (sequenceStartPositions) { begin = sequenceStartPositions->getElement(seq); end = sequenceStartPositions->getElement(seq + 1); } for (int pos = begin; pos < end; ++pos) { DataSample sample; makeSample(arguments, pos, pos == begin, &sample, useGpu); CHECK(writer->write(sample)); ++numWritten; } } writer.reset(nullptr); os.close(); } CHECK_EQ(arguments[0].getBatchSize(), numWritten); }
// Benchmarks batch training: warms up with 10 untimed batches, then runs
// FLAGS_test_period timed batches (optionally re-fetching data each
// iteration when FLAGS_feed_data is set) and prints segment timer stats.
void Trainer::time() {
  startTrain();

  trainerInternal_.getParameterUpdater()->startPass();
  evaluator_->start();

  DataBatch dataBatch;
  int32_t batchSize = config_->getOptConfig().batch_size();
  // Require one full batch up front; timing a partial batch would skew results.
  int32_t num = dataProvider_->getNextBatch(batchSize, &dataBatch);
  CHECK_EQ(num, batchSize) << "The sample number is less than batch size "
                           << num << " != " << batchSize;
  CHECK(dataBatch.getSize()) << "No data from data provider";

  std::vector<paddle::Argument> outputs;
  // burning time: warm up caches/allocators before the measured loop
  LOG(INFO) << "Burning time...";
  for (int n = 0; n < 10; ++n) {
    trainerInternal_.trainOneBatch(n, dataBatch, &outputs);
  }
  LOG(INFO) << "Burning time end.";

  for (int n = 0; n < FLAGS_test_period; n++) {
    if (FLAGS_feed_data) {
      // Timer scoped to this block: measures only the data fetch.
      REGISTER_TIMER("GetData");
      num = dataProvider_->getNextBatch(batchSize, &dataBatch);
    }

    // Stop early on a partial batch; when FLAGS_feed_data is off, `num`
    // still holds the initial full batch size, so this never triggers.
    if (num != batchSize) {
      break;
    }

    {
      // Timer scoped to this block: measures only forward/backward.
      REGISTER_TIMER("FwdBwd");
      trainerInternal_.trainOneBatch(n, dataBatch, &outputs);
    }
  }
  globalStat.setThreadInfo(true);
  globalStat.printSegTimerStatus();
  globalStat.reset();

  finishTrain();
}
// Assembles one batch by pulling a proportional share (per data_ratio) from
// each sub-provider and appending the resulting streams side by side.
// Returns the merged batch size, or 0 when the main provider is exhausted.
int64_t MultiDataProvider::getNextBatchInternal(int64_t size, DataBatch* batch) {
  batch->clear();
  for (size_t i = 0; i < subDataProviders_.size(); ++i) {
    // calc size according to data ratio
    int64_t subSize =
        (int64_t)(1.0 * size * config_.sub_data_configs(i).data_ratio() /
                  totalDataRatio_);
    DataBatch subBatch;
    int64_t realSize =
        subDataProviders_[i]->getNextBatchInternal(subSize, &subBatch);
    if (realSize == 0) {
      // current subDataProvider has no data
      if (!isTestMode()) {
        // in train mode
        if (config_.sub_data_configs(i).is_main_data()) {
          // is main data provider. then return 0 — the pass ends when the
          // main data runs out, regardless of the auxiliary providers.
          batch->clear();
          return 0;
        } else {
          // not main data provider, reset current subDataProvider and try
          // again; auxiliary data is cycled to keep feeding the main stream.
          subDataProviders_[i]->reset();
          subBatch.clear();
          realSize =
              subDataProviders_[i]->getNextBatchInternal(subSize, &subBatch);
          CHECK_GT(realSize, 0);
        }
      } else {
        // in test mode, make an empty argument as a placeholder so stream
        // positions stay aligned (note: appended with stream id -1).
        Argument emptyArgu;
        std::vector<Argument> argus;
        argus.push_back(emptyArgu);
        batch->appendArguments(argus, 0, -1);
        continue;
      }
    }
    batch->appendArguments(subBatch.getStreams(), subBatch.getSize(), i);
  }
  return batch->getSize();
}
void calcGradient(bool useGpu, comData& Data) { FLAGS_use_gpu = useGpu; FLAGS_config = configFile; *ThreadLocalRand::getSeed() = 0; srand(0); Trainer trainer; trainer.init(TrainerConfigHelper::createFromFlagConfig()); Data.parameters = trainer.getGradientMachine()->getParameters(); DataBatch dataBatch; int32_t batchSize = trainer.getConfig().opt_config().batch_size(); trainer.getDataProvider()->setSkipShuffle(); trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch); CHECK(dataBatch.getSize()) << "No data from data provider"; vector<Argument>& inArgs = dataBatch.getStreams(); trainer.getGradientMachine()->start(); for (int i = 0; i < 2; ++i) { trainer.getGradientMachine()->forwardBackward( inArgs, &Data.outArgs, PASS_TRAIN); } trainer.getGradientMachine()->finish(); }
inline void FreeSpace(DataBatch &a) { a.FreeSpaceDense(); }
// Builds a DataBatch with dense space allocated for the configured output
// shape, batch size, label width, and extra shapes.
inline DataBatch Create(void) {
  DataBatch batch;
  batch.AllocSpaceDense(oshape_, batch_size_, label_width_, extra_shape_);
  return batch;
}
// Runs the remote parameter updater side by side with a local SGD updater
// driven by identical gradients, and returns the accumulated absolute
// element-wise difference between the two resulting parameter sets.
// Returns -1.0 when the configured algorithm is not supported by the check.
double checkRemoteParameterUpdater(TrainerForTest& trainer) {
  auto gradientMachine = trainer.getGradientMachine();
  auto parameterUpdater = trainer.getParameterUpdaterForTest();
  auto dataProvider = trainer.getDataProvider();
  auto& parameters = gradientMachine->getParameters();
  const TrainerConfig& config = trainer.getConfig();
  const string& alg = config.opt_config().algorithm();

  // Clone every parameter (value + gradient) onto the CPU so the local
  // reference updater starts from exactly the same state.
  vector<ParameterPtr> parameterCheck;
  for (auto& parameter : parameters) {
    parameterCheck.emplace_back(
        new Parameter(parameter->getConfig(), /* useGpu= */ false));
    parameterCheck.back()
        ->getBuf(PARAMETER_VALUE)
        ->copyFrom(*parameter->getBuf(PARAMETER_VALUE));
    parameterCheck.back()
        ->getBuf(PARAMETER_GRADIENT)
        ->copyFrom(*parameter->getBuf(PARAMETER_GRADIENT));
  }

  // Only plain SGD has a local reference implementation here.
  std::unique_ptr<ParameterUpdater> parameterUpdaterCheck;
  if (alg == TrainAlgorithm::SGD) {
    parameterUpdaterCheck.reset(new SgdLocalUpdater(config.opt_config()));
  } else {
    LOG(INFO) << "unsupported algorithm in remote parameter check: " << alg;
    return -1.0;
  }
  parameterUpdaterCheck->init(parameterCheck);

  // gradientMachine->start(config, *dataProvider);
  DataBatch dataBatch;
  int32_t batchSize = config.opt_config().batch_size();
  dataProvider->getNextBatch(batchSize, &dataBatch);
  CHECK(dataBatch.getSize()) << "No data from data provider";
  int64_t actualBatchSize = dataBatch.getSize();
  const vector<Argument>& inArgs = dataBatch.getStreams();
  vector<Argument> outArgs;

  // After each backward pass, mirror the freshly computed gradient into the
  // local copy, then let the remote updater consume the original.
  UpdateCallback updateCallback = [parameterUpdater,
                                   parameterCheck](Parameter* para) {
    parameterCheck[para->getID()]
        ->getBuf(PARAMETER_GRADIENT)
        ->copyFrom(*para->getBuf(PARAMETER_GRADIENT));
    parameterUpdater->update(para);
  };

  parameterUpdater->startPass();
  parameterUpdaterCheck->startPass();

  // Run enough batches to cover two full parameter-fetch cycles.
  for (int i = 0; i < config.opt_config().num_batches_per_get_parameter() * 2;
       ++i) {
    PassType passType = parameterUpdater->startBatch(actualBatchSize);
    gradientMachine->forwardBackward(
        inArgs, &outArgs, passType, updateCallback);
    parameterUpdater->finishBatch(0);

    // Apply the mirrored gradients through the local reference updater.
    parameterUpdaterCheck->startBatch(actualBatchSize);
    for (auto& para : parameterCheck) {
      parameterUpdaterCheck->update(para.get());
    }
    parameterUpdaterCheck->finishBatch(0);
  }

  // Compare final parameter values element-wise and accumulate |v1 - v2|.
  double sum = 0.0f;
  for (size_t i = 0; i != parameters.size(); ++i) {
    real *v1, *v2;
    // Parameters may live on GPU; stage them through a CPU vector first.
    CpuVector trainerPara(parameters[i]->getSize());
    trainerPara.copyFrom(*parameters[i]->getBuf(PARAMETER_VALUE));
    if (!FLAGS_use_gpu) {
      v1 = parameters[i]->getBuf(PARAMETER_VALUE)->getData();
    } else {
      v1 = trainerPara.getData();
    }
    v2 = parameterCheck[i]->getBuf(PARAMETER_VALUE)->getData();
    size_t size = parameters[i]->getSize();
    double diff = 0;
    for (size_t j = 0; j < size; ++j) {
      diff += fabs(v1[j] - v2[j]);
    }
    sum += diff;
    LOG(INFO) << setiosflags(ios::left) << setfill(' ') << setw(20)
              << parameters[i]->getName() << "diff=" << setw(15) << diff;
  }
  parameterUpdater->finishPass();
  parameterUpdaterCheck->finishPass();
  gradientMachine->finish();
  return sum;
}
// Builds a DataBatch with space allocated for the configured output shape
// and batch size.
inline DataBatch Create(void) {
  DataBatch batch;
  batch.AllocSpace(oshape_, batch_size_);
  return batch;
}
// End-to-end test of the "proto" DataProvider: prepares reference data,
// writes it to (optionally compressed) proto files, then reads it back in
// batches and compares every sample and sequence boundary to the reference.
void testProtoDataProvider(int* numPerSlotType,
                           bool iid,
                           bool async,
                           bool useGpu,
                           bool dataCompression,
                           int numConstantSlots = 0) {
  mkDir(kTestDir);
  DataBatch data;
  prepareData(&data, numPerSlotType, iid, useGpu);
  writeData(data, useGpu, dataCompression);

  DataConfig config;
  config.set_type("proto");
  config.set_files(dataCompression ? kProtoFileListCompressed : kProtoFileList);
  config.set_async_load_data(async);

  // Constant slots are synthesized by the provider itself; append matching
  // constant columns (value i + 11) to the reference data for comparison.
  for (int i = 0; i < numConstantSlots; ++i) {
    config.add_constant_slots(i + 11);
    MatrixPtr w = Matrix::create(data.getSize(),
                                 1,
                                 /* trans= */ false,
                                 /* useGpu= */ false);
    w->assign(config.constant_slots(i));
    data.appendData(w);
  }

  unique_ptr<DataProvider> dataProvider(DataProvider::create(config, useGpu));
  dataProvider->setSkipShuffle();
  EXPECT_EQ(data.getSize(), dataProvider->getSize());

  int64_t batchSize = 10;
  DataBatch batch;

  size_t seq1 = 0;  // index of the next reference sequence to compare
  vector<Argument>& args1 = data.getStreams();
  ICpuGpuVectorPtr sequenceStartPositions1 = args1[0].sequenceStartPositions;

  dataProvider->reset();

  while (dataProvider->getNextBatch(batchSize, &batch) > 0) {
    CHECK_EQ(data.getNumStreams(), batch.getNumStreams());
    vector<Argument>& args2 = batch.getStreams();
    ICpuGpuVectorPtr sequenceStartPositions2 = args2[0].sequenceStartPositions;
    // iid data must carry no sequence info; non-iid data must carry it.
    for (auto& arg : args2) {
      EXPECT_EQ(iid, !arg.sequenceStartPositions);
    }
    size_t numSeqs = batch.getNumSequences();
    VLOG(1) << "numSeqs=" << numSeqs;
    for (size_t seq2 = 0; seq2 < numSeqs; ++seq1, ++seq2) {
      // Without sequence info, each "sequence" is a single sample.
      int64_t begin1 = seq1;
      int64_t end1 = seq1 + 1;
      if (sequenceStartPositions1) {
        begin1 = sequenceStartPositions1->getElement(seq1);
        end1 = sequenceStartPositions1->getElement(seq1 + 1);
        EXPECT_LT(seq1, sequenceStartPositions1->getSize() - 1);
      }
      int64_t begin2 = seq2;
      int64_t end2 = seq2 + 1;
      if (sequenceStartPositions2) {
        begin2 = sequenceStartPositions2->getElement(seq2);
        end2 = sequenceStartPositions2->getElement(seq2 + 1);
      }
      VLOG(1) << " begin1=" << begin1 << " end1=" << end1
              << " begin2=" << begin2 << " end2=" << end2;
      // Matching sequences must have the same length and identical samples.
      EXPECT_EQ(end1 - begin1, end2 - begin2);
      for (int i = 0; i < end1 - begin1; ++i) {
        checkSample(args1, begin1 + i, args2, begin2 + i, useGpu);
      }
    }
  }
  // Every reference sequence must have been consumed exactly once.
  EXPECT_EQ(seq1, (size_t)data.getNumSequences());
  rmDir(kTestDir);
}