void DoubleBuffer::asyncLoadBatch() {
  int64_t actualSize = 0;
  if (useGpu_) {
    hl_set_device(FLAGS_gpu_id);
  }
  setPending(false);

  while (true) {
    taskReadySem_.wait();
    if (stopping_) break;

    while (batchSize_ == 0 && !stopping_) {
      usleep(5);
    }
    if (stopping_) break;

    do {
      DataBatch newBatch;
      {
        REGISTER_TIMER("getNextBatchInternal");
        actualSize = dataPool_->getNextBatchInternal(batchSize_, &newBatch);
      }
      insertOneBatch(&newBatch);
    } while (actualSize > 0 && !stopping_);
  }
}
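// Note (not part of the PaddlePaddle sources above): REGISTER_TIMER is placed
// inside its own { } block so that only the getNextBatchInternal() call is
// measured, not the rest of the loop body. A minimal sketch of how such a
// scoped RAII timer can behave is shown below; ScopedTimer is a hypothetical
// stand-in for illustration, not Paddle's actual timer implementation.
#include <chrono>
#include <cstdio>
#include <string>

class ScopedTimer {
 public:
  // Start timing when the object is constructed.
  explicit ScopedTimer(std::string name)
      : name_(std::move(name)), start_(std::chrono::steady_clock::now()) {}

  // Report elapsed time when the enclosing scope ends.
  ~ScopedTimer() {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start_)
                  .count();
    std::printf("%s took %lld us\n", name_.c_str(),
                static_cast<long long>(us));
  }

 private:
  std::string name_;
  std::chrono::steady_clock::time_point start_;
};

int main() {
  {
    ScopedTimer timer("getNextBatchInternal");  // analogous to REGISTER_TIMER
    // ... only the work inside this block is timed ...
  }  // timer reports here, when the brace closes
  return 0;
}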
void Trainer::time() {
  startTrain();

  trainerInternal_.getParameterUpdater()->startPass();
  evaluator_->start();

  DataBatch dataBatch;
  int32_t batchSize = config_->getOptConfig().batch_size();
  int32_t num = dataProvider_->getNextBatch(batchSize, &dataBatch);
  CHECK_EQ(num, batchSize) << "The sample number is less than batch size "
                           << num << " != " << batchSize;

  CHECK(dataBatch.getSize()) << "No data from data provider";

  std::vector<paddle::Argument> outputs;
  // warm-up ("burning time") iterations, not included in the timed loop below
  LOG(INFO) << "Burning time...";
  for (int n = 0; n < 10; ++n) {
    trainerInternal_.trainOneBatch(n, dataBatch, &outputs);
  }
  LOG(INFO) << "Burning time end.";

  for (int n = 0; n < FLAGS_test_period; n++) {
    if (FLAGS_feed_data) {
      REGISTER_TIMER("GetData");
      num = dataProvider_->getNextBatch(batchSize, &dataBatch);
    }
    if (num != batchSize) {
      break;
    }
    {
      REGISTER_TIMER("FwdBwd");
      trainerInternal_.trainOneBatch(n, dataBatch, &outputs);
    }
  }

  globalStat.setThreadInfo(true);
  globalStat.printSegTimerStatus();
  globalStat.reset();

  finishTrain();
}
void SelectiveFullyConnectedLayer::forward(PassType passType) {
  REGISTER_TIMER("selective_fc.forward");
  Layer::forward(passType);

  getSelectiveCols();
  size_t height = getInput(0).getBatchSize();
  size_t width = getSize();
  size_t nnz = height * width;
  if (!fullOutput_) {
    CHECK(selCols_);
    CHECK(height == selCols_->getHeight());
    CHECK(width == selCols_->getWidth());
    nnz = selCols_->getElementCnt();
  }

  // Layer::ResetOutput(); here we set outV/outG as SparseMatrix manually.
  // This outV should be used as input of MaxIdLayer and softmax activation.
  reserveOutput(height, width, nnz);

  bool flag = true;
  for (size_t i = 0; i < inputNum_; i++) {
    MatrixPtr input = getInputValue(i);
    MatrixPtr weight = weights_[i]->getW();
    size_t hsize = input->getHeight();
    size_t wsize = weight->getHeight();
    real scaleT = i == 0 ? real(0) : real(1);

    flag = nnz < (hsize * wsize) * config_.selective_fc_full_mul_ratio() &&
           !fullOutput_;
    if (flag) {
      // If the indices are highly sparse,
      // manually compute the multiplication of
      // the input vector and the selected rows.
      REGISTER_TIMER("selective.plain");
      interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
    } else {
      // If the indices are not sparse enough,
      // use a full mul instead.
      REGISTER_TIMER("selective.mul");
      if (fullOutput_) {
        interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
      } else {
        Matrix::resizeOrCreate(mmat_,
                               hsize,
                               wsize,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
        mmat_->mul(*input, *weight->getTranspose());
        interOutput_->add3(mmat_);
      }
    }
  }

  if (biases_) {
    interOutput_->addBias(*(biases_->getW()), 1);
  }

  flag = (passType_ == PASS_TEST && config_.selective_fc_pass_generation() &&
          !fullOutput_);
  if (flag) {
    // During generation, the output of this layer is a sparse CSR matrix,
    // which is probably the input of a maxid layer.
    // If the model is trained with multi-class-cross-entropy-with-selfnorm,
    // the activation of this layer should be exponential, not softmax.
    Argument arg;
    arg.value = Matrix::create(interOutput_->getData(),
                               1,
                               nnz,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
    //! TODO(yuyang18): Why we cannot invoke forwardActivation here?
    activation_->forward(arg).check();
  } else /* train, and test during training, not generating */ {
    // During training, this layer's output value is a *Matrix*, which is the
    // input of e.g. multi-class-cross-entropy.
    // While training, every sample has an equal number of selected
    // columns to be activated.
    // Note that indices of multi-class-cross-entropy need to be remapped
    // to this index, e.g. sample = [1,3,5] and 3 is gold, then label is 1.
    forwardActivation();
  }
}