Example #1
void DoubleBuffer::asyncLoadBatch() {
  int64_t actualSize = 0;
  if (useGpu_) {
    // bind this worker thread to the configured GPU device
    hl_set_device(FLAGS_gpu_id);
  }
  setPending(false);

  while (true) {
    // block until a new load task is signaled
    taskReadySem_.wait();
    if (stopping_) break;

    // spin until the requested batch size has been published
    while (batchSize_ == 0 && !stopping_) {
      usleep(5);
    }
    if (stopping_) break;

    do {
      DataBatch newBatch;
      {
        // time only the fetch from the underlying data pool
        REGISTER_TIMER("getNextBatchInternal");
        actualSize = dataPool_->getNextBatchInternal(batchSize_, &newBatch);
      }
      // enqueue the loaded batch; stop once the pool is exhausted
      insertOneBatch(&newBatch);
    } while (actualSize > 0 && !stopping_);
  }
}
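All three examples in this section wrap a code region in REGISTER_TIMER so that its elapsed time is accumulated under a named segment and reported later (see globalStat.printSegTimerStatus() in Example #2). As a rough illustration of the scoped-timer idea, here is a minimal, self-contained sketch; StatRegistry and ScopedTimer are hypothetical names used for this illustration only, not PaddlePaddle's actual implementation.

#include <chrono>
#include <cstdio>
#include <map>
#include <string>

// Hypothetical accumulator: maps a segment name to its total elapsed time.
struct StatRegistry {
  std::map<std::string, std::chrono::nanoseconds> segments;
  void print() const {
    for (const auto& kv : segments) {
      std::printf("%-24s %lld ns\n", kv.first.c_str(),
                  static_cast<long long>(kv.second.count()));
    }
  }
};

// RAII timer: starts on construction, adds the elapsed time to the named
// segment on destruction (i.e. when the enclosing scope ends).
class ScopedTimer {
 public:
  ScopedTimer(StatRegistry* stats, std::string name)
      : stats_(stats),
        name_(std::move(name)),
        start_(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    stats_->segments[name_] += std::chrono::steady_clock::now() - start_;
  }

 private:
  StatRegistry* stats_;
  std::string name_;
  std::chrono::steady_clock::time_point start_;
};

With such a helper, a macro like REGISTER_TIMER("name") can expand to a local ScopedTimer, so the timed region is exactly the enclosing braces, as in the examples here.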
Example #2
void Trainer::time() {
  startTrain();

  trainerInternal_.getParameterUpdater()->startPass();
  evaluator_->start();

  DataBatch dataBatch;
  int32_t batchSize = config_->getOptConfig().batch_size();
  int32_t num = dataProvider_->getNextBatch(batchSize, &dataBatch);
  CHECK_EQ(num, batchSize) << "The sample number does not match the batch size: "
                           << num << " != " << batchSize;

  CHECK(dataBatch.getSize()) << "No data from data provider";

  std::vector<paddle::Argument> outputs;
  // warm-up ("burning") iterations, not included in the timing below
  LOG(INFO) << "Burning time...";
  for (int n = 0; n < 10; ++n) {
    trainerInternal_.trainOneBatch(n, dataBatch, &outputs);
  }
  LOG(INFO) << "Burning time end.";

  for (int n = 0; n < FLAGS_test_period; n++) {
    if (FLAGS_feed_data) {
      REGISTER_TIMER("GetData");
      num = dataProvider_->getNextBatch(batchSize, &dataBatch);
    }

    // stop early if the provider could not deliver a full batch
    if (num != batchSize) {
      break;
    }

    {
      REGISTER_TIMER("FwdBwd");
      trainerInternal_.trainOneBatch(n, dataBatch, &outputs);
    }
  }
  globalStat.setThreadInfo(true);
  globalStat.printSegTimerStatus();
  globalStat.reset();

  finishTrain();
}
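Trainer::time follows a common micro-benchmarking pattern: run a few untimed warm-up batches, then time a fixed number of batches and print per-segment statistics. Below is a condensed, hypothetical sketch of that pattern, reusing the StatRegistry/ScopedTimer helpers sketched above; runOneBatch() is a stand-in for trainerInternal_.trainOneBatch(...).

// Stand-in for one forward + backward pass over a batch.
void runOneBatch() { /* ... */ }

void benchmark(StatRegistry* stats, int warmupIters, int timedIters) {
  // warm-up iterations: stabilize caches, allocator pools, device state
  for (int n = 0; n < warmupIters; ++n) {
    runOneBatch();
  }
  // timed iterations: each pass accumulates into the "FwdBwd" segment
  for (int n = 0; n < timedIters; ++n) {
    ScopedTimer timer(stats, "FwdBwd");
    runOneBatch();
  }
  stats->print();  // analogous to globalStat.printSegTimerStatus()
}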
Example #3
void SelectiveFullyConnectedLayer::forward(PassType passType) {
  REGISTER_TIMER("selective_fc.forward");
  Layer::forward(passType);

  getSelectiveCols();
  size_t height = getInput(0).getBatchSize();
  size_t width = getSize();
  size_t nnz = height * width;
  if (!fullOutput_) {
    CHECK(selCols_);
    CHECK(height == selCols_->getHeight());
    CHECK(width == selCols_->getWidth());
    nnz = selCols_->getElementCnt();
  }

  // instead of calling Layer::resetOutput(), set outV/outG as a SparseMatrix
  // manually; this outV is used as the input of MaxIdLayer and of the
  // softmax activation
  reserveOutput(height, width, nnz);

  bool flag = true;
  for (size_t i = 0; i < inputNum_; i++) {
    MatrixPtr input = getInputValue(i);
    MatrixPtr weight = weights_[i]->getW();
    size_t hsize = input->getHeight();
    size_t wsize = weight->getHeight();
    real scaleT = i == 0 ? real(0) : real(1);

    flag = nnz < (hsize * wsize) * config_.selective_fc_full_mul_ratio() &&
           !fullOutput_;
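    // e.g. (illustrative numbers) with hsize = 128, wsize = 10000 and
    // selective_fc_full_mul_ratio = 0.02, the sparse path is taken only
    // when nnz < 128 * 10000 * 0.02 = 25600 selected elements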
    if (flag) {
      // if the indices are highly sparse,
      // manually compute the multiplication of
      // the input vector and the selected rows.
      REGISTER_TIMER("selective.plain");
      interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
    } else {
      // if the indices are not sparse enough,
      // use a full mul instead
      REGISTER_TIMER("selective.mul");
      if (fullOutput_) {
        interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
      } else {
        Matrix::resizeOrCreate(mmat_,
                               hsize,
                               wsize,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
        mmat_->mul(*input, *weight->getTranspose());
        interOutput_->add3(mmat_);
      }
    }
  }

  if (biases_) {
    interOutput_->addBias(*(biases_->getW()), 1);
  }

  flag = (passType_ == PASS_TEST && config_.selective_fc_pass_generation() &&
          !fullOutput_);
  if (flag) {
    // during generation, the output of this layer is a sparse CSR matrix,
    // which is typically the input of a maxid layer.
    // if the model is trained with multi-class-cross-entropy-with-selfnorm,
    // the activation of this layer should be exponential, not softmax.

    Argument arg;
    arg.value = Matrix::create(interOutput_->getData(),
                               1,
                               nnz,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
    //! TODO(yuyang18): Why can't we invoke forwardActivation here?
    activation_->forward(arg).check();
  } else /* training, or testing during training; not generating */ {
    // during training, the output value of this layer is a *Matrix*, which is
    // the input of e.g. multi-class-cross-entropy

    // while training, every sample has an equal number of selected
    // columns to be activated.
    // note that the labels for multi-class-cross-entropy need to be remapped
    // into this selected-column index space.
    // e.g. if the selected columns are [1,3,5] and column 3 is the gold
    // label, the remapped label is 1

    forwardActivation();
  }
}
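The label remapping mentioned in the last comment can be made concrete. Below is a small hypothetical helper (not part of PaddlePaddle) that maps a gold column index to its position within one sample's selected-column list:

#include <cassert>
#include <vector>

// Given the selected column indices of one sample (as in a CSR row) and the
// gold column, return the remapped label, i.e. the gold column's position
// within the selection.
int remapLabel(const std::vector<int>& selectedCols, int goldCol) {
  for (size_t i = 0; i < selectedCols.size(); ++i) {
    if (selectedCols[i] == goldCol) return static_cast<int>(i);
  }
  assert(false && "gold column must be among the selected columns");
  return -1;
}

// e.g. remapLabel({1, 3, 5}, 3) == 1, matching the comment above.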