Example #1
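// Backward pass of the 3D transposed-convolution (deconvolution) layer:
// back-propagate through the activation, accumulate the bias gradient, then
// for each input unfold the output gradient with vol2Col and accumulate the
// filter and input gradients with per-group GEMMs.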
void DeConv3DLayer::backward(const UpdateCallback &callback) {
  backwardActivation();
  int batchSize = getOutputGrad()->getHeight();
  if (biases_ && biases_->getWGrad()) {
    bpropBiases();
    biases_->getParameterPtr()->incUpdate(callback);
  }
  REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str());
  for (size_t i = 0; i < inputLayers_.size(); ++i) {
    if (weights_[i]->getWGrad() || this->needGradient_) {
      int M = M_[i];
      int N = N_[i];
      int K = K_[i];
      Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
      const MatrixPtr &inMat = getInputValue(i);
      for (int n = 0; n < batchSize; ++n) {
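        // Unfold this sample's output gradient into column form so that the
        // weight and input gradients reduce to per-group GEMMs.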
        colBuf_->vol2Col(
            getOutputGrad()->getData() + n * getOutputGrad()->getStride(),
            numFilters_,
            imgSizeD_[i],
            imgSizeH_[i],
            imgSizeW_[i],
            filterSizeZ_[i],
            filterSizeY_[i],
            filterSize_[i],
            strideZ_[i],
            strideY_[i],
            stride_[i],
            paddingZ_[i],
            paddingY_[i],
            padding_[i]);
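        // Filter gradient: dW += colBuf * input^T for each group.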
        if (weights_[i]->getWGrad()) {
          real *inData = inMat->getData() + n * inMat->getStride();
          for (int g = 0; g < groups_[i]; ++g) {
            MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K);
            MatrixPtr wGradMatSub =
                weights_[i]->getWGrad()->subMatrix(g * K, K);
            MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_);
            wGradMatSub->mul(
                *colBufDataSub, *(inMatSub->getTranspose()), 1.0, 1.0);
            inData += M * N;
          }
        }
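        // Input gradient: dIn += W^T * colBuf for each group.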
        if (getInputGrad(i)) {
          real *preGrad =
              getInputGrad(i)->getData() + n * getInputGrad(i)->getStride();
          for (int g = 0; g < groups_[i]; ++g) {
            MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K);
            MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K);
            MatrixPtr inGradMatSub =
                Matrix::create(preGrad, M, N, false, useGpu_);
            inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 1.0);
            preGrad += M * N;
          }
        }
      }
      weights_[i]->getParameterPtr()->incUpdate(callback);
    }
  }
}
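// Backward pass of the MKL-packed recurrent layer in batch mode: convert the
// sequence gradient to batch layout, walk the batches from last to first, and
// accumulate the weight gradient either per batch or per sequence, whichever
// needs fewer GEMM calls.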
void MKLPackedRecurrentLayer::backwardBatch(int batchSize,
                                            size_t numSequences,
                                            const int* starts) {
  if (!batchGrad_) {
    batchGrad_.reset(new SequenceToBatch(useGpu_));
  }
  batchGrad_->shareIndexWith(*batchValue_);

  size_t numBatch = batchGrad_->getNumBatch();
  bool backwardByBatch = numBatch < numSequences;

  batchGrad_->copyFromSeq(*output_.grad);
  {
    REGISTER_TIMER_INFO("RecurrentBwData", getName().c_str());
    /* backward one batch */
    for (int n = (int)numBatch - 1; n >= 0; n--) {
      MatrixPtr batchGrad = batchGrad_->getBatchValue(n);
      MatrixPtr batchValue =
          batchValue_->getBatchValue(n, batchGrad->getHeight());

      Argument arg;
      arg.value = batchValue;
      arg.grad = batchGrad;
      activation_->backward(arg).check();

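      // Propagate the gradient to the previous batch step through the packed
      // transposed weight.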
      if (n != 0) {
        batchValue = batchGrad_->getBatchValue(n - 1, batchGrad->getHeight());
        packed_weightT_->gemm_compute(batchGrad, batchValue);
      }

      if (backwardByBatch && weight_->getWGrad()) {
        if (n != 0) {
          /* backward weight */
          batchValue =
              batchValue_->getBatchValue(n - 1, batchGrad->getHeight());
          weight_->getWGrad()->mul(
              *batchValue->getTranspose(), *batchGrad, 1, 1);
        }
      }
    }
  }

  batchGrad_->copyBackSeq(*output_.grad);

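  // Per-sequence weight gradient: dW += value(t)^T * grad(t+1) for every step
  // of each sequence (the step order flips when the layer runs reversed).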
  if (!backwardByBatch && weight_->getWGrad()) {
    REGISTER_TIMER_INFO("RecurrentBwWeight", getName().c_str());
    for (size_t seq = 0; seq < numSequences; ++seq) {
      int len = starts[seq + 1] - starts[seq];
      weight_->getWGrad()->mul(
          *output_.value
               ->subMatrix(reversed_ ? starts[seq] + 1 : starts[seq], len - 1)
               ->getTranspose(),
          *output_.grad->subMatrix(reversed_ ? starts[seq] : starts[seq] + 1,
                                   len - 1),
          1,
          1);
    }
  }
}
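// Forward pass of the selective fully-connected layer: compute output values
// only for the selected columns when the selection is sparse enough,
// otherwise fall back to a full matrix multiplication; during generation the
// output is a sparse CSR matrix and its activation is applied manually.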
void SelectiveFullyConnectedLayer::forward(PassType passType) {
  REGISTER_TIMER("selective_fc.forward");
  Layer::forward(passType);

  getSelectiveCols();
  size_t height = getInput(0).getBatchSize();
  size_t width = getSize();
  size_t nnz = height * width;
  if (!fullOutput_) {
    CHECK(selCols_);
    CHECK(height == selCols_->getHeight());
    CHECK(width == selCols_->getWidth());
    nnz = selCols_->getElementCnt();
  }

  // Instead of Layer::resetOutput(), we set outV/outG as a SparseMatrix
  // manually here; this outV should be used as the input of MaxIdLayer
  // and of the softmax activation.
  reserveOutput(height, width, nnz);

  bool flag = true;
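  // Accumulate contributions from every input; scaleT = 0 overwrites the
  // output for the first input and scaleT = 1 accumulates afterwards.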
  for (size_t i = 0; i < inputNum_; i++) {
    MatrixPtr input = getInputValue(i);
    MatrixPtr weight = weights_[i]->getW();
    size_t hsize = input->getHeight();
    size_t wsize = weight->getHeight();
    real scaleT = i == 0 ? real(0) : real(1);

    flag = nnz < (hsize * wsize) * config_.selective_fc_full_mul_ratio() &&
           !fullOutput_;
    if (flag) {
      // if the indices are highly sparse,
      // manually compute the multiplication of
      // the input and the selected rows of the weight.
      REGISTER_TIMER("selective.plain");
      interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
    } else {
      // if the indices are not sparse enough,
      // use a full mul instead
      REGISTER_TIMER("selective.mul");
      if (fullOutput_) {
        interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
      } else {
        Matrix::resizeOrCreate(mmat_,
                               hsize,
                               wsize,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
        mmat_->mul(*input, *weight->getTranspose());
        interOutput_->add3(mmat_);
      }
    }
  }

  if (biases_) {
    interOutput_->addBias(*(biases_->getW()), 1);
  }

  flag = (passType_ == PASS_TEST && config_.selective_fc_pass_generation() &&
          !fullOutput_);
  if (flag) {
    // during generation, the output of this layer is a sparse CSR matrix,
    // which is probably the input of a maxid layer.
    // if the model is trained with multi-class-cross-entropy-with-selfnorm,
    // the activation of this layer should be exponential, not softmax.

    Argument arg;
    arg.value = Matrix::create(interOutput_->getData(),
                               1,
                               nnz,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
    //! TODO(yuyang18): Why can't we invoke forwardActivation here?
    activation_->forward(arg).check();
  } else /* training, or testing during training; not generating */ {
    // during training, this layer's output value is a *Matrix*, which is the
    // input of e.g. multi-class-cross-entropy

    // while training, every sample has an equal number of selected
    // columns to be activated.
    // note that the label indices of multi-class-cross-entropy need to be
    // remapped to indices within the selected columns,
    // e.g. if sample = [1,3,5] and 3 is the gold label, then the label is 1

    forwardActivation();
  }
}