Example 1
void DeConv3DLayer::backward(const UpdateCallback &callback) {
  backwardActivation();
  int batchSize = getOutputGrad()->getHeight();
  if (biases_ && biases_->getWGrad()) {
    bpropBiases();
    biases_->getParameterPtr()->incUpdate(callback);
  }
  REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str());
  for (size_t i = 0; i < inputLayers_.size(); ++i) {
    if (weights_[i]->getWGrad() || this->needGradient_) {
      int M = M_[i];
      int N = N_[i];
      int K = K_[i];
      Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
      const MatrixPtr &inMat = getInputValue(i);
      for (int n = 0; n < batchSize; ++n) {
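        // Unfold the output gradient of sample n into column form so the
        // weight and input gradients below reduce to per-group GEMMs.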
        colBuf_->vol2Col(
            getOutputGrad()->getData() + n * getOutputGrad()->getStride(),
            numFilters_,
            imgSizeD_[i],
            imgSizeH_[i],
            imgSizeW_[i],
            filterSizeZ_[i],
            filterSizeY_[i],
            filterSize_[i],
            strideZ_[i],
            strideY_[i],
            stride_[i],
            paddingZ_[i],
            paddingY_[i],
            padding_[i]);
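        // Weight gradient, group by group: dW_g += colBuf_g * input_g^T.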
        if (weights_[i]->getWGrad()) {
          real *inData = inMat->getData() + n * inMat->getStride();
          for (int g = 0; g < groups_[i]; ++g) {
            MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K);
            MatrixPtr wGradMatSub =
                weights_[i]->getWGrad()->subMatrix(g * K, K);
            MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_);
            wGradMatSub->mul(
                *colBufDataSub, *(inMatSub->getTranspose()), 1.0, 1.0);
            inData += M * N;
          }
        }
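        // Input gradient, group by group: dIn_g += W_g^T * colBuf_g.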
        if (getInputGrad(i)) {
          real *preGrad =
              getInputGrad(i)->getData() + n * getInputGrad(i)->getStride();
          for (int g = 0; g < groups_[i]; ++g) {
            MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K);
            MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K);
            MatrixPtr inGradMatSub =
                Matrix::create(preGrad, M, N, false, useGpu_);
            inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 1.0);
            preGrad += M * N;
          }
        }
      }
      weights_[i]->getParameterPtr()->incUpdate(callback);
    }
  }
}
Example 2
void SelectiveFullyConnectedLayer::backward(const UpdateCallback& callback) {
  backwardActivation();
  MatrixPtr oGrad = getOutputGrad();
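  // When only selected columns were computed, view the output gradient through
  // the sparse CSR pattern of interOutput_; otherwise treat it as dense.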
  if (!fullOutput_) {
    interOutGrad_ = Matrix::createSparseMatrix(oGrad->getData(),
                                               interOutput_->getRows(),
                                               interOutput_->getCols(),
                                               interOutput_->getHeight(),
                                               interOutput_->getWidth(),
                                               interOutput_->getElementCnt(),
                                               FLOAT_VALUE,
                                               SPARSE_CSR,
                                               /*trans=*/false,
                                               /*useGpu=*/useGpu_);
  } else {
    interOutGrad_ = Matrix::create(oGrad->getData(),
                                   oGrad->getHeight(),
                                   oGrad->getWidth(),
                                   /*trans=*/false,
                                   /*useGpu=*/useGpu_);
  }

  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*interOutGrad_, 1);
    biases_->getParameterPtr()->incUpdate(callback);
  }

  // The backward pass differs from FullyConnectedLayer because the weight
  // matrix is stored transposed.
  for (size_t i = 0; i < inputNum_; i++) {
    AsyncGpuBlock block;
    MatrixPtr preGrad = getInputGrad(i);
    if (preGrad) {
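      // Input gradient: dIn += dOut * W (the stored weight needs no transpose).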
      REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
      preGrad->mul(*interOutGrad_, *weights_[i]->getW(), 1, 1);
    }

    MatrixPtr wGrad = weights_[i]->getWGrad();
    if (wGrad) {
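      // Weight gradient: dW += dOut^T * input.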
      REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
      MatrixPtr input = getInputValue(i);
      wGrad->mul(*interOutGrad_->getTranspose(), *input, 1, 1);
    }

    {
      REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
      weights_[i]->getParameterPtr()->incUpdate(callback);
    }
  }
}
Example 3
void SequenceConcatLayer::backward(const UpdateCallback& callback) {
  /* activation */
  backwardActivation();

  if (biases_ && biases_->getWGrad()) {
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);

    // Schedule the bias parameter update
    biases_->getParameterPtr()->incUpdate(callback);
  }

  MatrixPtr inputGrad1 = getInputGrad(0);
  MatrixPtr inputGrad2 = getInputGrad(1);
  MatrixPtr outputGrad = getOutputGrad();
  auto startPositions1 = getInput(0).sequenceStartPositions->getVector(false);
  auto startPositions2 = getInput(1).sequenceStartPositions->getVector(false);

  size_t numSequences1 = startPositions1->getSize() - 1;
  size_t numSequences2 = startPositions2->getSize() - 1;

  CHECK_EQ(numSequences1, numSequences2);

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerBackward", getName().c_str());

    size_t offset = 0;
    size_t leftNumIns = 0;
    size_t rightNumIns = 0;
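    // Split the concatenated output gradient back into the per-sequence
    // slices of the two input gradients.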
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      leftNumIns = starts1[seqId + 1] - starts1[seqId];
      if (inputGrad1) {
        inputGrad1->subMatrix(starts1[seqId], leftNumIns)
            ->add(*(outputGrad->subMatrix(offset, leftNumIns)));
      }
      offset += leftNumIns;

      rightNumIns = starts2[seqId + 1] - starts2[seqId];
      if (inputGrad2) {
        inputGrad2->subMatrix(starts2[seqId], rightNumIns)
            ->add(*(outputGrad->subMatrix(offset, rightNumIns)));
      }
      offset += rightNumIns;
    }
  }
}
Example 4
void CudnnConvLayer::backward(const UpdateCallback &callback) {
  backwardActivation();

  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("CudnnConvBpBiasTimer", getName().c_str());
    for (int g = 0; g < groups_[0]; ++g) {
      real *biasGrad = biases_->getWGrad()->getData() + biasOffset_ * g;
      real *outGrad = getOutputGrad()->getData() + outputOffset_ * g;
      hl_convolution_backward_bias(biasDesc_, biasGrad, outputDesc_, outGrad);
    }
    biases_->getParameterPtr()->incUpdate(callback);
  }

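  // Filter and input gradients are handled by the per-input convolution
  // projections.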
  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    projections_[i]->backward(callback);
  }
}
Example 5
void ConcatenateLayer2::backward(const UpdateCallback& callback) {
  /* Do activation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  AsyncGpuBlock block;
  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("Concat2BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBias_);
    biases_->getParameterPtr()->incUpdate(callback);
  }

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    if (projections_[i]) {
      projections_[i]->backward(callback);
    }
  }
}
Example 6
void MultiplexLayer::backward(const UpdateCallback& callback) {
  /* Do derivation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  MatrixPtr outG = getOutputGrad();

  {
    REGISTER_TIMER_INFO("BwLMultiplexTimer", getName().c_str());
    AsyncGpuBlock block;
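    // Route each copied slice of the output gradient back to the input it
    // was selected from in the forward pass.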
    for (const CopyInfo& info : copySchedule_) {
      if (getInputGrad(info.copyIdx + 1)) {
        getInputGrad(info.copyIdx + 1)
            ->subMatrix(info.startIdx, info.length, tmpDest_)
            ->add(*outG->subMatrix(info.startIdx, info.length, tmpSrc_));
      }
    }
  }
}
Example 7
void ConcatenateLayer::backward(const UpdateCallback& callback) {
  (void)callback;

  /* Do activation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  const MatrixPtr& out = getOutputGrad();
  int offset = 0;

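  // Accumulate each input's column slice of the output gradient at its offset.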
  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    const MatrixPtr& in = getInputGrad(i);
    size_t inSize = getInputValue(i)->getWidth();
    if (in) {
      in->addAtOffset(*out, offset);
    }
    offset += inSize;
  }
}
Example 8
void FeatureMapExpandLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inGrad = getInputGrad(0);
  if (NULL == inGrad) {
    return;
  }
  MatrixPtr outGrad = getOutputGrad();
  size_t batchSize = getInput(0).getBatchSize();
  int imgSize = inGrad->getWidth();
  /* Do activation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }
  {
    AsyncGpuBlock asyncGpuBlock;
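    // Reduce the gradient over the numFilters_ replicas back into the single
    // input feature map (replicas laid out along rows or along columns).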
    if (asRowVector_) {
      for (size_t i = 0; i < batchSize; i++) {
        MatrixPtr outGradTmp =
            Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
                           numFilters_,
                           imgSize,
                           false,
                           useGpu_);
        MatrixPtr inGradTmp = Matrix::create(
            inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_);
        inGradTmp->collectBias(*outGradTmp, 1);
      }
    } else {
      for (size_t i = 0; i < batchSize; i++) {
        MatrixPtr outGradTmp =
            Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
                           imgSize,
                           numFilters_,
                           false,
                           useGpu_);
        MatrixPtr inGradTmp = Matrix::create(
            inGrad->getData() + i * imgSize, imgSize, 1, false, useGpu_);
        inGradTmp->sumRows(*outGradTmp, 1, 1);
      }
    }
  }
}
Example 9
void MixedLayer::backward(const UpdateCallback& callback) {
  /* Do activation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBias_);

    /* Schedule the bias parameter update */
    biases_->getParameterPtr()->incUpdate(callback);
  }

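  // Each projection propagates its own input and weight gradients.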
  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    if (projections_[i]) {
      projections_[i]->backward(callback);
    }
  }

  for (auto& op : operators_) {
    op->backward();
  }
}