void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) {
  bool syncFlag = hl_get_sync_flag();

  /* Calculate the W-gradient for the current layer */
  if (weight_->getWGrad()) {
    REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
    weight_->getWGrad()->mul(out_->grad->getTranspose(), in_->value, 1, 1);
  }

  // If callback does not change value, backprop error asynchronously so that
  // we can do the callback concurrently.
  // This is still a little bit dangerous since theoretically for
  // SyncMultiGpuMachine it is possible that the value copyback can still
  // happen at the same time as the error backprop where the value is being
  // used.
  hl_set_sync_flag(false);

  /* Calculate the input layers error */
  if (in_->grad) {
    REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
    in_->grad->mul(out_->grad, weight_->getW(), 1, 1);
  }

  hl_set_sync_flag(syncFlag);
  parameter_->incUpdate(callback);
}
void SequenceConcatLayer::backward(const UpdateCallback& callback) {
  /* activation */
  backwardActivation();

  if (biases_ && biases_->getWGrad()) {
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);

    // Increasing the number of gradient
    biases_->getParameterPtr()->incUpdate(callback);
  }

  MatrixPtr inputGrad1 = getInputGrad(0);
  MatrixPtr inputGrad2 = getInputGrad(1);
  MatrixPtr outputGrad = getOutputGrad();
  auto startPositions1 = getInput(0).sequenceStartPositions->getVector(false);
  auto startPositions2 = getInput(1).sequenceStartPositions->getVector(false);

  size_t numSequences1 = startPositions1->getSize() - 1;
  size_t numSequences2 = startPositions2->getSize() - 1;

  CHECK_EQ(numSequences1, numSequences2);

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerBackward", getName().c_str());

    size_t offset = 0;
    size_t leftNumIns = 0;
    size_t rightNumIns = 0;
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      leftNumIns = starts1[seqId + 1] - starts1[seqId];
      if (inputGrad1) {
        inputGrad1->subMatrix(starts1[seqId], leftNumIns)
            ->add(*(outputGrad->subMatrix(offset, leftNumIns)));
      }
      offset += leftNumIns;

      rightNumIns = starts2[seqId + 1] - starts2[seqId];
      if (inputGrad2) {
        inputGrad2->subMatrix(starts2[seqId], rightNumIns)
            ->add(*(outputGrad->subMatrix(offset, rightNumIns)));
      }
      offset += rightNumIns;
    }
  }
}
Example #3
0
 void backwardBias(const UpdateCallback& callback) {
   if (!biases_) return;
   real* bias = biases_->getWGrad()->getData();
   real* sampleOut = sampleOut_.grad->getData();
   for (size_t i = 0; i < samples_.size(); ++i) {
     bias[samples_[i].labelId] += sampleOut[i];
   }
   biases_->incUpdate(callback);
 }
Example #4
0
void ConcatenateLayer2::backward(const UpdateCallback& callback) {
  /* Do activation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  AsyncGpuBlock block;
  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("Concat2BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBias_);
    biases_->getParameterPtr()->incUpdate(callback);
  }

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    if (projections_[i]) {
      projections_[i]->backward(callback);
    }
  }
}
Example #5
0
  /**
   * Backward pass for this layer.
   *
   * Steps, in order:
   *  1. Resize or create sampleOut_.grad with dimensions 1 x samples_.size().
   *  2. Run backwardCost().
   *  3. Run the activation's backward on sampleOut_ and check its status.
   *  4. Accumulate the bias gradient when a bias with a gradient exists.
   *  5. Run backwardOneInput() for every input index.
   *
   * @param callback update callback forwarded to backwardBias() and
   *                 backwardOneInput().
   */
  void backward(const UpdateCallback& callback) override {
    Matrix::resizeOrCreate(sampleOut_.grad,
                           1,
                           samples_.size(),
                           /* trans= */ false,
                           useGpu_);

    backwardCost();

    auto status = activation_->backward(sampleOut_);
    status.check();

    // biases_ can be null (backwardBias() itself guards against that), so it
    // must be checked before it is dereferenced here.
    if (biases_ && biases_->getWGrad()) {
      backwardBias(callback);
    }

    for (int l = 0; l < numInputs_; ++l) {
      backwardOneInput(l, callback);
    }
  }