void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) {
  bool syncFlag = hl_get_sync_flag();

  /* Calculate the W-gradient for the current layer */
  if (weight_->getWGrad()) {
    REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
    weight_->getWGrad()->mul(out_->grad->getTranspose(), in_->value, 1, 1);
  }

  // If callback does not change value, backprop error asynchronously so that
  // we can do the callback concurrently.
  // This is still a little bit dangerous since theoretically for
  // SyncMultiGpuMachine it is possible that the value copyback can still
  // happen at the same time as the error backprop where the value is being
  // used.
  hl_set_sync_flag(false);

  /* Calculate the input layer's error */
  if (in_->grad) {
    REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
    in_->grad->mul(out_->grad, weight_->getW(), 1, 1);
  }

  hl_set_sync_flag(syncFlag);
  parameter_->incUpdate(callback);
}
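The save/disable/restore dance around hl_set_sync_flag is easy to break if the function ever gains an early return between the two calls. Below is a minimal standalone sketch of the same pattern using a hypothetical global flag in place of Paddle's hl_* API (getSyncFlag, setSyncFlag, and AsyncScope are illustrative names, not Paddle functions), with an RAII guard doing the restore:

// Hypothetical stand-in for hl_get_sync_flag()/hl_set_sync_flag();
// illustrative only, not part of Paddle.
static bool g_syncFlag = true;

bool getSyncFlag() { return g_syncFlag; }
void setSyncFlag(bool f) { g_syncFlag = f; }

// RAII guard: disables synchronization for its scope and restores the
// previous flag on destruction, even on early return or exception.
class AsyncScope {
 public:
  AsyncScope() : saved_(getSyncFlag()) { setSyncFlag(false); }
  ~AsyncScope() { setSyncFlag(saved_); }

 private:
  bool saved_;
};

void backwardSketch() {
  // ... synchronous W-gradient GEMM would run here ...
  {
    AsyncScope async;  // flag set to false
    // ... input-gradient GEMM issued asynchronously here ...
  }  // previous flag restored automatically
  // ... parameter update callback runs after the flag is restored ...
}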
Example #2
void ConcatenateLayer2::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  int size = getSize();
  resetOutput(batchSize, size);

  for (size_t i = 0; i < projections_.size(); i++) {
    size_t startCol = projCol_[i].first;
    size_t endCol = projCol_[i].second;
    projOutput_[i].value = output_.value->subColMatrix(startCol, endCol);
    if (output_.grad) {
      projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol);
    }
  }

  {
    AsyncGpuBlock block;
    for (size_t i = 0; i != inputLayers_.size(); ++i) {
      projections_[i]->forward(&getInput(i), &projOutput_[i], passType);
    }
  }

  /* add the bias-vector */
  if (biases_) {
    REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str());
    output_.value->addBias(*(biases_->getW()), 1, sharedBias_);
  }

  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
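ConcatenateLayer2 concatenates its projections along the column (feature) dimension: projCol_[i] holds the column range owned by projection i, and each projection writes directly into its slice of the shared output matrix, so no extra copy is needed afterwards. A minimal sketch of the same bookkeeping over a flat row-major buffer (sizes, projCol, and the fill values are illustrative, not Paddle API):

#include <utility>
#include <vector>

int main() {
  // Per-projection output widths, e.g. three projections of width 2, 3, 4.
  std::vector<size_t> sizes = {2, 3, 4};

  // Build the [startCol, endCol) ranges, mirroring projCol_.
  std::vector<std::pair<size_t, size_t>> projCol;
  size_t col = 0;
  for (size_t s : sizes) {
    projCol.emplace_back(col, col + s);
    col += s;
  }
  const size_t totalWidth = col;  // 9

  const size_t batchSize = 2;
  std::vector<float> output(batchSize * totalWidth, 0.f);

  // Each "projection" fills only its own column slice of every row,
  // analogous to writing through subColMatrix(startCol, endCol).
  for (size_t i = 0; i < projCol.size(); ++i) {
    for (size_t row = 0; row < batchSize; ++row) {
      for (size_t c = projCol[i].first; c < projCol[i].second; ++c) {
        output[row * totalWidth + c] = static_cast<float>(i + 1);
      }
    }
  }
  return 0;
}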
Example #3
void forwardBias() {
  if (!biases_) {
    sampleOut_.value->zeroMem();
  } else {
    real* bias = biases_->getW()->getData();
    real* sampleOut = sampleOut_.value->getData();
    for (size_t i = 0; i < samples_.size(); ++i) {
      sampleOut[i] = bias[samples_[i].labelId];
    }
  }
}
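forwardBias seeds each sampled output with the bias of its sampled label (a gather), or zeroes the buffer when the layer has no bias parameter. A standalone sketch of the same gather, where the Sample struct and function name are made up for illustration:

#include <vector>

struct Sample {    // stand-in for Paddle's per-sample record
  size_t labelId;  // index of the sampled class
};

void forwardBiasSketch(const std::vector<float>* bias,
                       const std::vector<Sample>& samples,
                       std::vector<float>& sampleOut) {
  if (!bias) {
    // No bias parameter: start from zero, like sampleOut_.value->zeroMem().
    sampleOut.assign(samples.size(), 0.f);
    return;
  }
  sampleOut.resize(samples.size());
  for (size_t i = 0; i < samples.size(); ++i) {
    // Gather: one bias value per sampled label.
    sampleOut[i] = (*bias)[samples[i].labelId];
  }
}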
void TransposedFullMatrixProjection::forward() {
  REGISTER_TIMER_INFO("FwMulTimer", getName().c_str());
  out_->value->mul(in_->value, weight_->getW()->getTranspose(), 1, 1);
}
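With W stored as an outDim x inDim matrix, this forward pass computes out = in * W^T, and the backward pass shown earlier computes Wgrad += outGrad^T * in and inGrad += outGrad * W (the mul(..., 1, 1) calls accumulate into the destination). A reference implementation of the three products with raw loops over row-major buffers, just to make the shapes concrete (names are illustrative):

#include <vector>

// out[b][o]   += sum_k in[b][k] * W[o][k]        (forward, out = in * W^T)
// Wgrad[o][k] += sum_b outGrad[b][o] * in[b][k]  (weight gradient)
// inGrad[b][k] += sum_o outGrad[b][o] * W[o][k]  (input gradient)
void transposedProjectionRef(size_t batch, size_t inDim, size_t outDim,
                             const std::vector<float>& in,       // batch x inDim
                             const std::vector<float>& W,        // outDim x inDim
                             const std::vector<float>& outGrad,  // batch x outDim
                             std::vector<float>& out,            // batch x outDim
                             std::vector<float>& Wgrad,          // outDim x inDim
                             std::vector<float>& inGrad) {       // batch x inDim
  for (size_t b = 0; b < batch; ++b)
    for (size_t o = 0; o < outDim; ++o)
      for (size_t k = 0; k < inDim; ++k)
        out[b * outDim + o] += in[b * inDim + k] * W[o * inDim + k];

  for (size_t o = 0; o < outDim; ++o)
    for (size_t k = 0; k < inDim; ++k)
      for (size_t b = 0; b < batch; ++b)
        Wgrad[o * inDim + k] += outGrad[b * outDim + o] * in[b * inDim + k];

  for (size_t b = 0; b < batch; ++b)
    for (size_t k = 0; k < inDim; ++k)
      for (size_t o = 0; o < outDim; ++o)
        inGrad[b * inDim + k] += outGrad[b * outDim + o] * W[o * inDim + k];
}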
void SequenceConcatLayer::forward(PassType passType) {
  Layer::forward(passType);

  size_t dim = getSize();

  const Argument& input1 = getInput(0);
  size_t numSequences1 = input1.getNumSequences();
  auto startPositions1 = input1.sequenceStartPositions->getVector(false);

  const Argument& input2 = getInput(1);
  size_t numSequences2 = input2.getNumSequences();
  auto startPositions2 = input2.sequenceStartPositions->getVector(false);

  CHECK_EQ(dim, input1.value->getWidth());
  CHECK_EQ(startPositions1->getData()[numSequences1], input1.getBatchSize());
  CHECK_EQ(numSequences1, startPositions1->getSize() - 1);

  CHECK_EQ(dim, input2.value->getWidth());
  CHECK_EQ(startPositions2->getData()[numSequences2], input2.getBatchSize());
  CHECK_EQ(numSequences2, startPositions2->getSize() - 1);

  CHECK_EQ(numSequences1, numSequences2);

  MatrixPtr inputValue1 = getInputValue(0);
  MatrixPtr inputValue2 = getInputValue(1);

  // reset output
  reserveOutput(inputValue1->getHeight() + inputValue2->getHeight(), dim);

  MatrixPtr outputValue = getOutputValue();

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerForward", getName().c_str());

    size_t offset = 0;
    size_t leftNumIns = 0;
    size_t rightNumIns = 0;
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      leftNumIns = starts1[seqId + 1] - starts1[seqId];
      outputValue->subMatrix(offset, leftNumIns)
          ->assign(*(inputValue1->subMatrix(starts1[seqId], leftNumIns)));
      offset += leftNumIns;

      rightNumIns = starts2[seqId + 1] - starts2[seqId];
      outputValue->subMatrix(offset, rightNumIns)
          ->assign(*(inputValue2->subMatrix(starts2[seqId], rightNumIns)));
      offset += rightNumIns;
    }

    // modify the sequenceStartPositions
    ICpuGpuVector::resizeOrCreate(
        output_.sequenceStartPositions, numSequences1 + 1, false);

    int* tgtBuf = output_.sequenceStartPositions->getMutableData(false);

    for (size_t seqId = 0; seqId < numSequences1 + 1; ++seqId) {
      tgtBuf[seqId] = starts1[seqId] + starts2[seqId];
    }
  }

  if (biases_.get() != NULL) {
    MatrixPtr outV = getOutputValue();
    outV->addBias(*(biases_->getW()), 1);
  }

  /* activation */
  forwardActivation();
}
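SequenceConcatLayer interleaves its two inputs per sequence: for each sequence id it copies the rows of input 1, then the rows of input 2, and the merged start offsets come out as the elementwise sum starts1[i] + starts2[i], since every earlier sequence contributes its combined length from both inputs. A minimal sketch of that interleaving over plain row vectors (Rows, concatSequences, and the parameter names are illustrative, not Paddle types):

#include <vector>

using Rows = std::vector<std::vector<float>>;

// Interleave per-sequence row blocks of a and b; fill mergedStarts with
// the start offsets of the concatenated sequences (startsA/startsB hold
// numSeq + 1 entries, beginning at 0).
Rows concatSequences(const Rows& a, const std::vector<int>& startsA,
                     const Rows& b, const std::vector<int>& startsB,
                     std::vector<int>& mergedStarts) {
  const size_t numSeq = startsA.size() - 1;
  Rows out;
  mergedStarts.resize(numSeq + 1);
  for (size_t s = 0; s < numSeq; ++s) {
    mergedStarts[s] = startsA[s] + startsB[s];
    for (int r = startsA[s]; r < startsA[s + 1]; ++r) out.push_back(a[r]);
    for (int r = startsB[s]; r < startsB[s + 1]; ++r) out.push_back(b[r]);
  }
  mergedStarts[numSeq] = startsA[numSeq] + startsB[numSeq];
  return out;
}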