예제 #1
0
void ExpandLayer::backward(const UpdateCallback& callback) {
  if (biases_ && biases_->getWGrad()) {
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
    /* Increasing the number of gradient */
    biases_->getParameterPtr()->incUpdate(callback);
  }

  if (!getInputGrad(0)) return;
  MatrixPtr inputGrad = getInputGrad(0);
  MatrixPtr outputGrad = getOutputGrad();
  auto cpuSeqStartPos = type_ ? getInput(1).subSequenceStartPositions
                              : getInput(1).sequenceStartPositions;
  size_t numSequences = cpuSeqStartPos->getSize() - 1;
  const int* starts = cpuSeqStartPos->getData(false);

  CHECK_EQ(inputGrad->getWidth(), outputGrad->getWidth());
  CHECK_EQ(outputGrad->getHeight(), (size_t)starts[numSequences]);

  AsyncGpuBlock asyncGpuBlock;

  // sum to get the grad
  real scale = 1;
  for (size_t sequenceId = 0; sequenceId < numSequences; sequenceId++) {
    // TODO(Dangqingqing) optimization for GPU
    int sequenceLength = starts[sequenceId + 1] - starts[sequenceId];
    if (sequenceLength == 0) {
      // empty sequence
      continue;
    }
    MatrixPtr copyData = inputGrad->subMatrix(sequenceId, 1);
    copyData->collectBias(
        *outputGrad->subMatrix(starts[sequenceId], sequenceLength), scale);
  }
}
예제 #2
0
void SequenceConcatLayer::backward(const UpdateCallback& callback) {
  /* activation */
  backwardActivation();

  if (biases_ && biases_->getWGrad()) {
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);

    // Increasing the number of gradient
    biases_->getParameterPtr()->incUpdate(callback);
  }

  MatrixPtr inputGrad1 = getInputGrad(0);
  MatrixPtr inputGrad2 = getInputGrad(1);
  MatrixPtr outputGrad = getOutputGrad();
  auto startPositions1 = getInput(0).sequenceStartPositions->getVector(false);
  auto startPositions2 = getInput(1).sequenceStartPositions->getVector(false);

  size_t numSequences1 = startPositions1->getSize() - 1;
  size_t numSequences2 = startPositions2->getSize() - 1;

  CHECK_EQ(numSequences1, numSequences2);

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerBackward", getName().c_str());

    size_t offset = 0;
    size_t leftNumIns = 0;
    size_t rightNumIns = 0;
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      leftNumIns = starts1[seqId + 1] - starts1[seqId];
      if (inputGrad1) {
        inputGrad1->subMatrix(starts1[seqId], leftNumIns)
            ->add(*(outputGrad->subMatrix(offset, leftNumIns)));
      }
      offset += leftNumIns;

      rightNumIns = starts2[seqId + 1] - starts2[seqId];
      if (inputGrad2) {
        inputGrad2->subMatrix(starts2[seqId], rightNumIns)
            ->add(*(outputGrad->subMatrix(offset, rightNumIns)));
      }
      offset += rightNumIns;
    }
  }
}
예제 #3
0
void MultiplexLayer::forward(PassType passType) {
  Layer::forward(passType);

  IVectorPtr copyIds = getInput(0).ids;
  MatrixPtr inV1 = getInputValue(1);
  CHECK_EQ(copyIds->getSize(), inV1->getHeight());
  for (size_t i = 2; i < inputLayers_.size(); i++) {
    CHECK_EQ(inV1->getHeight(), getInputValue(i)->getHeight());
    CHECK_EQ(inV1->getWidth(), getInputValue(i)->getWidth());
  }

  calculateCopySchedule(copyIds, inputLayers_.size() - 1);
  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(inV1->getHeight(), inV1->getWidth());
  }

  MatrixPtr outV = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwLMultplexingTimer", getName().c_str());
    AsyncGpuBlock block;
    for (const CopyInfo& info : copySchedule_) {
      outV->subMatrix(info.startIdx, info.length, tmpDest_)
          ->copyFrom(*getInputValue(info.copyIdx + 1)
                          ->subMatrix(info.startIdx, info.length, tmpSrc_));
    }
  }

  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
예제 #4
0
void DeConv3DLayer::forward(PassType passType) {
  Layer::forward(passType);
  int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  int outWidth = getSize();
  resetOutput(batchSize, outWidth);
  const MatrixPtr outMat = getOutputValue();

  REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str());
  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    const MatrixPtr &inMat = getInputValue(i);
    int M = M_[i];
    int N = N_[i];
    int K = K_[i];
    MatrixPtr wMat = weights_[i]->getW();
    Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
    for (int n = 0; n < batchSize; ++n) {
      real *inData = inMat->getData() + n * inMat->getStride();
      for (int g = 0; g < groups_[i]; ++g) {
        MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_);
        MatrixPtr wMatSub = wMat->subMatrix(g * K, K);
        MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K);
        colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0);
        inData += M * N;
      }
      colBuf_->col2Vol(outMat->getData() + n * outMat->getStride(),
                       numFilters_,
                       imgSizeD_[i],
                       imgSizeH_[i],
                       imgSizeW_[i],
                       filterSizeZ_[i],
                       filterSizeY_[i],
                       filterSize_[i],
                       strideZ_[i],
                       strideY_[i],
                       stride_[i],
                       paddingZ_[i],
                       paddingY_[i],
                       padding_[i],
                       1.0,
                       1.0);
    }
  }
  if (nullptr != this->biasParameter_) {
    this->addBias();
  }
  forwardActivation();
}
예제 #5
0
void MultiplexLayer::backward(const UpdateCallback& callback) {
  /* Do derivation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  MatrixPtr outG = getOutputGrad();

  {
    REGISTER_TIMER_INFO("BwLMultiplexTimer", getName().c_str());
    AsyncGpuBlock block;
    for (const CopyInfo& info : copySchedule_) {
      if (getInputGrad(info.copyIdx + 1)) {
        getInputGrad(info.copyIdx + 1)
            ->subMatrix(info.startIdx, info.length, tmpDest_)
            ->add(*outG->subMatrix(info.startIdx, info.length, tmpSrc_));
      }
    }
  }
}
예제 #6
0
void SequenceConcatLayer::forward(PassType passType) {
  Layer::forward(passType);

  size_t dim = getSize();

  const Argument& input1 = getInput(0);
  size_t numSequences1 = input1.getNumSequences();
  auto startPositions1 = input1.sequenceStartPositions->getVector(false);

  const Argument& input2 = getInput(1);
  size_t numSequences2 = input2.getNumSequences();
  auto startPositions2 = input2.sequenceStartPositions->getVector(false);

  CHECK_EQ(dim, input1.value->getWidth());
  CHECK_EQ(startPositions1->getData()[numSequences1], input1.getBatchSize());
  CHECK_EQ(numSequences1, startPositions1->getSize() - 1);

  CHECK_EQ(dim, input2.value->getWidth());
  CHECK_EQ(startPositions2->getData()[numSequences2], input2.getBatchSize());
  CHECK_EQ(numSequences2, startPositions2->getSize() - 1);

  CHECK_EQ(numSequences1, numSequences2);

  MatrixPtr inputValue1 = getInputValue(0);
  MatrixPtr inputValue2 = getInputValue(1);

  // reset output
  reserveOutput(inputValue1->getHeight() + inputValue2->getHeight(), dim);

  MatrixPtr outputValue = getOutputValue();

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerForward", getName().c_str());

    size_t offset = 0;
    size_t leftNumIns = 0;
    size_t rightNumIns = 0;
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      leftNumIns = starts1[seqId + 1] - starts1[seqId];
      outputValue->subMatrix(offset, leftNumIns)
          ->assign(*(inputValue1->subMatrix(starts1[seqId], leftNumIns)));
      offset += leftNumIns;

      rightNumIns = starts2[seqId + 1] - starts2[seqId];
      outputValue->subMatrix(offset, rightNumIns)
          ->assign(*(inputValue2->subMatrix(starts2[seqId], rightNumIns)));
      offset += rightNumIns;
    }

    // modify the sequenceStartPositions
    ICpuGpuVector::resizeOrCreate(
        output_.sequenceStartPositions, numSequences1 + 1, false);

    int* tgtBuf = output_.sequenceStartPositions->getMutableData(false);

    for (size_t seqId = 0; seqId < numSequences1 + 1; ++seqId) {
      tgtBuf[seqId] = starts1[seqId] + starts2[seqId];
    }
  }

  if (biases_.get() != NULL) {
    MatrixPtr outV = getOutputValue();
    outV->addBias(*(biases_->getW()), 1);
  }

  /* activation */
  forwardActivation();
}