void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) {
  bool syncFlag = hl_get_sync_flag();

  /* Calculate the W-gradient for the current layer */
  if (weight_->getWGrad()) {
    REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
    weight_->getWGrad()->mul(out_->grad->getTranspose(), in_->value, 1, 1);
  }

  // If the callback does not change the value, back-propagate the error
  // asynchronously so the callback can run concurrently.
  // This is still slightly risky: in theory, for SyncMultiGpuMachine the
  // value copy-back could overlap with the error backprop while the value
  // is still being read.
  hl_set_sync_flag(false);

  /* Calculate the input layer's error */
  if (in_->grad) {
    REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
    in_->grad->mul(out_->grad, weight_->getW(), 1, 1);
  }

  hl_set_sync_flag(syncFlag);
  parameter_->incUpdate(callback);
}
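// A minimal sketch of the save/clear/restore pattern used above, expressed
// as a RAII guard so the sync flag is restored on every exit path.
// hl_get_sync_flag()/hl_set_sync_flag() are the calls the snippet already
// uses; the ScopedAsyncFlag class itself is a hypothetical helper, not part
// of the library.
class ScopedAsyncFlag {
 public:
  ScopedAsyncFlag() : saved_(hl_get_sync_flag()) { hl_set_sync_flag(false); }
  ~ScopedAsyncFlag() { hl_set_sync_flag(saved_); }

 private:
  bool saved_;
};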
// Example #2
void ConcatenateLayer2::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  int size = getSize();
  resetOutput(batchSize, size);

  for (size_t i = 0; i < projections_.size(); i++) {
    size_t startCol = projCol_[i].first;
    size_t endCol = projCol_[i].second;
    projOutput_[i].value = output_.value->subColMatrix(startCol, endCol);
    if (output_.grad) {
      projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol);
    }
  }

  {
    AsyncGpuBlock block;
    for (size_t i = 0; i != inputLayers_.size(); ++i) {
      projections_[i]->forward(&getInput(i), &projOutput_[i], passType);
    }
  }

  /* add the bias-vector */
  if (biases_) {
    REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str());
    output_.value->addBias(*(biases_->getW()), 1, sharedBias_);
  }

  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
// Example #3
void CosSimLayer::forward(PassType passType) {
  Layer::forward(passType);
  /* malloc memory for the output_ if necessary */
  int batchSize = getInputValue(0)->getHeight();
  int size = getSize();
  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";

  {
    REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str());
    reserveOutput(batchSize, size);
  }

  MatrixPtr outV = getOutputValue();
  /* activation */ {
    REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str());
    MatrixPtr prevOut1 = getInputValue(0);
    MatrixPtr prevOut2 = getInputValue(1);

    CHECK(outV && prevOut1 && prevOut2);
    BufferArgs inputs;
    BufferArgs outputs;
    inputs.addArg(*prevOut1);
    inputs.addArg(*prevOut2);
    outputs.addArg(*outV, ASSIGN_TO);
    forward_[0]->calc(inputs, outputs);
  }
}
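// For reference, a plain-loop sketch of the row-wise cosine similarity the
// forward function above is expected to produce. The raw-array signature,
// the epsilon guard, and the absence of a scale factor are assumptions of
// this sketch, not the actual kernel. Requires <cmath>.
void cosSimRowRef(const float* a, const float* b, float* out,
                  size_t rows, size_t dim) {
  for (size_t i = 0; i < rows; ++i) {
    float dot = 0.f, na = 0.f, nb = 0.f;
    for (size_t j = 0; j < dim; ++j) {
      float x = a[i * dim + j], y = b[i * dim + j];
      dot += x * y;  // numerator: <a_i, b_i>
      na += x * x;   // |a_i|^2
      nb += y * y;   // |b_i|^2
    }
    out[i] = dot / (std::sqrt(na) * std::sqrt(nb) + 1e-6f);
  }
}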
// Example #4
void MultiplexLayer::forward(PassType passType) {
  Layer::forward(passType);

  IVectorPtr copyIds = getInput(0).ids;
  MatrixPtr inV1 = getInputValue(1);
  CHECK_EQ(copyIds->getSize(), inV1->getHeight());
  for (size_t i = 2; i < inputLayers_.size(); i++) {
    CHECK_EQ(inV1->getHeight(), getInputValue(i)->getHeight());
    CHECK_EQ(inV1->getWidth(), getInputValue(i)->getWidth());
  }

  calculateCopySchedule(copyIds, inputLayers_.size() - 1);
  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(inV1->getHeight(), inV1->getWidth());
  }

  MatrixPtr outV = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwLMultplexingTimer", getName().c_str());
    AsyncGpuBlock block;
    for (const CopyInfo& info : copySchedule_) {
      outV->subMatrix(info.startIdx, info.length, tmpDest_)
          ->copyFrom(*getInputValue(info.copyIdx + 1)
                          ->subMatrix(info.startIdx, info.length, tmpSrc_));
    }
  }

  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
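// calculateCopySchedule (not shown) must group consecutive rows that select
// the same input so each run becomes one contiguous sub-matrix copy. A
// hedged sketch of that grouping; CopyInfo's field order and the helper
// itself are assumptions based on the usage above, not the layer's actual
// implementation. Requires <vector>; id validation is omitted.
std::vector<CopyInfo> buildCopySchedule(const int* ids, size_t n) {
  std::vector<CopyInfo> schedule;
  size_t start = 0;
  for (size_t i = 1; i <= n; ++i) {
    if (i == n || ids[i] != ids[start]) {
      // one contiguous run [start, i) that picks input ids[start]
      schedule.push_back({start, i - start, static_cast<size_t>(ids[start])});
      start = i;
    }
  }
  return schedule;
}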
void InterpolationLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr weightV = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);
  MatrixPtr inV2 = getInputValue(2);

  size_t batchSize = inV1->getHeight();
  size_t dataDim = inV1->getWidth();

  CHECK_EQ(dataDim, getSize());
  CHECK_EQ(dataDim, inV2->getWidth());
  CHECK_EQ(batchSize, weightV->getHeight());
  CHECK_EQ(batchSize, inV2->getHeight());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    resetOutput(batchSize, dataDim);
  }

  MatrixPtr outV = getOutputValue();

  Matrix::resizeOrCreate(weightLast_, batchSize, 1, false, useGpu_);
  weightLast_->one();
  weightLast_->sub(*weightV);

  REGISTER_TIMER_INFO("FwInterpTimer", getName().c_str());
  // outV = inV1 * weight + inV2 * weightLast
  outV->addRowScale(0, *inV1, *weightV);
  outV->addRowScale(0, *inV2, *weightLast_);
}
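// Scalar reference for the interpolation above: each output row is a convex
// combination of the two input rows, out = w * a + (1 - w) * b, with one
// weight per row. This plain-loop sketch mirrors the two addRowScale calls;
// the function name and raw-array signature are assumptions.
void interpRowRef(const float* w, const float* a, const float* b, float* out,
                  size_t rows, size_t dim) {
  for (size_t i = 0; i < rows; ++i) {
    for (size_t j = 0; j < dim; ++j) {
      out[i * dim + j] = w[i] * a[i * dim + j] + (1.f - w[i]) * b[i * dim + j];
    }
  }
}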
// Example #6
void CosSimVecMatLayer::forward(PassType passType) {
  Layer::forward(passType);
  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";

  MatrixPtr inV0 = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);

  size_t batchSize = inV0->getHeight();
  size_t numKeys = getSize();

  CHECK_EQ(batchSize, inV1->getHeight());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, numKeys);
  }

  MatrixPtr outV = getOutputValue();
  CHECK(outV && inV0 && inV1);
  REGISTER_TIMER_INFO("FwCosVMTimer", getName().c_str());
  for (size_t i = 0; i < batchSize; i++) {
    tmpRow0->setData(inV0->rowBuf(i));
    tmpMtx0->setData(inV1->rowBuf(i));
    tmpRow2->setData(outV->rowBuf(i));

    BufferArgs inputs;
    BufferArgs outputs;
    inputs.addArg(*tmpMtx0);
    inputs.addArg(*tmpRow0);
    outputs.addArg(*tmpRow2, ASSIGN_TO);
    forward_[0]->calc(inputs, outputs);
  }
}
// Example #7
void PowerLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr inV0 = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);

  size_t batchSize = inV1->getHeight();
  size_t dataDim = inV1->getWidth();

  CHECK_EQ(getSize(), dataDim);
  CHECK_EQ(1U, inV0->getWidth());
  CHECK_EQ(batchSize, inV0->getHeight());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, dataDim);
  }

  MatrixPtr outV = getOutputValue();

  {
    REGISTER_TIMER_INFO("FwPowerTimer", getName().c_str());
    outV->rowPow(0, *inV1, *inV0);
  }
}
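// rowPow(0, inV1, inV0) raises every element of a row to that row's scalar
// exponent: out[i][j] = pow(in[i][j], w[i]). A plain-loop sketch; the
// raw-array signature is an assumption. Requires <cmath>.
void rowPowRef(const float* w, const float* in, float* out,
               size_t rows, size_t dim) {
  for (size_t i = 0; i < rows; ++i) {
    for (size_t j = 0; j < dim; ++j) {
      out[i * dim + j] = std::pow(in[i * dim + j], w[i]);
    }
  }
}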
void MKLPackedRecurrentLayer::backwardBatch(int batchSize,
                                            size_t numSequences,
                                            const int* starts) {
  if (!batchGrad_) {
    batchGrad_.reset(new SequenceToBatch(useGpu_));
  }
  batchGrad_->shareIndexWith(*batchValue_);

  size_t numBatch = batchGrad_->getNumBatch();
  bool backwardByBatch = numBatch < numSequences;

  batchGrad_->copyFromSeq(*output_.grad);
  {
    REGISTER_TIMER_INFO("RecurrentBwData", getName().c_str());
    /* backward one batch */
    for (int n = (int)numBatch - 1; n >= 0; n--) {
      MatrixPtr batchGrad = batchGrad_->getBatchValue(n);
      MatrixPtr batchValue =
          batchValue_->getBatchValue(n, batchGrad->getHeight());

      Argument arg;
      arg.value = batchValue;
      arg.grad = batchGrad;
      activation_->backward(arg).check();

      if (n != 0) {
        batchValue = batchGrad_->getBatchValue(n - 1, batchGrad->getHeight());
        packed_weightT_->gemm_compute(batchGrad, batchValue);
      }

      if (backwardByBatch && weight_->getWGrad()) {
        if (n != 0) {
          /* backward weight */
          batchValue =
              batchValue_->getBatchValue(n - 1, batchGrad->getHeight());
          weight_->getWGrad()->mul(
              *batchValue->getTranspose(), *batchGrad, 1, 1);
        }
      }
    }
  }

  batchGrad_->copyBackSeq(*output_.grad);

  if (!backwardByBatch && weight_->getWGrad()) {
    REGISTER_TIMER_INFO("RecurrentBwWeight", getName().c_str());
    for (size_t seq = 0; seq < numSequences; ++seq) {
      int len = starts[seq + 1] - starts[seq];
      weight_->getWGrad()->mul(
          *output_.value
               ->subMatrix(reversed_ ? starts[seq] + 1 : starts[seq], len - 1)
               ->getTranspose(),
          *output_.grad->subMatrix(reversed_ ? starts[seq] : starts[seq] + 1,
                                   len - 1),
          1,
          1);
    }
  }
}
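// (The backwardByBatch flag above picks the cheaper of two equivalent ways
// to accumulate the weight gradient: inside the per-batch loop when there
// are fewer batches than sequences, otherwise one GEMM per sequence in the
// loop that follows.)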
void SelectiveFullyConnectedLayer::backward(const UpdateCallback& callback) {
  backwardActivation();
  MatrixPtr oGrad = getOutputGrad();
  if (!fullOutput_) {
    interOutGrad_ = Matrix::createSparseMatrix(oGrad->getData(),
                                               interOutput_->getRows(),
                                               interOutput_->getCols(),
                                               interOutput_->getHeight(),
                                               interOutput_->getWidth(),
                                               interOutput_->getElementCnt(),
                                               FLOAT_VALUE,
                                               SPARSE_CSR,
                                               /*trans=*/false,
                                               /*useGpu=*/useGpu_);
  } else {
    interOutGrad_ = Matrix::create(oGrad->getData(),
                                   oGrad->getHeight(),
                                   oGrad->getWidth(),
                                   /*trans=*/false,
                                   /*useGpu=*/useGpu_);
  }

  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*interOutGrad_, 1);
    biases_->getParameterPtr()->incUpdate(callback);
  }

  // backward is different from FullyConnectedLayer
  // because the weight is transposed
  for (size_t i = 0; i < inputNum_; i++) {
    AsyncGpuBlock block;
    MatrixPtr preGrad = getInputGrad(i);
    if (preGrad) {
      REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
      preGrad->mul(*interOutGrad_, *weights_[i]->getW(), 1, 1);
    }

    MatrixPtr wGrad = weights_[i]->getWGrad();
    if (wGrad) {
      REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
      MatrixPtr input = getInputValue(i);
      wGrad->mul(*interOutGrad_->getTranspose(), *input, 1, 1);
    }

    {
      REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
      weights_[i]->getParameterPtr()->incUpdate(callback);
    }
  }
}
// Example #10
void DeConv3DLayer::backward(const UpdateCallback &callback) {
  backwardActivation();
  int batchSize = getOutputGrad()->getHeight();
  if (biases_ && biases_->getWGrad()) {
    bpropBiases();
    biases_->getParameterPtr()->incUpdate(callback);
  }
  REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str());
  for (size_t i = 0; i < inputLayers_.size(); ++i) {
    if (weights_[i]->getWGrad() || this->needGradient_) {
      int M = M_[i];
      int N = N_[i];
      int K = K_[i];
      Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
      const MatrixPtr &inMat = getInputValue(i);
      for (int n = 0; n < batchSize; ++n) {
        colBuf_->vol2Col(
            getOutputGrad()->getData() + n * getOutputGrad()->getStride(),
            numFilters_,
            imgSizeD_[i],
            imgSizeH_[i],
            imgSizeW_[i],
            filterSizeZ_[i],
            filterSizeY_[i],
            filterSize_[i],
            strideZ_[i],
            strideY_[i],
            stride_[i],
            paddingZ_[i],
            paddingY_[i],
            padding_[i]);
        if (weights_[i]->getWGrad()) {
          real *inData = inMat->getData() + n * inMat->getStride();
          for (int g = 0; g < groups_[i]; ++g) {
            MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K);
            MatrixPtr wGradMatSub =
                weights_[i]->getWGrad()->subMatrix(g * K, K);
            MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_);
            wGradMatSub->mul(
                *colBufDataSub, *(inMatSub->getTranspose()), 1.0, 1.0);
            inData += M * N;
          }
        }
        if (getInputGrad(i)) {
          real *preGrad =
              getInputGrad(i)->getData() + n * getInputGrad(i)->getStride();
          for (int g = 0; g < groups_[i]; ++g) {
            MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K);
            MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K);
            MatrixPtr inGradMatSub =
                Matrix::create(preGrad, M, N, false, useGpu_);
            inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 1.0);
            preGrad += M * N;
          }
        }
      }
      weights_[i]->getParameterPtr()->incUpdate(callback);
    }
  }
}
// Example #11
void InterpolationLayer::backward(const UpdateCallback& callback) {
  MatrixPtr outG = getOutputGrad();
  MatrixPtr weightV = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);
  MatrixPtr inV2 = getInputValue(2);
  MatrixPtr inG0 = getInputGrad(0);
  MatrixPtr inG1 = getInputGrad(1);
  MatrixPtr inG2 = getInputGrad(2);

  size_t batchSize = inV1->getHeight();
  size_t dataDim = inV1->getWidth();

  REGISTER_TIMER_INFO("BwInterpTimer", getName().c_str());

  if (inG0) {
    Matrix::resizeOrCreate(tmpMatrix, batchSize, dataDim, false, useGpu_);

    // inG0[i] += sum_j outG[i][j] * (inV1[i][j] - inV2[i][j])
    tmpMatrix->sub(*inV1, *inV2);
    inG0->rowDotMul(0, *outG, *tmpMatrix);
  }

  if (inG1) {
    // inG1 += outG * weight
    inG1->addRowScale(0, *outG, *weightV);
  }

  if (inG2) {
    // inG2 += outG * weightLast
    inG2->addRowScale(0, *outG, *weightLast_);
  }
}
// Example #12
void PowerLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inV0 = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);
  MatrixPtr inG0 = getInputGrad(0);
  MatrixPtr inG1 = getInputGrad(1);
  MatrixPtr outV = getOutputValue();
  MatrixPtr outG = getOutputGrad();

  size_t batchSize = inV1->getHeight();
  size_t dataDim = inV1->getWidth();

  {
    REGISTER_TIMER_INFO("BwPowerTimer", getName().c_str());
    Matrix::resizeOrCreate(tmpMtx, batchSize, dataDim, false, useGpu_);

    if (inG0) {
      tmpMtx->log2(*inV1);
      tmpMtx->dotMul(*tmpMtx, *outV);

      // inG0[i] += sum_j outG[i][j] * log(inV1[i][j]) * outV[i][j]
      inG0->rowDotMul(0, *outG, *tmpMtx);
    }

    if (inG1) {
      // tmp = (outV / inV1) * inV0
      tmpMtx->dotDiv(*outV, *inV1);
      tmpMtx->rowScale(0, *tmpMtx, *inV0);

      inG1->addDotMul(*outG, *tmpMtx, 1, 1);
    }
  }
}
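// Gradient reference for the two branches above, which follow from
// out = in ^ w element-wise with one exponent per row:
//   dL/dw[i]     = sum_j outG[i][j] * outV[i][j] * ln(inV1[i][j])
//   dL/din[i][j] = outG[i][j] * w[i] * outV[i][j] / inV1[i][j]
// A plain-loop sketch; the name and raw-array signature are assumptions.
// Requires <cmath>.
void powerBackwardRef(const float* w, const float* in, const float* out,
                      const float* outG, float* wG, float* inG,
                      size_t rows, size_t dim) {
  for (size_t i = 0; i < rows; ++i) {
    for (size_t j = 0; j < dim; ++j) {
      float o = out[i * dim + j], g = outG[i * dim + j];
      if (wG) wG[i] += g * o * std::log(in[i * dim + j]);
      if (inG) inG[i * dim + j] += g * w[i] * o / in[i * dim + j];
    }
  }
}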
void MKLPackedRecurrentLayer::forwardBatch(int batchSize,
                                           size_t numSequences,
                                           const int* starts) {
  if (!batchValue_) {
    batchValue_.reset(new SequenceToBatch(useGpu_));
  }

  batchValue_->resizeOrCreateBatch(batchSize, numSequences, starts, reversed_);

  batchValue_->copyFromSeq(*output_.value);

  {
    REGISTER_TIMER_INFO("RecurrentFwBatch", getName().c_str());
    /* forward one batch */
    for (size_t n = 0; n < batchValue_->getNumBatch(); n++) {
      MatrixPtr batchValue = batchValue_->getBatchValue(n);

      if (n != 0) {
        MatrixPtr preBatchValue =
            batchValue_->getBatchValue(n - 1, batchValue->getHeight());

        packed_weight_->gemm_compute(preBatchValue, batchValue);
      }
      Argument arg;
      arg.value = batchValue;
      activation_->forward(arg).check();
    }
  }
  batchValue_->copyBackSeq(*output_.value);
}
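// SequenceToBatch reorders variable-length sequences so that time step n of
// every sequence still alive at step n lands in one dense matrix, letting
// each recurrent step run as a single gemm_compute over all sequences
// instead of a per-sequence loop. Illustration (the longest-first layout is
// an assumption of this sketch):
//   sequences: A=[a0 a1 a2], B=[b0 b1], C=[c0]
//   batch 0: [a0 b0 c0]   batch 1: [a1 b1]   batch 2: [a2]
// copyFromSeq gathers rows into this layout; copyBackSeq scatters them back.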
// Example #14
void GatedRecurrentLayer::backward(const UpdateCallback& callback) {
  REGISTER_TIMER_INFO("GruBwTimer", getName().c_str());
  const Argument& input = getInput(0);
  CHECK(input.sequenceStartPositions);
  int batchSize = input.getBatchSize();
  const int* starts = input.sequenceStartPositions->getData(false);
  size_t numSequences = input.getNumSequences();

  Matrix::resizeOrCreate(gate_.grad, /* height= */batchSize,
                         getSize() * 3, /* trans= */false, useGpu_);
  Matrix::resizeOrCreate(resetOutput_.grad, /* height= */batchSize,
                         getSize(), /* trans= */false, useGpu_);

  if (useBatch_) {
    backwardBatch(batchSize, input.grad);
  } else {
    backwardSequence(batchSize, numSequences, starts, input.grad);
  }

  if (bias_) {
    bias_->getParameterPtr()->incUpdate(callback);
  }

  weight_->getParameterPtr()->incUpdate(callback);
}
// Example #15
void CudnnConvLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  resetOutput(batchSize, calOutputSize());

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    projections_[i]->forward(&getInput(i), &getOutput(), passType);
  }

  if (biases_) {
    REGISTER_TIMER_INFO("CudnnConvBiasTimer", getName().c_str());
    int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
    hl_tensor_reshape(outputDesc_, batchSize, numFilters_ / groups_[0],
        outputH_[0], outputW_[0], numFilters_ * outputH_[0] * outputW_[0],
        outputH_[0] * outputW_[0], outputW_[0], 1);
    outputOffset_ = getOutputValue()->getWidth() / groups_[0];
    for (int g = 0; g < groups_[0]; ++g) {
      real *biasData = biases_->getW()->getData() + biasOffset_ * g;
      real *outData = getOutputValue()->getData() + outputOffset_ * g;
      hl_convolution_forward_add_bias(biasDesc_, biasData,
                                      outputDesc_, outData);
    }
  }

  forwardActivation();
}
// Example #16
void CosSimVecMatLayer::backward(const UpdateCallback& callback) {
  CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed";

  MatrixPtr inV0 = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);
  MatrixPtr inG0 = getInputGrad(0);
  MatrixPtr inG1 = getInputGrad(1);
  MatrixPtr outV = getOutputValue();
  MatrixPtr outG = getOutputGrad();

  CHECK(inV0 && inV1 && inG0 && inG1 && outV && outG);
  size_t batchSize = inV0->getHeight();
  REGISTER_TIMER_INFO("BwCosVMTimer", getName().c_str());

  for (size_t i = 0; i < batchSize; i++) {
    tmpRow0->setData(inV0->rowBuf(i));
    tmpRow1->setData(inG0->rowBuf(i));
    tmpMtx0->setData(inV1->rowBuf(i));
    tmpMtx1->setData(inG1->rowBuf(i));
    tmpRow2->setData(outV->rowBuf(i));
    tmpRow3->setData(outG->rowBuf(i));

    BufferArgs inputs;
    BufferArgs outputs;
    inputs.addArg(*tmpRow3);
    inputs.addArg(*tmpRow2);
    inputs.addArg(*tmpMtx0);
    inputs.addArg(*tmpRow0);
    outputs.addArg(*tmpMtx1, ADD_TO);
    outputs.addArg(*tmpRow1, ADD_TO);

    backward_[0]->calc(inputs, outputs);
  }
}
// Example #17
void GatedRecurrentLayer::forward(PassType passType) {
  REGISTER_TIMER_INFO("GruFwTimer", getName().c_str());
  Layer::forward(passType);

  const Argument& input = getInput(0);
  CHECK(input.sequenceStartPositions);
  int batchSize = input.getBatchSize();
  size_t numSequences = input.getNumSequences();
  resetOutput(batchSize, getSize());
  CHECK_EQ(getSize() * 3, input.value->getWidth());
  const int* starts = input.sequenceStartPositions->getData(false);
  // batchSize is the total number of frames in the batch, NOT the number of sequences
  CHECK_EQ(starts[numSequences], batchSize);

  Matrix::resizeOrCreate(gate_.value, /* height= */batchSize,
                         getSize() * 3, /* trans= */false, useGpu_);
  Matrix::resizeOrCreate(resetOutput_.value, /* height= */batchSize,
                         getSize(), /* trans= */false, useGpu_);

  if (useBatch_) {
    forwardBatch(batchSize, numSequences, starts, input.value);
  } else {
    forwardSequence(batchSize, numSequences, starts, input.value);
  }
}
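// For orientation, one standard GRU step in scalar form. Paddle's exact
// gate ordering inside the 3*getSize()-wide gate_ buffer and its update
// convention are not shown in this snippet, so treat this as a generic
// reference, not GruCompute's implementation:
//   u = sigmoid(x_u + U_u h_prev)        // update gate
//   r = sigmoid(x_r + U_r h_prev)        // reset gate
//   c = tanh(x_c + U_c (r .* h_prev))    // candidate state
//   h = u .* h_prev + (1 - u) .* c       // some variants swap u and 1 - u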
void ScaleSubRegionLayer::forward(PassType passType) {
  Layer::forward(passType);
  auto in0 = getInput(0);
  imgH_ = in0.getFrameHeight();
  imgW_ = in0.getFrameWidth();
  if (imgH_ == 0 || imgW_ == 0) {
    auto& conf = config_.inputs(0).scale_sub_region_conf();
    imgH_ = conf.image_conf().img_size_y();
    imgW_ = conf.image_conf().img_size();
  }
  MatrixPtr imgV = in0.value;
  size_t batchSize = imgV->getHeight();
  size_t spatialSize = imgH_ * imgW_;
  channelsNum_ = imgV->getWidth() / spatialSize;
  shape_ = TensorShape({batchSize, channelsNum_, imgH_, imgW_});

  resetOutput(batchSize, imgV->getWidth());
  auto& out = getOutput();
  out.setFrameHeight(imgH_);
  out.setFrameWidth(imgW_);

  MatrixPtr indicesV = getInputValue(1);
  indicesShape_ = TensorShape({batchSize, 6});

  REGISTER_TIMER_INFO("ScaleSubRegionForward", getName().c_str());
  BufferArgs inArgs;
  BufferArgs outArgs;
  inArgs.addArg(*imgV, shape_);
  inArgs.addArg(*indicesV, indicesShape_);
  outArgs.addArg(*out.value, shape_, ASSIGN_TO);
  forward_[0]->calc(inArgs, outArgs);
}
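// The second input carries one 6-element row per sample describing the
// region to scale, hence indicesShape_ = {batchSize, 6}. Reading it as
// begin/end pairs per axis, [c_begin, c_end, h_begin, h_end, w_begin,
// w_end], matches the shape, but that exact ordering is an assumption here,
// not something this snippet shows.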
// Example #19
void ConvProjection::forward() {
  int batchSize = in_->value->getHeight();
  reshape(batchSize);

  void *workSpace = NULL;
  if (workSpaceInBytes_ > 0) {
    workSpace = getSpaceBytes(workSpaceInBytes_);
  }

  for (int g = 0; g < groups_; ++g) {
    REGISTER_TIMER_INFO("CudnnConvFwTimer", getName().c_str());

    real *inputData = in_->value->getData() + g * inputOffset_;
    real *wgtData = weight_->getW()->getData() + g * weightOffset_;
    real *outData = out_->value->getData() + g * outputOffset_;
    hl_convolution_forward(imageDesc_,
                           inputData,
                           outputDesc_,
                           outData,
                           filterDesc_,
                           wgtData,
                           convDesc_,
                           workSpace,
                           fwdLimitBytes_,
                           fwdAlgo_);
  }
}
  void backward(const UpdateCallback& callback) override {
    REGISTER_TIMER_INFO("RecurrentGroupBwTime", getName().c_str());
    network_->backward(nullptr);

    for (auto& para : parameters_) {
      para->incUpdate(callback);
    }
  }
void ScaleSubRegionLayer::backward(const UpdateCallback& callback) {
  REGISTER_TIMER_INFO("ScaleSubRegionBackward", getName().c_str());
  BufferArgs inArgs;
  BufferArgs outArgs;
  inArgs.addArg(*getOutputGrad(), shape_);
  inArgs.addArg(*getInputValue(1), indicesShape_);
  outArgs.addArg(*getInputGrad(0), shape_, ADD_TO);
  backward_[0]->calc(inArgs, outArgs);
}
void SlopeInterceptLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inG = getInputGrad(0);
  MatrixPtr outG = getOutputGrad();

  if (inG) {
    REGISTER_TIMER_INFO("BwSlopeInterceptTimer", getName().c_str());
    inG->add(*outG, config_.slope());
  }
}
// Example #23
void ConcatenateLayer2::backward(const UpdateCallback& callback) {
  /* Do activation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  AsyncGpuBlock block;
  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("Concat2BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBias_);
    biases_->getParameterPtr()->incUpdate(callback);
  }

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    if (projections_[i]) {
      projections_[i]->backward(callback);
    }
  }
}
// Example #24
void MultiplexLayer::backward(const UpdateCallback& callback) {
  /* Do derivation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  MatrixPtr outG = getOutputGrad();

  {
    REGISTER_TIMER_INFO("BwLMultiplexTimer", getName().c_str());
    AsyncGpuBlock block;
    for (const CopyInfo& info : copySchedule_) {
      if (getInputGrad(info.copyIdx + 1)) {
        getInputGrad(info.copyIdx + 1)
            ->subMatrix(info.startIdx, info.length, tmpDest_)
            ->add(*outG->subMatrix(info.startIdx, info.length, tmpSrc_));
      }
    }
  }
}
// Example #25
void GatedRecurrentLayer::forwardBatch(int batchSize,
                                       size_t numSequences,
                                       const int* starts,
                                       MatrixPtr inputValue) {
  REGISTER_TIMER_INFO("GruFwBatchTime", getName().c_str());
  hl_gru_value gruValue;
  gruValue.gateWeight = (gateWeight_->getW())->getData();
  gruValue.stateWeight = (stateWeight_->getW())->getData();

  if (!batchValue_) {
    batchValue_.reset(new SequenceToBatch(useGpu_));
  }
  batchValue_->resizeOrCreateBatch(batchSize, numSequences, starts,
                                   reversed_);

  batchValue_->resizeOrCreate(*output_.value);
  batchValue_->copy(*inputValue, *gate_.value, /* seq2batch */true);
  // add the bias whenever it exists; the forward pass must not depend on
  // whether the bias gradient is allocated (it is absent at inference time)
  if (bias_) {
    gate_.value->addBias(*(bias_->getW()), 1);
  }

  {
    int numBatch = batchValue_->getNumBatch();
    int batchSize = 0;
    AsyncGpuBlock asyncGpuBlock;
    for (int n = 0; n < numBatch; n++) {
      MatrixPtr outputValueTmp = batchValue_->getBatchValue(n);
      gruValue.outputValue = outputValueTmp->getData();
      gruValue.gateValue =
        (batchValue_->getBatchValue(*gate_.value, n))->getData();
      gruValue.resetOutputValue =
        (batchValue_->getBatchValue(*resetOutput_.value, n))->getData();

      batchSize = outputValueTmp->getHeight();
      gruValue.prevOutValue =
        (n == 0 ? nullptr
                : (batchValue_->getBatchValue(n - 1, batchSize))->getData());

      {
        if (useGpu_) {
          GruCompute::forward<1>(gruValue, getSize(), batchSize);
        } else {
          GruCompute::forward<0>(gruValue, getSize(), batchSize);
        }
      }
    }
  }
  {
    batchValue_->copyBackSeq(*output_.value);
  }
}
// Example #26
void MixedLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  int size = getSize();
  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    resetOutput(batchSize, size);
  }

  MatrixPtr outV = getOutputValue();

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    if (projections_[i]) {
      projections_[i]->forward(&getInput(i), &output_, passType);
    }
  }

  std::vector<const Argument*> ins;
  for (auto& op : operators_) {
    ins.clear();
    for (auto& input_index : op->getConfig().input_indices()) {
      ins.push_back(&getInput(input_index));
    }
    op->forward(ins, &output_, passType);
  }

  /* add the bias-vector */
  if (biases_.get() != NULL) {
    REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str());
    outV->addBias(*(biases_->getW()), 1, sharedBias_);
  }

  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
void SlopeInterceptLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr inV = getInputValue(0);

  /* malloc memory for the output_ if necessary */
  size_t batchSize = inV->getHeight();
  size_t size = getSize();

  CHECK_EQ(size, inV->getWidth());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, size);
  }

  MatrixPtr outV = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwSlopeInterceptTimer", getName().c_str());
    outV->mulScalar(*inV, config_.slope());
    outV->add(config_.intercept());
  }
}
void SequenceConcatLayer::backward(const UpdateCallback& callback) {
  /* activation */
  backwardActivation();

  if (biases_ && biases_->getWGrad()) {
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);

    // increment the parameter's gradient update count
    biases_->getParameterPtr()->incUpdate(callback);
  }

  MatrixPtr inputGrad1 = getInputGrad(0);
  MatrixPtr inputGrad2 = getInputGrad(1);
  MatrixPtr outputGrad = getOutputGrad();
  auto startPositions1 = getInput(0).sequenceStartPositions->getVector(false);
  auto startPositions2 = getInput(1).sequenceStartPositions->getVector(false);

  size_t numSequences1 = startPositions1->getSize() - 1;
  size_t numSequences2 = startPositions2->getSize() - 1;

  CHECK_EQ(numSequences1, numSequences2);

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerBackward", getName().c_str());

    size_t offset = 0;
    size_t leftNumIns = 0;
    size_t rightNumIns = 0;
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      leftNumIns = starts1[seqId + 1] - starts1[seqId];
      if (inputGrad1) {
        inputGrad1->subMatrix(starts1[seqId], leftNumIns)
            ->add(*(outputGrad->subMatrix(offset, leftNumIns)));
      }
      offset += leftNumIns;

      rightNumIns = starts2[seqId + 1] - starts2[seqId];
      if (inputGrad2) {
        inputGrad2->subMatrix(starts2[seqId], rightNumIns)
            ->add(*(outputGrad->subMatrix(offset, rightNumIns)));
      }
      offset += rightNumIns;
    }
  }
}
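// Layout recap for the loop above: the concatenated output interleaves the
// two inputs per sequence, [seq0_left seq0_right seq1_left seq1_right ...],
// so the backward pass walks `offset` through the output gradient and
// scatter-adds each left/right slice back into its own input gradient.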
// Example #29
void MixedLayer::backward(const UpdateCallback& callback) {
  /* Do activation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBias_);

    /* increment the parameter's gradient update count */
    biases_->getParameterPtr()->incUpdate(callback);
  }

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    if (projections_[i]) {
      projections_[i]->backward(callback);
    }
  }

  for (auto& op : operators_) {
    op->backward();
  }
}
// Example #30
void DeConv3DLayer::forward(PassType passType) {
  Layer::forward(passType);
  int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  int outWidth = getSize();
  resetOutput(batchSize, outWidth);
  const MatrixPtr outMat = getOutputValue();

  REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str());
  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    const MatrixPtr &inMat = getInputValue(i);
    int M = M_[i];
    int N = N_[i];
    int K = K_[i];
    MatrixPtr wMat = weights_[i]->getW();
    Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
    for (int n = 0; n < batchSize; ++n) {
      real *inData = inMat->getData() + n * inMat->getStride();
      for (int g = 0; g < groups_[i]; ++g) {
        MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_);
        MatrixPtr wMatSub = wMat->subMatrix(g * K, K);
        MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K);
        colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0);
        inData += M * N;
      }
      colBuf_->col2Vol(outMat->getData() + n * outMat->getStride(),
                       numFilters_,
                       imgSizeD_[i],
                       imgSizeH_[i],
                       imgSizeW_[i],
                       filterSizeZ_[i],
                       filterSizeY_[i],
                       filterSize_[i],
                       strideZ_[i],
                       strideY_[i],
                       stride_[i],
                       paddingZ_[i],
                       paddingY_[i],
                       padding_[i],
                       1.0,
                       1.0);
    }
  }
  if (nullptr != this->biasParameter_) {
    this->addBias();
  }
  forwardActivation();
}
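// Pattern recap: this transposed convolution is a per-group GEMM,
// colBuf_g (K x N) = W_g (K x M) * in_g (M x N), followed by col2Vol, which
// scatter-adds the column buffer into the output volume (beta = 1.0), the
// exact inverse of the vol2Col gather used in backward().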