Example #1
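ExpandLayer::backward first accumulates the bias gradient from the output gradient, then collapses the expansion: for each sequence, the block of expanded output-gradient rows is summed back into the single input-gradient row it was expanded from (empty sequences are skipped).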
void ExpandLayer::backward(const UpdateCallback& callback) {
  if (biases_ && biases_->getWGrad()) {
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
    /* Increment the gradient update count for this parameter */
    biases_->getParameterPtr()->incUpdate(callback);
  }

  if (!getInputGrad(0)) return;
  MatrixPtr inputGrad = getInputGrad(0);
  MatrixPtr outputGrad = getOutputGrad();
  auto cpuSeqStartPos = type_ ? getInput(1).subSequenceStartPositions
                              : getInput(1).sequenceStartPositions;
  size_t numSequences = cpuSeqStartPos->getSize() - 1;
  const int* starts = cpuSeqStartPos->getData(false);

  CHECK_EQ(inputGrad->getWidth(), outputGrad->getWidth());
  CHECK_EQ(outputGrad->getHeight(), (size_t)starts[numSequences]);

  AsyncGpuBlock asyncGpuBlock;

  // Sum each sequence's expanded output-gradient rows to get the input grad
  real scale = 1;
  for (size_t sequenceId = 0; sequenceId < numSequences; sequenceId++) {
    // TODO(Dangqingqing) optimization for GPU
    int sequenceLength = starts[sequenceId + 1] - starts[sequenceId];
    if (sequenceLength == 0) {
      // empty sequence
      continue;
    }
    MatrixPtr copyData = inputGrad->subMatrix(sequenceId, 1);
    copyData->collectBias(
        *outputGrad->subMatrix(starts[sequenceId], sequenceLength), scale);
  }
}
Example #2
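ScaleSubRegionLayer::forward reads the frame size from the input, falling back to the image dimensions in the layer config when the input carries none, derives the channel count from the input width, and dispatches the actual sub-region scaling to the registered function through BufferArgs; the second input holds one 6-value region descriptor per sample.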
void ScaleSubRegionLayer::forward(PassType passType) {
  Layer::forward(passType);
  const auto& in0 = getInput(0);  // avoid copying the Argument
  imgH_ = in0.getFrameHeight();
  imgW_ = in0.getFrameWidth();
  if (imgH_ == 0 || imgW_ == 0) {
    auto& conf = config_.inputs(0).scale_sub_region_conf();
    imgH_ = conf.image_conf().img_size_y();
    imgW_ = conf.image_conf().img_size();
  }
  MatrixPtr imgV = in0.value;
  size_t batchSize = imgV->getHeight();
  size_t spatialSize = imgH_ * imgW_;
  channelsNum_ = imgV->getWidth() / spatialSize;
  shape_ = TensorShape({batchSize, channelsNum_, imgH_, imgW_});

  resetOutput(batchSize, imgV->getWidth());
  auto& out = getOutput();
  out.setFrameHeight(imgH_);
  out.setFrameWidth(imgW_);

  MatrixPtr indicesV = getInputValue(1);
  indicesShape_ = TensorShape({batchSize, 6});

  REGISTER_TIMER_INFO("ScaleSubRegionForward", getName().c_str());
  BufferArgs inArgs;
  BufferArgs outArgs;
  inArgs.addArg(*imgV, shape_);
  inArgs.addArg(*indicesV, indicesShape_);
  outArgs.addArg(*out.value, shape_, ASSIGN_TO);
  forward_[0]->calc(inArgs, outArgs);
}
Example #3
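checkMatrixEqual is a gtest helper that asserts two matrices agree in width, height, transpose flag, and every element (using EXPECT_FLOAT_EQ for the values).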
void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b) {
  EXPECT_EQ(a->getWidth(), b->getWidth());
  EXPECT_EQ(a->getHeight(), b->getHeight());
  EXPECT_EQ(a->isTransposed(), b->isTransposed());
  for (size_t r = 0; r < a->getHeight(); ++r) {
    for (size_t c = 0; c < a->getWidth(); ++c) {
      EXPECT_FLOAT_EQ(a->getElement(r, c), b->getElement(r, c));
    }
  }
}
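A minimal usage sketch for the helper above, assuming legacy Paddle's Matrix API (the sizes and the randomizeUniform fill are illustrative, not from the original test):

  MatrixPtr a = Matrix::create(4, 8, /*trans=*/false, /*useGpu=*/false);
  a->randomizeUniform();   // fill a with random values
  MatrixPtr b = Matrix::create(4, 8, /*trans=*/false, /*useGpu=*/false);
  b->copyFrom(*a);         // b becomes an element-wise copy of a
  checkMatrixEqual(a, b);  // every EXPECT_* check passes

Example #4
SelectiveFullyConnectedLayer::backward wraps the output gradient in a sparse CSR view when only selected output columns were computed, or a dense view otherwise, then performs the usual bias, input-gradient, and weight-gradient updates. As the in-code comment notes, the multiplication order differs from FullyConnectedLayer because the weight matrix is stored transposed.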
void SelectiveFullyConnectedLayer::backward(const UpdateCallback& callback) {
  backwardActivation();
  MatrixPtr oGrad = getOutputGrad();
  if (!fullOutput_) {
    interOutGrad_ = Matrix::createSparseMatrix(oGrad->getData(),
                                               interOutput_->getRows(),
                                               interOutput_->getCols(),
                                               interOutput_->getHeight(),
                                               interOutput_->getWidth(),
                                               interOutput_->getElementCnt(),
                                               FLOAT_VALUE,
                                               SPARSE_CSR,
                                               /*trans=*/false,
                                               /*useGpu=*/useGpu_);
  } else {
    interOutGrad_ = Matrix::create(oGrad->getData(),
                                   oGrad->getHeight(),
                                   oGrad->getWidth(),
                                   /*trans=*/false,
                                   /*useGpu=*/useGpu_);
  }

  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*interOutGrad_, 1);
    biases_->getParameterPtr()->incUpdate(callback);
  }

  // backward is different from FullyConnectedLayer
  // because the weight is transposed
  for (size_t i = 0; i < inputNum_; i++) {
    AsyncGpuBlock block;
    MatrixPtr preGrad = getInputGrad(i);
    if (preGrad) {
      REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
      preGrad->mul(*interOutGrad_, *weights_[i]->getW(), 1, 1);
    }

    MatrixPtr wGrad = weights_[i]->getWGrad();
    if (wGrad) {
      REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
      MatrixPtr input = getInputValue(i);
      wGrad->mul(*interOutGrad_->getTranspose(), *input, 1, 1);
    }

    {
      REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
      weights_[i]->getParameterPtr()->incUpdate(callback);
    }
  }
}
Example #5
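MKLDNNSoftmaxActivation::backward computes the standard softmax Jacobian-vector product on CPU, grad_x = y ⊙ (grad_y − rowsum(y ⊙ grad_y)): dotMul forms the elementwise product, colMerge reduces it to one sum per row, and softmaxDerivative combines the two.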
Error __must_check MKLDNNSoftmaxActivation::backward(Argument& act) {
  MatrixPtr outputV = act.value;
  MatrixPtr outputG = act.grad;
  Matrix::resizeOrCreate(sftMaxDot_,
                         outputG->getHeight(),
                         outputG->getWidth(),
                         /* trans */ false,
                         /* useGpu */ false);
  Matrix::resizeOrCreate(sftMaxSum_,
                         outputG->getHeight(),
                         1,
                         /* trans */ false,
                         /* useGpu */ false);
  sftMaxDot_->dotMul(*outputG, *outputV);
  sftMaxSum_->colMerge(*sftMaxDot_);
  act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
  return Error();
}
Example #6
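This test builds a 100x200 matrix, adds it to a BufferArgs container, and checks that the resulting BufferArg reports the same shape and aliases the same underlying data as the source matrix, both through the raw data() pointer and through the typed matrix&lt;DEVICE_TYPE_CPU&gt;() view.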
TEST(Arguments, Matrix) {
  MatrixPtr matrix = Matrix::create(100, 200);
  CheckBufferArg check = [=](const BufferArg& arg) {
    EXPECT_EQ(arg.shape().ndims(), 2U);
    EXPECT_EQ(arg.shape()[0], 100U);
    EXPECT_EQ(arg.shape()[1], 200U);
    EXPECT_EQ(arg.data(), matrix->getData());

    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getWidth(), matrix->getWidth());
    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
  };

  BufferArgs arguments;
  arguments.addArg(*matrix);
  std::vector<CheckBufferArg> checkFunc;
  checkFunc.push_back(check);
  testBufferArgs(arguments, checkFunc);
}
Example #7
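FeatureMapExpandLayer::forward tiles each input row into numFilters_ copies. Depending on asRowVector_, the per-sample output block is viewed either as numFilters_ x imgSize (the input added as a row vector to every row) or as imgSize x numFilters_ (the input added as a column vector to every column); the activation is applied afterwards.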
void FeatureMapExpandLayer::forward(PassType passType) {
  Layer::forward(passType);
  MatrixPtr inputV = getInputValue(0);
  size_t batchSize = getInput(0).getBatchSize();
  int imgSize = inputV->getWidth();
  resetOutput(batchSize, imgSize * numFilters_);

  MatrixPtr outputV = getOutputValue();

  {
    AsyncGpuBlock asyncGpuBlock;
    if (asRowVector_) {
      for (size_t i = 0; i < batchSize; i++) {
        MatrixPtr outVTmp =
            Matrix::create(outputV->getData() + i * imgSize * numFilters_,
                           numFilters_,
                           imgSize,
                           false,
                           useGpu_);
        MatrixPtr inVTmp = Matrix::create(
            inputV->getData() + i * imgSize, 1, imgSize, false, useGpu_);
        outVTmp->addRowVector(*inVTmp);
      }
    } else {
      for (size_t i = 0; i < batchSize; i++) {
        MatrixPtr outVTmp =
            Matrix::create(outputV->getData() + i * imgSize * numFilters_,
                           imgSize,
                           numFilters_,
                           false,
                           useGpu_);
        MatrixPtr inVTmp = Matrix::create(
            inputV->getData() + i * imgSize, imgSize, 1, false, useGpu_);
        outVTmp->addColVector(*inVTmp);
      }
    }
  }
  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
Example #8
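The matching backward pass first runs the activation's backward, then reduces the expanded gradient back to the input shape: collectBias sums the numFilters_ rows of each per-sample block in the row-vector case, and sumRows sums each row of the imgSize x numFilters_ block (accumulating across the filter copies) otherwise.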
void FeatureMapExpandLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inGrad = getInputGrad(0);
  if (NULL == inGrad) {
    return;
  }
  MatrixPtr outGrad = getOutputGrad();
  size_t batchSize = getInput(0).getBatchSize();
  int imgSize = inGrad->getWidth();
  /* Backward through the activation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }
  {
    AsyncGpuBlock asyncGpuBlock;
    if (asRowVector_) {
      for (size_t i = 0; i < batchSize; i++) {
        MatrixPtr outGradTmp =
            Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
                           numFilters_,
                           imgSize,
                           false,
                           useGpu_);
        MatrixPtr inGradTmp = Matrix::create(
            inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_);
        inGradTmp->collectBias(*outGradTmp, 1);
      }
    } else {
      for (size_t i = 0; i < batchSize; i++) {
        MatrixPtr outGradTmp =
            Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
                           imgSize,
                           numFilters_,
                           false,
                           useGpu_);
        MatrixPtr inGradTmp = Matrix::create(
            inGrad->getData() + i * imgSize, imgSize, 1, false, useGpu_);
        inGradTmp->sumRows(*outGradTmp, 1, 1);
      }
    }
  }
}
Example #9
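SlopeInterceptLayer::forward applies the elementwise affine map y = slope * x + intercept, with both coefficients taken from the layer config; input and output share the layer's size.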
void SlopeInterceptLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr inV = getInputValue(0);

  /* malloc memory for the output_ if necessary */
  size_t batchSize = inV->getHeight();
  size_t size = getSize();

  CHECK_EQ(size, inV->getWidth());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, size);
  }

  MatrixPtr outV = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwSlopeInterceptTimer", getName().c_str());
    outV->mulScalar(*inV, config_.slope());
    outV->add(config_.intercept());
  }
}
Example #10
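evalImp performs one batch step of a detection-mAP evaluator: it copies detections and labels to CPU, groups ground-truth boxes per image and per class (each label row is 6 values, the first being the class id), groups detections the same way (each detection row is 7 values, the first being the image id), accumulates per-class positive counts (optionally skipping boxes marked difficult), and hands the matching of true/false positives to calcTFPos.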
  virtual real evalImp(std::vector<Argument>& arguments) {
    overlapThreshold_ = config_.overlap_threshold();
    backgroundId_ = config_.background_id();
    evaluateDifficult_ = config_.evaluate_difficult();
    apType_ = config_.ap_type();

    MatrixPtr detectTmpValue = arguments[0].value;
    Matrix::resizeOrCreate(cpuOutput_,
                           detectTmpValue->getHeight(),
                           detectTmpValue->getWidth(),
                           false,
                           false);

    MatrixPtr labelTmpValue = arguments[1].value;
    Matrix::resizeOrCreate(cpuLabel_,
                           labelTmpValue->getHeight(),
                           labelTmpValue->getWidth(),
                           false,
                           false);

    cpuOutput_->copyFrom(*detectTmpValue);
    cpuLabel_->copyFrom(*labelTmpValue);

    Argument label = arguments[1];
    const int* labelIndex = label.sequenceStartPositions->getData(false);
    size_t batchSize = label.getNumSequences();

    vector<map<size_t, vector<NormalizedBBox>>> allGTBBoxes;
    vector<map<size_t, vector<pair<real, NormalizedBBox>>>> allDetectBBoxes;

    for (size_t n = 0; n < batchSize; ++n) {
      map<size_t, vector<NormalizedBBox>> bboxes;
      for (int i = labelIndex[n]; i < labelIndex[n + 1]; ++i) {
        vector<NormalizedBBox> bbox;
        getBBoxFromLabelData(cpuLabel_->getData() + i * 6, 1, bbox);
        int c = cpuLabel_->getData()[i * 6];
        bboxes[c].push_back(bbox[0]);
      }
      allGTBBoxes.push_back(bboxes);
    }

    size_t n = 0;
    const real* cpuOutputData = cpuOutput_->getData();
    for (size_t imgId = 0; imgId < batchSize; ++imgId) {
      map<size_t, vector<pair<real, NormalizedBBox>>> bboxes;
      // Bounds-check n before reading row n, so the final ++n cannot read
      // one row past the end of cpuOutput_.
      while (n < cpuOutput_->getHeight() &&
             static_cast<size_t>(cpuOutputData[n * 7]) == imgId) {
        vector<real> label;
        vector<real> score;
        vector<NormalizedBBox> bbox;
        getBBoxFromDetectData(cpuOutputData + n * 7, 1, label, score, bbox);
        bboxes[label[0]].push_back(make_pair(score[0], bbox[0]));
        ++n;
      }
      allDetectBBoxes.push_back(bboxes);
    }

    for (size_t n = 0; n < batchSize; ++n) {
      for (map<size_t, vector<NormalizedBBox>>::iterator it =
               allGTBBoxes[n].begin();
           it != allGTBBoxes[n].end();
           ++it) {
        size_t count = 0;
        if (evaluateDifficult_) {
          count = it->second.size();
        } else {
          for (size_t i = 0; i < it->second.size(); ++i)
            if (!(it->second[i].isDifficult)) ++count;
        }
        // map::operator[] value-initializes a missing key to 0, so one
        // accumulation handles both first and repeated occurrences.
        numPos_[it->first] += count;
      }
    }

    // calcTFPos
    calcTFPos(batchSize, allGTBBoxes, allDetectBBoxes);

    return 0;
  }