void ExpandLayer::backward(const UpdateCallback& callback) { if (biases_ && biases_->getWGrad()) { biases_->getWGrad()->collectBias(*getOutputGrad(), 1); /* Increasing the number of gradient */ biases_->getParameterPtr()->incUpdate(callback); } if (!getInputGrad(0)) return; MatrixPtr inputGrad = getInputGrad(0); MatrixPtr outputGrad = getOutputGrad(); auto cpuSeqStartPos = type_ ? getInput(1).subSequenceStartPositions : getInput(1).sequenceStartPositions; size_t numSequences = cpuSeqStartPos->getSize() - 1; const int* starts = cpuSeqStartPos->getData(false); CHECK_EQ(inputGrad->getWidth(), outputGrad->getWidth()); CHECK_EQ(outputGrad->getHeight(), (size_t)starts[numSequences]); AsyncGpuBlock asyncGpuBlock; // sum to get the grad real scale = 1; for (size_t sequenceId = 0; sequenceId < numSequences; sequenceId++) { // TODO(Dangqingqing) optimization for GPU int sequenceLength = starts[sequenceId + 1] - starts[sequenceId]; if (sequenceLength == 0) { // empty sequence continue; } MatrixPtr copyData = inputGrad->subMatrix(sequenceId, 1); copyData->collectBias( *outputGrad->subMatrix(starts[sequenceId], sequenceLength), scale); } }
void ScaleSubRegionLayer::forward(PassType passType) {
  Layer::forward(passType);
  auto in0 = getInput(0);

  // Resolve the spatial size; fall back to the configured image size when
  // the input carries no frame geometry.
  imgH_ = in0.getFrameHeight();
  imgW_ = in0.getFrameWidth();
  if (imgH_ == 0 || imgW_ == 0) {
    auto& conf = config_.inputs(0).scale_sub_region_conf();
    imgH_ = conf.image_conf().img_size_y();
    imgW_ = conf.image_conf().img_size();
  }

  MatrixPtr image = in0.value;
  size_t batchSize = image->getHeight();
  size_t spatialSize = imgH_ * imgW_;
  channelsNum_ = image->getWidth() / spatialSize;
  shape_ = TensorShape({batchSize, channelsNum_, imgH_, imgW_});

  resetOutput(batchSize, image->getWidth());
  auto& out = getOutput();
  out.setFrameHeight(imgH_);
  out.setFrameWidth(imgW_);

  // Each row of the indices input holds 6 values describing one region.
  MatrixPtr indices = getInputValue(1);
  indicesShape_ = TensorShape({batchSize, 6});

  REGISTER_TIMER_INFO("ScaleSubRegionForward", getName().c_str());
  BufferArgs inArgs;
  BufferArgs outArgs;
  inArgs.addArg(*image, shape_);
  inArgs.addArg(*indices, indicesShape_);
  outArgs.addArg(*out.value, shape_, ASSIGN_TO);
  forward_[0]->calc(inArgs, outArgs);
}
void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b) { EXPECT_EQ(a->getWidth(), b->getWidth()); EXPECT_EQ(a->getHeight(), b->getHeight()); EXPECT_EQ(a->isTransposed(), b->isTransposed()); for (size_t r = 0; r < a->getHeight(); ++r) { for (size_t c = 0; c < a->getWidth(); ++c) { EXPECT_FLOAT_EQ(a->getElement(r, c), b->getElement(r, c)); } } }
void SelectiveFullyConnectedLayer::backward(const UpdateCallback& callback) { backwardActivation(); MatrixPtr oGrad = getOutputGrad(); if (!fullOutput_) { interOutGrad_ = Matrix::createSparseMatrix(oGrad->getData(), interOutput_->getRows(), interOutput_->getCols(), interOutput_->getHeight(), interOutput_->getWidth(), interOutput_->getElementCnt(), FLOAT_VALUE, SPARSE_CSR, /*trans=*/false, /*useGpu=*/useGpu_); } else { interOutGrad_ = Matrix::create(oGrad->getData(), oGrad->getHeight(), oGrad->getWidth(), /*trans=*/false, /*useGpu=*/useGpu_); } if (biases_ && biases_->getWGrad()) { REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str()); biases_->getWGrad()->collectBias(*interOutGrad_, 1); biases_->getParameterPtr()->incUpdate(callback); } // backward is different from FullyConnectedLayer // because the weight is transposed for (size_t i = 0; i < inputNum_; i++) { AsyncGpuBlock block; MatrixPtr preGrad = getInputGrad(i); if (preGrad) { REGISTER_TIMER_INFO("BpMulTimer", getName().c_str()); preGrad->mul(*interOutGrad_, *weights_[i]->getW(), 1, 1); } MatrixPtr wGrad = weights_[i]->getWGrad(); if (wGrad) { REGISTER_TIMER_INFO("GradMulTimer", getName().c_str()); MatrixPtr input = getInputValue(i); wGrad->mul(*interOutGrad_->getTranspose(), *input, 1, 1); } { REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); weights_[i]->getParameterPtr()->incUpdate(callback); } } }
Error __must_check MKLDNNSoftmaxActivation::backward(Argument& act) { MatrixPtr outputV = act.value; MatrixPtr outputG = act.grad; Matrix::resizeOrCreate(sftMaxDot_, outputG->getHeight(), outputG->getWidth(), /* trans */ false, /* useGpu */ false); Matrix::resizeOrCreate(sftMaxSum_, outputG->getHeight(), 1, /* trans */ false, /* useGpu */ false); sftMaxDot_->dotMul(*outputG, *outputV); sftMaxSum_->colMerge(*sftMaxDot_); act.grad->softmaxDerivative(*act.value, *sftMaxSum_); return Error(); }
TEST(Arguments, Matrix) {
  MatrixPtr matrix = Matrix::create(100, 200);

  // A BufferArg built from a Matrix must preserve its 2-D shape and share
  // the same underlying data buffer.
  CheckBufferArg check = [=](const BufferArg& arg) {
    EXPECT_EQ(arg.shape().ndims(), 2U);
    EXPECT_EQ(arg.shape()[0], 100U);
    EXPECT_EQ(arg.shape()[1], 200U);
    EXPECT_EQ(arg.data(), matrix->getData());

    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getWidth(), matrix->getWidth());
    EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
  };

  BufferArgs argments;
  argments.addArg(*matrix);

  std::vector<CheckBufferArg> checkFunc;
  checkFunc.push_back(check);
  testBufferArgs(argments, checkFunc);
}
void FeatureMapExpandLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr inputV = getInputValue(0);
  size_t batchSize = getInput(0).getBatchSize();
  int imgSize = inputV->getWidth();
  resetOutput(batchSize, imgSize * numFilters_);
  MatrixPtr outputV = getOutputValue();

  {
    AsyncGpuBlock asyncGpuBlock;
    for (size_t i = 0; i < batchSize; ++i) {
      real* outData = outputV->getData() + i * imgSize * numFilters_;
      real* inData = inputV->getData() + i * imgSize;
      if (asRowVector_) {
        // Replicate the sample across numFilters_ rows.
        MatrixPtr outTmp =
            Matrix::create(outData, numFilters_, imgSize, false, useGpu_);
        MatrixPtr inTmp = Matrix::create(inData, 1, imgSize, false, useGpu_);
        outTmp->addRowVector(*inTmp);
      } else {
        // Replicate the sample across numFilters_ columns.
        MatrixPtr outTmp =
            Matrix::create(outData, imgSize, numFilters_, false, useGpu_);
        MatrixPtr inTmp = Matrix::create(inData, imgSize, 1, false, useGpu_);
        outTmp->addColVector(*inTmp);
      }
    }
  }
  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
void FeatureMapExpandLayer::backward(const UpdateCallback& callback) { MatrixPtr inGrad = getInputGrad(0); if (NULL == inGrad) { return; } MatrixPtr outGrad = getOutputGrad(); size_t batchSize = getInput(0).getBatchSize(); int imgSize = inGrad->getWidth(); /* Do activation */ { REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); backwardActivation(); } { AsyncGpuBlock asyncGpuBlock; if (asRowVector_) { for (size_t i = 0; i < batchSize; i++) { MatrixPtr outGradTmp = Matrix::create(outGrad->getData() + i * imgSize * numFilters_, numFilters_, imgSize, false, useGpu_); MatrixPtr inGradTmp = Matrix::create( inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_); inGradTmp->collectBias(*outGradTmp, 1); } } else { for (size_t i = 0; i < batchSize; i++) { MatrixPtr outGradTmp = Matrix::create(outGrad->getData() + i * imgSize * numFilters_, imgSize, numFilters_, false, useGpu_); MatrixPtr inGradTmp = Matrix::create( inGrad->getData() + i * imgSize, imgSize, 1, false, useGpu_); inGradTmp->sumRows(*outGradTmp, 1, 1); } } } }
void SlopeInterceptLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr inValue = getInputValue(0);

  /* malloc memory for the output_ if necessary */
  size_t batchSize = inValue->getHeight();
  size_t layerSize = getSize();
  CHECK_EQ(layerSize, inValue->getWidth());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, layerSize);
  }

  MatrixPtr outValue = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwSlopeInterceptTimer", getName().c_str());
    // out = slope * in + intercept
    outValue->mulScalar(*inValue, config_.slope());
    outValue->add(config_.intercept());
  }
}
/// Evaluates one batch of detection results against ground-truth labels,
/// accumulating per-class positive counts (numPos_) and true/false positive
/// statistics (via calcTFPos) for mAP computation.
/// @param arguments  arguments[0]: detection output, one row of 7 values per
///                   detection, first value is the image id; arguments[1]:
///                   label matrix, one row of 6 values per ground-truth box,
///                   first value is the class id, with sequence start
///                   positions delimiting images.
/// @return always 0 (the metric is reported elsewhere).
virtual real evalImp(std::vector<Argument>& arguments) {
  overlapThreshold_ = config_.overlap_threshold();
  backgroundId_ = config_.background_id();
  evaluateDifficult_ = config_.evaluate_difficult();
  apType_ = config_.ap_type();

  // Copy detections and labels into CPU matrices for evaluation.
  MatrixPtr detectTmpValue = arguments[0].value;
  Matrix::resizeOrCreate(cpuOutput_,
                         detectTmpValue->getHeight(),
                         detectTmpValue->getWidth(),
                         false,
                         false);
  MatrixPtr labelTmpValue = arguments[1].value;
  Matrix::resizeOrCreate(cpuLabel_,
                         labelTmpValue->getHeight(),
                         labelTmpValue->getWidth(),
                         false,
                         false);
  cpuOutput_->copyFrom(*detectTmpValue);
  cpuLabel_->copyFrom(*labelTmpValue);

  Argument label = arguments[1];
  const int* labelIndex = label.sequenceStartPositions->getData(false);
  size_t batchSize = label.getNumSequences();

  // Group ground-truth boxes by image, keyed by class id.
  vector<map<size_t, vector<NormalizedBBox>>> allGTBBoxes;
  vector<map<size_t, vector<pair<real, NormalizedBBox>>>> allDetectBBoxes;
  for (size_t n = 0; n < batchSize; ++n) {
    map<size_t, vector<NormalizedBBox>> bboxes;
    for (int i = labelIndex[n]; i < labelIndex[n + 1]; ++i) {
      vector<NormalizedBBox> bbox;
      getBBoxFromLabelData(cpuLabel_->getData() + i * 6, 1, bbox);
      int c = cpuLabel_->getData()[i * 6];
      bboxes[c].push_back(bbox[0]);
    }
    allGTBBoxes.push_back(bboxes);
  }

  // Group detected boxes by image, keyed by class id. Rows are assumed to be
  // ordered by image id; each row holds 7 values whose first entry is the
  // image id.
  size_t n = 0;
  const real* cpuOutputData = cpuOutput_->getData();
  for (size_t imgId = 0; imgId < batchSize; ++imgId) {
    map<size_t, vector<pair<real, NormalizedBBox>>> bboxes;
    // Bounds check BEFORE reading the row: the previous code dereferenced
    // (cpuOutputData + n * 7)[0] first, reading one row past the end of
    // cpuOutput_ after consuming the last detection.
    while (n < cpuOutput_->getHeight() &&
           static_cast<size_t>(cpuOutputData[n * 7]) == imgId) {
      vector<real> label;
      vector<real> score;
      vector<NormalizedBBox> bbox;
      getBBoxFromDetectData(cpuOutputData + n * 7, 1, label, score, bbox);
      bboxes[label[0]].push_back(make_pair(score[0], bbox[0]));
      ++n;
    }
    allDetectBBoxes.push_back(bboxes);
  }

  // Accumulate the positive count per class, optionally skipping boxes
  // flagged as difficult.
  for (size_t n = 0; n < batchSize; ++n) {
    for (map<size_t, vector<NormalizedBBox>>::iterator it =
             allGTBBoxes[n].begin();
         it != allGTBBoxes[n].end();
         ++it) {
      size_t count = 0;
      if (evaluateDifficult_) {
        count = it->second.size();
      } else {
        for (size_t i = 0; i < it->second.size(); ++i)
          if (!(it->second[i].isDifficult)) ++count;
      }
      // map::operator[] value-initializes absent keys to 0, so a single +=
      // covers both the first-insert and the accumulate cases (the old
      // find()/insert branch was redundant).
      numPos_[it->first] += count;
    }
  }

  // calcTFPos
  calcTFPos(batchSize, allGTBBoxes, allDetectBBoxes);

  return 0;
}