void ExpandLayer::backward(const UpdateCallback& callback) {
  if (biases_ && biases_->getWGrad()) {
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
    /* Increment the gradient update counter */
    biases_->getParameterPtr()->incUpdate(callback);
  }

  if (!getInputGrad(0)) return;
  MatrixPtr inputGrad = getInputGrad(0);
  MatrixPtr outputGrad = getOutputGrad();
  auto cpuSeqStartPos = type_ ? getInput(1).subSequenceStartPositions
                              : getInput(1).sequenceStartPositions;
  size_t numSequences = cpuSeqStartPos->getSize() - 1;
  const int* starts = cpuSeqStartPos->getData(false);

  CHECK_EQ(inputGrad->getWidth(), outputGrad->getWidth());
  CHECK_EQ(outputGrad->getHeight(), (size_t)starts[numSequences]);

  AsyncGpuBlock asyncGpuBlock;

  // sum over each expanded sequence to get the input gradient
  real scale = 1;
  for (size_t sequenceId = 0; sequenceId < numSequences; sequenceId++) {
    // TODO(Dangqingqing) optimization for GPU
    int sequenceLength = starts[sequenceId + 1] - starts[sequenceId];
    if (sequenceLength == 0) {
      // empty sequence
      continue;
    }
    MatrixPtr copyData = inputGrad->subMatrix(sequenceId, 1);
    copyData->collectBias(
        *outputGrad->subMatrix(starts[sequenceId], sequenceLength), scale);
  }
}
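A sketch of the math this loop implements, inferred from the code above (the notation is introduced here, not from the source): the forward pass copies input row $s$ into the $L_s$ output rows of sequence $s$, so the backward pass accumulates the corresponding output-gradient rows back into that single input row:

\[
\frac{\partial L}{\partial x_s} \mathrel{+}= \sum_{t=\mathrm{starts}[s]}^{\mathrm{starts}[s+1]-1} \frac{\partial L}{\partial y_t}.
\]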
void DeConv3DLayer::backward(const UpdateCallback &callback) {
  backwardActivation();
  int batchSize = getOutputGrad()->getHeight();
  if (biases_ && biases_->getWGrad()) {
    bpropBiases();
    biases_->getParameterPtr()->incUpdate(callback);
  }
  REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str());
  for (size_t i = 0; i < inputLayers_.size(); ++i) {
    if (weights_[i]->getWGrad() || this->needGradient_) {
      int M = M_[i];
      int N = N_[i];
      int K = K_[i];
      Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
      const MatrixPtr &inMat = getInputValue(i);
      for (int n = 0; n < batchSize; ++n) {
        // unfold the output gradient of sample n into column form
        colBuf_->vol2Col(
            getOutputGrad()->getData() + n * getOutputGrad()->getStride(),
            numFilters_,
            imgSizeD_[i],
            imgSizeH_[i],
            imgSizeW_[i],
            filterSizeZ_[i],
            filterSizeY_[i],
            filterSize_[i],
            strideZ_[i],
            strideY_[i],
            stride_[i],
            paddingZ_[i],
            paddingY_[i],
            padding_[i]);
        if (weights_[i]->getWGrad()) {
          // accumulate the filter gradient, one group at a time
          real *inData = inMat->getData() + n * inMat->getStride();
          for (int g = 0; g < groups_[i]; ++g) {
            MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K);
            MatrixPtr wGradMatSub =
                weights_[i]->getWGrad()->subMatrix(g * K, K);
            MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_);
            wGradMatSub->mul(
                *colBufDataSub, *(inMatSub->getTranspose()), 1.0, 1.0);
            inData += M * N;
          }
        }
        if (getInputGrad(i)) {
          // propagate the gradient back to the input, one group at a time
          real *preGrad =
              getInputGrad(i)->getData() + n * getInputGrad(i)->getStride();
          for (int g = 0; g < groups_[i]; ++g) {
            MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K);
            MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K);
            MatrixPtr inGradMatSub =
                Matrix::create(preGrad, M, N, false, useGpu_);
            inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 1.0);
            preGrad += M * N;
          }
        }
      }
      weights_[i]->getParameterPtr()->incUpdate(callback);
    }
  }
}
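A sketch of the two per-group GEMMs above in matrix form, inferred from the subMatrix shapes and mul calls (symbols introduced here, not from the source): with $\tilde{G}$ the vol2Col-unfolded output gradient ($K \times N$), $X$ the group's input slice ($M \times N$), and $W$ the group's filter block ($K \times M$), each sample contributes

\[
\Delta W \mathrel{+}= \tilde{G}\, X^{\top}, \qquad
\Delta X \mathrel{+}= W^{\top}\, \tilde{G}.
\]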
void SequenceConcatLayer::backward(const UpdateCallback& callback) {
  /* activation */
  backwardActivation();

  if (biases_ && biases_->getWGrad()) {
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
    // increment the gradient update counter
    biases_->getParameterPtr()->incUpdate(callback);
  }

  MatrixPtr inputGrad1 = getInputGrad(0);
  MatrixPtr inputGrad2 = getInputGrad(1);
  MatrixPtr outputGrad = getOutputGrad();
  auto startPositions1 = getInput(0).sequenceStartPositions->getVector(false);
  auto startPositions2 = getInput(1).sequenceStartPositions->getVector(false);

  size_t numSequences1 = startPositions1->getSize() - 1;
  size_t numSequences2 = startPositions2->getSize() - 1;

  CHECK_EQ(numSequences1, numSequences2);

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerBackward", getName().c_str());

    size_t offset = 0;
    size_t leftNumIns = 0;
    size_t rightNumIns = 0;
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      leftNumIns = starts1[seqId + 1] - starts1[seqId];
      if (inputGrad1) {
        inputGrad1->subMatrix(starts1[seqId], leftNumIns)
            ->add(*(outputGrad->subMatrix(offset, leftNumIns)));
      }
      offset += leftNumIns;

      rightNumIns = starts2[seqId + 1] - starts2[seqId];
      if (inputGrad2) {
        inputGrad2->subMatrix(starts2[seqId], rightNumIns)
            ->add(*(outputGrad->subMatrix(offset, rightNumIns)));
      }
      offset += rightNumIns;
    }
  }
}
void CosSimVecMatLayer::backward(const UpdateCallback& callback) {
  CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed";

  MatrixPtr inV0 = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);
  MatrixPtr inG0 = getInputGrad(0);
  MatrixPtr inG1 = getInputGrad(1);
  MatrixPtr outV = getOutputValue();
  MatrixPtr outG = getOutputGrad();

  CHECK(inV0 && inV1 && inG0 && inG1 && outV && outG);
  size_t batchSize = inV0->getHeight();

  REGISTER_TIMER_INFO("BwCosVMTimer", getName().c_str());

  for (size_t i = 0; i < batchSize; i++) {
    // point the temporary row/matrix views at the i-th sample
    tmpRow0->setData(inV0->rowBuf(i));
    tmpRow1->setData(inG0->rowBuf(i));
    tmpMtx0->setData(inV1->rowBuf(i));
    tmpMtx1->setData(inG1->rowBuf(i));
    tmpRow2->setData(outV->rowBuf(i));
    tmpRow3->setData(outG->rowBuf(i));

    BufferArgs inputs;
    BufferArgs outputs;
    inputs.addArg(*tmpRow3);
    inputs.addArg(*tmpRow2);
    inputs.addArg(*tmpMtx0);
    inputs.addArg(*tmpRow0);
    outputs.addArg(*tmpMtx1, ADD_TO);
    outputs.addArg(*tmpRow1, ADD_TO);

    backward_[0]->calc(inputs, outputs);
  }
}
void InterpolationLayer::backward(const UpdateCallback& callback) {
  MatrixPtr outG = getOutputGrad();
  MatrixPtr weightV = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);
  MatrixPtr inV2 = getInputValue(2);
  MatrixPtr inG0 = getInputGrad(0);
  MatrixPtr inG1 = getInputGrad(1);
  MatrixPtr inG2 = getInputGrad(2);

  size_t batchSize = inV1->getHeight();
  size_t dataDim = inV1->getWidth();

  REGISTER_TIMER_INFO("BwInterpTimer", getName().c_str());

  if (inG0) {
    Matrix::resizeOrCreate(tmpMatrix, batchSize, dataDim, false, useGpu_);

    // inG0 += outG .* (inV1 - inV2)
    tmpMatrix->sub(*inV1, *inV2);
    inG0->rowDotMul(0, *outG, *tmpMatrix);
  }

  if (inG1) {
    // inG1 += outG * weight
    inG1->addRowScale(0, *outG, *weightV);
  }

  if (inG2) {
    // inG2 += outG * weightLast
    inG2->addRowScale(0, *outG, *weightLast_);
  }
}
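A sketch of the math, inferred from the in-code comments (symbols introduced here): with per-sample weight $w$ (input 0), $a$ = inV1, $b$ = inV2, and weightLast_ presumably holding $1 - w$, the forward pass is $y = w\,a + (1 - w)\,b$, so

\[
\frac{\partial L}{\partial w} = \Big\langle \frac{\partial L}{\partial y},\, a - b \Big\rangle, \qquad
\frac{\partial L}{\partial a} = w\,\frac{\partial L}{\partial y}, \qquad
\frac{\partial L}{\partial b} = (1 - w)\,\frac{\partial L}{\partial y},
\]

where the first term is a per-row dot product (rowDotMul) because $w$ is a scalar per sample.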
void PowerLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inV0 = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);
  MatrixPtr inG0 = getInputGrad(0);
  MatrixPtr inG1 = getInputGrad(1);
  MatrixPtr outV = getOutputValue();
  MatrixPtr outG = getOutputGrad();

  size_t batchSize = inV1->getHeight();
  size_t dataDim = inV1->getWidth();

  {
    REGISTER_TIMER_INFO("BwPowerTimer", getName().c_str());
    Matrix::resizeOrCreate(tmpMtx, batchSize, dataDim, false, useGpu_);

    if (inG0) {
      tmpMtx->log2(*inV1);
      tmpMtx->dotMul(*tmpMtx, *outV);

      // inG0 += outG .* (log(inV1) * outV)
      inG0->rowDotMul(0, *outG, *tmpMtx);
    }

    if (inG1) {
      // tmp = (outV / inV1) * inV0
      tmpMtx->dotDiv(*outV, *inV1);
      tmpMtx->rowScale(0, *tmpMtx, *inV0);

      inG1->addDotMul(*outG, *tmpMtx, 1, 1);
    }
  }
}
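A sketch of the gradients being computed, inferred from the in-code comments (notation introduced here): with per-sample exponent $p$ (input 0) and element-wise base $x$ (input 1), the forward pass is $y = x^{p}$, so

\[
\frac{\partial y}{\partial p} = y \ln x, \qquad
\frac{\partial y}{\partial x} = p\,\frac{y}{x},
\]

which matches the $\log(\text{inV1}) \cdot \text{outV}$ and $(\text{outV}/\text{inV1}) \cdot \text{inV0}$ factors above, chained with outG and reduced over each row for the scalar exponent.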
void AverageLayer::backward(const UpdateCallback& callback) {
  SequencePoolLayer::backward(callback);

  if (getInputGrad(0)) {
    getInputGrad(0)->sequenceAvgBackward(
        *getOutputGrad(), *startPositions_->getVector(useGpu_), mode_);
  }
}
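The exact scaling applied by sequenceAvgBackward depends on mode_; as a minimal sketch for the plain averaging mode, every timestep of a sequence of length $T_s$ receives an equal share of that sequence's output gradient,

\[
\frac{\partial L}{\partial x_t} \mathrel{+}= \frac{1}{T_s}\,\frac{\partial L}{\partial y_s}, \qquad t \in \text{sequence } s.
\]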
void SlopeInterceptLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inG = getInputGrad(0);
  MatrixPtr outG = getOutputGrad();

  if (inG) {
    REGISTER_TIMER_INFO("BwSlopeInterceptTimer", getName().c_str());
    inG->add(*outG, config_.slope());
  }
}
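The corresponding math is a one-liner: for the affine map $y = a x + b$ suggested by the layer name and its config fields, only the slope $a$ enters the backward pass,

\[
\frac{\partial L}{\partial x} \mathrel{+}= a\,\frac{\partial L}{\partial y},
\]

which is what inG->add(*outG, config_.slope()) accumulates; slope and intercept are fixed configuration values rather than learned parameters, so no parameter gradient is produced here.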
void ScaleSubRegionLayer::backward(const UpdateCallback& callback) {
  REGISTER_TIMER_INFO("ScaleSubRegionBackward", getName().c_str());

  BufferArgs inArgs;
  BufferArgs outArgs;
  inArgs.addArg(*getOutputGrad(), shape_);
  inArgs.addArg(*getInputValue(1), indicesShape_);
  outArgs.addArg(*getInputGrad(0), shape_, ADD_TO);
  backward_[0]->calc(inArgs, outArgs);
}
void BlockExpandLayer::backward(const UpdateCallback& callback) {
  /* Calculate the input layer's error */
  if (getInputGrad(0)) {
    BufferArgs inputs;
    BufferArgs outputs;
    inputs.addArg(*getOutputGrad(), outputShape_);
    outputs.addArg(*getInputGrad(0), inputShape_, ADD_TO);
    backward_[0]->calc(inputs, outputs);
  }
}
void MaxOutLayer::backward(const UpdateCallback& callback) {
  (void)callback;

  /* Do derivation */
  MatrixPtr inputG = getInputGrad(0);
  MatrixPtr outG = getOutputGrad();

  if (inputG) {
    inputG->maxoutBackward(*outG, *maxoutId_, outputChannels_, groups_);
  }
}
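A sketch of the routing maxoutBackward performs, assuming maxoutId_ stores the argmax indices recorded in the forward pass: within each group of groups_ channels, only the channel that produced the maximum receives the output gradient,

\[
\frac{\partial L}{\partial x_k} \mathrel{+}=
\begin{cases}
\dfrac{\partial L}{\partial y} & \text{if } k = \operatorname*{arg\,max}_{j \in \text{group}} x_j,\\
0 & \text{otherwise.}
\end{cases}
\]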
void SelectiveFullyConnectedLayer::backward(const UpdateCallback& callback) {
  backwardActivation();
  MatrixPtr oGrad = getOutputGrad();
  if (!fullOutput_) {
    interOutGrad_ = Matrix::createSparseMatrix(oGrad->getData(),
                                               interOutput_->getRows(),
                                               interOutput_->getCols(),
                                               interOutput_->getHeight(),
                                               interOutput_->getWidth(),
                                               interOutput_->getElementCnt(),
                                               FLOAT_VALUE,
                                               SPARSE_CSR,
                                               /*trans=*/false,
                                               /*useGpu=*/useGpu_);
  } else {
    interOutGrad_ = Matrix::create(oGrad->getData(),
                                   oGrad->getHeight(),
                                   oGrad->getWidth(),
                                   /*trans=*/false,
                                   /*useGpu=*/useGpu_);
  }

  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*interOutGrad_, 1);
    biases_->getParameterPtr()->incUpdate(callback);
  }

  // backward is different from FullyConnectedLayer
  // because the weight is transposed
  for (size_t i = 0; i < inputNum_; i++) {
    AsyncGpuBlock block;
    MatrixPtr preGrad = getInputGrad(i);
    if (preGrad) {
      REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
      preGrad->mul(*interOutGrad_, *weights_[i]->getW(), 1, 1);
    }

    MatrixPtr wGrad = weights_[i]->getWGrad();
    if (wGrad) {
      REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
      MatrixPtr input = getInputValue(i);
      wGrad->mul(*interOutGrad_->getTranspose(), *input, 1, 1);
    }

    {
      REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
      weights_[i]->getParameterPtr()->incUpdate(callback);
    }
  }
}
void DeConv3DLayer::bpropBiases() {
  MatrixPtr biases = Matrix::create(biases_->getWGrad()->getData(),
                                    1,
                                    biases_->getWGrad()->getElementCnt(),
                                    false,
                                    useGpu_);
  const MatrixPtr &outGradMat = getOutputGrad();

  if (this->sharedBiases_) {
    biases->collectSharedBias(*outGradMat, 1.0f);
  } else {
    biases->collectBias(*outGradMat, 1.0f);
  }
}
void CudnnConvLayer::backward(const UpdateCallback &callback) {
  backwardActivation();

  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("CudnnConvBpBiasTimer", getName().c_str());
    for (int g = 0; g < groups_[0]; ++g) {
      real *biasGrad = biases_->getWGrad()->getData() + biasOffset_ * g;
      real *outGrad = getOutputGrad()->getData() + outputOffset_ * g;
      hl_convolution_backward_bias(biasDesc_, biasGrad, outputDesc_, outGrad);
    }
    biases_->getParameterPtr()->incUpdate(callback);
  }

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    projections_[i]->backward(callback);
  }
}
void ConcatenateLayer2::backward(const UpdateCallback& callback) {
  /* Do derivation */
  {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  AsyncGpuBlock block;
  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("Concat2BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBias_);
    biases_->getParameterPtr()->incUpdate(callback);
  }

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    if (projections_[i]) {
      projections_[i]->backward(callback);
    }
  }
}
void MultiplexLayer::backward(const UpdateCallback& callback) {
  /* Do derivation */
  {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  MatrixPtr outG = getOutputGrad();

  {
    REGISTER_TIMER_INFO("BwLMultiplexTimer", getName().c_str());
    AsyncGpuBlock block;
    for (const CopyInfo& info : copySchedule_) {
      if (getInputGrad(info.copyIdx + 1)) {
        getInputGrad(info.copyIdx + 1)
            ->subMatrix(info.startIdx, info.length, tmpDest_)
            ->add(*outG->subMatrix(info.startIdx, info.length, tmpSrc_));
      }
    }
  }
}
void ConcatenateLayer::backward(const UpdateCallback& callback) {
  (void)callback;

  /* Do derivation */
  {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  const MatrixPtr& out = getOutputGrad();
  int offset = 0;

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    const MatrixPtr& in = getInputGrad(i);
    size_t inSize = getInputValue(i)->getWidth();
    if (in) {
      in->addAtOffset(*out, offset);
    }
    offset += inSize;
  }
}
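A sketch of what addAtOffset does here, matching the offset bookkeeping above (notation introduced here): each input receives the column slice of the output gradient that it occupied in the concatenation,

\[
\frac{\partial L}{\partial x^{(i)}} \mathrel{+}=
\left.\frac{\partial L}{\partial y}\right|_{[\,:,\; o_i : o_i + w_i]}, \qquad o_i = \sum_{j < i} w_j,
\]

where $w_i$ is the width of input $i$.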
void FeatureMapExpandLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inGrad = getInputGrad(0);
  if (NULL == inGrad) {
    return;
  }
  MatrixPtr outGrad = getOutputGrad();
  size_t batchSize = getInput(0).getBatchSize();
  int imgSize = inGrad->getWidth();

  /* Do derivation */
  {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  {
    AsyncGpuBlock asyncGpuBlock;
    if (asRowVector_) {
      for (size_t i = 0; i < batchSize; i++) {
        MatrixPtr outGradTmp =
            Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
                           numFilters_,
                           imgSize,
                           false,
                           useGpu_);
        MatrixPtr inGradTmp = Matrix::create(
            inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_);
        inGradTmp->collectBias(*outGradTmp, 1);
      }
    } else {
      for (size_t i = 0; i < batchSize; i++) {
        MatrixPtr outGradTmp =
            Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
                           imgSize,
                           numFilters_,
                           false,
                           useGpu_);
        MatrixPtr inGradTmp = Matrix::create(
            inGrad->getData() + i * imgSize, imgSize, 1, false, useGpu_);
        inGradTmp->sumRows(*outGradTmp, 1, 1);
      }
    }
  }
}
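Both branches implement the same reduction, just under different memory layouts: since the forward pass replicates each sample's feature map numFilters_ times, the backward pass sums the numFilters_ replicated gradient slices back into the single input gradient,

\[
\frac{\partial L}{\partial x_i} \mathrel{+}= \sum_{k=1}^{\text{numFilters\_}} \frac{\partial L}{\partial y_{i,k}},
\]

with collectBias doing the column-wise sum in the row-vector layout and sumRows doing it in the column-vector layout.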
void MixedLayer::backward(const UpdateCallback& callback) {
  /* Do derivation */
  {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  if (biases_ && biases_->getWGrad()) {
    REGISTER_TIMER_INFO("BpBiasTimer", getName().c_str());
    biases_->getWGrad()->collectBias(*getOutputGrad(), 1, sharedBias_);

    /* Increment the gradient update counter */
    biases_->getParameterPtr()->incUpdate(callback);
  }

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    if (projections_[i]) {
      projections_[i]->backward(callback);
    }
  }

  for (auto& op : operators_) {
    op->backward();
  }
}