void ExpandLayer::backward(const UpdateCallback& callback) { if (biases_ && biases_->getWGrad()) { biases_->getWGrad()->collectBias(*getOutputGrad(), 1); /* Increasing the number of gradient */ biases_->getParameterPtr()->incUpdate(callback); } if (!getInputGrad(0)) return; MatrixPtr inputGrad = getInputGrad(0); MatrixPtr outputGrad = getOutputGrad(); auto cpuSeqStartPos = type_ ? getInput(1).subSequenceStartPositions : getInput(1).sequenceStartPositions; size_t numSequences = cpuSeqStartPos->getSize() - 1; const int* starts = cpuSeqStartPos->getData(false); CHECK_EQ(inputGrad->getWidth(), outputGrad->getWidth()); CHECK_EQ(outputGrad->getHeight(), (size_t)starts[numSequences]); AsyncGpuBlock asyncGpuBlock; // sum to get the grad real scale = 1; for (size_t sequenceId = 0; sequenceId < numSequences; sequenceId++) { // TODO(Dangqingqing) optimization for GPU int sequenceLength = starts[sequenceId + 1] - starts[sequenceId]; if (sequenceLength == 0) { // empty sequence continue; } MatrixPtr copyData = inputGrad->subMatrix(sequenceId, 1); copyData->collectBias( *outputGrad->subMatrix(starts[sequenceId], sequenceLength), scale); } }
void SequenceConcatLayer::backward(const UpdateCallback& callback) { /* activation */ backwardActivation(); if (biases_ && biases_->getWGrad()) { biases_->getWGrad()->collectBias(*getOutputGrad(), 1); // Increasing the number of gradient biases_->getParameterPtr()->incUpdate(callback); } MatrixPtr inputGrad1 = getInputGrad(0); MatrixPtr inputGrad2 = getInputGrad(1); MatrixPtr outputGrad = getOutputGrad(); auto startPositions1 = getInput(0).sequenceStartPositions->getVector(false); auto startPositions2 = getInput(1).sequenceStartPositions->getVector(false); size_t numSequences1 = startPositions1->getSize() - 1; size_t numSequences2 = startPositions2->getSize() - 1; CHECK_EQ(numSequences1, numSequences2); const int* starts1 = startPositions1->getData(); const int* starts2 = startPositions2->getData(); { AsyncGpuBlock asyncGpuBlock; REGISTER_TIMER_INFO("SequenceConcatLayerBackward", getName().c_str()); size_t offset = 0; size_t leftNumIns = 0; size_t rightNumIns = 0; for (size_t seqId = 0; seqId < numSequences1; ++seqId) { leftNumIns = starts1[seqId + 1] - starts1[seqId]; if (inputGrad1) { inputGrad1->subMatrix(starts1[seqId], leftNumIns) ->add(*(outputGrad->subMatrix(offset, leftNumIns))); } offset += leftNumIns; rightNumIns = starts2[seqId + 1] - starts2[seqId]; if (inputGrad2) { inputGrad2->subMatrix(starts2[seqId], rightNumIns) ->add(*(outputGrad->subMatrix(offset, rightNumIns))); } offset += rightNumIns; } } }
/**
 * Forward pass of MultiplexLayer.
 *
 * Input 0 supplies the `ids` vector; inputs 1..N are value matrices that must
 * all share the height and width of input 1. A copy schedule is computed from
 * the ids, the output is reserved with the shape of input 1, and each entry
 * of the schedule copies rows [startIdx, startIdx + length) from input
 * `copyIdx + 1` into the same rows of the output. The activation is applied
 * last.
 *
 * @param passType pass type forwarded to Layer::forward.
 */
void MultiplexLayer::forward(PassType passType) {
  Layer::forward(passType);

  IVectorPtr copyIds = getInput(0).ids;
  MatrixPtr inV1 = getInputValue(1);

  // Every value input must match input 1 in shape, and there must be one id
  // per row.
  CHECK_EQ(copyIds->getSize(), inV1->getHeight());
  for (size_t i = 2; i < inputLayers_.size(); i++) {
    CHECK_EQ(inV1->getHeight(), getInputValue(i)->getHeight());
    CHECK_EQ(inV1->getWidth(), getInputValue(i)->getWidth());
  }

  // The first input carries the ids, so it is not counted as a candidate.
  calculateCopySchedule(copyIds, inputLayers_.size() - 1);

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(inV1->getHeight(), inV1->getWidth());
  }

  MatrixPtr output = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwLMultplexingTimer", getName().c_str());
    AsyncGpuBlock gpuBlock;
    for (const CopyInfo& step : copySchedule_) {
      // tmpDest_ / tmpSrc_ are passed through to subMatrix as its third
      // argument.
      auto dst = output->subMatrix(step.startIdx, step.length, tmpDest_);
      auto src = getInputValue(step.copyIdx + 1)
                     ->subMatrix(step.startIdx, step.length, tmpSrc_);
      dst->copyFrom(*src);
    }
  }

  /* activation */
  {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
/**
 * Forward pass of DeConv3DLayer.
 *
 * Resets the output to (batchSize x getSize()). Then, for every input layer
 * and every sample in the batch, each group's weight sub-matrix is multiplied
 * with the matching slice of the input row into the shared column buffer, and
 * the buffer is written into the sample's output row with col2Vol. Bias is
 * added when a bias parameter exists, and the activation is applied last.
 *
 * @param passType pass type forwarded to Layer::forward.
 */
void DeConv3DLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  int outWidth = getSize();
  resetOutput(batchSize, outWidth);
  const MatrixPtr outMat = getOutputValue();

  REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str());
  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    const MatrixPtr &inMat = getInputValue(i);
    const int inRows = M_[i];
    const int inCols = N_[i];
    const int rowsPerGroup = K_[i];
    const auto numGroups = groups_[i];
    MatrixPtr wMat = weights_[i]->getW();

    // One column buffer holds the results of all groups for one sample.
    Matrix::resizeOrCreate(
        colBuf_, rowsPerGroup * numGroups, inCols, false, useGpu_);

    for (int n = 0; n < batchSize; ++n) {
      // Cursor over the n-th input row; it moves forward by one
      // (inRows x inCols) slice per group.
      real *inCursor = inMat->getData() + n * inMat->getStride();
      for (int g = 0; g < numGroups; ++g) {
        MatrixPtr inSlice =
            Matrix::create(inCursor, inRows, inCols, false, useGpu_);
        MatrixPtr wSlice = wMat->subMatrix(g * rowsPerGroup, rowsPerGroup);
        MatrixPtr colSlice = colBuf_->subMatrix(g * rowsPerGroup, rowsPerGroup);
        // NOTE(review): the trailing 1.0 / 0.0 are presumably the product and
        // destination scaling factors -- confirm against Matrix::mul.
        colSlice->mul(*wSlice, *inSlice, 1.0, 0.0);
        inCursor += inRows * inCols;
      }

      real *outRow = outMat->getData() + n * outMat->getStride();
      // NOTE(review): the final two 1.0 arguments look like scaling factors
      // -- confirm against Matrix::col2Vol.
      colBuf_->col2Vol(outRow,
                       numFilters_,
                       imgSizeD_[i],
                       imgSizeH_[i],
                       imgSizeW_[i],
                       filterSizeZ_[i],
                       filterSizeY_[i],
                       filterSize_[i],
                       strideZ_[i],
                       strideY_[i],
                       stride_[i],
                       paddingZ_[i],
                       paddingY_[i],
                       padding_[i],
                       1.0,
                       1.0);
    }
  }

  if (this->biasParameter_ != nullptr) {
    this->addBias();
  }
  forwardActivation();
}
void MultiplexLayer::backward(const UpdateCallback& callback) { /* Do derivation */ { REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str()); backwardActivation(); } MatrixPtr outG = getOutputGrad(); { REGISTER_TIMER_INFO("BwLMultiplexTimer", getName().c_str()); AsyncGpuBlock block; for (const CopyInfo& info : copySchedule_) { if (getInputGrad(info.copyIdx + 1)) { getInputGrad(info.copyIdx + 1) ->subMatrix(info.startIdx, info.length, tmpDest_) ->add(*outG->subMatrix(info.startIdx, info.length, tmpSrc_)); } } } }
/**
 * Forward pass of SequenceConcatLayer.
 *
 * Both inputs must have width getSize() and the same number of sequences.
 * For each sequence index, the rows of that sequence from input 0 are written
 * to the output, immediately followed by the rows of the same sequence from
 * input 1. The output's sequence start positions are set to the element-wise
 * sum of the two inputs' start positions. Bias (if any) is added afterwards
 * and the activation is applied last.
 *
 * @param passType pass type forwarded to Layer::forward.
 */
void SequenceConcatLayer::forward(PassType passType) {
  Layer::forward(passType);

  size_t dim = getSize();

  const Argument& input1 = getInput(0);
  size_t numSequences1 = input1.getNumSequences();
  auto startPositions1 = input1.sequenceStartPositions->getVector(false);

  const Argument& input2 = getInput(1);
  size_t numSequences2 = input2.getNumSequences();
  auto startPositions2 = input2.sequenceStartPositions->getVector(false);

  // Validate input 0.
  CHECK_EQ(dim, input1.value->getWidth());
  CHECK_EQ(startPositions1->getData()[numSequences1], input1.getBatchSize());
  CHECK_EQ(numSequences1, startPositions1->getSize() - 1);

  // Validate input 1.
  CHECK_EQ(dim, input2.value->getWidth());
  CHECK_EQ(startPositions2->getData()[numSequences2], input2.getBatchSize());
  CHECK_EQ(numSequences2, startPositions2->getSize() - 1);

  CHECK_EQ(numSequences1, numSequences2);

  MatrixPtr leftValue = getInputValue(0);
  MatrixPtr rightValue = getInputValue(1);

  // reset output
  reserveOutput(leftValue->getHeight() + rightValue->getHeight(), dim);

  MatrixPtr outputValue = getOutputValue();

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerForward", getName().c_str());

    // Row cursor into the output; advances by the length of every slice
    // that is written.
    size_t outRow = 0;
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      const size_t leftRows = starts1[seqId + 1] - starts1[seqId];
      {
        auto dst = outputValue->subMatrix(outRow, leftRows);
        auto src = leftValue->subMatrix(starts1[seqId], leftRows);
        dst->assign(*src);
      }
      outRow += leftRows;

      const size_t rightRows = starts2[seqId + 1] - starts2[seqId];
      {
        auto dst = outputValue->subMatrix(outRow, rightRows);
        auto src = rightValue->subMatrix(starts2[seqId], rightRows);
        dst->assign(*src);
      }
      outRow += rightRows;
    }

    // modify the sequenceStartPositions
    ICpuGpuVector::resizeOrCreate(
        output_.sequenceStartPositions, numSequences1 + 1, false);

    int* outStarts = output_.sequenceStartPositions->getMutableData(false);

    // Each concatenated sequence starts where the two source sequences
    // started, added together.
    for (size_t pos = 0; pos <= numSequences1; ++pos) {
      outStarts[pos] = starts1[pos] + starts2[pos];
    }
  }

  if (biases_) {
    MatrixPtr outV = getOutputValue();
    outV->addBias(*(biases_->getW()), 1);
  }

  /* activation */
  forwardActivation();
}