void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) { bool syncFlag = hl_get_sync_flag(); /* Calculate the W-gradient for the current layer */ if (weight_->getWGrad()) { REGISTER_TIMER_INFO("GradMulTimer", getName().c_str()); weight_->getWGrad()->mul(out_->grad->getTranspose(), in_->value, 1, 1); } // If callback does not change value, backprop error asynchronously so that // we can do the callback concurrently. // This is still a little bit dangerous since theoretically for // SyncMultiGpuMachine it is possible that the value copyback can still // happen at the same time as the error backprop where the value is being // used. hl_set_sync_flag(false); /* Calculate the input layers error */ if (in_->grad) { REGISTER_TIMER_INFO("BpMulTimer", getName().c_str()); in_->grad->mul(out_->grad, weight_->getW(), 1, 1); } hl_set_sync_flag(syncFlag); parameter_->incUpdate(callback); }
/**
 * Forward pass of the projection-based concatenate layer.
 *
 * Resets the output to (batch size of input 0) x getSize(), points every
 * projection's output argument at its own column range of the layer output
 * (value, and gradient when one exists), runs each projection on its input,
 * then applies the optional bias and the activation.
 *
 * @param passType pass type forwarded to Layer::forward() and to every
 *                 projection.
 */
void ConcatenateLayer2::forward(PassType passType) {
  Layer::forward(passType);

  int numRows = getInput(0).getBatchSize();
  int numCols = getSize();
  resetOutput(numRows, numCols);

  // Bind each projection output to its column slice of the layer output, so
  // the projections write straight into output_.
  for (size_t idx = 0; idx < projections_.size(); ++idx) {
    size_t colBegin = projCol_[idx].first;
    size_t colEnd = projCol_[idx].second;
    auto& slice = projOutput_[idx];
    slice.value = output_.value->subColMatrix(colBegin, colEnd);
    if (output_.grad) {
      slice.grad = output_.grad->subColMatrix(colBegin, colEnd);
    }
  }

  {
    // The guard object lives exactly as long as the projection loop.
    AsyncGpuBlock block;
    for (size_t idx = 0; idx < inputLayers_.size(); ++idx) {
      projections_[idx]->forward(&getInput(idx), &projOutput_[idx], passType);
    }
  }

  /* add the bias-vector */
  if (biases_) {
    REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str());
    output_.value->addBias(*(biases_->getW()), 1, sharedBias_);
  }

  /* activation */
  {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
void forwardBias() { if (!biases_) { sampleOut_.value->zeroMem(); } else { real* bias = biases_->getW()->getData(); real* sampleOut = sampleOut_.value->getData(); for (size_t i = 0; i < samples_.size(); ++i) { sampleOut[i] = bias[samples_[i].labelId]; } } }
/**
 * Forward pass of the transposed full-matrix projection.
 *
 * Calls mul() on the output value with the input value and the transpose of
 * the weight, passing 1 for both trailing scale arguments.
 * NOTE(review): presumably this adds in * W^T onto the existing output value
 * -- confirm against Matrix::mul.
 */
void TransposedFullMatrixProjection::forward() {
  REGISTER_TIMER_INFO("FwMulTimer", getName().c_str());
  auto weightTransposed = weight_->getW()->getTranspose();
  out_->value->mul(in_->value, weightTransposed, 1, 1);
}
/**
 * Forward pass: concatenates two sequence inputs, sequence by sequence.
 *
 * For every sequence index i, the rows of sequence i from input 0 are copied
 * into the output, immediately followed by the rows of sequence i from
 * input 1. The output start positions are the element-wise sums of the two
 * inputs' start positions. The optional bias and the activation are applied
 * afterwards.
 *
 * Aborts through CHECK_EQ when an input's width differs from getSize(), when
 * an input's start-position vector is inconsistent with its sequence count or
 * batch size, or when the two inputs hold different numbers of sequences.
 *
 * NOTE(review): both inputs are dereferenced through sequenceStartPositions
 * without a null test -- presumably callers always feed sequence data here;
 * confirm.
 *
 * @param passType pass type forwarded to Layer::forward().
 */
void SequenceConcatLayer::forward(PassType passType) {
  Layer::forward(passType);

  const size_t dim = getSize();

  const Argument& input1 = getInput(0);
  const size_t numSequences1 = input1.getNumSequences();
  auto startPositions1 = input1.sequenceStartPositions->getVector(false);

  const Argument& input2 = getInput(1);
  const size_t numSequences2 = input2.getNumSequences();
  auto startPositions2 = input2.sequenceStartPositions->getVector(false);

  // Check each start-position vector's length BEFORE reading its
  // [numSequences] element, so an inconsistent input fails on the check
  // rather than on an out-of-range read.
  CHECK_EQ(dim, input1.value->getWidth());
  CHECK_EQ(numSequences1, startPositions1->getSize() - 1);
  CHECK_EQ(startPositions1->getData()[numSequences1], input1.getBatchSize());

  CHECK_EQ(dim, input2.value->getWidth());
  CHECK_EQ(numSequences2, startPositions2->getSize() - 1);
  CHECK_EQ(startPositions2->getData()[numSequences2], input2.getBatchSize());

  CHECK_EQ(numSequences1, numSequences2);

  MatrixPtr inputValue1 = getInputValue(0);
  MatrixPtr inputValue2 = getInputValue(1);

  // reset output: one output row per row of either input
  reserveOutput(inputValue1->getHeight() + inputValue2->getHeight(), dim);

  MatrixPtr outputValue = getOutputValue();

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerForward", getName().c_str());

    // Next free output row.
    size_t offset = 0;
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      // Rows of this sequence taken from the first input.
      const size_t leftNumIns = starts1[seqId + 1] - starts1[seqId];
      outputValue->subMatrix(offset, leftNumIns)
          ->assign(*(inputValue1->subMatrix(starts1[seqId], leftNumIns)));
      offset += leftNumIns;

      // Rows of the same sequence taken from the second input.
      const size_t rightNumIns = starts2[seqId + 1] - starts2[seqId];
      outputValue->subMatrix(offset, rightNumIns)
          ->assign(*(inputValue2->subMatrix(starts2[seqId], rightNumIns)));
      offset += rightNumIns;
    }

    // modify the sequenceStartPositions: each output sequence starts at the
    // sum of the two inputs' start offsets.
    ICpuGpuVector::resizeOrCreate(
        output_.sequenceStartPositions, numSequences1 + 1, false);

    int* tgtBuf = output_.sequenceStartPositions->getMutableData(false);

    for (size_t seqId = 0; seqId < numSequences1 + 1; ++seqId) {
      tgtBuf[seqId] = starts1[seqId] + starts2[seqId];
    }
  }

  if (biases_.get() != nullptr) {
    MatrixPtr outV = getOutputValue();
    outV->addBias(*(biases_->getW()), 1);
  }

  /* activation */
  forwardActivation();
}