void CosSimVecMatLayer::forward(PassType passType) {
  Layer::forward(passType);
  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";

  MatrixPtr inV0 = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);

  size_t batchSize = inV0->getHeight();
  size_t numKeys = getSize();

  CHECK_EQ(batchSize, inV1->getHeight());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, numKeys);
  }

  MatrixPtr outV = getOutputValue();
  CHECK(outV && inV0 && inV1);

  REGISTER_TIMER_INFO("FwCosVMTimer", getName().c_str());
  for (size_t i = 0; i < batchSize; i++) {
    tmpRow0->setData(inV0->rowBuf(i));
    tmpMtx0->setData(inV1->rowBuf(i));
    tmpRow2->setData(outV->rowBuf(i));

    BufferArgs inputs;
    BufferArgs outputs;
    inputs.addArg(*tmpMtx0);
    inputs.addArg(*tmpRow0);
    outputs.addArg(*tmpRow2, ASSIGN_TO);
    forward_[0]->calc(inputs, outputs);
  }
}
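// Illustrative only: a minimal standalone sketch (plain C++, not PaddlePaddle's
// BufferArgs/Matrix API) of the per-sample computation delegated to forward_[0]
// above -- the cosine similarity between one input vector and each row of a
// matrix. The k x d row-major layout and the epsilon handling are assumptions;
// the real reshaping of inV1 is configured elsewhere in the layer.
#include <cmath>
#include <vector>

std::vector<float> cosSimVecMat(const std::vector<float>& x,  // length d
                                const std::vector<float>& m,  // k * d, row-major
                                size_t k,
                                size_t d,
                                float eps = 1e-6f) {
  std::vector<float> out(k, 0.0f);
  float xNorm = 0.0f;
  for (size_t j = 0; j < d; ++j) xNorm += x[j] * x[j];
  xNorm = std::sqrt(xNorm);
  for (size_t i = 0; i < k; ++i) {
    float dot = 0.0f, rowNorm = 0.0f;
    for (size_t j = 0; j < d; ++j) {
      dot += x[j] * m[i * d + j];
      rowNorm += m[i * d + j] * m[i * d + j];
    }
    out[i] = dot / (xNorm * std::sqrt(rowNorm) + eps);  // one output per row
  }
  return out;
}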
void MultiplexLayer::forward(PassType passType) {
  Layer::forward(passType);

  IVectorPtr copyIds = getInput(0).ids;
  MatrixPtr inV1 = getInputValue(1);
  CHECK_EQ(copyIds->getSize(), inV1->getHeight());
  for (size_t i = 2; i < inputLayers_.size(); i++) {
    CHECK_EQ(inV1->getHeight(), getInputValue(i)->getHeight());
    CHECK_EQ(inV1->getWidth(), getInputValue(i)->getWidth());
  }

  calculateCopySchedule(copyIds, inputLayers_.size() - 1);
  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(inV1->getHeight(), inV1->getWidth());
  }

  MatrixPtr outV = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwLMultplexingTimer", getName().c_str());
    AsyncGpuBlock block;
    for (const CopyInfo& info : copySchedule_) {
      outV->subMatrix(info.startIdx, info.length, tmpDest_)
          ->copyFrom(*getInputValue(info.copyIdx + 1)
                          ->subMatrix(info.startIdx, info.length, tmpSrc_));
    }
  }

  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
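// Illustrative only: the copy loop above consumes (startIdx, length, copyIdx)
// runs. A plausible sketch of how such a schedule can be built from copyIds --
// consecutive rows that select the same input are merged into one run, so each
// run turns into a single sub-matrix copyFrom. The CopyRun struct is a
// stand-in for the layer's CopyInfo; calculateCopySchedule itself is not shown
// above, so this is an assumption about its behavior, not its implementation.
#include <vector>

struct CopyRun {
  int startIdx;  // first row of the run in the output
  int length;    // number of consecutive rows in the run
  int copyIdx;   // which data input (0-based) the rows are copied from
};

std::vector<CopyRun> buildCopySchedule(const std::vector<int>& copyIds) {
  std::vector<CopyRun> schedule;
  for (int row = 0; row < static_cast<int>(copyIds.size()); ++row) {
    if (!schedule.empty() && schedule.back().copyIdx == copyIds[row]) {
      ++schedule.back().length;  // extend the current run
    } else {
      schedule.push_back({row, 1, copyIds[row]});  // start a new run
    }
  }
  return schedule;
}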
void CosSimLayer::forward(PassType passType) {
  Layer::forward(passType);
  /* malloc memory for the output_ if necessary */
  int batchSize = getInputValue(0)->getHeight();
  int size = getSize();
  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";

  {
    REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str());
    reserveOutput(batchSize, size);
  }

  MatrixPtr outV = getOutputValue();
  /* activation */ {
    REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str());
    MatrixPtr prevOut1 = getInputValue(0);
    MatrixPtr prevOut2 = getInputValue(1);

    CHECK(outV && prevOut1 && prevOut2);

    BufferArgs inputs;
    BufferArgs outputs;
    inputs.addArg(*prevOut1);
    inputs.addArg(*prevOut2);
    outputs.addArg(*outV, ASSIGN_TO);
    forward_[0]->calc(inputs, outputs);
  }
}
void PowerLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr inV0 = getInputValue(0);
  MatrixPtr inV1 = getInputValue(1);

  size_t batchSize = inV1->getHeight();
  size_t dataDim = inV1->getWidth();

  CHECK_EQ(getSize(), dataDim);
  CHECK_EQ(1U, inV0->getWidth());
  CHECK_EQ(batchSize, inV0->getHeight());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, dataDim);
  }

  MatrixPtr outV = getOutputValue();

  {
    REGISTER_TIMER_INFO("FwPowerTimer", getName().c_str());
    outV->rowPow(0, *inV1, *inV0);
  }
}
void ExpandLayer::forward(PassType passType) {
  Layer::forward(passType);
  // Expand layer should have exactly 2 inputs: one for data, one for size
  CHECK_EQ(2U, inputLayers_.size());

  // using two inputs:
  // * first one for data;
  // * second one only for sequence info
  const Argument& shapeInput = getInput(1);
  const Argument& dataInput = getInput(0);
  size_t outputBatchSize = shapeInput.getBatchSize();
  auto startPositions = type_ ? shapeInput.subSequenceStartPositions
                              : shapeInput.sequenceStartPositions;
  size_t numSequences = startPositions->getSize() - 1;
  const int* starts = startPositions->getData(false);

  CHECK_EQ(starts[numSequences], shapeInput.getBatchSize());
  if (type_) {
    // when trans_type = seq, input[1] must have subsequences
    CHECK_EQ(shapeInput.hasSubseq(), 1UL);
    CHECK_EQ(dataInput.getNumSequences(), shapeInput.getNumSequences());
  } else {
    CHECK_EQ(dataInput.getBatchSize(), shapeInput.getNumSequences());
  }

  // set the output sequence info to the shape input's sequence info
  output_.sequenceStartPositions = shapeInput.sequenceStartPositions;
  if (shapeInput.hasSubseq()) {
    output_.subSequenceStartPositions = shapeInput.subSequenceStartPositions;
  }

  // reserve output: expand the output to the batch size of the sequence data
  reserveOutput(outputBatchSize, dataInput.value->getWidth());

  MatrixPtr inputValue = getInputValue(0);
  MatrixPtr outputValue = getOutputValue();

  ICpuGpuVector::resizeOrCreate(expandStartsPos_, outputBatchSize, false);
  int* expandStarts = expandStartsPos_->getMutableData(false);
  for (size_t sequenceId = 0; sequenceId < numSequences; ++sequenceId) {
    int sequenceLength = starts[sequenceId + 1] - starts[sequenceId];
    for (int j = 0; j < sequenceLength; j++) {
      expandStarts[starts[sequenceId] + j] = sequenceId;
    }
  }

  outputValue->copyByRowIndex(*inputValue,
                              *expandStartsPos_->getVector(useGpu_));

  if (biases_.get() != NULL) {
    outputValue->addBias(*(biases_->getW()), 1);
  }
}
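// Illustrative only: a minimal standalone sketch (std::vector instead of
// Matrix/ICpuGpuVector) of the index map built above, for the non-seq case
// where each data row corresponds to one shape sequence. Every output row of
// sequence `seq` points back at data row `seq`, so copyByRowIndex simply
// repeats that data row for every position of the sequence in the shape input.
#include <vector>

std::vector<std::vector<float>> expandByShape(
    const std::vector<std::vector<float>>& data,  // one row per sequence
    const std::vector<int>& starts) {             // sequence start positions
  size_t numSequences = starts.size() - 1;
  std::vector<std::vector<float>> out(starts[numSequences]);
  for (size_t seq = 0; seq < numSequences; ++seq) {
    for (int pos = starts[seq]; pos < starts[seq + 1]; ++pos) {
      out[pos] = data[seq];  // expandStarts[pos] == seq, so copy row `seq`
    }
  }
  return out;
}
// e.g. data = {a, b}, starts = {0, 3, 5}  ->  out = {a, a, a, b, b}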
void ResizeLayer::forward(PassType passType) {
  Layer::forward(passType);
  const Argument& input = getInput(0);
  size_t height = input.value->getHeight();
  size_t width = input.value->getWidth();
  CHECK_EQ((height * width) % getSize(), 0UL);

  reserveOutput(height * width / getSize(), getSize());
  MatrixPtr tmp =
      Matrix::create(output_.value->getData(), height, width, false, useGpu_);
  tmp->assign(*input.value);
}
void HierarchicalSigmoidLayer::forward(PassType passType) {
  Layer::forward(passType);

  /* malloc memory for the output_ if necessary */
  int batchSize = getInputValue(0)->getHeight();
  int size = getSize();
  reserveOutput(batchSize, size);
  Matrix::resizeOrCreate(preOutput_.value,
                         batchSize,
                         codeLength_,
                         /* trans */ false,
                         useGpu(deviceId_));
  Matrix::resizeOrCreate(preOutput_.grad,
                         batchSize,
                         codeLength_,
                         /* trans */ false,
                         useGpu(deviceId_));

  IVectorPtr label = getInput(*getLabelLayer()).ids;

  preOutput_.value->zeroMem();

  /* add the bias-vector */
  if (biases_.get() != NULL) {
    preOutput_.value->addByBitCode(numClasses_, *label, *biases_->getW());
  }
  for (size_t i = 0; i < inputLayers_.size() - 1; ++i) {
    MatrixPtr input = getInputValue(i);
    preOutput_.value->mulByBitCode(
        numClasses_, *label, *weights_[i]->getW(), *input);
  }
  // keep consistent with the clipping in the following softrelu
  preOutput_.value->clip(-40.0, 40.0);
  preOutput_.value->sumByBitCode(numClasses_, *label, *output_.value,
                                 -1);  // scaleSum
  preOutput_.value->softrelu(*preOutput_.value);
  MatrixPtr sum =
      Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_));
  preOutput_.value->rowSum(*sum);
  output_.value->add(*sum);
}
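// Illustrative only: a standalone sketch of the per-sample quantity that the
// sumByBitCode(..., -1) + softrelu + rowSum sequence above accumulates,
// assuming z[j] are the clipped pre-activations along the label's code path
// and bits[j] are the label's binary code bits (the bit-code layout itself is
// defined outside this sketch). With softrelu(z) = log(1 + exp(z)), each term
// is the binary cross-entropy of sigmoid(z[j]) against bits[j]:
//   loss = sum_j ( log(1 + exp(z[j])) - bits[j] * z[j] )
#include <cmath>
#include <vector>

double hierarchicalSigmoidLoss(const std::vector<double>& z,
                               const std::vector<bool>& bits) {
  double loss = 0.0;
  for (size_t j = 0; j < z.size(); ++j) {
    loss += std::log1p(std::exp(z[j]));  // softrelu term, summed by rowSum
    if (bits[j]) loss -= z[j];           // sumByBitCode term with scale -1
  }
  return loss;
}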
void ConcatenateLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  int size = getSize();
  reserveOutput(batchSize, size);

  const MatrixPtr& out = getOutputValue();
  int offset = 0;

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    const MatrixPtr& in = getInputValue(i);
    size_t inSize = in->getWidth();
    out->assignAtOffset(*in, offset);
    offset += inSize;
  }
  CHECK_EQ(size, offset);

  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
void SlopeInterceptLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr inV = getInputValue(0);

  /* malloc memory for the output_ if necessary */
  size_t batchSize = inV->getHeight();
  size_t size = getSize();

  CHECK_EQ(size, inV->getWidth());

  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(batchSize, size);
  }

  MatrixPtr outV = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwSlopeInterceptTimer", getName().c_str());
    outV->mulScalar(*inV, config_.slope());
    outV->add(config_.intercept());
  }
}
void SequenceConcatLayer::forward(PassType passType) {
  Layer::forward(passType);

  size_t dim = getSize();

  const Argument& input1 = getInput(0);
  size_t numSequences1 = input1.getNumSequences();
  auto startPositions1 = input1.sequenceStartPositions->getVector(false);

  const Argument& input2 = getInput(1);
  size_t numSequences2 = input2.getNumSequences();
  auto startPositions2 = input2.sequenceStartPositions->getVector(false);

  CHECK_EQ(dim, input1.value->getWidth());
  CHECK_EQ(startPositions1->getData()[numSequences1], input1.getBatchSize());
  CHECK_EQ(numSequences1, startPositions1->getSize() - 1);

  CHECK_EQ(dim, input2.value->getWidth());
  CHECK_EQ(startPositions2->getData()[numSequences2], input2.getBatchSize());
  CHECK_EQ(numSequences2, startPositions2->getSize() - 1);

  CHECK_EQ(numSequences1, numSequences2);

  MatrixPtr inputValue1 = getInputValue(0);
  MatrixPtr inputValue2 = getInputValue(1);

  // reset output
  reserveOutput(inputValue1->getHeight() + inputValue2->getHeight(), dim);

  MatrixPtr outputValue = getOutputValue();

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerForward", getName().c_str());

    size_t offset = 0;
    size_t leftNumIns = 0;
    size_t rightNumIns = 0;
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      leftNumIns = starts1[seqId + 1] - starts1[seqId];
      outputValue->subMatrix(offset, leftNumIns)
          ->assign(*(inputValue1->subMatrix(starts1[seqId], leftNumIns)));
      offset += leftNumIns;

      rightNumIns = starts2[seqId + 1] - starts2[seqId];
      outputValue->subMatrix(offset, rightNumIns)
          ->assign(*(inputValue2->subMatrix(starts2[seqId], rightNumIns)));
      offset += rightNumIns;
    }

    // modify the sequenceStartPositions
    ICpuGpuVector::resizeOrCreate(
        output_.sequenceStartPositions, numSequences1 + 1, false);

    int* tgtBuf = output_.sequenceStartPositions->getMutableData(false);

    for (size_t seqId = 0; seqId < numSequences1 + 1; ++seqId) {
      tgtBuf[seqId] = starts1[seqId] + starts2[seqId];
    }
  }

  if (biases_.get() != NULL) {
    MatrixPtr outV = getOutputValue();
    outV->addBias(*(biases_->getW()), 1);
  }

  /* activation */
  forwardActivation();
}
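// Illustrative only: a std::vector sketch of the interleaving performed above.
// The i-th output sequence is input1's i-th sequence followed by input2's i-th
// sequence, which is why the merged start positions are the element-wise sum
// starts1[i] + starts2[i]. Row-vectors stand in for the layer's Matrix rows.
#include <vector>

void concatSequences(const std::vector<std::vector<float>>& rows1,
                     const std::vector<int>& starts1,
                     const std::vector<std::vector<float>>& rows2,
                     const std::vector<int>& starts2,
                     std::vector<std::vector<float>>* outRows,
                     std::vector<int>* outStarts) {
  size_t numSequences = starts1.size() - 1;  // == starts2.size() - 1
  outRows->clear();
  outStarts->resize(numSequences + 1);
  for (size_t seq = 0; seq <= numSequences; ++seq) {
    (*outStarts)[seq] = starts1[seq] + starts2[seq];  // merged start positions
  }
  for (size_t seq = 0; seq < numSequences; ++seq) {
    for (int r = starts1[seq]; r < starts1[seq + 1]; ++r) {
      outRows->push_back(rows1[r]);  // left part of sequence `seq`
    }
    for (int r = starts2[seq]; r < starts2[seq + 1]; ++r) {
      outRows->push_back(rows2[r]);  // right part of sequence `seq`
    }
  }
}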
void SelectiveFullyConnectedLayer::forward(PassType passType) {
  REGISTER_TIMER("selective_fc.forward");
  Layer::forward(passType);

  getSelectiveCols();
  size_t height = getInput(0).getBatchSize();
  size_t width = getSize();
  size_t nnz = height * width;
  if (!fullOutput_) {
    CHECK(selCols_);
    CHECK(height == selCols_->getHeight());
    CHECK(width == selCols_->getWidth());
    nnz = selCols_->getElementCnt();
  }

  // Layer::ResetOutput(), here we set outV/outG as SparseMatrix manually
  // this outV should be used as input of MaxIdLayer and softmax activation
  reserveOutput(height, width, nnz);

  bool flag = true;
  for (size_t i = 0; i < inputNum_; i++) {
    MatrixPtr input = getInputValue(i);
    MatrixPtr weight = weights_[i]->getW();
    size_t hsize = input->getHeight();
    size_t wsize = weight->getHeight();
    real scaleT = i == 0 ? real(0) : real(1);

    flag = nnz < (hsize * wsize) * config_.selective_fc_full_mul_ratio() &&
           !fullOutput_;
    if (flag) {
      // if the indices are highly sparse,
      // manually compute the multiplication of
      // the input vector and the selected rows.
      REGISTER_TIMER("selective.plain");
      interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
    } else {
      // if the indices are not sparse enough,
      // use a full mul instead
      REGISTER_TIMER("selective.mul");
      if (fullOutput_) {
        interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
      } else {
        Matrix::resizeOrCreate(mmat_,
                               hsize,
                               wsize,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
        mmat_->mul(*input, *weight->getTranspose());
        interOutput_->add3(mmat_);
      }
    }
  }

  if (biases_) {
    interOutput_->addBias(*(biases_->getW()), 1);
  }

  flag = (passType_ == PASS_TEST && config_.selective_fc_pass_generation() &&
          !fullOutput_);
  if (flag) {
    // during generation, the output of this layer is a sparse csr matrix,
    // which is probably the input of a maxid layer.
    // if the model is trained with multi-class-cross-entropy-with-selfnorm,
    // the activation of this layer should be exponential, not softmax.
    Argument arg;
    arg.value = Matrix::create(interOutput_->getData(),
                               1,
                               nnz,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
    //! TODO(yuyang18): Why we cannot invoke forwardActivation here?
    activation_->forward(arg).check();
  } else /* train and test in train, not generating */ {
    // during training, the output value of this layer is a *Matrix*, which is
    // the input of e.g. multi-class-cross-entropy.
    // while training, every sample has an equal number of selected
    // columns to be activated.
    // note that the indices of multi-class-cross-entropy need to be remapped
    // to this index.
    // e.g. sample = [1,3,5] and 3 is gold, then label is 1
    forwardActivation();
  }
}
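// Illustrative only: a plain-C++ sketch of the trade-off the `flag` heuristic
// above chooses between -- computing only the selected output columns as
// individual dot products versus running a full input x weight^T product. The
// dense row-major buffers and the per-row `selected` column lists are
// assumptions standing in for the layer's Matrix and sparse-row bookkeeping.
#include <cstddef>
#include <vector>

// out[row][k] = dot(input[row], weight[selected[row][k]]); weight stores one
// output column per row. Only the selected columns are materialized.
std::vector<std::vector<float>> selectiveMul(
    const std::vector<std::vector<float>>& input,
    const std::vector<std::vector<float>>& weight,
    const std::vector<std::vector<int>>& selected) {
  std::vector<std::vector<float>> out(input.size());
  for (size_t row = 0; row < input.size(); ++row) {
    out[row].reserve(selected[row].size());
    for (int col : selected[row]) {
      float dot = 0.0f;
      for (size_t j = 0; j < input[row].size(); ++j) {
        dot += input[row][j] * weight[col][j];
      }
      out[row].push_back(dot);
    }
  }
  return out;
}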