void MultiplexLayer::forward(PassType passType) {
  Layer::forward(passType);

  IVectorPtr copyIds = getInput(0).ids;
  MatrixPtr inV1 = getInputValue(1);
  CHECK_EQ(copyIds->getSize(), inV1->getHeight());
  for (size_t i = 2; i < inputLayers_.size(); i++) {
    CHECK_EQ(inV1->getHeight(), getInputValue(i)->getHeight());
    CHECK_EQ(inV1->getWidth(), getInputValue(i)->getWidth());
  }

  calculateCopySchedule(copyIds, inputLayers_.size() - 1);
  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    reserveOutput(inV1->getHeight(), inV1->getWidth());
  }

  MatrixPtr outV = getOutputValue();
  {
    REGISTER_TIMER_INFO("FwLMultplexingTimer", getName().c_str());
    AsyncGpuBlock block;
    for (const CopyInfo& info : copySchedule_) {
      outV->subMatrix(info.startIdx, info.length, tmpDest_)
          ->copyFrom(*getInputValue(info.copyIdx + 1)
                          ->subMatrix(info.startIdx, info.length, tmpSrc_));
    }
  }

  /* activation */
  {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
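// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the layer): how a copy schedule like
// copySchedule_ can be built by grouping consecutive rows that select the
// same input. RunInfo and buildCopySchedule are hypothetical stand-ins for
// CopyInfo / calculateCopySchedule; plain STL replaces IVector, and the exact
// grouping done upstream may differ.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <vector>

struct RunInfo {
  size_t copyIdx;   // which input (excluding the ids input) to copy from
  size_t startIdx;  // first output row of this run
  size_t length;    // number of consecutive rows copied from the same input
};

// Group consecutive identical ids into runs so each run becomes one
// sub-matrix copy instead of a per-row copy.
std::vector<RunInfo> buildCopySchedule(const std::vector<size_t>& ids) {
  std::vector<RunInfo> schedule;
  for (size_t row = 0; row < ids.size(); ++row) {
    if (!schedule.empty() && schedule.back().copyIdx == ids[row]) {
      ++schedule.back().length;
    } else {
      schedule.push_back({ids[row], row, 1});
    }
  }
  return schedule;
}
// Example: ids = {0, 0, 2, 2, 2, 1} yields runs
// {0, start 0, len 2}, {2, start 2, len 3}, {1, start 5, len 1}.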
void ConcatenateLayer2::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  int size = getSize();
  resetOutput(batchSize, size);

  for (size_t i = 0; i < projections_.size(); i++) {
    size_t startCol = projCol_[i].first;
    size_t endCol = projCol_[i].second;
    projOutput_[i].value = output_.value->subColMatrix(startCol, endCol);
    if (output_.grad) {
      projOutput_[i].grad = output_.grad->subColMatrix(startCol, endCol);
    }
  }

  {
    AsyncGpuBlock block;
    for (size_t i = 0; i != inputLayers_.size(); ++i) {
      projections_[i]->forward(&getInput(i), &projOutput_[i], passType);
    }
  }

  /* add the bias-vector */
  if (biases_) {
    REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str());
    output_.value->addBias(*(biases_->getW()), 1, sharedBias_);
  }

  /* activation */
  {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
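// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the layer): how per-projection column
// ranges such as projCol_ can be derived from the projection output sizes.
// makeColumnRanges is a hypothetical helper; each projection i then writes
// directly into columns [ranges[i].first, ranges[i].second) of the shared
// output, which is what the subColMatrix views above arrange.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <utility>
#include <vector>

std::vector<std::pair<size_t, size_t>> makeColumnRanges(
    const std::vector<size_t>& projSizes) {
  std::vector<std::pair<size_t, size_t>> ranges;
  size_t start = 0;
  for (size_t sz : projSizes) {
    ranges.emplace_back(start, start + sz);  // [startCol, endCol)
    start += sz;
  }
  return ranges;
}
// Example: projection sizes {64, 128, 32} give ranges
// {0, 64}, {64, 192}, {192, 224} and a total layer size of 224.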
void CudnnConvLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  resetOutput(batchSize, calOutputSize());

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    projections_[i]->forward(&getInput(i), &getOutput(), passType);
  }

  if (biases_) {
    REGISTER_TIMER_INFO("CudnnConvBiasTimer", getName().c_str());
    int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
    hl_tensor_reshape(outputDesc_,
                      batchSize,
                      numFilters_ / groups_[0],
                      outputH_[0],
                      outputW_[0],
                      numFilters_ * outputH_[0] * outputW_[0],
                      outputH_[0] * outputW_[0],
                      outputW_[0],
                      1);
    outputOffset_ = getOutputValue()->getWidth() / groups_[0];
    for (int g = 0; g < groups_[0]; ++g) {
      real* biasData = biases_->getW()->getData() + biasOffset_ * g;
      real* outData = getOutputValue()->getData() + outputOffset_ * g;
      hl_convolution_forward_add_bias(
          biasDesc_, biasData, outputDesc_, outData);
    }
  }

  forwardActivation();
}
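// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the layer): the stride arguments passed to
// hl_tensor_reshape above describe a packed NCHW tensor. flatOffset is a
// hypothetical helper showing how (n, c, h, w) maps to a linear index with
// strides (C*H*W, H*W, W, 1); shifting the data pointer by outputOffset_ * g
// is what lets each group reuse the same descriptor for its channel slice.
// ---------------------------------------------------------------------------
#include <cstddef>

// Linear offset of element (n, c, h, w) in a packed NCHW buffer.
inline size_t flatOffset(size_t n, size_t c, size_t h, size_t w,
                         size_t C, size_t H, size_t W) {
  const size_t nStride = C * H * W;  // stride between samples
  const size_t cStride = H * W;      // stride between channels
  const size_t hStride = W;          // stride between rows
  return n * nStride + c * cStride + h * hStride + w;
}
// Example: with C = 8, H = W = 4, element (n=1, c=2, h=0, w=3) lives at
// 1*128 + 2*16 + 0*4 + 3 = 163.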
void AverageLayer::forward(PassType passType) {
  SequencePoolLayer::forward(passType);

  MatrixPtr inputValue = getInputValue(0);
  getOutputValue()->sequenceAvgForward(
      *inputValue, *startPositions_->getVector(useGpu_), mode_);

  /* add the bias-vector AFTER average operation */
  if (biases_.get() != NULL) {
    MatrixPtr outV = getOutputValue();
    outV->addBias(*(biases_->getW()), 1);
  }

  /* activation */
  { forwardActivation(); }
}
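// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the layer): what sequenceAvgForward does
// conceptually. seqAvgPool is a hypothetical standalone helper: for each
// sequence [starts[i], starts[i+1]) it averages the per-step feature rows
// into one output row. Plain std::vector replaces Matrix, and the mode_
// variants (sum / sqrt-normalized) are omitted.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <vector>

// input:  (numSteps x dim) row-major; starts: sequence offsets, size numSeqs+1.
// output: (numSeqs x dim) row-major, one averaged row per sequence.
std::vector<float> seqAvgPool(const std::vector<float>& input,
                              const std::vector<int>& starts,
                              size_t dim) {
  const size_t numSeqs = starts.size() - 1;
  std::vector<float> output(numSeqs * dim, 0.0f);
  for (size_t s = 0; s < numSeqs; ++s) {
    const int begin = starts[s];
    const int end = starts[s + 1];
    for (int row = begin; row < end; ++row) {
      for (size_t d = 0; d < dim; ++d) {
        output[s * dim + d] += input[row * dim + d];
      }
    }
    const float len = static_cast<float>(end - begin);
    for (size_t d = 0; d < dim; ++d) {
      output[s * dim + d] /= (len > 0 ? len : 1.0f);
    }
  }
  return output;
}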
void DeConv3DLayer::forward(PassType passType) {
  Layer::forward(passType);
  int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  int outWidth = getSize();
  resetOutput(batchSize, outWidth);
  const MatrixPtr outMat = getOutputValue();

  REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str());
  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    const MatrixPtr& inMat = getInputValue(i);
    int M = M_[i];
    int N = N_[i];
    int K = K_[i];
    MatrixPtr wMat = weights_[i]->getW();
    Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
    for (int n = 0; n < batchSize; ++n) {
      real* inData = inMat->getData() + n * inMat->getStride();
      for (int g = 0; g < groups_[i]; ++g) {
        MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_);
        MatrixPtr wMatSub = wMat->subMatrix(g * K, K);
        MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K);
        colBufDataSub->mul(*wMatSub, *inMatSub, 1.0, 0.0);
        inData += M * N;
      }
      colBuf_->col2Vol(outMat->getData() + n * outMat->getStride(),
                       numFilters_,
                       imgSizeD_[i],
                       imgSizeH_[i],
                       imgSizeW_[i],
                       filterSizeZ_[i],
                       filterSizeY_[i],
                       filterSize_[i],
                       strideZ_[i],
                       strideY_[i],
                       stride_[i],
                       paddingZ_[i],
                       paddingY_[i],
                       padding_[i],
                       1.0,
                       1.0);
    }
  }
  if (nullptr != this->biasParameter_) {
    this->addBias();
  }
  forwardActivation();
}
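// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the layer): a 1-D analogue of the col2Vol
// accumulation used above. col2im1d is a hypothetical helper: colBuf holds one
// row per filter tap and one column per input position (the result of the
// weight-by-input GEMM), and every entry is scatter-added into the
// reconstructed signal at position  col * stride - padding + tap. The real
// col2Vol does the same over depth/height/width and multiple channels.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <vector>

void col2im1d(const std::vector<std::vector<float>>& colBuf,  // [filterSize][numCols]
              int stride, int padding,
              std::vector<float>& img) {                      // accumulated output
  const int filterSize = static_cast<int>(colBuf.size());
  const int numCols = colBuf.empty() ? 0 : static_cast<int>(colBuf[0].size());
  for (int tap = 0; tap < filterSize; ++tap) {
    for (int col = 0; col < numCols; ++col) {
      const int pos = col * stride - padding + tap;  // target index in the signal
      if (pos >= 0 && pos < static_cast<int>(img.size())) {
        img[pos] += colBuf[tap][col];  // overlapping taps accumulate
      }
    }
  }
}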
void FeatureMapExpandLayer::forward(PassType passType) {
  Layer::forward(passType);
  MatrixPtr inputV = getInputValue(0);
  size_t batchSize = getInput(0).getBatchSize();
  int imgSize = inputV->getWidth();
  resetOutput(batchSize, imgSize * numFilters_);

  MatrixPtr outputV = getOutputValue();

  {
    AsyncGpuBlock asyncGpuBlock;
    if (asRowVector_) {
      for (size_t i = 0; i < batchSize; i++) {
        MatrixPtr outVTmp =
            Matrix::create(outputV->getData() + i * imgSize * numFilters_,
                           numFilters_,
                           imgSize,
                           false,
                           useGpu_);
        MatrixPtr inVTmp = Matrix::create(
            inputV->getData() + i * imgSize, 1, imgSize, false, useGpu_);
        outVTmp->addRowVector(*inVTmp);
      }
    } else {
      for (size_t i = 0; i < batchSize; i++) {
        MatrixPtr outVTmp =
            Matrix::create(outputV->getData() + i * imgSize * numFilters_,
                           imgSize,
                           numFilters_,
                           false,
                           useGpu_);
        MatrixPtr inVTmp = Matrix::create(
            inputV->getData() + i * imgSize, imgSize, 1, false, useGpu_);
        outVTmp->addColVector(*inVTmp);
      }
    }
  }
  /* activation */
  {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
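// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the layer): the effect of the two branches
// above on a single sample. expandFeatureMap is a hypothetical helper using
// plain std::vector; asRowVector repeats the whole feature row numFilters
// times back to back, while the column variant repeats each element
// numFilters times in place. (The real layer adds onto a freshly reset,
// zeroed output, which amounts to the same copy.)
// ---------------------------------------------------------------------------
#include <cstddef>
#include <vector>

std::vector<float> expandFeatureMap(const std::vector<float>& in,  // size imgSize
                                    size_t numFilters,
                                    bool asRowVector) {
  const size_t imgSize = in.size();
  std::vector<float> out(imgSize * numFilters);
  if (asRowVector) {
    // output viewed as (numFilters x imgSize): every row is a copy of `in`.
    for (size_t f = 0; f < numFilters; ++f) {
      for (size_t j = 0; j < imgSize; ++j) {
        out[f * imgSize + j] = in[j];
      }
    }
  } else {
    // output viewed as (imgSize x numFilters): every column is a copy of `in`.
    for (size_t j = 0; j < imgSize; ++j) {
      for (size_t f = 0; f < numFilters; ++f) {
        out[j * numFilters + f] = in[j];
      }
    }
  }
  return out;
}
// Example: in = {a, b}, numFilters = 3 gives
//   asRowVector:  {a, b, a, b, a, b}
//   column mode:  {a, a, a, b, b, b}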
void MixedLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  int size = getSize();
  {
    REGISTER_TIMER_INFO("FwResetTimer", getName().c_str());
    resetOutput(batchSize, size);
  }

  MatrixPtr outV = getOutputValue();

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    if (projections_[i]) {
      projections_[i]->forward(&getInput(i), &output_, passType);
    }
  }

  std::vector<const Argument*> ins;
  for (auto& op : operators_) {
    ins.clear();
    for (auto& input_index : op->getConfig().input_indices()) {
      ins.push_back(&getInput(input_index));
    }
    op->forward(ins, &output_, passType);
  }

  /* add the bias-vector */
  if (biases_.get() != NULL) {
    REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str());
    outV->addBias(*(biases_->getW()), 1, sharedBias_);
  }

  /* activation */
  {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
void ConcatenateLayer::forward(PassType passType) {
  Layer::forward(passType);

  int batchSize = getInput(0).getBatchSize();
  int size = getSize();
  reserveOutput(batchSize, size);

  const MatrixPtr& out = getOutputValue();
  int offset = 0;

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    const MatrixPtr& in = getInputValue(i);
    size_t inSize = in->getWidth();
    out->assignAtOffset(*in, offset);
    offset += inSize;
  }
  CHECK_EQ(size, offset);

  /* activation */
  {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
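// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the layer): the effect of assignAtOffset in
// the loop above. concatWidthwise is a hypothetical helper that places each
// input's feature block side by side in every output row, so the output width
// is the sum of the input widths. Plain std::vector replaces Matrix.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <vector>

// Each input is (batchSize x widths[i]) row-major; the result is
// (batchSize x sum(widths)) with input i occupying a contiguous column range.
std::vector<float> concatWidthwise(const std::vector<std::vector<float>>& inputs,
                                   const std::vector<size_t>& widths,
                                   size_t batchSize) {
  size_t totalWidth = 0;
  for (size_t w : widths) totalWidth += w;

  std::vector<float> out(batchSize * totalWidth, 0.0f);
  size_t offset = 0;  // starting column of the current input
  for (size_t i = 0; i < inputs.size(); ++i) {
    for (size_t row = 0; row < batchSize; ++row) {
      for (size_t col = 0; col < widths[i]; ++col) {
        out[row * totalWidth + offset + col] = inputs[i][row * widths[i] + col];
      }
    }
    offset += widths[i];
  }
  return out;
}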
void SequenceConcatLayer::forward(PassType passType) {
  Layer::forward(passType);

  size_t dim = getSize();

  const Argument& input1 = getInput(0);
  size_t numSequences1 = input1.getNumSequences();
  auto startPositions1 = input1.sequenceStartPositions->getVector(false);

  const Argument& input2 = getInput(1);
  size_t numSequences2 = input2.getNumSequences();
  auto startPositions2 = input2.sequenceStartPositions->getVector(false);

  CHECK_EQ(dim, input1.value->getWidth());
  CHECK_EQ(startPositions1->getData()[numSequences1], input1.getBatchSize());
  CHECK_EQ(numSequences1, startPositions1->getSize() - 1);

  CHECK_EQ(dim, input2.value->getWidth());
  CHECK_EQ(startPositions2->getData()[numSequences2], input2.getBatchSize());
  CHECK_EQ(numSequences2, startPositions2->getSize() - 1);

  CHECK_EQ(numSequences1, numSequences2);

  MatrixPtr inputValue1 = getInputValue(0);
  MatrixPtr inputValue2 = getInputValue(1);

  // reset output
  reserveOutput(inputValue1->getHeight() + inputValue2->getHeight(), dim);

  MatrixPtr outputValue = getOutputValue();

  const int* starts1 = startPositions1->getData();
  const int* starts2 = startPositions2->getData();

  {
    AsyncGpuBlock asyncGpuBlock;
    REGISTER_TIMER_INFO("SequenceConcatLayerForward", getName().c_str());

    size_t offset = 0;
    size_t leftNumIns = 0;
    size_t rightNumIns = 0;
    for (size_t seqId = 0; seqId < numSequences1; ++seqId) {
      leftNumIns = starts1[seqId + 1] - starts1[seqId];
      outputValue->subMatrix(offset, leftNumIns)
          ->assign(*(inputValue1->subMatrix(starts1[seqId], leftNumIns)));
      offset += leftNumIns;

      rightNumIns = starts2[seqId + 1] - starts2[seqId];
      outputValue->subMatrix(offset, rightNumIns)
          ->assign(*(inputValue2->subMatrix(starts2[seqId], rightNumIns)));
      offset += rightNumIns;
    }

    // modify the sequenceStartPositions
    ICpuGpuVector::resizeOrCreate(
        output_.sequenceStartPositions, numSequences1 + 1, false);

    int* tgtBuf = output_.sequenceStartPositions->getMutableData(false);

    for (size_t seqId = 0; seqId < numSequences1 + 1; ++seqId) {
      tgtBuf[seqId] = starts1[seqId] + starts2[seqId];
    }
  }

  if (biases_.get() != NULL) {
    MatrixPtr outV = getOutputValue();
    outV->addBias(*(biases_->getW()), 1);
  }

  /* activation */
  forwardActivation();
}
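// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the layer): why the merged start positions
// are simply starts1[i] + starts2[i]. mergeStartPositions is a hypothetical
// helper: since sequence i of input 1 is immediately followed by sequence i
// of input 2, every output sequence i begins after all earlier rows from both
// inputs, i.e. after starts1[i] + starts2[i] rows.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <vector>

std::vector<int> mergeStartPositions(const std::vector<int>& starts1,
                                     const std::vector<int>& starts2) {
  std::vector<int> merged(starts1.size());
  for (size_t i = 0; i < starts1.size(); ++i) {
    merged[i] = starts1[i] + starts2[i];
  }
  return merged;
}
// Example: starts1 = {0, 2, 5} (lengths 2, 3) and starts2 = {0, 1, 3}
// (lengths 1, 2) give merged = {0, 3, 8}: the concatenated sequences have
// lengths 2+1 = 3 and 3+2 = 5.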
void SelectiveFullyConnectedLayer::forward(PassType passType) {
  REGISTER_TIMER("selective_fc.forward");
  Layer::forward(passType);

  getSelectiveCols();
  size_t height = getInput(0).getBatchSize();
  size_t width = getSize();
  size_t nnz = height * width;
  if (!fullOutput_) {
    CHECK(selCols_);
    CHECK(height == selCols_->getHeight());
    CHECK(width == selCols_->getWidth());
    nnz = selCols_->getElementCnt();
  }

  // Layer::ResetOutput(), here we set outV/outG as SparseMatrix manually
  // this outV should be used as input of MaxIdLayer and softmax activation
  reserveOutput(height, width, nnz);

  bool flag = true;
  for (size_t i = 0; i < inputNum_; i++) {
    MatrixPtr input = getInputValue(i);
    MatrixPtr weight = weights_[i]->getW();
    size_t hsize = input->getHeight();
    size_t wsize = weight->getHeight();
    real scaleT = i == 0 ? real(0) : real(1);

    flag = nnz < (hsize * wsize) * config_.selective_fc_full_mul_ratio() &&
           !fullOutput_;
    if (flag) {
      // if the indices are highly sparse,
      // manually compute the multiplication of
      // the input vector and the selected rows.
      REGISTER_TIMER("selective.plain");
      interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
    } else {
      // if the indices are not sparse enough,
      // use the full multiplication instead.
      REGISTER_TIMER("selective.mul");
      if (fullOutput_) {
        interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
      } else {
        Matrix::resizeOrCreate(mmat_,
                               hsize,
                               wsize,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
        mmat_->mul(*input, *weight->getTranspose());
        interOutput_->add3(mmat_);
      }
    }
  }

  if (biases_) {
    interOutput_->addBias(*(biases_->getW()), 1);
  }

  flag = (passType_ == PASS_TEST && config_.selective_fc_pass_generation() &&
          !fullOutput_);
  if (flag) {
    // during generation, the output of this layer is a sparse csr matrix,
    // which is probably the input of a maxid layer.
    // if the model is trained with multi-class-cross-entropy-with-selfnorm,
    // the activation of this layer should be exponential, not softmax.

    Argument arg;
    arg.value = Matrix::create(interOutput_->getData(),
                               1,
                               nnz,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
    //! TODO(yuyang18): Why we cannot invoke forwardActivation here?
    activation_->forward(arg).check();
  } else /* train and test in train, not generating */ {
    // during training, this layer's output value is a *Matrix*, which is the
    // input of e.g. multi-class-cross-entropy.
    // while training, every sample has an equal number of selected
    // columns to be activated.
    // note that indices of multi-class-cross-entropy need to be remapped
    // to this index.
    // e.g. sample = [1,3,5] and 3 is gold, then label is 1
    forwardActivation();
  }
}
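// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the layer): the density test that picks
// between the two multiplication paths above. useSelectedColumns is a
// hypothetical helper: when the number of selected output entries (nnz) is
// small relative to the full input-by-weight product, computing only the
// selected rows is cheaper; otherwise a full dense multiply followed by a
// gather wins. fullMulRatio corresponds to the selective_fc_full_mul_ratio
// configuration value.
// ---------------------------------------------------------------------------
#include <cstddef>

bool useSelectedColumns(size_t nnz,          // selected output entries this batch
                        size_t inputHeight,  // rows of the input matrix
                        size_t weightHeight, // rows of the weight matrix
                        double fullMulRatio, // threshold, e.g. 0.02
                        bool fullOutput) {
  if (fullOutput) {
    return false;  // every column is wanted; the dense path is the only option
  }
  // Sparse path only pays off while nnz stays well below the dense work size.
  return static_cast<double>(nnz) <
         static_cast<double>(inputHeight * weightHeight) * fullMulRatio;
}
// Example: with a 128-row input, a 100000-row weight and ratio 0.02, the
// selected path is used while nnz < 256000.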