void DeConv3DLayer::backward(const UpdateCallback &callback) {
  backwardActivation();
  int batchSize = getOutputGrad()->getHeight();
  if (biases_ && biases_->getWGrad()) {
    // Accumulate the bias gradient and trigger its parameter update.
    bpropBiases();
    biases_->getParameterPtr()->incUpdate(callback);
  }
  REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str());
  for (size_t i = 0; i < inputLayers_.size(); ++i) {
    if (weights_[i]->getWGrad() || this->needGradient_) {
      int M = M_[i];
      int N = N_[i];
      int K = K_[i];
      Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_);
      const MatrixPtr &inMat = getInputValue(i);
      for (int n = 0; n < batchSize; ++n) {
        // Unpack the output gradient of sample n into the column buffer
        // (one K x N block per group).
        colBuf_->vol2Col(
            getOutputGrad()->getData() + n * getOutputGrad()->getStride(),
            numFilters_,
            imgSizeD_[i],
            imgSizeH_[i],
            imgSizeW_[i],
            filterSizeZ_[i],
            filterSizeY_[i],
            filterSize_[i],
            strideZ_[i],
            strideY_[i],
            stride_[i],
            paddingZ_[i],
            paddingY_[i],
            padding_[i]);
        if (weights_[i]->getWGrad()) {
          // Weight gradient, per group: dW += colBuf * input^T.
          real *inData = inMat->getData() + n * inMat->getStride();
          for (int g = 0; g < groups_[i]; ++g) {
            MatrixPtr colBufDataSub = colBuf_->subMatrix(g * K, K);
            MatrixPtr wGradMatSub =
                weights_[i]->getWGrad()->subMatrix(g * K, K);
            MatrixPtr inMatSub = Matrix::create(inData, M, N, false, useGpu_);
            wGradMatSub->mul(
                *colBufDataSub, *(inMatSub->getTranspose()), 1.0, 1.0);
            inData += M * N;
          }
        }
        if (getInputGrad(i)) {
          // Input gradient, per group: dInput += W^T * colBuf.
          real *preGrad =
              getInputGrad(i)->getData() + n * getInputGrad(i)->getStride();
          for (int g = 0; g < groups_[i]; ++g) {
            MatrixPtr w = weights_[i]->getW()->subMatrix(g * K, K);
            MatrixPtr outGradMat = colBuf_->subMatrix(g * K, K);
            MatrixPtr inGradMatSub =
                Matrix::create(preGrad, M, N, false, useGpu_);
            inGradMatSub->mul(*(w->getTranspose()), *outGradMat, 1.0, 1.0);
            preGrad += M * N;
          }
        }
      }
      weights_[i]->getParameterPtr()->incUpdate(callback);
    }
  }
}
void MKLPackedRecurrentLayer::backwardBatch(int batchSize,
                                            size_t numSequences,
                                            const int* starts) {
  // Lazily create the gradient batch-reordering helper and reuse the batch
  // index built during the forward pass.
  if (!batchGrad_) {
    batchGrad_.reset(new SequenceToBatch(useGpu_));
  }
  batchGrad_->shareIndexWith(*batchValue_);

  size_t numBatch = batchGrad_->getNumBatch();
  // Accumulate the weight gradient per batch if there are fewer batches than
  // sequences; otherwise accumulate it per sequence below.
  bool backwardByBatch = numBatch < numSequences;

  // Reorder the output gradient from sequence layout into batch layout.
  batchGrad_->copyFromSeq(*output_.grad);
  {
    REGISTER_TIMER_INFO("RecurrentBwData", getName().c_str());
    /* backward one batch */
    for (int n = (int)numBatch - 1; n >= 0; n--) {
      MatrixPtr batchGrad = batchGrad_->getBatchValue(n);
      MatrixPtr batchValue =
          batchValue_->getBatchValue(n, batchGrad->getHeight());

      Argument arg;
      arg.value = batchValue;
      arg.grad = batchGrad;
      activation_->backward(arg).check();

      if (n != 0) {
        // Propagate the gradient to the previous batch (time step):
        // prevGrad += grad * W^T, via the packed transposed weight.
        batchValue = batchGrad_->getBatchValue(n - 1, batchGrad->getHeight());
        packed_weightT_->gemm_compute(batchGrad, batchValue);
      }

      if (backwardByBatch && weight_->getWGrad()) {
        if (n != 0) {
          /* backward weight: dW += prevValue^T * grad */
          batchValue =
              batchValue_->getBatchValue(n - 1, batchGrad->getHeight());
          weight_->getWGrad()->mul(
              *batchValue->getTranspose(), *batchGrad, 1, 1);
        }
      }
    }
  }

  // Copy the accumulated gradient back into sequence layout.
  batchGrad_->copyBackSeq(*output_.grad);

  if (!backwardByBatch && weight_->getWGrad()) {
    REGISTER_TIMER_INFO("RecurrentBwWeight", getName().c_str());
    // Per-sequence weight gradient: one GEMM over each whole sequence,
    // with the value/grad offsets mirrored when the layer is reversed.
    for (size_t seq = 0; seq < numSequences; ++seq) {
      int len = starts[seq + 1] - starts[seq];
      weight_->getWGrad()->mul(
          *output_.value
               ->subMatrix(reversed_ ? starts[seq] + 1 : starts[seq], len - 1)
               ->getTranspose(),
          *output_.grad->subMatrix(reversed_ ? starts[seq] : starts[seq] + 1,
                                   len - 1),
          1,
          1);
    }
  }
}
void SelectiveFullyConnectedLayer::forward(PassType passType) {
  REGISTER_TIMER("selective_fc.forward");
  Layer::forward(passType);

  getSelectiveCols();
  size_t height = getInput(0).getBatchSize();
  size_t width = getSize();
  size_t nnz = height * width;
  if (!fullOutput_) {
    CHECK(selCols_);
    CHECK(height == selCols_->getHeight());
    CHECK(width == selCols_->getWidth());
    nnz = selCols_->getElementCnt();
  }

  // instead of Layer::resetOutput(), outV/outG are set up as SparseMatrix
  // manually here; this outV should be used as the input of MaxIdLayer and
  // softmax activation.
  reserveOutput(height, width, nnz);

  bool flag = true;
  for (size_t i = 0; i < inputNum_; i++) {
    MatrixPtr input = getInputValue(i);
    MatrixPtr weight = weights_[i]->getW();
    size_t hsize = input->getHeight();
    size_t wsize = weight->getHeight();
    // the first input overwrites interOutput_; later inputs accumulate into it.
    real scaleT = i == 0 ? real(0) : real(1);

    flag = nnz < (hsize * wsize) * config_.selective_fc_full_mul_ratio() &&
           !fullOutput_;
    if (flag) {
      // if the indices are highly sparse,
      // manually compute the multiplication of
      // the input vector and the selected rows.
      REGISTER_TIMER("selective.plain");
      interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
    } else {
      // if the indices are not sparse enough,
      // use a full multiplication instead.
      REGISTER_TIMER("selective.mul");
      if (fullOutput_) {
        interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
      } else {
        Matrix::resizeOrCreate(mmat_,
                               hsize,
                               wsize,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
        mmat_->mul(*input, *weight->getTranspose());
        interOutput_->add3(mmat_);
      }
    }
  }

  if (biases_) {
    interOutput_->addBias(*(biases_->getW()), 1);
  }

  flag = (passType_ == PASS_TEST && config_.selective_fc_pass_generation() &&
          !fullOutput_);
  if (flag) {
    // during generation, the output of this layer is a sparse CSR matrix,
    // which is probably the input of a maxid layer.
    // if the model is trained with multi-class-cross-entropy-with-selfnorm,
    // the activation of this layer should be exponential, not softmax.
    Argument arg;
    arg.value = Matrix::create(interOutput_->getData(),
                               1,
                               nnz,
                               /*trans=*/false,
                               /*useGpu=*/useGpu_);
    //! TODO(yuyang18): Why can't we invoke forwardActivation here?
    activation_->forward(arg).check();
  } else /* train, and test during training; not generating */ {
    // during training, this layer's output value is a *Matrix*, which is the
    // input of e.g. multi-class-cross-entropy.
    // while training, every sample has an equal number of selected
    // columns to be activated.
    // note that the indices of multi-class-cross-entropy need to be remapped
    // to this index.
    // e.g. if sample = [1, 3, 5] and 3 is the gold label, then the label is 1.
    forwardActivation();
  }
}