/// Solve multiple systems w/ each column of the Matrix a single RHS MatrixType *p_solve(const MatrixType& B) const { VectorType b(B.communicator(), B.localRows()); VectorType X(B.communicator(), B.localRows()); MatrixType *result(new MatrixType(B.communicator(), B.localRows(), B.localCols(), Dense)); int ilo, ihi; X.localIndexRange(ilo, ihi); int nloc(X.localSize()); std::vector<IdxType> iidx; iidx.reserve(nloc); for (IdxType i = ilo; i < ihi; ++i) { iidx.push_back(i); } std::vector<IdxType> jidx(nloc); std::vector<TheType> locX(nloc); for (int j = 0; j < B.cols(); ++j) { column(B, j, b); X.zero(); X.ready(); if (j == 0) { this->solve(b, X); } else { this->resolve(b, X); } std::fill(jidx.begin(), jidx.end(), j); X.getElements(nloc, &iidx[0], &locX[0]); result->setElements(nloc, &iidx[0], &jidx[0], &locX[0]); } result->ready(); return result; }
// Prepare the constraint for the sparsity pattern Ppattern.
//
// Stores Ppattern, imports Bc from the pattern's domain map onto its column
// map (result kept in X_), and for every local row i computes
//     XXtInv_[i] = (locX * locX^H)^{-1}
// where locX is NSDim x nnz and column j of locX holds the X_ values at the
// j-th column index of row i.
//
//   B        - not referenced by this implementation
//   Bc       - multivector imported into X_; Bc.getNumVectors() defines NSDim
//   Ppattern - graph providing the domain map, column map and row views
//
// NOTE(review): the LAPACK status `info` returned by GETRF/GETRI is not
// inspected, so a singular local matrix is not reported.
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
  Ppattern_ = Ppattern;

  const RCP<const Map> uniqueMap    = Ppattern_->getDomainMap();
  const RCP<const Map> nonUniqueMap = Ppattern_->getColMap();
  RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);

  const size_t NSDim = Bc.getNumVectors();
  X_ = MultiVectorFactory::Build(nonUniqueMap, NSDim);
  X_->doImport(Bc, *importer, Xpetra::INSERT);

  size_t numRows = Ppattern_->getNodeNumRows();
  XXtInv_.resize(numRows);

  // Everything below is independent of the row, so it is set up once
  // instead of once per row.
  const SC zero = Teuchos::ScalarTraits<SC>::zero();
  const SC one  = Teuchos::ScalarTraits<SC>::one();

  Teuchos::BLAS  <LO,SC> blas;
  Teuchos::LAPACK<LO,SC> lapack;
  LO           info;
  LO           lwork = 3*NSDim;
  ArrayRCP<LO> IPIV(NSDim);
  ArrayRCP<SC> WORK(lwork);

  for (size_t i = 0; i < numRows; i++) {
    Teuchos::ArrayView<const LO> indices;
    Ppattern_->getLocalRowView(i, indices);

    size_t nnz = indices.size();

    // Gather the X_ entries of this row's columns, one getData() call per
    // vector rather than one per matrix entry.
    Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false);
    for (size_t k = 0; k < NSDim; k++) {
      ArrayRCP<const SC> Xk = X_->getData(k);
      for (size_t j = 0; j < nnz; j++)
        locX(k,j) = Xk[indices[j]];
    }

    XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false);
    Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i];

    // XXtInv = locX * locX^H  (beta == zero, so the uninitialized target is overwritten)
    blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
              one, locX.values(), locX.stride(),
              locX.values(), locX.stride(),
              zero, XXtInv.values(), XXtInv.stride());

    // LU-factorize in place, then overwrite with the inverse
    lapack.GETRF(NSDim, NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), &info);
    lapack.GETRI(NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), WORK.get(), lwork, &info);
  }
}
// Prepare the constraint for the sparsity pattern Ppattern.
//
// Stores Ppattern, builds X_ on the pattern's column map (imported from Bc
// when the pattern has an importer, otherwise assigned from Bc), and for
// every local row i computes
//     XXtInv_[i] = (locX * locX^H)^{-1}
// where locX is NSDim x nnz and column j of locX holds the X_ values at the
// j-th column index of row i.
//
//   B        - not referenced by this implementation
//   Bc       - source multivector; Bc.getNumVectors() defines NSDim
//   Ppattern - graph providing the importer, column map and row views
//
// NOTE(review): the LAPACK status `info` is not inspected, and the scalar
// path divides by d without guarding against d == zero (e.g. an empty row).
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
  const size_t NSDim = Bc.getNumVectors();

  Ppattern_ = Ppattern;

  size_t numRows = Ppattern_->getNodeNumRows();
  XXtInv_.resize(numRows);

  RCP<const Import> importer = Ppattern_->getImporter();

  X_ = MultiVectorFactory::Build(Ppattern_->getColMap(), NSDim);
  if (!importer.is_null())
    X_->doImport(Bc, *importer, Xpetra::INSERT);
  else
    *X_ = Bc;

  // Raw pointers to each vector of X_ for cheap access inside the row loop
  std::vector<const SC*> Xval(NSDim);
  for (size_t j = 0; j < NSDim; j++)
    Xval[j] = X_->getData(j).get();

  SC zero = Teuchos::ScalarTraits<SC>::zero();
  SC one  = Teuchos::ScalarTraits<SC>::one();

  // Row-independent helpers and LAPACK scratch space, shared by all rows
  Teuchos::BLAS  <LO,SC> blas;
  Teuchos::LAPACK<LO,SC> lapack;
  LO                     lwork = 3*NSDim;
  ArrayRCP<LO>           IPIV(NSDim);
  ArrayRCP<SC>           WORK(lwork);

  for (size_t i = 0; i < numRows; i++) {
    Teuchos::ArrayView<const LO> indices;
    Ppattern_->getLocalRowView(i, indices);

    size_t nnz = indices.size();

    XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false/*zeroOut*/);
    Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i];

    if (NSDim == 1) {
      // 1x1 case: locX * locX^H reduces to sum_j x_j * conj(x_j).
      // The conjugate keeps this path in agreement with the CONJ_TRANS
      // product of the general path; for real scalars it is a no-op.
      SC d = zero;
      for (size_t j = 0; j < nnz; j++) {
        const SC x = Xval[0][indices[j]];
        d += x * Teuchos::ScalarTraits<SC>::conjugate(x);
      }
      XXtInv(0,0) = one/d;

    } else {
      Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false/*zeroOut*/);
      for (size_t j = 0; j < nnz; j++)
        for (size_t k = 0; k < NSDim; k++)
          locX(k,j) = Xval[k][indices[j]];

      // XXtInv_ = (locX*locX^T)^{-1}
      blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                one, locX.values(), locX.stride(),
                locX.values(), locX.stride(),
                zero, XXtInv.values(), XXtInv.stride());

      LO info;
      // Compute LU factorization using partial pivoting with row exchanges
      lapack.GETRF(NSDim, NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), &info);
      // Use the computed factorization to compute the inverse
      lapack.GETRI(NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), WORK.get(), lwork, &info);
    }
  }
}
// Copy the values of P onto the stencil of Projected and subtract, row by
// row, the component  locX^H * XXtInv_[i] * locX * v  from the copied values v,
// where locX holds the X_ entries at the row's column indices in Projected.
//
//   P         - supplied matrix; only read
//   Projected - matrix that must already have the target stencil; its values
//               are replaced row by row and it is fill-completed on return
//
// Throws Exceptions::Incompatible if the row maps of P and Projected differ.
// Column maps may differ: Projected's local column indices are translated to
// P's through global indices.
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(const Matrix& P, Matrix& Projected) const {
  // We check only row maps. Column may be different.
  TEUCHOS_TEST_FOR_EXCEPTION(!P.getRowMap()->isSameAs(*Projected.getRowMap()), Exceptions::Incompatible,
                             "Row maps are incompatible");
  const size_t NSDim   = X_->getNumVectors();
  const size_t numRows = P.getNodeNumRows();

  const Map& colMap  = *P.getColMap();
  const Map& PColMap = *Projected.getColMap();

  Projected.resumeFill();

  Teuchos::ArrayView<const LO> indices, pindices;
  Teuchos::ArrayView<const SC> values,  pvalues;
  Teuchos::Array<SC>           valuesAll(colMap.getNodeNumElements()), newValues;

  LO invalid = Teuchos::OrdinalTraits<LO>::invalid();
  LO oneLO   = Teuchos::OrdinalTraits<LO>::one();
  SC zero    = Teuchos::ScalarTraits<SC> ::zero();
  SC one     = Teuchos::ScalarTraits<SC> ::one();

  std::vector<const SC*> Xval(NSDim);
  for (size_t j = 0; j < NSDim; j++)
    Xval[j] = X_->getData(j).get();

  // Translate every local column index of Projected into P's local column
  // index once, instead of doing two map lookups per nonzero inside the row
  // loop. Entries without a counterpart in P's column map stay `invalid`.
  const size_t numPCols = PColMap.getNodeNumElements();
  Teuchos::Array<LO> projToP(numPCols);
  for (size_t c = 0; c < numPCols; c++)
    projToP[c] = colMap.getLocalElement(PColMap.getGlobalElement(static_cast<LO>(c)));

  Teuchos::BLAS<LO,SC> blas;

  for (size_t i = 0; i < numRows; i++) {
    P        .getLocalRowView(i,  indices,  values);
    Projected.getLocalRowView(i, pindices, pvalues);

    size_t nnz  = indices.size();     // number of nonzeros in the supplied matrix
    size_t pnnz = pindices.size();    // number of nonzeros in the constrained matrix

    newValues.resize(pnnz);

    // Step 1: fix stencil
    // Projected *must* already have the correct stencil

    // Step 2: copy correct stencil values
    // The algorithm is very similar to the one used in the calculation of
    // Frobenius dot product, see src/Transfers/Energy-Minimization/Solvers/MueLu_CGSolver_def.hpp

    // NOTE: using extra array allows us to skip the search among indices
    for (size_t j = 0; j < nnz; j++)
      valuesAll[indices[j]] = values[j];
    for (size_t j = 0; j < pnnz; j++) {
      LO ind = projToP[pindices[j]];
      if (ind != invalid)
        // column is known to P: take P's value (zero if P has no entry there)
        newValues[j] = valuesAll[ind];
      else
        newValues[j] = zero;
    }
    // Reset only the touched entries so valuesAll is all-zero for the next row
    for (size_t j = 0; j < nnz; j++)
      valuesAll[indices[j]] = zero;

    // Step 3: project to the space
    Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i];

    Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, pnnz, false);
    for (size_t j = 0; j < pnnz; j++)
      for (size_t k = 0; k < NSDim; k++)
        locX(k,j) = Xval[k][pindices[j]];

    Teuchos::SerialDenseVector<LO,SC> val(pnnz, false), val1(NSDim, false), val2(NSDim, false);
    for (size_t j = 0; j < pnnz; j++)
      val[j] = newValues[j];

    // val1 = locX * val;
    blas.GEMV(Teuchos::NO_TRANS, NSDim, pnnz,
              one, locX.values(), locX.stride(),
              val.values(), oneLO,
              zero, val1.values(), oneLO);
    // val2 = XXtInv * val1
    blas.GEMV(Teuchos::NO_TRANS, NSDim, NSDim,
              one, XXtInv.values(), XXtInv.stride(),
              val1.values(), oneLO,
              zero, val2.values(), oneLO);
    // val = X^T * val2
    blas.GEMV(Teuchos::CONJ_TRANS, NSDim, pnnz,
              one, locX.values(), locX.stride(),
              val2.values(), oneLO,
              zero, val.values(), oneLO);

    for (size_t j = 0; j < pnnz; j++)
      newValues[j] -= val[j];

    Projected.replaceLocalValues(i, pindices, newValues);
  }

  Projected.fillComplete(Projected.getDomainMap(), Projected.getRangeMap()); //FIXME: maps needed?
}
// Transfer the values of P onto the stencil of Projected, then subtract from
// each row's values v the component  locX^H * XXtInv_[i] * locX * v,  where
// locX holds the X_ entries at that row's column indices in Projected.
//
//   P         - supplied matrix; only read
//   Projected - matrix that already carries the target stencil; its values
//               are replaced row by row and it is fill-completed on return
//
// NOTE(review): local column indices of P and Projected are compared
// directly, which presumes both use the same column numbering - confirm.
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Apply(const Matrix& P, Matrix& Projected) const {
  const size_t NSDim   = X_->getNumVectors();
  const size_t numRows = P.getNodeNumRows();

  const SC scalarZero = Teuchos::ScalarTraits<SC>::zero();
  const SC scalarOne  = Teuchos::ScalarTraits<SC>::one();
  const LO unitStride = (LO)1;

  Teuchos::BLAS<LO,SC> blas;

  Projected.resumeFill();

  for (size_t row = 0; row < numRows; row++) {
    Teuchos::ArrayView<const LO> srcCols, dstCols;
    Teuchos::ArrayView<const SC> srcVals, dstVals;

    P        .getLocalRowView(row, srcCols, srcVals);
    Projected.getLocalRowView(row, dstCols, dstVals);

    const size_t numDst = dstCols.size();   // entries in the constrained matrix row
    const size_t numSrc = srcCols.size();   // entries in the supplied matrix row

    // Stencil positions that P does not provide stay zero.
    Teuchos::Array<SC> newValues(numDst, scalarZero);

    // Copy each supplied value to the first stencil slot with the same
    // column index; supplied entries outside the stencil are dropped.
    for (size_t s = 0; s < numSrc; s++) {
      size_t slot = 0;
      while (slot < numDst && !(dstCols[slot] == srcCols[s]))
        slot++;

      if (slot == numDst)
        continue;

      newValues[slot] = srcVals[s];
    }

    // Gather the X_ entries belonging to the stencil columns of this row.
    Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, numDst, false);
    for (size_t c = 0; c < numDst; c++)
      for (size_t v = 0; v < NSDim; v++)
        locX(v, c) = X_->getData(v)[dstCols[c]];

    Teuchos::SerialDenseVector<LO,SC> rowVals(numDst, false);
    Teuchos::SerialDenseVector<LO,SC> coarse(NSDim, false);
    Teuchos::SerialDenseVector<LO,SC> scaled(NSDim, false);
    for (size_t c = 0; c < numDst; c++)
      rowVals[c] = newValues[c];

    // coarse = locX * rowVals
    blas.GEMV(Teuchos::NO_TRANS, NSDim, numDst,
              scalarOne, locX.values(), locX.stride(),
              rowVals.values(), unitStride,
              scalarZero, coarse.values(), unitStride);
    // scaled = XXtInv_[row] * coarse
    blas.GEMV(Teuchos::NO_TRANS, NSDim, NSDim,
              scalarOne, XXtInv_[row].values(), XXtInv_[row].stride(),
              coarse.values(), unitStride,
              scalarZero, scaled.values(), unitStride);
    // rowVals = locX^H * scaled
    blas.GEMV(Teuchos::CONJ_TRANS, NSDim, numDst,
              scalarOne, locX.values(), locX.stride(),
              scaled.values(), unitStride,
              scalarZero, rowVals.values(), unitStride);

    for (size_t c = 0; c < numDst; c++)
      newValues[c] -= rowVals[c];

    Projected.replaceLocalValues(row, dstCols, newValues);
  }

  Projected.fillComplete(Projected.getDomainMap(), Projected.getRangeMap()); //FIXME: maps needed?
}