/// Solve multiple systems w/ each column of the Matrix a single RHS
  MatrixType *p_solve(const MatrixType& B) const
  {
    VectorType b(B.communicator(), B.localRows());
    VectorType X(B.communicator(), B.localRows());
    MatrixType *result(new MatrixType(B.communicator(), B.localRows(), B.localCols(), Dense));

    int ilo, ihi;
    X.localIndexRange(ilo, ihi);
    int nloc(X.localSize());
    std::vector<IdxType> iidx;
    iidx.reserve(nloc);
    for (IdxType i = ilo; i < ihi; ++i) { iidx.push_back(i); }
    std::vector<IdxType> jidx(nloc);
    std::vector<TheType> locX(nloc);

    for (int j = 0; j < B.cols(); ++j) {
      column(B, j, b);
      X.zero();
      X.ready();
      if (j == 0) {
        this->solve(b, X);
      } else {
        this->resolve(b, X);
      }
      std::fill(jidx.begin(), jidx.end(), j);
      X.getElements(nloc, &iidx[0], &locX[0]);
      result->setElements(nloc, &iidx[0], &jidx[0], &locX[0]);
    }
  
    result->ready();
    return result;
  }
Exemplo n.º 2
0
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
    Ppattern_ = Ppattern;

    const RCP<const Map> uniqueMap    = Ppattern_->getDomainMap();
    const RCP<const Map> nonUniqueMap = Ppattern_->getColMap();
    RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);

    const size_t NSDim = Bc.getNumVectors();
    X_ = MultiVectorFactory::Build(nonUniqueMap, NSDim);
    X_->doImport(Bc, *importer, Xpetra::INSERT);

    size_t numRows = Ppattern_->getNodeNumRows();
    XXtInv_.resize(numRows);
    Teuchos::SerialDenseVector<LO,SC> BcRow(NSDim, false);
    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices;
      Ppattern_->getLocalRowView(i, indices);

      size_t nnz = indices.size();

      Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false);
      for (size_t j = 0; j < nnz; j++) {
        for (size_t k = 0; k < NSDim; k++)
          BcRow[k] = X_->getData(k)[indices[j]];

        Teuchos::setCol(BcRow, (LO)j, locX);
      }

      XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false);

      Teuchos::BLAS<LO,SC> blas;
      blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                Teuchos::ScalarTraits<SC>::one(), locX.values(), locX.stride(),
                locX.values(), locX.stride(), Teuchos::ScalarTraits<SC>::zero(),
                XXtInv_[i].values(), XXtInv_[i].stride());

      Teuchos::LAPACK<LO,SC> lapack;
      LO info, lwork = 3*NSDim;
      ArrayRCP<LO> IPIV(NSDim);
      ArrayRCP<SC> WORK(lwork);
      lapack.GETRF(NSDim, NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), &info);
      lapack.GETRI(NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), WORK.get(), lwork, &info);
    }
  }
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
    const size_t NSDim = Bc.getNumVectors();

    Ppattern_ = Ppattern;

    size_t numRows = Ppattern_->getNodeNumRows();
    XXtInv_.resize(numRows);

    RCP<const Import> importer = Ppattern_->getImporter();

    X_ = MultiVectorFactory::Build(Ppattern_->getColMap(), NSDim);
    if (!importer.is_null())
      X_->doImport(Bc, *importer, Xpetra::INSERT);
    else
      *X_ = Bc;

    std::vector<const SC*> Xval(NSDim);
    for (size_t j = 0; j < NSDim; j++)
      Xval[j] = X_->getData(j).get();

    SC zero = Teuchos::ScalarTraits<SC>::zero();
    SC one  = Teuchos::ScalarTraits<SC>::one();

    Teuchos::BLAS  <LO,SC> blas;
    Teuchos::LAPACK<LO,SC> lapack;
    LO lwork = 3*NSDim;
    ArrayRCP<LO> IPIV(NSDim);
    ArrayRCP<SC> WORK(lwork);

    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices;
      Ppattern_->getLocalRowView(i, indices);

      size_t nnz = indices.size();

      XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false/*zeroOut*/);
      Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i];

      if (NSDim == 1) {
        SC d = zero;
        for (size_t j = 0; j < nnz; j++)
          d += Xval[0][indices[j]] * Xval[0][indices[j]];
        XXtInv(0,0) = one/d;

      } else {
        Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false/*zeroOut*/);
        for (size_t j = 0; j < nnz; j++)
          for (size_t k = 0; k < NSDim; k++)
            locX(k,j) = Xval[k][indices[j]];

        // XXtInv_ = (locX*locX^T)^{-1}
        blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                   one,   locX.values(),   locX.stride(),
                          locX.values(),   locX.stride(),
                  zero, XXtInv.values(), XXtInv.stride());

        LO info;
        // Compute LU factorization using partial pivoting with row exchanges
        lapack.GETRF(NSDim, NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), &info);
        // Use the computed factorization to compute the inverse
        lapack.GETRI(NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), WORK.get(), lwork, &info);
      }
    }
  }
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(const Matrix& P, Matrix& Projected) const {
    // We check only row maps. Column may be different.
    TEUCHOS_TEST_FOR_EXCEPTION(!P.getRowMap()->isSameAs(*Projected.getRowMap()), Exceptions::Incompatible,
                               "Row maps are incompatible");
    const size_t NSDim   = X_->getNumVectors();
    const size_t numRows = P.getNodeNumRows();

    const Map& colMap  = *P.getColMap();
    const Map& PColMap = *Projected.getColMap();

    Projected.resumeFill();

    Teuchos::ArrayView<const LO> indices, pindices;
    Teuchos::ArrayView<const SC> values,  pvalues;
    Teuchos::Array<SC> valuesAll(colMap.getNodeNumElements()), newValues;

    LO invalid = Teuchos::OrdinalTraits<LO>::invalid();
    LO oneLO   = Teuchos::OrdinalTraits<LO>::one();
    SC zero    = Teuchos::ScalarTraits<SC> ::zero();
    SC one     = Teuchos::ScalarTraits<SC> ::one();

    std::vector<const SC*> Xval(NSDim);
    for (size_t j = 0; j < NSDim; j++)
      Xval[j] = X_->getData(j).get();

    for (size_t i = 0; i < numRows; i++) {
      P        .getLocalRowView(i,  indices,  values);
      Projected.getLocalRowView(i, pindices, pvalues);

      size_t nnz  = indices.size();     // number of nonzeros in the supplied matrix
      size_t pnnz = pindices.size();    // number of nonzeros in the constrained matrix

      newValues.resize(pnnz);

      // Step 1: fix stencil
      // Projected *must* already have the correct stencil

      // Step 2: copy correct stencil values
      // The algorithm is very similar to the one used in the calculation of
      // Frobenius dot product, see src/Transfers/Energy-Minimization/Solvers/MueLu_CGSolver_def.hpp

      // NOTE: using extra array allows us to skip the search among indices
      for (size_t j = 0; j < nnz; j++)
        valuesAll[indices[j]] = values[j];
      for (size_t j = 0; j < pnnz; j++) {
        LO ind = colMap.getLocalElement(PColMap.getGlobalElement(pindices[j])); // FIXME: we could do that before the full loop just once
        if (ind != invalid)
          // index indices[j] is part of template, copy corresponding value
          newValues[j] = valuesAll[ind];
        else
          newValues[j] = zero;
      }
      for (size_t j = 0; j < nnz; j++)
        valuesAll[indices[j]] = zero;

      // Step 3: project to the space
      Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i];

      Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, pnnz, false);
      for (size_t j = 0; j < pnnz; j++)
        for (size_t k = 0; k < NSDim; k++)
          locX(k,j) = Xval[k][pindices[j]];

      Teuchos::SerialDenseVector<LO,SC> val(pnnz, false), val1(NSDim, false), val2(NSDim, false);
      for (size_t j = 0; j < pnnz; j++)
        val[j] = newValues[j];

      Teuchos::BLAS<LO,SC> blas;
      // val1 = locX * val;
      blas.GEMV(Teuchos::NO_TRANS, NSDim, pnnz,
                one, locX.values(), locX.stride(),
                val.values(), oneLO,
                zero, val1.values(), oneLO);
      // val2 = XXtInv * val1
      blas.GEMV(Teuchos::NO_TRANS, NSDim, NSDim,
                one, XXtInv.values(), XXtInv.stride(),
                val1.values(), oneLO,
                zero,   val2.values(), oneLO);
      // val = X^T * val2
      blas.GEMV(Teuchos::CONJ_TRANS, NSDim, pnnz,
                one, locX.values(), locX.stride(),
                val2.values(), oneLO,
                zero,  val.values(), oneLO);

      for (size_t j = 0; j < pnnz; j++)
        newValues[j] -= val[j];

      Projected.replaceLocalValues(i, pindices, newValues);
    }

    Projected.fillComplete(Projected.getDomainMap(), Projected.getRangeMap()); //FIXME: maps needed?
  }
Exemplo n.º 5
0
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Apply(const Matrix& P, Matrix& Projected) const {
    const size_t NSDim   = X_->getNumVectors();
    const size_t numRows = P.getNodeNumRows();

    Projected.resumeFill();

    Teuchos::SerialDenseVector<LO,SC> BcRow(NSDim, false);
    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices, pindices;
      Teuchos::ArrayView<const SC> values,  pvalues;

      P        .getLocalRowView(i,  indices,  values);
      Projected.getLocalRowView(i, pindices, pvalues);

      size_t nnz  = pindices.size();    // number of nonzeros in the constrained matrix
      size_t nnz1 = indices.size();     // number of nonzeros in the supplied matrix

      Teuchos::Array<SC> newValues(nnz, Teuchos::ScalarTraits<SC>::zero());

      // step 1: fix stencil
      // Projected already has the correct stencil

      // step 2: copy correct stencil values
      for (size_t j = 0; j < nnz1; j++) {
        // this might be accelerated if we know smth about ordering
        size_t k = 0;
        for (; k < nnz; k++)
          if (pindices[k] == indices[j])
            break;
        if (k != nnz) {
          // index indices[j] is part of template, copy corresponding value
          newValues[k] = values[j];
        }
      }

      // step 3: project to the space
      Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false);
      for (size_t j = 0; j < nnz; j++) {
        for (size_t k = 0; k < NSDim; k++)
          BcRow[k] = X_->getData(k)[pindices[j]];

        Teuchos::setCol(BcRow, (LO)j, locX);
      }

      Teuchos::SerialDenseVector<LO,SC> val(nnz, false), val1(NSDim, false), val2(NSDim, false);
      for (size_t j = 0; j < nnz; j++)
        val[j] = newValues[j];

      Teuchos::BLAS<LO,SC> blas;
      blas.GEMV(Teuchos::NO_TRANS, NSDim, nnz, Teuchos::ScalarTraits<SC>::one(), locX.values(),
                locX.stride(), val.values(), (LO)1, Teuchos::ScalarTraits<SC>::zero(), val1.values(), (LO)1);
      blas.GEMV(Teuchos::NO_TRANS, NSDim, NSDim, Teuchos::ScalarTraits<SC>::one(), XXtInv_[i].values(),
                XXtInv_[i].stride(), val1.values(), (LO)1, Teuchos::ScalarTraits<SC>::zero(), val2.values(), (LO)1);
      blas.GEMV(Teuchos::CONJ_TRANS, NSDim, nnz, Teuchos::ScalarTraits<SC>::one(), locX.values(),
                locX.stride(), val2.values(), (LO)1, Teuchos::ScalarTraits<SC>::zero(), val.values(), (LO)1);

      for (size_t j = 0; j < nnz; j++)
        newValues[j] -= val[j];

      Projected.replaceLocalValues(i, pindices, newValues);
    }

    Projected.fillComplete(Projected.getDomainMap(), Projected.getRangeMap()); //FIXME: maps needed?
  }