/// \brief Compute QR factorization [Q,R] = qr(A,0). /// /// \param A [in/out] On input: the multivector to factor. /// Overwritten with garbage on output. /// /// \param Q [out] On output: the (explicitly stored) Q factor in /// the QR factorization of the (input) multivector A. /// /// \param R [out] On output: the R factor in the QR factorization /// of the (input) multivector A. /// /// \param forceNonnegativeDiagonal [in] If true, then (if /// necessary) do extra work (modifying both the Q and R /// factors) in order to force the R factor to have a /// nonnegative diagonal. /// /// \warning Currently, this method only works if A and Q have the /// same communicator and row distribution ("Map," in Petra /// terms) as those of the multivector given to this adapter /// instance's constructor. Otherwise, the result of this /// method is undefined. void factorExplicit (MV& A, MV& Q, dense_matrix_type& R, const bool forceNonnegativeDiagonal=false) { TEUCHOS_TEST_FOR_EXCEPTION (! A.isConstantStride (), std::invalid_argument, "TsqrAdaptor::" "factorExplicit: Input MultiVector A must have constant stride."); TEUCHOS_TEST_FOR_EXCEPTION (! Q.isConstantStride (), std::invalid_argument, "TsqrAdaptor::" "factorExplicit: Input MultiVector Q must have constant stride."); prepareTsqr (Q); // Finish initializing TSQR. // FIXME (mfh 16 Jan 2016) Currently, TSQR is a host-only // implementation. A.template sync<Kokkos::HostSpace> (); A.template modify<Kokkos::HostSpace> (); Q.template sync<Kokkos::HostSpace> (); Q.template modify<Kokkos::HostSpace> (); auto A_view = A.template getLocalView<Kokkos::HostSpace> (); auto Q_view = Q.template getLocalView<Kokkos::HostSpace> (); scalar_type* const A_ptr = reinterpret_cast<scalar_type*> (A_view.ptr_on_device ()); scalar_type* const Q_ptr = reinterpret_cast<scalar_type*> (Q_view.ptr_on_device ()); const bool contiguousCacheBlocks = false; tsqr_->factorExplicitRaw (A_view.dimension_0 (), A_view.dimension_1 (), A_ptr, A.getStride (), Q_ptr, Q.getStride (), R.values (), R.stride (), contiguousCacheBlocks, forceNonnegativeDiagonal); }
/// \brief Rank-revealing decomposition /// /// Using the R factor and explicit Q factor from /// factorExplicit(), compute the singular value decomposition /// (SVD) of R: \f$R = U \Sigma V^*\f$. If R is full rank (with /// respect to the given relative tolerance \c tol), do not modify /// Q or R. Otherwise, compute \f$Q := Q \cdot U\f$ and \f$R := /// \Sigma V^*\f$ in place. If R was modified, then it may not /// necessarily be upper triangular on output. /// /// \param Q [in/out] On input: explicit Q factor computed by /// factorExplicit(). (Must be an orthogonal resp. unitary /// matrix.) On output: If R is of full numerical rank with /// respect to the tolerance tol, Q is unmodified. Otherwise, Q /// is updated so that the first \c rank columns of Q are a /// basis for the column space of A (the original matrix whose /// QR factorization was computed by factorExplicit()). The /// remaining columns of Q are a basis for the null space of A. /// /// \param R [in/out] On input: N by N upper triangular matrix /// with leading dimension LDR >= N. On output: if input is /// full rank, R is unchanged on output. Otherwise, if \f$R = U /// \Sigma V^*\f$ is the SVD of R, on output R is overwritten /// with \f$\Sigma \cdot V^*\f$. This is also an N by N matrix, /// but it may not necessarily be upper triangular. /// /// \param tol [in] Relative tolerance for computing the numerical /// rank of the matrix R. /// /// \return Rank \f$r\f$ of R: \f$ 0 \leq r \leq N\f$. int revealRank (MV& Q, dense_matrix_type& R, const magnitude_type& tol) { TEUCHOS_TEST_FOR_EXCEPTION (! Q.isConstantStride (), std::invalid_argument, "TsqrAdaptor::" "revealRank: Input MultiVector Q must have constant stride."); prepareTsqr (Q); // Finish initializing TSQR. // FIXME (mfh 18 Oct 2010) Check Teuchos::Comm<int> object in Q // to make sure it is the same communicator as the one we are // using in our dist_tsqr_type implementation. Q.template sync<Kokkos::HostSpace> (); Q.template modify<Kokkos::HostSpace> (); auto Q_view = Q.template getLocalView<Kokkos::HostSpace> (); scalar_type* const Q_ptr = reinterpret_cast<scalar_type*> (Q_view.ptr_on_device ()); const bool contiguousCacheBlocks = false; return tsqr_->revealRankRaw (Q_view.dimension_0 (), Q_view.dimension_1 (), Q_ptr, Q.getStride (), R.values (), R.stride (), tol, contiguousCacheBlocks); }
/// \brief Extract A's underlying KokkosClassic::MultiVector instance. /// /// TSQR represents the local (to each MPI process) part of a /// multivector as a KokkosClassic::MultiVector (KMV), which gives a /// nonconstant view of the original multivector's data. This /// class method tells TSQR how to get the KMV from the input /// multivector. The KMV is not a persistent view of the data; /// its scope is contained within the scope of the multivector. /// /// \warning TSQR does not currently support multivectors with /// nonconstant stride. If A has nonconstant stride, this /// method will throw an exception. static KokkosClassic::MultiVector<scalar_type, node_type> getNonConstView (MV& A) { // FIXME (mfh 25 Oct 2010) We should be able to run TSQR even if // storage of A uses nonconstant stride internally. We would // have to copy and pack into a matrix with constant stride, and // then unpack on exit. For now we choose just to raise an // exception. TEUCHOS_TEST_FOR_EXCEPTION(! A.isConstantStride(), std::invalid_argument, "TSQR does not currently support Tpetra::MultiVector " "inputs that do not have constant stride."); return A.getLocalMVNonConst(); }
/// \brief Extract A's underlying KokkosClassic::MultiVector instance. /// /// TSQR represents the local (to each MPI process) part of a /// multivector as a KokkosClassic::MultiVector (KMV), which gives a /// nonconstant view of the original multivector's data. This /// class method tells TSQR how to get the KMV from the input /// multivector. The KMV is not a persistent view of the data; /// its scope is contained within the scope of the multivector. /// /// \warning TSQR does not currently support multivectors with /// nonconstant stride. If A has nonconstant stride, this /// method will throw an exception. static KokkosClassic::MultiVector<scalar_type, node_type> getNonConstView (MV& A) { // FIXME (mfh 25 Oct 2010) We should be able to run TSQR even if // storage of A uses nonconstant stride internally. We would // have to copy and pack into a matrix with constant stride, and // then unpack on exit. For now we choose just to raise an // exception. TEUCHOS_TEST_FOR_EXCEPTION(! A.isConstantStride(), std::invalid_argument, "TSQR does not currently support Tpetra::MultiVector " "inputs that do not have constant stride."); typedef typename Teuchos::ArrayRCP<mp_scalar_type>::size_type size_type; typedef typename MV::dual_view_type view_type; typedef typename view_type::t_dev::array_type flat_array_type; // Create new Kokkos::MultiVector reinterpreting the data as a longer // array of the base scalar type // Create new ArrayRCP holding data view_type pce_mv = A.getDualView(); flat_array_type flat_mv = pce_mv.d_view; const size_t num_rows = flat_mv.dimension_0(); const size_t num_cols = flat_mv.dimension_1(); const size_t size = num_rows * num_cols; ArrayRCP<scalar_type> vals = Teuchos::arcp(flat_mv.ptr_on_device(), size_type(0), size, false); // Create new MultiVector // Owing to the above comment, we don't need to worry about // non-constant stride size_t strides[2]; flat_mv.stride(strides); const size_t stride = strides[0]; KokkosClassic::MultiVector<scalar_type, node_type> mv(A.getMap()->getNode()); mv.initializeValues(num_rows, num_cols, vals, stride); return mv; }
//! Do the transpose or conjugate transpose solve. void applyTranspose (const MV& X_in, MV& Y_in, const Teuchos::ETransp mode) const { typedef Teuchos::ScalarTraits<Scalar> ST; using Teuchos::null; TEUCHOS_TEST_FOR_EXCEPTION (mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS, std::logic_error, "Tpetra::CrsMatrixSolveOp::applyTranspose: mode is neither TRANS nor " "CONJ_TRANS. Should never get here! Please report this bug to the " "Tpetra developers."); const size_t numVectors = X_in.getNumVectors(); Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph ()->getImporter (); Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph ()->getExporter (); Teuchos::RCP<const MV> X; // it is okay if X and Y reference the same data, because we can // perform a triangular solve in-situ. however, we require that // column access to each is strided. // set up import/export temporary multivectors if (importer != null) { if (importMV_ != null && importMV_->getNumVectors() != numVectors) { importMV_ = null; } if (importMV_ == null) { importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) ); } } if (exporter != null) { if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) { exportMV_ = null; } if (exportMV_ == null) { exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) ); } } // solve(TRANS): DomainMap -> RangeMap // lclMatSolve_(TRANS): ColMap -> RowMap // importer: DomainMap -> ColMap // exporter: RowMap -> RangeMap // // solve = importer o lclMatSolve_ o exporter // Domainmap -> ColMap -> RowMap -> RangeMap // // If we have a non-trivial importer, we must import elements that // are permuted or are on other processes. if (importer != null) { importMV_->doImport(X_in,*importer,INSERT); X = importMV_; } else if (X_in.isConstantStride() == false) { // cannot handle non-constant stride right now // generate a copy of X_in X = Teuchos::rcp(new MV(X_in)); } else { // just temporary, so this non-owning RCP is okay X = Teuchos::rcpFromRef (X_in); } // If we have a non-trivial exporter, we must export elements that // are permuted or belong to other processes. We will compute // solution into the to-be-exported MV; get a view. if (exporter != null) { matrix_->template localSolve<Scalar, Scalar> (*X, *exportMV_, Teuchos::CONJ_TRANS); // Make sure target is zero: necessary because we are adding Y_in.putScalar(ST::zero()); Y_in.doExport(*importMV_, *importer, ADD); } // otherwise, solve into Y else { if (Y_in.isConstantStride() == false) { // generate a strided copy of Y MV Y(Y_in); matrix_->template localSolve<Scalar, Scalar> (*X, Y, Teuchos::CONJ_TRANS); Y_in = Y; } else { matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, Teuchos::CONJ_TRANS); } } }
//! Do the non-transpose solve. void applyNonTranspose (const MV& X_in, MV& Y_in) const { using Teuchos::NO_TRANS; using Teuchos::null; typedef Teuchos::ScalarTraits<Scalar> ST; // Solve U X = Y or L X = Y // X belongs to domain map, while Y belongs to range map const size_t numVectors = X_in.getNumVectors(); Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph ()->getImporter (); Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph ()->getExporter (); Teuchos::RCP<const MV> X; // it is okay if X and Y reference the same data, because we can // perform a triangular solve in-situ. however, we require that // column access to each is strided. // set up import/export temporary multivectors if (importer != null) { if (importMV_ != null && importMV_->getNumVectors () != numVectors) { importMV_ = null; } if (importMV_ == null) { importMV_ = Teuchos::rcp (new MV (matrix_->getColMap (), numVectors)); } } if (exporter != null) { if (exportMV_ != null && exportMV_->getNumVectors () != numVectors) { exportMV_ = null; } if (exportMV_ == null) { exportMV_ = Teuchos::rcp (new MV (matrix_->getRowMap (), numVectors)); } } // solve(NO_TRANS): RangeMap -> DomainMap // lclMatSolve_: RowMap -> ColMap // importer: DomainMap -> ColMap // exporter: RowMap -> RangeMap // // solve = reverse(exporter) o lclMatSolve_ o reverse(importer) // RangeMap -> RowMap -> ColMap -> DomainMap // // If we have a non-trivial exporter, we must import elements that // are permuted or are on other processors if (exporter != null) { exportMV_->doImport (X_in, *exporter, INSERT); X = exportMV_; } else if (! X_in.isConstantStride ()) { // cannot handle non-constant stride right now // generate a copy of X_in X = Teuchos::rcp (new MV (X_in)); } else { // just temporary, so this non-owning RCP is okay X = Teuchos::rcpFromRef (X_in); } // If we have a non-trivial importer, we must export elements that // are permuted or belong to other processes. We will compute // solution into the to-be-exported MV. if (importer != null) { matrix_->template localSolve<Scalar, Scalar> (*X, *importMV_, NO_TRANS); // Make sure target is zero: necessary because we are adding. Y_in.putScalar (ST::zero ()); Y_in.doExport (*importMV_, *importer, ADD); } // otherwise, solve into Y else { // can't solve into non-strided multivector if (! Y_in.isConstantStride ()) { // generate a strided copy of Y MV Y (Y_in); matrix_->template localSolve<Scalar, Scalar> (*X, Y, NO_TRANS); Tpetra::deep_copy (Y_in, Y); } else { matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, NO_TRANS); } } }