/// \brief Rank-revealing decomposition /// /// Using the R factor and explicit Q factor from /// factorExplicit(), compute the singular value decomposition /// (SVD) of R: \f$R = U \Sigma V^*\f$. If R is full rank (with /// respect to the given relative tolerance \c tol), do not modify /// Q or R. Otherwise, compute \f$Q := Q \cdot U\f$ and \f$R := /// \Sigma V^*\f$ in place. If R was modified, then it may not /// necessarily be upper triangular on output. /// /// \param Q [in/out] On input: explicit Q factor computed by /// factorExplicit(). (Must be an orthogonal resp. unitary /// matrix.) On output: If R is of full numerical rank with /// respect to the tolerance tol, Q is unmodified. Otherwise, Q /// is updated so that the first \c rank columns of Q are a /// basis for the column space of A (the original matrix whose /// QR factorization was computed by factorExplicit()). The /// remaining columns of Q are a basis for the null space of A. /// /// \param R [in/out] On input: N by N upper triangular matrix /// with leading dimension LDR >= N. On output: if input is /// full rank, R is unchanged on output. Otherwise, if \f$R = U /// \Sigma V^*\f$ is the SVD of R, on output R is overwritten /// with \f$\Sigma \cdot V^*\f$. This is also an N by N matrix, /// but it may not necessarily be upper triangular. /// /// \param tol [in] Relative tolerance for computing the numerical /// rank of the matrix R. /// /// \return Rank \f$r\f$ of R: \f$ 0 \leq r \leq N\f$. int revealRank (MV& Q, dense_matrix_type& R, const magnitude_type& tol) { TEUCHOS_TEST_FOR_EXCEPTION (! Q.isConstantStride (), std::invalid_argument, "TsqrAdaptor::" "revealRank: Input MultiVector Q must have constant stride."); prepareTsqr (Q); // Finish initializing TSQR. // FIXME (mfh 18 Oct 2010) Check Teuchos::Comm<int> object in Q // to make sure it is the same communicator as the one we are // using in our dist_tsqr_type implementation. Q.template sync<Kokkos::HostSpace> (); Q.template modify<Kokkos::HostSpace> (); auto Q_view = Q.template getLocalView<Kokkos::HostSpace> (); scalar_type* const Q_ptr = reinterpret_cast<scalar_type*> (Q_view.ptr_on_device ()); const bool contiguousCacheBlocks = false; return tsqr_->revealRankRaw (Q_view.dimension_0 (), Q_view.dimension_1 (), Q_ptr, Q.getStride (), R.values (), R.stride (), tol, contiguousCacheBlocks); }
/// \brief Compute QR factorization of the multivector A. /// /// Compute the QR factorization in place of the multivector A. /// The Q factor is represented implicitly; part of that is /// stored in place in A (overwriting the input), and the other /// part is returned. The returned object as well as the /// representation in A are both inputs of \c explicitQ(). The R /// factor is copied into R. /// /// \param A [in/out] On input, the multivector whose QR /// factorization is to be computed. Overwritten on output /// with part of the implicit representation of the Q factor. /// /// \param R [out] On output, the R factor from the QR /// factorization of A. Represented as a square dense matrix /// (not in packed form) with the same number of columns as A. /// The lower triangle of R is overwritten with zeros on /// output. /// /// \param contiguousCacheBlocks [in] Whether the data in A has /// been reorganized so that the elements of each cache block /// are stored contiguously (i.e., via the output of /// cacheBlock()). The default is false, which means that /// each process' row block of A is stored as a matrix in /// column-major order, with leading dimension >= the number /// of rows in the row block. /// /// \return Additional information that, together with the A /// output, encodes the implicitly represented Q factor from /// the QR factorization of the A input. /// /// \note Virtual but implemented, because this default /// implementation is correct for all multivector_type types, /// but not necessarily efficient. It should be efficient if /// fetchNonConstView(A) does not require copying the contents /// of A (e.g., from GPU memory to CPU memory). virtual factor_output_type factor (multivector_type& A, dense_matrix_type& R, const bool contiguousCacheBlocks = false) { // Lazily init the intranode part of TSQR if necessary. initNodeTsqr (A); local_ordinal_type nrowsLocal, ncols, LDA; fetchDims (A, nrowsLocal, ncols, LDA); // This is guaranteed to be _correct_ for any Node type, but // won't necessary be efficient. The desired model is that // A_local requires no copying. Teuchos::ArrayRCP< scalar_type > A_local = fetchNonConstView (A); // Reshape R if necessary. This operation zeros out all the // entries of R, which is what we want anyway. if (R.numRows() != ncols || R.numCols() != ncols) { if (0 != R.shape (ncols, ncols)) throw std::runtime_error ("Failed to reshape matrix R"); } return pTsqr_->factor (nrowsLocal, ncols, A_local.get(), LDA, R.values(), R.stride(), contiguousCacheBlocks); }
/// \brief Compute QR factorization [Q,R] = qr(A,0). /// /// \param A [in/out] On input: the multivector to factor. /// Overwritten with garbage on output. /// /// \param Q [out] On output: the (explicitly stored) Q factor in /// the QR factorization of the (input) multivector A. /// /// \param R [out] On output: the R factor in the QR factorization /// of the (input) multivector A. /// /// \param forceNonnegativeDiagonal [in] If true, then (if /// necessary) do extra work (modifying both the Q and R /// factors) in order to force the R factor to have a /// nonnegative diagonal. /// /// \warning Currently, this method only works if A and Q have the /// same communicator and row distribution ("Map," in Petra /// terms) as those of the multivector given to this adapter /// instance's constructor. Otherwise, the result of this /// method is undefined. void factorExplicit (MV& A, MV& Q, dense_matrix_type& R, const bool forceNonnegativeDiagonal=false) { TEUCHOS_TEST_FOR_EXCEPTION (! A.isConstantStride (), std::invalid_argument, "TsqrAdaptor::" "factorExplicit: Input MultiVector A must have constant stride."); TEUCHOS_TEST_FOR_EXCEPTION (! Q.isConstantStride (), std::invalid_argument, "TsqrAdaptor::" "factorExplicit: Input MultiVector Q must have constant stride."); prepareTsqr (Q); // Finish initializing TSQR. // FIXME (mfh 16 Jan 2016) Currently, TSQR is a host-only // implementation. A.template sync<Kokkos::HostSpace> (); A.template modify<Kokkos::HostSpace> (); Q.template sync<Kokkos::HostSpace> (); Q.template modify<Kokkos::HostSpace> (); auto A_view = A.template getLocalView<Kokkos::HostSpace> (); auto Q_view = Q.template getLocalView<Kokkos::HostSpace> (); scalar_type* const A_ptr = reinterpret_cast<scalar_type*> (A_view.ptr_on_device ()); scalar_type* const Q_ptr = reinterpret_cast<scalar_type*> (Q_view.ptr_on_device ()); const bool contiguousCacheBlocks = false; tsqr_->factorExplicitRaw (A_view.dimension_0 (), A_view.dimension_1 (), A_ptr, A.getStride (), Q_ptr, Q.getStride (), R.values (), R.stride (), contiguousCacheBlocks, forceNonnegativeDiagonal); }
/// \brief Rank-revealing decomposition. /// /// Using the R factor from factor() and the explicit Q factor /// from explicitQ(), compute the SVD of R (\f$R = U \Sigma /// V^*\f$). R. If R is full rank (with respect to the given /// relative tolerance), don't change Q or R. Otherwise, /// compute \f$Q := Q \cdot U\f$ and \f$R := \Sigma V^*\f$ in /// place (the latter may be no longer upper triangular). /// /// \param Q [in/out] On input: the explicit Q factor computed /// by explicitQ(). On output: unchanged if R has full /// (numerical) rank, else \f$Q := Q \cdot U\f$, where \f$U\f$ /// is the ncols by ncols matrix of R's left singular vectors. /// /// \param R [in/out] On input: ncols by ncols upper triangular /// matrix stored in column-major order. On output: if input /// has full (numerical) rank, R is unchanged on output. /// Otherwise, if \f$R = U \Sigma V^*\f$ is the SVD of R, on /// output R is overwritten with \f$\Sigma \cdot V^*\f$. This /// is also an ncols by ncols matrix, but may not necessarily /// be upper triangular. /// /// \return Rank \f$r\f$ of R: \f$ 0 \leq r \leq ncols\f$. /// local_ordinal_type revealRank (multivector_type& Q, dense_matrix_type& R, const magnitude_type relativeTolerance, const bool contiguousCacheBlocks = false) const { using Teuchos::ArrayRCP; // Lazily init the intranode part of TSQR if necessary. initNodeTsqr (Q); local_ordinal_type nrowsLocal, ncols, ldqLocal; fetchDims (Q, nrowsLocal, ncols, ldqLocal); ArrayRCP< scalar_type > Q_ptr = fetchNonConstView (Q); return pTsqr_->reveal_rank (nrowsLocal, ncols, Q_ptr.get(), ldqLocal, R.values(), R.stride(), relativeTolerance, contiguousCacheBlocks); }