Exemplo n.º 1
0
    /// \brief Rank-revealing decomposition
    ///
    /// Using the R factor and explicit Q factor from
    /// factorExplicit(), compute the singular value decomposition
    /// (SVD) of R: \f$R = U \Sigma V^*\f$.  If R is full rank (with
    /// respect to the given relative tolerance \c tol), do not modify
    /// Q or R.  Otherwise, compute \f$Q := Q \cdot U\f$ and \f$R :=
    /// \Sigma V^*\f$ in place.  If R was modified, then it may not
    /// necessarily be upper triangular on output.
    ///
    /// \param Q [in/out] On input: explicit Q factor computed by
    ///   factorExplicit().  (Must be an orthogonal resp. unitary
    ///   matrix.)  On output: If R is of full numerical rank with
    ///   respect to the tolerance tol, Q is unmodified.  Otherwise, Q
    ///   is updated so that the first \c rank columns of Q are a
    ///   basis for the column space of A (the original matrix whose
    ///   QR factorization was computed by factorExplicit()).  The
    ///   remaining columns of Q are a basis for the null space of A.
    ///
    /// \param R [in/out] On input: N by N upper triangular matrix
    ///   with leading dimension LDR >= N.  On output: if input is
    ///   full rank, R is unchanged on output.  Otherwise, if \f$R = U
    ///   \Sigma V^*\f$ is the SVD of R, on output R is overwritten
    ///   with \f$\Sigma \cdot V^*\f$.  This is also an N by N matrix,
    ///   but it may not necessarily be upper triangular.
    ///
    /// \param tol [in] Relative tolerance for computing the numerical
    ///   rank of the matrix R.
    ///
    /// \return Rank \f$r\f$ of R: \f$ 0 \leq r \leq N\f$.
    int
    revealRank (MV& Q,
                dense_matrix_type& R,
                const magnitude_type& tol)
    {
      // The raw kernel below receives a single base pointer and one
      // column stride for Q, so Q has to be stored with constant stride.
      TEUCHOS_TEST_FOR_EXCEPTION
        (! Q.isConstantStride (), std::invalid_argument,
         "TsqrAdaptor::revealRank: Input MultiVector Q must have "
         "constant stride.");

      // Finish initializing TSQR, using Q as the prototype multivector.
      prepareTsqr (Q);

      // FIXME (mfh 18 Oct 2010) Verify that the Teuchos::Comm<int>
      // object in Q is the same communicator that our dist_tsqr_type
      // implementation uses.

      // The kernel is handed a host pointer: sync Q to host space and
      // mark it as modified there before taking the host view.
      Q.template sync<Kokkos::HostSpace> ();
      Q.template modify<Kokkos::HostSpace> ();
      auto hostQ = Q.template getLocalView<Kokkos::HostSpace> ();

      // Local dimensions and raw storage of Q, as seen by this process.
      const auto numLocalRows = hostQ.dimension_0 ();
      const auto numCols = hostQ.dimension_1 ();
      scalar_type* const rawQ =
        reinterpret_cast<scalar_type*> (hostQ.ptr_on_device ());

      // This adaptor always passes "false" for the cache-block flag.
      const bool contiguousCacheBlocks = false;

      // Returns the numerical rank of R with respect to tol; Q and R
      // may be overwritten in place by the kernel.
      return tsqr_->revealRankRaw (numLocalRows, numCols,
                                   rawQ, Q.getStride (),
                                   R.values (), R.stride (),
                                   tol, contiguousCacheBlocks);
    }
Exemplo n.º 2
0
      /// \brief Compute QR factorization of the multivector A.
      ///
      /// Compute the QR factorization in place of the multivector A.
      /// The Q factor is represented implicitly; part of that is
      /// stored in place in A (overwriting the input), and the other
      /// part is returned.  The returned object as well as the
      /// representation in A are both inputs of \c explicitQ().  The R
      /// factor is copied into R.
      ///
      /// \param A [in/out] On input, the multivector whose QR
      ///   factorization is to be computed.  Overwritten on output
      ///   with part of the implicit representation of the Q factor.
      ///
      /// \param R [out] On output, the R factor from the QR
      ///   factorization of A.  Represented as a square dense matrix
      ///   (not in packed form) with the same number of columns as A.
      ///   The lower triangle of R is overwritten with zeros on
      ///   output.
      ///
      /// \param contiguousCacheBlocks [in] Whether the data in A has
      ///   been reorganized so that the elements of each cache block
      ///   are stored contiguously (i.e., via the output of
      ///   cacheBlock()).  The default is false, which means that
      ///   each process' row block of A is stored as a matrix in
      ///   column-major order, with leading dimension >= the number
      ///   of rows in the row block.
      ///
      /// \return Additional information that, together with the A
      ///   output, encodes the implicitly represented Q factor from
      ///   the QR factorization of the A input.
      ///
      /// \note Virtual but implemented, because this default
      /// implementation is correct for all multivector_type types,
      /// but not necessarily efficient.  It should be efficient if
      /// fetchNonConstView(A) does not require copying the contents
      /// of A (e.g., from GPU memory to CPU memory).
      virtual factor_output_type
      factor (multivector_type& A,
              dense_matrix_type& R,
              const bool contiguousCacheBlocks = false)
      {
        // The intranode part of TSQR is set up lazily, on first use.
        initNodeTsqr (A);

        // Ask the adaptor for the local row count, the column count,
        // and the leading dimension of this process' block of A.
        local_ordinal_type numLocalRows, numCols, leadDimA;
        fetchDims (A, numLocalRows, numCols, leadDimA);

        // Always correct for any Node type, though not necessarily
        // efficient: ideally, obtaining this view copies nothing.
        Teuchos::ArrayRCP< scalar_type > localA = fetchNonConstView (A);

        // R has to be numCols by numCols.  It is only reshaped when its
        // current dimensions differ; per the original author's note,
        // shape() also zero-fills R.  An R that already has the right
        // size is handed to the factorization untouched.
        const bool rHasRightShape =
          (R.numRows() == numCols && R.numCols() == numCols);
        if (! rHasRightShape && R.shape (numCols, numCols) != 0)
          {
            throw std::runtime_error ("Failed to reshape matrix R");
          }

        // Factor in place: A receives part of the implicit Q factor, R
        // receives the R factor, and the rest of the implicit Q
        // representation is the return value.
        return pTsqr_->factor (numLocalRows, numCols,
                               localA.get(), leadDimA,
                               R.values(), R.stride(),
                               contiguousCacheBlocks);
      }
Exemplo n.º 3
0
    /// \brief Compute QR factorization [Q,R] = qr(A,0).
    ///
    /// \param A [in/out] On input: the multivector to factor.
    ///   Overwritten with garbage on output.
    ///
    /// \param Q [out] On output: the (explicitly stored) Q factor in
    ///   the QR factorization of the (input) multivector A.
    ///
    /// \param R [out] On output: the R factor in the QR factorization
    ///   of the (input) multivector A.
    ///
    /// \param forceNonnegativeDiagonal [in] If true, then (if
    ///   necessary) do extra work (modifying both the Q and R
    ///   factors) in order to force the R factor to have a
    ///   nonnegative diagonal.
    ///
    /// \warning Currently, this method only works if A and Q have the
    ///   same communicator and row distribution ("Map," in Petra
    ///   terms) as those of the multivector given to this adapter
    ///   instance's constructor.  Otherwise, the result of this
    ///   method is undefined.
    void
    factorExplicit (MV& A,
                    MV& Q,
                    dense_matrix_type& R,
                    const bool forceNonnegativeDiagonal=false)
    {
      // Both multivectors are passed to the raw kernel as a base
      // pointer plus one column stride, so both need constant stride.
      TEUCHOS_TEST_FOR_EXCEPTION
        (! A.isConstantStride (), std::invalid_argument,
         "TsqrAdaptor::factorExplicit: Input MultiVector A must have "
         "constant stride.");
      TEUCHOS_TEST_FOR_EXCEPTION
        (! Q.isConstantStride (), std::invalid_argument,
         "TsqrAdaptor::factorExplicit: Input MultiVector Q must have "
         "constant stride.");

      // Finish initializing TSQR, using Q as the prototype multivector.
      prepareTsqr (Q);

      // FIXME (mfh 16 Jan 2016) TSQR is currently a host-only
      // implementation, hence the syncs to host space below.  Both A
      // and Q are written by the kernel, so both are marked modified.
      A.template sync<Kokkos::HostSpace> ();
      A.template modify<Kokkos::HostSpace> ();
      Q.template sync<Kokkos::HostSpace> ();
      Q.template modify<Kokkos::HostSpace> ();
      auto hostA = A.template getLocalView<Kokkos::HostSpace> ();
      auto hostQ = Q.template getLocalView<Kokkos::HostSpace> ();

      // Only A's local dimensions are passed on; Q is not checked
      // against them here and is expected to match A.
      const auto numLocalRows = hostA.dimension_0 ();
      const auto numCols = hostA.dimension_1 ();

      scalar_type* const rawA =
        reinterpret_cast<scalar_type*> (hostA.ptr_on_device ());
      scalar_type* const rawQ =
        reinterpret_cast<scalar_type*> (hostQ.ptr_on_device ());

      // This adaptor always passes "false" for the cache-block flag.
      const bool contiguousCacheBlocks = false;

      tsqr_->factorExplicitRaw (numLocalRows, numCols,
                                rawA, A.getStride (),
                                rawQ, Q.getStride (),
                                R.values (), R.stride (),
                                contiguousCacheBlocks,
                                forceNonnegativeDiagonal);
    }
Exemplo n.º 4
0
      /// \brief Rank-revealing decomposition.
      ///
      /// Using the R factor from factor() and the explicit Q factor
      /// from explicitQ(), compute the SVD of R (\f$R = U \Sigma
      /// V^*\f$).  If R is full rank (with respect to the given
      /// relative tolerance), don't change Q or R.  Otherwise,
      /// compute \f$Q := Q \cdot U\f$ and \f$R := \Sigma V^*\f$ in
      /// place (the latter may be no longer upper triangular).
      ///
      /// \param Q [in/out] On input: the explicit Q factor computed
      ///   by explicitQ().  On output: unchanged if R has full
      ///   (numerical) rank, else \f$Q := Q \cdot U\f$, where \f$U\f$
      ///   is the ncols by ncols matrix of R's left singular vectors.
      ///
      /// \param R [in/out] On input: ncols by ncols upper triangular
      ///   matrix stored in column-major order.  On output: if input
      ///   has full (numerical) rank, R is unchanged on output.
      ///   Otherwise, if \f$R = U \Sigma V^*\f$ is the SVD of R, on
      ///   output R is overwritten with \f$\Sigma \cdot V^*\f$.  This
      ///   is also an ncols by ncols matrix, but may not necessarily
      ///   be upper triangular.
      ///
      /// \return Rank \f$r\f$ of R: \f$ 0 \leq r \leq ncols\f$.
      ///
      local_ordinal_type
      revealRank (multivector_type& Q,
                  dense_matrix_type& R,
                  const magnitude_type relativeTolerance,
                  const bool contiguousCacheBlocks = false) const
      {
        // The intranode part of TSQR is set up lazily, on first use.
        initNodeTsqr (Q);

        // Ask the adaptor for the local row count, the column count,
        // and the leading dimension of this process' block of Q.
        local_ordinal_type numLocalRows, numCols, leadDimQ;
        fetchDims (Q, numLocalRows, numCols, leadDimQ);

        // Writable view of Q's local data; the ArrayRCP stays alive
        // until the call below has returned.
        Teuchos::ArrayRCP< scalar_type > localQ = fetchNonConstView (Q);

        // Hand the raw buffers to TSQR; the result is the numerical
        // rank of R with respect to relativeTolerance.
        const local_ordinal_type rank =
          pTsqr_->reveal_rank (numLocalRows, numCols,
                               localQ.get(), leadDimQ,
                               R.values(), R.stride(),
                               relativeTolerance,
                               contiguousCacheBlocks);
        return rank;
      }