示例#1
0
 void printVec(const Kokkos::MultiVector<float,Node> &vec) {
   const int n = vec.getNumRows();
   ArrayRCP<const float> vals = vec.getValues(0);
   ArrayRCP<const float> vals_h = vec.getNode()->viewBuffer(n,vals);
   for (int i=0; i<n; ++i) {
     cout << "   " << vals_h[i];
   }
   cout << endl;
 }
示例#2
0
    LocalOrdinal
    revealRank (Kokkos::MultiVector<Scalar, NodeType>& Q,
		Teuchos::SerialDenseMatrix<LocalOrdinal, Scalar>& R,
		const magnitude_type& tol,
		const bool contiguousCacheBlocks = false) const
    {
      typedef Kokkos::MultiVector<Scalar, NodeType> KMV;

      const LocalOrdinal nrows = static_cast<LocalOrdinal> (Q.getNumRows());
      const LocalOrdinal ncols = static_cast<LocalOrdinal> (Q.getNumCols());
      const LocalOrdinal ldq = static_cast<LocalOrdinal> (Q.getStride());
      Teuchos::ArrayRCP<Scalar> Q_ptr = Q.getValuesNonConst();

      // Take the easy exit if available.
      if (ncols == 0)
	return 0;

      //
      // FIXME (mfh 16 Jul 2010) We _should_ compute the SVD of R (as
      // the copy B) on Proc 0 only.  This would ensure that all
      // processors get the same SVD and rank (esp. in a heterogeneous
      // computing environment).  For now, we just do this computation
      // redundantly, and hope that all the returned rank values are
      // the same.
      //
      matrix_type U (ncols, ncols, STS::zero());
      const ordinal_type rank = 
	reveal_R_rank (ncols, R.values(), R.stride(), 
		       U.get(), U.lda(), tol);
      if (rank < ncols)
	{
	  // cerr << ">>> Rank of R: " << rank << " < ncols=" << ncols << endl;
	  // cerr << ">>> Resulting U:" << endl;
	  // print_local_matrix (cerr, ncols, ncols, R, ldr);
	  // cerr << endl;

	  // If R is not full rank: reveal_R_rank() already computed
	  // the SVD \f$R = U \Sigma V^*\f$ of (the input) R, and
	  // overwrote R with \f$\Sigma V^*\f$.  Now, we compute \f$Q
	  // := Q \cdot U\f$, respecting cache blocks of Q.
	  Q_times_B (nrows, ncols, Q_ptr.getRawPtr(), ldq, 
		     U.get(), U.lda(), contiguousCacheBlocks);
	}
      return rank;
    }
示例#3
0
    void
    factorExplicit (Kokkos::MultiVector<Scalar, NodeType>& A,
		    Kokkos::MultiVector<Scalar, NodeType>& Q,
		    Teuchos::SerialDenseMatrix<LocalOrdinal, Scalar>& R,
		    const bool contiguousCacheBlocks,
		    const bool forceNonnegativeDiagonal=false)
    {
      using Teuchos::asSafe;
      typedef Kokkos::MultiVector<Scalar, NodeType> KMV;

      // Tsqr currently likes LocalOrdinal ordinals, but
      // Kokkos::MultiVector has size_t ordinals.  Do conversions
      // here.  
      //
      // Teuchos::asSafe() can do safe conversion (e.g., checking for
      // overflow when casting to a narrower integer type), if a
      // custom specialization is defined for
      // Teuchos::ValueTypeConversionTraits<size_t, LocalOrdinal>.
      // Otherwise, this has the same (potentially) unsafe effect as
      // static_cast<LocalOrdinal>(...) would have.
      const LocalOrdinal A_numRows = asSafe<LocalOrdinal> (A.getNumRows());
      const LocalOrdinal A_numCols = asSafe<LocalOrdinal> (A.getNumCols());
      const LocalOrdinal A_stride = asSafe<LocalOrdinal> (A.getStride());
      const LocalOrdinal Q_numRows = asSafe<LocalOrdinal> (Q.getNumRows());
      const LocalOrdinal Q_numCols = asSafe<LocalOrdinal> (Q.getNumCols());
      const LocalOrdinal Q_stride = asSafe<LocalOrdinal> (Q.getStride());

      // Sanity checks for matrix dimensions
      if (A_numRows < A_numCols) {
	std::ostringstream os;
	os << "In Tsqr::factorExplicit: input matrix A has " << A_numRows 
	   << " local rows, and " << A_numCols << " columns.  The input "
	  "matrix must have at least as many rows on each processor as "
	  "there are columns.";
	throw std::invalid_argument(os.str());
      } else if (A_numRows != Q_numRows) {
	std::ostringstream os;
	os << "In Tsqr::factorExplicit: input matrix A and output matrix Q "
	  "must have the same number of rows.  A has " << A_numRows << " rows"
	  " and Q has " << Q_numRows << " rows.";
	throw std::invalid_argument(os.str());
      } else if (R.numRows() < R.numCols()) {
	std::ostringstream os;
	os << "In Tsqr::factorExplicit: output matrix R must have at least "
	  "as many rows as columns.  R has " << R.numRows() << " rows and "
	   << R.numCols() << " columns.";
	throw std::invalid_argument(os.str());
      } else if (A_numCols != R.numCols()) {
	std::ostringstream os;
	os << "In Tsqr::factorExplicit: input matrix A and output matrix R "
	  "must have the same number of columns.  A has " << A_numCols 
	   << " columns and R has " << R.numCols() << " columns.";
	throw std::invalid_argument(os.str());
      }

      // Check for quick exit, based on matrix dimensions
      if (Q_numCols == 0)
	return;

      // Hold on to nonconst views of A and Q.  This will make TSQR
      // correct (if perhaps inefficient) for all possible Kokkos Node
      // types, even GPU nodes.
      Teuchos::ArrayRCP<scalar_type> A_ptr = A.getValuesNonConst();
      Teuchos::ArrayRCP<scalar_type> Q_ptr = Q.getValuesNonConst();

      R.putScalar (STS::zero());
      NodeOutput nodeResults = 
	nodeTsqr_->factor (A_numRows, A_numCols, A_ptr.getRawPtr(), A_stride,
			   R.values(), R.stride(), contiguousCacheBlocks);
      // FIXME (mfh 19 Oct 2010) Replace actions on raw pointer with
      // actions on the Kokkos::MultiVector or at least the ArrayRCP.
      nodeTsqr_->fill_with_zeros (Q_numRows, Q_numCols, 
				  Q_ptr.getRawPtr(), Q_stride,
				  contiguousCacheBlocks);
      matview_type Q_rawView (Q_numRows, Q_numCols, 
			      Q_ptr.getRawPtr(), Q_stride);
      matview_type Q_top_block = 
	nodeTsqr_->top_block (Q_rawView, contiguousCacheBlocks);
      if (Q_top_block.nrows() < R.numCols()) {
	std::ostringstream os;
	os << "The top block of Q has too few rows.  This means that the "
	   << "the intranode TSQR implementation has a bug in its top_block"
	   << "() method.  The top block should have at least " << R.numCols()
	   << " rows, but instead has only " << Q_top_block.ncols() 
	   << " rows.";
	throw std::logic_error (os.str());
      }
      {
	matview_type Q_top (R.numCols(), Q_numCols, Q_top_block.get(), 
			    Q_top_block.lda());
	matview_type R_view (R.numRows(), R.numCols(), R.values(), R.stride());
	distTsqr_->factorExplicit (R_view, Q_top, forceNonnegativeDiagonal);
      }
      nodeTsqr_->apply (ApplyType::NoTranspose, 
			A_numRows, A_numCols, A_ptr.getRawPtr(), A_stride,
			nodeResults, Q_numCols, Q_ptr.getRawPtr(), Q_stride,
			contiguousCacheBlocks);

      // If necessary, force the R factor to have a nonnegative diagonal.
      if (forceNonnegativeDiagonal && 
	  ! QR_produces_R_factor_with_nonnegative_diagonal()) {
	details::NonnegDiagForcer<LocalOrdinal, Scalar, STS::isComplex> forcer;
	matview_type Q_mine (Q_numRows, Q_numCols, Q_ptr.getRawPtr(), Q_stride);
	matview_type R_mine (R.numRows(), R.numCols(), R.values(), R.stride());
	forcer.force (Q_mine, R_mine);
      }

      // "Commit" the changes to the multivector.
      A_ptr = Teuchos::null;
      Q_ptr = Teuchos::null;
    }