Esempio n. 1
0
      /// \brief Verify the result of the "thin" QR factorization \f$A = QR\f$.
      ///
      /// This method returns a list of three magnitudes: 
      /// - \f$\| A - QR \|_F\f$
      /// - \f$\|I - Q^* Q\|_F\f$
      /// - \f$\|A\|_F\f$
      ///
      /// The notation $\f\| X \|\f$ denotes the Frobenius norm
      /// (square root of sum of squares) of a matrix \f$X\f$.
      /// Returning the Frobenius norm of \f$A\f$ allows you to scale
      /// or not scale the residual \f$\|A - QR\|\f$ as you prefer.
      virtual std::vector< magnitude_type >
      verify (const multivector_type& A,
	      const multivector_type& Q,
	      const Teuchos::SerialDenseMatrix< local_ordinal_type, scalar_type >& R)
      {
	using Teuchos::ArrayRCP;

	local_ordinal_type nrowsLocal_A, ncols_A, LDA;
	local_ordinal_type nrowsLocal_Q, ncols_Q, LDQ;
	fetchDims (A, nrowsLocal_A, ncols_A, LDA);
	fetchDims (Q, nrowsLocal_Q, ncols_Q, LDQ);
	if (nrowsLocal_A != nrowsLocal_Q)
	  throw std::runtime_error ("A and Q must have same number of rows");
	else if (ncols_A != ncols_Q)
	  throw std::runtime_error ("A and Q must have same number of columns");
	else if (ncols_A != R.numCols())
	  throw std::runtime_error ("A and R must have same number of columns");
	else if (R.numRows() < R.numCols())
	  throw std::runtime_error ("R must have no fewer rows than columns");

	// Const views suffice for verification
	ArrayRCP< const scalar_type > A_ptr = fetchConstView (A);
	ArrayRCP< const scalar_type > Q_ptr = fetchConstView (Q);
	return global_verify (nrowsLocal_A, ncols_A, A_ptr.get(), LDA,
			      Q_ptr.get(), LDQ, R.values(), R.stride(), 
			      pScalarMessenger_.get());
      }
void assembleIRKState(
  const int stageIndex,
  const Teuchos::SerialDenseMatrix<int,Scalar> &A_in,
  const Scalar dt,
  const Thyra::VectorBase<Scalar> &x_base,
  const Thyra::ProductVectorBase<Scalar> &x_stage_bar,
  Teuchos::Ptr<Thyra::VectorBase<Scalar> > x_out_ptr
  )
{

  typedef ScalarTraits<Scalar> ST;

  const int numStages_in = A_in.numRows();
  TEUCHOS_ASSERT_IN_RANGE_UPPER_EXCLUSIVE( stageIndex, 0, numStages_in );
  TEUCHOS_ASSERT_EQUALITY( A_in.numRows(), numStages_in );
  TEUCHOS_ASSERT_EQUALITY( A_in.numCols(), numStages_in );
  TEUCHOS_ASSERT_EQUALITY( x_stage_bar.productSpace()->numBlocks(), numStages_in );
  Thyra::VectorBase<Scalar>& x_out = *x_out_ptr;

  V_V( outArg(x_out), x_base );
  for ( int j = 0; j < numStages_in; ++j ) {
    Vp_StV( outArg(x_out), dt * A_in(stageIndex,j), *x_stage_bar.getVectorBlock(j) );
  }

}
Esempio n. 3
0
void
Stokhos::SmolyakPseudoSpectralOperator<ordinal_type,value_type,point_compare_type>::
transformPCE2QP_smolyak(
  const value_type& alpha, 
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& input,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& result, 
  const value_type& beta,
  bool trans) const {
  Teuchos::SerialDenseMatrix<ordinal_type,value_type> op_input, op_result;
  result.scale(beta);

  for (ordinal_type i=0; i<operators.size(); i++) {
    Teuchos::RCP<operator_type> op = operators[i];
    if (trans) {
      op_input.reshape(input.numRows(), op->coeff_size());
      op_result.reshape(result.numRows(), op->point_size());
    }
    else {
      op_input.reshape(op->coeff_size(), input.numCols());
      op_result.reshape(op->point_size(), result.numCols());
    }
    
    gather(scatter_maps[i], input, trans, op_input);
    op->transformPCE2QP(smolyak_coeffs[i], op_input, op_result, 0.0, trans);
    scatter(gather_maps[i], op_result, trans, result);
  }
}
    virtual ordinal_type ApplyInverse(
      const Teuchos::SerialDenseMatrix<ordinal_type, value_type>& Input, 
      Teuchos::SerialDenseMatrix<ordinal_type, value_type>& Result, 
      ordinal_type m) const {
      ordinal_type n=Input.numRows();
      Teuchos::SerialDenseMatrix<ordinal_type, value_type> G(A);
      Teuchos::SerialDenseMatrix<ordinal_type, value_type> z(n,1);
      for (ordinal_type j=0; j<m; j++){
	if (j==0){  // Compute z=D-1r
	  for (ordinal_type i=0; i<n; i++)
	    z(i,0)=Input(i,0)/A(i,i);
	}
	else {
	  //Compute G=invD(-L-U)=I-inv(D)A 
	  for (ordinal_type i=0; i<n; i++){
	    for (ordinal_type j=0; j<n; j++){
	      if (j==i)
		G(i,j)=0;
	      else 
		G(i,j)=-A(i,j)/A(i,i);
	    }
	  }
	  
	  Result.assign(z);
	  //z=Gz+inv(D)r
	  Result.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, G, z, 1.0);
	  
	}
      }

      return 0;
    }
 void EpetraOpMultiVec::MvTimesMatAddMv ( double alpha, const MultiVec<double>& A, 
     const Teuchos::SerialDenseMatrix<int,double>& B, double beta ) 
 {
   Epetra_LocalMap LocalMap(B.numRows(), 0, Epetra_MV->Map().Comm());
   Epetra_MultiVector B_Pvec(Epetra_DataAccess::View, LocalMap, B.values(), B.stride(), B.numCols());
   
   EpetraOpMultiVec *A_vec = dynamic_cast<EpetraOpMultiVec *>(&const_cast<MultiVec<double> &>(A)); 
   TEUCHOS_TEST_FOR_EXCEPTION( A_vec==NULL,  std::invalid_argument, "Anasazi::EpetraOpMultiVec::SetBlocks() cast of MultiVec<double> to EpetraOpMultiVec failed.");
   
   TEUCHOS_TEST_FOR_EXCEPTION( 
       Epetra_MV->Multiply( 'N', 'N', alpha, *(A_vec->GetEpetraMultiVector()), B_Pvec, beta ) != 0,
       EpetraSpecializedMultiVecFailure, "Anasazi::EpetraOpMultiVec::MvTimesMatAddMv() call to Epetra_MultiVec::Multiply() returned a nonzero value.");
 }
Esempio n. 6
0
  // Update *this with alpha * A * B + beta * (*this). 
  void MvTimesMatAddMv (ScalarType alpha, const Anasazi::MultiVec<ScalarType> &A, 
                        const Teuchos::SerialDenseMatrix<int, ScalarType> &B, 
                        ScalarType beta)
  {
    
    assert (Length_ == A.GetVecLength());
    assert (B.numRows() == A.GetNumberVecs());
    assert (B.numCols() <= NumberVecs_);

    MyMultiVec* MyA;
    MyA = dynamic_cast<MyMultiVec*>(&const_cast<Anasazi::MultiVec<ScalarType> &>(A)); 
    assert(MyA!=NULL);

    if ((*this)[0] == (*MyA)[0]) {
      // If this == A, then need additional storage ...
      // This situation is a bit peculiar but it may be required by
      // certain algorithms.
      
      std::vector<ScalarType> tmp(NumberVecs_);

      for (int i = 0 ; i < Length_ ; ++i) {
        for (int v = 0; v < A.GetNumberVecs() ; ++v) {
          tmp[v] = (*MyA)(i, v);
        }

        for (int v = 0 ; v < B.numCols() ; ++v) {
          (*this)(i, v) *= beta; 
          ScalarType res = Teuchos::ScalarTraits<ScalarType>::zero();

          for (int j = 0 ; j < A.GetNumberVecs() ; ++j) {
            res +=  tmp[j] * B(j, v);
          }

          (*this)(i, v) += alpha * res;
        }
      }
    }
    else {
      for (int i = 0 ; i < Length_ ; ++i) {
        for (int v = 0 ; v < B.numCols() ; ++v) {
          (*this)(i, v) *= beta; 
          ScalarType res = 0.0;
          for (int j = 0 ; j < A.GetNumberVecs() ; ++j) {
            res +=  (*MyA)(i, j) * B(j, v);
          }

          (*this)(i, v) += alpha * res;
        }
      }
    }
  }
Esempio n. 7
0
void EpetraMultiVec::MvTransMv ( const double alpha, const MultiVec<double>& A,
				 Teuchos::SerialDenseMatrix<int,double>& B) const
{    
  EpetraMultiVec *A_vec = dynamic_cast<EpetraMultiVec *>(&const_cast<MultiVec<double> &>(A));
  
  if (A_vec) {
    Epetra_LocalMap LocalMap(B.numRows(), 0, Map().Comm());
    Epetra_MultiVector B_Pvec(View, LocalMap, B.values(), B.stride(), B.numCols());
    
    int info = B_Pvec.Multiply( 'T', 'N', alpha, *A_vec, *this, 0.0 );
    TEST_FOR_EXCEPTION(info!=0, EpetraMultiVecFailure, 
		       "Belos::EpetraMultiVec::MvTransMv call to Multiply() returned a nonzero value.");
  }
}
        // Generic BLAS level 3 matrix multiplication
        // \f$\text{this}\leftarrow \alpha A B+\beta\text{this}\f$   
        void gemm(const Real alpha,
                  const MV& A,
                  const Teuchos::SerialDenseMatrix<int,Real> &B,
                  const Real beta) {

           // Scale this by beta
            this->scale(beta);

            for(int i=0;i<B.numRows();++i) {
                for(int j=0;j<B.numCols();++j) {
                    mvec_[j]->axpy(alpha*B(i,j),*A.getVector(i));  
                }
            }
        } 
Esempio n. 9
0
void EpetraMultiVec::MvTimesMatAddMv ( const double alpha, const MultiVec<double>& A, 
				       const Teuchos::SerialDenseMatrix<int,double>& B, const double beta ) 
{
  Epetra_LocalMap LocalMap(B.numRows(), 0, Map().Comm());
  Epetra_MultiVector B_Pvec(View, LocalMap, B.values(), B.stride(), B.numCols());
  
  EpetraMultiVec *A_vec = dynamic_cast<EpetraMultiVec *>(&const_cast<MultiVec<double> &>(A)); 
  TEST_FOR_EXCEPTION(A_vec==NULL, EpetraMultiVecFailure,
                     "Belos::EpetraMultiVec::MvTimesMatAddMv cast from Belos::MultiVec<> to Belos::EpetraMultiVec failed.");
  
  int info = Multiply( 'N', 'N', alpha, *A_vec, B_Pvec, beta );
  TEST_FOR_EXCEPTION(info!=0, EpetraMultiVecFailure, 
		     "Belos::EpetraMultiVec::MvTimesMatAddMv call to Multiply() returned a nonzero value.");

}
Esempio n. 10
0
    LocalOrdinal
    revealRank (Kokkos::MultiVector<Scalar, NodeType>& Q,
		Teuchos::SerialDenseMatrix<LocalOrdinal, Scalar>& R,
		const magnitude_type& tol,
		const bool contiguousCacheBlocks = false) const
    {
      typedef Kokkos::MultiVector<Scalar, NodeType> KMV;

      const LocalOrdinal nrows = static_cast<LocalOrdinal> (Q.getNumRows());
      const LocalOrdinal ncols = static_cast<LocalOrdinal> (Q.getNumCols());
      const LocalOrdinal ldq = static_cast<LocalOrdinal> (Q.getStride());
      Teuchos::ArrayRCP<Scalar> Q_ptr = Q.getValuesNonConst();

      // Take the easy exit if available.
      if (ncols == 0)
	return 0;

      //
      // FIXME (mfh 16 Jul 2010) We _should_ compute the SVD of R (as
      // the copy B) on Proc 0 only.  This would ensure that all
      // processors get the same SVD and rank (esp. in a heterogeneous
      // computing environment).  For now, we just do this computation
      // redundantly, and hope that all the returned rank values are
      // the same.
      //
      matrix_type U (ncols, ncols, STS::zero());
      const ordinal_type rank = 
	reveal_R_rank (ncols, R.values(), R.stride(), 
		       U.get(), U.lda(), tol);
      if (rank < ncols)
	{
	  // cerr << ">>> Rank of R: " << rank << " < ncols=" << ncols << endl;
	  // cerr << ">>> Resulting U:" << endl;
	  // print_local_matrix (cerr, ncols, ncols, R, ldr);
	  // cerr << endl;

	  // If R is not full rank: reveal_R_rank() already computed
	  // the SVD \f$R = U \Sigma V^*\f$ of (the input) R, and
	  // overwrote R with \f$\Sigma V^*\f$.  Now, we compute \f$Q
	  // := Q \cdot U\f$, respecting cache blocks of Q.
	  Q_times_B (nrows, ncols, Q_ptr.getRawPtr(), ldq, 
		     U.get(), U.lda(), contiguousCacheBlocks);
	}
      return rank;
    }
  void EpetraMultiVec::MvTransMv ( double alpha, const MultiVec<double>& A,
                                   Teuchos::SerialDenseMatrix<int,double>& B
#ifdef HAVE_ANASAZI_EXPERIMENTAL
                                   , ConjType conj
#endif
                                  ) const
  {    
    EpetraMultiVec *A_vec = dynamic_cast<EpetraMultiVec *>(&const_cast<MultiVec<double> &>(A));
    
    if (A_vec) {
      Epetra_LocalMap LocalMap(B.numRows(), 0, Map().Comm());
      Epetra_MultiVector B_Pvec(View, LocalMap, B.values(), B.stride(), B.numCols());
      
    TEUCHOS_TEST_FOR_EXCEPTION( 
        B_Pvec.Multiply( 'T', 'N', alpha, *A_vec, *this, 0.0 ) != 0,
        EpetraMultiVecFailure, "Anasazi::EpetraMultiVec::MvTransMv() call to Epetra_MultiVec::Multiply() returned a nonzero value.");
    }
  }
Esempio n. 12
0
void
Stokhos::SmolyakPseudoSpectralOperator<ordinal_type,value_type,point_compare_type>::
scatter(
  const Teuchos::Array<ordinal_type>& map, 
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& input, 
  bool trans, 
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& result) const {
  if (trans) {
    for (ordinal_type j=0; j<map.size(); j++)
      for (ordinal_type i=0; i<input.numRows(); i++)
	result(i,map[j]) += input(i,j);
  }
  else {
    for (ordinal_type j=0; j<input.numCols(); j++)
      for (ordinal_type i=0; i<map.size(); i++)
	result(map[i],j) += input(i,j);
  }
}
Esempio n. 13
0
  // Compute a dense matrix B through the matrix-matrix multiply alpha * A^H * (*this). 
  void MvTransMv (ScalarType alpha, const Anasazi::MultiVec<ScalarType>& A, 
                  Teuchos::SerialDenseMatrix< int, ScalarType >& B
#ifdef HAVE_ANASAZI_EXPERIMENTAL
                  , Anasazi::ConjType conj
#endif
                 ) const
  {
    MyMultiVec* MyA;
    MyA = dynamic_cast<MyMultiVec*>(&const_cast<Anasazi::MultiVec<ScalarType> &>(A)); 
    assert (MyA != 0);
    
    assert (A.GetVecLength() == Length_);
    assert (NumberVecs_ <= B.numCols());
    assert (A.GetNumberVecs() <= B.numRows());
    
#ifdef HAVE_ANASAZI_EXPERIMENTAL
    if (conj == Anasazi::CONJ) {
#endif
      for (int v = 0 ; v < A.GetNumberVecs() ; ++v) {
        for (int w = 0 ; w < NumberVecs_ ; ++w) {
          ScalarType value = 0.0;
          for (int i = 0 ; i < Length_ ; ++i) {
            value += Teuchos::ScalarTraits<ScalarType>::conjugate((*MyA)(i, v)) * (*this)(i, w);
          }
          B(v, w) = alpha * value;
        }
      }
#ifdef HAVE_ANASAZI_EXPERIMENTAL
    } else {
      for (int v = 0 ; v < A.GetNumberVecs() ; ++v) {
        for (int w = 0 ; w < NumberVecs_ ; ++w) {
          ScalarType value = 0.0;
          for (int i = 0 ; i < Length_ ; ++i) {
            value += (*MyA)(i, v) * (*this)(i, w);
          }
          B(v, w) = alpha * value;
        }
      }
    }
#endif
  }
Esempio n. 14
0
  // Compute a dense matrix B through the matrix-matrix multiply alpha * A^H * (*this).
  void MvTransMv (const ScalarType alpha, const Belos::MultiVec<ScalarType>& A,
                  Teuchos::SerialDenseMatrix< int, ScalarType >& B) const
  {
    MyMultiVec* MyA;
    MyA = dynamic_cast<MyMultiVec*>(&const_cast<Belos::MultiVec<ScalarType> &>(A));
    TEUCHOS_ASSERT(MyA != NULL);

    assert (A.GetGlobalLength() == Length_);
    assert (NumberVecs_ <= B.numCols());
    assert (A.GetNumberVecs() <= B.numRows());

      for (int v = 0 ; v < A.GetNumberVecs() ; ++v) {
        for (int w = 0 ; w < NumberVecs_ ; ++w) {
          ScalarType value = 0.0;
          for (int i = 0 ; i < Length_ ; ++i) {
            value += Teuchos::ScalarTraits<ScalarType>::conjugate((*MyA)(i, v)) * (*this)(i, w);
          }
          B(v, w) = alpha * value;
        }
      }
  }
  void EpetraOpMultiVec::MvTransMv ( double alpha, const MultiVec<double>& A,
                                   Teuchos::SerialDenseMatrix<int,double>& B
#ifdef HAVE_ANASAZI_EXPERIMENTAL
                                   , ConjType conj
#endif
                                  ) const
  {    
    EpetraOpMultiVec *A_vec = dynamic_cast<EpetraOpMultiVec *>(&const_cast<MultiVec<double> &>(A));
    
    if (A_vec) {
      Epetra_LocalMap LocalMap(B.numRows(), 0, Epetra_MV->Map().Comm());
      Epetra_MultiVector B_Pvec(Epetra_DataAccess::View, LocalMap, B.values(), B.stride(), B.numCols());
     
      int info = Epetra_OP->Apply( *Epetra_MV, *Epetra_MV_Temp );
      TEUCHOS_TEST_FOR_EXCEPTION( info != 0, EpetraSpecializedMultiVecFailure, 
        "Anasazi::EpetraOpMultiVec::MvTransMv(): Error returned from Epetra_Operator::Apply()" );

      TEUCHOS_TEST_FOR_EXCEPTION( 
        B_Pvec.Multiply( 'T', 'N', alpha, *(A_vec->GetEpetraMultiVector()), *Epetra_MV_Temp, 0.0 ) != 0,
        EpetraSpecializedMultiVecFailure, "Anasazi::EpetraOpMultiVec::MvTransMv() call to Epetra_MultiVector::Multiply() returned a nonzero value.");
    }
  }
void updateGuess(Teuchos::SerialDenseVector<int, std::complex<double> >& myCurrentGuess,
		Teuchos::SerialDenseVector<int, std::complex<double> >& myTargetsCalculated,
		Teuchos::SerialDenseMatrix<int, std::complex<double> >& myJacobian, 
		Teuchos::LAPACK<int, std::complex<double> >& myLAPACK
		 )
{
	//v = J(inverse) * (-F(x))
	//new guess = v + old guess
	myTargetsCalculated *= -1.0;

	//Perform an LU factorization of this matrix. 
	int ipiv[NUMDIMENSIONS], info;
	char TRANS = 'N';
	myLAPACK.GETRF( NUMDIMENSIONS, NUMDIMENSIONS, myJacobian.values(), myJacobian.stride(), ipiv, &info ); 

	// Solve the linear system.
	myLAPACK.GETRS( TRANS, NUMDIMENSIONS, 1, myJacobian.values(), myJacobian.stride(),
		       	ipiv, myTargetsCalculated.values(), myTargetsCalculated.stride(), &info );  

	//We have overwritten myTargetsCalculated with guess update values
	//myBLAS.AXPY(NUMDIMENSIONS, 1.0, myGuessAdjustment.values(), 1, myCurrentGuess.values(), 1);
	myCurrentGuess += myTargetsCalculated;
}
Esempio n. 17
0
  /*! \brief Update \c mv with \f$ \alpha A B + \beta mv \f$.
   */
  static void MvTimesMatAddMv( const double alpha, const  _MV & A, 
    const Teuchos::SerialDenseMatrix<int,double>& B, 
    const double beta,  _MV & mv )
    {
//      Out::os() << "MvTimesMatAddMv()" << endl;
      int n = B.numCols();
//      Out::os() << "B.numCols()=" << n << endl;

      TEST_FOR_EXCEPT(mv.size() != n);

      for (int j=0; j<mv.size(); j++)
      {
        Vector<double> tmp;
        if (beta==one())
        {
          tmp = mv[j].copy();
        }
        else if (beta==zero())
        {
          tmp = mv[j].copy();
          tmp.setToConstant(zero());
        }
        else
        {
          tmp = beta * mv[j];
        }
        if (alpha != zero())
        {
          for (int i=0; i<A.size(); i++)
          {
            tmp = tmp + alpha*B(i,j)*A[i];
          }
        }
        mv[j].acceptCopyOf(tmp);
      }
    }
ordinal_type
Stokhos::CGDivisionExpansionStrategy<ordinal_type,value_type,node_type>::
CG(const Teuchos::SerialDenseMatrix<ordinal_type, value_type> & A, 
   Teuchos::SerialDenseMatrix<ordinal_type,value_type> & X, 
   const Teuchos::SerialDenseMatrix<ordinal_type,value_type> & B, 
   ordinal_type max_iter, 
   value_type tolerance, 
   ordinal_type prec_iter, 
   ordinal_type order , 
   ordinal_type m, 
   ordinal_type PrecNum, 
   const Teuchos::SerialDenseMatrix<ordinal_type, value_type> & M, 
   ordinal_type diag)

{
  ordinal_type n = A.numRows();
  ordinal_type k=0;
  value_type resid;
  Teuchos::SerialDenseMatrix<ordinal_type, value_type> Ax(n,1);
  Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0);
  Teuchos::SerialDenseMatrix<ordinal_type, value_type> r(Teuchos::Copy,B);
  r-=Ax;
  resid=r.normFrobenius();
  Teuchos::SerialDenseMatrix<ordinal_type, value_type> p(r);
  Teuchos::SerialDenseMatrix<ordinal_type, value_type> rho(1,1);
  Teuchos::SerialDenseMatrix<ordinal_type, value_type> oldrho(1,1);
  Teuchos::SerialDenseMatrix<ordinal_type, value_type> pAp(1,1);
  Teuchos::SerialDenseMatrix<ordinal_type, value_type> Ap(n,1);
  value_type b;
  value_type a;
  while (resid > tolerance && k < max_iter){
    Teuchos::SerialDenseMatrix<ordinal_type, value_type> z(r);
    //Solve Mz=r
    if (PrecNum != 0){
      if (PrecNum == 1){
	Stokhos::DiagPreconditioner<ordinal_type, value_type> precond(M);
	precond.ApplyInverse(r,z,prec_iter);
      }
      else if (PrecNum == 2){
	Stokhos::JacobiPreconditioner<ordinal_type, value_type> precond(M);
	precond.ApplyInverse(r,z,2);
      }
      else if (PrecNum == 3){
	Stokhos::GSPreconditioner<ordinal_type, value_type> precond(M,0);
	precond.ApplyInverse(r,z,1);
      }
      else if (PrecNum == 4){
	Stokhos::SchurPreconditioner<ordinal_type, value_type> precond(M, order, m, diag);
	precond.ApplyInverse(r,z,prec_iter);            
      }
    }
    rho.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,1.0, r, z, 0.0);
    

    if (k==0){
      p.assign(z);
      rho.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, r, z, 0.0);  
    }
    else {
      b=rho(0,0)/oldrho(0,0);
      p.scale(b);
      p+=z; 
    }
    Ap.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, p, 0.0);
    pAp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,1.0, p, Ap, 0.0);
    a=rho(0,0)/pAp(0,0);
    Teuchos::SerialDenseMatrix<ordinal_type, value_type> scalep(p);
    scalep.scale(a);
    X+=scalep;
    Ap.scale(a);
    r-=Ap;
    oldrho.assign(rho);
    resid=r.normFrobenius();
    k++;
  }                      
 
  //std::cout << "iteration count  " << k << std::endl;
  return 0; 
}
Esempio n. 19
0
//Mean-Based Preconditioned GMRES  
int pregmres(const Teuchos::SerialDenseMatrix<int, double> &  A, const Teuchos::SerialDenseMatrix<int,double> &  X,const Teuchos::SerialDenseMatrix<int,double> &   B, int max_iter, double tolerance)
{
  int n; 
  int k;
  double resid;
  k=1;
  n=A.numRows();
  std::cout << A << std::endl;
  Teuchos::SerialDenseMatrix<int, double> D(n,1);

  //Get diagonal entries of A 
  for (int i=0; i<n; i++){
    D(i,0)=A(i,i);
  }
  

  Teuchos::SerialDenseMatrix<int, double> Ax(n,1);
  Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0);

  Teuchos::SerialDenseMatrix<int, double> r0(B);
  r0-=Ax;
  
  resid=r0.normFrobenius();
  
  //define vector v=r/norm(r) where r=b-Ax
  Teuchos::SerialDenseMatrix<int, double> v(n,1);
  r0.scale(1/resid);
  
  Teuchos::SerialDenseMatrix<int, double> h(1,1);

  //Matrix of orthog basis vectors V
  Teuchos::SerialDenseMatrix<int, double> V(n,1);
  
   //Set v=r0/norm(r0) to be 1st col of V
   for (int i=0; i<n; i++){
        V(i,0)=r0(i,0);
       }
   //right hand side
   Teuchos::SerialDenseMatrix<int, double> bb(1,1);
   bb(0,0)=resid;
   Teuchos::SerialDenseMatrix<int, double> w(n,1);
   Teuchos::SerialDenseMatrix<int, double> c;
   Teuchos::SerialDenseMatrix<int, double> s;
       
   while (resid > tolerance && k < max_iter){
    
    std::cout << "k = " << k << std::endl;
    h.reshape(k+1,k);
    //Arnoldi iteration(Gram-Schmidt )
    V.reshape(n,k+1);    
    //set vk to be kth col of V
    Teuchos::SerialDenseMatrix<int, double> vk(Teuchos::Copy, V, n,1,0,k-1);
    //Preconditioning step w=AMj(-1)vj
    w.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1/D(k-1,0), A, vk, 0.0);  

   
    
    Teuchos::SerialDenseMatrix<int, double> vi(n,1);
    Teuchos::SerialDenseMatrix<int, double> ip(1,1);
    for (int i=0; i<k; i++){
       //set vi to be ith col of V
       Teuchos::SerialDenseMatrix<int, double> vi(Teuchos::Copy, V, n,1,0,i);    
       //Calculate inner product
       ip.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, vi, w, 0.0);
       h(i,k-1)= ip(0,0);
       //scale vi by h(i,k-1)
       vi.scale(ip(0,0));     
       w-=vi;
       }         
    h(k,k-1)=w.normFrobenius();     
    
    w.scale(1.0/w.normFrobenius());   
    //add column vk+1=w to V
    for (int i=0; i<n; i++){
          V(i,k)=w(i,0);
         } 
   //Solve upper hessenberg least squares problem via Givens rotations
   //Compute previous Givens rotations
    for (int i=0; i<k-1; i++){
       h(i,k-1)=c(i,0)*h(i,k-1)+s(i,0)*h(i+1,k-1);
       h(i+1,k-1)=-s(i,0)*h(i,k-1)+c(i,0)*h(i+1,k-1);
     }  
     //Compute next Givens rotations
     c.reshape(k,1);
     s.reshape(k,1); 
     bb.reshape(k+1,1);
     double l = sqrt(h(k-1,k-1)*h(k-1,k-1)+h(k,k-1)*h(k,k-1));
     c(k-1,0)=h(k-1,k-1)/l;
     s(k-1,0)=h(k,k-1)/l;
     std::cout <<" h(k,k-1)= " << h(k,k-1) << std::endl;
     // Givens rotation on h and bb
     h(k-1,k-1)=l;
     h(k,k-1)=0;
     bb(k-1,0)=c(k-1,0)*bb(k-1,0);
     bb(k,0)=-s(k-1,0)*bb(k-1,0);

    //Determine residual    
    resid = fabs(bb(k,0));

    std::cout << "resid = " << resid << std::endl;
    k=k+1;
  } 
   //Extract upper triangular square matrix
   bb.reshape(h.numRows()-1 ,1);

   //Solve linear system
   int info;

   Teuchos::LAPACK<int, double> lapack;
   lapack.TRTRS('U', 'N', 'N', h.numRows()-1, 1, h.values(), h.stride(), bb.values(), bb.stride(),&info); 
   //Found y=Mx
   for (int i=0; i<k-1; i++){
      bb(i,0)=bb(i,0)/D(i,0);
   }

   V.reshape(n,k-1);
   Teuchos::SerialDenseMatrix<int, double> ans(X);
   ans.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, V, bb, 1.0);
   std::cout << "ans= " << ans << std::endl;

   std::cout << "h= " << h << std::endl;



return 0;
}
Esempio n. 20
0
void
Stokhos::SmolyakPseudoSpectralOperator<ordinal_type,value_type,point_compare_type>::
apply_direct(
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& A,
  const value_type& alpha, 
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& input,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& result, 
  const value_type& beta,
  bool trans) const {
  if (trans) {
    TEUCHOS_ASSERT(input.numCols() <= A.numCols());
    TEUCHOS_ASSERT(result.numCols() == A.numRows());
    TEUCHOS_ASSERT(result.numRows() == input.numRows());
    blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, input.numRows(), 
	      A.numRows(), input.numCols(), alpha, input.values(), 
	      input.stride(), A.values(), A.stride(), beta, 
	      result.values(), result.stride());
  }
  else {
    TEUCHOS_ASSERT(input.numRows() <= A.numCols());
    TEUCHOS_ASSERT(result.numRows() == A.numRows());
    TEUCHOS_ASSERT(result.numCols() == input.numCols());
    blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, A.numRows(), 
	      input.numCols(), input.numRows(), alpha, A.values(), 
	      A.stride(), input.values(), input.stride(), beta, 
	      result.values(), result.stride());
  }
}
ordinal_type
Stokhos::MonomialGramSchmidtPCEBasis<ordinal_type, value_type>::
buildReducedBasis(
  ordinal_type max_p, 
  value_type threshold, 
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& A, 
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& F,
  const Teuchos::Array<value_type>& weights, 
  Teuchos::Array< Stokhos::MultiIndex<ordinal_type> >& terms_,
  Teuchos::Array<ordinal_type>& num_terms_,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& Qp_, 
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& Q_)
{
  // Compute basis terms -- 2-D array giving powers for each linear index
  ordinal_type max_sz;
  CPBUtils::compute_terms(max_p, this->d, max_sz, terms_, num_terms_);

  // Compute B matrix -- monomials in F
  // for i=0,...,nqp-1
  //   for j=0,...,sz-1
  //      B(i,j) = F(i,1)^terms_[j][1] * ... * F(i,d)^terms_[j][d]
  // where sz is the total size of a basis up to order p and terms_[j] 
  // is an array of powers for each term in the total-order basis
  ordinal_type nqp = weights.size();
  SDM B(nqp, max_sz);
  for (ordinal_type i=0; i<nqp; i++) {
    for (ordinal_type j=0; j<max_sz; j++) {
      B(i,j) = 1.0;
      for (ordinal_type k=0; k<this->d; k++)
	B(i,j) *= std::pow(F(i,k), terms_[j][k]);
    }
  }

  // Rescale columns of B to have unit norm
  for (ordinal_type j=0; j<max_sz; j++) {
    value_type nrm = 0.0;
    for (ordinal_type i=0; i<nqp; i++)
      nrm += B(i,j)*B(i,j)*weights[i];
    nrm = std::sqrt(nrm);
    for (ordinal_type i=0; i<nqp; i++)
      B(i,j) /= nrm;
  }

  // Compute our new basis -- each column of Q is the new basis evaluated
  // at the original quadrature points.  Constraint pivoting so first d+1
  // columns and included in Q.
  SDM R;
  Teuchos::Array<ordinal_type> piv(max_sz);
  for (int i=0; i<this->d+1; i++)
    piv[i] = 1;
  typedef Stokhos::OrthogonalizationFactory<ordinal_type,value_type> SOF;
   ordinal_type sz_ = SOF::createOrthogonalBasis(
    this->orthogonalization_method, threshold, this->verbose, B, weights, 
    Q_, R, piv);

  // Compute Qp = A^T*W*Q
  SDM tmp(nqp, sz_);
  Qp_.reshape(this->pce_sz, sz_);
  for (ordinal_type i=0; i<nqp; i++)
    for (ordinal_type j=0; j<sz_; j++)
      tmp(i,j) = Q_(i,j)*weights[i];
  ordinal_type ret = 
    Qp_.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, A, tmp, 0.0);
  TEUCHOS_ASSERT(ret == 0);

  // It isn't clear that Qp is orthogonal, but if you derive the projection
  // matrix from the original space to the reduced, you end up with 
  // Q^T*W*A = Qp^T

  return sz_;
}
Esempio n. 22
0
//GMRES  
int gmres(const  Teuchos::SerialDenseMatrix<int, double> &  A, Teuchos::SerialDenseMatrix<int,double>   X,const Teuchos::SerialDenseMatrix<int,double> &   B, int max_iter, double tolerance)

{
  int n; 
  int k;
  double resid;
  k=1;
  n=A.numRows();
  std::cout << "A= " << A << std::endl;
  std::cout << "B= " << B << std::endl;
  //Teuchos::SerialDenseMatrix<int, double> Ax(n,1);
  //Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0);

  Teuchos::SerialDenseMatrix<int, double> r0(B);
  //r0-=Ax;
    
  resid=r0.normFrobenius();
  std::cout << "resid= " << resid << std::endl;
  //define vector v=r/norm(r) where r=b-Ax
  
  r0.scale(1/resid);
  
  Teuchos::SerialDenseMatrix<int, double> h(1,1);

  //Matrix of orthog basis vectors V
  Teuchos::SerialDenseMatrix<int, double> V(n,1);
  
   //Set v=r0/norm(r0) to be 1st col of V
   for (int i=0; i<n; i++){
        V(i,0)=r0(i,0);
       }
   //right hand side
   Teuchos::SerialDenseMatrix<int, double> bb(1,1);
   bb(0,0)=resid;
   Teuchos::SerialDenseMatrix<int, double> w(n,1);
   Teuchos::SerialDenseMatrix<int, double> c;
   Teuchos::SerialDenseMatrix<int, double> s;
  
   while (resid > tolerance && k < max_iter){
    
    std::cout << "k = " << k << std::endl;
    h.reshape(k+1,k);
    //Arnoldi iteration(Gram-Schmidt )
    V.reshape(n,k+1);    
    //set vk to be kth col of V
    Teuchos::SerialDenseMatrix<int, double> vk(Teuchos::Copy, V, n,1,0,k-1);
    
    w.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, A, vk, 0.0);  
    Teuchos::SerialDenseMatrix<int, double> vi(n,1);
    Teuchos::SerialDenseMatrix<int, double> ip(1,1);
    for (int i=0; i<k; i++){
       //set vi to be ith col of V
       Teuchos::SerialDenseMatrix<int, double> vi(Teuchos::Copy, V, n,1,0,i);    
       //Calculate inner product
       ip.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, vi, w, 0.0);
       h(i,k-1)= ip(0,0);
       //scale vi by h(i,k-1)
       vi.scale(ip(0,0));     
       w-=vi;
       }         
    h(k,k-1)=w.normFrobenius();     
 
    w.scale(1.0/w.normFrobenius());   
    //add column vk+1=w to V
    for (int i=0; i<n; i++){
          V(i,k)=w(i,0);
         } 
    
   //Solve upper hessenberg least squares problem via Givens rotations
   //Compute previous Givens rotations
    for (int i=0; i<k-1; i++){
     //  double hi=h(i,k-1);
     //  double hi1=h(i+1,k-1);

     // h(i,k-1)=c(i,0)*h(i,k-1)+s(i,0)*h(i+1,k-1);
     // h(i+1,k-1)=-1*s(i,0)*h(i,k-1)+c(i,0)*h(i+1,k-1);
      // h(i,k-1)=c(i,0)*hi+s(i,0)*hi1;
      // h(i+1,k-1)=-1*s(i,0)*hi+c(i,0)*hi1;   
     
     double q=c(i,0)*h(i,k-1)+s(i,0)*h(i+1,k-1);
     h(i+1,k-1)=-1*s(i,0)*h(i,k-1)+c(i,0)*h(i+1,k-1);
     h(i,k-1)=q;




     }  
     //Compute next Givens rotations
     c.reshape(k,1);
     s.reshape(k,1); 
     bb.reshape(k+1,1);
     double l = sqrt(h(k-1,k-1)*h(k-1,k-1)+h(k,k-1)*h(k,k-1));
     c(k-1,0)=h(k-1,k-1)/l;
     s(k-1,0)=h(k,k-1)/l;
     
     std::cout << "c  "  <<  c(k-1,0)<<std::endl;
     std::cout << "s "  <<  s(k-1,0)<<std::endl;

    
     // Givens rotation on h and bb
       
   //  h(k-1,k-1)=l;
     
  //  h(k,k-1)=0;
       double hk=h(k,k-1);
       double hk1=h(k-1,k-1);

      h(k-1,k-1)=c(k-1,0)*hk1+s(k-1,0)*hk;
      h(k,k-1)=-1*s(k-1,0)*hk1+c(k-1,0)*hk;

     std::cout << "l = " << l <<std::endl;
     std::cout << "h(k-1,k-1) = should be l  " << h(k-1,k-1) <<std::endl;
     std::cout << "h(k,k-1) = should be 0  " << h(k,k-1) <<std::endl;
     bb(k,0)=-1*s(k-1,0)*bb(k-1,0); 
     bb(k-1,0)=c(k-1,0)*bb(k-1,0);
     
   
    //Determine residual    
     resid =fabs(bb(k,0));
      
     std::cout << "resid = " << resid <<std::endl;
     k++;
  } 
  
  //Extract upper triangular square matrix
   bb.reshape(h.numRows()-1 ,1);
   
   //Solve linear system
   int info;
   std::cout  << "bb pre solve = " << bb << std::endl;
   std::cout << "h= " << h << std::endl;
   Teuchos::LAPACK<int, double> lapack;
   lapack.TRTRS('U', 'N', 'N', h.numRows()-1, 1, h.values(), h.stride(), bb.values(), bb.stride(),&info); 

   V.reshape(n,k-1);
   
   std::cout  << "V= " << V << std::endl;
   std::cout  << "y= " << bb << std::endl;
   X.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, V, bb, 1.0);
   std::cout << "X=  " << X << std::endl;

  


   //Check V is orthogoanl
  // Teuchos::SerialDenseMatrix<int, double> vtv(V);
  // vtv.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, V, V, 0.0);
  // std::cout << "Vtv" << vtv << std::endl;

return 0;
}
Esempio n. 23
0
// CG
int CG(const  Teuchos::SerialDenseMatrix<int, double> &  A, Teuchos::SerialDenseMatrix<int,double>   X,const Teuchos::SerialDenseMatrix<int,double> &   B, int max_iter, double tolerance, Stokhos::DiagPreconditioner<int,double> prec)

{
  int n; 
  int k=0;
  double resid;
  
  n=A.numRows();
  std::cout << "A= " << A << std::endl;
  std::cout << "B= " << B << std::endl;
  Teuchos::SerialDenseMatrix<int, double> Ax(n,1);
  Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0);

  Teuchos::SerialDenseMatrix<int, double> r(B);
  r-=Ax;  
  resid=r.normFrobenius(); 
  Teuchos::SerialDenseMatrix<int, double> rho(1,1);
  Teuchos::SerialDenseMatrix<int, double> oldrho(1,1);
  Teuchos::SerialDenseMatrix<int, double> pAp(1,1);
  Teuchos::SerialDenseMatrix<int, double> Ap(n,1);
  
  double b;
  double a;
  Teuchos::SerialDenseMatrix<int, double> p(r);

  
 
  while (resid > tolerance && k < max_iter){
 
     Teuchos::SerialDenseMatrix<int, double> z(r);
     
     //z=M-1r
//     prec.ApplyInverse(r,z);

     rho.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,1.0, r, z, 0.0);
  
     if (k==0){
       p.assign(z);
       rho.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, r, z, 0.0);
      }  
      else {
        b=rho(0,0)/oldrho(0,0);
        p.scale(b);
        p+=z;
      }
      Ap.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, p, 0.0);
      pAp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,1.0, p, Ap, 0.0);
      a=rho(0,0)/pAp(0,0);
      Teuchos::SerialDenseMatrix<int, double> scalep(p);
      scalep.scale(a);
      X+=scalep;
      Ap.scale(a);
      r-=Ap;
      oldrho.assign(rho);
      resid=r.normFrobenius();
   
 
     k++;
  } 
  
 std::cout << "X=  " << X << std::endl;

 return 0;
}
Esempio n. 24
0
void DislocationDensity<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{

  Teuchos::SerialDenseMatrix<int, double> A;
  Teuchos::SerialDenseMatrix<int, double> X;
  Teuchos::SerialDenseMatrix<int, double> B;
  Teuchos::SerialDenseSolver<int, double> solver;

  A.shape(numNodes,numNodes);
  X.shape(numNodes,numNodes);
  B.shape(numNodes,numNodes);
  
  // construct Identity for RHS
  for (int i = 0; i < numNodes; ++i)
    B(i,i) = 1.0;

  for (int i=0; i < G.size() ; i++) G[i] = 0.0;

  // construct the node --> point operator
  for (std::size_t cell=0; cell < workset.numCells; ++cell)
  {
    for (std::size_t node=0; node < numNodes; ++node) 
      for (std::size_t qp=0; qp < numQPs; ++qp) 
	A(qp,node) = BF(cell,node,qp);
    
    X = 0.0;

    solver.setMatrix( Teuchos::rcp( &A, false) );
    solver.setVectors( Teuchos::rcp( &X, false ), Teuchos::rcp( &B, false ) );

    // Solve the system A X = B to find A_inverse
    int status = 0;
    status = solver.factor();
    status = solver.solve();

    // compute nodal Fp
    nodalFp.initialize(0.0);
    for (std::size_t node=0; node < numNodes; ++node) 
      for (std::size_t qp=0; qp < numQPs; ++qp) 
	for (std::size_t i=0; i < numDims; ++i) 
	  for (std::size_t j=0; j < numDims; ++j) 
	    nodalFp(node,i,j) += X(node,qp) * Fp(cell,qp,i,j);

    // compute the curl using nodalFp
    curlFp.initialize(0.0);
    for (std::size_t node=0; node < numNodes; ++node) 
    {
      for (std::size_t qp=0; qp < numQPs; ++qp) 
      {
	curlFp(qp,0,0) += nodalFp(node,0,2) * GradBF(cell,node,qp,1) - nodalFp(node,0,1) * GradBF(cell,node,qp,2);
	curlFp(qp,0,1) += nodalFp(node,1,2) * GradBF(cell,node,qp,1) - nodalFp(node,1,1) * GradBF(cell,node,qp,2);
	curlFp(qp,0,2) += nodalFp(node,2,2) * GradBF(cell,node,qp,1) - nodalFp(node,2,1) * GradBF(cell,node,qp,2);

	curlFp(qp,1,0) += nodalFp(node,0,0) * GradBF(cell,node,qp,2) - nodalFp(node,0,2) * GradBF(cell,node,qp,0);
	curlFp(qp,1,1) += nodalFp(node,1,0) * GradBF(cell,node,qp,2) - nodalFp(node,1,2) * GradBF(cell,node,qp,0);
	curlFp(qp,1,2) += nodalFp(node,2,0) * GradBF(cell,node,qp,2) - nodalFp(node,2,2) * GradBF(cell,node,qp,0);

	curlFp(qp,2,0) += nodalFp(node,0,1) * GradBF(cell,node,qp,0) - nodalFp(node,0,0) * GradBF(cell,node,qp,1);
	curlFp(qp,2,1) += nodalFp(node,1,1) * GradBF(cell,node,qp,0) - nodalFp(node,1,0) * GradBF(cell,node,qp,1);
	curlFp(qp,2,2) += nodalFp(node,2,1) * GradBF(cell,node,qp,0) - nodalFp(node,2,0) * GradBF(cell,node,qp,1);
      }
    }

    for (std::size_t qp=0; qp < numQPs; ++qp) 
      for (std::size_t i=0; i < numDims; ++i) 
	for (std::size_t j=0; j < numDims; ++j) 
	  for (std::size_t k=0; k < numDims; ++k) 
	    G(cell,qp,i,j) += Fp(cell,qp,i,k) * curlFp(qp,k,j);
  }
}
Esempio n. 25
0
int
Stokhos::GMRESDivisionExpansionStrategy<ordinal_type,value_type,node_type>::
GMRES(const Teuchos::SerialDenseMatrix<int, double> &  A, Teuchos::SerialDenseMatrix<int,double> &  X, const Teuchos::SerialDenseMatrix<int,double> &   B, int max_iter, double tolerance, int prec_iter, int order, int dim, int PrecNum, const Teuchos::SerialDenseMatrix<int, double> & M, int diag)
{
    int n = A.numRows();
    int k = 1;
    double resid;
    Teuchos::SerialDenseMatrix<int, double> P(n,n);
    Teuchos::SerialDenseMatrix<int, double> Ax(n,1);
    Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0);
    Teuchos::SerialDenseMatrix<int, double> r0(B);
    r0-=Ax;
    resid=r0.normFrobenius();
//define vector v=r/norm(r) where r=b-Ax
    Teuchos::SerialDenseMatrix<int, double> v(n,1);
    r0.scale(1/resid);
    Teuchos::SerialDenseMatrix<int, double> h(1,1);
//Matrix of orthog basis vectors V
    Teuchos::SerialDenseMatrix<int, double> V(n,1);
//Set v=r0/norm(r0) to be 1st col of V
    for (int i=0; i<n; i++) {
        V(i,0)=r0(i,0);
    }
    //right hand side
    Teuchos::SerialDenseMatrix<int, double> bb(1,1);
    bb(0,0)=resid;
    Teuchos::SerialDenseMatrix<int, double> w(n,1);
    Teuchos::SerialDenseMatrix<int, double> c;
    Teuchos::SerialDenseMatrix<int, double> s;
    while (resid > tolerance && k < max_iter) {
        h.reshape(k+1,k);
        //Arnoldi iteration(Gram-Schmidt )
        V.reshape(n,k+1);
        //set vk to be kth col of V
        Teuchos::SerialDenseMatrix<int, double> vk(Teuchos::Copy, V, n,1,0,k-1);
        //Preconditioning step: solve Mz=vk
        Teuchos::SerialDenseMatrix<int, double> z(vk);
        if (PrecNum == 1) {
            Stokhos::DiagPreconditioner precond(M);
            precond.ApplyInverse(vk,z,prec_iter);
        }
        else if (PrecNum == 2) {
            Stokhos::JacobiPreconditioner precond(M);
            precond.ApplyInverse(vk,z,2);
        }
        else if (PrecNum == 3) {
            Stokhos::GSPreconditioner precond(M,1);
            precond.ApplyInverse(vk,z,1);
        }
        else if (PrecNum == 4) {
            Stokhos::SchurPreconditioner precond(M, order, dim, diag);
            precond.ApplyInverse(vk,z,prec_iter);
        }

        w.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1, A, z, 0.0);
        Teuchos::SerialDenseMatrix<int, double> vi(n,1);
        Teuchos::SerialDenseMatrix<int, double> ip(1,1);
        for (int i=0; i<k; i++) {
            //set vi to be ith col of V
            Teuchos::SerialDenseMatrix<int, double> vi(Teuchos::Copy, V, n,1,0,i);
            //Calculate inner product
            ip.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, vi, w, 0.0);
            h(i,k-1)= ip(0,0);
            //scale vi by h(i,k-1)
            vi.scale(ip(0,0));
            w-=vi;
        }
        h(k,k-1)=w.normFrobenius();
        w.scale(1.0/h(k,k-1));
        //add column vk+1=w to V
        for (int i=0; i<n; i++) {
            V(i,k)=w(i,0);
        }
        //Solve upper hessenberg least squares problem via Givens rotations
        //Compute previous Givens rotations
        for (int i=0; i<k-1; i++) {
            double q=c(i,0)*h(i,k-1)+s(i,0)*h(i+1,k-1);
            h(i+1,k-1)=-1*s(i,0)*h(i,k-1)+c(i,0)*h(i+1,k-1);
            h(i,k-1)=q;

        }
        //Compute next Givens rotations
        c.reshape(k,1);
        s.reshape(k,1);
        bb.reshape(k+1,1);
        double l = sqrt(h(k-1,k-1)*h(k-1,k-1)+h(k,k-1)*h(k,k-1));
        c(k-1,0)=h(k-1,k-1)/l;
        s(k-1,0)=h(k,k-1)/l;

        // Givens rotation on h and bb
        h(k-1,k-1)=l;
        h(k,k-1)=0;

        bb(k,0)=-s(k-1,0)*bb(k-1,0);
        bb(k-1,0)=c(k-1,0)*bb(k-1,0);

        //Determine residual
        resid = fabs(bb(k,0));
        k++;
    }
    //Extract upper triangular square matrix
    bb.reshape(h.numRows()-1 ,1);
    //Solve linear system
    int info;
    Teuchos::LAPACK<int, double> lapack;
    lapack.TRTRS('U', 'N', 'N', h.numRows()-1, 1, h.values(), h.stride(), bb.values(), bb.stride(),&info);
    Teuchos::SerialDenseMatrix<int, double> ans(X);
    V.reshape(n,k-1);
    ans.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, V, bb, 0.0);
    if (PrecNum == 1) {
        Stokhos::DiagPreconditioner precond(M);
        precond.ApplyInverse(ans,ans,prec_iter);
    }
    else if (PrecNum == 2) {
        Stokhos::JacobiPreconditioner precond(M);
        precond.ApplyInverse(ans,ans,2);
    }
    else if (PrecNum == 3) {
        Stokhos::GSPreconditioner precond(M,1);
        precond.ApplyInverse(ans,ans,1);
    }
    else if (PrecNum == 4) {
        Stokhos::SchurPreconditioner precond(M, order, dim, diag);
        precond.ApplyInverse(ans,ans,prec_iter);
    }
    X+=ans;

    std::cout << "iteration count=  " << k-1 << std::endl;


    return 0;
}
Esempio n. 26
0
ordinal_type
Stokhos::MonomialProjGramSchmidtPCEBasis<ordinal_type, value_type>::
buildReducedBasis(
  ordinal_type max_p, 
  value_type threshold,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& A, 
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& F,
  const Teuchos::Array<value_type>& weights, 
  Teuchos::Array< Stokhos::MultiIndex<ordinal_type> >& terms_,
  Teuchos::Array<ordinal_type>& num_terms_,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& Qp_, 
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& Q_)
{
  // Compute basis terms -- 2-D array giving powers for each linear index
  ordinal_type max_sz;
  CPBUtils::compute_terms(max_p, this->d, max_sz, terms_, num_terms_);

  // Compute B matrix -- monomials in F
  // for i=0,...,nqp-1
  //   for j=0,...,sz-1
  //      B(i,j) = F(i,1)^terms_[j][1] * ... * F(i,d)^terms_[j][d]
  // where sz is the total size of a basis up to order p and terms_[j] 
  // is an array of powers for each term in the total-order basis
  ordinal_type nqp = weights.size();
  SDM B(nqp, max_sz);
  for (ordinal_type i=0; i<nqp; i++) {
    for (ordinal_type j=0; j<max_sz; j++) {
      B(i,j) = 1.0;
      for (ordinal_type k=0; k<this->d; k++)
	B(i,j) *= std::pow(F(i,k), terms_[j][k]);
    }
  }

  // Project B into original basis -- should use SPAM for this
  SDM Bp(this->pce_sz, max_sz);
  const Teuchos::Array<value_type>& basis_norms = 
    this->pce_basis->norm_squared();
  for (ordinal_type i=0; i<this->pce_sz; i++) {
    for (ordinal_type j=0; j<max_sz; j++) {
      Bp(i,j) = 0.0;
      for (ordinal_type k=0; k<nqp; k++)
	Bp(i,j) += weights[k]*B(k,j)*A(k,i);
      Bp(i,j) /= basis_norms[i];
    }
  }

  // Rescale columns of Bp to have unit norm
  for (ordinal_type j=0; j<max_sz; j++) {
    value_type nrm = 0.0;
    for (ordinal_type i=0; i<this->pce_sz; i++)
      nrm += Bp(i,j)*Bp(i,j)*basis_norms[i];
    nrm = std::sqrt(nrm);
    for (ordinal_type i=0; i<this->pce_sz; i++)
      Bp(i,j) /= nrm;
  }

  // Compute our new basis -- each column of Qp is the coefficients of the
  // new basis in the original basis.  Constraint pivoting so first d+1
  // columns and included in Qp.
  Teuchos::Array<value_type> w(this->pce_sz, 1.0);
  SDM R;
  Teuchos::Array<ordinal_type> piv(max_sz);
  for (int i=0; i<this->d+1; i++)
    piv[i] = 1;
  typedef Stokhos::OrthogonalizationFactory<ordinal_type,value_type> SOF;
  ordinal_type sz_ = SOF::createOrthogonalBasis(
    this->orthogonalization_method, threshold, this->verbose, Bp, w, 
    Qp_, R, piv);

  // Evaluate new basis at original quadrature points
  Q_.reshape(nqp, sz_);
  ordinal_type ret = 
    Q_.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, A, Qp_, 0.0);
  TEUCHOS_ASSERT(ret == 0);

  return sz_;
}
Esempio n. 27
0
int
main (int argc, char *argv[])
{
  using namespace Anasazi;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using std::endl;

#ifdef HAVE_MPI
  // Initialize MPI
  MPI_Init (&argc, &argv);
#endif // HAVE_MPI

  // Create an Epetra communicator
#ifdef HAVE_MPI
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif // HAVE_MPI

  // Create an Anasazi output manager
  BasicOutputManager<double> printer;
  printer.stream(Errors) << Anasazi_Version() << std::endl << std::endl;

  // Get the sorting std::string from the command line
  std::string which ("LM");
  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption("sort", &which, "Targetted eigenvalues (SM or LM).");
  if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
#ifdef HAVE_MPI
    MPI_Finalize ();
#endif // HAVE_MPI
    return -1;
  }

  // Dimension of the matrix
  //
  // Discretization points in any one direction.
  const int nx = 10;
  // Size of matrix nx*nx
  const int NumGlobalElements = nx*nx;

  // Construct a Map that puts approximately the same number of
  // equations on each process.
  Epetra_Map Map (NumGlobalElements, 0, Comm);

  // Get update list and number of local equations from newly created Map.
  int NumMyElements = Map.NumMyElements ();

  std::vector<int> MyGlobalElements (NumMyElements);
  Map.MyGlobalElements (&MyGlobalElements[0]);

  // Create an integer vector NumNz that is used to build the Petra
  // matrix.  NumNz[i] is the number of OFF-DIAGONAL terms for the
  // i-th global equation on this process.
  std::vector<int> NumNz (NumMyElements);

  /* We are building a matrix of block structure:

      | T -I          |
      |-I  T -I       |
      |   -I  T       |
      |        ...  -I|
      |           -I T|

   where each block is dimension nx by nx and the matrix is on the order of
   nx*nx.  The block T is a tridiagonal matrix.
  */
  for (int i=0; i<NumMyElements; ++i) {
    if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1 ||
        MyGlobalElements[i] == nx-1 || MyGlobalElements[i] == nx*(nx-1) ) {
      NumNz[i] = 3;
    }
    else if (MyGlobalElements[i] < nx || MyGlobalElements[i] > nx*(nx-1) ||
             MyGlobalElements[i]%nx == 0 || (MyGlobalElements[i]+1)%nx == 0) {
      NumNz[i] = 4;
    }
    else {
      NumNz[i] = 5;
    }
  }

  // Create an Epetra_Matrix
  RCP<Epetra_CrsMatrix> A = rcp (new Epetra_CrsMatrix (Epetra_DataAccess::Copy, Map, &NumNz[0]));

  // Compute coefficients for discrete convection-diffution operator
  const double one = 1.0;
  std::vector<double> Values(4);
  std::vector<int> Indices(4);
  double rho = 0.0;
  double h = one /(nx+1);
  double h2 = h*h;
  double c = 5.0e-01*rho/ h;
  Values[0] = -one/h2 - c; Values[1] = -one/h2 + c; Values[2] = -one/h2; Values[3]= -one/h2;
  double diag = 4.0 / h2;
  int NumEntries;

  for (int i=0; i<NumMyElements; ++i) {
    if (MyGlobalElements[i]==0) {
      Indices[0] = 1;
      Indices[1] = nx;
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] == nx*(nx-1)) {
      Indices[0] = nx*(nx-1)+1;
      Indices[1] = nx*(nx-2);
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] == nx-1) {
      Indices[0] = nx-2;
      NumEntries = 1;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
      Indices[0] = 2*nx-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] == NumGlobalElements-1) {
      Indices[0] = NumGlobalElements-2;
      NumEntries = 1;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
      Indices[0] = nx*(nx-1)-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] < nx) {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] > nx*(nx-1)) {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i]%nx == 0) {
      Indices[0] = MyGlobalElements[i]+1;
      Indices[1] = MyGlobalElements[i]-nx;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if ((MyGlobalElements[i]+1)%nx == 0) {
      Indices[0] = MyGlobalElements[i]-nx;
      Indices[1] = MyGlobalElements[i]+nx;
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
      Indices[0] = MyGlobalElements[i]-1;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      Indices[3] = MyGlobalElements[i]+nx;
      NumEntries = 4;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    // Put in the diagonal entry
    int info = A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, &MyGlobalElements[i]);
    TEUCHOS_TEST_FOR_EXCEPTION
      (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
       << info << " != 0." );
  }

  // Finish up
  int info = A->FillComplete ();
  TEUCHOS_TEST_FOR_EXCEPTION
    (info != 0, std::runtime_error, "A->FillComplete() returned info = "
     << info << " != 0." );
  A->SetTracebackMode (1); // Shutdown Epetra Warning tracebacks

  // Create a identity matrix for the temporary mass matrix
  RCP<Epetra_CrsMatrix> M = rcp (new Epetra_CrsMatrix (Epetra_DataAccess::Copy, Map, 1));
  for (int i=0; i<NumMyElements; i++) {
    Values[0] = one;
    Indices[0] = i;
    NumEntries = 1;
    info = M->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
    TEUCHOS_TEST_FOR_EXCEPTION
      (info != 0, std::runtime_error, "M->InsertGlobalValues() returned info = "
       << info << " != 0." );
  }
  // Finish up
  info = M->FillComplete ();
  TEUCHOS_TEST_FOR_EXCEPTION
    (info != 0, std::runtime_error, "M->FillComplete() returned info = "
     << info << " != 0." );
  M->SetTracebackMode (1); // Shutdown Epetra Warning tracebacks

  //************************************
  // Call the LOBPCG solver manager
  //***********************************
  //
  // Variables used for the LOBPCG Method
  const int nev       = 10;
  const int blockSize = 5;
  const int maxIters  = 500;
  const double tol    = 1.0e-8;

  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef MultiVecTraits<double, Epetra_MultiVector> MVT;

  // Create an Epetra_MultiVector for an initial vector to start the
  // solver.  Note: This needs to have the same number of columns as
  // the blocksize.
  RCP<Epetra_MultiVector> ivec = rcp (new Epetra_MultiVector (Map, blockSize));
  ivec->Random (); // fill the initial vector with random values

  // Create the eigenproblem.
  RCP<BasicEigenproblem<double, MV, OP> > MyProblem =
    rcp (new BasicEigenproblem<double, MV, OP> (A, ivec));

  // Inform the eigenproblem that the operator A is symmetric
  MyProblem->setHermitian (true);

  // Set the number of eigenvalues requested
  MyProblem->setNEV (nev);

  // Tell the eigenproblem that you are finishing passing it information.
  const bool success = MyProblem->setProblem ();
  if (! success) {
    printer.print (Errors, "Anasazi::BasicEigenproblem::setProblem() reported an error.\n");
#ifdef HAVE_MPI
    MPI_Finalize ();
#endif // HAVE_MPI
    return -1;
  }

  // Create parameter list to pass into the solver manager
  Teuchos::ParameterList MyPL;
  MyPL.set ("Which", which);
  MyPL.set ("Block Size", blockSize);
  MyPL.set ("Maximum Iterations", maxIters);
  MyPL.set ("Convergence Tolerance", tol);
  MyPL.set ("Full Ortho", true);
  MyPL.set ("Use Locking", true);

  // Create the solver manager
  LOBPCGSolMgr<double, MV, OP> MySolverMan (MyProblem, MyPL);

  // Solve the problem
  ReturnType returnCode = MySolverMan.solve ();

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Eigensolution<double,MV> sol = MyProblem->getSolution ();
  std::vector<Value<double> > evals = sol.Evals;
  RCP<MV> evecs = sol.Evecs;

  // Compute residuals.
  std::vector<double> normR (sol.numVecs);
  if (sol.numVecs > 0) {
    Teuchos::SerialDenseMatrix<int,double> T (sol.numVecs, sol.numVecs);
    Epetra_MultiVector tempAevec (Map, sol.numVecs );
    T.putScalar (0.0);
    for (int i = 0; i < sol.numVecs; ++i) {
      T(i,i) = evals[i].realpart;
    }
    A->Apply (*evecs, tempAevec);
    MVT::MvTimesMatAddMv (-1.0, *evecs, T, 1.0, tempAevec);
    MVT::MvNorm (tempAevec, normR);
  }

  // Print the results
  std::ostringstream os;
  os.setf (std::ios_base::right, std::ios_base::adjustfield);
  os << "Solver manager returned "
     << (returnCode == Converged ? "converged." : "unconverged.") << endl;
  os << endl;
  os << "------------------------------------------------------" << endl;
  os << std::setw(16) << "Eigenvalue"
     << std::setw(18) << "Direct Residual"
     << endl;
  os << "------------------------------------------------------" << endl;
  for (int i = 0; i < sol.numVecs; ++i) {
    os << std::setw(16) << evals[i].realpart
       << std::setw(18) << normR[i] / evals[i].realpart
       << endl;
  }
  os << "------------------------------------------------------" << endl;
  printer.print (Errors, os.str ());

#ifdef HAVE_MPI
  MPI_Finalize ();
#endif // HAVE_MPI
  return 0;
}
Esempio n. 28
0
    void
    factorExplicit (Kokkos::MultiVector<Scalar, NodeType>& A,
		    Kokkos::MultiVector<Scalar, NodeType>& Q,
		    Teuchos::SerialDenseMatrix<LocalOrdinal, Scalar>& R,
		    const bool contiguousCacheBlocks,
		    const bool forceNonnegativeDiagonal=false)
    {
      using Teuchos::asSafe;
      typedef Kokkos::MultiVector<Scalar, NodeType> KMV;

      // Tsqr currently likes LocalOrdinal ordinals, but
      // Kokkos::MultiVector has size_t ordinals.  Do conversions
      // here.  
      //
      // Teuchos::asSafe() can do safe conversion (e.g., checking for
      // overflow when casting to a narrower integer type), if a
      // custom specialization is defined for
      // Teuchos::ValueTypeConversionTraits<size_t, LocalOrdinal>.
      // Otherwise, this has the same (potentially) unsafe effect as
      // static_cast<LocalOrdinal>(...) would have.
      const LocalOrdinal A_numRows = asSafe<LocalOrdinal> (A.getNumRows());
      const LocalOrdinal A_numCols = asSafe<LocalOrdinal> (A.getNumCols());
      const LocalOrdinal A_stride = asSafe<LocalOrdinal> (A.getStride());
      const LocalOrdinal Q_numRows = asSafe<LocalOrdinal> (Q.getNumRows());
      const LocalOrdinal Q_numCols = asSafe<LocalOrdinal> (Q.getNumCols());
      const LocalOrdinal Q_stride = asSafe<LocalOrdinal> (Q.getStride());

      // Sanity checks for matrix dimensions
      if (A_numRows < A_numCols) {
	std::ostringstream os;
	os << "In Tsqr::factorExplicit: input matrix A has " << A_numRows 
	   << " local rows, and " << A_numCols << " columns.  The input "
	  "matrix must have at least as many rows on each processor as "
	  "there are columns.";
	throw std::invalid_argument(os.str());
      } else if (A_numRows != Q_numRows) {
	std::ostringstream os;
	os << "In Tsqr::factorExplicit: input matrix A and output matrix Q "
	  "must have the same number of rows.  A has " << A_numRows << " rows"
	  " and Q has " << Q_numRows << " rows.";
	throw std::invalid_argument(os.str());
      } else if (R.numRows() < R.numCols()) {
	std::ostringstream os;
	os << "In Tsqr::factorExplicit: output matrix R must have at least "
	  "as many rows as columns.  R has " << R.numRows() << " rows and "
	   << R.numCols() << " columns.";
	throw std::invalid_argument(os.str());
      } else if (A_numCols != R.numCols()) {
	std::ostringstream os;
	os << "In Tsqr::factorExplicit: input matrix A and output matrix R "
	  "must have the same number of columns.  A has " << A_numCols 
	   << " columns and R has " << R.numCols() << " columns.";
	throw std::invalid_argument(os.str());
      }

      // Check for quick exit, based on matrix dimensions
      if (Q_numCols == 0)
	return;

      // Hold on to nonconst views of A and Q.  This will make TSQR
      // correct (if perhaps inefficient) for all possible Kokkos Node
      // types, even GPU nodes.
      Teuchos::ArrayRCP<scalar_type> A_ptr = A.getValuesNonConst();
      Teuchos::ArrayRCP<scalar_type> Q_ptr = Q.getValuesNonConst();

      R.putScalar (STS::zero());
      NodeOutput nodeResults = 
	nodeTsqr_->factor (A_numRows, A_numCols, A_ptr.getRawPtr(), A_stride,
			   R.values(), R.stride(), contiguousCacheBlocks);
      // FIXME (mfh 19 Oct 2010) Replace actions on raw pointer with
      // actions on the Kokkos::MultiVector or at least the ArrayRCP.
      nodeTsqr_->fill_with_zeros (Q_numRows, Q_numCols, 
				  Q_ptr.getRawPtr(), Q_stride,
				  contiguousCacheBlocks);
      matview_type Q_rawView (Q_numRows, Q_numCols, 
			      Q_ptr.getRawPtr(), Q_stride);
      matview_type Q_top_block = 
	nodeTsqr_->top_block (Q_rawView, contiguousCacheBlocks);
      if (Q_top_block.nrows() < R.numCols()) {
	std::ostringstream os;
	os << "The top block of Q has too few rows.  This means that the "
	   << "the intranode TSQR implementation has a bug in its top_block"
	   << "() method.  The top block should have at least " << R.numCols()
	   << " rows, but instead has only " << Q_top_block.ncols() 
	   << " rows.";
	throw std::logic_error (os.str());
      }
      {
	matview_type Q_top (R.numCols(), Q_numCols, Q_top_block.get(), 
			    Q_top_block.lda());
	matview_type R_view (R.numRows(), R.numCols(), R.values(), R.stride());
	distTsqr_->factorExplicit (R_view, Q_top, forceNonnegativeDiagonal);
      }
      nodeTsqr_->apply (ApplyType::NoTranspose, 
			A_numRows, A_numCols, A_ptr.getRawPtr(), A_stride,
			nodeResults, Q_numCols, Q_ptr.getRawPtr(), Q_stride,
			contiguousCacheBlocks);

      // If necessary, force the R factor to have a nonnegative diagonal.
      if (forceNonnegativeDiagonal && 
	  ! QR_produces_R_factor_with_nonnegative_diagonal()) {
	details::NonnegDiagForcer<LocalOrdinal, Scalar, STS::isComplex> forcer;
	matview_type Q_mine (Q_numRows, Q_numCols, Q_ptr.getRawPtr(), Q_stride);
	matview_type R_mine (R.numRows(), R.numCols(), R.values(), R.stride());
	forcer.force (Q_mine, R_mine);
      }

      // "Commit" the changes to the multivector.
      A_ptr = Teuchos::null;
      Q_ptr = Teuchos::null;
    }