/// \brief Verify the result of the "thin" QR factorization \f$A = QR\f$. /// /// This method returns a list of three magnitudes: /// - \f$\| A - QR \|_F\f$ /// - \f$\|I - Q^* Q\|_F\f$ /// - \f$\|A\|_F\f$ /// /// The notation $\f\| X \|\f$ denotes the Frobenius norm /// (square root of sum of squares) of a matrix \f$X\f$. /// Returning the Frobenius norm of \f$A\f$ allows you to scale /// or not scale the residual \f$\|A - QR\|\f$ as you prefer. virtual std::vector< magnitude_type > verify (const multivector_type& A, const multivector_type& Q, const Teuchos::SerialDenseMatrix< local_ordinal_type, scalar_type >& R) { using Teuchos::ArrayRCP; local_ordinal_type nrowsLocal_A, ncols_A, LDA; local_ordinal_type nrowsLocal_Q, ncols_Q, LDQ; fetchDims (A, nrowsLocal_A, ncols_A, LDA); fetchDims (Q, nrowsLocal_Q, ncols_Q, LDQ); if (nrowsLocal_A != nrowsLocal_Q) throw std::runtime_error ("A and Q must have same number of rows"); else if (ncols_A != ncols_Q) throw std::runtime_error ("A and Q must have same number of columns"); else if (ncols_A != R.numCols()) throw std::runtime_error ("A and R must have same number of columns"); else if (R.numRows() < R.numCols()) throw std::runtime_error ("R must have no fewer rows than columns"); // Const views suffice for verification ArrayRCP< const scalar_type > A_ptr = fetchConstView (A); ArrayRCP< const scalar_type > Q_ptr = fetchConstView (Q); return global_verify (nrowsLocal_A, ncols_A, A_ptr.get(), LDA, Q_ptr.get(), LDQ, R.values(), R.stride(), pScalarMessenger_.get()); }
/// \brief Assemble the state vector for one stage:
///   x_out = x_base + dt * sum_j A_in(stageIndex, j) * x_stage_bar[j].
///
/// \param stageIndex  [in] Row of \c A_in to use; must satisfy
///   0 <= stageIndex < A_in.numRows().
/// \param A_in        [in] Square coefficient matrix (presumably the
///   Runge-Kutta tableau "A" block -- confirm against the caller).
/// \param dt          [in] Scale factor applied to every coefficient.
/// \param x_base      [in] Vector copied into the output before the sum.
/// \param x_stage_bar [in] Product vector with one block per stage.
/// \param x_out_ptr   [out] Receives the assembled vector.
void assembleIRKState(
  const int stageIndex,
  const Teuchos::SerialDenseMatrix<int,Scalar> &A_in,
  const Scalar dt,
  const Thyra::VectorBase<Scalar> &x_base,
  const Thyra::ProductVectorBase<Scalar> &x_stage_bar,
  Teuchos::Ptr<Thyra::VectorBase<Scalar> > x_out_ptr
  )
{
  const int numStages_in = A_in.numRows();

  TEUCHOS_ASSERT_IN_RANGE_UPPER_EXCLUSIVE( stageIndex, 0, numStages_in );
  // A_in must be square and x_stage_bar must carry one block per stage.
  TEUCHOS_ASSERT_EQUALITY( A_in.numCols(), numStages_in );
  TEUCHOS_ASSERT_EQUALITY( x_stage_bar.productSpace()->numBlocks(), numStages_in );

  Thyra::VectorBase<Scalar>& x_out = *x_out_ptr;

  // x_out = x_base
  V_V( outArg(x_out), x_base );

  // x_out += dt * A_in(stageIndex, j) * x_stage_bar[j] for every stage j
  for ( int j = 0; j < numStages_in; ++j ) {
    Vp_StV( outArg(x_out), dt * A_in(stageIndex,j), *x_stage_bar.getVectorBlock(j) );
  }
}
// Accumulate, into `result`, the contribution of every operator in
// `operators`:
//   1. result is scaled by beta,
//   2. for each operator the relevant entries of `input` are gathered
//      into a local matrix, the operator's transformPCE2QP() is applied
//      with weight smolyak_coeffs[k], and the local output is scattered
//      (added) back into `result`.
//
// When `trans` is true the local matrices are shaped with the
// coefficient/point dimension as columns, otherwise as rows.
//
// NOTE(review): `alpha` is never read in this routine; only the
// Smolyak coefficients scale the operator output. Confirm that callers
// always pass alpha == 1.
void
Stokhos::SmolyakPseudoSpectralOperator<ordinal_type,value_type,point_compare_type>::
transformPCE2QP_smolyak(
  const value_type& alpha,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& input,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& result,
  const value_type& beta,
  bool trans) const
{
  typedef Teuchos::SerialDenseMatrix<ordinal_type,value_type> dense_t;

  // Scratch matrices, reshaped for each operator.
  dense_t local_in, local_out;

  result.scale(beta);

  for (ordinal_type k = 0; k < operators.size(); ++k) {
    Teuchos::RCP<operator_type> tensor_op = operators[k];

    if (!trans) {
      local_in.reshape(tensor_op->coeff_size(), input.numCols());
      local_out.reshape(tensor_op->point_size(), result.numCols());
    }
    else {
      local_in.reshape(input.numRows(), tensor_op->coeff_size());
      local_out.reshape(result.numRows(), tensor_op->point_size());
    }

    gather(scatter_maps[k], input, trans, local_in);
    tensor_op->transformPCE2QP(smolyak_coeffs[k], local_in, local_out, 0.0,
                               trans);
    scatter(gather_maps[k], local_out, trans, result);
  }
}
/// \brief Approximate inv(A) * Input with \c m sweeps of Jacobi iteration.
///
/// With D = diag(A) and G = I - inv(D) A, the iteration is
///   x_1     = inv(D) r
///   x_{k+1} = G x_k + inv(D) r
/// where r is the first column of \c Input.  \c Result receives x_m.
///
/// \param Input  [in] Right-hand side; only column 0 is read.
/// \param Result [out] Receives the m-th iterate.  May alias \c Input:
///   all reads of \c Input happen before \c Result is written.
/// \param m      [in] Number of sweeps.  If m < 1, \c Result is left
///   untouched.
/// \return Always 0.
///
/// The previous implementation never wrote \c Result for m == 1 and
/// never advanced the iterate, so every m >= 2 produced x_2.  The
/// result for m == 2 is unchanged.
virtual ordinal_type ApplyInverse(
  const Teuchos::SerialDenseMatrix<ordinal_type, value_type>& Input,
  Teuchos::SerialDenseMatrix<ordinal_type, value_type>& Result,
  ordinal_type m) const
{
  typedef Teuchos::SerialDenseMatrix<ordinal_type, value_type> dense_t;

  if (m < 1)
    return 0;

  const ordinal_type n = Input.numRows();

  // dinv_r = inv(D) r
  dense_t dinv_r(n, 1);
  for (ordinal_type i = 0; i < n; i++)
    dinv_r(i,0) = Input(i,0) / A(i,i);

  // First iterate x_1 = inv(D) r (deep copy: dinv_r owns its storage).
  dense_t x(dinv_r);

  if (m > 1) {
    // G = inv(D)(-L-U) = I - inv(D) A; built once, it does not change
    // between sweeps.
    dense_t G(n, n);
    for (ordinal_type row = 0; row < n; row++) {
      for (ordinal_type col = 0; col < n; col++) {
        if (col == row)
          G(row,col) = value_type(0);
        else
          G(row,col) = -A(row,col) / A(row,row);
      }
    }

    dense_t next(n, 1);
    for (ordinal_type sweep = 1; sweep < m; sweep++) {
      // next = G x + inv(D) r
      next.assign(dinv_r);
      next.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, G, x, 1.0);
      x.assign(next);
    }
  }

  Result.assign(x);
  return 0;
}
void EpetraOpMultiVec::MvTimesMatAddMv ( double alpha, const MultiVec<double>& A, const Teuchos::SerialDenseMatrix<int,double>& B, double beta ) { Epetra_LocalMap LocalMap(B.numRows(), 0, Epetra_MV->Map().Comm()); Epetra_MultiVector B_Pvec(Epetra_DataAccess::View, LocalMap, B.values(), B.stride(), B.numCols()); EpetraOpMultiVec *A_vec = dynamic_cast<EpetraOpMultiVec *>(&const_cast<MultiVec<double> &>(A)); TEUCHOS_TEST_FOR_EXCEPTION( A_vec==NULL, std::invalid_argument, "Anasazi::EpetraOpMultiVec::SetBlocks() cast of MultiVec<double> to EpetraOpMultiVec failed."); TEUCHOS_TEST_FOR_EXCEPTION( Epetra_MV->Multiply( 'N', 'N', alpha, *(A_vec->GetEpetraMultiVector()), B_Pvec, beta ) != 0, EpetraSpecializedMultiVecFailure, "Anasazi::EpetraOpMultiVec::MvTimesMatAddMv() call to Epetra_MultiVec::Multiply() returned a nonzero value."); }
// Update *this with alpha * A * B + beta * (*this). void MvTimesMatAddMv (ScalarType alpha, const Anasazi::MultiVec<ScalarType> &A, const Teuchos::SerialDenseMatrix<int, ScalarType> &B, ScalarType beta) { assert (Length_ == A.GetVecLength()); assert (B.numRows() == A.GetNumberVecs()); assert (B.numCols() <= NumberVecs_); MyMultiVec* MyA; MyA = dynamic_cast<MyMultiVec*>(&const_cast<Anasazi::MultiVec<ScalarType> &>(A)); assert(MyA!=NULL); if ((*this)[0] == (*MyA)[0]) { // If this == A, then need additional storage ... // This situation is a bit peculiar but it may be required by // certain algorithms. std::vector<ScalarType> tmp(NumberVecs_); for (int i = 0 ; i < Length_ ; ++i) { for (int v = 0; v < A.GetNumberVecs() ; ++v) { tmp[v] = (*MyA)(i, v); } for (int v = 0 ; v < B.numCols() ; ++v) { (*this)(i, v) *= beta; ScalarType res = Teuchos::ScalarTraits<ScalarType>::zero(); for (int j = 0 ; j < A.GetNumberVecs() ; ++j) { res += tmp[j] * B(j, v); } (*this)(i, v) += alpha * res; } } } else { for (int i = 0 ; i < Length_ ; ++i) { for (int v = 0 ; v < B.numCols() ; ++v) { (*this)(i, v) *= beta; ScalarType res = 0.0; for (int j = 0 ; j < A.GetNumberVecs() ; ++j) { res += (*MyA)(i, j) * B(j, v); } (*this)(i, v) += alpha * res; } } } }
void EpetraMultiVec::MvTransMv ( const double alpha, const MultiVec<double>& A, Teuchos::SerialDenseMatrix<int,double>& B) const { EpetraMultiVec *A_vec = dynamic_cast<EpetraMultiVec *>(&const_cast<MultiVec<double> &>(A)); if (A_vec) { Epetra_LocalMap LocalMap(B.numRows(), 0, Map().Comm()); Epetra_MultiVector B_Pvec(View, LocalMap, B.values(), B.stride(), B.numCols()); int info = B_Pvec.Multiply( 'T', 'N', alpha, *A_vec, *this, 0.0 ); TEST_FOR_EXCEPTION(info!=0, EpetraMultiVecFailure, "Belos::EpetraMultiVec::MvTransMv call to Multiply() returned a nonzero value."); } }
// Generic BLAS level 3 matrix multiplication // \f$\text{this}\leftarrow \alpha A B+\beta\text{this}\f$ void gemm(const Real alpha, const MV& A, const Teuchos::SerialDenseMatrix<int,Real> &B, const Real beta) { // Scale this by beta this->scale(beta); for(int i=0;i<B.numRows();++i) { for(int j=0;j<B.numCols();++j) { mvec_[j]->axpy(alpha*B(i,j),*A.getVector(i)); } } }
void EpetraMultiVec::MvTimesMatAddMv ( const double alpha, const MultiVec<double>& A, const Teuchos::SerialDenseMatrix<int,double>& B, const double beta ) { Epetra_LocalMap LocalMap(B.numRows(), 0, Map().Comm()); Epetra_MultiVector B_Pvec(View, LocalMap, B.values(), B.stride(), B.numCols()); EpetraMultiVec *A_vec = dynamic_cast<EpetraMultiVec *>(&const_cast<MultiVec<double> &>(A)); TEST_FOR_EXCEPTION(A_vec==NULL, EpetraMultiVecFailure, "Belos::EpetraMultiVec::MvTimesMatAddMv cast from Belos::MultiVec<> to Belos::EpetraMultiVec failed."); int info = Multiply( 'N', 'N', alpha, *A_vec, B_Pvec, beta ); TEST_FOR_EXCEPTION(info!=0, EpetraMultiVecFailure, "Belos::EpetraMultiVec::MvTimesMatAddMv call to Multiply() returned a nonzero value."); }
/// \brief Rank-revealing step applied to an existing Q and R.
///
/// Calls reveal_R_rank() on R.  If the returned rank is smaller than
/// the number of columns, Q is updated in place via Q_times_B() with
/// the matrix U produced by reveal_R_rank().
///
/// \param Q [in/out] Multivector; updated only when R is rank deficient.
/// \param R [in/out] Passed to reveal_R_rank(), which may overwrite it.
/// \param tol [in] Tolerance forwarded to reveal_R_rank().
/// \param contiguousCacheBlocks [in] Forwarded to Q_times_B().
///
/// \return The rank reported by reveal_R_rank(), or 0 if Q has no
///   columns.
LocalOrdinal
revealRank (Kokkos::MultiVector<Scalar, NodeType>& Q,
            Teuchos::SerialDenseMatrix<LocalOrdinal, Scalar>& R,
            const magnitude_type& tol,
            const bool contiguousCacheBlocks = false) const
{
  const LocalOrdinal numRows = static_cast<LocalOrdinal> (Q.getNumRows());
  const LocalOrdinal numCols = static_cast<LocalOrdinal> (Q.getNumCols());
  const LocalOrdinal strideQ = static_cast<LocalOrdinal> (Q.getStride());
  Teuchos::ArrayRCP<Scalar> valuesQ = Q.getValuesNonConst();

  // Nothing to do for an empty set of columns.
  if (numCols == 0) {
    return 0;
  }

  //
  // FIXME (mfh 16 Jul 2010) We _should_ compute the SVD of R (as
  // the copy B) on Proc 0 only.  This would ensure that all
  // processors get the same SVD and rank (esp. in a heterogeneous
  // computing environment).  For now, we just do this computation
  // redundantly, and hope that all the returned rank values are
  // the same.
  //
  matrix_type U (numCols, numCols, STS::zero());
  const ordinal_type rank =
    reveal_R_rank (numCols, R.values(), R.stride(), U.get(), U.lda(), tol);

  if (rank >= numCols) {
    // Full rank: Q is left as it is.
    return rank;
  }

  // If R is not full rank: reveal_R_rank() already computed the SVD
  // \f$R = U \Sigma V^*\f$ of (the input) R, and overwrote R with
  // \f$\Sigma V^*\f$.  Now, we compute \f$Q := Q \cdot U\f$,
  // respecting cache blocks of Q.
  Q_times_B (numRows, numCols, valuesQ.getRawPtr(), strideQ,
             U.get(), U.lda(), contiguousCacheBlocks);
  return rank;
}
void EpetraMultiVec::MvTransMv ( double alpha, const MultiVec<double>& A, Teuchos::SerialDenseMatrix<int,double>& B #ifdef HAVE_ANASAZI_EXPERIMENTAL , ConjType conj #endif ) const { EpetraMultiVec *A_vec = dynamic_cast<EpetraMultiVec *>(&const_cast<MultiVec<double> &>(A)); if (A_vec) { Epetra_LocalMap LocalMap(B.numRows(), 0, Map().Comm()); Epetra_MultiVector B_Pvec(View, LocalMap, B.values(), B.stride(), B.numCols()); TEUCHOS_TEST_FOR_EXCEPTION( B_Pvec.Multiply( 'T', 'N', alpha, *A_vec, *this, 0.0 ) != 0, EpetraMultiVecFailure, "Anasazi::EpetraMultiVec::MvTransMv() call to Epetra_MultiVec::Multiply() returned a nonzero value."); } }
// Add the entries of `input` into `result` at the positions named by
// `map`:
//   trans == true : result(i, map[j]) += input(i, j)   (map indexes columns)
//   trans == false: result(map[i], j) += input(i, j)   (map indexes rows)
void
Stokhos::SmolyakPseudoSpectralOperator<ordinal_type,value_type,point_compare_type>::
scatter(
  const Teuchos::Array<ordinal_type>& map,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& input,
  bool trans,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& result) const
{
  if (!trans) {
    for (ordinal_type col = 0; col < input.numCols(); col++) {
      for (ordinal_type src_row = 0; src_row < map.size(); src_row++) {
        result(map[src_row], col) += input(src_row, col);
      }
    }
    return;
  }

  for (ordinal_type src_col = 0; src_col < map.size(); src_col++) {
    for (ordinal_type row = 0; row < input.numRows(); row++) {
      result(row, map[src_col]) += input(row, src_col);
    }
  }
}
// Compute a dense matrix B through the matrix-matrix multiply alpha * A^H * (*this). void MvTransMv (ScalarType alpha, const Anasazi::MultiVec<ScalarType>& A, Teuchos::SerialDenseMatrix< int, ScalarType >& B #ifdef HAVE_ANASAZI_EXPERIMENTAL , Anasazi::ConjType conj #endif ) const { MyMultiVec* MyA; MyA = dynamic_cast<MyMultiVec*>(&const_cast<Anasazi::MultiVec<ScalarType> &>(A)); assert (MyA != 0); assert (A.GetVecLength() == Length_); assert (NumberVecs_ <= B.numCols()); assert (A.GetNumberVecs() <= B.numRows()); #ifdef HAVE_ANASAZI_EXPERIMENTAL if (conj == Anasazi::CONJ) { #endif for (int v = 0 ; v < A.GetNumberVecs() ; ++v) { for (int w = 0 ; w < NumberVecs_ ; ++w) { ScalarType value = 0.0; for (int i = 0 ; i < Length_ ; ++i) { value += Teuchos::ScalarTraits<ScalarType>::conjugate((*MyA)(i, v)) * (*this)(i, w); } B(v, w) = alpha * value; } } #ifdef HAVE_ANASAZI_EXPERIMENTAL } else { for (int v = 0 ; v < A.GetNumberVecs() ; ++v) { for (int w = 0 ; w < NumberVecs_ ; ++w) { ScalarType value = 0.0; for (int i = 0 ; i < Length_ ; ++i) { value += (*MyA)(i, v) * (*this)(i, w); } B(v, w) = alpha * value; } } } #endif }
// Compute a dense matrix B through the matrix-matrix multiply alpha * A^H * (*this). void MvTransMv (const ScalarType alpha, const Belos::MultiVec<ScalarType>& A, Teuchos::SerialDenseMatrix< int, ScalarType >& B) const { MyMultiVec* MyA; MyA = dynamic_cast<MyMultiVec*>(&const_cast<Belos::MultiVec<ScalarType> &>(A)); TEUCHOS_ASSERT(MyA != NULL); assert (A.GetGlobalLength() == Length_); assert (NumberVecs_ <= B.numCols()); assert (A.GetNumberVecs() <= B.numRows()); for (int v = 0 ; v < A.GetNumberVecs() ; ++v) { for (int w = 0 ; w < NumberVecs_ ; ++w) { ScalarType value = 0.0; for (int i = 0 ; i < Length_ ; ++i) { value += Teuchos::ScalarTraits<ScalarType>::conjugate((*MyA)(i, v)) * (*this)(i, w); } B(v, w) = alpha * value; } } }
void EpetraOpMultiVec::MvTransMv ( double alpha, const MultiVec<double>& A, Teuchos::SerialDenseMatrix<int,double>& B #ifdef HAVE_ANASAZI_EXPERIMENTAL , ConjType conj #endif ) const { EpetraOpMultiVec *A_vec = dynamic_cast<EpetraOpMultiVec *>(&const_cast<MultiVec<double> &>(A)); if (A_vec) { Epetra_LocalMap LocalMap(B.numRows(), 0, Epetra_MV->Map().Comm()); Epetra_MultiVector B_Pvec(Epetra_DataAccess::View, LocalMap, B.values(), B.stride(), B.numCols()); int info = Epetra_OP->Apply( *Epetra_MV, *Epetra_MV_Temp ); TEUCHOS_TEST_FOR_EXCEPTION( info != 0, EpetraSpecializedMultiVecFailure, "Anasazi::EpetraOpMultiVec::MvTransMv(): Error returned from Epetra_Operator::Apply()" ); TEUCHOS_TEST_FOR_EXCEPTION( B_Pvec.Multiply( 'T', 'N', alpha, *(A_vec->GetEpetraMultiVector()), *Epetra_MV_Temp, 0.0 ) != 0, EpetraSpecializedMultiVecFailure, "Anasazi::EpetraOpMultiVec::MvTransMv() call to Epetra_MultiVector::Multiply() returned a nonzero value."); } }
void updateGuess(Teuchos::SerialDenseVector<int, std::complex<double> >& myCurrentGuess, Teuchos::SerialDenseVector<int, std::complex<double> >& myTargetsCalculated, Teuchos::SerialDenseMatrix<int, std::complex<double> >& myJacobian, Teuchos::LAPACK<int, std::complex<double> >& myLAPACK ) { //v = J(inverse) * (-F(x)) //new guess = v + old guess myTargetsCalculated *= -1.0; //Perform an LU factorization of this matrix. int ipiv[NUMDIMENSIONS], info; char TRANS = 'N'; myLAPACK.GETRF( NUMDIMENSIONS, NUMDIMENSIONS, myJacobian.values(), myJacobian.stride(), ipiv, &info ); // Solve the linear system. myLAPACK.GETRS( TRANS, NUMDIMENSIONS, 1, myJacobian.values(), myJacobian.stride(), ipiv, myTargetsCalculated.values(), myTargetsCalculated.stride(), &info ); //We have overwritten myTargetsCalculated with guess update values //myBLAS.AXPY(NUMDIMENSIONS, 1.0, myGuessAdjustment.values(), 1, myCurrentGuess.values(), 1); myCurrentGuess += myTargetsCalculated; }
/*! \brief Update \c mv with \f$ \alpha A B + \beta mv \f$.
 *
 * Each column j of \c mv is rebuilt in a temporary vector as
 * beta * mv[j] + alpha * sum_i B(i,j) * A[i] and then copied back.
 * The cases beta == 1 and beta == 0 are handled without a scaling
 * multiply, and the A*B term is skipped entirely when alpha == 0.
 *
 * Requires mv.size() == B.numCols().
 */
static void MvTimesMatAddMv( const double alpha, const _MV & A,
                             const Teuchos::SerialDenseMatrix<int,double>& B,
                             const double beta, _MV & mv )
{
  const int numColsB = B.numCols();
  TEST_FOR_EXCEPT(mv.size() != numColsB);

  const bool addProduct = (alpha != zero());

  for (int col=0; col<mv.size(); col++)
  {
    // Start from beta * mv[col].
    Vector<double> acc;
    if (beta==one())
    {
      acc = mv[col].copy();
    }
    else if (beta==zero())
    {
      acc = mv[col].copy();
      acc.setToConstant(zero());
    }
    else
    {
      acc = beta * mv[col];
    }

    // Add alpha * (A B)[col].
    if (addProduct)
    {
      for (int row=0; row<A.size(); row++)
      {
        acc = acc + alpha*B(row,col)*A[row];
      }
    }

    mv[col].acceptCopyOf(acc);
  }
}
// Preconditioned conjugate gradient iteration for A X = B with a
// single right-hand-side column.
//
//   A, B       system matrix and right-hand side.
//   X          [in/out] initial guess on entry, updated iterate on exit.
//   max_iter   iteration limit.
//   tolerance  iteration stops once the Frobenius norm of the residual
//              is no longer greater than this value.
//   PrecNum    preconditioner selector: 0 = none, 1 = diagonal,
//              2 = Jacobi (2 sweeps), 3 = Gauss-Seidel (1 sweep),
//              4 = Schur; other values behave like 0.
//   prec_iter  sweep count passed to the diagonal and Schur
//              preconditioners.
//   order, m, diag  forwarded to the Schur preconditioner.
//   M          matrix from which the preconditioner is built.
//
// Always returns 0.
ordinal_type
Stokhos::CGDivisionExpansionStrategy<ordinal_type,value_type,node_type>::
CG(const Teuchos::SerialDenseMatrix<ordinal_type, value_type> & A,
   Teuchos::SerialDenseMatrix<ordinal_type,value_type> & X,
   const Teuchos::SerialDenseMatrix<ordinal_type,value_type> & B,
   ordinal_type max_iter,
   value_type tolerance,
   ordinal_type prec_iter,
   ordinal_type order ,
   ordinal_type m,
   ordinal_type PrecNum,
   const Teuchos::SerialDenseMatrix<ordinal_type, value_type> & M,
   ordinal_type diag)
{
  typedef Teuchos::SerialDenseMatrix<ordinal_type, value_type> dense_t;

  const ordinal_type n = A.numRows();
  ordinal_type k = 0;

  // r = B - A X
  dense_t Ax(n,1);
  Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0);
  dense_t r(Teuchos::Copy,B);
  r-=Ax;
  value_type resid = r.normFrobenius();

  dense_t p(r);
  dense_t rho(1,1);
  dense_t oldrho(1,1);
  dense_t pAp(1,1);
  dense_t Ap(n,1);

  while (resid > tolerance && k < max_iter){
    // Solve M z = r (z = r when no preconditioner is selected).
    dense_t z(r);
    if (PrecNum == 1){
      Stokhos::DiagPreconditioner<ordinal_type, value_type> precond(M);
      precond.ApplyInverse(r,z,prec_iter);
    }
    else if (PrecNum == 2){
      Stokhos::JacobiPreconditioner<ordinal_type, value_type> precond(M);
      precond.ApplyInverse(r,z,2);
    }
    else if (PrecNum == 3){
      Stokhos::GSPreconditioner<ordinal_type, value_type> precond(M,0);
      precond.ApplyInverse(r,z,1);
    }
    else if (PrecNum == 4){
      Stokhos::SchurPreconditioner<ordinal_type, value_type> precond(M, order, m, diag);
      precond.ApplyInverse(r,z,prec_iter);
    }

    // rho = r^T z.  (The first iteration used to recompute this same
    // product a second time; once is enough.)
    rho.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,1.0, r, z, 0.0);

    if (k==0){
      p.assign(z);
    }
    else {
      // p = z + (rho / oldrho) p
      const value_type b = rho(0,0)/oldrho(0,0);
      p.scale(b);
      p+=z;
    }

    // a = rho / (p^T A p)
    Ap.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, p, 0.0);
    pAp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,1.0, p, Ap, 0.0);
    const value_type a = rho(0,0)/pAp(0,0);

    // X += a p
    dense_t scalep(p);
    scalep.scale(a);
    X+=scalep;

    // r -= a A p
    Ap.scale(a);
    r-=Ap;

    oldrho.assign(rho);
    resid=r.normFrobenius();
    k++;
  }

  //std::cout << "iteration count  " << k << std::endl;
  return 0;
}
//Mean-Based Preconditioned GMRES int pregmres(const Teuchos::SerialDenseMatrix<int, double> & A, const Teuchos::SerialDenseMatrix<int,double> & X,const Teuchos::SerialDenseMatrix<int,double> & B, int max_iter, double tolerance) { int n; int k; double resid; k=1; n=A.numRows(); std::cout << A << std::endl; Teuchos::SerialDenseMatrix<int, double> D(n,1); //Get diagonal entries of A for (int i=0; i<n; i++){ D(i,0)=A(i,i); } Teuchos::SerialDenseMatrix<int, double> Ax(n,1); Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0); Teuchos::SerialDenseMatrix<int, double> r0(B); r0-=Ax; resid=r0.normFrobenius(); //define vector v=r/norm(r) where r=b-Ax Teuchos::SerialDenseMatrix<int, double> v(n,1); r0.scale(1/resid); Teuchos::SerialDenseMatrix<int, double> h(1,1); //Matrix of orthog basis vectors V Teuchos::SerialDenseMatrix<int, double> V(n,1); //Set v=r0/norm(r0) to be 1st col of V for (int i=0; i<n; i++){ V(i,0)=r0(i,0); } //right hand side Teuchos::SerialDenseMatrix<int, double> bb(1,1); bb(0,0)=resid; Teuchos::SerialDenseMatrix<int, double> w(n,1); Teuchos::SerialDenseMatrix<int, double> c; Teuchos::SerialDenseMatrix<int, double> s; while (resid > tolerance && k < max_iter){ std::cout << "k = " << k << std::endl; h.reshape(k+1,k); //Arnoldi iteration(Gram-Schmidt ) V.reshape(n,k+1); //set vk to be kth col of V Teuchos::SerialDenseMatrix<int, double> vk(Teuchos::Copy, V, n,1,0,k-1); //Preconditioning step w=AMj(-1)vj w.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1/D(k-1,0), A, vk, 0.0); Teuchos::SerialDenseMatrix<int, double> vi(n,1); Teuchos::SerialDenseMatrix<int, double> ip(1,1); for (int i=0; i<k; i++){ //set vi to be ith col of V Teuchos::SerialDenseMatrix<int, double> vi(Teuchos::Copy, V, n,1,0,i); //Calculate inner product ip.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, vi, w, 0.0); h(i,k-1)= ip(0,0); //scale vi by h(i,k-1) vi.scale(ip(0,0)); w-=vi; } h(k,k-1)=w.normFrobenius(); w.scale(1.0/w.normFrobenius()); //add column vk+1=w to V for (int 
i=0; i<n; i++){ V(i,k)=w(i,0); } //Solve upper hessenberg least squares problem via Givens rotations //Compute previous Givens rotations for (int i=0; i<k-1; i++){ h(i,k-1)=c(i,0)*h(i,k-1)+s(i,0)*h(i+1,k-1); h(i+1,k-1)=-s(i,0)*h(i,k-1)+c(i,0)*h(i+1,k-1); } //Compute next Givens rotations c.reshape(k,1); s.reshape(k,1); bb.reshape(k+1,1); double l = sqrt(h(k-1,k-1)*h(k-1,k-1)+h(k,k-1)*h(k,k-1)); c(k-1,0)=h(k-1,k-1)/l; s(k-1,0)=h(k,k-1)/l; std::cout <<" h(k,k-1)= " << h(k,k-1) << std::endl; // Givens rotation on h and bb h(k-1,k-1)=l; h(k,k-1)=0; bb(k-1,0)=c(k-1,0)*bb(k-1,0); bb(k,0)=-s(k-1,0)*bb(k-1,0); //Determine residual resid = fabs(bb(k,0)); std::cout << "resid = " << resid << std::endl; k=k+1; } //Extract upper triangular square matrix bb.reshape(h.numRows()-1 ,1); //Solve linear system int info; Teuchos::LAPACK<int, double> lapack; lapack.TRTRS('U', 'N', 'N', h.numRows()-1, 1, h.values(), h.stride(), bb.values(), bb.stride(),&info); //Found y=Mx for (int i=0; i<k-1; i++){ bb(i,0)=bb(i,0)/D(i,0); } V.reshape(n,k-1); Teuchos::SerialDenseMatrix<int, double> ans(X); ans.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, V, bb, 1.0); std::cout << "ans= " << ans << std::endl; std::cout << "h= " << h << std::endl; return 0; }
// Apply the dense operator A with a single GEMM call:
//   trans == false: result = alpha * A * input     + beta * result
//   trans == true : result = alpha * input * A^T   + beta * result
//
// Only the leading input.numRows() (resp. input.numCols()) columns of
// A take part in the product, so `input` may be smaller than A's
// column count.
void
Stokhos::SmolyakPseudoSpectralOperator<ordinal_type,value_type,point_compare_type>::
apply_direct(
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& A,
  const value_type& alpha,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& input,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& result,
  const value_type& beta,
  bool trans) const
{
  if (!trans) {
    TEUCHOS_ASSERT(input.numRows() <= A.numCols());
    TEUCHOS_ASSERT(result.numRows() == A.numRows());
    TEUCHOS_ASSERT(result.numCols() == input.numCols());

    blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS,
              A.numRows(), input.numCols(), input.numRows(),
              alpha,
              A.values(), A.stride(),
              input.values(), input.stride(),
              beta,
              result.values(), result.stride());
    return;
  }

  TEUCHOS_ASSERT(input.numCols() <= A.numCols());
  TEUCHOS_ASSERT(result.numCols() == A.numRows());
  TEUCHOS_ASSERT(result.numRows() == input.numRows());

  blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS,
            input.numRows(), A.numRows(), input.numCols(),
            alpha,
            input.values(), input.stride(),
            A.values(), A.stride(),
            beta,
            result.values(), result.stride());
}
// Build a reduced orthogonal basis from monomials in the columns of F.
//
//   max_p      maximum total order passed to compute_terms().
//   threshold  forwarded to the orthogonalization routine.
//   A          matrix used to form Qp_ = A^T * W * Q_.
//   F          values at the quadrature points, one column per dimension.
//   weights    quadrature weights (one per row of F); W = diag(weights).
//   terms_, num_terms_  [out] filled by compute_terms().
//   Qp_        [out] reshaped to pce_sz x (returned size) and filled.
//   Q_         [out] filled by the orthogonalization routine.
//
// Returns the size reported by createOrthogonalBasis().
ordinal_type
Stokhos::MonomialGramSchmidtPCEBasis<ordinal_type, value_type>::
buildReducedBasis(
  ordinal_type max_p,
  value_type threshold,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& A,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& F,
  const Teuchos::Array<value_type>& weights,
  Teuchos::Array< Stokhos::MultiIndex<ordinal_type> >& terms_,
  Teuchos::Array<ordinal_type>& num_terms_,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& Qp_,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& Q_)
{
  // Compute basis terms -- 2-D array giving powers for each linear index
  ordinal_type max_sz;
  CPBUtils::compute_terms(max_p, this->d, max_sz, terms_, num_terms_);

  // Compute B matrix -- monomials in F
  // for i=0,...,nqp-1
  //   for j=0,...,sz-1
  //      B(i,j) = F(i,1)^terms_[j][1] * ... * F(i,d)^terms_[j][d]
  // where sz is the total size of a basis up to order p and terms_[j]
  // is an array of powers for each term in the total-order basis
  ordinal_type nqp = weights.size();
  SDM B(nqp, max_sz);
  for (ordinal_type i=0; i<nqp; i++) {
    for (ordinal_type j=0; j<max_sz; j++) {
      B(i,j) = 1.0;
      for (ordinal_type k=0; k<this->d; k++)
        B(i,j) *= std::pow(F(i,k), terms_[j][k]);
    }
  }

  // Rescale columns of B to have unit (weighted) norm
  for (ordinal_type j=0; j<max_sz; j++) {
    value_type nrm = 0.0;
    for (ordinal_type i=0; i<nqp; i++)
      nrm += B(i,j)*B(i,j)*weights[i];
    nrm = std::sqrt(nrm);
    for (ordinal_type i=0; i<nqp; i++)
      B(i,j) /= nrm;
  }

  // Compute our new basis -- each column of Q is the new basis evaluated
  // at the original quadrature points.  Constrain pivoting so the first
  // d+1 columns are included in Q.  The count is clamped to max_sz so
  // that a basis with fewer than d+1 terms does not index past the end
  // of piv.
  SDM R;
  Teuchos::Array<ordinal_type> piv(max_sz);
  const ordinal_type num_fixed =
    (this->d + 1 < max_sz) ? (this->d + 1) : max_sz;
  for (ordinal_type i=0; i<num_fixed; i++)
    piv[i] = 1;
  typedef Stokhos::OrthogonalizationFactory<ordinal_type,value_type> SOF;
  ordinal_type sz_ = SOF::createOrthogonalBasis(
    this->orthogonalization_method, threshold, this->verbose, B, weights,
    Q_, R, piv);

  // Compute Qp = A^T*W*Q
  SDM tmp(nqp, sz_);
  Qp_.reshape(this->pce_sz, sz_);
  for (ordinal_type i=0; i<nqp; i++)
    for (ordinal_type j=0; j<sz_; j++)
      tmp(i,j) = Q_(i,j)*weights[i];
  ordinal_type ret =
    Qp_.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, A, tmp, 0.0);
  TEUCHOS_ASSERT(ret == 0);

  // It isn't clear that Qp is orthogonal, but if you derive the projection
  // matrix from the original space to the reduced, you end up with
  // Q^T*W*A = Qp^T

  return sz_;
}
//GMRES int gmres(const Teuchos::SerialDenseMatrix<int, double> & A, Teuchos::SerialDenseMatrix<int,double> X,const Teuchos::SerialDenseMatrix<int,double> & B, int max_iter, double tolerance) { int n; int k; double resid; k=1; n=A.numRows(); std::cout << "A= " << A << std::endl; std::cout << "B= " << B << std::endl; //Teuchos::SerialDenseMatrix<int, double> Ax(n,1); //Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0); Teuchos::SerialDenseMatrix<int, double> r0(B); //r0-=Ax; resid=r0.normFrobenius(); std::cout << "resid= " << resid << std::endl; //define vector v=r/norm(r) where r=b-Ax r0.scale(1/resid); Teuchos::SerialDenseMatrix<int, double> h(1,1); //Matrix of orthog basis vectors V Teuchos::SerialDenseMatrix<int, double> V(n,1); //Set v=r0/norm(r0) to be 1st col of V for (int i=0; i<n; i++){ V(i,0)=r0(i,0); } //right hand side Teuchos::SerialDenseMatrix<int, double> bb(1,1); bb(0,0)=resid; Teuchos::SerialDenseMatrix<int, double> w(n,1); Teuchos::SerialDenseMatrix<int, double> c; Teuchos::SerialDenseMatrix<int, double> s; while (resid > tolerance && k < max_iter){ std::cout << "k = " << k << std::endl; h.reshape(k+1,k); //Arnoldi iteration(Gram-Schmidt ) V.reshape(n,k+1); //set vk to be kth col of V Teuchos::SerialDenseMatrix<int, double> vk(Teuchos::Copy, V, n,1,0,k-1); w.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, A, vk, 0.0); Teuchos::SerialDenseMatrix<int, double> vi(n,1); Teuchos::SerialDenseMatrix<int, double> ip(1,1); for (int i=0; i<k; i++){ //set vi to be ith col of V Teuchos::SerialDenseMatrix<int, double> vi(Teuchos::Copy, V, n,1,0,i); //Calculate inner product ip.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, vi, w, 0.0); h(i,k-1)= ip(0,0); //scale vi by h(i,k-1) vi.scale(ip(0,0)); w-=vi; } h(k,k-1)=w.normFrobenius(); w.scale(1.0/w.normFrobenius()); //add column vk+1=w to V for (int i=0; i<n; i++){ V(i,k)=w(i,0); } //Solve upper hessenberg least squares problem via Givens rotations //Compute previous Givens rotations for (int 
i=0; i<k-1; i++){ // double hi=h(i,k-1); // double hi1=h(i+1,k-1); // h(i,k-1)=c(i,0)*h(i,k-1)+s(i,0)*h(i+1,k-1); // h(i+1,k-1)=-1*s(i,0)*h(i,k-1)+c(i,0)*h(i+1,k-1); // h(i,k-1)=c(i,0)*hi+s(i,0)*hi1; // h(i+1,k-1)=-1*s(i,0)*hi+c(i,0)*hi1; double q=c(i,0)*h(i,k-1)+s(i,0)*h(i+1,k-1); h(i+1,k-1)=-1*s(i,0)*h(i,k-1)+c(i,0)*h(i+1,k-1); h(i,k-1)=q; } //Compute next Givens rotations c.reshape(k,1); s.reshape(k,1); bb.reshape(k+1,1); double l = sqrt(h(k-1,k-1)*h(k-1,k-1)+h(k,k-1)*h(k,k-1)); c(k-1,0)=h(k-1,k-1)/l; s(k-1,0)=h(k,k-1)/l; std::cout << "c " << c(k-1,0)<<std::endl; std::cout << "s " << s(k-1,0)<<std::endl; // Givens rotation on h and bb // h(k-1,k-1)=l; // h(k,k-1)=0; double hk=h(k,k-1); double hk1=h(k-1,k-1); h(k-1,k-1)=c(k-1,0)*hk1+s(k-1,0)*hk; h(k,k-1)=-1*s(k-1,0)*hk1+c(k-1,0)*hk; std::cout << "l = " << l <<std::endl; std::cout << "h(k-1,k-1) = should be l " << h(k-1,k-1) <<std::endl; std::cout << "h(k,k-1) = should be 0 " << h(k,k-1) <<std::endl; bb(k,0)=-1*s(k-1,0)*bb(k-1,0); bb(k-1,0)=c(k-1,0)*bb(k-1,0); //Determine residual resid =fabs(bb(k,0)); std::cout << "resid = " << resid <<std::endl; k++; } //Extract upper triangular square matrix bb.reshape(h.numRows()-1 ,1); //Solve linear system int info; std::cout << "bb pre solve = " << bb << std::endl; std::cout << "h= " << h << std::endl; Teuchos::LAPACK<int, double> lapack; lapack.TRTRS('U', 'N', 'N', h.numRows()-1, 1, h.values(), h.stride(), bb.values(), bb.stride(),&info); V.reshape(n,k-1); std::cout << "V= " << V << std::endl; std::cout << "y= " << bb << std::endl; X.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, V, bb, 1.0); std::cout << "X= " << X << std::endl; //Check V is orthogoanl // Teuchos::SerialDenseMatrix<int, double> vtv(V); // vtv.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, V, V, 0.0); // std::cout << "Vtv" << vtv << std::endl; return 0; }
// CG int CG(const Teuchos::SerialDenseMatrix<int, double> & A, Teuchos::SerialDenseMatrix<int,double> X,const Teuchos::SerialDenseMatrix<int,double> & B, int max_iter, double tolerance, Stokhos::DiagPreconditioner<int,double> prec) { int n; int k=0; double resid; n=A.numRows(); std::cout << "A= " << A << std::endl; std::cout << "B= " << B << std::endl; Teuchos::SerialDenseMatrix<int, double> Ax(n,1); Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0); Teuchos::SerialDenseMatrix<int, double> r(B); r-=Ax; resid=r.normFrobenius(); Teuchos::SerialDenseMatrix<int, double> rho(1,1); Teuchos::SerialDenseMatrix<int, double> oldrho(1,1); Teuchos::SerialDenseMatrix<int, double> pAp(1,1); Teuchos::SerialDenseMatrix<int, double> Ap(n,1); double b; double a; Teuchos::SerialDenseMatrix<int, double> p(r); while (resid > tolerance && k < max_iter){ Teuchos::SerialDenseMatrix<int, double> z(r); //z=M-1r // prec.ApplyInverse(r,z); rho.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,1.0, r, z, 0.0); if (k==0){ p.assign(z); rho.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, r, z, 0.0); } else { b=rho(0,0)/oldrho(0,0); p.scale(b); p+=z; } Ap.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, p, 0.0); pAp.multiply(Teuchos::TRANS,Teuchos::NO_TRANS,1.0, p, Ap, 0.0); a=rho(0,0)/pAp(0,0); Teuchos::SerialDenseMatrix<int, double> scalep(p); scalep.scale(a); X+=scalep; Ap.scale(a); r-=Ap; oldrho.assign(rho); resid=r.normFrobenius(); k++; } std::cout << "X= " << X << std::endl; return 0; }
// For every cell in the workset:
//   1. build the matrix A(qp, node) = BF(cell, node, qp) and solve
//      A X = I, so that X holds the inverse of A,
//   2. use X to map Fp from quadrature points to nodes (nodalFp),
//   3. form curlFp at the quadrature points from nodalFp and GradBF,
//   4. accumulate G(cell, qp, i, j) += Fp(cell, qp, i, k) * curlFp(qp, k, j).
//
// NOTE(review): A is shaped numNodes x numNodes but indexed by
// (qp, node); this presumably relies on numQPs == numNodes -- confirm.
// NOTE(review): the status codes returned by factor() and solve() are
// not inspected.
void DislocationDensity<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  typedef Teuchos::SerialDenseMatrix<int, double> DenseMatrix;

  DenseMatrix A;
  DenseMatrix X;
  DenseMatrix B;
  Teuchos::SerialDenseSolver<int, double> solver;

  A.shape(numNodes,numNodes);
  X.shape(numNodes,numNodes);
  B.shape(numNodes,numNodes);

  // Right-hand side is the identity.
  for (int d = 0; d < numNodes; ++d)
    B(d,d) = 1.0;

  // Clear the output field.
  for (int idx=0; idx < G.size() ; idx++)
    G[idx] = 0.0;

  for (std::size_t cell=0; cell < workset.numCells; ++cell)
  {
    // Node --> point operator for this cell.
    for (std::size_t node=0; node < numNodes; ++node)
    {
      for (std::size_t qp=0; qp < numQPs; ++qp)
      {
        A(qp,node) = BF(cell,node,qp);
      }
    }

    X = 0.0;

    solver.setMatrix( Teuchos::rcp( &A, false) );
    solver.setVectors( Teuchos::rcp( &X, false ), Teuchos::rcp( &B, false ) );

    // Solve the system A X = B to find A_inverse.
    int status = 0;
    status = solver.factor();
    status = solver.solve();

    // Nodal Fp: project the quadrature-point values with X.
    nodalFp.initialize(0.0);
    for (std::size_t node=0; node < numNodes; ++node)
    {
      for (std::size_t qp=0; qp < numQPs; ++qp)
      {
        for (std::size_t r=0; r < numDims; ++r)
        {
          for (std::size_t c=0; c < numDims; ++c)
          {
            nodalFp(node,r,c) += X(node,qp) * Fp(cell,qp,r,c);
          }
        }
      }
    }

    // Curl of Fp at the quadrature points, from the nodal values.
    curlFp.initialize(0.0);
    for (std::size_t node=0; node < numNodes; ++node)
    {
      for (std::size_t qp=0; qp < numQPs; ++qp)
      {
        curlFp(qp,0,0) += nodalFp(node,0,2) * GradBF(cell,node,qp,1) - nodalFp(node,0,1) * GradBF(cell,node,qp,2);
        curlFp(qp,0,1) += nodalFp(node,1,2) * GradBF(cell,node,qp,1) - nodalFp(node,1,1) * GradBF(cell,node,qp,2);
        curlFp(qp,0,2) += nodalFp(node,2,2) * GradBF(cell,node,qp,1) - nodalFp(node,2,1) * GradBF(cell,node,qp,2);

        curlFp(qp,1,0) += nodalFp(node,0,0) * GradBF(cell,node,qp,2) - nodalFp(node,0,2) * GradBF(cell,node,qp,0);
        curlFp(qp,1,1) += nodalFp(node,1,0) * GradBF(cell,node,qp,2) - nodalFp(node,1,2) * GradBF(cell,node,qp,0);
        curlFp(qp,1,2) += nodalFp(node,2,0) * GradBF(cell,node,qp,2) - nodalFp(node,2,2) * GradBF(cell,node,qp,0);

        curlFp(qp,2,0) += nodalFp(node,0,1) * GradBF(cell,node,qp,0) - nodalFp(node,0,0) * GradBF(cell,node,qp,1);
        curlFp(qp,2,1) += nodalFp(node,1,1) * GradBF(cell,node,qp,0) - nodalFp(node,1,0) * GradBF(cell,node,qp,1);
        curlFp(qp,2,2) += nodalFp(node,2,1) * GradBF(cell,node,qp,0) - nodalFp(node,2,0) * GradBF(cell,node,qp,1);
      }
    }

    // G += Fp * curlFp at every quadrature point.
    for (std::size_t qp=0; qp < numQPs; ++qp)
    {
      for (std::size_t r=0; r < numDims; ++r)
      {
        for (std::size_t c=0; c < numDims; ++c)
        {
          for (std::size_t m=0; m < numDims; ++m)
          {
            G(cell,qp,r,c) += Fp(cell,qp,r,m) * curlFp(qp,m,c);
          }
        }
      }
    }
  }
}
/// \brief Right-preconditioned GMRES for the dense system A X = B.
///
/// Starting from the guess already stored in \c X, builds a basis with
/// Gram-Schmidt (Arnoldi) and reduces the Hessenberg least-squares problem
/// with Givens rotations.  The correction is added to \c X in place.
///
/// \param A          System matrix.
/// \param X          On entry the initial guess; on exit the updated solution.
/// \param B          Right-hand side.
/// \param max_iter   The iteration loop runs while k < max_iter with k
///                   starting at 1, i.e. at most max_iter-1 steps.
/// \param tolerance  Iteration stops once the residual estimate is not
///                   greater than this value.
/// \param prec_iter  Iteration count forwarded to the diagonal (1) and
///                   Schur (4) preconditioners.
/// \param order,dim,diag  Forwarded to the Schur preconditioner.
/// \param PrecNum    1 = Diag, 2 = Jacobi, 3 = Gauss-Seidel, 4 = Schur;
///                   any other value applies no preconditioner.
/// \param M          Matrix handed to the selected preconditioner.
///
/// \return 0 on success, or the nonzero LAPACK TRTRS \c info value if the
///   triangular solve failed (in which case \c X is left unchanged).
int Stokhos::GMRESDivisionExpansionStrategy<ordinal_type,value_type,node_type>::
GMRES(const Teuchos::SerialDenseMatrix<int, double> & A,
      Teuchos::SerialDenseMatrix<int,double> & X,
      const Teuchos::SerialDenseMatrix<int,double> & B,
      int max_iter, double tolerance, int prec_iter, int order, int dim,
      int PrecNum,
      const Teuchos::SerialDenseMatrix<int, double> & M, int diag)
{
  const int n = A.numRows();
  int k = 1;

  // Initial residual r0 = B - A*X and its norm.
  Teuchos::SerialDenseMatrix<int, double> Ax(n,1);
  Ax.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,1.0, A, X, 0.0);
  Teuchos::SerialDenseMatrix<int, double> r0(B);
  r0-=Ax;
  double resid=r0.normFrobenius();

  // Normalize r0.  Skip the scaling when the residual is exactly zero so
  // that the basis is not filled with NaNs by a division by zero.
  if (resid > 0.0) {
    r0.scale(1/resid);
  }

  // Upper Hessenberg matrix
  Teuchos::SerialDenseMatrix<int, double> h(1,1);
  // Matrix of orthogonal basis vectors V; its first column is r0/norm(r0)
  Teuchos::SerialDenseMatrix<int, double> V(n,1);
  for (int i=0; i<n; i++) {
    V(i,0)=r0(i,0);
  }
  // Right-hand side of the least-squares problem
  Teuchos::SerialDenseMatrix<int, double> bb(1,1);
  bb(0,0)=resid;
  Teuchos::SerialDenseMatrix<int, double> w(n,1);
  // Cosines and sines of the accumulated Givens rotations
  Teuchos::SerialDenseMatrix<int, double> c;
  Teuchos::SerialDenseMatrix<int, double> s;

  while (resid > tolerance && k < max_iter) {
    h.reshape(k+1,k);
    // Arnoldi iteration (Gram-Schmidt)
    V.reshape(n,k+1);
    // vk is the k-th column of V
    Teuchos::SerialDenseMatrix<int, double> vk(Teuchos::Copy, V, n,1,0,k-1);
    // Preconditioning step: solve M z = vk
    Teuchos::SerialDenseMatrix<int, double> z(vk);
    if (PrecNum == 1) {
      Stokhos::DiagPreconditioner precond(M);
      precond.ApplyInverse(vk,z,prec_iter);
    }
    else if (PrecNum == 2) {
      Stokhos::JacobiPreconditioner precond(M);
      precond.ApplyInverse(vk,z,2);
    }
    else if (PrecNum == 3) {
      Stokhos::GSPreconditioner precond(M,1);
      precond.ApplyInverse(vk,z,1);
    }
    else if (PrecNum == 4) {
      Stokhos::SchurPreconditioner precond(M, order, dim, diag);
      precond.ApplyInverse(vk,z,prec_iter);
    }

    w.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, A, z, 0.0);

    // Orthogonalize w against the existing basis vectors.
    Teuchos::SerialDenseMatrix<int, double> ip(1,1);
    for (int i=0; i<k; i++) {
      // vi is the i-th column of V
      Teuchos::SerialDenseMatrix<int, double> vi(Teuchos::Copy, V, n,1,0,i);
      // Inner product <vi, w>
      ip.multiply(Teuchos::TRANS, Teuchos::NO_TRANS, 1.0, vi, w, 0.0);
      h(i,k-1)= ip(0,0);
      // Remove the component of w along vi
      vi.scale(ip(0,0));
      w-=vi;
    }
    h(k,k-1)=w.normFrobenius();
    w.scale(1.0/h(k,k-1));
    // Add column v_{k+1} = w to V
    for (int i=0; i<n; i++) {
      V(i,k)=w(i,0);
    }

    // Solve the upper Hessenberg least-squares problem via Givens rotations.
    // Apply the previously computed rotations to the new column of h.
    for (int i=0; i<k-1; i++) {
      double q=c(i,0)*h(i,k-1)+s(i,0)*h(i+1,k-1);
      h(i+1,k-1)=-1*s(i,0)*h(i,k-1)+c(i,0)*h(i+1,k-1);
      h(i,k-1)=q;
    }
    // Compute the next Givens rotation
    c.reshape(k,1);
    s.reshape(k,1);
    bb.reshape(k+1,1);
    double l = sqrt(h(k-1,k-1)*h(k-1,k-1)+h(k,k-1)*h(k,k-1));
    c(k-1,0)=h(k-1,k-1)/l;
    s(k-1,0)=h(k,k-1)/l;

    // Apply it to h and bb
    h(k-1,k-1)=l;
    h(k,k-1)=0;
    bb(k,0)=-s(k-1,0)*bb(k-1,0);
    bb(k-1,0)=c(k-1,0)*bb(k-1,0);

    // Residual estimate
    resid = fabs(bb(k,0));
    k++;
  }

  // Number of Arnoldi steps actually performed (equals h.numRows()-1).
  const int numIters = k-1;

  // With zero steps there is no correction to compute.  Skipping the solve
  // also avoids calling TRTRS on a 0 x 1 right-hand side whose leading
  // dimension would be 0, which LAPACK rejects as an illegal argument.
  if (numIters > 0) {
    // Keep only the square upper triangular part of the problem
    bb.reshape(numIters,1);

    // Solve the triangular system h y = bb
    int info = 0;
    Teuchos::LAPACK<int, double> lapack;
    lapack.TRTRS('U', 'N', 'N', numIters, 1, h.values(), h.stride(),
                 bb.values(), bb.stride(), &info);
    if (info != 0) {
      // Singular or invalid triangular system: do not apply a meaningless
      // update to X; report the LAPACK status to the caller instead.
      return info;
    }

    // ans = V y, then undo the right preconditioning: ans <- M^{-1} ans
    Teuchos::SerialDenseMatrix<int, double> ans(X);
    V.reshape(n,numIters);
    ans.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, V, bb, 0.0);
    if (PrecNum == 1) {
      Stokhos::DiagPreconditioner precond(M);
      precond.ApplyInverse(ans,ans,prec_iter);
    }
    else if (PrecNum == 2) {
      Stokhos::JacobiPreconditioner precond(M);
      precond.ApplyInverse(ans,ans,2);
    }
    else if (PrecNum == 3) {
      Stokhos::GSPreconditioner precond(M,1);
      precond.ApplyInverse(ans,ans,1);
    }
    else if (PrecNum == 4) {
      Stokhos::SchurPreconditioner precond(M, order, dim, diag);
      precond.ApplyInverse(ans,ans,prec_iter);
    }
    X+=ans;
  }

  std::cout << "iteration count= " << numIters << std::endl;

  return 0;
}
/// \brief Build the reduced orthogonal basis from monomials in F.
///
/// \param max_p      Maximum total order passed to CPBUtils::compute_terms.
/// \param threshold  Threshold forwarded to the orthogonalization routine.
/// \param A          Matrix used both to project the monomials
///                   (entries A(k,i), k over quadrature points) and to
///                   evaluate the new basis, Q_ = A * Qp_.
/// \param F          Values whose monomials are formed; F(i,k) is indexed by
///                   quadrature point i and dimension k.
/// \param weights    Quadrature weights; its size defines the number of
///                   quadrature points.
/// \param terms_     [out] Multi-indices produced by compute_terms.
/// \param num_terms_ [out] Term counts produced by compute_terms.
/// \param Qp_        [out] Coefficients of the new basis in the original
///                   basis (one column per new basis function).
/// \param Q_         [out] New basis evaluated at the quadrature points.
///
/// \return Size of the reduced basis as reported by createOrthogonalBasis.
ordinal_type Stokhos::MonomialProjGramSchmidtPCEBasis<ordinal_type, value_type>::
buildReducedBasis(
  ordinal_type max_p,
  value_type threshold,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& A,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& F,
  const Teuchos::Array<value_type>& weights,
  Teuchos::Array< Stokhos::MultiIndex<ordinal_type> >& terms_,
  Teuchos::Array<ordinal_type>& num_terms_,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& Qp_,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& Q_)
{
  // Compute basis terms -- 2-D array giving powers for each linear index
  ordinal_type max_sz;
  CPBUtils::compute_terms(max_p, this->d, max_sz, terms_, num_terms_);

  // Compute B matrix -- monomials in F
  // for i=0,...,nqp-1
  //   for j=0,...,sz-1
  //      B(i,j) = F(i,1)^terms_[j][1] * ... * F(i,d)^terms_[j][d]
  // where sz is the total size of a basis up to order p and terms_[j]
  // is an array of powers for each term in the total-order basis
  ordinal_type nqp = weights.size();
  SDM B(nqp, max_sz);
  for (ordinal_type i=0; i<nqp; i++) {
    for (ordinal_type j=0; j<max_sz; j++) {
      B(i,j) = 1.0;
      for (ordinal_type k=0; k<this->d; k++)
        B(i,j) *= std::pow(F(i,k), terms_[j][k]);
    }
  }

  // Project B into original basis -- should use SPAM for this
  SDM Bp(this->pce_sz, max_sz);
  const Teuchos::Array<value_type>& basis_norms =
    this->pce_basis->norm_squared();
  for (ordinal_type i=0; i<this->pce_sz; i++) {
    for (ordinal_type j=0; j<max_sz; j++) {
      Bp(i,j) = 0.0;
      for (ordinal_type k=0; k<nqp; k++)
        Bp(i,j) += weights[k]*B(k,j)*A(k,i);
      Bp(i,j) /= basis_norms[i];
    }
  }

  // Rescale columns of Bp to have unit norm
  for (ordinal_type j=0; j<max_sz; j++) {
    value_type nrm = 0.0;
    for (ordinal_type i=0; i<this->pce_sz; i++)
      nrm += Bp(i,j)*Bp(i,j)*basis_norms[i];
    nrm = std::sqrt(nrm);
    for (ordinal_type i=0; i<this->pce_sz; i++)
      Bp(i,j) /= nrm;
  }

  // Compute our new basis -- each column of Qp is the coefficients of the
  // new basis in the original basis.  Constrain pivoting so that the first
  // d+1 columns are included in Qp.
  Teuchos::Array<value_type> w(this->pce_sz, 1.0);
  SDM R;
  Teuchos::Array<ordinal_type> piv(max_sz);
  // piv only has max_sz entries, so never mark more than that many columns
  // (max_sz can be smaller than d+1, e.g. for a very low max_p).
  const ordinal_type num_fixed =
    (this->d+1 < max_sz) ? static_cast<ordinal_type>(this->d+1) : max_sz;
  for (ordinal_type i=0; i<num_fixed; i++)
    piv[i] = 1;
  typedef Stokhos::OrthogonalizationFactory<ordinal_type,value_type> SOF;
  ordinal_type sz_ = SOF::createOrthogonalBasis(
    this->orthogonalization_method, threshold, this->verbose, Bp, w,
    Qp_, R, piv);

  // Evaluate new basis at original quadrature points
  Q_.reshape(nqp, sz_);
  ordinal_type ret =
    Q_.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, 1.0, A, Qp_, 0.0);
  TEUCHOS_ASSERT(ret == 0);

  return sz_;
}
int main (int argc, char *argv[]) { using namespace Anasazi; using Teuchos::RCP; using Teuchos::rcp; using std::endl; #ifdef HAVE_MPI // Initialize MPI MPI_Init (&argc, &argv); #endif // HAVE_MPI // Create an Epetra communicator #ifdef HAVE_MPI Epetra_MpiComm Comm (MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif // HAVE_MPI // Create an Anasazi output manager BasicOutputManager<double> printer; printer.stream(Errors) << Anasazi_Version() << std::endl << std::endl; // Get the sorting std::string from the command line std::string which ("LM"); Teuchos::CommandLineProcessor cmdp (false, true); cmdp.setOption("sort", &which, "Targetted eigenvalues (SM or LM)."); if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { #ifdef HAVE_MPI MPI_Finalize (); #endif // HAVE_MPI return -1; } // Dimension of the matrix // // Discretization points in any one direction. const int nx = 10; // Size of matrix nx*nx const int NumGlobalElements = nx*nx; // Construct a Map that puts approximately the same number of // equations on each process. Epetra_Map Map (NumGlobalElements, 0, Comm); // Get update list and number of local equations from newly created Map. int NumMyElements = Map.NumMyElements (); std::vector<int> MyGlobalElements (NumMyElements); Map.MyGlobalElements (&MyGlobalElements[0]); // Create an integer vector NumNz that is used to build the Petra // matrix. NumNz[i] is the number of OFF-DIAGONAL terms for the // i-th global equation on this process. std::vector<int> NumNz (NumMyElements); /* We are building a matrix of block structure: | T -I | |-I T -I | | -I T | | ... -I| | -I T| where each block is dimension nx by nx and the matrix is on the order of nx*nx. The block T is a tridiagonal matrix. 
*/ for (int i=0; i<NumMyElements; ++i) { if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1 || MyGlobalElements[i] == nx-1 || MyGlobalElements[i] == nx*(nx-1) ) { NumNz[i] = 3; } else if (MyGlobalElements[i] < nx || MyGlobalElements[i] > nx*(nx-1) || MyGlobalElements[i]%nx == 0 || (MyGlobalElements[i]+1)%nx == 0) { NumNz[i] = 4; } else { NumNz[i] = 5; } } // Create an Epetra_Matrix RCP<Epetra_CrsMatrix> A = rcp (new Epetra_CrsMatrix (Epetra_DataAccess::Copy, Map, &NumNz[0])); // Compute coefficients for discrete convection-diffution operator const double one = 1.0; std::vector<double> Values(4); std::vector<int> Indices(4); double rho = 0.0; double h = one /(nx+1); double h2 = h*h; double c = 5.0e-01*rho/ h; Values[0] = -one/h2 - c; Values[1] = -one/h2 + c; Values[2] = -one/h2; Values[3]= -one/h2; double diag = 4.0 / h2; int NumEntries; for (int i=0; i<NumMyElements; ++i) { if (MyGlobalElements[i]==0) { Indices[0] = 1; Indices[1] = nx; NumEntries = 2; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i] == nx*(nx-1)) { Indices[0] = nx*(nx-1)+1; Indices[1] = nx*(nx-2); NumEntries = 2; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i] == nx-1) { Indices[0] = nx-2; NumEntries = 1; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." 
); Indices[0] = 2*nx-1; info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i] == NumGlobalElements-1) { Indices[0] = NumGlobalElements-2; NumEntries = 1; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); Indices[0] = nx*(nx-1)-1; info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i] < nx) { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; Indices[2] = MyGlobalElements[i]+nx; NumEntries = 3; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i] > nx*(nx-1)) { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; Indices[2] = MyGlobalElements[i]-nx; NumEntries = 3; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i]%nx == 0) { Indices[0] = MyGlobalElements[i]+1; Indices[1] = MyGlobalElements[i]-nx; Indices[2] = MyGlobalElements[i]+nx; NumEntries = 3; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." 
); } else if ((MyGlobalElements[i]+1)%nx == 0) { Indices[0] = MyGlobalElements[i]-nx; Indices[1] = MyGlobalElements[i]+nx; NumEntries = 2; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); Indices[0] = MyGlobalElements[i]-1; NumEntries = 1; info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; Indices[2] = MyGlobalElements[i]-nx; Indices[3] = MyGlobalElements[i]+nx; NumEntries = 4; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } // Put in the diagonal entry int info = A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, &MyGlobalElements[i]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } // Finish up int info = A->FillComplete (); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "A->FillComplete() returned info = " << info << " != 0." ); A->SetTracebackMode (1); // Shutdown Epetra Warning tracebacks // Create a identity matrix for the temporary mass matrix RCP<Epetra_CrsMatrix> M = rcp (new Epetra_CrsMatrix (Epetra_DataAccess::Copy, Map, 1)); for (int i=0; i<NumMyElements; i++) { Values[0] = one; Indices[0] = i; NumEntries = 1; info = M->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "M->InsertGlobalValues() returned info = " << info << " != 0." 
); } // Finish up info = M->FillComplete (); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "M->FillComplete() returned info = " << info << " != 0." ); M->SetTracebackMode (1); // Shutdown Epetra Warning tracebacks //************************************ // Call the LOBPCG solver manager //*********************************** // // Variables used for the LOBPCG Method const int nev = 10; const int blockSize = 5; const int maxIters = 500; const double tol = 1.0e-8; typedef Epetra_MultiVector MV; typedef Epetra_Operator OP; typedef MultiVecTraits<double, Epetra_MultiVector> MVT; // Create an Epetra_MultiVector for an initial vector to start the // solver. Note: This needs to have the same number of columns as // the blocksize. RCP<Epetra_MultiVector> ivec = rcp (new Epetra_MultiVector (Map, blockSize)); ivec->Random (); // fill the initial vector with random values // Create the eigenproblem. RCP<BasicEigenproblem<double, MV, OP> > MyProblem = rcp (new BasicEigenproblem<double, MV, OP> (A, ivec)); // Inform the eigenproblem that the operator A is symmetric MyProblem->setHermitian (true); // Set the number of eigenvalues requested MyProblem->setNEV (nev); // Tell the eigenproblem that you are finishing passing it information. const bool success = MyProblem->setProblem (); if (! 
success) { printer.print (Errors, "Anasazi::BasicEigenproblem::setProblem() reported an error.\n"); #ifdef HAVE_MPI MPI_Finalize (); #endif // HAVE_MPI return -1; } // Create parameter list to pass into the solver manager Teuchos::ParameterList MyPL; MyPL.set ("Which", which); MyPL.set ("Block Size", blockSize); MyPL.set ("Maximum Iterations", maxIters); MyPL.set ("Convergence Tolerance", tol); MyPL.set ("Full Ortho", true); MyPL.set ("Use Locking", true); // Create the solver manager LOBPCGSolMgr<double, MV, OP> MySolverMan (MyProblem, MyPL); // Solve the problem ReturnType returnCode = MySolverMan.solve (); // Get the eigenvalues and eigenvectors from the eigenproblem Eigensolution<double,MV> sol = MyProblem->getSolution (); std::vector<Value<double> > evals = sol.Evals; RCP<MV> evecs = sol.Evecs; // Compute residuals. std::vector<double> normR (sol.numVecs); if (sol.numVecs > 0) { Teuchos::SerialDenseMatrix<int,double> T (sol.numVecs, sol.numVecs); Epetra_MultiVector tempAevec (Map, sol.numVecs ); T.putScalar (0.0); for (int i = 0; i < sol.numVecs; ++i) { T(i,i) = evals[i].realpart; } A->Apply (*evecs, tempAevec); MVT::MvTimesMatAddMv (-1.0, *evecs, T, 1.0, tempAevec); MVT::MvNorm (tempAevec, normR); } // Print the results std::ostringstream os; os.setf (std::ios_base::right, std::ios_base::adjustfield); os << "Solver manager returned " << (returnCode == Converged ? "converged." : "unconverged.") << endl; os << endl; os << "------------------------------------------------------" << endl; os << std::setw(16) << "Eigenvalue" << std::setw(18) << "Direct Residual" << endl; os << "------------------------------------------------------" << endl; for (int i = 0; i < sol.numVecs; ++i) { os << std::setw(16) << evals[i].realpart << std::setw(18) << normR[i] / evals[i].realpart << endl; } os << "------------------------------------------------------" << endl; printer.print (Errors, os.str ()); #ifdef HAVE_MPI MPI_Finalize (); #endif // HAVE_MPI return 0; }
void factorExplicit (Kokkos::MultiVector<Scalar, NodeType>& A, Kokkos::MultiVector<Scalar, NodeType>& Q, Teuchos::SerialDenseMatrix<LocalOrdinal, Scalar>& R, const bool contiguousCacheBlocks, const bool forceNonnegativeDiagonal=false) { using Teuchos::asSafe; typedef Kokkos::MultiVector<Scalar, NodeType> KMV; // Tsqr currently likes LocalOrdinal ordinals, but // Kokkos::MultiVector has size_t ordinals. Do conversions // here. // // Teuchos::asSafe() can do safe conversion (e.g., checking for // overflow when casting to a narrower integer type), if a // custom specialization is defined for // Teuchos::ValueTypeConversionTraits<size_t, LocalOrdinal>. // Otherwise, this has the same (potentially) unsafe effect as // static_cast<LocalOrdinal>(...) would have. const LocalOrdinal A_numRows = asSafe<LocalOrdinal> (A.getNumRows()); const LocalOrdinal A_numCols = asSafe<LocalOrdinal> (A.getNumCols()); const LocalOrdinal A_stride = asSafe<LocalOrdinal> (A.getStride()); const LocalOrdinal Q_numRows = asSafe<LocalOrdinal> (Q.getNumRows()); const LocalOrdinal Q_numCols = asSafe<LocalOrdinal> (Q.getNumCols()); const LocalOrdinal Q_stride = asSafe<LocalOrdinal> (Q.getStride()); // Sanity checks for matrix dimensions if (A_numRows < A_numCols) { std::ostringstream os; os << "In Tsqr::factorExplicit: input matrix A has " << A_numRows << " local rows, and " << A_numCols << " columns. The input " "matrix must have at least as many rows on each processor as " "there are columns."; throw std::invalid_argument(os.str()); } else if (A_numRows != Q_numRows) { std::ostringstream os; os << "In Tsqr::factorExplicit: input matrix A and output matrix Q " "must have the same number of rows. A has " << A_numRows << " rows" " and Q has " << Q_numRows << " rows."; throw std::invalid_argument(os.str()); } else if (R.numRows() < R.numCols()) { std::ostringstream os; os << "In Tsqr::factorExplicit: output matrix R must have at least " "as many rows as columns. 
R has " << R.numRows() << " rows and " << R.numCols() << " columns."; throw std::invalid_argument(os.str()); } else if (A_numCols != R.numCols()) { std::ostringstream os; os << "In Tsqr::factorExplicit: input matrix A and output matrix R " "must have the same number of columns. A has " << A_numCols << " columns and R has " << R.numCols() << " columns."; throw std::invalid_argument(os.str()); } // Check for quick exit, based on matrix dimensions if (Q_numCols == 0) return; // Hold on to nonconst views of A and Q. This will make TSQR // correct (if perhaps inefficient) for all possible Kokkos Node // types, even GPU nodes. Teuchos::ArrayRCP<scalar_type> A_ptr = A.getValuesNonConst(); Teuchos::ArrayRCP<scalar_type> Q_ptr = Q.getValuesNonConst(); R.putScalar (STS::zero()); NodeOutput nodeResults = nodeTsqr_->factor (A_numRows, A_numCols, A_ptr.getRawPtr(), A_stride, R.values(), R.stride(), contiguousCacheBlocks); // FIXME (mfh 19 Oct 2010) Replace actions on raw pointer with // actions on the Kokkos::MultiVector or at least the ArrayRCP. nodeTsqr_->fill_with_zeros (Q_numRows, Q_numCols, Q_ptr.getRawPtr(), Q_stride, contiguousCacheBlocks); matview_type Q_rawView (Q_numRows, Q_numCols, Q_ptr.getRawPtr(), Q_stride); matview_type Q_top_block = nodeTsqr_->top_block (Q_rawView, contiguousCacheBlocks); if (Q_top_block.nrows() < R.numCols()) { std::ostringstream os; os << "The top block of Q has too few rows. This means that the " << "the intranode TSQR implementation has a bug in its top_block" << "() method. 
The top block should have at least " << R.numCols() << " rows, but instead has only " << Q_top_block.ncols() << " rows."; throw std::logic_error (os.str()); } { matview_type Q_top (R.numCols(), Q_numCols, Q_top_block.get(), Q_top_block.lda()); matview_type R_view (R.numRows(), R.numCols(), R.values(), R.stride()); distTsqr_->factorExplicit (R_view, Q_top, forceNonnegativeDiagonal); } nodeTsqr_->apply (ApplyType::NoTranspose, A_numRows, A_numCols, A_ptr.getRawPtr(), A_stride, nodeResults, Q_numCols, Q_ptr.getRawPtr(), Q_stride, contiguousCacheBlocks); // If necessary, force the R factor to have a nonnegative diagonal. if (forceNonnegativeDiagonal && ! QR_produces_R_factor_with_nonnegative_diagonal()) { details::NonnegDiagForcer<LocalOrdinal, Scalar, STS::isComplex> forcer; matview_type Q_mine (Q_numRows, Q_numCols, Q_ptr.getRawPtr(), Q_stride); matview_type R_mine (R.numRows(), R.numCols(), R.values(), R.stride()); forcer.force (Q_mine, R_mine); } // "Commit" the changes to the multivector. A_ptr = Teuchos::null; Q_ptr = Teuchos::null; }