/// \brief Verify the result of the "thin" QR factorization \f$A = QR\f$. /// /// This method returns a list of three magnitudes: /// - \f$\| A - QR \|_F\f$ /// - \f$\|I - Q^* Q\|_F\f$ /// - \f$\|A\|_F\f$ /// /// The notation $\f\| X \|\f$ denotes the Frobenius norm /// (square root of sum of squares) of a matrix \f$X\f$. /// Returning the Frobenius norm of \f$A\f$ allows you to scale /// or not scale the residual \f$\|A - QR\|\f$ as you prefer. virtual std::vector< magnitude_type > verify (const multivector_type& A, const multivector_type& Q, const Teuchos::SerialDenseMatrix< local_ordinal_type, scalar_type >& R) { using Teuchos::ArrayRCP; local_ordinal_type nrowsLocal_A, ncols_A, LDA; local_ordinal_type nrowsLocal_Q, ncols_Q, LDQ; fetchDims (A, nrowsLocal_A, ncols_A, LDA); fetchDims (Q, nrowsLocal_Q, ncols_Q, LDQ); if (nrowsLocal_A != nrowsLocal_Q) throw std::runtime_error ("A and Q must have same number of rows"); else if (ncols_A != ncols_Q) throw std::runtime_error ("A and Q must have same number of columns"); else if (ncols_A != R.numCols()) throw std::runtime_error ("A and R must have same number of columns"); else if (R.numRows() < R.numCols()) throw std::runtime_error ("R must have no fewer rows than columns"); // Const views suffice for verification ArrayRCP< const scalar_type > A_ptr = fetchConstView (A); ArrayRCP< const scalar_type > Q_ptr = fetchConstView (Q); return global_verify (nrowsLocal_A, ncols_A, A_ptr.get(), LDA, Q_ptr.get(), LDQ, R.values(), R.stride(), pScalarMessenger_.get()); }
// Apply the PCE-to-quadrature-point transform as a weighted sum of the
// stored per-term operators:
//
//   result = alpha * sum_i smolyak_coeffs[i] * S_i(op_i(G_i(input)))
//            + beta * result
//
// where G_i is the gather() step using scatter_maps[i] and S_i is the
// scatter() step using gather_maps[i].
//
// alpha  [in]     Scaling applied to the whole operator contribution.
// input  [in]     Coefficients to transform.
// result [in/out] Scaled by beta first, then accumulated into.
// beta   [in]     Scaling applied to the incoming contents of result.
// trans  [in]     If true, the operands are laid out transposed (one
//                 entity per row instead of per column).
//
// NOTE(review): gather() is called with scatter_maps and scatter() with
// gather_maps; this pairing is kept exactly as in the original code.
void
Stokhos::SmolyakPseudoSpectralOperator<ordinal_type,value_type,point_compare_type>::
transformPCE2QP_smolyak(
  const value_type& alpha,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& input,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& result,
  const value_type& beta,
  bool trans) const
{
  Teuchos::SerialDenseMatrix<ordinal_type,value_type> op_input, op_result;
  result.scale(beta);

  for (ordinal_type i=0; i<operators.size(); i++) {
    Teuchos::RCP<operator_type> op = operators[i];

    // Size the per-term work matrices for this operator.
    if (trans) {
      op_input.reshape(input.numRows(), op->coeff_size());
      op_result.reshape(result.numRows(), op->point_size());
    }
    else {
      op_input.reshape(op->coeff_size(), input.numCols());
      op_result.reshape(op->point_size(), result.numCols());
    }

    // The caller's alpha must scale every term; previously it was
    // dropped and only the Smolyak coefficient was applied.
    const value_type term_scale = alpha * smolyak_coeffs[i];

    gather(scatter_maps[i], input, trans, op_input);
    // beta = 0 here: op_result is overwritten, the accumulation into
    // result happens in scatter().
    op->transformPCE2QP(term_scale, op_input, op_result, 0.0, trans);
    scatter(gather_maps[i], op_result, trans, result);
  }
}
// Update *this with alpha * A * B + beta * (*this). void MvTimesMatAddMv (ScalarType alpha, const Anasazi::MultiVec<ScalarType> &A, const Teuchos::SerialDenseMatrix<int, ScalarType> &B, ScalarType beta) { assert (Length_ == A.GetVecLength()); assert (B.numRows() == A.GetNumberVecs()); assert (B.numCols() <= NumberVecs_); MyMultiVec* MyA; MyA = dynamic_cast<MyMultiVec*>(&const_cast<Anasazi::MultiVec<ScalarType> &>(A)); assert(MyA!=NULL); if ((*this)[0] == (*MyA)[0]) { // If this == A, then need additional storage ... // This situation is a bit peculiar but it may be required by // certain algorithms. std::vector<ScalarType> tmp(NumberVecs_); for (int i = 0 ; i < Length_ ; ++i) { for (int v = 0; v < A.GetNumberVecs() ; ++v) { tmp[v] = (*MyA)(i, v); } for (int v = 0 ; v < B.numCols() ; ++v) { (*this)(i, v) *= beta; ScalarType res = Teuchos::ScalarTraits<ScalarType>::zero(); for (int j = 0 ; j < A.GetNumberVecs() ; ++j) { res += tmp[j] * B(j, v); } (*this)(i, v) += alpha * res; } } } else { for (int i = 0 ; i < Length_ ; ++i) { for (int v = 0 ; v < B.numCols() ; ++v) { (*this)(i, v) *= beta; ScalarType res = 0.0; for (int j = 0 ; j < A.GetNumberVecs() ; ++j) { res += (*MyA)(i, j) * B(j, v); } (*this)(i, v) += alpha * res; } } } }
// Apply a dense operator A to input with a single GEMM call.
//
//   trans == false:  result = alpha * A(:,0:k) * input   + beta * result,
//                    with k = input.numRows()
//   trans == true:   result = alpha * input * A(:,0:k)^T + beta * result,
//                    with k = input.numCols()
//
// In both cases only the leading k columns of A take part, so input may
// hold fewer coefficients than A has columns.  The shapes of input,
// result and A are checked with TEUCHOS_ASSERT before the multiply.
void
Stokhos::SmolyakPseudoSpectralOperator<ordinal_type,value_type,point_compare_type>::
apply_direct(
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& A,
  const value_type& alpha,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& input,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& result,
  const value_type& beta,
  bool trans) const
{
  if (!trans) {
    TEUCHOS_ASSERT(input.numRows() <= A.numCols());
    TEUCHOS_ASSERT(result.numRows() == A.numRows());
    TEUCHOS_ASSERT(result.numCols() == input.numCols());

    // GEMM dimensions: result is m x n, inner dimension is k.
    const ordinal_type m = A.numRows();
    const ordinal_type n = input.numCols();
    const ordinal_type k = input.numRows();
    blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k,
              alpha,
              A.values(), A.stride(),
              input.values(), input.stride(),
              beta,
              result.values(), result.stride());
    return;
  }

  TEUCHOS_ASSERT(input.numCols() <= A.numCols());
  TEUCHOS_ASSERT(result.numCols() == A.numRows());
  TEUCHOS_ASSERT(result.numRows() == input.numRows());

  // GEMM dimensions: result is m x n, inner dimension is k.
  const ordinal_type m = input.numRows();
  const ordinal_type n = A.numRows();
  const ordinal_type k = input.numCols();
  blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, k,
            alpha,
            input.values(), input.stride(),
            A.values(), A.stride(),
            beta,
            result.values(), result.stride());
}
// Assemble the state for one implicit Runge-Kutta stage:
//
//   x_out = x_base + dt * sum_j A_in(stageIndex, j) * x_stage_bar[j]
//
// stageIndex  [in]  Row of the Butcher matrix to use; must lie in
//                   [0, number of stages).
// A_in        [in]  Square Butcher coefficient matrix.
// dt          [in]  Time step size.
// x_base      [in]  Base state that the stage increments are added to.
// x_stage_bar [in]  Product vector with one block per stage.
// x_out_ptr   [out] Receives the assembled stage state.
void assembleIRKState(
  const int stageIndex,
  const Teuchos::SerialDenseMatrix<int,Scalar> &A_in,
  const Scalar dt,
  const Thyra::VectorBase<Scalar> &x_base,
  const Thyra::ProductVectorBase<Scalar> &x_stage_bar,
  Teuchos::Ptr<Thyra::VectorBase<Scalar> > x_out_ptr
  )
{
  typedef ScalarTraits<Scalar> ST;

  const int numStages_in = A_in.numRows();

  // Validate the stage index and that all inputs agree on the stage count.
  TEUCHOS_ASSERT_IN_RANGE_UPPER_EXCLUSIVE( stageIndex, 0, numStages_in );
  TEUCHOS_ASSERT_EQUALITY( A_in.numRows(), numStages_in );
  TEUCHOS_ASSERT_EQUALITY( A_in.numCols(), numStages_in );
  TEUCHOS_ASSERT_EQUALITY( x_stage_bar.productSpace()->numBlocks(), numStages_in );

  Thyra::VectorBase<Scalar>& x_out = *x_out_ptr;

  // Start from the base state ...
  V_V( outArg(x_out), x_base );

  // ... and add each stage block weighted by dt times its coefficient.
  int stage = 0;
  while ( stage < numStages_in ) {
    Vp_StV( outArg(x_out), dt * A_in(stageIndex,stage), *x_stage_bar.getVectorBlock(stage) );
    ++stage;
  }
}
void EpetraMultiVec::MvTimesMatAddMv ( double alpha, const MultiVec<double>& A, const Teuchos::SerialDenseMatrix<int,double>& B, double beta ) { Epetra_LocalMap LocalMap(B.numRows(), 0, Map().Comm()); Epetra_MultiVector B_Pvec(View, LocalMap, B.values(), B.stride(), B.numCols()); EpetraMultiVec *A_vec = dynamic_cast<EpetraMultiVec *>(&const_cast<MultiVec<double> &>(A)); TEUCHOS_TEST_FOR_EXCEPTION( A_vec==NULL, std::invalid_argument, "Anasazi::EpetraMultiVec::SetBlocks() cast of MultiVec<double> to EpetraMultiVec failed."); TEUCHOS_TEST_FOR_EXCEPTION( Multiply( 'N', 'N', alpha, *A_vec, B_Pvec, beta ) != 0, EpetraMultiVecFailure, "Anasazi::EpetraMultiVec::MvTimesMatAddMv() call to Epetra_MultiVec::Multiply() returned a nonzero value."); }
void EpetraMultiVec::MvTransMv ( const double alpha, const MultiVec<double>& A, Teuchos::SerialDenseMatrix<int,double>& B) const { EpetraMultiVec *A_vec = dynamic_cast<EpetraMultiVec *>(&const_cast<MultiVec<double> &>(A)); if (A_vec) { Epetra_LocalMap LocalMap(B.numRows(), 0, Map().Comm()); Epetra_MultiVector B_Pvec(View, LocalMap, B.values(), B.stride(), B.numCols()); int info = B_Pvec.Multiply( 'T', 'N', alpha, *A_vec, *this, 0.0 ); TEST_FOR_EXCEPTION(info!=0, EpetraMultiVecFailure, "Belos::EpetraMultiVec::MvTransMv call to Multiply() returned a nonzero value."); } }
// Generic BLAS level 3 matrix multiplication // \f$\text{this}\leftarrow \alpha A B+\beta\text{this}\f$ void gemm(const Real alpha, const MV& A, const Teuchos::SerialDenseMatrix<int,Real> &B, const Real beta) { // Scale this by beta this->scale(beta); for(int i=0;i<B.numRows();++i) { for(int j=0;j<B.numCols();++j) { mvec_[j]->axpy(alpha*B(i,j),*A.getVector(i)); } } }
void EpetraMultiVec::MvTimesMatAddMv ( const double alpha, const MultiVec<double>& A, const Teuchos::SerialDenseMatrix<int,double>& B, const double beta ) { Epetra_LocalMap LocalMap(B.numRows(), 0, Map().Comm()); Epetra_MultiVector B_Pvec(View, LocalMap, B.values(), B.stride(), B.numCols()); EpetraMultiVec *A_vec = dynamic_cast<EpetraMultiVec *>(&const_cast<MultiVec<double> &>(A)); TEST_FOR_EXCEPTION(A_vec==NULL, EpetraMultiVecFailure, "Belos::EpetraMultiVec::MvTimesMatAddMv cast from Belos::MultiVec<> to Belos::EpetraMultiVec failed."); int info = Multiply( 'N', 'N', alpha, *A_vec, B_Pvec, beta ); TEST_FOR_EXCEPTION(info!=0, EpetraMultiVecFailure, "Belos::EpetraMultiVec::MvTimesMatAddMv call to Multiply() returned a nonzero value."); }
// Accumulate the entries of input into result at the positions given by
// map.
//
//   trans == false:  result(map[i], j) += input(i, j)
//                    for every column j of input and every map entry i
//   trans == true:   result(i, map[j]) += input(i, j)
//                    for every row i of input and every map entry j
//
// result is added to, never overwritten.
void
Stokhos::SmolyakPseudoSpectralOperator<ordinal_type,value_type,point_compare_type>::
scatter(
  const Teuchos::Array<ordinal_type>& map,
  const Teuchos::SerialDenseMatrix<ordinal_type,value_type>& input,
  bool trans,
  Teuchos::SerialDenseMatrix<ordinal_type,value_type>& result) const
{
  if (!trans) {
    // map redirects rows.
    for (ordinal_type col=0; col<input.numCols(); col++) {
      for (ordinal_type k=0; k<map.size(); k++) {
        result(map[k],col) += input(k,col);
      }
    }
    return;
  }

  // map redirects columns.
  for (ordinal_type k=0; k<map.size(); k++) {
    for (ordinal_type row=0; row<input.numRows(); row++) {
      result(row,map[k]) += input(row,k);
    }
  }
}
void EpetraMultiVec::MvTransMv ( double alpha, const MultiVec<double>& A, Teuchos::SerialDenseMatrix<int,double>& B #ifdef HAVE_ANASAZI_EXPERIMENTAL , ConjType conj #endif ) const { EpetraMultiVec *A_vec = dynamic_cast<EpetraMultiVec *>(&const_cast<MultiVec<double> &>(A)); if (A_vec) { Epetra_LocalMap LocalMap(B.numRows(), 0, Map().Comm()); Epetra_MultiVector B_Pvec(View, LocalMap, B.values(), B.stride(), B.numCols()); TEUCHOS_TEST_FOR_EXCEPTION( B_Pvec.Multiply( 'T', 'N', alpha, *A_vec, *this, 0.0 ) != 0, EpetraMultiVecFailure, "Anasazi::EpetraMultiVec::MvTransMv() call to Epetra_MultiVec::Multiply() returned a nonzero value."); } }
// Compute a dense matrix B through the matrix-matrix multiply alpha * A^H * (*this). void MvTransMv (ScalarType alpha, const Anasazi::MultiVec<ScalarType>& A, Teuchos::SerialDenseMatrix< int, ScalarType >& B #ifdef HAVE_ANASAZI_EXPERIMENTAL , Anasazi::ConjType conj #endif ) const { MyMultiVec* MyA; MyA = dynamic_cast<MyMultiVec*>(&const_cast<Anasazi::MultiVec<ScalarType> &>(A)); assert (MyA != 0); assert (A.GetVecLength() == Length_); assert (NumberVecs_ <= B.numCols()); assert (A.GetNumberVecs() <= B.numRows()); #ifdef HAVE_ANASAZI_EXPERIMENTAL if (conj == Anasazi::CONJ) { #endif for (int v = 0 ; v < A.GetNumberVecs() ; ++v) { for (int w = 0 ; w < NumberVecs_ ; ++w) { ScalarType value = 0.0; for (int i = 0 ; i < Length_ ; ++i) { value += Teuchos::ScalarTraits<ScalarType>::conjugate((*MyA)(i, v)) * (*this)(i, w); } B(v, w) = alpha * value; } } #ifdef HAVE_ANASAZI_EXPERIMENTAL } else { for (int v = 0 ; v < A.GetNumberVecs() ; ++v) { for (int w = 0 ; w < NumberVecs_ ; ++w) { ScalarType value = 0.0; for (int i = 0 ; i < Length_ ; ++i) { value += (*MyA)(i, v) * (*this)(i, w); } B(v, w) = alpha * value; } } } #endif }
// Compute a dense matrix B through the matrix-matrix multiply alpha * A^H * (*this). void MvTransMv (const ScalarType alpha, const Belos::MultiVec<ScalarType>& A, Teuchos::SerialDenseMatrix< int, ScalarType >& B) const { MyMultiVec* MyA; MyA = dynamic_cast<MyMultiVec*>(&const_cast<Belos::MultiVec<ScalarType> &>(A)); TEUCHOS_ASSERT(MyA != NULL); assert (A.GetGlobalLength() == Length_); assert (NumberVecs_ <= B.numCols()); assert (A.GetNumberVecs() <= B.numRows()); for (int v = 0 ; v < A.GetNumberVecs() ; ++v) { for (int w = 0 ; w < NumberVecs_ ; ++w) { ScalarType value = 0.0; for (int i = 0 ; i < Length_ ; ++i) { value += Teuchos::ScalarTraits<ScalarType>::conjugate((*MyA)(i, v)) * (*this)(i, w); } B(v, w) = alpha * value; } } }
/*! \brief Update \c mv with \f$ \alpha A B + \beta mv \f$.
 *
 * Each column of \c mv is rebuilt in a temporary vector and then copied
 * back.  The temporary starts as a copy of the column when \c beta is
 * one, as a zero vector when \c beta is zero, and as \c beta times the
 * column otherwise.  Unless \c alpha is zero, the terms
 * \f$ \alpha B(i,j) A_i \f$ are then added for every vector of \c A.
 *
 * Throws (via TEST_FOR_EXCEPT) if \c mv does not have exactly
 * \c B.numCols() vectors.
 */
static void MvTimesMatAddMv( const double alpha, const _MV & A,
                             const Teuchos::SerialDenseMatrix<int,double>& B,
                             const double beta, _MV & mv )
{
  int n = B.numCols();
  TEST_FOR_EXCEPT(mv.size() != n);

  for (int col=0; col<mv.size(); col++)
  {
    // Start from beta * mv[col], with the two cheap cases special-cased.
    Vector<double> acc;
    if (beta==one())
    {
      acc = mv[col].copy();
    }
    else if (beta==zero())
    {
      // Explicitly zeroed instead of multiplied by zero.
      acc = mv[col].copy();
      acc.setToConstant(zero());
    }
    else
    {
      acc = beta * mv[col];
    }

    // Add column `col` of alpha * A * B, skipping the work when alpha is zero.
    if (alpha != zero())
    {
      for (int k=0; k<A.size(); k++)
      {
        acc = acc + alpha*B(k,col)*A[k];
      }
    }

    mv[col].acceptCopyOf(acc);
  }
}
void EpetraOpMultiVec::MvTransMv ( double alpha, const MultiVec<double>& A, Teuchos::SerialDenseMatrix<int,double>& B #ifdef HAVE_ANASAZI_EXPERIMENTAL , ConjType conj #endif ) const { EpetraOpMultiVec *A_vec = dynamic_cast<EpetraOpMultiVec *>(&const_cast<MultiVec<double> &>(A)); if (A_vec) { Epetra_LocalMap LocalMap(B.numRows(), 0, Epetra_MV->Map().Comm()); Epetra_MultiVector B_Pvec(Epetra_DataAccess::View, LocalMap, B.values(), B.stride(), B.numCols()); int info = Epetra_OP->Apply( *Epetra_MV, *Epetra_MV_Temp ); TEUCHOS_TEST_FOR_EXCEPTION( info != 0, EpetraSpecializedMultiVecFailure, "Anasazi::EpetraOpMultiVec::MvTransMv(): Error returned from Epetra_Operator::Apply()" ); TEUCHOS_TEST_FOR_EXCEPTION( B_Pvec.Multiply( 'T', 'N', alpha, *(A_vec->GetEpetraMultiVector()), *Epetra_MV_Temp, 0.0 ) != 0, EpetraSpecializedMultiVecFailure, "Anasazi::EpetraOpMultiVec::MvTransMv() call to Epetra_MultiVector::Multiply() returned a nonzero value."); } }
void factorExplicit (Kokkos::MultiVector<Scalar, NodeType>& A, Kokkos::MultiVector<Scalar, NodeType>& Q, Teuchos::SerialDenseMatrix<LocalOrdinal, Scalar>& R, const bool contiguousCacheBlocks, const bool forceNonnegativeDiagonal=false) { using Teuchos::asSafe; typedef Kokkos::MultiVector<Scalar, NodeType> KMV; // Tsqr currently likes LocalOrdinal ordinals, but // Kokkos::MultiVector has size_t ordinals. Do conversions // here. // // Teuchos::asSafe() can do safe conversion (e.g., checking for // overflow when casting to a narrower integer type), if a // custom specialization is defined for // Teuchos::ValueTypeConversionTraits<size_t, LocalOrdinal>. // Otherwise, this has the same (potentially) unsafe effect as // static_cast<LocalOrdinal>(...) would have. const LocalOrdinal A_numRows = asSafe<LocalOrdinal> (A.getNumRows()); const LocalOrdinal A_numCols = asSafe<LocalOrdinal> (A.getNumCols()); const LocalOrdinal A_stride = asSafe<LocalOrdinal> (A.getStride()); const LocalOrdinal Q_numRows = asSafe<LocalOrdinal> (Q.getNumRows()); const LocalOrdinal Q_numCols = asSafe<LocalOrdinal> (Q.getNumCols()); const LocalOrdinal Q_stride = asSafe<LocalOrdinal> (Q.getStride()); // Sanity checks for matrix dimensions if (A_numRows < A_numCols) { std::ostringstream os; os << "In Tsqr::factorExplicit: input matrix A has " << A_numRows << " local rows, and " << A_numCols << " columns. The input " "matrix must have at least as many rows on each processor as " "there are columns."; throw std::invalid_argument(os.str()); } else if (A_numRows != Q_numRows) { std::ostringstream os; os << "In Tsqr::factorExplicit: input matrix A and output matrix Q " "must have the same number of rows. A has " << A_numRows << " rows" " and Q has " << Q_numRows << " rows."; throw std::invalid_argument(os.str()); } else if (R.numRows() < R.numCols()) { std::ostringstream os; os << "In Tsqr::factorExplicit: output matrix R must have at least " "as many rows as columns. 
R has " << R.numRows() << " rows and " << R.numCols() << " columns."; throw std::invalid_argument(os.str()); } else if (A_numCols != R.numCols()) { std::ostringstream os; os << "In Tsqr::factorExplicit: input matrix A and output matrix R " "must have the same number of columns. A has " << A_numCols << " columns and R has " << R.numCols() << " columns."; throw std::invalid_argument(os.str()); } // Check for quick exit, based on matrix dimensions if (Q_numCols == 0) return; // Hold on to nonconst views of A and Q. This will make TSQR // correct (if perhaps inefficient) for all possible Kokkos Node // types, even GPU nodes. Teuchos::ArrayRCP<scalar_type> A_ptr = A.getValuesNonConst(); Teuchos::ArrayRCP<scalar_type> Q_ptr = Q.getValuesNonConst(); R.putScalar (STS::zero()); NodeOutput nodeResults = nodeTsqr_->factor (A_numRows, A_numCols, A_ptr.getRawPtr(), A_stride, R.values(), R.stride(), contiguousCacheBlocks); // FIXME (mfh 19 Oct 2010) Replace actions on raw pointer with // actions on the Kokkos::MultiVector or at least the ArrayRCP. nodeTsqr_->fill_with_zeros (Q_numRows, Q_numCols, Q_ptr.getRawPtr(), Q_stride, contiguousCacheBlocks); matview_type Q_rawView (Q_numRows, Q_numCols, Q_ptr.getRawPtr(), Q_stride); matview_type Q_top_block = nodeTsqr_->top_block (Q_rawView, contiguousCacheBlocks); if (Q_top_block.nrows() < R.numCols()) { std::ostringstream os; os << "The top block of Q has too few rows. This means that the " << "the intranode TSQR implementation has a bug in its top_block" << "() method. 
The top block should have at least " << R.numCols() << " rows, but instead has only " << Q_top_block.ncols() << " rows."; throw std::logic_error (os.str()); } { matview_type Q_top (R.numCols(), Q_numCols, Q_top_block.get(), Q_top_block.lda()); matview_type R_view (R.numRows(), R.numCols(), R.values(), R.stride()); distTsqr_->factorExplicit (R_view, Q_top, forceNonnegativeDiagonal); } nodeTsqr_->apply (ApplyType::NoTranspose, A_numRows, A_numCols, A_ptr.getRawPtr(), A_stride, nodeResults, Q_numCols, Q_ptr.getRawPtr(), Q_stride, contiguousCacheBlocks); // If necessary, force the R factor to have a nonnegative diagonal. if (forceNonnegativeDiagonal && ! QR_produces_R_factor_with_nonnegative_diagonal()) { details::NonnegDiagForcer<LocalOrdinal, Scalar, STS::isComplex> forcer; matview_type Q_mine (Q_numRows, Q_numCols, Q_ptr.getRawPtr(), Q_stride); matview_type R_mine (R.numRows(), R.numCols(), R.values(), R.stride()); forcer.force (Q_mine, R_mine); } // "Commit" the changes to the multivector. A_ptr = Teuchos::null; Q_ptr = Teuchos::null; }