예제 #1
    /// \brief Return a nonconstant view of the input MultiVector.
    /// TSQR represents the local (to each MPI process) part of a
    /// multivector as a KokkosClassic::MultiVector (KMV), which gives a
    /// nonconstant view of the original multivector's data.  This
    /// class method tells TSQR how to get the KMV from the input
    /// multivector.  The KMV is not a persistent view of the data;
    /// its scope is contained within the scope of the multivector.
    /// \warning TSQR does not currently support multivectors with
    ///   nonconstant stride.  This method will raise an exception
    ///   if A has nonconstant stride.
    static KokkosClassic::MultiVector<scalar_type, node_type>
    getNonConstView (MV& A)
    {
      // FIXME (mfh 25 Oct 2010) We should be able to run TSQR even if
      // storage of A uses nonconstant stride internally.  We would
      // have to copy and pack into a matrix with constant stride, and
      // then unpack on exit.  For now we choose just to raise an
      // exception.
      TEUCHOS_TEST_FOR_EXCEPTION(! A.ConstantStride(), std::invalid_argument,
                                 "TSQR does not currently support Epetra_MultiVector "
                                 "inputs that do not have constant stride.");
      const int numRows = A.MyLength();
      const int numCols = A.NumVectors();
      const int stride  = A.Stride();

      // A_ptr does _not_ own the data.  TSQR only operates within the
      // scope of the multivector objects on which it operates, so it
      // doesn't need ownership of the data.
      //
      // The view is handed to initializeValues() below as numCols
      // columns spaced `stride` entries apart, so it must span
      // numCols*stride entries (not numRows*stride, which is too short
      // whenever numCols > numRows and too long in the usual tall case).
      Teuchos::ArrayRCP<double> A_ptr (A.Values(), 0, numCols*stride, false);

      typedef KokkosClassic::MultiVector<scalar_type, node_type> KMV;
      // KMV objects want a Kokkos Node instance.  Epetra objects
      // don't have a Kokkos Node, so we make a temporary node just
      // for the KMV.
      // KokkosClassic::SerialNode wants an empty ParameterList.
      Teuchos::ParameterList plist;
      Teuchos::RCP<node_type> node (new node_type (plist));
      KMV A_kmv (node);
      A_kmv.initializeValues (numRows, numCols, A_ptr, stride);
      return A_kmv;
    }
예제 #2
    /// \brief Compute QR factorization [Q,R] = qr(A,0).
    /// \param A [in/out] On input: the multivector to factor.
    ///   Overwritten with garbage on output.
    /// \param Q [out] On output: the (explicitly stored) Q factor in
    ///   the QR factorization of the (input) multivector A.
    /// \param R [out] On output: the R factor in the QR factorization
    ///   of the (input) multivector A.
    /// \param forceNonnegativeDiagonal [in] If true, then (if
    ///   necessary) do extra work (modifying both the Q and R
    ///   factors) in order to force the R factor to have a
    ///   nonnegative diagonal.
    /// \warning Currently, this method only works if A and Q have the
    ///   same communicator and row distribution ("Map," in Petra
    ///   terms) as those of the multivector given to this adapter
    ///   instance's constructor.  Otherwise, the result of this
    ///   method is undefined.
    factorExplicit (MV& A,
                    MV& Q,
                    dense_matrix_type& R,
                    const bool forceNonnegativeDiagonal=false)
    {
      // Reject non-constant-stride inputs up front: the raw pointers
      // below are passed together with a single stride per multivector.
      TEUCHOS_TEST_FOR_EXCEPTION
        (! A.isConstantStride (), std::invalid_argument, "TsqrAdaptor::"
         "factorExplicit: Input MultiVector A must have constant stride.");
      TEUCHOS_TEST_FOR_EXCEPTION
        (! Q.isConstantStride (), std::invalid_argument, "TsqrAdaptor::"
         "factorExplicit: Input MultiVector Q must have constant stride.");
      prepareTsqr (Q); // Finish initializing TSQR.

      // FIXME (mfh 16 Jan 2016) Currently, TSQR is a host-only
      // implementation.
      A.template sync<Kokkos::HostSpace> ();
      A.template modify<Kokkos::HostSpace> ();
      Q.template sync<Kokkos::HostSpace> ();
      Q.template modify<Kokkos::HostSpace> ();
      auto A_view = A.template getLocalView<Kokkos::HostSpace> ();
      auto Q_view = Q.template getLocalView<Kokkos::HostSpace> ();
      scalar_type* const A_ptr =
        reinterpret_cast<scalar_type*> (A_view.ptr_on_device ());
      scalar_type* const Q_ptr =
        reinterpret_cast<scalar_type*> (Q_view.ptr_on_device ());
      const bool contiguousCacheBlocks = false;

      // Pass both trailing flags; previously the argument list stopped
      // after R.stride(), so neither flag reached TSQR.
      // NOTE(review): flag order (contiguousCacheBlocks first) follows
      // the TSQR factorExplicitRaw interface -- confirm against Tsqr.hpp.
      tsqr_->factorExplicitRaw (A_view.dimension_0 (),
                                A_view.dimension_1 (),
                                A_ptr, A.getStride (),
                                Q_ptr, Q.getStride (),
                                R.values (), R.stride (),
                                contiguousCacheBlocks,
                                forceNonnegativeDiagonal);
    }
예제 #3
    /// \brief Rank-revealing decomposition
    /// Using the R factor and explicit Q factor from
    /// factorExplicit(), compute the singular value decomposition
    /// (SVD) of R: \f$R = U \Sigma V^*\f$.  If R is full rank (with
    /// respect to the given relative tolerance \c tol), do not modify
    /// Q or R.  Otherwise, compute \f$Q := Q \cdot U\f$ and \f$R :=
    /// \Sigma V^*\f$ in place.  If R was modified, then it may not
    /// necessarily be upper triangular on output.
    /// \param Q [in/out] On input: explicit Q factor computed by
    ///   factorExplicit().  (Must be an orthogonal resp. unitary
    ///   matrix.)  On output: If R is of full numerical rank with
    ///   respect to the tolerance tol, Q is unmodified.  Otherwise, Q
    ///   is updated so that the first \c rank columns of Q are a
    ///   basis for the column space of A (the original matrix whose
    ///   QR factorization was computed by factorExplicit()).  The
    ///   remaining columns of Q are a basis for the null space of A.
    /// \param R [in/out] On input: N by N upper triangular matrix
    ///   with leading dimension LDR >= N.  On output: if input is
    ///   full rank, R is unchanged on output.  Otherwise, if \f$R = U
    ///   \Sigma V^*\f$ is the SVD of R, on output R is overwritten
    ///   with \f$\Sigma \cdot V^*\f$.  This is also an N by N matrix,
    ///   but it may not necessarily be upper triangular.
    /// \param tol [in] Relative tolerance for computing the numerical
    ///   rank of the matrix R.
    /// \return Rank \f$r\f$ of R: \f$ 0 \leq r \leq N\f$.
    revealRank (MV& Q,
                dense_matrix_type& R,
                const magnitude_type& tol)
    {
      // Without the macro name this check was a bare parenthesized
      // expression and never threw; restore it so non-constant-stride
      // input is rejected before Q's raw pointer is used.
      TEUCHOS_TEST_FOR_EXCEPTION
        (! Q.isConstantStride (), std::invalid_argument, "TsqrAdaptor::"
         "revealRank: Input MultiVector Q must have constant stride.");
      prepareTsqr (Q); // Finish initializing TSQR.
      // FIXME (mfh 18 Oct 2010) Check Teuchos::Comm<int> object in Q
      // to make sure it is the same communicator as the one we are
      // using in our dist_tsqr_type implementation.

      // Work on the host copy of Q and mark it modified there.
      Q.template sync<Kokkos::HostSpace> ();
      Q.template modify<Kokkos::HostSpace> ();
      auto Q_view = Q.template getLocalView<Kokkos::HostSpace> ();
      scalar_type* const Q_ptr =
        reinterpret_cast<scalar_type*> (Q_view.ptr_on_device ());
      const bool contiguousCacheBlocks = false;
      return tsqr_->revealRankRaw (Q_view.dimension_0 (),
                                   Q_view.dimension_1 (),
                                   Q_ptr, Q.getStride (),
                                   R.values (), R.stride (),
                                   tol, contiguousCacheBlocks);
    }
localApply (const MV& X,
            MV& Y,
            const Teuchos::ETransp mode,
            const scalar_type& alpha,
            const scalar_type& beta) const
{
  // Computes Y := beta * Y + alpha * (local solve applied to X),
  // special-casing zero coefficients so that the old contents of Y
  // (or the solve itself) are skipped when they cannot contribute.
  typedef scalar_type ST;
  typedef Teuchos::ScalarTraits<ST> STS;

  const bool alphaIsZero = (alpha == STS::zero ());
  const bool betaIsZero = (beta == STS::zero ());

  if (alphaIsZero) {
    if (betaIsZero) {
      // Y := 0 * Y, ignoring whatever Y held before.
      Y.putScalar (STS::zero ());
    }
    else {
      // Y := beta * Y
      Y.scale (beta);
    }
  }
  else if (betaIsZero) {
    // Solve straight into Y, then fold in alpha only if it is not one.
    A_crs_->template localSolve<ST, ST> (X, Y, mode);
    if (alpha != STS::one ()) {
      Y.scale (alpha);
    }
  }
  else {
    // Both terms contribute: solve into a copy, then combine.
    MV solveResult (Y, Teuchos::Copy);
    A_crs_->template localSolve<ST, ST> (X, solveResult, mode); // solveResult := M * X
    Y.update (alpha, solveResult, beta); // Y := beta * Y + alpha * solveResult
  }
}
예제 #5
 /// \brief Extract A's underlying KokkosClassic::MultiVector instance.
 /// TSQR represents the local (to each MPI process) part of a
 /// multivector as a KokkosClassic::MultiVector (KMV), which gives a
 /// nonconstant view of the original multivector's data.  This
 /// class method tells TSQR how to get the KMV from the input
 /// multivector.  The KMV is not a persistent view of the data;
 /// its scope is contained within the scope of the multivector.
 /// \warning TSQR does not currently support multivectors with
 ///   nonconstant stride.  If A has nonconstant stride, this
 ///   method will throw an exception.
 // Returns the result of A.getLocalMVNonConst() after verifying that A
 // has constant stride.
 static KokkosClassic::MultiVector<scalar_type, node_type>
 getNonConstView (MV& A)
   // FIXME (mfh 25 Oct 2010) We should be able to run TSQR even if
   // storage of A uses nonconstant stride internally.  We would
   // have to copy and pack into a matrix with constant stride, and
   // then unpack on exit.  For now we choose just to raise an
   // exception.
   // Throws std::invalid_argument when A.isConstantStride() is false.
   TEUCHOS_TEST_FOR_EXCEPTION(! A.isConstantStride(), std::invalid_argument,
                              "TSQR does not currently support Tpetra::MultiVector "
                              "inputs that do not have constant stride.");
   // NOTE(review): presumably a non-owning view tied to A's lifetime, as
   // the header comment says -- confirm against getLocalMVNonConst().
   return A.getLocalMVNonConst();
        // Compute \f$\alpha A^\top \text{this}\f$ 
        void innerProducts(const Real alpha,
                           const MV &A,
                           Teuchos::SerialDenseMatrix<int,Real> &B) const {

//            TEUCHOS_TEST_FOR_EXCEPTION( this->dimensionMismatch(A),
//                std::invalid_argument,
//                "Error: MultiVectors must have the same dimensions.");

            for(int i=0;i<A.getNumberOfVectors();++i) {
                for(int j=0;j<numVectors_;++j) {
                    B(i,j) = alpha*mvec_[j]->dot(*A.getVector(i));
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const

#ifdef KDEBUG

    Y.elementWiseMultiply(STS::one(), *densityOnDensityMatrix_, X, STS::zero());

        // Set some of the vectors in this MultiVector equal to corresponding 
        // vectors in another MultiVector
        void set(const MV &A, const std::vector<int> &index) {

//            TEUCHOS_TEST_FOR_EXCEPTION( this->dimensionMismatch(A),
//                std::invalid_argument,
//                "Error: MultiVectors must have the same dimensions.");

            int n = index.size();
            for(int i=0;i<n;++i) {
                int k = index[i];
                if(k<numVectors_ && i<A.getNumberOfVectors()) { 
예제 #9
  //! Compute rank-revealing decomposition using results of factorExplicit().
  // Thin forwarder: delegates to Q's own revealRank(R, tol) and returns
  // whatever that call returns.
  revealRank (MV& Q,
	      dense_matrix_type& R,
	      const magnitude_type& tol)
    return Q.revealRank (R, tol);
예제 #10
// Plugin entry point: constructs the MV object (named epg) and, if it
// reports no error, shows it via showExecHide(). Always returns 0 in the
// code visible here; `par` is not used in the visible lines.
int plugin_exec( PluginParam *par )
        MV epg;

	// Only proceed when construction reported errorCodeNone.
	if ( epg.error() == errorCodeNone ) {

		// Stop enigma passing rogue keypress to
		// the EPG on startup

		KeyCatcher kc; showExecHide( &kc );


	return 0;
(const MV& X, MV& Y) const
    // Our algorithm is:
    // Y = D \ X

#ifdef KDEBUG

    Y.elementWiseMultiply(STS::one(), *densityOnDensityInverse_, X, STS::zero());

예제 #12
  //! Compute QR factorization A = QR, using TSQR.
  // Thin forwarder: delegates to A's own factorExplicit(Q, R, flag).
  // forceNonnegativeDiagonal defaults to false and is passed through
  // unchanged.
  factorExplicit (MV& A,
		  MV& Q,
		  dense_matrix_type& R,
		  const bool forceNonnegativeDiagonal=false)
    A.factorExplicit (Q, R, forceNonnegativeDiagonal);
예제 #13
파일: ftpcg_util.cpp 프로젝트: zar1/serp2
void dump(const MV& v, const std::string& name) {
  std::cout << name << std::endl;
  Teuchos::ArrayRCP<const Scalar> view = v.get1dView();
  for (Teuchos::ArrayRCP<const Scalar>::iterator it = view.begin(); it != view.end(); ++it) {
    std::cout << *it << std::endl;
  std::cout << std::endl;
        // Set the MultiVector equal to another MultiVector
        void set(const MV &A) {

//            TEUCHOS_TEST_FOR_EXCEPTION( this->dimensionMismatch(A),
//                std::invalid_argument,
//                "Error: MultiVectors must have the same dimensions.");

            for(int i=0;i<numVectors_;++i) {
        // Compute dot products of pairs of vectors
        void dots(const MV &A,
                  std::vector<Real> &b) const {

            TEUCHOS_TEST_FOR_EXCEPTION( this->dimensionMismatch(A),
                "Error: MultiVectors must have the same dimensions.");

            for(int i=0;i<numVectors_;++i) {
                b[i] = mvec_[i]->dot(*A.getVector(i));
예제 #16
    /// \brief Extract A's underlying KokkosClassic::MultiVector instance.
    /// TSQR represents the local (to each MPI process) part of a
    /// multivector as a KokkosClassic::MultiVector (KMV), which gives a
    /// nonconstant view of the original multivector's data.  This
    /// class method tells TSQR how to get the KMV from the input
    /// multivector.  The KMV is not a persistent view of the data;
    /// its scope is contained within the scope of the multivector.
    /// \warning TSQR does not currently support multivectors with
    ///   nonconstant stride.  If A has nonconstant stride, this
    ///   method will throw an exception.
    static KokkosClassic::MultiVector<scalar_type, node_type>
    getNonConstView (MV& A)
    {
        // FIXME (mfh 25 Oct 2010) We should be able to run TSQR even if
        // storage of A uses nonconstant stride internally.  We would
        // have to copy and pack into a matrix with constant stride, and
        // then unpack on exit.  For now we choose just to raise an
        // exception.
        TEUCHOS_TEST_FOR_EXCEPTION(! A.isConstantStride(), std::invalid_argument,
                                   "TSQR does not currently support Tpetra::MultiVector "
                                   "inputs that do not have constant stride.");

        typedef typename Teuchos::ArrayRCP<mp_scalar_type>::size_type size_type;
        typedef typename MV::dual_view_type view_type;
        typedef typename view_type::t_dev::array_type flat_array_type;

        // Create new Kokkos::MultiVector reinterpreting the data as a longer
        // array of the base scalar type

        // Create new ArrayRCP holding data (non-owning: last argument is false)
        view_type pce_mv = A.getDualView();
        flat_array_type flat_mv = pce_mv.d_view;
        const size_t num_rows = flat_mv.dimension_0();
        const size_t num_cols = flat_mv.dimension_1();
        const size_t size = num_rows * num_cols;
        Teuchos::ArrayRCP<scalar_type> vals =
            Teuchos::arcp(flat_mv.ptr_on_device(), size_type(0), size, false);

        // Create new MultiVector
        // Owing to the above comment, we don't need to worry about
        // non-constant stride
        //
        // Query the flattened view for its strides before reading them;
        // the array was previously read without ever being filled in.
        // NOTE(review): index 0 is kept from the original code -- confirm
        // it selects the column stride for flat_array_type's layout.
        size_t strides[2] = {0, 0};
        flat_mv.stride(strides);
        const size_t stride = strides[0];
        KokkosClassic::MultiVector<scalar_type, node_type> mv(A.getMap()->getNode());
        mv.initializeValues(num_rows, num_cols, vals, stride);

        return mv;
    }
예제 #17
    /// \brief Finish internode TSQR initialization.
    /// \param mv [in] A valid Tpetra::MultiVector instance whose
    ///   communicator wrapper we will use to prepare TSQR.
    /// \note It's OK to call this method more than once; it is idempotent.
    prepareDistTsqr (const MV& mv)
        using Teuchos::RCP;
        using Teuchos::rcp_implicit_cast;
        typedef TSQR::TeuchosMessenger<scalar_type> mess_type;
        typedef TSQR::MessengerBase<scalar_type> base_mess_type;

        RCP<const Teuchos::Comm<int> > comm = mv.getMap()->getComm();
        RCP<mess_type> mess (new mess_type (comm));
        RCP<base_mess_type> messBase = rcp_implicit_cast<base_mess_type> (mess);
        distTsqr_->init (messBase);
        // Generic BLAS level 3 matrix multiplication
        // \f$\text{this}\leftarrow \alpha A B+\beta\text{this}\f$   
        void gemm(const Real alpha,
                  const MV& A,
                  const Teuchos::SerialDenseMatrix<int,Real> &B,
                  const Real beta) {

           // Scale this by beta

            for(int i=0;i<B.numRows();++i) {
                for(int j=0;j<B.numCols();++j) {
예제 #19
    /// \brief Finish internode TSQR initialization.
    /// \param mv [in] A multivector, from which to extract the
    ///   Epetra_Comm communicator wrapper to use to initialize TSQR.
    /// \note It's OK to call this method more than once; it is idempotent.
    prepareDistTsqr (const MV& mv)
      using Teuchos::RCP;
      using Teuchos::rcp;
      using TSQR::Epetra::makeTsqrMessenger;
      typedef TSQR::MessengerBase<scalar_type> base_mess_type;

      // If mv falls out of scope, its Epetra_Comm may become invalid.
      // Thus, we clone the input Epetra_Comm, so that the messenger
      // owns the object.
      RCP<const Epetra_Comm> comm = rcp (mv.Comm().Clone());
      RCP<base_mess_type> messBase = makeTsqrMessenger<scalar_type> (comm);
      distTsqr_->init (messBase);
예제 #20
(const MV& X, MV& Y) const

#ifdef KDEBUG

    Scalar ONE = STS::one();
    Scalar ZERO = STS::zero();

    size_t NumVectors = Y.getNumVectors();
    size_t numMyElements = ownedMap_->getNodeNumElements();
    RCP<MV > Ytmp = rcp(new MV(ownedMap_,NumVectors));

    Y=X; // We can safely do this

    RCP<MV > curY = Y.offsetViewNonConst(ownedMap_, 0);

    RCP<VEC> diagVec = invDiagonal_->offsetViewNonConst(ownedMap_, 0)->getVectorNonConst(0);

    curY->elementWiseMultiply(ONE, *diagVec, *curY, ZERO); // Scale Y by the first block diagonal

    // Loop over block 1 through numBlocks (indexing 0 to numBlocks-1)
    for (LocalOrdinal i=OTLO::zero(); i< numBlocks_-1; i++)
        // Update views of Y and diagonal blocks
        curY = Y.offsetViewNonConst(ownedMap_, (i+1)*numMyElements);

        diagVec = invDiagonal_->offsetViewNonConst(ownedMap_, (i+1)*numMyElements)->getVectorNonConst(0);

        matrixOperator_[i]->apply(Y, *Ytmp); // Multiply block lower triangular block
        curY->update(-ONE, *Ytmp, ONE); // curY = curX - Ytmp (Note that curX is in curY from initial copy Y = X)
        curY->elementWiseMultiply(ONE, *diagVec, *curY, ZERO); // Scale Y by the first block diagonal
} //end applyInverse
예제 #21
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const

#ifdef KDEBUG

    size_t numMyElements = ownedMap_->getNodeNumElements();

    RCP<MV > curY = Y.offsetViewNonConst(ownedMap_, 0);

    for (LocalOrdinal i=OTLO::zero(); i< numBlocks_-1; i++) {
        curY = Y.offsetViewNonConst(ownedMap_, (i+1)*numMyElements);
        matrixOperator_[i]->apply(X, *curY); // This gives a result that is off-diagonal-matrix*X

    Y.elementWiseMultiply(STS::one(),*diagonal_, X, STS::one()); // Add diagonal contribution

} //end Apply
예제 #22
void thread1(MV & x, T & result)
{
	// Fetch a version handle from x and copy the value it refers to
	// into the caller's result.
	MV::Version snapshot = x.get();
	result = *snapshot;
}
예제 #23
  // Compute Y := alpha Op X + beta Y.
  // We ignore the cases alpha != 1 and beta != 0 for simplicity.
  apply (const MV& X,
         MV& Y,
         Teuchos::ETransp mode = Teuchos::NO_TRANS,
         scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one (),
         scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero ()) const
    using Teuchos::RCP;
    using Teuchos::rcp;
    using std::cout;
    using std::endl;
    typedef Teuchos::ScalarTraits<scalar_type> STS;

    RCP<const Teuchos::Comm<int> > comm = opMap_->getComm ();
    const int myRank = comm->getRank ();
    const int numProcs = comm->getSize ();

    if (myRank == 0) {
      cout << "MyOp::apply" << endl;

    // We're writing the Operator subclass, so we are responsible for
    // error handling.  You can decide how much error checking you
    // want to do.  Just remember that checking things like Map
    // sameness or compatibility are expensive.
      X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
      "X and Y do not have the same numbers of vectors (columns).");

    // Let's make sure alpha is 1 and beta is 0...
    // This will throw an exception if that is not the case.
      alpha != STS::one() || beta != STS::zero(), std::logic_error,
      "MyOp::apply was given alpha != 1 or beta != 0. "
      "These cases are not implemented.");

    // Get the number of vectors (columns) in X (and Y).
    const size_t numVecs = X.getNumVectors ();

    // Make a temporary multivector for holding the redistributed
    // data.  You could also create this in the constructor and reuse
    // it across different apply() calls, but you would need to be
    // careful to reallocate if it has a different number of vectors
    // than X.  The number of vectors in X can vary across different
    // apply() calls.
    RCP<MV> redistData = rcp (new MV (redistMap_, numVecs));

    // Redistribute the data.
    // This will do all the necessary communication for you.
    // All processes now own enough data to do the matvec.
    redistData->doImport (X, *importer_, Tpetra::INSERT);

    // Get the number of local rows in X, on the calling process.
    const local_ordinal_type nlocRows =
      static_cast<local_ordinal_type> (X.getLocalLength ());

    // Perform the matvec with the data we now locally own.
    // For each column...
    for (size_t c = 0; c < numVecs; ++c) {
      // Get a view of the desired column
      Teuchos::ArrayRCP<scalar_type> colView = redistData->getDataNonConst (c);

      local_ordinal_type offset;
      // Y[0,c] = -colView[0] + 2*colView[1] - colView[2] (using local indices)
      if (myRank > 0) {
        Y.replaceLocalValue (0, c, -colView[0] + 2*colView[1] - colView[2]);
        offset = 0;
      // Y[0,c] = 2*colView[1] - colView[2] (using local indices)
      else {
        Y.replaceLocalValue (0, c, 2*colView[0] - colView[1]);
        offset = 1;

      // Y[r,c] = -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset]
      for (local_ordinal_type r = 1; r < nlocRows - 1; ++r) {
        const scalar_type newVal =
          -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset];
        Y.replaceLocalValue (r, c, newVal);
      // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]
      //                   - colView[nlocRows+1-offset]
      if (myRank < numProcs - 1) {
        const scalar_type newVal =
          -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]
          - colView[nlocRows+1-offset];
        Y.replaceLocalValue (nlocRows-1, c, newVal);
      // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]
      else {
        const scalar_type newVal =
          -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset];
        Y.replaceLocalValue (nlocRows-1, c, newVal);
예제 #24
  /// \brief Solve AX=B for X with Chebyshev iteration with left
  ///   diagonal scaling, imitating ML's implementation.
  /// \pre A must be real-valued and symmetric positive definite.
  /// \pre numIters >= 0
  /// \pre eigRatio >= 1
  /// \pre 0 < lambdaMax
  /// \pre All entries of D_inv are positive.
  /// \param A [in] The matrix A in the linear system to solve.
  /// \param B [in] Right-hand side(s) in the linear system to solve.
  /// \param X [in] Initial guess(es) for the linear system to solve.
  /// \param numIters [in] Number of Chebyshev iterations.
  /// \param lambdaMax [in] Estimate of max eigenvalue of D_inv*A.
  /// \param lambdaMin [in] Estimate of min eigenvalue of D_inv*A.  We
  ///   only use this to determine if A is the identity matrix.
  /// \param eigRatio [in] Estimate of max / min eigenvalue ratio of
          ///   D_inv*A.  We use this along with lambdaMax to compute the
          ///   Chebyshev coefficients.  This need not be the same as
          ///   lambdaMax/lambdaMin.
          /// \param D_inv [in] Vector of diagonal entries of A.  It must have
          ///   the same distribution as b.
          mlApplyImpl (const MAT& A,
                   const MV& B,
                   MV& X,
                   const int numIters,
                   const ST lambdaMax,
                   const ST lambdaMin,
                   const ST eigRatio,
                   const V& D_inv)
            const ST zero = Teuchos::as<ST> (0);
            const ST one = Teuchos::as<ST> (1);
            const ST two = Teuchos::as<ST> (2);

            MV pAux (B.getMap (), B.getNumVectors ()); // Result of A*X
            MV dk (B.getMap (), B.getNumVectors ()); // Solution update
            MV R (B.getMap (), B.getNumVectors ()); // Not in original ML; need for B - pAux

            ST beta = Teuchos::as<ST> (1.1) * lambdaMax;
            ST alpha = lambdaMax / eigRatio;

            ST delta = (beta - alpha) / two;
            ST theta = (beta + alpha) / two;
            ST s1 = theta / delta;
            ST rhok = one / s1;

            // Diagonal: ML replaces entries containing 0 with 1.  We
            // shouldn't have any entries like that in typical test problems,
            // so it's OK not to do that here.

            // The (scaled) matrix is the identity: set X = D_inv * B.  (If A
            // is the identity, then certainly D_inv is too.  D_inv comes from
            // A, so if D_inv * A is the identity, then we still need to apply
            // the "preconditioner" D_inv to B as well, to get X.)
            if (lambdaMin == one && lambdaMin == lambdaMax) {
              solve (X, D_inv, B);

            // The next bit of code is a direct translation of code from ML's
            // ML_Cheby function, in the "normal point scaling" section, which
            // is in lines 7365-7392 of ml_smoother.c.

            if (! zeroStartingSolution_) {
              // dk = (1/theta) * D_inv * (B - (A*X))
              A.apply (X, pAux); // pAux = A * X
              R = B;
              R.update (-one, pAux, one); // R = B - pAux
              dk.elementWiseMultiply (one/theta, D_inv, R, zero); // dk = (1/theta)*D_inv*R
              X.update (one, dk, one); // X = X + dk
            } else {
              dk.elementWiseMultiply (one/theta, D_inv, B, zero); // dk = (1/theta)*D_inv*B
              X = dk;

            ST rhokp1, dtemp1, dtemp2;
            for (int k = 0; k < numIters-1; ++k) {
              A.apply (X, pAux);
              rhokp1 = one / (two*s1 - rhok);
              dtemp1 = rhokp1*rhok;
              dtemp2 = two*rhokp1/delta;
              rhok = rhokp1;

              R = B;
              R.update (-one, pAux, one); // R = B - pAux
              // dk = dtemp1 * dk + dtemp2 * D_inv * (B - pAux)
              dk.elementWiseMultiply (dtemp2, D_inv, B, dtemp1);
              X.update (one, dk, one); // X = X + dk
예제 #25
    //! Do the transpose or conjugate transpose solve.
    //!
    //! Reads X_in, writes the solution into Y_in.  Throws std::logic_error
    //! if mode is neither Teuchos::TRANS nor Teuchos::CONJ_TRANS.  May
    //! (re)allocate the cached importMV_ / exportMV_ temporaries when the
    //! number of columns changes.
    void applyTranspose (const MV& X_in, MV& Y_in, const Teuchos::ETransp mode) const
    {
      typedef Teuchos::ScalarTraits<Scalar> ST;
      using Teuchos::null;

      TEUCHOS_TEST_FOR_EXCEPTION
        (mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS, std::logic_error,
         "Tpetra::CrsMatrixSolveOp::applyTranspose: mode is neither TRANS nor "
         "CONJ_TRANS.  Should never get here!  Please report this bug to the "
         "Tpetra developers.");

      const size_t numVectors = X_in.getNumVectors();
      Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer =
        matrix_->getGraph ()->getImporter ();
      Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter =
        matrix_->getGraph ()->getExporter ();
      Teuchos::RCP<const MV> X;

      // it is okay if X and Y reference the same data, because we can
      // perform a triangular solve in-situ.  however, we require that
      // column access to each is strided.

      // set up import/export temporary multivectors
      if (importer != null) {
        if (importMV_ != null && importMV_->getNumVectors() != numVectors) {
          importMV_ = null;
        }
        if (importMV_ == null) {
          importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) );
        }
      }
      if (exporter != null) {
        if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) {
          exportMV_ = null;
        }
        if (exportMV_ == null) {
          exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) );
        }
      }

      // solve(TRANS): DomainMap -> RangeMap
      // lclMatSolve_(TRANS): ColMap -> RowMap
      // importer: DomainMap -> ColMap
      // exporter: RowMap -> RangeMap
      // solve = importer o   lclMatSolve_  o  exporter
      //         Domainmap -> ColMap     ->      RowMap -> RangeMap
      // If we have a non-trivial importer, we must import elements that
      // are permuted or are on other processes.
      if (importer != null) {
        // Fill the column-Map temporary from X_in before using it as the
        // solve input; it was previously used without being filled.
        importMV_->doImport (X_in, *importer, INSERT);
        X = importMV_;
      }
      else if (X_in.isConstantStride() == false) {
        // cannot handle non-constant stride right now
        // generate a copy of X_in
        X = Teuchos::rcp(new MV(X_in));
      }
      else {
        // just temporary, so this non-owning RCP is okay
        X = Teuchos::rcpFromRef (X_in);
      }

      // NOTE(review): every local solve below uses Teuchos::CONJ_TRANS,
      // as the original calls did, so `mode` is validated but otherwise
      // ignored.  This differs from TRANS only for complex Scalar --
      // confirm whether `mode` should be forwarded instead.

      // If we have a non-trivial exporter, we must export elements that
      // are permuted or belong to other processes.  We will compute
      // solution into the to-be-exported MV; get a view.
      if (exporter != null) {
        matrix_->template localSolve<Scalar, Scalar> (*X, *exportMV_,
                                                      Teuchos::CONJ_TRANS);
        // Make sure target is zero: necessary because we are adding
        Y_in.putScalar (ST::zero ());
        // Export the solve result (row Map) through the exporter; the
        // import temporary holds the input and may not even exist.
        Y_in.doExport (*exportMV_, *exporter, ADD);
      }
      // otherwise, solve into Y
      else {
        if (Y_in.isConstantStride() == false) {
          // generate a strided copy of Y
          MV Y(Y_in);
          matrix_->template localSolve<Scalar, Scalar> (*X, Y, Teuchos::CONJ_TRANS);
          Y_in = Y;
        }
        else {
          matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, Teuchos::CONJ_TRANS);
        }
      }
    }
예제 #26
    //! Do the non-transpose solve.
    void applyNonTranspose (const MV& X_in, MV& Y_in) const
      using Teuchos::NO_TRANS;
      using Teuchos::null;
      typedef Teuchos::ScalarTraits<Scalar> ST;

      // Solve U X = Y  or  L X = Y
      // X belongs to domain map, while Y belongs to range map

      const size_t numVectors = X_in.getNumVectors();
      Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer =
        matrix_->getGraph ()->getImporter ();
      Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter =
        matrix_->getGraph ()->getExporter ();
      Teuchos::RCP<const MV> X;

      // it is okay if X and Y reference the same data, because we can
      // perform a triangular solve in-situ.  however, we require that
      // column access to each is strided.

      // set up import/export temporary multivectors
      if (importer != null) {
        if (importMV_ != null && importMV_->getNumVectors () != numVectors) {
          importMV_ = null;
        if (importMV_ == null) {
          importMV_ = Teuchos::rcp (new MV (matrix_->getColMap (), numVectors));
      if (exporter != null) {
        if (exportMV_ != null && exportMV_->getNumVectors () != numVectors) {
          exportMV_ = null;
        if (exportMV_ == null) {
          exportMV_ = Teuchos::rcp (new MV (matrix_->getRowMap (), numVectors));

      // solve(NO_TRANS): RangeMap -> DomainMap
      // lclMatSolve_: RowMap -> ColMap
      // importer: DomainMap -> ColMap
      // exporter: RowMap -> RangeMap
      // solve = reverse(exporter)  o   lclMatSolve_  o reverse(importer)
      //         RangeMap   ->    RowMap     ->     ColMap         ->    DomainMap
      // If we have a non-trivial exporter, we must import elements that
      // are permuted or are on other processors
      if (exporter != null) {
        exportMV_->doImport (X_in, *exporter, INSERT);
        X = exportMV_;
      else if (! X_in.isConstantStride ()) {
        // cannot handle non-constant stride right now
        // generate a copy of X_in
        X = Teuchos::rcp (new MV (X_in));
      else {
        // just temporary, so this non-owning RCP is okay
        X = Teuchos::rcpFromRef (X_in);

      // If we have a non-trivial importer, we must export elements that
      // are permuted or belong to other processes.  We will compute
      // solution into the to-be-exported MV.
      if (importer != null) {
        matrix_->template localSolve<Scalar, Scalar> (*X, *importMV_, NO_TRANS);
        // Make sure target is zero: necessary because we are adding.
        Y_in.putScalar (ST::zero ());
        Y_in.doExport (*importMV_, *importer, ADD);
      // otherwise, solve into Y
      else {
        // can't solve into non-strided multivector
        if (! Y_in.isConstantStride ()) {
          // generate a strided copy of Y
          MV Y (Y_in);
          matrix_->template localSolve<Scalar, Scalar> (*X, Y, NO_TRANS);
          Tpetra::deep_copy (Y_in, Y);
        else {
          matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, NO_TRANS);
예제 #27
 // Free-function accessor: returns mv.non_zeros() as MV::size_type.
 typename MV::size_type num_non_zeros (const MV &mv) {
     return mv.non_zeros ();
예제 #28
applyImpl (const MV& X,
	   MV& Y,
	   Teuchos::ETransp mode,
	   scalar_type alpha,
	   scalar_type beta) const 
  using Teuchos::ArrayRCP;
  using Teuchos::as;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::rcpFromRef;

  const scalar_type zero = STS::zero();
  const scalar_type one = STS::one();

  // Y = beta*Y + alpha*M*X.

  // If alpha == 0, then we don't need to do Chebyshev at all.
  if (alpha == zero) {
    if (beta == zero) { // Obey Sparse BLAS rules; avoid 0*NaN.
      Y.putScalar (zero);
    else {
      Y.scale (beta);

  // If beta != 0, then we need to keep a copy of the initial value of
  // Y, so that we can add beta*it to the Chebyshev result at the end.
  // Usually this method is called with beta == 0, so we don't have to 
  // worry about caching Y_org.
  RCP<MV> Y_orig;
  if (beta != zero) {
    Y_orig = rcp (new MV (Y));

  // If X and Y point to the same memory location, we need to use a
  // copy of X (X_copy) as the input MV.  Otherwise, just let X_copy
  // point to X.
  // This is hopefully an uncommon use case, so we don't bother to
  // optimize for it by caching X_copy.
  RCP<const MV> X_copy;
  bool copiedInput = false;
  if (X.getLocalMV().getValues() == Y.getLocalMV().getValues()) {
    X_copy = rcp (new MV (X));
    copiedInput = true;
  else {
    X_copy = rcpFromRef (X);
  // If alpha != 1, fold alpha into (a copy of) X.
  // This is an uncommon use case, so we don't bother to optimize for
  // it by caching X_copy.  However, we do check whether we've already
  // copied X above, to avoid a second copy.
  if (alpha != one) {
    RCP<MV> X_copy_nonConst = rcp_const_cast<MV> (X_copy);
    if (! copiedInput) {
      X_copy_nonConst = rcp (new MV (X));
      copiedInput = true;
    X_copy_nonConst->scale (alpha);
    X_copy = rcp_const_cast<const MV> (X_copy_nonConst);

  impl_.apply (*X_copy, Y);

  if (beta != zero) {
    Y.update (beta, *Y_orig, one); // Y = beta * Y_orig + 1 * Y
예제 #29
void thread2(MV & x, const char * result)
예제 #30
 /// \brief Finish intranode TSQR initialization.
 /// \note It's OK to call this method more than once; it is idempotent.
 // Forwards to node_tsqr_factory_type::prepareNodeTsqr, passing the
 // nodeTsqr_ member and the Node obtained from mv's Map.
 prepareNodeTsqr (const MV& mv)
     node_tsqr_factory_type::prepareNodeTsqr (nodeTsqr_, mv.getMap()->getNode());