/*! \brief This routine takes the Epetra_MultiVector \c x and applies the operator
      to it resulting in the Epetra_MultiVector \c y, which is returned.
      \note It is expected that any problem with applying this operator to \c x will be
      indicated by an std::exception being thrown.
    */
    void Apply ( const Epetra_MultiVector& x, Epetra_MultiVector& y, ETrans trans=NOTRANS ) const {
      TEUCHOS_TEST_FOR_EXCEPTION(trans!=NOTRANS, XpetraOpFailure,
                         "Belos::MueLuTpetraOp::Apply, transpose mode != NOTRANS not supported.");

      Epetra_MultiVector & temp_x = const_cast<Epetra_MultiVector &>(x);

      const Xpetra::EpetraMultiVectorT<GlobalOrdinal,Node> tX(rcpFromRef(temp_x));
      Xpetra::EpetraMultiVectorT<GlobalOrdinal,Node>       tY(rcpFromRef(y));

      //FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate().
      tY.putScalar(0.0);

      Op_->apply(tX,tY);
    }
void DefaultBlockedLinearOp<Scalar>::describe(
  Teuchos::FancyOStream &out_arg
  ,const Teuchos::EVerbosityLevel verbLevel
  ) const
{
  typedef Teuchos::ScalarTraits<Scalar> ST;
  using Teuchos::rcpFromRef;
  using Teuchos::FancyOStream;
  using Teuchos::OSTab;
  assertBlockFillIsActive(false);
  RCP<FancyOStream> out = rcpFromRef(out_arg);
  OSTab tab1(out);
  switch(verbLevel) {
    case Teuchos::VERB_DEFAULT:
    case Teuchos::VERB_LOW:
      *out << this->description() << std::endl;
      break;
    case Teuchos::VERB_MEDIUM:
    case Teuchos::VERB_HIGH:
    case Teuchos::VERB_EXTREME:
    {
      *out
        << Teuchos::Describable::description() << "{"
        << "rangeDim=" << this->range()->dim()
        << ",domainDim=" << this->domain()->dim()
        << ",numRowBlocks=" << numRowBlocks_
        << ",numColBlocks=" << numColBlocks_
        << "}\n";
      OSTab tab2(out);
      *out
        << "Constituent LinearOpBase objects for M = [ Op[0,0] ..."
        << " ; ... ; ... Op[numRowBlocks-1,numColBlocks-1] ]:\n";
      tab2.incrTab();
      for( int i = 0; i < numRowBlocks_; ++i ) {
        for( int j = 0; j < numColBlocks_; ++j ) {
          *out << "Op["<<i<<","<<j<<"] = ";
          RCP<const LinearOpBase<Scalar> >
            block_i_j = getBlock(i,j);
          if(block_i_j.get())
            *out << Teuchos::describe(*getBlock(i,j),verbLevel);
          else
            *out << "NULL\n";
        }
      }
      break;
    }
    default:
      TEST_FOR_EXCEPT(true); // Should never get here!
  }
}
Esempio n. 3
0
    /*! \brief This routine takes the Tpetra::MultiVector \c x and applies the operator
      to it resulting in the Tpetra::MultiVector \c y, which is returned.
      \note It is expected that any problem with applying this operator to \c x will be
      indicated by an std::exception being thrown.
    */
    void Apply ( const Epetra_MultiVector& x, Epetra_MultiVector& y, ETrans trans=NOTRANS ) const {

      TEUCHOS_TEST_FOR_EXCEPTION(trans!=NOTRANS, MueLuOpFailure,
                         "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners.");

      Epetra_MultiVector & temp_x = const_cast<Epetra_MultiVector &>(x);

      const Xpetra::EpetraMultiVector tX(rcpFromRef(temp_x));
      Xpetra::EpetraMultiVector       tY(rcpFromRef(y));

      //FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate().
      tY.putScalar(0.0);

      Hierarchy_->Iterate( tX, 1, tY , true);

    }
Esempio n. 4
0
  Teuchos::RCP<const Teuchos::ParameterList>
  MinresSolMgr<ScalarType, MV, OP>::defaultParameters()
  {
    using Teuchos::ParameterList;
    using Teuchos::parameterList;
    using Teuchos::RCP;
    using Teuchos::rcp;
    using Teuchos::rcpFromRef;
    using Teuchos::EnhancedNumberValidator;
    typedef MagnitudeType MT;
    typedef Teuchos::ScalarTraits<MT> MST;

    // List of parameters accepted by MINRES, and their default values.
    RCP<ParameterList> pl = parameterList ("MINRES");

    pl->set ("Convergence Tolerance", MST::squareroot (MST::eps()),
	     "Relative residual tolerance that needs to be achieved by "
	     "the iterative solver, in order for the linear system to be "
	     "declared converged.",
	     rcp (new EnhancedNumberValidator<MT> (MST::zero(), MST::rmax())));
    pl->set ("Maximum Iterations", static_cast<int>(1000),
	     "Maximum number of iterations allowed for each right-hand "
	     "side solved.",
	     rcp (new EnhancedNumberValidator<int> (0, INT_MAX)));
    pl->set ("Block Size", static_cast<int>(1),
	     "Number of vectors in each block.  WARNING: The current "
	     "implementation of MINRES only accepts a block size of 1, "
	     "since it can only solve for 1 right-hand side at a time.",
	     rcp (new EnhancedNumberValidator<int> (1, 1)));
    pl->set ("Verbosity", (int) Belos::Errors,
	     "The type(s) of solver information that should "
	     "be written to the output stream.");
    pl->set ("Output Style", (int) Belos::General,
	     "What style is used for the solver information written "
	     "to the output stream.");
    pl->set ("Output Frequency", static_cast<int>(-1),
	     "How often (in terms of number of iterations) intermediate "
	     "convergence information should be written to the output stream."
	     "  -1 means never.");
    pl->set ("Output Stream", rcpFromRef(std::cout),
	     "A reference-counted pointer to the output stream where all "
	     "solver output is sent.  The output stream defaults to stdout.");
    pl->set ("Timer Label", std::string("Belos"),
	     "The string to use as a prefix for the timer labels.");
    return pl;
  }
  void
  DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::
  describe (Teuchos::FancyOStream &out,
            const Teuchos::EVerbosityLevel verbLevel) const
  {
    using Teuchos::rcpFromRef;
    using std::endl;

    const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
      Teuchos::VERB_LOW : verbLevel;

    if (vl != Teuchos::VERB_NONE) {
      out << this->description () << endl;
      Teuchos::OSTab tab (rcpFromRef (out));
      out << "Export buffer size (in packets): " << exports_.size() << endl
          << "Import buffer size (in packets): " << imports_.size() << endl
          << "Map over which this object is distributed:" << endl;
      map_->describe (out, vl);
    }
  }
Esempio n. 6
0
    /*! \brief This routine takes the Tpetra::MultiVector \c x and applies the operator
      to it resulting in the Tpetra::MultiVector \c y, which is returned.
      \note It is expected that any problem with applying this operator to \c x will be
      indicated by an std::exception being thrown.
    */
    void Apply(const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& x, Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>& y, ETrans trans = NOTRANS ) const {

      TEUCHOS_TEST_FOR_EXCEPTION(trans!=NOTRANS, MueLuOpFailure,
                         "Belos::MueLuOp::Apply, transpose mode != NOTRANS not supported by MueLu preconditionners.");

      //FIXME InitialGuessIsZero currently does nothing in MueLu::Hierarchy.Iterate(), but it matters for AMGX
      y.putScalar(0.0);

#ifdef HAVE_MUELU_AMGX
      if (!AMGX_.is_null())
        AMGX_->apply(x, y);
#endif

      if (!Hierarchy_.is_null()) {
        Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & temp_x = const_cast<Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> &>(x);

        const Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX(rcpFromRef(temp_x));
        Xpetra::TpetraMultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tY(rcpFromRef(y));
        Hierarchy_->Iterate(tX, tY, 1, true);
      }
    }
Esempio n. 7
0
bool tNeumannSeries::test_scaledOp(int verbosity,std::ostream & os)
{
   bool status = false;
   bool allPassed = true;

   // perform actual test
   Thyra::LinearOpTester<double> tester;
   tester.show_all_tests(true);
   std::stringstream ss;
   Teuchos::FancyOStream fos(rcpFromRef(ss),"      |||");

   {
      // build library parameter list
      Teuchos::RCP<Teuchos::ParameterList> pl = buildLibPL(2,"Diagonal");
      if(verbosity>=10) {
         os << "   tNeumannSeries::test_scaledOp :"
            << " printing library parameter list" << std::endl;
         pl->print(os);
      }

      RCP<Teko::InverseLibrary> invLib = Teko::InverseLibrary::buildFromParameterList(*pl);
      RCP<Teko::InverseFactory> neumann = invLib->getInverseFactory("Neumann");
      RCP<Teko::InverseFactory> direct = invLib->getInverseFactory("Amesos");

      Teko::LinearOp op = buildExampleOp(2,*GetComm());
   
      Teko::LinearOp neuInv = Teko::buildInverse(*neumann,op);
      Teko::LinearOp dirInv = Teko::buildInverse(*direct,op);

      const bool result = tester.compare( *neuInv, *dirInv, Teuchos::ptrFromRef(fos) );
      TEST_ASSERT(result,
             std::endl << "   tNeumannSeries::test_scaledOp "
             << ": Comparing factory generated operator to correct operator");
      if(not result || verbosity>=10) 
         os << ss.str(); 
   }
 
   return allPassed;
}
void BlockImplicitLinearOp::applyImpl(
  const Thyra::EOpTransp M_trans,
  const Thyra::MultiVectorBase<double> & x,
  const Teuchos::Ptr<Thyra::MultiVectorBase<double> > & y,
  const double alpha,
  const double beta
  ) const
{
   TEST_FOR_EXCEPTION(M_trans!=Thyra::NOTRANS, std::runtime_error,
     "Linear operators of inherited type BlockImplicitLinearOp "
     "cannot handle conjugation (yet!)");

   // cast source vector
   RCP<const ProductMultiVectorBase<double> > src =
     rcp_dynamic_cast<const ProductMultiVectorBase<double> >(rcpFromRef(x));
   BlockedMultiVector srcX = rcp_const_cast<ProductMultiVectorBase<double> >(src);

   // cast destination vector
   BlockedMultiVector destY =
     rcp_dynamic_cast<ProductMultiVectorBase<double> >(rcpFromPtr(y));

   // call apply
   implicitApply(srcX,destY,alpha,beta);
}
Esempio n. 9
0
  void Export<LocalOrdinal,GlobalOrdinal,Node>::
  print (std::ostream& os) const
  {
    using Teuchos::Comm;
    using Teuchos::getFancyOStream;
    using Teuchos::RCP;
    using Teuchos::rcpFromRef;
    using Teuchos::toString;
    using std::endl;

    RCP<const Comm<int> > comm = getSourceMap ()->getComm ();
    const int myImageID = comm->getRank ();
    const int numImages = comm->getSize ();
    for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
      if (myImageID == imageCtr) {
        os << endl;
        if (myImageID == 0) { // I'm the root node (only output this info once)
          os << "Export Data Members:" << endl;
        }
        os << "Image ID       : " << myImageID << endl;

        os << "permuteFromLIDs: " << toString (getPermuteFromLIDs ()) << endl;
        os << "permuteToLIDs  : " << toString (getPermuteToLIDs ()) << endl;
        os << "remoteLIDs     : " << toString (getRemoteLIDs ()) << endl;
        os << "exportLIDs     : " << toString (getExportLIDs ()) << endl;
        os << "exportPIDs     : " << toString (getExportPIDs ()) << endl;

        os << "numSameIDs     : " << getNumSameIDs () << endl;
        os << "numPermuteIDs  : " << getNumPermuteIDs () << endl;
        os << "numRemoteIDs   : " << getNumRemoteIDs () << endl;
        os << "numExportIDs   : " << getNumExportIDs () << endl;
      }
      // A few global barriers give output a chance to complete.
      comm->barrier();
      comm->barrier();
      comm->barrier();
    }
    if (myImageID == 0) {
      os << endl << endl << "Source Map:" << endl << std::flush;
    }
    comm->barrier();
    os << *getSourceMap();
    comm->barrier();

    if (myImageID == 0) {
      os << endl << endl << "Target Map:" << endl << std::flush;
    }
    comm->barrier();
    os << *getTargetMap();
    comm->barrier();

    // It's also helpful for debugging to print the Distributor
    // object.  Epetra_Export::Print() does this, so we can do a
    // side-by-side comparison.
    if (myImageID == 0) {
      os << endl << endl << "Distributor:" << endl << std::flush;
    }
    comm->barrier();
    getDistributor().describe (*(getFancyOStream (rcpFromRef (os))),
                               Teuchos::VERB_EXTREME);
  }
Esempio n. 10
0
  int
main (int argc, char *argv[])
{
  using Teuchos::inOutArg;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using std::endl;
  typedef double                          ST;
  typedef Epetra_Operator                 OP;
  typedef Epetra_MultiVector              MV;
  typedef Belos::OperatorTraits<ST,MV,OP> OPT;
  typedef Belos::MultiVecTraits<ST,MV>    MVT;

  // This calls MPI_Init and MPI_Finalize as necessary.
  Belos::Test::MPISession session (inOutArg (argc), inOutArg (argv));
  RCP<const Epetra_Comm> comm = session.getComm ();

  bool success = false;
  bool verbose = false;
  try {
    int MyPID = comm->MyPID ();

    //
    // Parameters to read from command-line processor
    //
    int frequency = -1;  // how often residuals are printed by solver
    int numRHS = 1;  // total number of right-hand sides to solve for
    int maxIters = 13000;  // maximum number of iterations for solver to use
    std::string filename ("bcsstk14.hb");
    double tol = 1.0e-5; // relative residual tolerance

    //
    // Read in command-line arguments
    //
    Teuchos::CommandLineProcessor cmdp (false, true);
    cmdp.setOption ("verbose", "quiet", &verbose, "Print messages and results.");
    cmdp.setOption ("frequency", &frequency, "Solvers frequency for printing "
        "residuals (#iters).");
    cmdp.setOption ("tol", &tol, "Relative residual tolerance used by MINRES "
        "solver.");
    cmdp.setOption ("filename", &filename, "Filename for Harwell-Boeing test "
        "matrix.");
    cmdp.setOption ("num-rhs", &numRHS, "Number of right-hand sides to solve.");
    cmdp.setOption ("max-iters", &maxIters, "Maximum number of iterations per "
        "linear system (-1 means \"adapt to problem/block size\").");
    if (cmdp.parse (argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
      return EXIT_FAILURE;
    }
    Teuchos::oblackholestream blackHole;
    std::ostream& verbOut = (verbose && MyPID == 0) ? std::cout : blackHole;

    //
    // Generate the linear system(s) to solve.
    //
    verbOut << "Generating the linear system(s) to solve" << endl << endl;
    RCP<Epetra_CrsMatrix> A;
    RCP<Epetra_MultiVector> B, X;
    RCP<Epetra_Map> rowMap;
    try {
      // This might change the number of right-hand sides, if we read in
      // a right-hand side from the Harwell-Boeing file.
      Belos::Util::createEpetraProblem (filename, &rowMap, &A, &B, &X, &MyPID, numRHS);
    } catch (std::exception& e) {
      TEUCHOS_TEST_FOR_EXCEPTION (true, std::runtime_error,
          "Failed to create Epetra problem for matrix "
          "filename \"" << filename << "\".  "
          "createEpetraProblem() reports the following "
          "error: " << e.what());
    }
    //
    // Compute the initial residual norm of the problem, so we can see
    // by how much it improved after the solve.
    //
    std::vector<double> initialResidualNorms (numRHS);
    std::vector<double> initialResidualInfNorms (numRHS);
    Epetra_MultiVector R (*rowMap, numRHS);
    OPT::Apply (*A, *X, R);
    MVT::MvAddMv (-1.0, R, 1.0, *B, R); // R := -(A*X) + B.
    MVT::MvNorm (R, initialResidualNorms);
    MVT::MvNorm (R, initialResidualInfNorms, Belos::InfNorm);
    if (verbose) {
      verbOut << "Initial residual 2-norms:            \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialResidualNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl << "Initial residual Inf-norms:          \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialResidualInfNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }

    std::vector<double> rhs2Norms (numRHS);
    std::vector<double> rhsInfNorms (numRHS);
    MVT::MvNorm (*B, rhs2Norms);
    MVT::MvNorm (*B, rhsInfNorms, Belos::InfNorm);
    if (verbose) {
      verbOut << "Right-hand side 2-norms:             \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << rhs2Norms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl << "Right-hand side Inf-norms:           \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << rhsInfNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }

    std::vector<double> initialGuess2Norms (numRHS);
    std::vector<double> initialGuessInfNorms (numRHS);
    MVT::MvNorm (*X, initialGuess2Norms);
    MVT::MvNorm (*X, initialGuessInfNorms, Belos::InfNorm);
    if (verbose) {
      verbOut << "Initial guess 2-norms:               \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialGuess2Norms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl << "Initial guess Inf-norms:             \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialGuessInfNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }
    //
    // Compute the infinity-norm of A.
    //
    const double normOfA = A->NormInf ();
    verbOut << "||A||_inf:                           \t" << normOfA << endl;
    //
    // Compute ||A|| ||X_i|| + ||B_i|| for each right-hand side B_i.
    //
    std::vector<double> scaleFactors (numRHS);
    for (int i = 0; i < numRHS; ++i) {
      scaleFactors[i] = normOfA * initialGuessInfNorms[i] + rhsInfNorms[i];
    }
    if (verbose) {
      verbOut << "||A||_inf ||X_i||_inf + ||B_i||_inf: \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << scaleFactors[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }

    //
    // Solve using Belos
    //
    verbOut << endl << "Setting up Belos" << endl;
    const int NumGlobalElements = B->GlobalLength();

    // Set up Belos solver parameters.
    RCP<ParameterList> belosList = parameterList ("MINRES");
    belosList->set ("Maximum Iterations", maxIters);
    belosList->set ("Convergence Tolerance", tol);
    if (verbose) {
      belosList->set ("Verbosity", Belos::Errors + Belos::Warnings +
          Belos::IterationDetails + Belos::OrthoDetails +
          Belos::FinalSummary + Belos::TimingDetails + Belos::Debug);
      belosList->set ("Output Frequency", frequency);
    }
    else {
      belosList->set ("Verbosity", Belos::Errors + Belos::Warnings);
    }
    belosList->set ("Output Stream", rcpFromRef (verbOut));

    // Construct an unpreconditioned linear problem instance.
    typedef Belos::LinearProblem<double,MV,OP> prob_type;
    RCP<prob_type> problem = rcp (new prob_type (A, X, B));
    if (! problem->setProblem()) {
      verbOut << endl << "ERROR:  Failed to set up Belos::LinearProblem!" << endl;
      return EXIT_FAILURE;
    }

    // Create an iterative solver manager.
    Belos::SolverFactory<double, MV, OP> factory;
    RCP<Belos::SolverManager<double,MV,OP> > newSolver =
      factory.create ("MINRES", belosList);
    newSolver->setProblem (problem);

    // Print out information about problem.  Make sure to use the
    // information as stored in the Belos ParameterList, so that we know
    // what the solver will do.
    verbOut << endl
      << "Dimension of matrix: " << NumGlobalElements << endl
      << "Number of right-hand sides: " << numRHS << endl
      << "Max number of MINRES iterations: "
      << belosList->get<int> ("Maximum Iterations") << endl
      << "Relative residual tolerance: "
      << belosList->get<double> ("Convergence Tolerance") << endl
      << "Output frequency: "
      << belosList->get<int> ("Output Frequency") << endl
      << endl;

    // Solve the linear system.
    verbOut << "Solving the linear system" << endl << endl;
    Belos::ReturnType ret = newSolver->solve();
    verbOut << "Belos results:" << endl
      << "- Number of iterations: "
      << newSolver->getNumIters () << endl
      << "- " << (ret == Belos::Converged ? "Converged" : "Not converged")
      << endl;
    //
    // After the solve, compute residual(s) explicitly.  This tests
    // whether the Belos solver did so correctly.
    //
    std::vector<double> absoluteResidualNorms (numRHS);
    OPT::Apply (*A, *X, R);
    MVT::MvAddMv (-1.0, R, 1.0, *B, R);
    MVT::MvNorm (R, absoluteResidualNorms);

    std::vector<double> relativeResidualNorms (numRHS);
    for (int i = 0; i < numRHS; ++i) {
      relativeResidualNorms[i] = (initialResidualNorms[i] == 0.0) ?
        absoluteResidualNorms[i] :
        absoluteResidualNorms[i] / initialResidualNorms[i];
    }

    verbOut << "---------- Computed relative residual norms ----------"
      << endl << endl;
    bool badRes = false;
    if (verbose) {
      for (int i = 0; i < numRHS; ++i) {
        const double actRes = relativeResidualNorms[i];
        verbOut << "Problem " << i << " : \t" << actRes << endl;
        if (actRes > tol) {
          badRes = true;
        }
      }
    }

#   ifdef BELOS_TEUCHOS_TIME_MONITOR
    Teuchos::TimeMonitor::summarize (verbOut);
#   endif // BELOS_TEUCHOS_TIME_MONITOR

    success = (ret == Belos::Converged && !badRes);

    if (success) {
      verbOut << endl << "End Result: TEST PASSED" << endl;
    } else {
      verbOut << endl << "End Result: TEST FAILED" << endl;
    }
  } // try
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
} // end test_minres_hb.cpp
Esempio n. 11
0
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using namespace MueLuTests;

#ifdef __GNUC__
#warning Navier2DBlocked_test based tests are disabled on 12/11/2013 due to some thrown exception
#endif
  return EXIT_SUCCESS;


  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);
  //
  RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  out->setOutputToRootOnly(0);
  *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

  // Timing
  Teuchos::Time myTime("global");
  Teuchos::TimeMonitor MM(myTime);

  // read in some command line parameters
  Teuchos::CommandLineProcessor clp(false);

  int rebalanceBlock0 = 1;      clp.setOption("rebalanceBlock0",       &rebalanceBlock0,     "rebalance block 0 (1=yes, else=no)");
  int rebalanceBlock1 = 1;      clp.setOption("rebalanceBlock1",       &rebalanceBlock1,     "rebalance block 1 (1=yes, else=no)");

  switch (clp.parse(argc,argv)) {
    case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
    case Teuchos::CommandLineProcessor::PARSE_ERROR:
    case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
    case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
  }


#if defined(HAVE_MPI) && defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MUELU_ISORROPIA)
#ifndef HAVE_TEUCHOS_LONG_LONG_INT
  *out << "Warning: scaling test was not compiled with long long int support" << std::endl;


  // custom parameters
  LocalOrdinal maxLevels = 3;

  GlobalOrdinal maxCoarseSize=1; //FIXME clp doesn't like long long int

  int globalNumDofs = 8898;  // used for the maps
  int nDofsPerNode = 3;      // used for generating the fine level null-space

  // build strided maps
  // striding information: 2 velocity dofs and 1 pressure dof = 3 dofs per node
  std::vector<size_t> stridingInfo;
  stridingInfo.push_back(2);
  stridingInfo.push_back(1);

  /////////////////////////////////////// build strided maps
  // build strided maps:
  // xstridedfullmap: full map (velocity and pressure dof gids), continous
  // xstridedvelmap: only velocity dof gid maps (i.e. 0,1,3,4,6,7...)
  // xstridedpremap: only pressure dof gid maps (i.e. 2,5,8,...)
  Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;
  RCP<StridedMap> xstridedfullmap = StridedMapFactory::Build(lib,globalNumDofs,0,stridingInfo,comm,-1);
  RCP<StridedMap> xstridedvelmap  = StridedMapFactory::Build(xstridedfullmap,0);
  RCP<StridedMap> xstridedpremap  = StridedMapFactory::Build(xstridedfullmap,1);

  /////////////////////////////////////// transform Xpetra::Map objects to Epetra
  // this is needed for AztecOO
  const RCP<const Epetra_Map> fullmap = rcpFromRef(Xpetra::toEpetra(*xstridedfullmap));
  RCP<const Epetra_Map>       velmap  = rcpFromRef(Xpetra::toEpetra(*xstridedvelmap));
  RCP<const Epetra_Map>       premap  = rcpFromRef(Xpetra::toEpetra(*xstridedpremap));

  /////////////////////////////////////// import problem matrix and RHS from files (-> Epetra)

  // read in problem
  Epetra_CrsMatrix * ptrA = 0;
  Epetra_Vector * ptrf = 0;
  Epetra_MultiVector* ptrNS = 0;

  *out << "Reading matrix market file" << std::endl;

  EpetraExt::MatrixMarketFileToCrsMatrix("A5932_re1000.txt",*fullmap,*fullmap,*fullmap,ptrA);
  EpetraExt::MatrixMarketFileToVector("b5932_re1000.txt",*fullmap,ptrf);
  //EpetraExt::MatrixMarketFileToCrsMatrix("/home/tobias/promotion/trilinos/fc17-dyn/packages/muelu/test/navierstokes/A5932_re1000.txt",*fullmap,*fullmap,*fullmap,ptrA);
  //EpetraExt::MatrixMarketFileToVector("/home/tobias/promotion/trilinos/fc17-dyn/packages/muelu/test/navierstokes/b5932_re1000.txt",*fullmap,ptrf);

  RCP<Epetra_CrsMatrix> epA = Teuchos::rcp(ptrA);
  RCP<Epetra_Vector> epv = Teuchos::rcp(ptrf);
  RCP<Epetra_MultiVector> epNS = Teuchos::rcp(ptrNS);


  /////////////////////////////////////// split system into 2x2 block system

  *out << "Split matrix into 2x2 block matrix" << std::endl;

  // split fullA into A11,..., A22
  Teuchos::RCP<Epetra_CrsMatrix> A11;
  Teuchos::RCP<Epetra_CrsMatrix> A12;
  Teuchos::RCP<Epetra_CrsMatrix> A21;
  Teuchos::RCP<Epetra_CrsMatrix> A22;

  if(SplitMatrix2x2(epA,*velmap,*premap,A11,A12,A21,A22)==false)
    *out << "Problem with splitting matrix"<< std::endl;

  /////////////////////////////////////// transform Epetra objects to Xpetra (needed for MueLu)

  // build Xpetra objects from Epetra_CrsMatrix objects
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA11 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A11));
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA12 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A12));
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA21 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A21));
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA22 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A22));

  /////////////////////////////////////// generate MapExtractor object

  std::vector<Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > > xmaps;
  xmaps.push_back(xstridedvelmap);
  xmaps.push_back(xstridedpremap);

  Teuchos::RCP<const Xpetra::MapExtractor<Scalar,LocalOrdinal,GlobalOrdinal,Node> > map_extractor = Xpetra::MapExtractorFactory<Scalar,LocalOrdinal,GlobalOrdinal>::Build(xstridedfullmap,xmaps);

  /////////////////////////////////////// build blocked transfer operator
  // using the map extractor
  Teuchos::RCP<Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > bOp = Teuchos::rcp(new Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal>(map_extractor,map_extractor,10));
  bOp->setMatrix(0,0,xA11);
  bOp->setMatrix(0,1,xA12);
  bOp->setMatrix(1,0,xA21);
  bOp->setMatrix(1,1,xA22);

  bOp->fillComplete();

  //////////////////////////////////////////////////// create Hierarchy
  RCP<Hierarchy> H = rcp ( new Hierarchy() );
  H->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  //H->setDefaultVerbLevel(Teuchos::VERB_NONE);
  H->SetMaxCoarseSize(maxCoarseSize);

  //////////////////////////////////////////////////////// finest Level
  RCP<MueLu::Level> Finest = H->GetLevel();
  Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  Finest->Set("A",Teuchos::rcp_dynamic_cast<Matrix>(bOp));


  ////////////////////////////////////////// prepare null space for A11
  RCP<MultiVector> nullspace11 = MultiVectorFactory::Build(xstridedvelmap, 2);  // this is a 2D standard null space

  for (int i=0; i<nDofsPerNode-1; ++i) {
    Teuchos::ArrayRCP<Scalar> nsValues = nullspace11->getDataNonConst(i);
    int numBlocks = nsValues.size() / (nDofsPerNode - 1);
    for (int j=0; j< numBlocks; ++j) {
      nsValues[j*(nDofsPerNode - 1) + i] = 1.0;
    }
  }

  Finest->Set("Nullspace1",nullspace11);

  ////////////////////////////////////////// prepare null space for A22
  RCP<MultiVector> nullspace22 = MultiVectorFactory::Build(xstridedpremap, 1);  // this is a 2D standard null space
  Teuchos::ArrayRCP<Scalar> nsValues22 = nullspace22->getDataNonConst(0);
  for (int j=0; j< nsValues22.size(); ++j) {
    nsValues22[j] = 1.0;
  }

  Finest->Set("Nullspace2",nullspace22);


  /////////////////////////////////////////// define rebalanced block AC factory
  // This is the main factory for "A" and defines the input for
  //   - the SubBlockAFactory objects
  //   - the rebalanced block Ac factory
  RCP<RebalanceBlockAcFactory> RebalancedAcFact = rcp(new RebalanceBlockAcFactory());

  /////////////////////////////////////////// define non-rebalanced blocked transfer ops
  RCP<BlockedPFactory> PFact = rcp(new BlockedPFactory()); // use row map index base from bOp
  RCP<GenericRFactory> RFact = rcp(new GenericRFactory());
  RFact->SetFactory("P", PFact);

  // non-rebalanced block coarse matrix factory
  // output is non-rebalanced coarse block matrix Ac
  // used as input for rebalanced block coarse factory RebalancedAcFact
  RCP<Factory> AcFact = rcp(new BlockedRAPFactory());
  AcFact->SetFactory("A", MueLu::NoFactory::getRCP());
  AcFact->SetFactory("P", PFact);  // use non-rebalanced block prolongator as input
  AcFact->SetFactory("R", RFact);  // use non-rebalanced block restrictor as input

  // define matrix sub-blocks of possibly rebalanced block matrix A
  // These are used as input for
  //   - the sub blocks of the transfer operators
  RCP<SubBlockAFactory> A11Fact = Teuchos::rcp(new SubBlockAFactory());
  A11Fact->SetFactory("A",MueLu::NoFactory::getRCP());
  A11Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
  A11Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
  RCP<SubBlockAFactory> A22Fact = Teuchos::rcp(new SubBlockAFactory());
  A22Fact->SetFactory("A",MueLu::NoFactory::getRCP());
  A22Fact->SetParameter("block row",Teuchos::ParameterEntry(1));
  A22Fact->SetParameter("block col",Teuchos::ParameterEntry(1));

  /////////////////////////////////////////// define rebalancing factories
  // define sub blocks of the coarse non-rebalanced block matrix Ac
  // input is the block operator generated by AcFact
  RCP<SubBlockAFactory> rebA11Fact = Teuchos::rcp(new SubBlockAFactory());
  rebA11Fact->SetFactory("A",AcFact);
  rebA11Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
  rebA11Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
  RCP<SubBlockAFactory> rebA22Fact = Teuchos::rcp(new SubBlockAFactory());
  rebA22Fact->SetFactory("A",AcFact);
  rebA22Fact->SetParameter("block row",Teuchos::ParameterEntry(1));
  rebA22Fact->SetParameter("block col",Teuchos::ParameterEntry(1));

  // define rebalancing factory for coarse block matrix A(1,1)
  RCP<AmalgamationFactory> rebAmalgFact11 = rcp(new AmalgamationFactory());
  rebAmalgFact11->SetFactory("A", rebA11Fact);
  rebAmalgFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

  RCP<MueLu::IsorropiaInterface<LO, GO, NO, LMO> > isoInterface1 = rcp(new MueLu::IsorropiaInterface<LO, GO, NO, LMO>());
  isoInterface1->SetFactory("A", rebA11Fact);
  isoInterface1->SetFactory("UnAmalgamationInfo", rebAmalgFact11);

  RCP<MueLu::RepartitionInterface<LO, GO, NO, LMO> > repInterface1 = rcp(new MueLu::RepartitionInterface<LO, GO, NO, LMO>());
  repInterface1->SetFactory("A", rebA11Fact);
  repInterface1->SetFactory("AmalgamatedPartition", isoInterface1);
  repInterface1->SetFactory("UnAmalgamationInfo", rebAmalgFact11);

  // Repartitioning (creates "Importer" from "Partition")
  RCP<Factory> RepartitionFact = rcp(new RepartitionFactory());
  {
    Teuchos::ParameterList paramList;
    paramList.set("minRowsPerProcessor", 200);
    paramList.set("nonzeroImbalance", 1.3);
    if(rebalanceBlock0 == 1)
      paramList.set("startLevel",1);
    else
      paramList.set("startLevel",10); // supress rebalancing
    RepartitionFact->SetParameterList(paramList);
  }
  RepartitionFact->SetFactory("A", rebA11Fact);
  RepartitionFact->SetFactory("Partition", repInterface1);

  // define rebalancing factory for coarse block matrix A(1,1)
  RCP<AmalgamationFactory> rebAmalgFact22 = rcp(new AmalgamationFactory());
  rebAmalgFact22->SetFactory("A", rebA22Fact);
  rebAmalgFact22->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

  RCP<MueLu::IsorropiaInterface<LO, GO, NO, LMO> > isoInterface2 = rcp(new MueLu::IsorropiaInterface<LO, GO, NO, LMO>());
  isoInterface2->SetFactory("A", rebA22Fact);
  isoInterface2->SetFactory("UnAmalgamationInfo", rebAmalgFact22);

  RCP<MueLu::RepartitionInterface<LO, GO, NO, LMO> > repInterface2 = rcp(new MueLu::RepartitionInterface<LO, GO, NO, LMO>());
  repInterface2->SetFactory("A", rebA22Fact);
  repInterface2->SetFactory("AmalgamatedPartition", isoInterface2);
  repInterface2->SetFactory("UnAmalgamationInfo", rebAmalgFact22);

  // second repartition factory
  RCP<Factory> RepartitionFact2 = rcp(new RepartitionFactory());
  {
    Teuchos::ParameterList paramList;
    paramList.set("minRowsPerProcessor", 100);
    paramList.set("nonzeroImbalance", 1.2);
    if(rebalanceBlock1 == 1)
      paramList.set("startLevel",1);
    else
      paramList.set("startLevel",10); // supress rebalancing
    RepartitionFact2->SetParameterList(paramList);
  }
  RepartitionFact2->SetFactory("A", rebA22Fact);
  RepartitionFact2->SetFactory("Partition", repInterface2); // this is not valid

  ////////////////////////////////////////// build non-rebalanced matrix blocks
  // build factories for transfer operator P(1,1) and R(1,1)
  RCP<AmalgamationFactory> amalgFact11 = rcp(new AmalgamationFactory());
  amalgFact11->SetFactory("A", A11Fact);
  amalgFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

  RCP<CoalesceDropFactory> dropFact11 = rcp(new CoalesceDropFactory());
  dropFact11->SetFactory("A", A11Fact);
  dropFact11->SetFactory("UnAmalgamationInfo", amalgFact11);
  dropFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

  RCP<UncoupledAggregationFactory> UncoupledAggFact11 = rcp(new UncoupledAggregationFactory());
  UncoupledAggFact11->SetFactory("Graph", dropFact11);
  UncoupledAggFact11->SetMinNodesPerAggregate(9);
  UncoupledAggFact11->SetMaxNeighAlreadySelected(2);
  UncoupledAggFact11->SetOrdering(MueLu::AggOptions::NATURAL);

  RCP<CoarseMapFactory> coarseMapFact11 = Teuchos::rcp(new CoarseMapFactory());
  coarseMapFact11->setStridingData(stridingInfo);
  coarseMapFact11->setStridedBlockId(0);

  RCP<TentativePFactory> P11Fact = rcp(new TentativePFactory());
  RCP<TransPFactory> R11Fact = rcp(new TransPFactory());

  Teuchos::RCP<NullspaceFactory> nspFact11 = Teuchos::rcp(new NullspaceFactory("Nullspace1"));
  nspFact11->SetFactory("Nullspace1",P11Fact); // pick "Nullspace1" from Finest level

  //////////////////////////////// define factory manager for (1,1) block
  RCP<FactoryManager> M11 = rcp(new FactoryManager());
  M11->SetFactory("A", A11Fact);  // rebalanced fine-level block operator
  M11->SetFactory("P", P11Fact);  // non-rebalanced transfer operator block P(1,1)
  M11->SetFactory("R", R11Fact);  // non-rebalanced transfer operator block R(1,1)
  M11->SetFactory("Aggregates", UncoupledAggFact11);
  M11->SetFactory("Graph", dropFact11);
  M11->SetFactory("DofsPerNode", dropFact11);
  M11->SetFactory("UnAmalgamationInfo", amalgFact11);
  M11->SetFactory("Nullspace", nspFact11); // TODO check me?
  M11->SetFactory("CoarseMap", coarseMapFact11);
  M11->SetIgnoreUserData(true);               // always use data from factories defined in factory manager

  ////////////////////////////////////////// build non-rebalanced matrix blocks
  // build factories for transfer operator P(2,2) and R(2,2)
  RCP<AmalgamationFactory> amalgFact22 = rcp(new AmalgamationFactory());
  RCP<TentativePFactory> P22Fact = rcp(new TentativePFactory());
  RCP<TransPFactory> R22Fact = rcp(new TransPFactory());

  /*XXX*/
  RCP<CoalesceDropFactory> dropFact22 = rcp(new CoalesceDropFactory());
  dropFact22->SetFactory("A", A22Fact);
  dropFact22->SetFactory("UnAmalgamationInfo", amalgFact22);
  dropFact22->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

  /*XXX*/
  RCP<UncoupledAggregationFactory> UncoupledAggFact22 = rcp(new UncoupledAggregationFactory());
  UncoupledAggFact22->SetFactory("Graph", dropFact22);
  UncoupledAggFact22->SetMinNodesPerAggregate(6);
  UncoupledAggFact22->SetMaxNeighAlreadySelected(2);
  UncoupledAggFact22->SetOrdering(MueLu::AggOptions::NATURAL);

  // connect null space and tentative PFactory
  Teuchos::RCP<NullspaceFactory> nspFact22 = Teuchos::rcp(new NullspaceFactory("Nullspace2"));
  nspFact22->SetFactory("Nullspace2", P22Fact); // define null space generated by P22Fact as null space for coarse level (non-rebalanced)

  RCP<CoarseMapFactory> coarseMapFact22 = Teuchos::rcp(new CoarseMapFactory());
  coarseMapFact22->setStridingData(stridingInfo);
  coarseMapFact22->setStridedBlockId(1);

  //////////////////////////////// define factory manager for (2,2) block
  RCP<FactoryManager> M22 = rcp(new FactoryManager());
  M22->SetFactory("A", A22Fact); // rebalanced fine-level block operator
  M22->SetFactory("P", P22Fact); // non-rebalanced transfer operator P(2,2)
  M22->SetFactory("R", R22Fact); // non-rebalanced transfer operator R(2,2)
  M22->SetFactory("Aggregates", UncoupledAggFact22 /* UncoupledAggFact11 *//*XXX*/); // aggregates from block (1,1)
  M22->SetFactory("Graph", dropFact22);
  M22->SetFactory("DofsPerNode", dropFact22);
  M22->SetFactory("Nullspace", nspFact22); // TODO check me
  M22->SetFactory("UnAmalgamationInfo", amalgFact22);
  M22->SetFactory("Ptent", P22Fact);
  M22->SetFactory("CoarseMap", coarseMapFact22);
  M22->SetIgnoreUserData(true);

  /////////////////////////////////////////// define rebalanced blocked transfer ops
  //////////////////////////////// define factory manager for (1,1) block
  RCP<FactoryManager> rebM11 = rcp(new FactoryManager());
  rebM11->SetFactory("A", AcFact ); // important: must be a 2x2 block A Factory
  //rebM11->SetFactory("P", PFact); // use non-rebalanced block P operator as input
  //rebM11->SetFactory("R", RFact); // use non-rebalanced block R operator as input
  rebM11->SetFactory("Importer", RepartitionFact);
  rebM11->SetFactory("Nullspace", nspFact11);
  //rebM11->SetIgnoreUserData(true);

  RCP<FactoryManager> rebM22 = rcp(new FactoryManager());
  rebM22->SetFactory("A", AcFact ); // important: must be a 2x2 block A Factory
  rebM22->SetFactory("Importer", RepartitionFact2); // use dummy repartitioning factory
  rebM22->SetFactory("Nullspace", nspFact22);

  // Reordering of the transfer operators
  RCP<RebalanceBlockInterpolationFactory> RebalancedBlockPFact = rcp(new RebalanceBlockInterpolationFactory());
  RebalancedBlockPFact->SetFactory("P", PFact); // use non-rebalanced block P operator as input
  RebalancedBlockPFact->AddFactoryManager(rebM11);
  RebalancedBlockPFact->AddFactoryManager(rebM22);

  RCP<RebalanceBlockRestrictionFactory> RebalancedBlockRFact = rcp(new RebalanceBlockRestrictionFactory());
  //RebalancedBlockRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction")));
  RebalancedBlockRFact->SetFactory("R", RFact); // non-rebalanced block P operator
  RebalancedBlockRFact->AddFactoryManager(rebM11);
  RebalancedBlockRFact->AddFactoryManager(rebM22);

  ///////////////////////////////////////// initialize non-rebalanced block transfer operators
  // output are the non-rebalanced block transfer operators used as input in AcFact to build
  // the non-rebalanced coarse level block matrix Ac
  PFact->AddFactoryManager(M11);  // use non-rebalanced information from sub block factory manager M11
  PFact->AddFactoryManager(M22);  // use non-rebalanced information from sub block factory manager M22

  ///////////////////////////////////////// initialize rebalanced coarse block AC factory
  RebalancedAcFact->SetFactory("A", AcFact);   // use non-rebalanced block operator as input
  RebalancedAcFact->AddFactoryManager(rebM11);
  RebalancedAcFact->AddFactoryManager(rebM22);

  //////////////////////////////////////////////////////////////////////
  // Smoothers

  //Another factory manager for braes sarazin smoother
  //Schur Complement Factory, using the factory to generate AcFact
    SC omega = 1.3;
    RCP<SchurComplementFactory> SFact = Teuchos::rcp(new SchurComplementFactory());
    SFact->SetParameter("omega", Teuchos::ParameterEntry(omega));
    SFact->SetFactory("A", MueLu::NoFactory::getRCP()); // this finally be the rebalanced block operator!

    //Smoother Factory, using SFact as a factory for A
    std::string ifpackSCType;
    Teuchos::ParameterList ifpackSCList;
    ifpackSCList.set("relaxation: sweeps", (LocalOrdinal) 3);
    ifpackSCList.set("relaxation: damping factor", (Scalar) 1.0);
    ifpackSCType = "RELAXATION";
    ifpackSCList.set("relaxation: type", "Gauss-Seidel");
    RCP<SmootherPrototype> smoProtoSC     = rcp( new TrilinosSmoother(ifpackSCType, ifpackSCList, 0) );
    smoProtoSC->SetFactory("A", SFact);
    RCP<SmootherFactory> SmooSCFact = rcp( new SmootherFactory(smoProtoSC) );

    RCP<BraessSarazinSmoother> smootherPrototype     = rcp( new BraessSarazinSmoother() );
    smootherPrototype->SetParameter("Sweeps", Teuchos::ParameterEntry(3));
    smootherPrototype->SetParameter("Damping factor", Teuchos::ParameterEntry(omega));
    smootherPrototype->SetFactory("A",MueLu::NoFactory::getRCP());
  RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(smootherPrototype) );

  RCP<BraessSarazinSmoother> coarseSolverPrototype = rcp( new BraessSarazinSmoother() );
  coarseSolverPrototype->SetParameter("Sweeps", Teuchos::ParameterEntry(3));
  coarseSolverPrototype->SetParameter("Damping factor", Teuchos::ParameterEntry(omega));

  coarseSolverPrototype->SetFactory("A",MueLu::NoFactory::getRCP());
  RCP<SmootherFactory>   coarseSolverFact      = rcp( new SmootherFactory(coarseSolverPrototype, Teuchos::null) );

  RCP<FactoryManager> MB = rcp(new FactoryManager());
  MB->SetFactory("A",     SFact);
  MB->SetFactory("Smoother",    SmooSCFact);
  MB->SetIgnoreUserData(true);               // always use data from factories defined in factory manager
  smootherPrototype->AddFactoryManager(MB,0);
  coarseSolverPrototype->AddFactoryManager(MB,0);


  ////////////////////////////////////////// define main factory manager
  FactoryManager M;
  M.SetFactory("A",            RebalancedAcFact);     // rebalance block AC Factory using importer
  M.SetFactory("P",            RebalancedBlockPFact); // rebalance prolongator using non-balanced Ac
  M.SetFactory("R",            RebalancedBlockRFact); // rebalance restrictor and null space using non-balanced Ac
  M.SetFactory("Smoother",     smootherFact);
  M.SetFactory("PreSmoother",     smootherFact);
  M.SetFactory("PostSmoother",     smootherFact);
  M.SetFactory("CoarseSolver", coarseSolverFact);

  H->Setup(M,0,maxLevels);

  /**out << std::endl;
  *out << "print content of multigrid levels:" << std::endl;

  Finest->print(*out);

  RCP<Level> coarseLevel = H->GetLevel(1);
  coarseLevel->print(*out);

  RCP<Level> coarseLevel2 = H->GetLevel(2);
  coarseLevel2->print(*out);*/

  RCP<MultiVector> xLsg = MultiVectorFactory::Build(xstridedfullmap,1);

  // Use AMG directly as an iterative method
#if 0
  {
    xLsg->putScalar( (SC) 0.0);

    // Epetra_Vector -> Xpetra::Vector
    RCP<Vector> xRhs = Teuchos::rcp(new Xpetra::EpetraVector(epv));

    // calculate initial (absolute) residual
    Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> norms(1);
    xRhs->norm2(norms);
    *out << "||x_0|| = " << norms[0] << std::endl;

    // apply ten multigrid iterations
    H->Iterate(*xRhs,100,*xLsg);


    // calculate and print residual
    RCP<MultiVector> xTmp = MultiVectorFactory::Build(xstridedfullmap,1);
    bOp->apply(*xLsg,*xTmp,Teuchos::NO_TRANS,(SC)1.0,(SC)0.0);
    xRhs->update((SC)-1.0,*xTmp,(SC)1.0);
    xRhs->norm2(norms);
    *out << "||x|| = " << norms[0] << std::endl;
  }
#endif

  //
  // Solve Ax = b using AMG as a preconditioner in AztecOO
  //
  {
    RCP<Epetra_Vector> X = rcp(new Epetra_Vector(epv->Map()));
    X->PutScalar(0.0);
    Epetra_LinearProblem epetraProblem(epA.get(), X.get(), epv.get());

    AztecOO aztecSolver(epetraProblem);
    aztecSolver.SetAztecOption(AZ_solver, AZ_gmres);

    MueLu::EpetraOperator aztecPrec(H);
    aztecSolver.SetPrecOperator(&aztecPrec);

    int maxIts = 50;
    double tol = 1e-8;

    aztecSolver.Iterate(maxIts, tol);
  }

#endif // end ifndef HAVE_LONG_LONG_INT
#endif // #if defined(HAVE_MPI) && defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MUELU_ISORROPIA)
   return EXIT_SUCCESS;
}
int 
main (int argc, char *argv[]) 
{
  using Teuchos::Comm;
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::oblackholestream;
  using Teuchos::OSTab;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcpFromRef;
  using std::cout;
  using std::endl;
  //
  // Typedefs for Tpetra template arguments.
  //
  typedef double scalar_type;
  typedef long int global_ordinal_type;
  typedef int local_ordinal_type;
  typedef Kokkos::DefaultNode::DefaultNodeType node_type;
  //
  // Tpetra objects which are the MV and OP template parameters of the
  // Belos specialization which we are testing.
  //
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV;
  typedef Tpetra::Operator<scalar_type, local_ordinal_type, global_ordinal_type, node_type> OP;
  // 
  // Other typedefs.
  // 
  typedef Teuchos::ScalarTraits<scalar_type> STS;
  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> sparse_matrix_type;

  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &cout);
  RCP<const Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
  RCP<node_type> node = Tpetra::DefaultPlatform::getDefaultPlatform().getNode();
  RCP<oblackholestream> blackHole (new oblackholestream);
  const int myRank = comm->getRank();

  // Output stream that prints only on Rank 0.
  RCP<FancyOStream> out;
  if (myRank == 0) {
    out = Teuchos::getFancyOStream (rcpFromRef (cout));
  } else {
    out = Teuchos::getFancyOStream (blackHole);
  }

  //
  // Get test parameters from command-line processor.
  //  
  // CommandLineProcessor always understands int, but may not
  // understand global_ordinal_type.  We convert to the latter below.
  int numRows = comm->getSize() * 100;
  bool tolerant = false;
  bool verbose = false;
  bool debug = false;
  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption("numRows", &numRows,
		 "Global number of rows (and columns) in the sparse matrix to generate.");
  cmdp.setOption("tolerant", "intolerant", &tolerant,
		 "Whether to parse files tolerantly.");
  cmdp.setOption("verbose", "quiet", &verbose, 
		 "Print messages and results.");
  cmdp.setOption("debug", "release", &debug, 
		 "Run debugging checks and print copious debugging output.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    *out << "\nEnd Result: TEST FAILED" << endl;
    return EXIT_FAILURE;
  }
  // Output stream for verbose output.
  RCP<FancyOStream> verbOut = verbose ? out : getFancyOStream (blackHole);

  const bool success = true;

  // Test whether it's possible to instantiate the solver.
  // This is a minimal compilation test.
  *verbOut << "Instantiating Block GCRODR solver" << endl;
  Belos::BlockGCRODRSolMgr<scalar_type, MV, OP> solver;
  //
  // Test setting solver parameters.  For now, we just use an empty
  // (but non-null) parameter list, which the solver should fill in
  // with defaults.
  //
  *verbOut << "Setting solver parameters" << endl;
  RCP<ParameterList> solverParams = parameterList ();
  solver.setParameters (solverParams);
  //
  // Create a linear system to solve.
  //
  *verbOut << "Creating linear system" << endl;
  RCP<sparse_matrix_type> A;
  RCP<MV> X_guess, X_exact, B;
  {
    typedef Belos::Tpetra::ProblemMaker<sparse_matrix_type> factory_type;
    factory_type factory (comm, node, out, tolerant, debug);
    
    RCP<ParameterList> problemParams = parameterList ();
    problemParams->set ("Global number of rows", 
			static_cast<global_ordinal_type> (numRows));
    problemParams->set ("Problem type", std::string ("Nonsymmetric"));
    factory.makeProblem (A, X_guess, X_exact, B, problemParams);
  }
  // Approximate solution vector is a copy of the guess vector.
  RCP<MV> X (new MV (*X_guess));

  TEUCHOS_TEST_FOR_EXCEPTION(A.is_null(), std::logic_error,
			     "The sparse matrix is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X_guess.is_null(), std::logic_error,
			     "The initial guess X_guess is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X_exact.is_null(), std::logic_error,
			     "The exact solution X_exact is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(B.is_null(), std::logic_error,
			     "The right-hand side B is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X.is_null(), std::logic_error,
			     "The approximate solution vector X is null!");

  typedef Belos::LinearProblem<scalar_type, MV, OP> problem_type;
  RCP<problem_type> problem (new problem_type (A, X, B));
  problem->setProblem ();
  solver.setProblem (problem);

  *verbOut << "Solving linear system" << endl;
  Belos::ReturnType result = solver.solve ();

  *verbOut << "Result of solve: " 
	   << Belos::convertReturnTypeToString (result) 
	   << endl;
  if (success) {
    *out << "\nEnd Result: TEST PASSED" << endl;
    return EXIT_SUCCESS;
  } 
  else {
    *out << "\nEnd Result: TEST FAILED" << endl;
    return EXIT_FAILURE;
  }
}
Esempio n. 13
0
Teuchos::RCP<const Teuchos::ParameterList>
LSQRSolMgr<ScalarType,MV,OP,false>::getValidParameters() const
{
    using Teuchos::ParameterList;
    using Teuchos::parameterList;
    using Teuchos::RCP;
    using Teuchos::rcp;
    using Teuchos::rcpFromRef;
    typedef Teuchos::ScalarTraits<MagnitudeType> STM;

    // Set all the valid parameters and their default values.
    if (is_null (validParams_)) {
        // We use Teuchos::as just in case MagnitudeType doesn't have a
        // constructor that takes an int.  Otherwise, we could just write
        // "MagnitudeType(10)".
        const MagnitudeType ten = Teuchos::as<MagnitudeType> (10);
        const MagnitudeType sqrtEps = STM::squareroot (STM::eps());

        const MagnitudeType lambda = STM::zero();
        RCP<std::ostream> outputStream = rcpFromRef (std::cout);
        const MagnitudeType relRhsErr = ten * sqrtEps;
        const MagnitudeType relMatErr = ten * sqrtEps;
        const MagnitudeType condMax = STM::one() / STM::eps();
        const int maxIters = 1000;
        const int termIterMax = 1;
        const std::string orthoType ("DGKS");
        const MagnitudeType orthoKappa = Teuchos::as<MagnitudeType> (-1.0);
        const int verbosity = Belos::Errors;
        const int outputStyle = Belos::General;
        const int outputFreq = -1;
        const std::string label ("Belos");

        RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
        pl->set("Output Stream", outputStream,
                "is a reference-counted pointer to the output stream receiving\n"
                "all solver output.");
        pl->set("Lambda", lambda, "is the damping parameter.");
        pl->set("Rel RHS Err", relRhsErr,
                "estimates the error in the data defining the right-\n"
                "hand side.");
        pl->set("Rel Mat Err", relMatErr,
                "estimates the error in the data defining the matrix.");
        pl->set("Condition Limit", condMax,
                "bounds the estimated condition number of Abar.");
        pl->set("Maximum Iterations", maxIters,
                "allows at most the maximum number of iterations.");
        pl->set("Term Iter Max", termIterMax,
                "consecutive iterations meeting thresholds are necessary for\n"
                "for convergence.");
        pl->set("Orthogonalization", orthoType,
                "uses orthogonalization of either DGKS, ICGS, IMGS, or TSQR.");
        {
            OrthoManagerFactory<ScalarType, MV, OP> factory;
            pl->set("Orthogonalization", orthoType,
                    "refers to the orthogonalization method to use.  Valid "
                    "options: " + factory.validNamesString());
            RCP<const ParameterList> orthoParams =
                factory.getDefaultParameters (orthoType);
            pl->set ("Orthogonalization Parameters", *orthoParams,
                     "Parameters specific to the type of orthogonalization used.");
        }
        pl->set("Orthogonalization Constant", orthoKappa,
                "is the threshold used by DGKS orthogonalization to determine\n"
                "whether or not to repeat classical Gram-Schmidt.  This parameter\n"
                "is ignored if \"Orthogonalization\" is not \"DGKS\".");
        pl->set("Verbosity", verbosity,
                "type(s) of solver information are outputted to the output\n"
                "stream.");
        pl->set("Output Style", outputStyle,
                "the style used for the solver information outputted to the\n"
                "output stream.");
        pl->set("Output Frequency", outputFreq,
                "is the frequency at which information is written to the\n"
                "output stream.");
        pl->set("Timer Label", label,
                "is the string to use as a prefix for the timer labels.");
        //  pl->set("Restart Timers", restartTimers_);
        pl->set("Block Size", 1, "Block size parameter (currently, this must always be 1).");

        validParams_ = pl;
    }
    return validParams_;
}
void DenseContainer<MatrixType, LocalScalarType>::
applyImpl (const local_mv_type& X,
           local_mv_type& Y,
           Teuchos::ETransp mode,
           LocalScalarType alpha,
           LocalScalarType beta) const
{
    using Teuchos::ArrayRCP;
    using Teuchos::RCP;
    using Teuchos::rcp;
    using Teuchos::rcpFromRef;

    TEUCHOS_TEST_FOR_EXCEPTION(
        X.getLocalLength () != Y.getLocalLength (),
        std::logic_error, "Ifpack2::DenseContainer::applyImpl: X and Y have "
        "incompatible dimensions (" << X.getLocalLength () << " resp. "
        << Y.getLocalLength () << ").  Please report this bug to "
        "the Ifpack2 developers.");
    TEUCHOS_TEST_FOR_EXCEPTION(
        localMap_->getNodeNumElements () != X.getLocalLength (),
        std::logic_error, "Ifpack2::DenseContainer::applyImpl: The inverse "
        "operator and X have incompatible dimensions (" <<
        localMap_->getNodeNumElements () << " resp. "
        << X.getLocalLength () << ").  Please report this bug to "
        "the Ifpack2 developers.");
    TEUCHOS_TEST_FOR_EXCEPTION(
        localMap_->getNodeNumElements () != Y.getLocalLength (),
        std::logic_error, "Ifpack2::DenseContainer::applyImpl: The inverse "
        "operator and Y have incompatible dimensions (" <<
        localMap_->getNodeNumElements () << " resp. "
        << Y.getLocalLength () << ").  Please report this bug to "
        "the Ifpack2 developers.");
    TEUCHOS_TEST_FOR_EXCEPTION(
        X.getLocalLength () != static_cast<size_t> (diagBlock_.numRows ()),
        std::logic_error, "Ifpack2::DenseContainer::applyImpl: The input "
        "multivector X has incompatible dimensions from those of the "
        "inverse operator (" << X.getLocalLength () << " vs. "
        << (mode == Teuchos::NO_TRANS ? diagBlock_.numCols () : diagBlock_.numRows ())
        << ").  Please report this bug to the Ifpack2 developers.");
    TEUCHOS_TEST_FOR_EXCEPTION(
        X.getLocalLength () != static_cast<size_t> (diagBlock_.numRows ()),
        std::logic_error, "Ifpack2::DenseContainer::applyImpl: The output "
        "multivector Y has incompatible dimensions from those of the "
        "inverse operator (" << Y.getLocalLength () << " vs. "
        << (mode == Teuchos::NO_TRANS ? diagBlock_.numRows () : diagBlock_.numCols ())
        << ").  Please report this bug to the Ifpack2 developers.");

    typedef Teuchos::ScalarTraits<local_scalar_type> STS;
    const int numVecs = static_cast<int> (X.getNumVectors ());
    if (alpha == STS::zero ()) { // don't need to solve the linear system
        if (beta == STS::zero ()) {
            // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
            // any Inf or NaN values in Y (rather than multiplying them by
            // zero, resulting in NaN values).
            Y.putScalar (STS::zero ());
        }
        else { // beta != 0
            Y.scale (beta);
        }
    }
    else { // alpha != 0; must solve the linear system
        Teuchos::LAPACK<int, local_scalar_type> lapack;
        // If beta is nonzero or Y is not constant stride, we have to use
        // a temporary output multivector.  It gets a (deep) copy of X,
        // since GETRS overwrites its (multi)vector input with its output.
        RCP<local_mv_type> Y_tmp;
        if (beta == STS::zero () ) {
            Tpetra::deep_copy (Y, X);
            Y_tmp = rcpFromRef (Y);
        }
        else {
            Y_tmp = rcp (new local_mv_type (X, Teuchos::Copy));
        }
        const int Y_stride = static_cast<int> (Y_tmp->getStride ());
        ArrayRCP<local_scalar_type> Y_view = Y_tmp->get1dViewNonConst ();
        local_scalar_type* const Y_ptr = Y_view.getRawPtr ();

        int INFO = 0;
        const char trans =
            (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
        lapack.GETRS (trans, diagBlock_.numRows (), numVecs,
                      diagBlock_.values (), diagBlock_.stride (),
                      ipiv_.getRawPtr (), Y_ptr, Y_stride, &INFO);
        TEUCHOS_TEST_FOR_EXCEPTION(
            INFO != 0, std::runtime_error, "Ifpack2::DenseContainer::applyImpl: "
            "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
            "failed with INFO = " << INFO << " != 0.");

        if (beta != STS::zero ()) {
            Y.update (alpha, *Y_tmp, beta);
        }
        else if (! Y.isConstantStride ()) {
            Tpetra::deep_copy (Y, *Y_tmp);
        }
    }
}
int main(int argc, char *argv[]){
  Teuchos::GlobalMPISession mpiSession(&argc,&argv);
  typedef float SCALAR;
  typedef int LO;
  typedef int GO;
  typedef std::complex<SCALAR> cmplx;
  typedef CrsMatrix<cmplx,LO,GO,Node> MAT;
  typedef ScalarTraits<cmplx> ST;
  typedef MultiVector<cmplx,LO,GO,Node> MV;
  typedef ST::magnitudeType Mag;
  typedef ScalarTraits<Mag> MT;
  const size_t numVecs = 1;

  std::ostream &out = std::cout;
  RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(out));

  Platform &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
  RCP<const Comm<int> > comm = platform.getComm();
  RCP<Node>             node = platform.getNode();

  RCP<MAT> A =
    Tpetra::MatrixMarket::Reader<MAT>::readSparseFile("../test/matrices/amesos2_test_mat3.mtx",comm,node);

  RCP<const Map<LO,GO,Node> > dmnmap = A->getDomainMap();
  RCP<const Map<LO,GO,Node> > rngmap = A->getRangeMap();

  // Create the know-solution vector
  std::map<GO,cmplx> xValues;
  xValues[0] = cmplx(-0.58657, 0.10646);
  xValues[1] = cmplx(0.86716, 0.75421); 
  xValues[2] = cmplx(0.58970, 0.29876);

  std::map<GO,cmplx>::iterator it;
  RCP<MV> X = rcp(new MV(dmnmap, numVecs));
  X->setObjectLabel("X");

  for( it = xValues.begin(); it != xValues.end(); ++it ){
    if( rngmap->isNodeGlobalElement( (*it).first ) ){
      X->replaceGlobalValue( (*it).first, 0, (*it).second );
    }
  }

  RCP<MV> Xhat = rcp(new MV(dmnmap, numVecs));
  RCP<MV> B    = rcp(new MV(rngmap, numVecs));
  X->setObjectLabel("X");
  B->setObjectLabel("B");
  Xhat->setObjectLabel("Xhat");

  A->apply(*X,*B,Teuchos::CONJ_TRANS); // use conjugate-transpose

  Xhat->randomize();

  // Solve A*Xhat = B for Xhat using the Superlu solver
  RCP<Amesos2::Solver<MAT,MV> > solver
    = Amesos2::create<MAT,MV>("SuperLU", A, Xhat, B );

  Teuchos::ParameterList amesos2_params;
  // Setting the following will cause Amesos2 to complain
  amesos2_params.sublist("SuperLU").set("Trans","CONJ","Solve with conjugate-transpose");

  solver->setParameters( rcpFromRef(amesos2_params) );
  solver->symbolicFactorization().numericFactorization().solve();

  B->describe(*fos, Teuchos::VERB_EXTREME);
  Xhat->describe(*fos, Teuchos::VERB_EXTREME);
  X->describe(*fos, Teuchos::VERB_EXTREME);

  // Check result of solve
  Array<Mag> xhatnorms(numVecs), xnorms(numVecs);
  Xhat->norm2(xhatnorms());
  X->norm2(xnorms());
}
SolveStatus<double>
AmesosLinearOpWithSolve::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<double> &B,
  const Ptr<MultiVectorBase<double> > &X,
  const Ptr<const SolveCriteria<double> > solveCriteria
  ) const
{
  using Teuchos::rcpFromPtr;
  using Teuchos::rcpFromRef;
  using Teuchos::OSTab;

  Teuchos::Time totalTimer("");
  totalTimer.start(true);

  TEUCHOS_FUNC_TIME_MONITOR("AmesosLOWS");

  Teuchos::RCP<Teuchos::FancyOStream> out = this->getOStream();
  Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if(out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
    *out << "\nSolving block system using Amesos solver "
         << typeName(*amesosSolver_) << " ...\n\n";

  //
  // Get the op(...) range and domain maps
  //
  const EOpTransp amesosOpTransp = real_trans(trans_trans(amesosSolverTransp_,M_trans));
  const Epetra_Operator *amesosOp = epetraLP_->GetOperator();
  const Epetra_Map
    &opRangeMap  = ( amesosOpTransp == NOTRANS
      ? amesosOp->OperatorRangeMap()  : amesosOp->OperatorDomainMap() ),
    &opDomainMap = ( amesosOpTransp == NOTRANS
      ? amesosOp->OperatorDomainMap() : amesosOp->OperatorRangeMap()  );

  //
  // Get Epetra_MultiVector views of B and X
  //
  Teuchos::RCP<const Epetra_MultiVector>
    epetra_B = get_Epetra_MultiVector(opRangeMap, rcpFromRef(B));
  Teuchos::RCP<Epetra_MultiVector>
    epetra_X = get_Epetra_MultiVector(opDomainMap, rcpFromPtr(X));

  //
  // Set B and X in the linear problem
  //
  epetraLP_->SetLHS(&*epetra_X);
  epetraLP_->SetRHS(const_cast<Epetra_MultiVector*>(&*epetra_B));
  // Above should be okay but cross your fingers!

  //
  // Solve the linear system
  //
  const bool oldUseTranspose = amesosSolver_->UseTranspose();
  amesosSolver_->SetUseTranspose(amesosOpTransp==TRANS);
  const int err = amesosSolver_->Solve();
  TEST_FOR_EXCEPTION( 0!=err, CatastrophicSolveFailure,
    "Error, the function Solve() on the amesos solver of type\n"
    "\'"<<typeName(*amesosSolver_)<<"\' failed with error code "<<err<<"!"
    );
  amesosSolver_->SetUseTranspose(oldUseTranspose);

  //
  // Unset B and X
  //
  epetraLP_->SetLHS(NULL);
  epetraLP_->SetRHS(NULL);
  epetra_X = Teuchos::null;
  epetra_B = Teuchos::null;

  //
  // Scale X if needed
  //
  if(amesosSolverScalar_!=1.0)
    Thyra::scale(1.0/amesosSolverScalar_, X);

  //
  // Set the solve status if requested
  //
  SolveStatus<double> solveStatus;
  solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
  solveStatus.achievedTol = SolveStatus<double>::unknownTolerance();
  solveStatus.message =
    std::string("Solver ")+typeName(*amesosSolver_)+std::string(" converged!");
  
  //
  // Report the overall time
  //
  if(out.get() && static_cast<int>(verbLevel) >= static_cast<int>(Teuchos::VERB_LOW))
    *out
      << "\nTotal solve time = "<<totalTimer.totalElapsedTime()<<" sec\n";

  return solveStatus;

}
Esempio n. 17
0
int 
main (int argc, char *argv[])
{
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::Comm;
  using Teuchos::CommandLineProcessor;
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::OSTab;
  using Teuchos::ptr;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using std::cout;
  using std::endl;

  bool success = true; // May be changed by tests

  Teuchos::oblackholestream blackHole;
  //Teuchos::GlobalMPISession (&argc, &argv, &blackHole);
  MPI_Init (&argc, &argv);

  //
  // Construct communicators, and verify that we are on 4 processors.
  //

  // Construct a Teuchos Comm object.
  RCP<const Comm<int> > teuchosComm = Teuchos::DefaultComm<int>::getComm();
  const int numProcs = teuchosComm->getSize();
  const int pid = teuchosComm->getRank();
  RCP<FancyOStream> pOut = 
    getFancyOStream (rcpFromRef ((pid == 0) ? std::cout : blackHole));
  FancyOStream& out = *pOut;
  // Verify that we are on four processors (which manifests the bug).
  if (teuchosComm->getSize() != 4) {
    out << "This test must be run on four processors.  Exiting ..." << endl;
    return EXIT_FAILURE;
  }

  // We also need an Epetra Comm, so that we can compare Tpetra and
  // Epetra results.
  Epetra_MpiComm epetraComm (MPI_COMM_WORLD);

  //
  // Default values of command-line options.
  //
  bool verbose = false;
  bool printEpetra = false;
  bool printTpetra = false;
  CommandLineProcessor cmdp (false,true);
  //
  // Set command-line options.
  //
  cmdp.setOption ("verbose", "quiet", &verbose, "Print verbose output.");
  // Epetra and Tpetra output will ask the Maps and Import objects to
  // print themselves in distributed, maximally verbose fashion.  It's
  // best to turn on either Epetra or Tpetra, but not both.  Then you
  // can compare their output side by side.
  cmdp.setOption ("printEpetra", "dontPrintEpetra", &printEpetra, 
		  "Print Epetra output (in verbose mode only).");
  cmdp.setOption ("printTpetra", "dontPrintTpetra", &printTpetra, 
		  "Print Tpetra output (in verbose mode only).");
  // Parse command-line options.
  if (cmdp.parse (argc,argv) != CommandLineProcessor::PARSE_SUCCESSFUL) {
    out << "End Result: TEST FAILED" << endl;
    MPI_Finalize ();
    return EXIT_FAILURE;
  }

  if (verbose) {
    out << "Running test on " << numProcs << " process" 
	<< (numProcs != 1 ? "es" : "") << "." << endl;
  }

  // The maps for this problem are derived from a 3D structured mesh.
  // In this example, the dimensions are 4x4x2 and there are 2
  // processors assigned to the first dimension and 2 processors
  // assigned to the second dimension, with no parallel decomposition
  // along the third dimension.  The "owned" arrays represent the
  // one-to-one map, with each array representing a 2x2x2 slice.  If
  // DIMENSIONS == 2, then only the first 4 values will be used,
  // representing a 2x2(x1) slice.
  int owned0[8] = { 0, 1, 4, 5,16,17,20,21};
  int owned1[8] = { 2, 3, 6, 7,18,19,22,23};
  int owned2[8] = { 8, 9,12,13,24,25,28,29};
  int owned3[8] = {10,11,14,15,26,27,30,31};

  // The "overlap" arrays represent the map with communication
  // elements, with each array representing a 3x3x2 slice.  If
  // DIMENSIONS == 2, then only the first 9 values will be used,
  // representing a 3x3(x1) slice.
  int overlap0[18] = {0,1,2,4, 5, 6, 8, 9,10,16,17,18,20,21,22,24,25,26};
  int overlap1[18] = {1,2,3,5, 6, 7, 9,10,11,17,18,19,21,22,23,25,26,27};
  int overlap2[18] = {4,5,6,8, 9,10,12,13,14,20,21,22,24,25,26,28,29,30};
  int overlap3[18] = {5,6,7,9,10,11,13,14,15,21,22,23,25,26,27,29,30,31};

  // Construct the owned and overlap maps for both Epetra and Tpetra.
  int* owned;
  int* overlap;
  if (pid == 0) {
    owned   = owned0;
    overlap = overlap0;
  }
  else if (pid == 1) {
    owned   = owned1;
    overlap = overlap1;
  }
  else if (pid == 2) {
    owned   = owned2;
    overlap = overlap2;
  }
  else {
    owned   = owned3;
    overlap = overlap3;
  }

#if DIMENSIONS == 2
  int ownedSize   = 4;
  int overlapSize = 9;
#elif DIMENSIONS == 3
  int ownedSize   =  8;
  int overlapSize = 18;
#endif

  // Create the two Epetra Maps.  Source for the Import is the owned
  // map; target for the Import is the overlap map.
  Epetra_Map epetraOwnedMap (  -1, ownedSize,   owned,   0, epetraComm);
  Epetra_Map epetraOverlapMap (-1, overlapSize, overlap, 0, epetraComm);

  if (verbose && printEpetra) {
    // Have the Epetra_Map objects describe themselves.
    //
    // Epetra_BlockMap::Print() takes an std::ostream&, and expects
    // all MPI processes to be able to write to it.  (The method
    // handles its own synchronization.)
    out << "Epetra owned map:" << endl;
    epetraOwnedMap.Print (std::cout);
    out << "Epetra overlap map:" << endl;
    epetraOverlapMap.Print (std::cout);
  }

  // Create the two Tpetra Maps.  The "invalid" global element count
  // input tells Tpetra::Map to compute the global number of elements
  // itself.
  const int invalid = Teuchos::OrdinalTraits<int>::invalid();
  RCP<Tpetra::Map<int> > tpetraOwnedMap = 
    rcp (new Tpetra::Map<int> (invalid, ArrayView<int> (owned, ownedSize), 
			       0, teuchosComm));
  tpetraOwnedMap->setObjectLabel ("Owned Map");
  RCP<Tpetra::Map<int> > tpetraOverlapMap =
    rcp (new Tpetra::Map<int> (invalid, ArrayView<int> (overlap, overlapSize),
			       0, teuchosComm));
  tpetraOverlapMap->setObjectLabel ("Overlap Map");

  // In verbose mode, have the Tpetra::Map objects describe themselves.
  if (verbose && printTpetra) {
    Teuchos::EVerbosityLevel verb = Teuchos::VERB_EXTREME;

    // Tpetra::Map::describe() takes a FancyOStream, but expects all
    // MPI processes to be able to write to it.  (The method handles
    // its own synchronization.)
    RCP<FancyOStream> globalOut = getFancyOStream (rcpFromRef (std::cout));
    out << "Tpetra owned map:" << endl;
    {
      OSTab tab (globalOut);
      tpetraOwnedMap->describe (*globalOut, verb);
    }
    out << "Tpetra overlap map:" << endl;
    {
      OSTab tab (globalOut);
      tpetraOverlapMap->describe (*globalOut, verb);
    }
  }

  // Use the owned and overlap maps to construct an importer for both
  // Epetra and Tpetra.
  Epetra_Import       epetraImporter (epetraOverlapMap, epetraOwnedMap  );
  Tpetra::Import<int> tpetraImporter (tpetraOwnedMap  , tpetraOverlapMap);

  // In verbose mode, have the Epetra_Import object describe itself.
  if (verbose && printEpetra) {
    out << "Epetra importer:" << endl;
    // The importer's Print() method takes an std::ostream& and plans
    // to write to it on all MPI processes (handling synchronization
    // itself).
    epetraImporter.Print (std::cout);
    out << endl;
  }

  // In verbose mode, have the Tpetra::Import object describe itself.
  if (verbose && printTpetra) {
    out << "Tpetra importer:" << endl;
    // The importer doesn't implement Teuchos::Describable.  It wants
    // std::cout and plans to write to it on all MPI processes (with
    // its own synchronization).
    tpetraImporter.print (std::cout);
    out << endl;
  }

  // Construct owned and overlap vectors for both Epetra and Tpetra.
  Epetra_Vector epetraOwnedVector   (epetraOwnedMap  );
  Epetra_Vector epetraOverlapVector (epetraOverlapMap);
  Tpetra::Vector<double,int> tpetraOwnedVector   (tpetraOwnedMap  );
  Tpetra::Vector<double,int> tpetraOverlapVector (tpetraOverlapMap);

  // The test is as follows: initialize the owned and overlap vectors
  // with global IDs in the owned regions.  Initialize the overlap
  // vectors to equal -1 in the overlap regions.  Then perform a
  // communication from the owned vectors to the overlap vectors.  The
  // resulting overlap vectors should have global IDs everywhere and
  // all of the -1 values should be overwritten.

  // Initialize.  We cannot assign directly to the Tpetra Vectors;
  // instead, we extract nonconst views and assign to those.  The
  // results aren't guaranteed to be committed to the vector unless
  // the views are released (by assigning Teuchos::null to them).
  epetraOverlapVector.PutScalar(-1);
  tpetraOverlapVector.putScalar(-1);
  ArrayRCP<double> tpetraOwnedArray   = tpetraOwnedVector.getDataNonConst(0);
  ArrayRCP<double> tpetraOverlapArray = tpetraOverlapVector.getDataNonConst(0);
  for (int owned_lid = 0; 
       owned_lid < tpetraOwnedMap->getNodeElementList().size(); 
       ++owned_lid) {
    int gid         = tpetraOwnedMap->getGlobalElement(owned_lid);
    int overlap_lid = tpetraOverlapMap->getLocalElement(gid);
    epetraOwnedVector[owned_lid]     = gid;
    epetraOverlapVector[overlap_lid] = gid;
    tpetraOwnedArray[owned_lid]      = gid;
    tpetraOverlapArray[overlap_lid]  = gid;
  }
  // Make sure that the changes to the Tpetra Vector were committed,
  // by releasing the nonconst views.
  tpetraOwnedArray = Teuchos::null;
  tpetraOverlapArray = Teuchos::null;

  // Test the Epetra and Tpetra Import.
  if (verbose) {
    out << "Testing Import from owned Map to overlap Map:" << endl << endl;
  }
  epetraOverlapVector.Import(  epetraOwnedVector, epetraImporter, Insert);
  tpetraOverlapVector.doImport(tpetraOwnedVector, tpetraImporter, 
			       Tpetra::INSERT);
  // Check the Import results.
  success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector, 
			   epetraOverlapMap, epetraOverlapVector, 
			   tpetraOwnedMap, tpetraOwnedVector, 
			   tpetraOverlapMap, tpetraOverlapVector, verbose);

  const bool testOtherDirections = false;
  if (testOtherDirections) {
    //
    // Reinitialize the Tpetra vectors and test whether Export works.
    //
    tpetraOverlapVector.putScalar(-1);
    tpetraOwnedArray   = tpetraOwnedVector.getDataNonConst(0);
    tpetraOverlapArray = tpetraOverlapVector.getDataNonConst(0);
    for (int owned_lid = 0; 
	 owned_lid < tpetraOwnedMap->getNodeElementList().size(); 
	 ++owned_lid) 
      {
	int gid         = tpetraOwnedMap->getGlobalElement(owned_lid);
	int overlap_lid = tpetraOverlapMap->getLocalElement(gid);
	tpetraOwnedArray[owned_lid]      = gid;
	tpetraOverlapArray[overlap_lid]  = gid;
      }
    // Make sure that the changes to the Tpetra Vector were committed,
    // by releasing the nonconst views.
    tpetraOwnedArray = Teuchos::null;
    tpetraOverlapArray = Teuchos::null;

    // Make a Tpetra Export object, and test the export.
    Tpetra::Export<int> tpetraExporter1 (tpetraOwnedMap, tpetraOverlapMap);
    if (verbose) {
      out << "Testing Export from owned Map to overlap Map:" << endl << endl;
    }
    tpetraOverlapVector.doExport (tpetraOwnedVector, tpetraExporter1, 
				  Tpetra::INSERT);

    // Check the Export results.
    success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector, 
			     epetraOverlapMap, epetraOverlapVector, 
			     tpetraOwnedMap, tpetraOwnedVector, 
			     tpetraOverlapMap, tpetraOverlapVector, verbose);
    //
    // Reinitialize the Tpetra vectors and see what Import in the
    // other direction does.
    //
    tpetraOverlapVector.putScalar(-1);
    tpetraOwnedArray   = tpetraOwnedVector.getDataNonConst(0);
    tpetraOverlapArray = tpetraOverlapVector.getDataNonConst(0);
    for (int owned_lid = 0; 
	 owned_lid < tpetraOwnedMap->getNodeElementList().size(); 
	 ++owned_lid) 
      {
	int gid         = tpetraOwnedMap->getGlobalElement(owned_lid);
	int overlap_lid = tpetraOverlapMap->getLocalElement(gid);
	tpetraOwnedArray[owned_lid]      = gid;
	tpetraOverlapArray[overlap_lid]  = gid;
      }
    // Make sure that the changes to the Tpetra Vector were committed,
    // by releasing the nonconst views.
    tpetraOwnedArray = Teuchos::null;
    tpetraOverlapArray = Teuchos::null;

    if (verbose) {
      out << "Testing Import from overlap Map to owned Map:" << endl << endl;
    }
    Tpetra::Import<int> tpetraImporter2 (tpetraOverlapMap, tpetraOwnedMap);
    tpetraOwnedVector.doImport (tpetraOverlapVector, tpetraImporter2, 
				Tpetra::INSERT);
    // Check the Import results.
    success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector, 
			     epetraOverlapMap, epetraOverlapVector, 
			     tpetraOwnedMap, tpetraOwnedVector, 
			     tpetraOverlapMap, tpetraOverlapVector, verbose);
  } // if testOtherDirections

  out << "End Result: TEST " << (success ? "PASSED" : "FAILED") << endl;
  MPI_Finalize ();
  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
Esempio n. 18
0
int 
main (int argc, char *argv[]) 
{
  using Teuchos::as;
  using Teuchos::Comm;
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using std::cerr;
  using std::cout;
  using std::endl;

  typedef double scalar_type;
  typedef int local_ordinal_type;
#if defined(HAVE_TPETRA_EXPLICIT_INSTANTIATION) && defined(HAVE_TPETRA_INST_INT_LONG)
  typedef long global_ordinal_type;
#else
  typedef int  global_ordinal_type;
#endif
  typedef Kokkos::SerialNode node_type;

  Teuchos::oblackholestream blackHole;
  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole);
  RCP<const Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

  //
  // Read in command line arguments.
  //  
  int unknownsPerNode = 20; // number of unknowns per process
  int unknownsPerElt = 3; // number of unknowns per (overlapping) element
  int numCols = 1;
  bool verbose = false;

  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption ("unknownsPerNode", &unknownsPerNode, 
		  "Number of unknowns per process");
  cmdp.setOption ("unknownsPerElt", &unknownsPerElt, 
		  "Number of unknowns per (overlapping) element.");
  cmdp.setOption ("numCols", &numCols, 
		  "Number of columns in the multivector.  Must be positive.");
  cmdp.setOption ("verbose", "quiet", &verbose, 
		  "Whether to print verbose output.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return EXIT_FAILURE;
  }

  const Teuchos::EVerbosityLevel verbLevel = 
    verbose ? Teuchos::VERB_EXTREME : Teuchos::VERB_DEFAULT;
  RCP<FancyOStream> out = verbose ? getFancyOStream (rcpFromRef (std::cout)) :
    getFancyOStream (rcpFromRef (blackHole));

  RCP<ParameterList> nodeParams = parameterList ("Kokkos Node");
  RCP<node_type> node = makeSerialNode (nodeParams);

  // Run the test.
  bool succeeded = true;
  try {
    Tpetra::Test::testMultiVectorFiller<scalar_type, 
      local_ordinal_type, 
      global_ordinal_type, 
      node_type> (comm, node, as<size_t> (unknownsPerNode),
		  as<global_ordinal_type> (unknownsPerElt), 
		  as<size_t> (numCols), out, verbLevel);
    succeeded = true;
  } catch (std::exception& e) {
    *out << "MultiVectorFiller test threw an exception:  " << e.what() << endl;
    succeeded = false;
  }

  const int localSuccess = succeeded ? 1 : 0;
  int globalSuccess = localSuccess;
  Teuchos::reduceAll (*comm, Teuchos::REDUCE_SUM, localSuccess, 
		      Teuchos::ptr (&globalSuccess));
  
  if (globalSuccess) {
    std::cout << "End Result: TEST PASSED" << endl;
    return EXIT_SUCCESS;
  }
  else {
    std::cout << "End Result: TEST FAILED" << endl;
    return EXIT_FAILURE;
  }
}
Esempio n. 19
0
void 
Chebyshev<MatrixType>::
applyImpl (const MV& X,
	   MV& Y,
	   Teuchos::ETransp mode,
	   scalar_type alpha,
	   scalar_type beta) const 
{
  using Teuchos::ArrayRCP;
  using Teuchos::as;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::rcpFromRef;

  const scalar_type zero = STS::zero();
  const scalar_type one = STS::one();

  // Y = beta*Y + alpha*M*X.

  // If alpha == 0, then we don't need to do Chebyshev at all.
  if (alpha == zero) {
    if (beta == zero) { // Obey Sparse BLAS rules; avoid 0*NaN.
      Y.putScalar (zero);
    }
    else {
      Y.scale (beta);
    }
    return;
  }

  // If beta != 0, then we need to keep a copy of the initial value of
  // Y, so that we can add beta*it to the Chebyshev result at the end.
  // Usually this method is called with beta == 0, so we don't have to 
  // worry about caching Y_org.
  RCP<MV> Y_orig;
  if (beta != zero) {
    Y_orig = rcp (new MV (Y));
  }

  // If X and Y point to the same memory location, we need to use a
  // copy of X (X_copy) as the input MV.  Otherwise, just let X_copy
  // point to X.
  //
  // This is hopefully an uncommon use case, so we don't bother to
  // optimize for it by caching X_copy.
  RCP<const MV> X_copy;
  bool copiedInput = false;
  if (X.getLocalMV().getValues() == Y.getLocalMV().getValues()) {
    X_copy = rcp (new MV (X));
    copiedInput = true;
  }
  else {
    X_copy = rcpFromRef (X);
  }
  
  // If alpha != 1, fold alpha into (a copy of) X.
  //
  // This is an uncommon use case, so we don't bother to optimize for
  // it by caching X_copy.  However, we do check whether we've already
  // copied X above, to avoid a second copy.
  if (alpha != one) {
    RCP<MV> X_copy_nonConst = rcp_const_cast<MV> (X_copy);
    if (! copiedInput) {
      X_copy_nonConst = rcp (new MV (X));
      copiedInput = true;
    }
    X_copy_nonConst->scale (alpha);
    X_copy = rcp_const_cast<const MV> (X_copy_nonConst);
  }

  impl_.apply (*X_copy, Y);

  if (beta != zero) {
    Y.update (beta, *Y_orig, one); // Y = beta * Y_orig + 1 * Y
  }
}
Esempio n. 20
0
TEUCHOS_UNIT_TEST(tProbingFactory, parameterlist_constr)
{
   // build global (or serial communicator)
   #ifdef HAVE_MPI
      Epetra_MpiComm Comm(MPI_COMM_WORLD);
   #else
      Epetra_SerialComm Comm;
   #endif

   Teko::LinearOp lo = buildSystem(Comm,10);
   
   Teuchos::RCP<Teko::InverseLibrary> invLib = Teko::InverseLibrary::buildFromStratimikos();
   Teuchos::RCP<Teko::InverseFactory> directSolveFactory = invLib->getInverseFactory("Amesos");

   {
      Teuchos::ParameterList pl;
      pl.set("Inverse Type","Amesos");
      pl.set("Probing Graph Operator",lo);
   
      Teuchos::RCP<Teko::ProbingPreconditionerFactory> probeFact
            = rcp(new Teko::ProbingPreconditionerFactory);
      probeFact->initializeFromParameterList(pl);
   
      RCP<Teko::InverseFactory> invFact = Teuchos::rcp(new Teko::PreconditionerInverseFactory(probeFact,Teuchos::null));
   
      Teko::LinearOp probedInverse = Teko::buildInverse(*invFact,lo);
      Teko::LinearOp invLo = Teko::buildInverse(*directSolveFactory,lo);
   
      Thyra::LinearOpTester<double> tester;
      tester.dump_all(true);
      tester.show_all_tests(true);
   
      {
         const bool result = tester.compare( *probedInverse, *invLo, &out);
         if (!result) {
            out << "Apply: FAILURE" << std::endl;
            success = false;
         }
         else
            out << "Apply: SUCCESS" << std::endl;
      }
   }

   {
      Teuchos::RCP<const Epetra_CrsGraph> theGraph
            = rcpFromRef(rcp_dynamic_cast<const Epetra_CrsMatrix>(Thyra::get_Epetra_Operator(*lo))->Graph());

      Teuchos::ParameterList pl;
      pl.set("Inverse Type","Amesos");
      pl.set("Probing Graph",theGraph);
   
      Teuchos::RCP<Teko::ProbingPreconditionerFactory> probeFact
            = rcp(new Teko::ProbingPreconditionerFactory);
      probeFact->initializeFromParameterList(pl);
   
      RCP<Teko::InverseFactory> invFact = Teuchos::rcp(new Teko::PreconditionerInverseFactory(probeFact,Teuchos::null));
   
      Teko::LinearOp probedInverse = Teko::buildInverse(*invFact,lo);
      Teko::LinearOp invLo = Teko::buildInverse(*directSolveFactory,lo);
   
      Thyra::LinearOpTester<double> tester;
      tester.dump_all(true);
      tester.show_all_tests(true);
   
      {
         const bool result = tester.compare( *probedInverse, *invLo, &out);
         if (!result) {
            out << "Apply: FAILURE" << std::endl;
            success = false;
         }
         else
            out << "Apply: SUCCESS" << std::endl;
      }
   }
}
SolveStatus<Scalar>
BelosLinearOpWithSolve<Scalar>::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &B,
  const Ptr<MultiVectorBase<Scalar> > &X,
  const Ptr<const SolveCriteria<Scalar> > solveCriteria
  ) const
{

  THYRA_FUNC_TIME_MONITOR("Stratimikos: BelosLOWS");

  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::FancyOStream;
  using Teuchos::OSTab;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::describe;
  typedef Teuchos::ScalarTraits<Scalar> ST;
  typedef typename ST::magnitudeType ScalarMag;
  Teuchos::Time totalTimer(""), timer("");
  totalTimer.start(true);

  assertSolveSupports(*this, M_trans, solveCriteria);
  // 2010/08/22: rabartl: Bug 4915 ToDo: Move the above into the NIV function
  // solve(...).

  const RCP<FancyOStream> out = this->getOStream();
  const Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_LOW)) {
    *out << "\nStarting iterations with Belos:\n";
    OSTab tab2(out);
    *out << "Using forward operator = " << describe(*fwdOpSrc_->getOp(),verbLevel);
    *out << "Using iterative solver = " << describe(*iterativeSolver_,verbLevel);
    *out << "With #Eqns="<<B.range()->dim()<<", #RHSs="<<B.domain()->dim()<<" ...\n";
  }

  //
  // Set RHS and LHS
  //

  bool ret = lp_->setProblem( rcpFromPtr(X), rcpFromRef(B) );
  TEUCHOS_TEST_FOR_EXCEPTION(
    ret == false, CatastrophicSolveFailure
    ,"Error, the Belos::LinearProblem could not be set for the current solve!"
    );

  //
  // Set the solution criteria
  //

  // Parameter list for the current solve.
  const RCP<ParameterList> tmpPL = Teuchos::parameterList();

  // The solver's valid parameter list.
  RCP<const ParameterList> validPL = iterativeSolver_->getValidParameters();

  SolveMeasureType solveMeasureType;
  RCP<GeneralSolveCriteriaBelosStatusTest<Scalar> > generalSolveCriteriaBelosStatusTest;
  if (nonnull(solveCriteria)) {
    solveMeasureType = solveCriteria->solveMeasureType;
    const ScalarMag requestedTol = solveCriteria->requestedTol;
    if (solveMeasureType.useDefault()) {
      tmpPL->set("Convergence Tolerance", defaultTol_);
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_RHS)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      setResidualScalingType (tmpPL, validPL, "Norm of RHS");
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_INIT_RESIDUAL)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      setResidualScalingType (tmpPL, validPL, "Norm of Initial Residual");
    }
    else {
      // Set the most generic (and inefficient) solve criteria
      generalSolveCriteriaBelosStatusTest = createGeneralSolveCriteriaBelosStatusTest(
        *solveCriteria, convergenceTestFrequency_);
      // Set the verbosity level (one level down)
      generalSolveCriteriaBelosStatusTest->setOStream(out);
      generalSolveCriteriaBelosStatusTest->setVerbLevel(incrVerbLevel(verbLevel, -1));
      // Set the default convergence tolerance to always converged to allow
      // the above status test to control things.
      tmpPL->set("Convergence Tolerance", 1.0);
    }
    // maximum iterations
    if (nonnull(solveCriteria->extraParameters)) {
      if (Teuchos::isParameterType<int>(*solveCriteria->extraParameters,"Maximum Iterations")) {
        tmpPL->set("Maximum Iterations", Teuchos::get<int>(*solveCriteria->extraParameters,"Maximum Iterations"));
      }
    }
  }
  else {
    // No solveCriteria was even passed in!
    tmpPL->set("Convergence Tolerance", defaultTol_);
  }

  //
  // Solve the linear system
  //

  Belos::ReturnType belosSolveStatus;
  {
    RCP<std::ostream>
      outUsed =
      ( static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_LOW)
        ? out
        : rcp(new FancyOStream(rcp(new Teuchos::oblackholestream())))
        );
    Teuchos::OSTab tab1(outUsed,1,"BELOS");
    tmpPL->set("Output Stream", outUsed);
    iterativeSolver_->setParameters(tmpPL);
    if (nonnull(generalSolveCriteriaBelosStatusTest)) {
      iterativeSolver_->setUserConvStatusTest(generalSolveCriteriaBelosStatusTest);
    }
    belosSolveStatus = iterativeSolver_->solve();
  }

  //
  // Report the solve status
  //

  totalTimer.stop();

  SolveStatus<Scalar> solveStatus;

  switch (belosSolveStatus) {
    case Belos::Unconverged: {
      solveStatus.solveStatus = SOLVE_STATUS_UNCONVERGED;
      // Set achievedTol even if the solver did not converge.  This is
      // helpful for things like nonlinear solvers, which might be
      // able to use a partially converged result, and which would
      // like to know the achieved convergence tolerance for use in
      // computing bounds.  It's also helpful for estimating whether a
      // small increase in the maximum iteration count might be
      // helpful next time.
      try {
	// Some solvers might not have implemented achievedTol(). 
	// The default implementation throws std::runtime_error.
	solveStatus.achievedTol = iterativeSolver_->achievedTol();
      } catch (std::runtime_error&) {
	// Do nothing; use the default value of achievedTol.
      }
      break;
    }
    case Belos::Converged: {
      solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
      if (nonnull(generalSolveCriteriaBelosStatusTest)) {
	// The user set a custom status test.  This means that we
	// should ask the custom status test itself, rather than the
	// Belos solver, what the final achieved convergence tolerance
	// was.
        const ArrayView<const ScalarMag> achievedTol = 
          generalSolveCriteriaBelosStatusTest->achievedTol();
        solveStatus.achievedTol = ST::zero();
        for (Ordinal i = 0; i < achievedTol.size(); ++i) {
          solveStatus.achievedTol = std::max(solveStatus.achievedTol, achievedTol[i]);
        }
      }
      else {
	try {
	  // Some solvers might not have implemented achievedTol(). 
	  // The default implementation throws std::runtime_error.
	  solveStatus.achievedTol = iterativeSolver_->achievedTol();
	} catch (std::runtime_error&) {
	  // Use the default convergence tolerance.  This is a correct
	  // upper bound, since we did actually converge.
	  solveStatus.achievedTol = tmpPL->get("Convergence Tolerance", defaultTol_);
	}
      }
      break;
    }
    TEUCHOS_SWITCH_DEFAULT_DEBUG_ASSERT();
  }

  std::ostringstream ossmessage;
  ossmessage
    << "The Belos solver of type \""<<iterativeSolver_->description()
    <<"\" returned a solve status of \""<< toString(solveStatus.solveStatus) << "\""
    << " in " << iterativeSolver_->getNumIters() << " iterations"
    << " with total CPU time of " << totalTimer.totalElapsedTime() << " sec" ;
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
    *out << "\n" << ossmessage.str() << "\n";

  solveStatus.message = ossmessage.str();

  // Dump the getNumIters() and the achieved convergence tolerance
  // into solveStatus.extraParameters, as the "Belos/Iteration Count"
  // resp. "Belos/Achieved Tolerance" parameters.
  if (solveStatus.extraParameters.is_null()) {
    solveStatus.extraParameters = parameterList ();
  }
  solveStatus.extraParameters->set ("Belos/Iteration Count", 
				    iterativeSolver_->getNumIters());\
  // package independent version of the same
  solveStatus.extraParameters->set ("Iteration Count", 
				    iterativeSolver_->getNumIters());\
  // NOTE (mfh 13 Dec 2011) Though the most commonly used Belos
  // solvers do implement achievedTol(), some Belos solvers currently
  // do not.  In the latter case, if the solver did not converge, the
  // reported achievedTol() value may just be the default "invalid"
  // value -1, and if the solver did converge, the reported value will
  // just be the convergence tolerance (a correct upper bound).
  solveStatus.extraParameters->set ("Belos/Achieved Tolerance", 
				    solveStatus.achievedTol);

//  This information is in the previous line, which is printed anytime the verbosity
//  is not set to Teuchos::VERB_NONE, so I'm commenting this out for now.
//  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
//    *out << "\nTotal solve time in Belos = "<<totalTimer.totalElapsedTime()<<" sec\n";
  
  return solveStatus;

}
int
main (int argc, char *argv[])
{
  using namespace TrilinosCouplings; // Yes, this means I'm lazy.

  using TpetraIntrepidPoissonExample::exactResidualNorm;
  using TpetraIntrepidPoissonExample::makeMatrixAndRightHandSide;
  using TpetraIntrepidPoissonExample::solveWithBelos;
  using TpetraIntrepidPoissonExample::solveWithBelosGPU;
  using IntrepidPoissonExample::makeMeshInput;
  using IntrepidPoissonExample::parseCommandLineArguments;
  using IntrepidPoissonExample::setCommandLineArgumentDefaults;
  using IntrepidPoissonExample::setMaterialTensorOffDiagonalValue;
  using IntrepidPoissonExample::setUpCommandLineArguments;
  using Tpetra::DefaultPlatform;
  using Teuchos::Comm;
  using Teuchos::outArg;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::getFancyOStream;
  using Teuchos::FancyOStream;
  using std::endl;
  // Pull in typedefs from the example's namespace.
  typedef TpetraIntrepidPoissonExample::ST ST;
#ifdef HAVE_TRILINOSCOUPLINGS_MUELU
  typedef TpetraIntrepidPoissonExample::LO LO;
  typedef TpetraIntrepidPoissonExample::GO GO;
#endif // HAVE_TRILINOSCOUPLINGS_MUELU
  typedef TpetraIntrepidPoissonExample::Node Node;
  typedef Teuchos::ScalarTraits<ST> STS;
  typedef STS::magnitudeType MT;
  typedef Teuchos::ScalarTraits<MT> STM;
  typedef TpetraIntrepidPoissonExample::sparse_matrix_type sparse_matrix_type;
  typedef TpetraIntrepidPoissonExample::vector_type vector_type;
  typedef TpetraIntrepidPoissonExample::operator_type operator_type;

  bool success = true;
  try {
    Teuchos::oblackholestream blackHole;
    Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole);
    const int myRank = mpiSession.getRank ();
    //const int numProcs = mpiSession.getNProc ();

    // Get the default communicator and Kokkos Node instance
    RCP<const Comm<int> > comm =
      DefaultPlatform::getDefaultPlatform ().getComm ();
    RCP<Node> node = DefaultPlatform::getDefaultPlatform ().getNode ();

    // Did the user specify --help at the command line to print help
    // with command-line arguments?
    bool printedHelp = false;
    // Values of command-line arguments.
    int nx, ny, nz;
    std::string xmlInputParamsFile;
    bool verbose, debug;
    int maxNumItersFromCmdLine = -1; // -1 means "read from XML file"
    double tolFromCmdLine = -1.0; // -1 means "read from XML file"
    std::string solverName = "GMRES";
    ST materialTensorOffDiagonalValue = 0.0;

    // Set default values of command-line arguments.
    setCommandLineArgumentDefaults (nx, ny, nz, xmlInputParamsFile,
                                    solverName, verbose, debug);
    // Parse and validate command-line arguments.
    Teuchos::CommandLineProcessor cmdp (false, true);
    setUpCommandLineArguments (cmdp, nx, ny, nz, xmlInputParamsFile,
                               solverName, tolFromCmdLine,
                               maxNumItersFromCmdLine,
                               verbose, debug);
    cmdp.setOption ("materialTensorOffDiagonalValue",
                    &materialTensorOffDiagonalValue, "Off-diagonal value in "
                    "the material tensor.  This controls the iteration count.  "
                    "Be careful with this if you use CG, since you can easily "
                    "make the matrix indefinite.");

    // Additional command-line arguments for GPU experimentation.
    bool gpu = false;
    cmdp.setOption ("gpu", "no-gpu", &gpu,
                    "Run example using GPU node (if supported)");
    int ranks_per_node = 1;
    cmdp.setOption ("ranks_per_node", &ranks_per_node,
                    "Number of MPI ranks per node");
    int gpu_ranks_per_node = 1;
    cmdp.setOption ("gpu_ranks_per_node", &gpu_ranks_per_node,
                    "Number of MPI ranks per node for GPUs");
    int device_offset = 0;
    cmdp.setOption ("device_offset", &device_offset,
                    "Offset for attaching MPI ranks to CUDA devices");

    // Additional command-line arguments for dumping the generated
    // matrix or its row Map to output files.
    //
    // FIXME (mfh 09 Apr 2014) Need to port these command-line
    // arguments to the Epetra version.

    // If matrixFilename is nonempty, dump the matrix to that file
    // in MatrixMarket format.
    std::string matrixFilename;
    cmdp.setOption ("matrixFilename", &matrixFilename, "If nonempty, dump the "
                    "generated matrix to that file in MatrixMarket format.");

    // If rowMapFilename is nonempty, dump the matrix's row Map to
    // that file in MatrixMarket format.
    std::string rowMapFilename;
    cmdp.setOption ("rowMapFilename", &rowMapFilename, "If nonempty, dump the "
                    "generated matrix's row Map to that file in a format that "
                    "Tpetra::MatrixMarket::Reader can read.");
    // Option to exit after building A and b (and dumping stuff to
    // files, if requested).
    bool exitAfterAssembly = false;
    cmdp.setOption ("exitAfterAssembly", "dontExitAfterAssembly",
                    &exitAfterAssembly, "If true, exit after building the "
                    "sparse matrix and dense right-hand side vector.  If either"
                    " --matrixFilename or --rowMapFilename are nonempty strings"
                    ", dump the matrix resp. row Map to their respective files "
                    "before exiting.");

    parseCommandLineArguments (cmdp, printedHelp, argc, argv, nx, ny, nz,
                               xmlInputParamsFile, solverName, verbose, debug);
    if (printedHelp) {
      // The user specified --help at the command line to print help
      // with command-line arguments.  We printed help already, so quit
      // with a happy return code.
      return EXIT_SUCCESS;
    }

    setMaterialTensorOffDiagonalValue (materialTensorOffDiagonalValue);

    // Both streams only print on MPI Rank 0.  "out" only prints if the
    // user specified --verbose.
    RCP<FancyOStream> out =
      getFancyOStream (rcpFromRef ((myRank == 0 && verbose) ? std::cout : blackHole));
    RCP<FancyOStream> err =
      getFancyOStream (rcpFromRef ((myRank == 0 && debug) ? std::cerr : blackHole));

#ifdef HAVE_MPI
    *out << "PARALLEL executable" << endl;
#else
    *out << "SERIAL executable" << endl;
#endif

    /**********************************************************************************/
    /********************************** GET XML INPUTS ********************************/
    /**********************************************************************************/
    ParameterList inputList;
    if (xmlInputParamsFile != "") {
      *out << "Reading parameters from XML file \""
           << xmlInputParamsFile << "\"..." << endl;
      Teuchos::updateParametersFromXmlFile (xmlInputParamsFile,
                                            outArg (inputList));
      if (myRank == 0) {
        inputList.print (*out, 2, true, true);
        *out << endl;
      }
    }

    // Get Pamgen mesh definition string, either from the input
    // ParameterList or from our function that makes a cube and fills in
    // the number of cells along each dimension.
    std::string meshInput = inputList.get("meshInput", "");
    if (meshInput == "") {
      *out << "Generating mesh input string: nx = " << nx
           << ", ny = " << ny
           << ", nz = " << nz << endl;
      meshInput = makeMeshInput (nx, ny, nz);
    }

    // Total application run time
    {
      TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Time", total_time);

      RCP<sparse_matrix_type> A;
      RCP<vector_type> B, X_exact, X;
      {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Assembly", total_assembly);
        makeMatrixAndRightHandSide (A, B, X_exact, X, comm, node, meshInput,
                                    out, err, verbose, debug);
      }

      // Optionally dump the matrix and/or its row Map to files.
      {
        typedef Tpetra::MatrixMarket::Writer<sparse_matrix_type> writer_type;
        if (matrixFilename != "") {
          writer_type::writeSparseFile (matrixFilename, A);
        }
        if (rowMapFilename != "") {
          writer_type::writeMapFile (rowMapFilename, * (A->getRowMap ()));
        }
      }

      if (exitAfterAssembly) {
        // Users might still be interested in assembly time.
        Teuchos::TimeMonitor::report (comm.ptr (), std::cout);
        return EXIT_SUCCESS;
      }

      const std::vector<MT> norms = exactResidualNorm (A, B, X_exact);
      // X_exact is the exact solution of the PDE, projected onto the
      // discrete mesh.  It may not necessarily equal the exact solution
      // of the linear system.
      *out << "||B - A*X_exact||_2 = " << norms[0] << endl
           << "||B||_2 = " << norms[1] << endl
           << "||A||_F = " << norms[2] << endl;

      // Setup preconditioner
      std::string prec_type = inputList.get ("Preconditioner", "None");
      RCP<operator_type> M;
      {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Preconditioner Setup", total_prec);

        if (prec_type == "MueLu") {
#ifdef HAVE_TRILINOSCOUPLINGS_MUELU
          if (inputList.isSublist("MueLu")) {
            ParameterList mueluParams = inputList.sublist("MueLu");
            M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node>(A,mueluParams);
          } else {
            M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node>(A);
          }
#else // NOT HAVE_TRILINOSCOUPLINGS_MUELU
          TEUCHOS_TEST_FOR_EXCEPTION(
            prec_type == "MueLu", std::runtime_error, "Tpetra scaling example: "
            "In order to precondition with MueLu, you must have built Trilinos "
            "with the MueLu package enabled.");
#endif // HAVE_TRILINOSCOUPLINGS_MUELU
        }
      } // setup preconditioner

      // Get the convergence tolerance for each linear solve.
      // If the user provided a nonnegative value at the command
      // line, it overrides any value in the input ParameterList.
      MT tol = STM::squareroot (STM::eps ()); // default value
      if (tolFromCmdLine < STM::zero ()) {
        tol = inputList.get ("Convergence Tolerance", tol);
      } else {
        tol = tolFromCmdLine;
      }

      // Get the maximum number of iterations for each linear solve.
      // If the user provided a value other than -1 at the command
      // line, it overrides any value in the input ParameterList.
      int maxNumIters = 200; // default value
      if (maxNumItersFromCmdLine == -1) {
        maxNumIters = inputList.get ("Maximum Iterations", maxNumIters);
      } else {
        maxNumIters = maxNumItersFromCmdLine;
      }

      // Get the number of "time steps."  We imitate a time-dependent
      // PDE by doing this many linear solves.
      const int num_steps = inputList.get ("Number of Time Steps", 1);

      // Do the linear solve(s).
      bool converged = false;
      int numItersPerformed = 0;
      if (gpu) {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total GPU Solve", total_solve);
        solveWithBelosGPU (converged, numItersPerformed, tol, maxNumIters,
                           num_steps, ranks_per_node, gpu_ranks_per_node,
                           device_offset, prec_type, X, A, B, Teuchos::null, M);
      }
      else {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Solve", total_solve);
        solveWithBelos (converged, numItersPerformed, solverName, tol,
                        maxNumIters, num_steps, X, A, B, Teuchos::null, M);
      }

      // Compute ||X-X_exact||_2
      const MT norm_x = X_exact->norm2 ();
      X_exact->update (-1.0, *X, 1.0);
      const MT norm_error = X_exact->norm2 ();
      *out << endl
           << "||X - X_exact||_2 / ||X_exact||_2 = " << norm_error / norm_x
           << endl;
    } // total time block

    // Summarize timings
    Teuchos::TimeMonitor::report (comm.ptr (), std::cout);
  } // try
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);

  if (success) {
    return EXIT_SUCCESS;
  } else {
    return EXIT_FAILURE;
  }
}
Esempio n. 23
0
  static void run(Teuchos::ParameterList &myMachPL, const Teuchos::RCP<const Teuchos::Comm<int> > &comm, const Teuchos::RCP<Node> &node) {
    using std::endl;
  
    ThreadedBlasKiller<Node>::kill();

    typedef double Scalar;
    typedef int LO; //LocalOrdinal
    typedef int GO; //GlobalOrdinal
    typedef Tpetra::MultiVector<Scalar,LO,GO,Node> TMV;
    typedef Tpetra::Operator<Scalar,LO,GO,Node>    TOP;
    typedef Belos::LinearProblem<Scalar,TMV,TOP>   BLP;
    typedef Belos::SolverManager<Scalar,TMV,TOP>   BSM;
    typedef Belos::MultiVecTraits<Scalar,TMV>      BMVT;

    BMVT::mvTimesMatAddMvTimer_ = Teuchos::TimeMonitor::getNewTimer("Belos/Tpetra::MvTimesMatAddMv()");
    BMVT::mvTransMvTimer_ = Teuchos::TimeMonitor::getNewTimer("Belos/Tpetra::MvTransMv()");

    const bool IAmRoot = (comm->getRank() == 0);

    RCP<FancyOStream> fos = fancyOStream(rcpFromRef(std::cout));
    fos->setShowProcRank(true);
    *fos << "Running test with Node == " << Teuchos::typeName(*node) << " on rank " << comm->getRank() << "/" << comm->getSize() << endl;

    int myWeight = myMachPL.get<int>("Node Weight",1);

    // The build_problem function is located in build_problem.hpp.
    // Note that build_problem calls build_precond and sets a preconditioner on the
    // linear-problem, if a preconditioner is specified.
    RCP<BLP> problem = build_problem<Scalar,LO,GO,Node>(BelosTpetraHybridDriverTest::plTestParams, comm, node, myWeight);

    // The build_solver function is located in build_solver.hpp:
    RCP<BSM> solver = build_solver<Scalar,TMV,TOP>(comm, BelosTpetraHybridDriverTest::plTestParams, problem);
    if (IAmRoot) *fos << solver->description() << endl;

    NodeDetails<Node>::clear(node);

    Belos::ReturnType ret = solver->solve();

    if (IAmRoot) {
      *fos << "Converged in " << solver->getNumIters() << " iterations." << endl;
    }

    // RCP<TMV> R = rcp(new TMV(*problem->getRHS()));
    // problem->computeCurrResVec(&*R, &*problem->getLHS(), &*problem->getRHS());
    // Array<ScalarTraits<Scalar>::magnitudeType> norms(R->getNumVectors());
    // R->norm2(norms);
    // if (norms.size() < 1) {
    //   throw std::runtime_error("ERROR: norms.size()==0 indicates R->getNumVectors()==0.");
    // }
    // if (IAmRoot) {
    //   *fos << "2-Norm of 0th residual vec: " << norms[0] << endl;
    // }

    if (IAmRoot) {
      *fos << endl;
    }
    Teuchos::TimeMonitor::summarize(*fos,true,false,false);
    if (IAmRoot) {
      *fos << endl;
    }

    // print node details
    for (int i=0; i<comm->getSize(); ++i) {
      if (comm->getRank() == i) {
        NodeDetails<Node>::printDetails(fos,node.getConst());
      }
      comm->barrier();
    }
  }
SolveStatus<double>
AztecOOLinearOpWithSolve::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<double> &B,
  const Ptr<MultiVectorBase<double> > &X,
  const Ptr<const SolveCriteria<double> > solveCriteria
  ) const
{

  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::OSTab;
  typedef SolveCriteria<double> SC;
  typedef SolveStatus<double> SS;

  THYRA_FUNC_TIME_MONITOR("Stratimikos: AztecOOLOWS");
  Teuchos::Time totalTimer(""), timer("");
  totalTimer.start(true);

  RCP<Teuchos::FancyOStream> out = this->getOStream();
  Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
    *out << "\nSolving block system using AztecOO ...\n\n";

  //
  // Validate input
  //
  TEUCHOS_ASSERT(this->solveSupportsImpl(M_trans));
  SolveMeasureType solveMeasureType;
  if (nonnull(solveCriteria)) {
    solveMeasureType = solveCriteria->solveMeasureType;
    assertSupportsSolveMeasureType(*this, M_trans, solveMeasureType);
  }

  //
  // Get the transpose argument
  //
  const EOpTransp aztecOpTransp = real_trans(M_trans);

  //
  // Get the solver, operator, and preconditioner that we will use
  //
  RCP<AztecOO>
    aztecSolver = ( aztecOpTransp == NOTRANS ? aztecFwdSolver_  : aztecAdjSolver_ );
  const Epetra_Operator
    *aztecOp = aztecSolver->GetUserOperator();

  //
  // Get the op(...) range and domain maps
  //
  const Epetra_Map
    &opRangeMap = aztecOp->OperatorRangeMap(),
    &opDomainMap = aztecOp->OperatorDomainMap();

  //
  // Get the convergence criteria
  //
  double tol = ( aztecOpTransp==NOTRANS ? fwdDefaultTol() : adjDefaultTol() );
  int maxIterations = ( aztecOpTransp==NOTRANS
    ? fwdDefaultMaxIterations() : adjDefaultMaxIterations() );
  bool isDefaultSolveCriteria = true;
  if (nonnull(solveCriteria)) {
    if ( solveCriteria->requestedTol != SC::unspecifiedTolerance() ) {
      tol = solveCriteria->requestedTol;
      isDefaultSolveCriteria = false;
    }
    if (nonnull(solveCriteria->extraParameters)) {
      maxIterations = solveCriteria->extraParameters->get("Maximum Iterations",maxIterations);
    }
  }

  //
  // Get Epetra_MultiVector views of B and X
  //

  RCP<const Epetra_MultiVector> epetra_B;
  RCP<Epetra_MultiVector> epetra_X;

  const EpetraOperatorWrapper* opWrapper =
    dynamic_cast<const EpetraOperatorWrapper*>(aztecOp);

  if (opWrapper == 0) {
    epetra_B = get_Epetra_MultiVector(opRangeMap, rcpFromRef(B));
    epetra_X = get_Epetra_MultiVector(opDomainMap, rcpFromPtr(X));
  }

  //
  // Use AztecOO to solve each RHS one at a time (which is all that I can do anyway)
  //

  int totalIterations = 0;
  SolveStatus<double> solveStatus;
  solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
  solveStatus.achievedTol = -1.0;

  /* Get the number of columns in the multivector. We use Thyra
   * functions rather than Epetra functions to do this, as we
   * might not yet have created an Epetra multivector. - KL */
  //const int m = epetra_B->NumVectors();
  const int m = B.domain()->dim();

  for( int j = 0; j < m; ++j ) {

    THYRA_FUNC_TIME_MONITOR_DIFF("Stratimikos: AztecOOLOWS:SingleSolve", SingleSolve);

    //
    // Get Epetra_Vector views of B(:,j) and X(:,j)
    // How this is done will depend on whether we have a true Epetra operator
    // or we are wrapping a general Thyra operator in an Epetra operator.
    //

    // We need to declare epetra_x_j as non-const because when we have a phony
    // Epetra operator we'll have to copy a thyra vector into it.
    RCP<Epetra_Vector> epetra_b_j;
    RCP<Epetra_Vector> epetra_x_j;

    if (opWrapper == 0) {
      epetra_b_j = rcpFromRef(*const_cast<Epetra_Vector*>((*epetra_B)(j)));
      epetra_x_j = rcpFromRef(*(*epetra_X)(j));
    }
    else {
      if (is_null(epetra_b_j)) {
        epetra_b_j = rcp(new Epetra_Vector(opRangeMap));
        epetra_x_j = rcp(new Epetra_Vector(opDomainMap));
      }
      opWrapper->copyThyraIntoEpetra(*B.col(j), *epetra_b_j);
      opWrapper->copyThyraIntoEpetra(*X->col(j), *epetra_x_j);
    }

    //
    // Set the RHS and LHS
    //

    aztecSolver->SetRHS(&*epetra_b_j);
    aztecSolver->SetLHS(&*epetra_x_j);

    //
    // Solve the linear system
    //
    timer.start(true);
    {
      SetAztecSolveState
        setAztecSolveState(aztecSolver,out,verbLevel,solveMeasureType);
      aztecSolver->Iterate( maxIterations, tol );
      // NOTE: We ignore the returned status but get it below
    }
    timer.stop();

    //
    // Scale the solution
    // (Originally, this was at the end of the loop after all columns had been
    // processed. It's moved here because we need to do it before copying the
    // solution back into a Thyra vector. - KL
    //
    if (aztecSolverScalar_ != 1.0)
      epetra_x_j->Scale(1.0/aztecSolverScalar_);

    //
    // If necessary, convert the solution back to a non-epetra vector
    //
    if (opWrapper != 0) {
      opWrapper->copyEpetraIntoThyra(*epetra_x_j, X->col(j).ptr());
    }

    //
    // Set the return solve status
    //

    const int iterations = aztecSolver->NumIters();
    const double achievedTol = aztecSolver->ScaledResidual();
    const double *AZ_status = aztecSolver->GetAztecStatus();
    std::ostringstream oss;
    bool converged = false;
    if (AZ_status[AZ_why]==AZ_normal) { oss << "Aztec returned AZ_normal."; converged = true; }
    else if (AZ_status[AZ_why]==AZ_param) oss << "Aztec returned AZ_param.";
    else if (AZ_status[AZ_why]==AZ_breakdown) oss << "Aztec returned AZ_breakdown.";
    else if (AZ_status[AZ_why]==AZ_loss) oss << "Aztec returned AZ_loss.";
    else if (AZ_status[AZ_why]==AZ_ill_cond) oss << "Aztec returned AZ_ill_cond.";
    else if (AZ_status[AZ_why]==AZ_maxits) oss << "Aztec returned AZ_maxits.";
    else oss << "Aztec returned an unknown status?";
    oss << "  Iterations = " << iterations << ".";
    oss << "  Achieved Tolerance = " << achievedTol << ".";
    oss << "  Total time = " << timer.totalElapsedTime() << " sec.";
    if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE) && outputEveryRhs())
      Teuchos::OSTab(out).o() << "j="<<j<<": " << oss.str() << "\n";

    solveStatus.achievedTol = TEUCHOS_MAX(solveStatus.achievedTol, achievedTol);
    // Note, achieveTol may actually be greater than tol due to ill conditioning and roundoff!

    totalIterations += iterations;

    solveStatus.message = oss.str();
    if ( isDefaultSolveCriteria ) {
      switch(solveStatus.solveStatus) {
        case SOLVE_STATUS_UNKNOWN:
          // Leave overall unknown!
          break;
        case SOLVE_STATUS_CONVERGED:
          solveStatus.solveStatus = ( converged ? SOLVE_STATUS_CONVERGED : SOLVE_STATUS_UNCONVERGED );
          break;
        case SOLVE_STATUS_UNCONVERGED:
          // Leave overall unconverged!
          break;
        default:
          TEUCHOS_TEST_FOR_EXCEPT(true); // Should never get here!
      }
    }
  }

  aztecSolver->UnsetLHSRHS();

  //
  // Release the Epetra_MultiVector views of X and B
  //
  epetra_X = Teuchos::null;
  epetra_B = Teuchos::null;

  //
  // Update the overall solve criteria
  //
  totalTimer.stop();
  SolveStatus<double> overallSolveStatus;
  if (isDefaultSolveCriteria) {
    overallSolveStatus.solveStatus = SOLVE_STATUS_UNKNOWN;
    overallSolveStatus.achievedTol = SS::unknownTolerance();
  }
  else {
    overallSolveStatus.solveStatus = solveStatus.solveStatus;
    overallSolveStatus.achievedTol = solveStatus.achievedTol;
  }
  std::ostringstream oss;
  oss
    << "AztecOO solver "
    << ( overallSolveStatus.solveStatus==SOLVE_STATUS_CONVERGED ? "converged" : "unconverged" )
    << " on m = "<<m<<" RHSs using " << totalIterations << " cumulative iterations"
    << " for an average of " << (totalIterations/m) << " iterations/RHS and"
    << " total CPU time of "<<totalTimer.totalElapsedTime()<<" sec.";
  overallSolveStatus.message = oss.str();

  // Added these statistics following what was done for Belos
  if (overallSolveStatus.extraParameters.is_null()) {
    overallSolveStatus.extraParameters = Teuchos::parameterList ();
  }
  overallSolveStatus.extraParameters->set ("AztecOO/Iteration Count",
                                            totalIterations);
  // package independent version of the same
  overallSolveStatus.extraParameters->set ("Iteration Count",
                                            totalIterations);
  overallSolveStatus.extraParameters->set ("AztecOO/Achieved Tolerance",
                                            overallSolveStatus.achievedTol);

  //
  // Report the overall time
  //
  if (out.get() && static_cast<int>(verbLevel) >= static_cast<int>(Teuchos::VERB_LOW))
    *out
      << "\nTotal solve time = "<<totalTimer.totalElapsedTime()<<" sec\n";

  return overallSolveStatus;

}
int
main (int argc, char *argv[])
{
  using Teuchos::Comm;
  using Teuchos::ParameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using std::endl;
  using std::cout;

  typedef double                           ST;
  typedef Teuchos::ScalarTraits<ST>       STS;
  typedef STS::magnitudeType               MT;
  typedef Tpetra::Operator<ST,int>         OP;
  typedef Tpetra::MultiVector<ST,int>      MV;
  typedef Belos::OperatorTraits<ST,MV,OP> OPT;
  typedef Belos::MultiVecTraits<ST,MV>    MVT;
  typedef MV::node_type             node_type;

  typedef Teuchos::CommandLineProcessor   CLP;



  Teuchos::GlobalMPISession mpisess (&argc, &argv, &cout);

  bool success = false;
  bool verbose = false;
  try {
    RCP<const Comm<int> > comm =
      Tpetra::DefaultPlatform::getDefaultPlatform ().getComm ();
    RCP<node_type> node =
      Tpetra::DefaultPlatform::getDefaultPlatform ().getNode ();

    //
    // Get test parameters from command-line processor
    //
    bool proc_verbose = false;
    bool debug = false;
    int frequency = -1;
    int numrhs = 1;
    int blocksize = 1;
    int maxiters = -1;
    std::string filename ("bcsstk14.hb");
    MT tol = 1.0e-5;

    CLP cmdp (false, true);
    cmdp.setOption ("verbose", "quiet", &verbose, "Print messages and "
                    "results.");
    cmdp.setOption ("debug", "nodebug", &debug, "Run debugging checks.");
    cmdp.setOption ("frequency", &frequency, "Solver's frequency for printing "
                    "residuals (#iters).  -1 means \"never\"; 1 means \"every "
                    "iteration.\"");
    cmdp.setOption ("tol", &tol, "Relative residual tolerance used by solver.");
    cmdp.setOption ("filename", &filename, "Filename for Harwell-Boeing test "
                    "matrix.");
    cmdp.setOption ("num-rhs", &numrhs, "Number of right-hand sides for which "
                    "to solve.");
    cmdp.setOption ("max-iters", &maxiters, "Maximum number of iterations per "
                    "linear system (-1 := adapted to problem/block size).");
    cmdp.setOption ("block-size", &blocksize, "Block size to be used by the "
                    "solver.");
    if (cmdp.parse (argc, argv) != CLP::PARSE_SUCCESSFUL) {
      return -1;
    }
    if (debug) {
      verbose = true;
    }
    if (!verbose) {
      frequency = -1;  // reset frequency if test is not verbose
    }

    const int MyPID = comm->getRank ();
    proc_verbose = verbose && MyPID == 0;

    if (proc_verbose) {
      cout << Belos::Belos_Version () << endl << endl;
    }

    //
    // Get the data from the HB file and build the Map,Matrix
    //
    RCP<Tpetra::CrsMatrix<ST,int> > A;
    Tpetra::Utils::readHBMatrix (filename, comm, node, A);
    RCP<const Tpetra::Map<int> > map = A->getDomainMap ();

    // Create initial vectors
    RCP<MV> B, X;
    X = rcp (new MV (map, numrhs));
    MVT::MvRandom (*X);
    B = rcp (new MV (map, numrhs));
    OPT::Apply (*A, *X, *B);
    MVT::MvInit (*X, STS::zero ());

    //
    // ********Other information used by block solver***********
    // *****************(can be user specified)******************
    //
    const int NumGlobalElements = B->getGlobalLength();
    if (maxiters == -1) {
      maxiters = NumGlobalElements/blocksize - 1; // maximum number of iterations to run
    }
    //
    ParameterList belosList;
    belosList.set( "Block Size", blocksize );              // Blocksize to be used by iterative solver
    belosList.set( "Maximum Iterations", maxiters );       // Maximum number of iterations allowed
    belosList.set( "Convergence Tolerance", tol );         // Relative convergence tolerance requested
    int verbLevel = Belos::Errors + Belos::Warnings;
    if (debug) {
      verbLevel += Belos::Debug;
    }
    if (verbose) {
      verbLevel += Belos::TimingDetails + Belos::FinalSummary + Belos::StatusTestDetails;
    }
    belosList.set( "Verbosity", verbLevel );
    if (verbose) {
      if (frequency > 0) {
        belosList.set( "Output Frequency", frequency );
      }
    }
    //
    // Construct an unpreconditioned linear problem instance.
    //
    Belos::LinearProblem<ST,MV,OP> problem( A, X, B );
    bool set = problem.setProblem ();
    if (! set) {
      if (proc_verbose) {
        cout << endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << endl;
      }
      return EXIT_FAILURE;
    }
    //
    // *******************************************************************
    // *************Start the block CG iteration***********************
    // *******************************************************************
    //
    typedef Belos::PseudoBlockStochasticCGSolMgr<ST,MV,OP> solver_type;
    solver_type solver (rcpFromRef (problem), rcpFromRef (belosList));

    //
    // **********Print out information about problem*******************
    //
    if (proc_verbose) {
      cout << endl << endl;
      cout << "Dimension of matrix: " << NumGlobalElements << endl;
      cout << "Number of right-hand sides: " << numrhs << endl;
      cout << "Block size used by solver: " << blocksize << endl;
      cout << "Max number of CG iterations: " << maxiters << endl;
      cout << "Relative residual tolerance: " << tol << endl;
      cout << endl;
    }
    //
    // Perform solve
    //
    Belos::ReturnType ret = solver.solve();
    //
    // Compute actual residuals.
    //
    bool badRes = false;
    std::vector<MT> actual_resids (numrhs);
    std::vector<MT> rhs_norm (numrhs);
    MV resid (map, numrhs);
    OPT::Apply (*A, *X, resid);
    MVT::MvAddMv (-STS::one (), resid, STS::one (), *B, resid);
    MVT::MvNorm (resid, actual_resids);
    MVT::MvNorm (*B, rhs_norm);
    if (proc_verbose) {
      cout << "---------- Actual Residuals (normalized) ----------" << endl << endl;
    }
    for (int i=0; i<numrhs; i++) {
      MT actRes = actual_resids[i]/rhs_norm[i];
      if (proc_verbose) {
        cout << "Problem " << i << " : \t" << actRes << endl;
      }
      if (actRes > tol) badRes = true;
    }

    success = ret == Belos::Converged && ! badRes;
    if (success) {
      if (proc_verbose) {
        cout << "\nEnd Result: TEST PASSED" << endl;
      }
    } else {
      if (proc_verbose) {
        cout << "\nEnd Result: TEST FAILED" << endl;
      }
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
void
SupportGraph<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,
                                 local_ordinal_type,
                                 global_ordinal_type,
                                 node_type>& X,
       Tpetra::MultiVector<scalar_type,
                           local_ordinal_type,
                           global_ordinal_type,
                           node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::Time;
  using Teuchos::TimeMonitor;
  typedef scalar_type DomainScalar;
  typedef scalar_type RangeScalar;
  typedef Tpetra::MultiVector<DomainScalar, local_ordinal_type,
    global_ordinal_type, node_type> MV;

  RCP<FancyOStream> out = getFancyOStream(rcpFromRef(std::cout));

  // Create a timer for this method, if it doesn't exist already.
  // TimeMonitor::getNewCounter registers the timer, so that
  // TimeMonitor's class methods like summarize() will report the
  // total time spent in successful calls to this method.
  const std::string timerName ("Ifpack2::SupportGraph::apply");
  RCP<Time> timer = TimeMonitor::lookupCounter(timerName);
  if (timer.is_null()) {
    timer = TimeMonitor::getNewCounter(timerName);
  }

  { // Start timing here.
    Teuchos::TimeMonitor timeMon (*timer);

    TEUCHOS_TEST_FOR_EXCEPTION(
      ! isComputed(), std::runtime_error,
      "Ifpack2::SupportGraph::apply: You must call compute() to compute the "
      "incomplete factorization, before calling apply().");

    TEUCHOS_TEST_FOR_EXCEPTION(
      X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
      "Ifpack2::SupportGraph::apply: X and Y must have the same number of "
      "columns.  X has " << X.getNumVectors() << " columns, but Y has "
      << Y.getNumVectors() << " columns.");

    TEUCHOS_TEST_FOR_EXCEPTION(
      beta != STS::zero(), std::logic_error,
      "Ifpack2::SupportGraph::apply: This method does not currently work when "
      "beta != 0.");

    // If X and Y are pointing to the same memory location,
    // we need to create an auxiliary vector, Xcopy
    RCP<const MV> Xcopy;
    if (X.getLocalMV().getValues() == Y.getLocalMV().getValues()) {
      Xcopy = rcp (new MV(X));
    }
    else {
      Xcopy = rcpFromRef(X);
    }

    if (alpha != STS::one()) {
      Y.scale(alpha);
    }

    RCP<MV> Ycopy = rcpFromRef(Y);

    solver_->setB(Xcopy);
    solver_->setX(Ycopy);

    solver_->solve ();
  } // Stop timing here.

  ++NumApply_;

  // timer->totalElapsedTime() returns the total time over all timer
  // calls.  Thus, we use = instead of +=.
  ApplyTime_ = timer->totalElapsedTime();
}
SolveStatus<Scalar>
BelosLinearOpWithSolve<Scalar>::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &B,
  const Ptr<MultiVectorBase<Scalar> > &X,
  const Ptr<const SolveCriteria<Scalar> > solveCriteria
  ) const
{

  TEUCHOS_FUNC_TIME_MONITOR("BelosLOWS");

  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::FancyOStream;
  using Teuchos::OSTab;
  using Teuchos::describe;
  typedef Teuchos::ScalarTraits<Scalar> ST;
  typedef typename ST::magnitudeType ScalarMag;
  Teuchos::Time totalTimer(""), timer("");
  totalTimer.start(true);

  assertSolveSupports(*this, M_trans, solveCriteria);
  // 2010/08/22: rabartl: Bug 4915 ToDo: Move the above into the NIV function
  // solve(...).

  const int numRhs = B.domain()->dim();
  const int numEquations = B.range()->dim();

  const RCP<FancyOStream> out = this->getOStream();
  const Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_LOW)) {
    *out << "\nStarting iterations with Belos:\n";
    OSTab tab2(out);
    *out << "Using forward operator = " << describe(*fwdOpSrc_->getOp(),verbLevel);
    *out << "Using iterative solver = " << describe(*iterativeSolver_,verbLevel);
    *out << "With #Eqns="<<numEquations<<", #RHSs="<<numRhs<<" ...\n";
  }

  //
  // Set RHS and LHS
  //

  bool ret = lp_->setProblem( rcpFromPtr(X), rcpFromRef(B) );
  TEST_FOR_EXCEPTION(
    ret == false, CatastrophicSolveFailure
    ,"Error, the Belos::LinearProblem could not be set for the current solve!"
    );

  //
  // Set the solution criteria
  //

  const RCP<Teuchos::ParameterList> tmpPL = Teuchos::parameterList();

  SolveMeasureType solveMeasureType;
  RCP<GeneralSolveCriteriaBelosStatusTest<Scalar> > generalSolveCriteriaBelosStatusTest;
  if (nonnull(solveCriteria)) {
    solveMeasureType = solveCriteria->solveMeasureType;
    const ScalarMag requestedTol = solveCriteria->requestedTol;
    if (solveMeasureType.useDefault()) {
      tmpPL->set("Convergence Tolerance", defaultTol_);
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_RHS)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      tmpPL->set("Explicit Residual Scaling", "Norm of RHS");
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_INIT_RESIDUAL)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      tmpPL->set("Explicit Residual Scaling", "Norm of Initial Residual");
    }
    else {
      // Set the most generic (and inefficient) solve criteria
      generalSolveCriteriaBelosStatusTest = createGeneralSolveCriteriaBelosStatusTest(
        *solveCriteria, convergenceTestFrequency_);
      // Set the verbosity level (one level down)
      generalSolveCriteriaBelosStatusTest->setOStream(out);
      generalSolveCriteriaBelosStatusTest->setVerbLevel(incrVerbLevel(verbLevel, -1));
      // Set the default convergence tolerance to always converged to allow
      // the above status test to control things.
      tmpPL->set("Convergence Tolerance", 1.0);
    }
  }
  else {
    // No solveCriteria was even passed in!
    tmpPL->set("Convergence Tolerance", defaultTol_);
  }

  //
  // Reset the blocksize if we adding more vectors than half the number of equations,
  // orthogonalization will fail on the first iteration!
  //

  RCP<const Teuchos::ParameterList> solverParams = iterativeSolver_->getCurrentParameters();
  const int currBlockSize = Teuchos::getParameter<int>(*solverParams, "Block Size");
  bool isNumBlocks = false;
  int currNumBlocks = 0;
  if (Teuchos::isParameterType<int>(*solverParams, "Num Blocks")) {
    currNumBlocks = Teuchos::getParameter<int>(*solverParams, "Num Blocks");
    isNumBlocks = true;
  }
  const int newBlockSize = TEUCHOS_MIN(currBlockSize,numEquations/2);
  if (nonnull(out)
    && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE)
    && newBlockSize != currBlockSize)
  {
    *out << "\nAdjusted block size = " << newBlockSize << "\n";
  }
  //
  tmpPL->set("Block Size",newBlockSize);

  //
  // Set the number of Krylov blocks if we are using a GMRES solver, or a solver
  // that recognizes "Num Blocks". Otherwise the solver will throw an error!
  //

  if (isNumBlocks) {
    const int Krylov_length = (currNumBlocks*currBlockSize)/newBlockSize;
    tmpPL->set("Num Blocks",Krylov_length);
  
    if (newBlockSize != currBlockSize) {
      if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
        *out
          << "\nAdjusted max number of Krylov basis blocks = " << Krylov_length << "\n";
    }
  }

  //
  // Solve the linear system
  //

  Belos::ReturnType belosSolveStatus;
  {
    RCP<std::ostream>
      outUsed =
      ( static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE)
        ? out
        : rcp(new FancyOStream(rcp(new Teuchos::oblackholestream())))
        );
    Teuchos::OSTab tab(outUsed,1,"BELOS");
    tmpPL->set("Output Stream", outUsed);
    iterativeSolver_->setParameters(tmpPL);
    if (nonnull(generalSolveCriteriaBelosStatusTest)) {
      iterativeSolver_->setUserConvStatusTest(generalSolveCriteriaBelosStatusTest);
    }
    belosSolveStatus = iterativeSolver_->solve();
  }

  //
  // Report the solve status
  //

  totalTimer.stop();

  SolveStatus<Scalar> solveStatus;

  switch (belosSolveStatus) {
    case Belos::Unconverged: {
      solveStatus.solveStatus = SOLVE_STATUS_UNCONVERGED;
      break;
    }
    case Belos::Converged: {
      solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
      if (nonnull(generalSolveCriteriaBelosStatusTest)) {
        const ArrayView<const ScalarMag> achievedTol = 
          generalSolveCriteriaBelosStatusTest->achievedTol();
        solveStatus.achievedTol = ST::zero();
        for (Ordinal i = 0; i < achievedTol.size(); ++i) {
          solveStatus.achievedTol = std::max(solveStatus.achievedTol, achievedTol[i]);
        }
      }
      else {
        solveStatus.achievedTol = tmpPL->get("Convergence Tolerance", defaultTol_);
      }
      break;
    }
    TEUCHOS_SWITCH_DEFAULT_DEBUG_ASSERT();
  }

  std::ostringstream ossmessage;
  ossmessage
    << "The Belos solver of type \""<<iterativeSolver_->description()
    <<"\" returned a solve status of \""<< toString(solveStatus.solveStatus) << "\""
    << " in " << iterativeSolver_->getNumIters() << " iterations"
    << " with total CPU time of " << totalTimer.totalElapsedTime() << " sec" ;
  if (out.get() && static_cast<int>(verbLevel) >=static_cast<int>(Teuchos::VERB_LOW))
    *out << "\n" << ossmessage.str() << "\n";

  solveStatus.message = ossmessage.str();

  if (out.get() && static_cast<int>(verbLevel) >= static_cast<int>(Teuchos::VERB_LOW))
    *out << "\nTotal solve time in Belos = "<<totalTimer.totalElapsedTime()<<" sec\n";

  return solveStatus;

}
Esempio n. 28
0
  //
  // Test for TimeMonitor's enableTimer and disableTimer methods.
  //
  TEUCHOS_UNIT_TEST( TimeMonitor, enableTimer )
  {
    using Teuchos::Array;
    using Teuchos::OSTab;
    using Teuchos::ParameterList;
    using Teuchos::parameterList;
    using Teuchos::RCP;
    using Teuchos::rcpFromRef;
    using Teuchos::Time;
    using Teuchos::TimeMonitor;
    using std::endl;
    typedef Teuchos::Array<RCP<Time> >::size_type size_type;

    out << "Testing TimeMonitor's disableTimer() and enableTimer() methods"
        << endl;
    OSTab (rcpFromRef (out));

    out << "Creating timers" << endl;
    const int numTrials = 5;
    const int numTimers = 3;
    Array<RCP<Time> > timers (numTimers);
    for (size_type i = 0; i < numTimers; ++i) {
      std::ostringstream os; // construct timer name
      os << "Timer " << i;
      timers[i] = TimeMonitor::getNewTimer (os.str ());
    }

    out << "Running all timers without disabling any" << endl;
    // The actual number of operations in slowloop is proportional to
    // the cube of the loop length.  Adjust loopLength as necessary to
    // ensure the timer reports a nonzero elapsed time for each of the
    // invocations.
    const size_t loopLength = 25;
    for (int k = 0; k < numTrials; ++k) {
      for (size_type i = 0; i < numTimers; ++i) {
        TimeMonitor timeMon (* timers[i]);
        slowLoop (loopLength);
      }
    }
    for (size_type i = 0; i < numTimers; ++i) {
      TEST_EQUALITY( timers[i]->numCalls(), numTrials );
    }

    out << "Disabling one timer and trying again" << endl;
    // Disable timers[0] only, and repeat the above loops.
    TEST_NOTHROW( TimeMonitor::disableTimer ("Timer 0") );
    for (int k = 0; k < numTrials; ++k) {
      for (size_type i = 0; i < numTimers; ++i) {
        TimeMonitor timeMon (* timers[i]);
        slowLoop (loopLength);
      }
    }
    TEST_EQUALITY( timers[0]->numCalls(), numTrials );
    for (size_type i = 1; i < numTimers; ++i) {
      TEST_EQUALITY( timers[i]->numCalls(), 2*numTrials );
    }

    out << "Reenabling the timer and trying again" << endl;
    // Enable timers[0] and repeat the above loops.
    TEST_NOTHROW( TimeMonitor::enableTimer ("Timer 0") );
    for (int k = 0; k < numTrials; ++k) {
      for (size_type i = 0; i < numTimers; ++i) {
        TimeMonitor timeMon (* timers[i]);
        slowLoop (loopLength);
      }
    }
    TEST_EQUALITY( timers[0]->numCalls(), 2*numTrials );
    for (size_type i = 1; i < numTimers; ++i) {
      TEST_EQUALITY( timers[i]->numCalls(), 3*numTrials );
    }

    out << "Test that summarize() reports enabled and disabled timers" << endl;
    // Make sure that summarize() reports all timers.  Disabling a
    // timer must _not_ exclude it from the list of timers printed by
    // summarize().  Disable a different timer this time just for fun.
    TEST_NOTHROW( TimeMonitor::disableTimer ("Timer 1") );
    {
      std::ostringstream oss;
      TimeMonitor::summarize (oss);

      // Echo summarize() output to the FancyOStream out (which is a
      // standard unit test argument).  Output should only appear in
      // show-all-test-details mode.
      out << oss.str () << std::endl;

      // Make sure that each timer's name shows up in the output.
      for (size_type i = 0; i < numTimers; ++i) {
        const std::string name = timers[i]->name ();
        const size_t substr_i = oss.str ().find (name);
        TEST_INEQUALITY(substr_i, std::string::npos);
      }
    }

    // This sets up for the next unit test.
    TimeMonitor::clearCounters ();
  }
void Hiptmair<MatrixType>::
apply (const Tpetra::MultiVector<typename MatrixType::scalar_type,
       typename MatrixType::local_ordinal_type,
       typename MatrixType::global_ordinal_type,
       typename MatrixType::node_type>& X,
       Tpetra::MultiVector<typename MatrixType::scalar_type,
                           typename MatrixType::local_ordinal_type,
                           typename MatrixType::global_ordinal_type,
                           typename MatrixType::node_type>& Y,
       Teuchos::ETransp mode,
       typename MatrixType::scalar_type alpha,
       typename MatrixType::scalar_type beta) const
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
                              global_ordinal_type, node_type> MV;
  TEUCHOS_TEST_FOR_EXCEPTION(
    ! isComputed (), std::runtime_error,
    "Ifpack2::Hiptmair::apply: You must call compute() before you may call apply().");
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
    "Ifpack2::Hiptmair::apply: The MultiVector inputs X and Y do not have the "
    "same number of columns.  X.getNumVectors() = " << X.getNumVectors ()
    << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");

  // Catch unimplemented cases: alpha != 1, beta != 0, mode != NO_TRANS.
  TEUCHOS_TEST_FOR_EXCEPTION(
    alpha != STS::one (), std::logic_error,
    "Ifpack2::Hiptmair::apply: alpha != 1 has not been implemented.");
  TEUCHOS_TEST_FOR_EXCEPTION(
    beta != STS::zero (), std::logic_error,
    "Ifpack2::Hiptmair::apply: zero != 0 has not been implemented.");
  TEUCHOS_TEST_FOR_EXCEPTION(
    mode != Teuchos::NO_TRANS, std::logic_error,
    "Ifpack2::Hiptmair::apply: mode != Teuchos::NO_TRANS has not been implemented.");

  Teuchos::Time timer ("apply");
  { // The body of code to time
    Teuchos::TimeMonitor timeMon (timer);

    // If X and Y are pointing to the same memory location,
    // we need to create an auxiliary vector, Xcopy
    RCP<const MV> Xcopy;
    {
      auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
      auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
      if (X_lcl_host.ptr_on_device () == Y_lcl_host.ptr_on_device ()) {
        Xcopy = rcp (new MV (X, Teuchos::Copy));
      } else {
        Xcopy = rcpFromRef (X);
      }
    }

    RCP<MV> Ycopy = rcpFromRef (Y);
    if (ZeroStartingSolution_) {
      Ycopy->putScalar (STS::zero ());
    }

    // apply Hiptmair Smoothing
    applyHiptmairSmoother (*Xcopy, *Ycopy);

  }
  ++NumApply_;
  ApplyTime_ += timer.totalElapsedTime ();
}
Esempio n. 30
0
// triangle tests
TEUCHOS_UNIT_TEST(tSTKInterface, interface_test)
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;

   const CellTopologyData * ctd = shards::getCellTopologyData<QuadTopo>();
   const CellTopologyData * side_ctd = shards::CellTopology(ctd).getBaseCellTopologyData(1,0);

   // build global (or serial communicator)
   #ifdef HAVE_MPI
      Epetra_MpiComm Comm(MPI_COMM_WORLD);
   #else
      Epetra_SerialComm Comm;
   #endif
   RCP<Epetra_Comm> comm = rcpFromRef(Comm);

   STK_Interface mesh(2);

   TEST_EQUALITY(mesh.getDimension(),2);

   mesh.addElementBlock("0",ctd);
   mesh.addSideset("Inflow",side_ctd);
   mesh.addSideset("Outflow",side_ctd);
   mesh.addSideset("Top",side_ctd);
   mesh.addSideset("Bottom",side_ctd);

   TEST_EQUALITY(mesh.getDimension(),2);
   TEST_EQUALITY(mesh.getNumSidesets(),4);
   TEST_EQUALITY(mesh.getNumElementBlocks(),1);

   TEST_ASSERT(not mesh.isModifiable());
   mesh.initialize(MPI_COMM_WORLD);

   TEST_ASSERT(not mesh.isModifiable());
   mesh.beginModification();
      TEST_ASSERT(mesh.isModifiable());

      std::vector<double> coord(2);
      stk::mesh::Part * block = mesh.getElementBlockPart("0");
      { 
         // Add four coordinates
         //
         //    4 ---- 3
         //    |      |
         //    |      |
         //    1 ---- 2 
         //
      
         coord[0] = 0.0; coord[1] = 0.0;
         mesh.addNode(1,coord); 

         coord[0] = 1.0; coord[1] = 0.0;
         mesh.addNode(2,coord); 

         coord[0] = 1.0; coord[1] = 1.0;
         mesh.addNode(3,coord); 

         coord[0] = 0.0; coord[1] = 1.0;
         mesh.addNode(4,coord); 

         // add an element
         std::vector<stk::mesh::EntityId> nodes;
         for(std::size_t i=1;i<5;i++)
            nodes.push_back(i);

         Teuchos::RCP<ElementDescriptor> ed = buildElementDescriptor(1,nodes);
         mesh.addElement(ed,block);
      }

      { 
         // Add four coordinates
         //
         //    3 ---- 6
         //    |      |
         //    |      |
         //    2 ---- 5 
         //
      
         coord[0] = 2.0; coord[1] = 0.5;
         mesh.addNode(5,coord); 

         coord[0] = 2.1; coord[1] = 1.5;
         mesh.addNode(6,coord); 

         // add an element
         std::vector<stk::mesh::EntityId> nodes(4);
         nodes[0] = 5;
         nodes[1] = 6;
         nodes[2] = 3;
         nodes[3] = 2;

         Teuchos::RCP<ElementDescriptor> ed = buildElementDescriptor(2,nodes);
         mesh.addElement(ed,block);
      }

   mesh.endModification();
   TEST_ASSERT(not mesh.isModifiable());

   stk::mesh::EntityRank nodeRank = mesh.getNodeRank();
   stk::mesh::EntityRank sideRank = mesh.getSideRank();
   stk::mesh::EntityRank elmtRank = mesh.getElementRank();

   TEST_EQUALITY(mesh.getEntityCounts(nodeRank),6);
   TEST_EQUALITY(mesh.getEntityCounts(sideRank),0);
   TEST_EQUALITY(mesh.getEntityCounts(elmtRank),2);

   #ifdef HAVE_IOSS
      TEST_ASSERT(mesh.isWritable());
      TEST_NOTHROW(mesh.writeToExodus("simplemesh.exo"));
   #else
      TEST_ASSERT(not mesh.isWritable());
      TEST_THROW(mesh.writeToExodus("simplemesh.exo"),std::logic_error);
   #endif 

   const double * coords = 0;

   coords = mesh.getNodeCoordinates(5);
   TEST_FLOATING_EQUALITY(coords[0],2.0,1e-14);
   TEST_FLOATING_EQUALITY(coords[1],0.5,1e-14);

   coords = mesh.getNodeCoordinates(2);
   TEST_FLOATING_EQUALITY(coords[0],1.0,1e-14);
   TEST_EQUALITY(coords[1],0.0);

   coords = mesh.getNodeCoordinates(1);
   TEST_EQUALITY(coords[0],0.0);
   TEST_EQUALITY(coords[1],0.0);

   TEST_EQUALITY(mesh.getMaxEntityId(nodeRank),6);
   TEST_EQUALITY(mesh.getMaxEntityId(elmtRank),2);
}