Example #1
0
int AmesosGenOp::Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y ) const 
{
  if (!useTranspose_) {
    
    // Storage for M*X
    Epetra_MultiVector MX(X.Map(),X.NumVectors());
    
    // Apply M*X
    massMtx_->Apply(X, MX);
    Y.PutScalar(0.0);
    
    // Set the LHS and RHS
    problem_->SetRHS(&MX);
    problem_->SetLHS(&Y);

    // Solve the linear system A*Y = MX
    solver_->Solve();
  }
  else {
    // Storage for A^{-T}*X
    Epetra_MultiVector ATX(X.Map(),X.NumVectors());
    Epetra_MultiVector tmpX = const_cast<Epetra_MultiVector&>(X);
    
    // Set the LHS and RHS
    problem_->SetRHS(&tmpX);
    problem_->SetLHS(&ATX);
    
    // Solve the linear system A^T*Y = X 
    solver_->Solve();
    
    // Apply M*ATX
    massMtx_->Apply(ATX, Y);
  }
  
  return 0;
}
// Example driver: assembles the (nx*nx)-by-(nx*nx) block-tridiagonal matrix
// sketched below as an Epetra_CrsMatrix, asks Anasazi's LOBPCG solver manager
// for nev eigenpairs of it, and prints every computed eigenvalue together
// with its residual norm divided by the eigenvalue.
//
// The "sort" command-line option selects which eigenvalues are targeted
// (default "LM").
//
// Returns 0 on normal completion and -1 if command-line parsing or
// BasicEigenproblem::setProblem() fails.  Failed Epetra calls raise
// std::runtime_error through TEUCHOS_TEST_FOR_EXCEPTION; nothing in this
// function catches it.
int
main (int argc, char *argv[])
{
  using namespace Anasazi;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using std::endl;

#ifdef HAVE_MPI
  // Initialize MPI
  MPI_Init (&argc, &argv);
#endif // HAVE_MPI

  // Create an Epetra communicator
#ifdef HAVE_MPI
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif // HAVE_MPI

  // Create an Anasazi output manager
  BasicOutputManager<double> printer;
  printer.stream(Errors) << Anasazi_Version() << std::endl << std::endl;

  // Get the sorting std::string from the command line
  std::string which ("LM");
  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption("sort", &which, "Targetted eigenvalues (SM or LM).");
  if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
#ifdef HAVE_MPI
    MPI_Finalize ();
#endif // HAVE_MPI
    return -1;
  }

  // Dimension of the matrix
  //
  // Discretization points in any one direction.
  const int nx = 10;
  // Size of matrix nx*nx
  const int NumGlobalElements = nx*nx;

  // Construct a Map that puts approximately the same number of
  // equations on each process.
  Epetra_Map Map (NumGlobalElements, 0, Comm);

  // Get update list and number of local equations from newly created Map.
  int NumMyElements = Map.NumMyElements ();

  std::vector<int> MyGlobalElements (NumMyElements);
  Map.MyGlobalElements (&MyGlobalElements[0]);

  // Create an integer vector NumNz that is used to build the Petra
  // matrix.  NumNz[i] is the total number of nonzero entries (the
  // off-diagonal terms PLUS the diagonal) in the i-th equation owned
  // by this process; it is passed to the matrix constructor below.
  std::vector<int> NumNz (NumMyElements);

  /* We are building a matrix of block structure:

      | T -I          |
      |-I  T -I       |
      |   -I  T       |
      |        ...  -I|
      |           -I T|

   where each block is dimension nx by nx and the matrix is on the order of
   nx*nx.  The block T is a tridiagonal matrix.
  */
  for (int i=0; i<NumMyElements; ++i) {
    // The four corner rows: two off-diagonal neighbors + diagonal.
    if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1 ||
        MyGlobalElements[i] == nx-1 || MyGlobalElements[i] == nx*(nx-1) ) {
      NumNz[i] = 3;
    }
    // Remaining boundary rows: three off-diagonal neighbors + diagonal.
    else if (MyGlobalElements[i] < nx || MyGlobalElements[i] > nx*(nx-1) ||
             MyGlobalElements[i]%nx == 0 || (MyGlobalElements[i]+1)%nx == 0) {
      NumNz[i] = 4;
    }
    // Interior rows: four off-diagonal neighbors + diagonal.
    else {
      NumNz[i] = 5;
    }
  }

  // Create an Epetra_Matrix
  RCP<Epetra_CrsMatrix> A = rcp (new Epetra_CrsMatrix (Epetra_DataAccess::Copy, Map, &NumNz[0]));

  // Compute coefficients for discrete convection-diffusion operator.
  // With rho == 0 the convection coefficient c is zero, so all four
  // off-diagonal values are equal to -1/h^2.
  const double one = 1.0;
  std::vector<double> Values(4);
  std::vector<int> Indices(4);
  double rho = 0.0;
  double h = one /(nx+1);
  double h2 = h*h;
  double c = 5.0e-01*rho/ h;
  // Values = { left neighbor, right neighbor, neighbor one block away, same }.
  // The boundary cases below pass &Values[k] to pick the matching sub-range.
  Values[0] = -one/h2 - c; Values[1] = -one/h2 + c; Values[2] = -one/h2; Values[3]= -one/h2;
  double diag = 4.0 / h2;
  int NumEntries;

  for (int i=0; i<NumMyElements; ++i) {
    if (MyGlobalElements[i]==0) {
      // First row of the first block.
      Indices[0] = 1;
      Indices[1] = nx;
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] == nx*(nx-1)) {
      // First row of the last block.
      Indices[0] = nx*(nx-1)+1;
      Indices[1] = nx*(nx-2);
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] == nx-1) {
      // Last row of the first block.
      Indices[0] = nx-2;
      NumEntries = 1;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
      Indices[0] = 2*nx-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] == NumGlobalElements-1) {
      // Last row of the last block.
      Indices[0] = NumGlobalElements-2;
      NumEntries = 1;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
      Indices[0] = nx*(nx-1)-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] < nx) {
      // Non-corner row of the first block.
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] > nx*(nx-1)) {
      // Non-corner row of the last block.
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i]%nx == 0) {
      // First row of an interior block.
      Indices[0] = MyGlobalElements[i]+1;
      Indices[1] = MyGlobalElements[i]-nx;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if ((MyGlobalElements[i]+1)%nx == 0) {
      // Last row of an interior block.
      Indices[0] = MyGlobalElements[i]-nx;
      Indices[1] = MyGlobalElements[i]+nx;
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
      Indices[0] = MyGlobalElements[i]-1;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else {
      // Interior row: all four neighbors exist.
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      Indices[3] = MyGlobalElements[i]+nx;
      NumEntries = 4;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    // Put in the diagonal entry
    int info = A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, &MyGlobalElements[i]);
    TEUCHOS_TEST_FOR_EXCEPTION
      (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
       << info << " != 0." );
  }

  // Finish up
  int info = A->FillComplete ();
  TEUCHOS_TEST_FOR_EXCEPTION
    (info != 0, std::runtime_error, "A->FillComplete() returned info = "
     << info << " != 0." );
  A->SetTracebackMode (1); // Shutdown Epetra Warning tracebacks

  // Create an identity matrix for the temporary mass matrix.
  // Note: M is assembled here but is not handed to the eigenproblem
  // constructed below, which receives only A and the initial vectors.
  RCP<Epetra_CrsMatrix> M = rcp (new Epetra_CrsMatrix (Epetra_DataAccess::Copy, Map, 1));
  for (int i=0; i<NumMyElements; i++) {
    Values[0] = one;
    // InsertGlobalValues takes GLOBAL column indices, so the diagonal entry
    // of global row MyGlobalElements[i] lives in that same global column.
    // Using the local index i here would misplace the entry on every
    // process whose first owned row is not global row 0.
    Indices[0] = MyGlobalElements[i];
    NumEntries = 1;
    info = M->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
    TEUCHOS_TEST_FOR_EXCEPTION
      (info != 0, std::runtime_error, "M->InsertGlobalValues() returned info = "
       << info << " != 0." );
  }
  // Finish up
  info = M->FillComplete ();
  TEUCHOS_TEST_FOR_EXCEPTION
    (info != 0, std::runtime_error, "M->FillComplete() returned info = "
     << info << " != 0." );
  M->SetTracebackMode (1); // Shutdown Epetra Warning tracebacks

  //************************************
  // Call the LOBPCG solver manager
  //***********************************
  //
  // Variables used for the LOBPCG Method
  const int nev       = 10;
  const int blockSize = 5;
  const int maxIters  = 500;
  const double tol    = 1.0e-8;

  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef MultiVecTraits<double, Epetra_MultiVector> MVT;

  // Create an Epetra_MultiVector for an initial vector to start the
  // solver.  Note: This needs to have the same number of columns as
  // the blocksize.
  RCP<Epetra_MultiVector> ivec = rcp (new Epetra_MultiVector (Map, blockSize));
  ivec->Random (); // fill the initial vector with random values

  // Create the eigenproblem.
  RCP<BasicEigenproblem<double, MV, OP> > MyProblem =
    rcp (new BasicEigenproblem<double, MV, OP> (A, ivec));

  // Inform the eigenproblem that the operator A is symmetric
  MyProblem->setHermitian (true);

  // Set the number of eigenvalues requested
  MyProblem->setNEV (nev);

  // Tell the eigenproblem that you are finished passing it information.
  const bool success = MyProblem->setProblem ();
  if (! success) {
    printer.print (Errors, "Anasazi::BasicEigenproblem::setProblem() reported an error.\n");
#ifdef HAVE_MPI
    MPI_Finalize ();
#endif // HAVE_MPI
    return -1;
  }

  // Create parameter list to pass into the solver manager
  Teuchos::ParameterList MyPL;
  MyPL.set ("Which", which);
  MyPL.set ("Block Size", blockSize);
  MyPL.set ("Maximum Iterations", maxIters);
  MyPL.set ("Convergence Tolerance", tol);
  MyPL.set ("Full Ortho", true);
  MyPL.set ("Use Locking", true);

  // Create the solver manager
  LOBPCGSolMgr<double, MV, OP> MySolverMan (MyProblem, MyPL);

  // Solve the problem
  ReturnType returnCode = MySolverMan.solve ();

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Eigensolution<double,MV> sol = MyProblem->getSolution ();
  std::vector<Value<double> > evals = sol.Evals;
  RCP<MV> evecs = sol.Evecs;

  // Compute residuals: the column norms of A*evecs - evecs*T, where T is
  // the diagonal matrix holding the real parts of the eigenvalues.
  std::vector<double> normR (sol.numVecs);
  if (sol.numVecs > 0) {
    Teuchos::SerialDenseMatrix<int,double> T (sol.numVecs, sol.numVecs);
    Epetra_MultiVector tempAevec (Map, sol.numVecs );
    T.putScalar (0.0);
    for (int i = 0; i < sol.numVecs; ++i) {
      T(i,i) = evals[i].realpart;
    }
    A->Apply (*evecs, tempAevec);
    MVT::MvTimesMatAddMv (-1.0, *evecs, T, 1.0, tempAevec);
    MVT::MvNorm (tempAevec, normR);
  }

  // Print the results
  std::ostringstream os;
  os.setf (std::ios_base::right, std::ios_base::adjustfield);
  os << "Solver manager returned "
     << (returnCode == Converged ? "converged." : "unconverged.") << endl;
  os << endl;
  os << "------------------------------------------------------" << endl;
  os << std::setw(16) << "Eigenvalue"
     << std::setw(18) << "Direct Residual"
     << endl;
  os << "------------------------------------------------------" << endl;
  for (int i = 0; i < sol.numVecs; ++i) {
    // The residual is reported relative to the eigenvalue.
    os << std::setw(16) << evals[i].realpart
       << std::setw(18) << normR[i] / evals[i].realpart
       << endl;
  }
  os << "------------------------------------------------------" << endl;
  printer.print (Errors, os.str ());

#ifdef HAVE_MPI
  MPI_Finalize ();
#endif // HAVE_MPI
  return 0;
}
Example #3
0
  int
main (int argc, char *argv[])
{
  using Belos::OutputManager;
  using Belos::SolverFactory;
  using Teuchos::CommandLineProcessor;
  using Teuchos::null;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_dynamic_cast;
  using Teuchos::rcp_implicit_cast;
  using std::cerr;
  using std::cout;
  using std::endl;
  typedef double scalar_type;
  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef Belos::SolverManager<scalar_type, MV, OP> solver_base_type;
  typedef Belos::SolverFactory<scalar_type, MV, OP> factory_type;

  Teuchos::oblackholestream blackHole;
  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole);

  RCP<Epetra_Comm> comm;
  {
#ifdef EPETRA_MPI
    RCP<Epetra_MpiComm> commSpecific (new Epetra_MpiComm (MPI_COMM_WORLD));
#else
    RCP<Epetra_SerialComm> commSpecific (new Epetra_SerialComm);
#endif // EPETRA_MPI
    comm = rcp_implicit_cast<Epetra_Comm> (commSpecific);
  }
  std::ostream& out = (comm->MyPID() == 0) ? std::cout : blackHole;

  bool success = false;
  bool verbose = false;
  try {
    int numRHS = 1;
    bool debug = false;

    // Define command-line arguments.
    CommandLineProcessor cmdp (false, true);
    cmdp.setOption ("numRHS", &numRHS, "Number of right-hand sides in the linear "
        "system to solve.");
    cmdp.setOption ("verbose", "quiet", &verbose, "Print messages and results.");
    cmdp.setOption ("debug", "nodebug", &debug, "Print debugging information.");

    // Parse the command-line arguments.
    {
      const CommandLineProcessor::EParseCommandLineReturn parseResult =
        cmdp.parse (argc,argv);
      if (parseResult == CommandLineProcessor::PARSE_HELP_PRINTED) {
        if (comm->MyPID() == 0)
          std::cout << "End Result: TEST PASSED" << endl;
        return EXIT_SUCCESS;
      }
      TEUCHOS_TEST_FOR_EXCEPTION(parseResult != CommandLineProcessor::PARSE_SUCCESSFUL,
          std::invalid_argument, "Failed to parse command-line arguments.");
    }

    // Declare an output manager for handling local output.  Initialize,
    // using the caller's desired verbosity level.
    RCP<OutputManager<scalar_type> > outMan =
      rcp (new OutputManager<scalar_type> (selectVerbosity (verbose, debug)));

    // Stream for debug output.  If debug output is not enabled, then
    // this stream doesn't print anything sent to it (it's a "black
    // hole" stream).
    //std::ostream& debugOut = outMan->stream (Belos::Debug);

    // Create the operator to test, with domain and range maps.
    RCP<const Epetra_Map> domainMap, rangeMap;
    RCP<Epetra_Operator> A = makeMatrix (comm, domainMap, rangeMap);
    // "Solution" input/output multivector.
    RCP<MV> X_exact = rcp (new MV (*domainMap, numRHS));
    X_exact->Seed ();
    X_exact->Random ();
    RCP<MV> X = rcp (new MV (*domainMap, numRHS));
    X->PutScalar (0.0);
    // "Right-hand side" input multivector.
    RCP<MV> B = rcp (new MV (*rangeMap, numRHS));
    A->Apply (*X_exact, *B);

    //
    // Test creating solver instances using the solver factory.
    //
    factory_type factory;
    //
    // Test the canonical solver names.
    //
    {
      typedef Belos::BlockGmresSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "Block GMRES", out, verbose);
    }
    {
      typedef Belos::PseudoBlockGmresSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "Pseudoblock GMRES", out, verbose);
    }
    {
      typedef Belos::BlockCGSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "Block CG", out, verbose);
    }
    {
      typedef Belos::PseudoBlockCGSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "Pseudoblock CG", out, verbose);
    }
    {
      typedef Belos::GCRODRSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "GCRODR", out, verbose);
    }
    {
      typedef Belos::RCGSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "RCG", out, verbose);
    }
    {
      typedef Belos::MinresSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "MINRES", out, verbose);
    }
    {
      typedef Belos::LSQRSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "LSQR", out, verbose);
    }
    //
    // Test aliases.
    //
    {
      typedef Belos::BlockGmresSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "Flexible GMRES", out, verbose);
    }
    {
      typedef Belos::PseudoBlockCGSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "CG", out, verbose);
    }
    {
      typedef Belos::RCGSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "Recycling CG", out, verbose);
    }
    {
      typedef Belos::GCRODRSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "Recycling GMRES", out, verbose);
    }
    {
      typedef Belos::PseudoBlockGmresSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "Pseudo Block GMRES", out, verbose);
    }
    {
      typedef Belos::PseudoBlockCGSolMgr<scalar_type, MV, OP> solver_impl_type;
      testCreatingSolver<factory_type, solver_base_type,
        solver_impl_type> (factory, "Pseudo Block CG", out, verbose);
    }

    success = true;
    if (comm->MyPID() == 0) {
      cout << "End Result: TEST PASSED" << endl;
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
// =============================================================================
int main (int argc, char *argv[])
{
  Teuchos::GlobalMPISession(&argc, &argv, NULL);

  // Create a communicator for Epetra objects
#ifdef HAVE_MPI
  Epetra_MpiComm eComm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm eComm();
#endif

  const RCP<Teuchos::FancyOStream> out =
    Teuchos::VerboseObjectBase::getDefaultOStream();

  bool success = true;
  try {
    // ===========================================================================
    // handle command line arguments
    Teuchos::CommandLineProcessor My_CLP;

    My_CLP.setDocString("Linear solver testbed for the 1D Poisson matrix.\n");

    std::string action("matvec");
    My_CLP.setOption("action",
                     &action,
                     "Which action to perform with the operator (matvec, solve_cg, solve_minres, solve_gmres)"
                    );

    std::string solver("cg");
//       My_CLP.setOption("solver", &solver, "Krylov subspace method (cg, minres, gmres)");

//       Operator op = JAC;
//       Operator allOpts[] = {JAC, KEO, KEOREG, POISSON1D};
//       std::string allOptNames[] = {"jac", "keo", "keoreg", "poisson1d"};
//       My_CLP.setOption("operator", &op, 4, allOpts, allOptNames);

    bool verbose = true;
    My_CLP.setOption("verbose", "quiet",
                     &verbose,
                     "Print messages and results.");

    int frequency = 10;
    My_CLP.setOption("frequency",
                     &frequency,
                     "Solvers frequency for printing residuals (#iters).");

    // Make sure this value is large enough to keep the cores busy for a while.
    int n = 1000;
    My_CLP.setOption("size",
                     &n,
                     "Size of the equation system (default: 1000).");

    // print warning for unrecognized arguments
    My_CLP.recogniseAllOptions(true);
    My_CLP.throwExceptions(false);

    // finally, parse the command line
    TEUCHOS_ASSERT_EQUALITY(My_CLP.parse (argc, argv),
                            Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL
                           );
    // =========================================================================
    // Construct Epetra matrix.
    RCP<Teuchos::Time> matrixConstructTime =
      Teuchos::TimeMonitor::getNewTimer("Epetra matrix construction");
    RCP<Epetra_CrsMatrix> A;
    {
      Teuchos::TimeMonitor tm(*matrixConstructTime);
      A = contructEpetraMatrix(n, eComm);
    }
//       A->Print(std::cout);

    // create initial guess and right-hand side
    RCP<Epetra_Vector> x =
      rcp(new Epetra_Vector(A->OperatorDomainMap()));
    RCP<Epetra_MultiVector> b =
      rcp(new Epetra_Vector(A->OperatorRangeMap()));
    // b->Random();
    TEUCHOS_ASSERT_EQUALITY(0, b->PutScalar(1.0));

    if (action.compare("matvec") == 0) {
      TEUCHOS_ASSERT_EQUALITY(0, x->PutScalar(1.0));
      RCP<Teuchos::Time> mvTime = Teuchos::TimeMonitor::getNewTimer("Epetra operator apply");
      {
        Teuchos::TimeMonitor tm(*mvTime);
        // Don't TEUCHOS_ASSERT_EQUALITY() here for speed.
        A->Apply(*x, *b);
      }

      // print timing data
      Teuchos::TimeMonitor::summarize();
    } else {
      // -----------------------------------------------------------------------
      // Belos part
      Teuchos::ParameterList belosList;
      // Relative convergence tolerance requested
      belosList.set("Convergence Tolerance", 1.0e-12);
      if (verbose) {
        belosList.set("Verbosity",
                      Belos::Errors +
                      Belos::Warnings +
                      Belos::IterationDetails +
                      Belos::FinalSummary +
                      Belos::Debug +
                      Belos::TimingDetails +
                      Belos::StatusTestDetails
                     );
        if (frequency > 0)
          belosList.set("Output Frequency", frequency);
      } else {
        belosList.set("Verbosity", Belos::Errors + Belos::Warnings);
      }

      // Belos::General, Belos::Brief
      belosList.set("Output Style", static_cast<int>(Belos::Brief));
      belosList.set("Maximum Iterations", 1000);

      // Construct an unpreconditioned linear problem instance.
      Belos::LinearProblem<double, MV, OP> problem(A, x, b);
      bool set = problem.setProblem();
      TEUCHOS_TEST_FOR_EXCEPTION(
          !set,
          std::logic_error,
          "ERROR:  Belos::LinearProblem failed to set up correctly!"
          );
      // -----------------------------------------------------------------------
      // Create an iterative solver manager.
      RCP<Belos::SolverManager<double, MV, OP> > newSolver;
      if (action.compare("solve_cg") == 0) {
        belosList.set("Assert Positive Definiteness", false);
        newSolver =
          rcp(new Belos::PseudoBlockCGSolMgr<double, MV, OP>(
                rcp(&problem, false),
                rcp(&belosList, false)
                ));
      } else if (action.compare("solve_minres") == 0) {
        newSolver =
          rcp(new Belos::MinresSolMgr<double, MV, OP>(
                rcp(&problem, false),
                rcp(&belosList, false)
                ));
      } else if (action.compare("solve_gmres") == 0) {
        newSolver =
          rcp(new Belos::PseudoBlockGmresSolMgr<double, MV, OP>(
                rcp(&problem, false),
                rcp(&belosList, false)
                ));
      } else {
        TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Unknown solver type \"" << solver << "\".");
      }

      // Perform solve
      RCP<Teuchos::Time> solveTime =
        Teuchos::TimeMonitor::getNewTimer("Linear system solve");
      {
        Teuchos::TimeMonitor tm(*solveTime);
        Belos::ReturnType ret = newSolver->solve();
        success = ret == Belos::Converged;
      }

//         *out << newSolver->getNumIters() << std::endl;
//         // Compute actual residuals.
//         bool badRes = false;
//         Teuchos::Array<double> actual_resids(1);
//         Teuchos::Array<double> rhs_norm(1);
//         Epetra_Vector resid(keoMatrix->OperatorRangeMap());
//         OPT::Apply(*keoMatrix, *x, resid);
//         MVT::MvAddMv(-1.0, resid, 1.0, *b, resid);
//         MVT::MvNorm(resid, actual_resids);
//         MVT::MvNorm(*b, rhs_norm);
//         if (proc_verbose) {
//           std::cout<< "---------- Actual Residuals (normalized) ----------" <<std::endl<<std::endl;
//           for (int i=0; i<1; i++) {
//             double actRes = actual_resids[i]/rhs_norm[i];
//             std::cout << "Problem " << i << " : \t" << actRes << std::endl;
//             if (actRes > 1.0e-10) badRes = true;
//           }
//         }
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, *out, success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
int
main (int argc, char *argv[])
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  using std::cerr;
  using std::cout;
  using std::endl;
  // Anasazi solvers have the following template parameters:
  //
  //   - Scalar: The type of dot product results.
  //   - MV: The type of (multi)vectors.
  //   - OP: The type of operators (functions from multivector to
  //     multivector).  A matrix (like Epetra_CrsMatrix) is an example
  //     of an operator; an Ifpack preconditioner is another example.
  //
  // Here, Scalar is double, MV is Epetra_MultiVector, and OP is
  // Epetra_Operator.
  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef Anasazi::MultiVecTraits<double, MV> MVT;

#ifdef EPETRA_MPI
  // Initialize MPI
  MPI_Init (&argc, &argv);
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif // EPETRA_MPI

  const int MyPID = Comm.MyPID ();

  //
  // Set up the test problem
  //

  // Dimensionality of the spatial domain to discretize
  const int space_dim = 2;

  // Size of each of the dimensions of the (discrete) domain
  std::vector<double> brick_dim (space_dim);
  brick_dim[0] = 1.0;
  brick_dim[1] = 1.0;

  // Number of elements in each of the dimensions of the domain
  std::vector<int> elements (space_dim);
  elements[0] = 10;
  elements[1] = 10;

  // Create the test problem.
  RCP<ModalProblem> testCase =
    rcp (new ModeLaplace2DQ2 (Comm, brick_dim[0], elements[0],
                              brick_dim[1], elements[1]));

  // Get the stiffness and mass matrices.
  //
  // rcp (T*, false) returns a nonowning (doesn't deallocate) RCP.
  RCP<Epetra_CrsMatrix> K =
    rcp (const_cast<Epetra_CrsMatrix* > (testCase->getStiffness ()), false);
  RCP<Epetra_CrsMatrix> M =
    rcp (const_cast<Epetra_CrsMatrix* > (testCase->getMass ()), false);

  //
  // Create linear solver for linear systems with K
  //
  // Anasazi uses shift and invert, with a "shift" of zero, to find
  // the eigenvalues of least magnitude.  In this example, we
  // implement the "invert" part of shift and invert by using an
  // Amesos direct solver.
  //

  // Create Epetra linear problem class for solving linear systems
  // with K.  This implements the inverse operator for shift and
  // invert.
  Epetra_LinearProblem AmesosProblem;
  // Tell the linear problem about the matrix K.  Epetra_LinearProblem
  // doesn't know about RCP, so we have to give it a raw pointer.
  AmesosProblem.SetOperator (K.get ());

  // Create Amesos factory and solver for solving linear systems with
  // K.  The solver uses the KLU library to do a sparse LU
  // factorization.
  //
  // Note that the AmesosProblem object "absorbs" K.  Anasazi doesn't
  // see K, just the operator that implements K^{-1} M.
  Amesos amesosFactory;
  RCP<Amesos_BaseSolver> AmesosSolver;

  // Amesos can interface to many different solvers.  The following
  // loop picks a solver that Amesos supports.  The loop order
  // reflects solver preference, only in the sense that using LAPACK
  // here is a suboptimal fall-back.  (With the LAPACK option, Amesos
  // makes a dense version of the sparse matrix and uses LAPACK to
  // compute the factorization.  The other options are true sparse
  // direct factorizations.)
  const int numSolverNames = 9;
  const char* solverNames[9] = {
    "Klu", "Umfpack", "Superlu", "Superludist", "Mumps",
    "Paradiso", "Taucs", "CSparse", "Lapack"
  };
  for (int k = 0; k < numSolverNames; ++k) {
    const char* const solverName = solverNames[k];
    if (amesosFactory.Query (solverName)) {
      AmesosSolver = rcp (amesosFactory.Create (solverName, AmesosProblem));
      if (MyPID == 0) {
        cout << "Amesos solver: \"" << solverName << "\"" << endl;
      }
    }
  }
  if (AmesosSolver.is_null ()) {
    throw std::runtime_error ("Amesos appears not to have any solvers enabled.");
  }

  // The AmesosGenOp class assumes that the symbolic and numeric
  // factorizations have already been performed on the linear problem.
  AmesosSolver->SymbolicFactorization ();
  AmesosSolver->NumericFactorization ();

  //
  // Set parameters for the block Krylov-Schur eigensolver
  //

  double tol = 1.0e-8; // convergence tolerance
  int nev = 10; // number of eigenvalues for which to solve
  int blockSize = 3; // block size (number of eigenvectors processed at once)
  int numBlocks = 3 * nev / blockSize; // restart length
  int maxRestarts = 5; // maximum number of restart cycles

  // We're looking for the largest-magnitude eigenvalues of the
  // _inverse_ operator, thus, the smallest-magnitude eigenvalues of
  // the original operator.
  std::string which = "LM";
  int verbosity = Anasazi::Errors + Anasazi::Warnings + Anasazi::FinalSummary;

  // Create ParameterList to pass into eigensolver
  Teuchos::ParameterList MyPL;
  MyPL.set ("Verbosity", verbosity);
  MyPL.set ("Which", which);
  MyPL.set ("Block Size", blockSize);
  MyPL.set ("Num Blocks", numBlocks);
  MyPL.set ("Maximum Restarts", maxRestarts);
  MyPL.set ("Convergence Tolerance", tol);

  // Create an initial set of vectors to start the eigensolver.  Note:
  // This needs to have the same number of columns as the block size.
  RCP<MV> ivec = rcp (new MV (K->Map (), blockSize));
  ivec->Random ();

  // Create the Epetra_Operator for the spectral transformation using
  // the Amesos direct solver.
  //
  // The AmesosGenOp object is the operator we give to Anasazi.  Thus,
  // Anasazi just sees an operator that computes y = K^{-1} M x.  The
  // matrix K got absorbed into AmesosProblem (the
  // Epetra_LinearProblem object).  Later, when we set up the Anasazi
  // eigensolver, we will need to tell it about M, so that it can
  // orthogonalize basis vectors with respect to the inner product
  // defined by M (since it is symmetric positive definite).
  RCP<AmesosGenOp> Aop = rcp (new AmesosGenOp (AmesosSolver, M));

  // Create the eigenproblem.  This object holds all the stuff about
  // your problem that Anasazi will see.
  //
  // Anasazi only needs M so that it can orthogonalize basis vectors
  // with respect to the M inner product.  Wouldn't it be nice if
  // Anasazi didn't require M in two different places?  Alas, this is
  // not currently the case.
  RCP<Anasazi::BasicEigenproblem<double,MV,OP> > MyProblem =
    rcp (new Anasazi::BasicEigenproblem<double,MV,OP> (Aop, M, ivec));

  // Tell the eigenproblem that the matrix pencil (K,M) is symmetric.
  MyProblem->setHermitian (true);

  // Set the number of eigenvalues requested
  MyProblem->setNEV (nev);

  // Tell the eigenproblem that you are finished passing it information.
  const bool boolret = MyProblem->setProblem ();
  if (boolret != true) {
    if (MyPID == 0) {
      cerr << "Anasazi::BasicEigenproblem::setProblem() returned with error." << endl;
    }
#ifdef EPETRA_MPI
    MPI_Finalize ();
#endif // EPETRA_MPI
    return -1;
  }

  // Create the Block Krylov-Schur eigensolver.
  Anasazi::BlockKrylovSchurSolMgr<double, MV, OP> MySolverMgr (MyProblem, MyPL);

  // Solve the eigenvalue problem.
  //
  // Note that creating the eigensolver is separate from solving it.
  // After creating the eigensolver, you may call solve() multiple
  // times with different parameters or initial vectors.  This lets
  // you reuse intermediate state, like allocated basis vectors.
  Anasazi::ReturnType returnCode = MySolverMgr.solve ();
  if (returnCode != Anasazi::Converged && MyPID == 0) {
    cout << "Anasazi eigensolver did not converge." << endl;
  }

  // Get the eigenvalues and eigenvectors from the eigenproblem.
  Anasazi::Eigensolution<double,MV> sol = MyProblem->getSolution ();
  // Anasazi returns eigenvalues as Anasazi::Value, so that if
  // Anasazi's Scalar type is real-valued (as it is in this case), but
  // some eigenvalues are complex, you can still access the
  // eigenvalues correctly.  In this case, there are no complex
  // eigenvalues, since the matrix pencil is symmetric.
  std::vector<Anasazi::Value<double> > evals = sol.Evals;
  RCP<MV> evecs = sol.Evecs;
  int numev = sol.numVecs;

  if (numev > 0) {
    // Reconstruct the eigenvalues.  The ones that Anasazi gave back
    // are the inverses of the original eigenvalues.  Reconstruct the
    // eigenvectors too.
    MV tempvec (K->Map (), MVT::GetNumberVecs (*evecs));
    K->Apply (*evecs, tempvec);
    Teuchos::SerialDenseMatrix<int,double> dmatr (numev, numev);
    MVT::MvTransMv (1.0, tempvec, *evecs, dmatr);

    if (MyPID == 0) {
      double compeval = 0.0;
      cout.setf (std::ios_base::right, std::ios_base::adjustfield);
      cout << "Actual Eigenvalues (obtained by Rayleigh quotient) : " << endl;
      cout << "------------------------------------------------------" << endl;
      cout << std::setw(16) << "Real Part"
           << std::setw(16) << "Rayleigh Error" << endl;
      cout << "------------------------------------------------------" << endl;
      for (int i = 0; i < numev; ++i) {
        compeval = dmatr(i,i);
        cout << std::setw(16) << compeval
             << std::setw(16)
             << std::fabs (compeval - 1.0/evals[i].realpart)
             << endl;
      }
      cout << "------------------------------------------------------" << endl;
    }
  }

#ifdef EPETRA_MPI
  MPI_Finalize ();
#endif // EPETRA_MPI

  return 0;
}
// Timing driver for basic Epetra kernels.
//
// Builds an Epetra_Map with a fixed global size, then runs a series of
// Epetra_Vector operations (reductions, norms, scale, dot, multiply,
// update) and Epetra_CrsMatrix operations (norms, scalings, apply) on a
// diagonal and a tridiagonal test matrix, each inside its own
// Teuchos::TimeMonitor scope.  A timer summary is printed at the end.
//
// Returns EXIT_SUCCESS if the try block completed without an exception
// being caught by TEUCHOS_STANDARD_CATCH_STATEMENTS, EXIT_FAILURE otherwise.
int main (int argc, char *argv[])
{
  // Initialize MPI.
  //
  // The session has to be a *named* object.  An unnamed temporary
  // ("Teuchos::GlobalMPISession (&argc, &argv, NULL);") is destroyed at the
  // end of that very statement, which finalizes MPI before any of the
  // communication below takes place.  The named object lives until main()
  // returns.
  Teuchos::GlobalMPISession mpiSession (&argc, &argv, NULL);

  // Create output stream. (Handy for multicore output.)
  const RCP<Teuchos::FancyOStream> out =
    Teuchos::VerboseObjectBase::getDefaultOStream();

  // Create a communicator for Epetra objects
#ifdef HAVE_MPI
  RCP<Epetra_MpiComm> eComm =
    rcp<Epetra_MpiComm> (new Epetra_MpiComm (MPI_COMM_WORLD));
#else
  RCP<Epetra_SerialComm> eComm =
    rcp<Epetra_SerialComm> (new Epetra_SerialComm());
#endif

  bool success = true;
  try {
    // Create map.
    // Do strong scaling tests, so keep numGlobalElements independent of
    // the number of processes.
    const int numGlobalElements = 50000000;
    const int indexBase = 0;
    RCP<Epetra_Map> map =
      rcp(new Epetra_Map (numGlobalElements, indexBase, *eComm));

    //// Create map with overlay.
    //int numMyOverlapNodes = 3;

    //// Get an approximation of my nodes.
    //int numMyElements = numGlobalElements / eComm->NumProc();
    //int startIndex = eComm->MyPID() * numMyElements;
    //// Calculate the resulting number of total nodes.
    //int numTotalNodes = numMyElements * eComm->NumProc();
    //// Add one node to the first numGlobalElements-numTotalNodes processes.
    //if (eComm->MyPID() < numGlobalElements - numTotalNodes)
    //{
    //    numMyElements++;
    //    startIndex += eComm->MyPID();
    //}
    //else
    //{
    //    startIndex += numGlobalElements - numTotalNodes;
    //}

    //Teuchos::Array<int> indices(numMyElements);
    //for (int k = 0;  k<numMyElements; k++)
    //    indices[k] = startIndex + k;

    //std::cout << numGlobalElements << std::endl;
    //std::cout << numMyElements << std::endl;

    //RCP<Epetra_Map> overlapMap =
    //    rcp(new Epetra_Map (numGlobalElements, numMyElements, indices.getRawPtr(), indexBase, *eComm));

    //overlapMap->Print(std::cout);

    //throw 1;

    // ---------------------------------------------------------------
    // Tests on one vector.  Each operation runs inside its own scope so
    // that the TimeMonitor only measures that single call.
    // ---------------------------------------------------------------
    RCP<Epetra_Vector> u = rcp(new Epetra_Vector(*map));
    u->Random();

    RCP<Teuchos::Time> meanValueTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::MeanValue");
    {
      Teuchos::TimeMonitor tm(*meanValueTime);
      double meanVal;
      TEUCHOS_ASSERT_EQUALITY(0, u->MeanValue(&meanVal));
    }

    RCP<Teuchos::Time> maxValueTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::MaxValue");
    {
      Teuchos::TimeMonitor tm(*maxValueTime);
      double maxValue;
      TEUCHOS_ASSERT_EQUALITY(0, u->MaxValue(&maxValue));
    }

    RCP<Teuchos::Time> minValueTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::MinValue");
    {
      Teuchos::TimeMonitor tm(*minValueTime);
      double minValue;
      TEUCHOS_ASSERT_EQUALITY(0, u->MinValue(&minValue));
    }

    RCP<Teuchos::Time> norm1Time =
      Teuchos::TimeMonitor::getNewTimer("Vector::Norm1");
    {
      Teuchos::TimeMonitor tm(*norm1Time);
      double norm1;
      TEUCHOS_ASSERT_EQUALITY(0, u->Norm1(&norm1));
    }

    RCP<Teuchos::Time> norm2Time =
      Teuchos::TimeMonitor::getNewTimer("Vector::Norm2");
    {
      Teuchos::TimeMonitor tm(*norm2Time);
      double norm2;
      TEUCHOS_ASSERT_EQUALITY(0, u->Norm2(&norm2));
    }

    RCP<Teuchos::Time> normInfTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::NormInf");
    {
      Teuchos::TimeMonitor tm(*normInfTime);
      double normInf;
      TEUCHOS_ASSERT_EQUALITY(0, u->NormInf(&normInf));
    }

    RCP<Teuchos::Time> scaleTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Scale");
    {
      Teuchos::TimeMonitor tm(*scaleTime);
      const double alpha = 0.5;
      TEUCHOS_ASSERT_EQUALITY(0, u->Scale(alpha));
    }

    // ---------------------------------------------------------------
    // Tests involving two vectors.
    // ---------------------------------------------------------------
    RCP<Epetra_Vector> v = rcp(new Epetra_Vector(*map));
    v->Random();

    RCP<Teuchos::Time> dotTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Dot");
    {
      Teuchos::TimeMonitor tm(*dotTime);
      double dot;
      TEUCHOS_ASSERT_EQUALITY(0, u->Dot(*v, &dot));
    }

    RCP<Teuchos::Time> multiplyTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Multiply");
    {
      Teuchos::TimeMonitor tm(*multiplyTime);
      TEUCHOS_ASSERT_EQUALITY(0, u->Multiply(1.0, *u, *v, 1.0));
    }

    RCP<Teuchos::Time> updateTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Update");
    {
      Teuchos::TimeMonitor tm(*updateTime);
      TEUCHOS_ASSERT_EQUALITY(0, u->Update(1.0, *v, 1.0));
    }

    // ---------------------------------------------------------------
    // Matrix-vector tests.
    // ---------------------------------------------------------------

    // Diagonal test matrix: entry (g,g) = 1/(g+1) for every locally
    // owned global index g.
    RCP<Epetra_CrsMatrix> D =
      rcp(new Epetra_CrsMatrix(Copy, *map, 1));
    for (int k = 0; k < map->NumMyElements(); k++) {
      int col = map->GID(k);
      double val = 1.0 / (col+1);
      //TEUCHOS_ASSERT_EQUALITY(0, D->InsertMyValues(k, 1, &val, &col));
      TEUCHOS_ASSERT_EQUALITY(0, D->InsertGlobalValues(col, 1, &val, &col));
    }
    TEUCHOS_ASSERT_EQUALITY(0, D->FillComplete());

    // Tridiagonal test matrix with stencil (-1, 2, -1); the off-diagonal
    // entries are skipped on the first and last global rows.
    RCP<Epetra_CrsMatrix> T =
      rcp(new Epetra_CrsMatrix(Copy, *map, 3));
    for (int k = 0; k < map->NumMyElements(); k++) {
      int row = map->GID(k);
      if (row > 0) {
        int col = row-1;
        double val = -1.0;
        //TEUCHOS_ASSERT_EQUALITY(0, T->InsertMyValues(k, 1, &val, &col));
        TEUCHOS_ASSERT_EQUALITY(0, T->InsertGlobalValues(row, 1, &val, &col));
      }
      {
        int col = row;
        double val = 2.0;
        //TEUCHOS_ASSERT_EQUALITY(0, T->InsertMyValues(k, 1, &val, &col));
        TEUCHOS_ASSERT_EQUALITY(0, T->InsertGlobalValues(row, 1, &val, &col));
      }
      if (row < numGlobalElements-1) {
        int col = row+1;
        double val = -1.0;
        //TEUCHOS_ASSERT_EQUALITY(0, T->InsertMyValues(k, 1, &val, &col));
        TEUCHOS_ASSERT_EQUALITY(0, T->InsertGlobalValues(row, 1, &val, &col));
      }
    }
    TEUCHOS_ASSERT_EQUALITY(0, T->FillComplete());

    // Start matrix timings.  The computed norms are not used further;
    // only the time spent computing them is of interest.
    RCP<Teuchos::Time> mNorm1Time =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::Norm1");
    {
      Teuchos::TimeMonitor tm(*mNorm1Time);
      double dNorm1 = D->NormOne();
      double tNorm1 = T->NormOne();
    }

    RCP<Teuchos::Time> mNormInfTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::NormInf");
    {
      Teuchos::TimeMonitor tm(*mNormInfTime);
      double dNormInf = D->NormInf();
      double tNormInf = T->NormInf();
    }

    RCP<Teuchos::Time> mNormFrobTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::NormFrobenius");
    {
      Teuchos::TimeMonitor tm(*mNormFrobTime);
      double dNormFrob = D->NormFrobenius();
      double tNormFrob = T->NormFrobenius();
    }

    RCP<Teuchos::Time> mScaleTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::Scale");
    {
      Teuchos::TimeMonitor tm(*mScaleTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->Scale(2.0));
      TEUCHOS_ASSERT_EQUALITY(0, T->Scale(2.0));
    }

    RCP<Teuchos::Time> leftScaleTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::LeftScale");
    {
      Teuchos::TimeMonitor tm(*leftScaleTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->LeftScale(*v));
      TEUCHOS_ASSERT_EQUALITY(0, T->LeftScale(*v));
    }

    RCP<Teuchos::Time> rightScaleTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::RightScale");
    {
      Teuchos::TimeMonitor tm(*rightScaleTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->RightScale(*v));
      TEUCHOS_ASSERT_EQUALITY(0, T->RightScale(*v));
    }

    RCP<Teuchos::Time> applyTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::Apply");
    {
      Teuchos::TimeMonitor tm(*applyTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->Apply(*u, *v));
      TEUCHOS_ASSERT_EQUALITY(0, T->Apply(*u, *v));
    }

    // print timing data
    Teuchos::TimeMonitor::summarize();
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, *out, success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
// Test driver: applies a MueLu Braess-Sarazin block smoother as a solver
// to a 2x2 blocked (velocity/pressure) system.
//
// Steps:
//   1. parse smoother parameters from the command line,
//   2. build strided maps (2 velocity dofs + 1 pressure dof per node),
//   3. read the system matrix and right-hand side from Matrix Market files,
//   4. split the matrix into a 2x2 block operator,
//   5. set up the Braess-Sarazin smoother with a Schur complement
//      smoother/solver and apply it once,
//   6. print the solution norm and the residual norm.
//
// Returns EXIT_SUCCESS when the whole sequence ran through (or help was
// printed), EXIT_FAILURE on a command-line error, on an unreadable input
// file, on a failed matrix split, or when an exception was caught.
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP;
  using Teuchos::rcp;
  using namespace MueLuTests;
  using namespace Teuchos;

  typedef Xpetra::StridedMap<int,int>        StridedMap;
  typedef Xpetra::StridedMapFactory<int,int> StridedMapFactory;

  oblackholestream blackhole;
  GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  bool success = false;
  bool verbose = true;
  try {
    RCP<const Comm<int> > comm = DefaultComm<int>::getComm();
    RCP<FancyOStream> out = fancyOStream(rcpFromRef(std::cout));
    out->setOutputToRootOnly(0);
    *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

    // Timing
    Time myTime("global");
    TimeMonitor MM(myTime);

#ifndef HAVE_XPETRA_INT_LONG_LONG
    *out << "Warning: scaling test was not compiled with long long int support" << std::endl;
#endif

    // read in input parameters

    // default parameters
    LO BS_nSweeps = 100;
    Scalar BS_omega = 1.7;
    LO SC_nSweeps = 1;
    Scalar SC_omega = 1.0;
    int SC_bUseDirectSolver = 0;

    // Note: use --help to list available options.
    CommandLineProcessor clp(false);
    clp.setOption("BraessSarazin_sweeps",&BS_nSweeps,"number of sweeps with BraessSarazin smoother");
    clp.setOption("BraessSarazin_omega", &BS_omega,  "scaling factor for BraessSarazin smoother");
    clp.setOption("SchurComp_sweeps",    &SC_nSweeps,"number of sweeps for BraessSarazin internal SchurComp solver/smoother (GaussSeidel)");
    clp.setOption("SchurComp_omega",     &SC_omega,  "damping parameter for BraessSarazin internal SchurComp solver/smoother (GaussSeidel)");
    clp.setOption("SchurComp_solver",    &SC_bUseDirectSolver,  "if 1: use direct solver for SchurComp equation, otherwise use GaussSeidel smoother (=default)");

    switch (clp.parse(argc,argv)) {
      case CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
      case CommandLineProcessor::PARSE_ERROR:
      case CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
      case CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
    }

    int globalNumDofs = 1500;  // used for the maps
    //int nDofsPerNode = 3;      // used for generating the fine level null-space

    // build strided maps
    // striding information: 2 velocity dofs and 1 pressure dof = 3 dofs per node
    std::vector<size_t> stridingInfo;
    stridingInfo.push_back(2);
    stridingInfo.push_back(1);

    /////////////////////////////////////// build strided maps
    // build strided maps:
    // xstridedfullmap: full map (velocity and pressure dof gids), continuous
    // xstridedvelmap: only velocity dof gid maps (i.e. 0,1,3,4,6,7...)
    // xstridedpremap: only pressure dof gid maps (i.e. 2,5,8,...)
    Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;
    RCP<StridedMap> xstridedfullmap = StridedMapFactory::Build(lib,globalNumDofs,0,stridingInfo,comm,-1);
    RCP<StridedMap> xstridedvelmap  = StridedMapFactory::Build(xstridedfullmap,0);
    RCP<StridedMap> xstridedpremap  = StridedMapFactory::Build(xstridedfullmap,1);

    /////////////////////////////////////// transform Xpetra::Map objects to Epetra
    // this is needed for our splitting routine
    const RCP<const Epetra_Map> fullmap = rcpFromRef(Xpetra::toEpetra(*xstridedfullmap));
    RCP<const Epetra_Map>       velmap  = rcpFromRef(Xpetra::toEpetra(*xstridedvelmap));
    RCP<const Epetra_Map>       premap  = rcpFromRef(Xpetra::toEpetra(*xstridedpremap));

    /////////////////////////////////////// import problem matrix and RHS from files (-> Epetra)

    // read in problem
    Epetra_CrsMatrix * ptrA = 0;
    Epetra_Vector * ptrf = 0;

    *out << "Reading matrix market file" << std::endl;

    // The readers hand back raw pointers through their last argument and
    // report failure through a nonzero return code.  Wrap whatever was
    // allocated in RCPs right away so it is released on every exit path.
    const int readMatrixErr =
      EpetraExt::MatrixMarketFileToCrsMatrix("A_re1000_5932.txt",*fullmap,*fullmap,*fullmap,ptrA);
    RCP<Epetra_CrsMatrix> epA = rcp(ptrA);

    const int readVectorErr =
      EpetraExt::MatrixMarketFileToVector("b_re1000_5932.txt",*fullmap,ptrf);
    RCP<Epetra_Vector> epv = rcp(ptrf);

    // Without the matrix and right-hand side nothing below can work; stop
    // here instead of dereferencing null handles later on.
    if (readMatrixErr != 0 || epA.is_null()) {
      *out << "Error: could not read matrix from A_re1000_5932.txt (error code "
           << readMatrixErr << ")" << std::endl;
      return EXIT_FAILURE;
    }
    if (readVectorErr != 0 || epv.is_null()) {
      *out << "Error: could not read vector from b_re1000_5932.txt (error code "
           << readVectorErr << ")" << std::endl;
      return EXIT_FAILURE;
    }

    /////////////////////////////////////// split system into 2x2 block system

    *out << "Split matrix into 2x2 block matrix" << std::endl;

    // split fullA into A11,..., A22
    RCP<Epetra_CrsMatrix> A11;
    RCP<Epetra_CrsMatrix> A12;
    RCP<Epetra_CrsMatrix> A21;
    RCP<Epetra_CrsMatrix> A22;

    // A failed split leaves the block handles unusable for the Xpetra
    // wrappers below, so treat it as fatal.
    if(SplitMatrix2x2(epA,*velmap,*premap,A11,A12,A21,A22)==false) {
      *out << "Problem with splitting matrix"<< std::endl;
      return EXIT_FAILURE;
    }

    /////////////////////////////////////// transform Epetra objects to Xpetra (needed for MueLu)

    // build Xpetra objects from Epetra_CrsMatrix objects
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA11 = rcp(new Xpetra::EpetraCrsMatrix(A11));
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA12 = rcp(new Xpetra::EpetraCrsMatrix(A12));
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA21 = rcp(new Xpetra::EpetraCrsMatrix(A21));
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA22 = rcp(new Xpetra::EpetraCrsMatrix(A22));

    /////////////////////////////////////// generate MapExtractor object

    std::vector<RCP<const Xpetra::Map<LO,GO,Node> > > xmaps;

    xmaps.push_back(xstridedvelmap);
    xmaps.push_back(xstridedpremap);

    RCP<const Xpetra::MapExtractor<Scalar,LO,GO,Node> > map_extractor = Xpetra::MapExtractorFactory<Scalar,LO,GO>::Build(xstridedfullmap,xmaps);

    /////////////////////////////////////// build blocked operator
    // using the map extractor for both range and domain
    RCP<Xpetra::BlockedCrsMatrix<Scalar,LO,GO,Node> > bOp = rcp(new Xpetra::BlockedCrsMatrix<Scalar,LO,GO>(map_extractor,map_extractor,10));
    bOp->setMatrix(0,0,xA11);
    bOp->setMatrix(0,1,xA12);
    bOp->setMatrix(1,0,xA21);
    bOp->setMatrix(1,1,xA22);

    bOp->fillComplete();

    //////////////////////////////////////////////////////// finest Level
    RCP<MueLu::Level> Finest = rcp(new Level());
    Finest->setDefaultVerbLevel(VERB_NONE);
    Finest->Set("A",rcp_dynamic_cast<Matrix>(bOp));

    ///////////////////////////////////
    // Test Braess Sarazin Smoother as a solver

    *out << "Test: Creating Braess Sarazin Smoother" << std::endl;
    *out << "Test: Omega for BraessSarazin = " << BS_omega << std::endl;
    *out << "Test: Number of sweeps for BraessSarazin = " << BS_nSweeps << std::endl;
    *out << "Test: Omega for Schur Complement solver= " << SC_omega << std::endl;
    *out << "Test: Number of Schur Complement solver= " << SC_nSweeps << std::endl;
    *out << "Test: Setting up Braess Sarazin Smoother" << std::endl;

    // define BraessSarazin Smoother with BS_nSweeps and BS_omega as scaling factor
    // AFact_ = null (= default) for the 2x2 blocked operator
    RCP<BraessSarazinSmoother> BraessSarazinSm = rcp( new BraessSarazinSmoother() );
    BraessSarazinSm->SetParameter("Sweeps", Teuchos::ParameterEntry(BS_nSweeps));
    BraessSarazinSm->SetParameter("Damping factor", Teuchos::ParameterEntry(BS_omega));

    RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(BraessSarazinSm) );

    /*note that omega must be the same in the SchurComplementFactory and in the BraessSarazinSmoother*/
    // define SchurComplement Factory
    // SchurComp gets a RCP to AFact_ which has to be the 2x2 blocked operator
    // and the scaling/damping factor omega that is used for BraessSarazin
    // It stores the resulting SchurComplement operator as "A" generated by the SchurComplementFactory
    // Instead of F^{-1} it uses the approximation \hat{F}^{-1} with \hat{F} = diag(F)
    RCP<SchurComplementFactory> SFact = rcp(new SchurComplementFactory());
    SFact->SetParameter("omega", ParameterEntry(BS_omega));
    SFact->SetFactory("A",MueLu::NoFactory::getRCP());

    // define smoother/solver for BraessSarazin
    RCP<SmootherPrototype> smoProtoSC = null;
    if(SC_bUseDirectSolver != 1) {
      // Gauss-Seidel relaxation on the Schur complement, using SFact as
      // the factory for A
      std::string ifpackSCType;
      ParameterList ifpackSCList;
      ifpackSCList.set("relaxation: sweeps", SC_nSweeps );
      ifpackSCList.set("relaxation: damping factor", SC_omega );
      ifpackSCType = "RELAXATION";
      ifpackSCList.set("relaxation: type", "Gauss-Seidel");
      smoProtoSC     = rcp( new TrilinosSmoother(ifpackSCType, ifpackSCList, 0) );
      smoProtoSC->SetFactory("A", SFact);
    }
    else {
      // Direct solver on the Schur complement, with default (empty)
      // type string and parameter list
      ParameterList ifpackDSList;
      std::string ifpackDSType;
      smoProtoSC     = rcp( new DirectSolver(ifpackDSType,ifpackDSList) );
      smoProtoSC->SetFactory("A", SFact);
    }

    RCP<SmootherFactory> SmooSCFact = rcp( new SmootherFactory(smoProtoSC) );

    // define temporary FactoryManager that is used as input for BraessSarazin smoother
    RCP<FactoryManager> MB = rcp(new FactoryManager());
    MB->SetFactory("A",                 SFact);         // SchurComplement operator for correction step (defined as "A")
    MB->SetFactory("Smoother",          SmooSCFact);    // solver/smoother for correction step
    MB->SetFactory("PreSmoother",               SmooSCFact);
    MB->SetFactory("PostSmoother",              SmooSCFact);
    MB->SetIgnoreUserData(true);               // always use data from factories defined in factory manager
    BraessSarazinSm->AddFactoryManager(MB,0);  // set temporary factory manager in BraessSarazin smoother

    // setup main factory manager
    RCP<FactoryManager> M = rcp(new FactoryManager());
    M->SetFactory("A",               MueLu::NoFactory::getRCP()); // this is the 2x2 blocked operator
    M->SetFactory("Smoother",        smootherFact);               // BraessSarazin block smoother
    M->SetFactory("PreSmoother",     smootherFact);
    M->SetFactory("PostSmoother",    smootherFact);

    MueLu::SetFactoryManager SFMCoarse(Finest, M);
    Finest->Request(MueLu::TopSmootherFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>(M, "Smoother"));

    // call setup (= extract blocks and extract diagonal of F)
    BraessSarazinSm->Setup(*Finest);

    // zero initial guess on the full (velocity + pressure) map
    RCP<MultiVector> xtest = MultiVectorFactory::Build(xstridedfullmap,1);
    xtest->putScalar(ScalarTraits<SC>::zero());

    // right-hand side: the vector read from file, wrapped for Xpetra
    RCP<Vector> xR = rcp(new Xpetra::EpetraVector(epv));
    // calculate initial (absolute) residual
    Array<ScalarTraits<SC>::magnitudeType> norms(1);

    xR->norm2(norms);
    *out << "Test: ||x_0|| = " << norms[0] << std::endl;
    *out << "Test: Applying Braess-Sarazin Smoother" << std::endl;
    *out << "Test: START DATA" << std::endl;
    *out << "iterations\tVelocity_residual\tPressure_residual" << std::endl;
    BraessSarazinSm->Apply(*xtest,*xR);
    xtest->norm2(norms);
    *out << "Test: ||x_1|| = " << norms[0] << std::endl;

    Array<ScalarTraits<double>::magnitudeType> test = MueLu::Utils<double, int, int>::ResidualNorm(*bOp, *xtest, *xR);
    *out << "residual norm: " << test[0] << std::endl;

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm Comm;
#endif
  // My MPI process rank.                                                                                                                                                                  
  const int MyPID = Comm.MyPID();

  // "/Users/sakashitatatsuya/Downloads/barista_trunk_slepc/sample/hamiltonian_matrix.ip"
  std::ifstream  ifs(argv[1]);
  alps::Parameters params(ifs);
  Teuchos::oblackholestream blackHole;
  std::ostream& out = (MyPID == 0) ? std::cout : blackHole;

  barista::Hamiltonian<> hamiltonian(params);
  matrix_type matrix(hamiltonian.dimension(), hamiltonian.dimension());
  hamiltonian.fill<double>(matrix);
  int m,n;
  int N;
  m = n = N = hamiltonian.dimension();
  //std::cout << matrix << std::endl;

  std::ofstream ofs;
  if (MyPID==0) {
    ofs.open("anasazi_time.txt");
    if (!ofs) {
#ifdef HAVE_MPI
      MPI_Finalize() ;
#endif
      return -1;
    }
  }

  //Teuchos::ParameterList GaleriList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  typedef Teuchos::ScalarTraits<double> STS;

  const double one = STS::one();
  const double zero = STS::zero();

  // The problem is defined on a 2D grid, global size is nx * nx.
  //int nx = N; 
  //GaleriList.set("n", nx * nx);
  //GaleriList.set("nx", nx);
  //GaleriList.set("ny", nx);
  //Teuchos::RCP<Epetra_Map> Map = Teuchos::rcp( Galeri::CreateMap("Linear", Comm, GaleriList) );
  //Teuchos::RCP<Epetra_RowMatrix> A = Teuchos::rcp( Galeri::CreateCrsMatrix("Laplace2D", &*Map, GaleriList) );

  // Construct a Map that puts approximately the same number of rows
  // of the matrix A on each processor.
  Epetra_Map RowMap (N, 0, Comm);
  Epetra_Map ColMap (N, 0, Comm);
  // Get update list and number of local equations from newly created Map.
  const int NumMyRowElements = RowMap.NumMyElements ();
  std::vector<int> MyGlobalRowElements (NumMyRowElements);
  RowMap.MyGlobalElements (&MyGlobalRowElements[0]);


  // Create an Epetra_CrsMatrix using the given row map.                                                                                                                                   
  RCP<Epetra_CrsMatrix> A = rcp (new Epetra_CrsMatrix (Copy, RowMap, n));

  // We use info to catch any errors that may have happened during                                                                                                                           // matrix assembly, and report them globally.  We do this so that                                                                                                                          // the MPI processes won't call FillComplete() unless they all                                                                                                                             // successfully filled their parts of the matrix.                                                                                                                                         
  int info = 0;
  try {
    //                                                                                                                                                                                     
    // Compute coefficients for the discrete integral operator.                                                                                                                           
    //                                                                                                                                                                                      
    std::vector<double> Values (n);
    std::vector<int> Indices (n);
    //const double inv_mp1 = one / (m+1);
    //const double inv_np1 = one / (n+1);
    int count;
    //for (int i = 0; i < n; ++i) {
    //  Indices[i] = i;
    //}
    for (int i = 0; i < NumMyRowElements; ++i) {
      count =0;
      for (int j = 0; j < n; ++j) {
	if (matrix(MyGlobalRowElements[i],j)!=0) {
	  Values[count] = matrix(MyGlobalRowElements[i],j);
	  Indices[count] = j;
	  count++;
	}
      }

      info = A->InsertGlobalValues (MyGlobalRowElements[i], count,
                                    &Values[0], &Indices[0]);
      // Make sure that the insertion succeeded.  Teuchos'                                                                                                                                 
      // TEST_FOR_EXCEPTION macro gives a nice error message if the                                                                                                                        
      // thrown exception isn't caught.  We'll report this on the                                                                                                                          
      // offending MPI process.                                                                                                                                                             
      /*                                                                                                                     
      TEST_FOR_EXCEPTION( info != 0, std::runtime_error, "Failed to insert n="                                                                                                             
      << n << " global value" << (n != 1 ? "s" : "")                                                                                                                    
      << " in row " << MyGlobalRowElements[i]                                                                                                                           
      << " of the matrix." );                                                                                                                                           
      */
    } // for i = 0...                                                                                                                                                                       
    // Call FillComplete on the matrix.  Since the matrix isn't square,                                                                                                                    
    // we have to give FillComplete the domain and range maps, which in                                                                                                                    
    // this case are the column resp. row maps.                                                                                                                                             
    info = A->FillComplete (ColMap, RowMap);
    /*                                                                                                                                                                                     
    TEST_FOR_EXCEPTION( info != 0, std::runtime_error,                                                                                                                                     
    "FillComplete failed with INFO = " << info << ".");                                                                                                                 
    */
    info = A->OptimizeStorage();
    /*                                                                                                                                                                                     
    TEST_FOR_EXCEPTION( info != 0, std::runtime_error,                                                                                                                                
    "OptimizeStorage failed with INFO = " << info << ".");                                                                                                              
    */
  } catch (std::runtime_error& e) {
    // If multiple MPI processes are reporting errors, sometimes                                                                                                                           
    // forming the error message as a string and then writing it to                                                                                                                        
    // the output stream prevents messages from different processes                                                                                                                        
    // from being interleaved.                                                                                                                                                              
    std::ostringstream os;
    os << "*** Error on MPI process " << MyPID << ": " << e.what();
    cerr << os.str() << endl;
    if (info == 0)
      info = -1; // All procs will share info later on.                                                                                                                                     
  }

  //  Variables used for the Block Davidson Method
  const int    nev         = 5;
  const int    blockSize   = 5;
  const int    numBlocks   = 8;
  const int    maxRestarts = 500;
  const double tol         = 1.0e-8;

  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef Anasazi::MultiVecTraits<double, Epetra_MultiVector> MVT;

  // Create an Epetra_MultiVector for an initial vector to start the solver.
  // Note:  This needs to have the same number of columns as the blocksize.
  //
  //Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(*Map, blockSize) );
  Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(ColMap, blockSize) );
  ivec->Random();

  // Create the eigenproblem.
  Teuchos::RCP<Anasazi::BasicEigenproblem<double, MV, OP> > problem =
    Teuchos::rcp( new Anasazi::BasicEigenproblem<double, MV, OP>(A, ivec) );

  // Inform the eigenproblem that the operator A is symmetric
  problem->setHermitian(true);

  // Set the number of eigenvalues requested
  problem->setNEV( nev );

  // Inform the eigenproblem that you are finishing passing it information
  bool boolret = problem->setProblem();
  if (boolret != true) {
    std::cout<<"Anasazi::BasicEigenproblem::setProblem() returned an error." << std::endl;
#ifdef HAVE_MPI
    MPI_Finalize();
#endif
    return -1;
  }

  // Create parameter list to pass into the solver manager
  Teuchos::ParameterList anasaziPL;
  anasaziPL.set( "Which", "LM" );
  anasaziPL.set( "Block Size", blockSize );
  anasaziPL.set( "Maximum Iterations", 500 );
  anasaziPL.set( "Convergence Tolerance", tol );
  anasaziPL.set( "Verbosity", Anasazi::Errors+Anasazi::Warnings+Anasazi::TimingDetails+Anasazi::FinalSummary );

  // Create the solver manager
  Anasazi::LOBPCGSolMgr<double, MV, OP> anasaziSolver(problem, anasaziPL);

  // Solve the problem
  double start, end;
  MPI_Barrier(MPI_COMM_WORLD);
  start = MPI_Wtime();
  Anasazi::ReturnType returnCode = anasaziSolver.solve();
  MPI_Barrier(MPI_COMM_WORLD);
  end = MPI_Wtime();

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Anasazi::Eigensolution<double,MV> sol = problem->getSolution();
  std::vector<Anasazi::Value<double> > evals = sol.Evals;
  Teuchos::RCP<MV> evecs = sol.Evecs;

  // Compute residuals.
  std::vector<double> normR(sol.numVecs);
  Teuchos::SerialDenseMatrix<int,double> T(sol.numVecs, sol.numVecs);
  Epetra_MultiVector tempAevec( ColMap, sol.numVecs );
  T.putScalar(0.0); 
  for (int i=0; i<sol.numVecs; i++) {
    T(i,i) = evals[i].realpart;
  }
  A->Apply( *evecs, tempAevec );
  MVT::MvTimesMatAddMv( -1.0, *evecs, T, 1.0, tempAevec );
  MVT::MvNorm( tempAevec, normR );

  if (MyPID == 0) {
  // Print the results
  std::cout<<"Solver manager returned " << (returnCode == Anasazi::Converged ? "converged." : "unconverged.") << std::endl;
  std::cout<<std::endl;
  std::cout<<"------------------------------------------------------"<<std::endl;
  std::cout<<std::setw(16)<<"Eigenvalue"
           <<std::setw(18)<<"Direct Residual"
           <<std::endl;
  std::cout<<"------------------------------------------------------"<<std::endl;
  for (int i=0; i<sol.numVecs; i++) {
    std::cout<<std::setw(16)<<evals[i].realpart
             <<std::setw(18)<<normR[i]/evals[i].realpart
             <<std::endl;
  }
  std::cout<<"------------------------------------------------------"<<std::endl;
  }

  // Print out the map and matrices
  //ColMap.Print (out);

  //A->Print (cout);
  //RowMap.Print (cout);

  double time;
  int iter;
  if (MyPID==0) {
    iter = anasaziSolver.getNumIters();
    Teuchos::Array<Teuchos::RCP<Teuchos::Time> > timer = anasaziSolver.getTimers();
    Teuchos::RCP<Teuchos::Time> _timerSolve = timer[0];
    cout << "timerSolve=" << _timerSolve << endl;
    time = end - start;
    cout << "time=" << time << endl;
    ofs << "time=" << time << endl;
    cout << "iter=" << iter << endl;
    ofs << "iter=" << iter << endl;
  }

#ifdef HAVE_MPI
  MPI_Finalize() ; 
#endif

  return 0;
}
/// Driver that uses the MueLu SIMPLE block smoother as a stand-alone solver.
///
/// The program reads a matrix and a right-hand side from the Matrix Market
/// files "A_re1000_5932.txt" and "b_re1000_5932.txt", splits the matrix into a
/// 2x2 block system using strided maps (2 + 1 dofs per node), wraps the blocks
/// in an Xpetra::BlockedCrsMatrix, sets up a SimpleSmoother on a single Level
/// and applies it once to a zero initial guess.
///
/// @param argc  number of command line arguments
/// @param argv  command line arguments (use --help to list the options)
/// @return EXIT_SUCCESS if the final residual norm is below 1.0e-7 (or if the
///         help text was printed, or if MueLu was built without Epetra);
///         EXIT_FAILURE on a command line error, when the input files cannot
///         be read, when the matrix cannot be split, when the residual check
///         fails, or when an exception is caught.
int main(int argc, char *argv[]) {
#if defined(HAVE_MUELU_EPETRA)
  typedef double Scalar;
  typedef int LocalOrdinal;
  typedef int GlobalOrdinal;
  typedef LocalOrdinal LO;
  typedef GlobalOrdinal GO;
  typedef Xpetra::EpetraNode Node;
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP;
  using Teuchos::rcp;
  using namespace MueLuTests;

  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  bool success = false;
  bool verbose = true;
  try {
    RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
    RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
    out->setOutputToRootOnly(0);
    *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

    // Timing
    Teuchos::Time myTime("global");
    Teuchos::TimeMonitor MM(myTime);

    // read in input parameters

    // default parameters
    LO SIMPLE_nSweeps = 600;
    Scalar SIMPLE_omega = 0.5;
    LO SC_nSweeps = 10;
    Scalar SC_omega = 1.0;
    LO PRED_nSweeps = 3;
    Scalar PRED_omega = 1.0;
    LO useSIMPLEC = 0;

    int SC_bUseDirectSolver = 0;

    // Note: use --help to list available options.
    Teuchos::CommandLineProcessor clp(false);
    clp.setOption("SIMPLE_sweeps",&SIMPLE_nSweeps,"number of sweeps with SIMPLE smoother");
    clp.setOption("SIMPLE_omega", &SIMPLE_omega,  "scaling factor for SIMPLE smoother");
    clp.setOption("Predict_sweeps", &PRED_nSweeps,  "number of sweeps for SIMPLE internal velocity prediction smoother (GaussSeidel)");
    clp.setOption("Predict_omega", &PRED_omega,  "damping parameter for SIMPLE internal velocity prediction smoother (GaussSeidel)");
    clp.setOption("SchurComp_sweeps",    &SC_nSweeps,"number of sweeps for SIMPLE internal SchurComp solver/smoother (GaussSeidel)");
    clp.setOption("SchurComp_omega",     &SC_omega,  "damping parameter for SIMPLE internal SchurComp solver/smoother (GaussSeidel)");
    clp.setOption("SchurComp_solver",    &SC_bUseDirectSolver,  "if 1: use direct solver for SchurComp equation, otherwise use GaussSeidel smoother");
    clp.setOption("useSIMPLEC",    &useSIMPLEC,  "if 1: use SIMPLEC instead of SIMPLE (default = 0 (SIMPLE))");

    switch (clp.parse(argc,argv)) {
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
      case Teuchos::CommandLineProcessor::PARSE_ERROR:
      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
    }

    int globalNumDofs = 1500;  // used for the maps

    // striding information: 2 velocity dofs and 1 pressure dof = 3 dofs per node
    std::vector<size_t> stridingInfo;
    stridingInfo.push_back(2);
    stridingInfo.push_back(1);

    /////////////////////////////////////// build strided maps
    // xstridedfullmap: full map (velocity and pressure dof gids), continuous
    // xstridedvelmap: only velocity dof gid maps (i.e. 0,1,3,4,6,7...)
    // xstridedpremap: only pressure dof gid maps (i.e. 2,5,8,...)
    Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;
    RCP<const StridedMap> xstridedfullmap = StridedMapFactory::Build(lib,globalNumDofs,0,stridingInfo,comm,-1);
    RCP<const StridedMap> xstridedvelmap  = StridedMapFactory::Build(xstridedfullmap,0);
    RCP<const StridedMap> xstridedpremap  = StridedMapFactory::Build(xstridedfullmap,1);

    /////////////////////////////////////// transform Xpetra::Map objects to Epetra
    // this is needed for our splitting routine
    // (rcpFromRef creates non-owning handles; the strided maps above stay the owners)
    const RCP<const Epetra_Map> fullmap = Teuchos::rcpFromRef(Xpetra::toEpetra(*xstridedfullmap));
    RCP<const Epetra_Map>       velmap  = Teuchos::rcpFromRef(Xpetra::toEpetra(*xstridedvelmap));
    RCP<const Epetra_Map>       premap  = Teuchos::rcpFromRef(Xpetra::toEpetra(*xstridedpremap));

    /////////////////////////////////////// import problem matrix and RHS from files (-> Epetra)

    // read in problem
    Epetra_CrsMatrix * ptrA = 0;
    Epetra_Vector * ptrf = 0;

    *out << "Reading matrix market file" << std::endl;

    const int errA = EpetraExt::MatrixMarketFileToCrsMatrix("A_re1000_5932.txt",*fullmap,*fullmap,*fullmap,ptrA);
    const int errf = EpetraExt::MatrixMarketFileToVector("b_re1000_5932.txt",*fullmap,ptrf);

    // take ownership immediately so that the early return below cannot leak
    RCP<Epetra_CrsMatrix> epA = Teuchos::rcp(ptrA);
    RCP<Epetra_Vector> epv = Teuchos::rcp(ptrf);

    // a missing or unreadable input file would otherwise lead to a null
    // pointer dereference further down
    if (errA != 0 || errf != 0 || epA.is_null() || epv.is_null()) {
      *out << "Problem with reading matrix market files" << std::endl;
      return EXIT_FAILURE;
    }

    /////////////////////////////////////// split system into 2x2 block system

    *out << "Split matrix into 2x2 block matrix" << std::endl;

    // split fullA into A11,..., A22
    Teuchos::RCP<Epetra_CrsMatrix> A11;
    Teuchos::RCP<Epetra_CrsMatrix> A12;
    Teuchos::RCP<Epetra_CrsMatrix> A21;
    Teuchos::RCP<Epetra_CrsMatrix> A22;

    // without the four blocks nothing below can work, so stop right here
    if(!SplitMatrix2x2(epA,*velmap,*premap,A11,A12,A21,A22)) {
      *out << "Problem with splitting matrix"<< std::endl;
      return EXIT_FAILURE;
    }

    /////////////////////////////////////// transform Epetra objects to Xpetra (needed for MueLu)

    // build Xpetra objects from Epetra_CrsMatrix objects
    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA11 = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT<GO,Node>(A11));
    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA12 = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT<GO,Node>(A12));
    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA21 = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT<GO,Node>(A21));
    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA22 = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT<GO,Node>(A22));

    /////////////////////////////////////// generate MapExtractor object

    std::vector<Teuchos::RCP<const Xpetra::Map<LO,GO,Node> > > xmaps;

    xmaps.push_back(xstridedvelmap);
    xmaps.push_back(xstridedpremap);

    Teuchos::RCP<const Xpetra::MapExtractor<Scalar,LO,GO,Node> > map_extractor = Xpetra::MapExtractorFactory<Scalar,LO,GO,Node>::Build(xstridedfullmap,xmaps);

    /////////////////////////////////////// build blocked operator
    // using the map extractor for both the range and the domain side
    // NOTE(review): the third argument (10) is presumably the estimated number
    // of nonzeros per row -- confirm against the BlockedCrsMatrix constructor
    Teuchos::RCP<Xpetra::BlockedCrsMatrix<Scalar,LO,GO,Node> > bOp = Teuchos::rcp(new Xpetra::BlockedCrsMatrix<Scalar,LO,GO,Node>(map_extractor,map_extractor,10));
    bOp->setMatrix(0,0,Teuchos::rcp(new Xpetra::CrsMatrixWrap<Scalar,LocalOrdinal,GlobalOrdinal,Node>(xA11)));
    bOp->setMatrix(0,1,Teuchos::rcp(new Xpetra::CrsMatrixWrap<Scalar,LocalOrdinal,GlobalOrdinal,Node>(xA12)));
    bOp->setMatrix(1,0,Teuchos::rcp(new Xpetra::CrsMatrixWrap<Scalar,LocalOrdinal,GlobalOrdinal,Node>(xA21)));
    bOp->setMatrix(1,1,Teuchos::rcp(new Xpetra::CrsMatrixWrap<Scalar,LocalOrdinal,GlobalOrdinal,Node>(xA22)));

    bOp->fillComplete();

    //////////////////////////////////////////////////////// finest Level
    RCP<MueLu::Level> Finest = rcp(new Level());
    Finest->setDefaultVerbLevel(Teuchos::VERB_NONE);
    Finest->Set("A",Teuchos::rcp_dynamic_cast<Matrix>(bOp));


    ///////////////////////////////////
    // Test SIMPLE smoother as a solver

    *out << "Test: Creating SIMPLE Smoother" << std::endl;
    *out << "Test: Omega for SIMPLE = " << SIMPLE_omega << std::endl;
    *out << "Test: Number of sweeps for SIMPLE = " << SIMPLE_nSweeps << std::endl;
    *out << "Test: Omega for Schur Complement solver= " << SC_omega << std::endl;
    *out << "Test: Number of Schur Complement solver= " << SC_nSweeps << std::endl;
    *out << "Test: Setting up Braess Sarazin Smoother" << std::endl;

    // define SIMPLE Smoother with SIMPLE_nSweeps and SIMPLE_omega as scaling factor
    // AFact_ = Teuchos::null (= default) for the 2x2 blocked operator
    RCP<SimpleSmoother> SimpleSm = rcp( new SimpleSmoother() );
    SimpleSm->SetParameter("Sweeps", Teuchos::ParameterEntry(SIMPLE_nSweeps));
    SimpleSm->SetParameter("Damping factor", Teuchos::ParameterEntry(SIMPLE_omega));
    if(useSIMPLEC==1) SimpleSm->SetParameter("UseSIMPLEC", Teuchos::ParameterEntry(true));

    RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(SimpleSm) );

    // define smoother for velocity prediction; it works on block (0,0) of the
    // user-provided blocked operator
    RCP<SubBlockAFactory> A00Fact = rcp(new SubBlockAFactory());
    A00Fact->SetFactory("A",MueLu::NoFactory::getRCP());
    A00Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
    A00Fact->SetParameter("block col",Teuchos::ParameterEntry(0));

    const std::string ifpackPredictType = "RELAXATION";
    Teuchos::ParameterList ifpackPredictList;
    ifpackPredictList.set("relaxation: sweeps", PRED_nSweeps );
    ifpackPredictList.set("relaxation: damping factor", PRED_omega );
    ifpackPredictList.set("relaxation: type", "Gauss-Seidel");
    RCP<SmootherPrototype> smoProtoPredict = rcp( new TrilinosSmoother(ifpackPredictType, ifpackPredictList, 0) );
    smoProtoPredict->SetFactory("A", A00Fact);
    RCP<SmootherFactory> SmooPredictFact = rcp( new SmootherFactory(smoProtoPredict) );

    // define temporary FactoryManager that is used as input for the SIMPLE smoother
    RCP<FactoryManager> MPredict = rcp(new FactoryManager());
    MPredict->SetFactory("A",                 A00Fact);
    MPredict->SetFactory("Smoother",          SmooPredictFact);    // solver/smoother for prediction step
    MPredict->SetFactory("PreSmoother",               SmooPredictFact);
    MPredict->SetFactory("PostSmoother",              SmooPredictFact);
    MPredict->SetIgnoreUserData(true);               // always use data from factories defined in factory manager
    SimpleSm->SetVelocityPredictionFactoryManager(MPredict);    // set temporary factory manager in SIMPLE smoother


    // define SchurComplement Factory
    // SchurComp gets a RCP to AFact_ which has to be the 2x2 blocked operator
    // It stores the resulting SchurComplement operator as "A" generated by the SchurComplementFactory
    // Instead of F^{-1} it uses the approximation \hat{F}^{-1} with \hat{F} = diag(F)
    RCP<SchurComplementFactory> SFact = Teuchos::rcp(new SchurComplementFactory());
    SFact->SetParameter("omega", Teuchos::ParameterEntry(1.0)); // for Simple, omega is always 1.0 in the SchurComplement
    SFact->SetParameter("lumping", Teuchos::ParameterEntry(useSIMPLEC == 1)); // lumping is switched on for SIMPLEC only
    SFact->SetFactory("A",MueLu::NoFactory::getRCP());

    // define smoother/solver for the Schur complement equation
    RCP<SmootherPrototype> smoProtoSC = Teuchos::null;
    if(SC_bUseDirectSolver != 1) {
      // Gauss-Seidel relaxation
      std::string ifpackSCType;
      Teuchos::ParameterList ifpackSCList;
      ifpackSCList.set("relaxation: sweeps", SC_nSweeps );
      ifpackSCList.set("relaxation: damping factor", SC_omega );
      ifpackSCType = "RELAXATION";
      ifpackSCList.set("relaxation: type", "Gauss-Seidel");
      smoProtoSC     = rcp( new TrilinosSmoother(ifpackSCType, ifpackSCList, 0) );
    }
    else {
      // direct solver, constructed with an empty type string and an empty parameter list
      Teuchos::ParameterList ifpackDSList;
      std::string ifpackDSType;
      smoProtoSC     = rcp( new DirectSolver(ifpackDSType,ifpackDSList) );
    }
    // in both cases SFact is the factory providing "A"
    smoProtoSC->SetFactory("A", SFact);

    RCP<SmootherFactory> SmooSCFact = rcp( new SmootherFactory(smoProtoSC) );

    // define temporary FactoryManager that is used as input for the SIMPLE smoother
    RCP<FactoryManager> MB = rcp(new FactoryManager());
    MB->SetFactory("A",                 SFact);         // SchurComplement operator for correction step (defined as "A")
    MB->SetFactory("Smoother",          SmooSCFact);    // solver/smoother for correction step
    MB->SetFactory("PreSmoother",               SmooSCFact);
    MB->SetFactory("PostSmoother",              SmooSCFact);
    MB->SetIgnoreUserData(true);               // always use data from factories defined in factory manager
    SimpleSm->SetSchurCompFactoryManager(MB);    // set temporary factory manager in SIMPLE smoother

    // setup main factory manager
    RCP<FactoryManager> M = rcp(new FactoryManager());
    M->SetFactory("A",               MueLu::NoFactory::getRCP()); // this is the 2x2 blocked operator
    M->SetFactory("Smoother",        smootherFact);               // SIMPLE block smoother
    M->SetFactory("PreSmoother",     smootherFact);
    M->SetFactory("PostSmoother",    smootherFact);

    MueLu::SetFactoryManager SFMCoarse(Finest, M);
    Finest->Request(MueLu::TopSmootherFactory<Scalar,LO,GO,Node>(M, "Smoother"));

    // call setup (= extract blocks and extract diagonal of F)
    SimpleSm->Setup(*Finest);

    // zero initial guess
    RCP<MultiVector> xtest = MultiVectorFactory::Build(xstridedfullmap,1);
    xtest->putScalar( (Scalar) 0.0);

    // right-hand side read from file
    RCP<Vector> xR = Teuchos::rcp(new Xpetra::EpetraVectorT<int,Node>(epv));
    Teuchos::Array<Teuchos::ScalarTraits<Scalar>::magnitudeType> norms(1);

    // the value reported as ||x_0|| is the 2-norm of the right-hand side xR
    xR->norm2(norms);
    *out << "Test: ||x_0|| = " << norms[0] << std::endl;
    *out << "Test: Applying Simple Smoother" << std::endl;
    *out << "Test: START DATA" << std::endl;
    *out << "iterations\tVelocity_residual\tPressure_residual" << std::endl;
    SimpleSm->Apply(*xtest,*xR);
    xtest->norm2(norms);
    *out << "Test: ||x_1|| = " << norms[0] << std::endl;

    Teuchos::Array<Teuchos::ScalarTraits<Scalar>::magnitudeType> test = MueLu::Utilities<Scalar, LO, GO, Node>::ResidualNorm(*bOp, *xtest, *xR);
    *out << "residual norm: " << test[0] << std::endl;

    // the run counts as converged only if the residual norm is below 1.0e-7
    success = (test[0] < 1.0e-7);
    if (!success)
      *out << "no convergence" << std::endl;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
#else
  std::cout << "Epetra needs Serial node. Please recompile MueLu with the Serial node enabled." << std::endl;
  return EXIT_SUCCESS;
#endif // #if defined(HAVE_MUELU_EPETRA)
}