int main_(Teuchos::CommandLineProcessor &clp, int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::TimeMonitor;

  // =========================================================================
  // MPI initialization using Teuchos
  // =========================================================================
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);

  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  int numProc = comm->getSize();
  int myRank  = comm->getRank();

  // =========================================================================
  // Parameters initialization
  // =========================================================================
  ::Xpetra::Parameters xpetraParameters(clp);

  bool runHeavyTests = false;
  clp.setOption("heavytests", "noheavytests",  &runHeavyTests, "whether to exercise tests that take a long time to run");

  clp.recogniseAllOptions(true);
  switch (clp.parse(argc,argv)) {
    case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS;
    case Teuchos::CommandLineProcessor::PARSE_ERROR:
    case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE;
    case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:          break;
  }

  Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

  // =========================================================================
  // Problem construction
  // =========================================================================
  ParameterList matrixParameters;
  matrixParameters.set("nx",         Teuchos::as<GO>(9999));
  matrixParameters.set("matrixType", "Laplace1D");
  RCP<Matrix>      A           = MueLuTests::TestHelpers::TestFactory<SC, LO, GO, NO>::Build1DPoisson(matrixParameters.get<GO>("nx"), lib);
  RCP<MultiVector> coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("1D", A->getRowMap(), matrixParameters);

  std::string outDir = "Output/";

  std::vector<std::string> dirList;
  if (runHeavyTests) {
    dirList.push_back("EasyParameterListInterpreter-heavy/");
    dirList.push_back("FactoryParameterListInterpreter-heavy/");
  } else {
    dirList.push_back("EasyParameterListInterpreter/");
    dirList.push_back("FactoryParameterListInterpreter/");
  }
#if defined(HAVE_MPI) && defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_AMESOS2_KLU2)
  // The ML interpreter have internal ifdef, which means that the resulting
  // output would depend on configuration (reguarl interpreter does not have
  // that). Therefore, we need to stabilize the configuration here.
  // In addition, we run ML parameter list tests only if KLU is available
  dirList.push_back("MLParameterListInterpreter/");
  dirList.push_back("MLParameterListInterpreter2/");
#endif
  int numLists = dirList.size();

  bool failed = false;
  Teuchos::Time timer("Interpreter timer");
  //double lastTime = timer.wallTime();
  for (int k = 0; k < numLists; k++) {
    Teuchos::ArrayRCP<std::string> fileList = MueLuTests::TestHelpers::GetFileList(dirList[k],
      (numProc == 1 ? std::string(".xml") : std::string("_np" + Teuchos::toString(numProc) + ".xml")));

    for (int i = 0; i < fileList.size(); i++) {
      // Set seed
      std::srand(12345);

      // Reset (potentially) cached value of the estimate
      A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());

      std::string xmlFile  = dirList[k] + fileList[i];
      std::string outFile  = outDir     + fileList[i];
      std::string baseFile = outFile.substr(0, outFile.find_last_of('.'));
      std::size_t found = baseFile.find("_np");
      if (numProc == 1 && found != std::string::npos) {
#ifdef HAVE_MPI
        baseFile = baseFile.substr(0, found);
#else
        std::cout << "Skipping \"" << xmlFile << "\" as MPI is not enabled" << std::endl;
        continue;
#endif
      }
      baseFile = baseFile + (lib == Xpetra::UseEpetra ? "_epetra" : "_tpetra");
      std::string goldFile = baseFile + ".gold";
      std::ifstream f(goldFile.c_str());
      if (!f.good()) {
        if (myRank == 0)
          std::cout << "Warning: comparison file " << goldFile << " not found.  Skipping test" << std::endl;
        continue;
      }

      std::filebuf    buffer;
      std::streambuf* oldbuffer = NULL;
      if (myRank == 0) {
        // Redirect output
        buffer.open((baseFile + ".out").c_str(), std::ios::out);
        oldbuffer = std::cout.rdbuf(&buffer);
      }

      // NOTE: we cannot use ParameterListInterpreter(xmlFile, comm), because we want to update the ParameterList
      // first to include "test" verbosity
      Teuchos::ParameterList paramList;
      Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFile, Teuchos::Ptr<Teuchos::ParameterList>(&paramList), *comm);
      if      (dirList[k] == "EasyParameterListInterpreter/" || dirList[k] == "EasyParameterListInterpreter-heavy/")
        paramList.set("verbosity", "test");
      else if (dirList[k] == "FactoryParameterListInterpreter/" || dirList[k] == "FactoryParameterListInterpreter-heavy/")
        paramList.sublist("Hierarchy").set("verbosity", "Test");
      else if (dirList[k] == "MLParameterListInterpreter/")
        paramList.set("ML output",     42);
      else if (dirList[k] == "MLParameterListInterpreter2/")
        paramList.set("ML output",     10);

      try {
        timer.start();
        Teuchos::RCP<HierarchyManager> mueluFactory;

        // create parameter list interpreter
        // here we have to distinguish between the general MueLu parameter list interpreter
        // and the ML parameter list interpreter. Note that the ML paramter interpreter also
        // works with Tpetra matrices.
        if (dirList[k] == "EasyParameterListInterpreter/"         ||
            dirList[k] == "EasyParameterListInterpreter-heavy/"   ||
            dirList[k] == "FactoryParameterListInterpreter/"      ||
            dirList[k] == "FactoryParameterListInterpreter-heavy/") {
          mueluFactory = Teuchos::rcp(new ParameterListInterpreter(paramList));

        } else if (dirList[k] == "MLParameterListInterpreter/") {
          mueluFactory = Teuchos::rcp(new MLParameterListInterpreter(paramList));

        } else if (dirList[k] == "MLParameterListInterpreter2/") {
          //std::cout << "ML ParameterList: " << std::endl;
          //std::cout << paramList << std::endl;
          RCP<ParameterList> mueluParamList = Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(paramList,"SA"));
          //std::cout << "MueLu ParameterList: " << std::endl;
          //std::cout << *mueluParamList << std::endl;
          mueluFactory = Teuchos::rcp(new ParameterListInterpreter(*mueluParamList));
        }

        RCP<Hierarchy> H = mueluFactory->CreateHierarchy();

        H->GetLevel(0)->template Set<RCP<Matrix> >("A", A);

        if (dirList[k] == "MLParameterListInterpreter/") {
          // MLParameterInterpreter needs the nullspace information if rebalancing is active!
          // add default constant null space vector
          RCP<MultiVector> nullspace = MultiVectorFactory::Build(A->getRowMap(), 1);
          nullspace->putScalar(1.0);
          H->GetLevel(0)->Set("Nullspace", nullspace);
        }

        H->GetLevel(0)->Set("Coordinates", coordinates);

        mueluFactory->SetupHierarchy(*H);

        if (strncmp(fileList[i].c_str(), "reuse", 5) == 0) {
          // Build the Hierarchy the second time
          // Should be faster if we actually do the reuse
          A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
          mueluFactory->SetupHierarchy(*H);
        }

        timer.stop();
      } catch (Teuchos::ExceptionBase& e) {
        std::string msg = e.what();
        msg = msg.substr(msg.find_last_of('\n')+1);

        if (myRank == 0) {
          std::cout << "Caught exception: " << msg << std::endl;

          // Redirect output back
          std::cout.rdbuf(oldbuffer);
          buffer.close();
        }

        if (msg == "Zoltan interface is not available" ||
            msg == "Zoltan2 interface is not available" ||
            msg == "MueLu::FactoryFactory:BuildFactory(): Cannot create a Zoltan2Interface object: Zoltan2 is disabled: HAVE_MUELU_ZOLTAN2 && HAVE_MPI == false.") {

          if (myRank == 0)
            std::cout << xmlFile << ": skipped (missing library)" << std::endl;

          continue;
        }
      }

      std::string cmd;
      if (myRank == 0) {
        // Redirect output back
        std::cout.rdbuf(oldbuffer);
        buffer.close();

        // Create a copy of outputs
        cmd = "cp -f ";
        system((cmd + baseFile + ".gold " + baseFile + ".gold_filtered").c_str());
        system((cmd + baseFile + ".out " + baseFile + ".out_filtered").c_str());

        // Tpetra produces different eigenvalues in Chebyshev due to using
        // std::rand() for generating random vectors, which may be initialized
        // using different seed, and may have different algorithm from one
        // gcc version to another, or to anogther compiler (like clang)
        // This leads to us always failing this test.
        // NOTE1 : Epetra, on the other hand, rolls out its out random number
        // generator, which always produces same results

        // Ignore the value of "lambdaMax"
        run_sed("'s/lambdaMax: [0-9]*.[0-9]*/lambdaMax = <ignored>/'", baseFile);

        // Ignore the value of "lambdaMin"
        run_sed("'s/lambdaMin: [0-9]*.[0-9]*/lambdaMin = <ignored>/'", baseFile);

        // Ignore the value of "chebyshev: max eigenvalue"
        // NOTE: we skip lines with default value ([default])
        run_sed("'/[default]/! s/chebyshev: max eigenvalue = [0-9]*.[0-9]*/chebyshev: max eigenvalue = <ignored>/'", baseFile);

        // Ignore the exact type of direct solver (it is selected semi-automatically
        // depending on how Trilinos was configured
        run_sed("'s/Amesos\\([2]*\\)Smoother{type = .*}/Amesos\\1Smoother{type = <ignored>}/'", baseFile);
        run_sed("'s/SuperLU solver interface, direct solve/<Direct> solver interface/'", baseFile);
        run_sed("'s/KLU2 solver interface/<Direct> solver interface/'", baseFile);
        run_sed("'s/Basker solver interface/<Direct> solver interface/'", baseFile);

        // Strip template args for some classes
        std::vector<std::string> classes;
        classes.push_back("Xpetra::Matrix");
        classes.push_back("MueLu::Constraint");
        classes.push_back("MueLu::SmootherPrototype");
        for (size_t q = 0; q < classes.size(); q++)
          run_sed("'s/" + classes[q] + "<.*>/" + classes[q] + "<ignored> >/'", baseFile);

#ifdef __APPLE__
        // Some Macs print outs ptrs as 0x0 instead of 0, fix that
        run_sed("'/RCP/ s/=0x0/=0/g'", baseFile);
#endif

        // Run comparison (ignoring whitespaces)
        cmd = "diff -u -w -I\"^\\s*$\" " + baseFile + ".gold_filtered " + baseFile + ".out_filtered";
        int ret = system(cmd.c_str());
        if (ret)
          failed = true;

        //std::ios_base::fmtflags ff(std::cout.flags());
        //std::cout.precision(2);
        //std::cout << xmlFile << " (" << std::setiosflags(std::ios::fixed)
        //          << timer.wallTime() - lastTime << " sec.) : " << (ret ? "failed" : "passed") << std::endl;
        //lastTime = timer.wallTime();
        //std::cout.flags(ff); // reset flags to whatever they were prior to printing time
        std::cout << xmlFile << " : " << (ret ? "failed" : "passed") << std::endl;
      }
    }
  }

  if (myRank == 0)
    std::cout << std::endl << "End Result: TEST " << (failed ? "FAILED" : "PASSED") << std::endl;

  return (failed ? EXIT_FAILURE : EXIT_SUCCESS);
}
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  bool success = true;
  bool verbose = true;
  try {
    Teuchos::RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

    /**********************************************************************************/
    /* SET TEST PARAMETERS                                                            */
    /**********************************************************************************/
    // Note: use --help to list available options.
    Teuchos::CommandLineProcessor clp(false);

    // Default is Laplace1D with nx = 8748.
    // It's a nice size for 1D and perfect aggregation. (6561=3^8)
    //Nice size for 1D and perfect aggregation on small numbers of processors. (8748=4*3^7)
    Galeri::Xpetra::Parameters<GO> matrixParameters(clp, 8748); // manage parameters of the test case
    Xpetra::Parameters xpetraParameters(clp);             // manage parameters of xpetra

    // custom parameters
    int pauseForDebugger=0;
    //std::string aggOrdering = "natural";
    int minPerAgg=2; //was 3 in simple
    int maxNbrAlreadySelected=0;
    int printTimings=0;
    std::string xmlFile="parameters.xml";

    //clp.setOption("aggOrdering",&aggOrdering,"aggregation ordering strategy (natural,graph)");
    clp.setOption("debug",&pauseForDebugger,"pause to attach debugger");
    clp.setOption("maxNbrSel",&maxNbrAlreadySelected,"maximum # of nbrs allowed to be in other aggregates");
    clp.setOption("minPerAgg",&minPerAgg,"minimum #DOFs per aggregate");
    clp.setOption("timings",&printTimings,"print timings to screen");
    clp.setOption("xmlFile",&xmlFile,"file name containing MueLu multigrid parameters in XML format");

    switch (clp.parse(argc,argv)) {
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
      case Teuchos::CommandLineProcessor::PARSE_ERROR:
      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
    }

    Teuchos::RCP<Teuchos::TimeMonitor> globalTimeMonitor = Teuchos::rcp (new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer("Timings: Global Time")));

    if (pauseForDebugger) {
      Utilities::PauseForDebugger();
    }

    matrixParameters.check();
    xpetraParameters.check();
    Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

    if (comm->getRank() == 0) {
      std::cout << xpetraParameters << matrixParameters;
    }

    /**********************************************************************************/
    /* CREATE INITIAL MATRIX                                                          */
    /**********************************************************************************/
    Teuchos::RCP<const Map> map;
    Teuchos::RCP<Matrix> A;

    {
      Teuchos::TimeMonitor tm(*Teuchos::TimeMonitor::getNewTimer("Timings: Matrix Build"));

      map = MapFactory::Build(lib, matrixParameters.GetNumGlobalElements(), 0, comm);
      Teuchos::RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
        Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); //TODO: Matrix vs. CrsMatrixWrap
      A = Pr->BuildMatrix();

    }
    /**********************************************************************************/
    /*                                                                                */
    /**********************************************************************************/
    Teuchos::ParameterList paramList;
    Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFile, Teuchos::Ptr<Teuchos::ParameterList>(&paramList), *comm);

    // create parameter list interpreter
    Teuchos::RCP<HierarchyManager> mueluFactory = Teuchos::rcp(new ParameterListInterpreter(paramList));

    Teuchos::RCP<Hierarchy> H = mueluFactory->CreateHierarchy();

    H->GetLevel(0)->Set< Teuchos::RCP<Matrix> >("A", A);

    Teuchos::RCP<MultiVector> nullspace = MultiVectorFactory::Build(A->getRowMap(), 1);
    nullspace->putScalar(1.0);
    H->GetLevel(0)->Set("Nullspace", nullspace);

    // set minimal information about number of layers for semicoarsening...
    // This information can also be provided as a user parameter in the xml file using the
    // parameter: "semicoarsen: num layers"
    H->GetLevel(0)->Set("NumZLayers",matrixParameters.GetParameterList().get<GO>("nz"));


    mueluFactory->SetupHierarchy(*H);

    for (int l=0; l<H->GetNumLevels(); l++) {
      Teuchos::RCP<MueLu::Level> level = H->GetLevel(l);
      if(level->IsAvailable("A", MueLu::NoFactory::get()) == false) { success = false; H->GetLevel(l)->print(std::cout, MueLu::Debug);}
      if(level->IsAvailable("P", MueLu::NoFactory::get()) == false && l>0) { success = false; H->GetLevel(l)->print(std::cout, MueLu::Debug);}
      if(level->IsAvailable("R", MueLu::NoFactory::get()) == false && l>0) { success = false; H->GetLevel(l)->print(std::cout, MueLu::Debug);}
      if(level->IsAvailable("PreSmoother",  MueLu::NoFactory::get()) == false) { success = false; H->GetLevel(l)->print(std::cout, MueLu::Debug);}
      if(level->IsAvailable("PostSmoother", MueLu::NoFactory::get()) == false && l<H->GetNumLevels()-1) { success = false; H->GetLevel(l)->print(std::cout, MueLu::Debug);}
      if(level->IsAvailable("NumZLayers",   MueLu::NoFactory::get()) == true && l>0) {  success = false; H->GetLevel(l)->print(std::cout, MueLu::Debug);}
      H->GetLevel(l)->print(std::cout, MueLu::Debug);
    }
    ///////////////////////////////////////////////////////////

    // =========================================================================
    // System solution (Ax = b)
    // =========================================================================
    comm->barrier();
    typedef Teuchos::ScalarTraits<SC> STS;
    SC zero = STS::zero(), one = STS::one();

    Teuchos::RCP<Vector> X = VectorFactory::Build(A->getRowMap());
    Teuchos::RCP<Vector> B = VectorFactory::Build(A->getRowMap());

    {
      // we set seed for reproducibility
      Utilities::SetRandomSeed(*comm);
      X->randomize();
      A->apply(*X, *B, Teuchos::NO_TRANS, one, zero);

      Teuchos::Array<STS::magnitudeType> norms(1);
      B->norm2(norms);
      B->scale(one/norms[0]);
      X->putScalar(zero);
    }

    comm->barrier();

    H->IsPreconditioner(false);
    H->Iterate(*B, *X, 20);

    // Timer final summaries
    globalTimeMonitor = Teuchos::null; // stop this timer before summary

    if (printTimings)
      Teuchos::TimeMonitor::summarize();
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}