Exemple #1
0
  void generate_user_matrix_and_nullspace(std::string &matrixType,  Xpetra::UnderlyingLib & lib,Teuchos::ParameterList &galeriList,  RCP<const Teuchos::Comm<int> > &comm, RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > & A, RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > & nullspace){
    using Teuchos::RCP;

    RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
    Teuchos::FancyOStream& out = *fancy;

    typedef typename Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> map_type;
    typedef typename Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> multivector_type;
    typedef typename Xpetra::CrsMatrixWrap<Scalar,LocalOrdinal,GlobalOrdinal,Node> matrixwrap_type;
    RCP<const map_type >   map;
    RCP<multivector_type > coordinates;
    if (matrixType == "Laplace1D") {
      map = Galeri::Xpetra::CreateMap<LocalOrdinal, GlobalOrdinal, Node>(lib, "Cartesian1D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<Scalar,LocalOrdinal,GlobalOrdinal,map_type,multivector_type>("1D", map, galeriList);

    } else if (matrixType == "Laplace2D" || matrixType == "Star2D" || matrixType == "BigStar2D" || matrixType == "Elasticity2D") {
      map = Galeri::Xpetra::CreateMap<LocalOrdinal, GlobalOrdinal, Node>(lib, "Cartesian2D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<Scalar,LocalOrdinal,GlobalOrdinal,map_type,multivector_type>("2D", map, galeriList);

    } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") {
      map = Galeri::Xpetra::CreateMap<LocalOrdinal, GlobalOrdinal, Node>(lib, "Cartesian3D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<Scalar,LocalOrdinal,GlobalOrdinal,map_type,multivector_type>("3D", map, galeriList);
    }

    // Expand map to do multiple DOF per node for block problems
    if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D")
      map = Xpetra::MapFactory<LocalOrdinal,GlobalOrdinal,Node>::Build(map, (matrixType == "Elasticity2D" ? 2 : 3));

    out << "Processor subdomains in x direction: " << galeriList.get<GlobalOrdinal>("mx") << std::endl
        << "Processor subdomains in y direction: " << galeriList.get<GlobalOrdinal>("my") << std::endl
        << "Processor subdomains in z direction: " << galeriList.get<GlobalOrdinal>("mz") << std::endl
        << "========================================================" << std::endl;

    RCP<Galeri::Xpetra::Problem<map_type,matrixwrap_type,multivector_type> > Pr =
        Galeri::Xpetra::BuildProblem<Scalar,LocalOrdinal,GlobalOrdinal,map_type,matrixwrap_type,multivector_type>(matrixType, map, galeriList);

    A = Pr->BuildMatrix();

    if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") {
      nullspace = Pr->BuildNullspace();
      A->SetFixedBlockSize((matrixType == "Elasticity2D") ? 2 : 3);
    }

  }
Exemple #2
0
int main(int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>

  using Teuchos::RCP; // reference count pointers
  using Teuchos::rcp;
  using Teuchos::TimeMonitor;
  using Teuchos::ParameterList;

  // =========================================================================
  // MPI initialization using Teuchos
  // =========================================================================
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

  // =========================================================================
  // Convenient definitions
  // =========================================================================
  typedef Teuchos::ScalarTraits<SC> STS;
  SC zero = STS::zero(), one = STS::one();

  // =========================================================================
  // Parameters initialization
  // =========================================================================
  Teuchos::CommandLineProcessor clp(false);

  GO nx = 100, ny = 100, nz = 100;
  Galeri::Xpetra::Parameters<GO> galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case
  Xpetra::Parameters             xpetraParameters(clp);                          // manage parameters of Xpetra

  std::string xmlFileName       = "scalingTest.xml"; clp.setOption("xml",                   &xmlFileName,      "read parameters from a file [default = 'scalingTest.xml']");
  bool        printTimings      = true;              clp.setOption("timings", "notimings",  &printTimings,     "print timings to screen");
  int         writeMatricesOPT  = -2;                clp.setOption("write",                 &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)");
  std::string dsolveType        = "cg", solveType;   clp.setOption("solver",                &dsolveType,       "solve type: (none | cg | gmres | standalone)");
  double      dtol              = 1e-12, tol;        clp.setOption("tol",                   &dtol,             "solver convergence tolerance");

  std::string mapFile;                               clp.setOption("map",                   &mapFile,          "map data file");
  std::string matrixFile;                            clp.setOption("matrix",                &matrixFile,       "matrix data file");
  std::string coordFile;                             clp.setOption("coords",                &coordFile,        "coordinates data file");
  int         numRebuilds       = 0;                 clp.setOption("rebuild",               &numRebuilds,      "#times to rebuild hierarchy");
  int         maxIts            = 200;               clp.setOption("its",                   &maxIts,           "maximum number of solver iterations");
  bool        scaleResidualHistory = true;              clp.setOption("scale", "noscale",  &scaleResidualHistory, "scaled Krylov residual history");

  switch (clp.parse(argc, argv)) {
    case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS;
    case Teuchos::CommandLineProcessor::PARSE_ERROR:
    case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE;
    case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:          break;
  }

  Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

  ParameterList paramList;
  Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr<ParameterList>(&paramList), *comm);
  bool isDriver = paramList.isSublist("Run1");
  if (isDriver) {
    // update galeriParameters with the values from the XML file
    ParameterList& realParams = galeriParameters.GetParameterList();

    for (ParameterList::ConstIterator it = realParams.begin(); it != realParams.end(); it++) {
      const std::string& name = realParams.name(it);
      if (paramList.isParameter(name))
        realParams.setEntry(name, paramList.getEntry(name));
    }
  }

  // Retrieve matrix parameters (they may have been changed on the command line)
  // [for instance, if we changed matrix type from 2D to 3D we need to update nz]
  ParameterList galeriList = galeriParameters.GetParameterList();

  // =========================================================================
  // Problem construction
  // =========================================================================
  std::ostringstream galeriStream;
  comm->barrier();
  RCP<TimeMonitor> globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time")));
  RCP<TimeMonitor> tm                = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build")));

  RCP<Matrix>      A;
  RCP<const Map>   map;
  RCP<MultiVector> coordinates;
  RCP<MultiVector> nullspace;
  if (matrixFile.empty()) {
    galeriStream << "========================================================\n" << xpetraParameters << galeriParameters;

    // Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g.,
    //                                 d1  d2  d3
    //                                 d4  d5  d6
    //                                 d7  d8  d9
    //                                 d10 d11 d12
    // A perfect distribution is only possible when the #processors is a perfect square.
    // This *will* result in "strip" distribution if the #processors is a prime number or if the factors are very different in
    // size. For example, np=14 will give a 7-by-2 distribution.
    // If you don't want Galeri to do this, specify mx or my on the galeriList.
    std::string matrixType = galeriParameters.GetMatrixType();

    // Create map and coordinates
    // In the future, we hope to be able to first create a Galeri problem, and then request map and coordinates from it
    // At the moment, however, things are fragile as we hope that the Problem uses same map and coordinates inside
    if (matrixType == "Laplace1D") {
      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("1D", map, galeriList);

    } else if (matrixType == "Laplace2D" || matrixType == "Star2D" ||
               matrixType == "BigStar2D" || matrixType == "Elasticity2D") {
      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("2D", map, galeriList);

    } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") {
      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("3D", map, galeriList);
    }

    // Expand map to do multiple DOF per node for block problems
    if (matrixType == "Elasticity2D")
      map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 2);
    if (matrixType == "Elasticity3D")
      map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 3);

    galeriStream << "Processor subdomains in x direction: " << galeriList.get<int>("mx") << std::endl
                 << "Processor subdomains in y direction: " << galeriList.get<int>("my") << std::endl
                 << "Processor subdomains in z direction: " << galeriList.get<int>("mz") << std::endl
                 << "========================================================" << std::endl;

    if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") {
      // Our default test case for elasticity: all boundaries of a square/cube have Neumann b.c. except left which has Dirichlet
      galeriList.set("right boundary" , "Neumann");
      galeriList.set("bottom boundary", "Neumann");
      galeriList.set("top boundary"   , "Neumann");
      galeriList.set("front boundary" , "Neumann");
      galeriList.set("back boundary"  , "Neumann");
    }

    RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
        Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(galeriParameters.GetMatrixType(), map, galeriList);
    A = Pr->BuildMatrix();

    nullspace = MultiVectorFactory::Build(map, 1);
    if (matrixType == "Elasticity2D" ||
        matrixType == "Elasticity3D") {
      nullspace = Pr->BuildNullspace();
      A->SetFixedBlockSize((galeriParameters.GetMatrixType() == "Elasticity2D") ? 2 : 3);

    } else {
      nullspace->putScalar(one);
    }

  } else {
    if (!mapFile.empty())
      map = Utils2::ReadMap(mapFile, xpetraParameters.GetLib(), comm);
    comm->barrier();

    if (lib == Xpetra::UseEpetra) {
      A = Utils::Read(matrixFile, map);

    } else {
      // Tpetra matrix reader is still broken, so instead we read in
      // a matrix in a binary format and then redistribute it
      const bool binaryFormat = true;
      A = Utils::Read(matrixFile, lib, comm, binaryFormat);

      RCP<Matrix> newMatrix = MatrixFactory::Build(map, 1);
      RCP<Import> importer  = ImportFactory::Build(A->getRowMap(), map);
      newMatrix->doImport(*A, *importer, Xpetra::INSERT);
      newMatrix->fillComplete();

      A.swap(newMatrix);
    }

    comm->barrier();

    if (!coordFile.empty())
      coordinates = Utils2::ReadMultiVector(coordFile, map);

    nullspace = MultiVectorFactory::Build(map, 1);
    nullspace->putScalar(one);
  }

  comm->barrier();
  tm = Teuchos::null;

  galeriStream << "Galeri complete.\n========================================================" << std::endl;

  int numReruns = 1;
  if (paramList.isParameter("number of reruns"))
    numReruns = paramList.get<int>("number of reruns");

  const bool mustAlreadyExist = true;
  for (int rerunCount = 1; rerunCount <= numReruns; rerunCount++) {
    ParameterList mueluList, runList;

    bool stop = false;
    if (isDriver) {
      runList   = paramList.sublist("Run1",  mustAlreadyExist);
      mueluList = runList  .sublist("MueLu", mustAlreadyExist);
    } else {
      mueluList = paramList;
      stop = true;
    }

    int runCount = 1;
    do {
      A->SetMaxEigenvalueEstimate(-one);

      solveType = dsolveType;
      tol       = dtol;

      int   savedOut  = -1;
      FILE* openedOut = NULL;
      if (isDriver) {
        if (runList.isParameter("filename")) {
          // Redirect all output into a filename We have to redirect all output,
          // including printf's, therefore we cannot simply replace C++ cout
          // buffers, and have to use heavy machinary (dup2)
          std::string filename = runList.get<std::string>("filename");
          if (numReruns > 1)
            filename += "_run" + MueLu::toString(rerunCount);
          filename += (lib == Xpetra::UseEpetra ? ".epetra" : ".tpetra");

          savedOut  = dup(STDOUT_FILENO);
          openedOut = fopen(filename.c_str(), "w");
          dup2(fileno(openedOut), STDOUT_FILENO);
        }
        if (runList.isParameter("solver")) solveType = runList.get<std::string>("solver");
        if (runList.isParameter("tol"))    tol       = runList.get<double>     ("tol");
      }

      // Instead of checking each time for rank, create a rank 0 stream
      RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
      Teuchos::FancyOStream& fancyout = *fancy;
      fancyout.setOutputToRootOnly(0);

      fancyout << galeriStream.str();

      // =========================================================================
      // Preconditioner construction
      // =========================================================================
      comm->barrier();
      tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1.5 - MueLu read XML")));

      RCP<HierarchyManager> mueLuFactory = rcp(new ParameterListInterpreter(mueluList));

      comm->barrier();
      tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 2 - MueLu Setup")));

      RCP<Hierarchy> H;
      for (int i = 0; i <= numRebuilds; i++) {
        A->SetMaxEigenvalueEstimate(-one);

        H = mueLuFactory->CreateHierarchy();
        H->GetLevel(0)->Set("A",           A);
        H->GetLevel(0)->Set("Nullspace",   nullspace);
        if (!coordinates.is_null())
          H->GetLevel(0)->Set("Coordinates", coordinates);
        mueLuFactory->SetupHierarchy(*H);
      }

      comm->barrier();
      tm = Teuchos::null;

      // =========================================================================
      // System solution (Ax = b)
      // =========================================================================
      comm->barrier();
      tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3 - LHS and RHS initialization")));

      RCP<Vector> X = VectorFactory::Build(map);
      RCP<Vector> B = VectorFactory::Build(map);

      {
        // we set seed for reproducibility
        Utils::SetRandomSeed(*comm);
        X->randomize();
        A->apply(*X, *B, Teuchos::NO_TRANS, one, zero);

        Teuchos::Array<STS::magnitudeType> norms(1);
        B->norm2(norms);
        B->scale(one/norms[0]);
        X->putScalar(zero);
      }
      tm = Teuchos::null;

      if (writeMatricesOPT > -2) {
        tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3.5 - Matrix output")));
        H->Write(writeMatricesOPT, writeMatricesOPT);
        tm = Teuchos::null;
      }

      comm->barrier();
      if (solveType == "none") {
        // Do not perform a solve

      } else if (solveType == "standalone") {
        tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 4 - Fixed Point Solve")));

        H->IsPreconditioner(false);
        H->Iterate(*B, *X, maxIts);

      } else if (solveType == "cg" || solveType == "gmres") {
#ifdef HAVE_MUELU_BELOS
        tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Belos Solve")));

        // Operator and Multivector type that will be used with Belos
        typedef MultiVector          MV;
        typedef Belos::OperatorT<MV> OP;

        H->IsPreconditioner(true);

        // Define Operator and Preconditioner
        Teuchos::RCP<OP> belosOp   = Teuchos::rcp(new Belos::XpetraOp<SC, LO, GO, NO, LMO>(A)); // Turns a Xpetra::Matrix object into a Belos operator
        Teuchos::RCP<OP> belosPrec = Teuchos::rcp(new Belos::MueLuOp <SC, LO, GO, NO, LMO>(H)); // Turns a MueLu::Hierarchy object into a Belos operator

        // Construct a Belos LinearProblem object
        RCP< Belos::LinearProblem<SC, MV, OP> > belosProblem = rcp(new Belos::LinearProblem<SC, MV, OP>(belosOp, X, B));
        belosProblem->setRightPrec(belosPrec);

        bool set = belosProblem->setProblem();
        if (set == false) {
          fancyout << "\nERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
          return EXIT_FAILURE;
        }

        // Belos parameter list
        Teuchos::ParameterList belosList;
        belosList.set("Maximum Iterations",    maxIts); // Maximum number of iterations allowed
        belosList.set("Convergence Tolerance", tol);    // Relative convergence tolerance requested
        belosList.set("Verbosity",             Belos::Errors + Belos::Warnings + Belos::StatusTestDetails);
        belosList.set("Output Frequency",      1);
        belosList.set("Output Style",          Belos::Brief);
        if (!scaleResidualHistory) 
          belosList.set("Implicit Residual Scaling", "None");

        // Create an iterative solver manager
        RCP< Belos::SolverManager<SC, MV, OP> > solver;
        if (solveType == "cg") {
          solver = rcp(new Belos::PseudoBlockCGSolMgr   <SC, MV, OP>(belosProblem, rcp(&belosList, false)));
        } else if (solveType == "gmres") {
          solver = rcp(new Belos::BlockGmresSolMgr<SC, MV, OP>(belosProblem, rcp(&belosList, false)));
        }

        // Perform solve
        Belos::ReturnType ret = Belos::Unconverged;
        try {
          ret = solver->solve();

          // Get the number of iterations for this solve.
          fancyout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl;

        } catch(...) {
          fancyout << std::endl << "ERROR:  Belos threw an error! " << std::endl;
        }

        // Check convergence
        if (ret != Belos::Converged)
          fancyout << std::endl << "ERROR:  Belos did not converge! " << std::endl;
        else
          fancyout << std::endl << "SUCCESS:  Belos converged!" << std::endl;
#endif //ifdef HAVE_MUELU_BELOS
      } else {
        throw MueLu::Exceptions::RuntimeError("Unknown solver type: \"" + solveType + "\"");
      }
      comm->barrier();
      tm = Teuchos::null;
      globalTimeMonitor = Teuchos::null;

      if (printTimings)
        TimeMonitor::summarize(A->getRowMap()->getComm().ptr(), std::cout, false, true, false, Teuchos::Union);

      TimeMonitor::clearCounters();

      if (isDriver) {
        if (openedOut != NULL) {
          dup2(savedOut, STDOUT_FILENO);
          fclose(openedOut);
          openedOut = NULL;
        }
        try {
          runList   = paramList.sublist("Run" + MueLu::toString(++runCount), mustAlreadyExist);
          mueluList = runList  .sublist("MueLu", mustAlreadyExist);
        } catch (std::exception) {
          stop = true;
        }
      }

    } while (stop == false);
  }


  return 0;
} //main
Exemple #3
0
int main(int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>

  using Teuchos::RCP; // reference count pointers
  using Teuchos::rcp;
  using Teuchos::TimeMonitor;

  // =========================================================================
  // MPI initialization using Teuchos
  // =========================================================================
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

  // =========================================================================
  // Convenient definitions
  // =========================================================================
  SC zero = Teuchos::ScalarTraits<SC>::zero(), one = Teuchos::ScalarTraits<SC>::one();

  // Instead of checking each time for rank, create a rank 0 stream
  RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  Teuchos::FancyOStream& fancyout = *fancy;
  fancyout.setOutputToRootOnly(0);


  // =========================================================================
  // Parameters initialization
  // =========================================================================
  Teuchos::CommandLineProcessor clp(false);

  GO nx = 100, ny = 100, nz = 100;
  Galeri::Xpetra::Parameters<GO> matrixParameters(clp, nx, ny, nz, "Laplace1D"); // manage parameters of the test case
  Xpetra::Parameters             xpetraParameters(clp);                          // manage parameters of Xpetra

  std::string xmlFileName = "tutorial1a.xml"; clp.setOption("xml",                   &xmlFileName,     "read parameters from a file. Otherwise, this example uses by default 'tutorial1a.xml'");
  int    amgAsPrecond      = 1;      clp.setOption("precond",               &amgAsPrecond,     "apply multigrid as preconditioner");
  int    amgAsSolver       = 0;      clp.setOption("fixPoint",              &amgAsSolver,      "apply multigrid as solver");
  bool   printTimings      = true;   clp.setOption("timings", "notimings",  &printTimings,     "print timings to screen");
  int    writeMatricesOPT  = -2;     clp.setOption("write",                 &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)");
  double tol               = 1e-12;  clp.setOption("tol",                   &tol,              "solver convergence tolerance");
  std::string krylovMethod = "cg";   clp.setOption("krylov",                &krylovMethod,     "outer Krylov method");

  std::string mapFile;               clp.setOption("map",                   &mapFile,          "map data file");
  std::string matrixFile;            clp.setOption("matrix",                &matrixFile,       "matrix data file");
  std::string coordFile;             clp.setOption("coords",                &coordFile,        "coordinates data file");

  switch (clp.parse(argc,argv)) {
    case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
    case Teuchos::CommandLineProcessor::PARSE_ERROR:
    case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
    case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
  }

  // =========================================================================
  // Problem construction
  // =========================================================================
  RCP<TimeMonitor> globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time"))), tm;

  comm->barrier();
  tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build")));

  RCP<Matrix>      A;
  RCP<MultiVector> coordinates;
  RCP<MultiVector> nullspace;
  if (matrixFile.empty()) {
    fancyout << "========================================================\n" << xpetraParameters << matrixParameters;

    // Retrieve matrix parameters (they may have been changed on the command line), and pass them to Galeri.
    // Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g.,
    //                                 d1  d2  d3
    //                                 d4  d5  d6
    //                                 d7  d8  d9
    //                                 d10 d11 d12
    // A perfect distribution is only possible when the #processors is a perfect square.
    // This *will* result in "strip" distribution if the #processors is a prime number or if the factors are very different in
    // size. For example, np=14 will give a 7-by-2 distribution.
    // If you don't want Galeri to do this, specify mx or my on the galeriList.
    Teuchos::ParameterList pl = matrixParameters.GetParameterList();
    Teuchos::ParameterList galeriList;
    galeriList.set("nx", pl.get("nx",nx));
    galeriList.set("ny", pl.get("ny",ny));
    galeriList.set("nz", pl.get("nz",nz));
    // galeriList.set("mx", comm->getSize());
    // galeriList.set("my", 1);

    // Create map and coordinates
    // In the future, we hope to be able to first create a Galeri problem, and then request map and coordinates from it
    // At the moment, however, things are fragile as we hope that the Problem uses same map and coordinates inside
    RCP<const Map> map;
    if (matrixParameters.GetMatrixType() == "Laplace1D") {
      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("1D",map,matrixParameters.GetParameterList());
    }
    else if (matrixParameters.GetMatrixType() == "Laplace2D" || matrixParameters.GetMatrixType() == "Star2D" || matrixParameters.GetMatrixType() == "Elasticity2D") {
      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("2D",map,matrixParameters.GetParameterList());
    }
    else if (matrixParameters.GetMatrixType() == "Laplace3D" || matrixParameters.GetMatrixType() == "Elasticity3D") {
      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("3D",map,matrixParameters.GetParameterList());
    }
    // Expand map to do multiple DOF per node for block problems
    if (matrixParameters.GetMatrixType() == "Elasticity2D")
      map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 2);
    if (matrixParameters.GetMatrixType() == "Elasticity3D")
      map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 3);

    if (comm->getRank() == 0) {
      GO mx = galeriList.get("mx", -1);
      GO my = galeriList.get("my", -1);
      GO mz = galeriList.get("mz", -1);
      fancyout << "Processor subdomains in x direction: " << mx << std::endl
          << "Processor subdomains in y direction: " << my << std::endl
          << "Processor subdomains in z direction: " << mz << std::endl
          << "========================================================" << std::endl;
    }

    Teuchos::ParameterList matrixParams = matrixParameters.GetParameterList();
    matrixParams.set("mx", galeriList.get("mx", -1));
    matrixParams.set("my", galeriList.get("my", -1));
    matrixParams.set("mz", galeriList.get("mz", -1));
    if (matrixParameters.GetMatrixType() == "Elasticity2D" || matrixParameters.GetMatrixType() == "Elasticity3D") {
      // Our default test case for elasticity: all boundaries of a square/cube have Neumann b.c. except left which has Dirichlet
      matrixParams.set("right boundary" , "Neumann");
      matrixParams.set("bottom boundary", "Neumann");
      matrixParams.set("top boundary"   , "Neumann");
      matrixParams.set("front boundary" , "Neumann");
      matrixParams.set("back boundary"  , "Neumann");
    }

    RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
        Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(matrixParameters.GetMatrixType(), map, matrixParams);
    A = Pr->BuildMatrix();

    nullspace = MultiVectorFactory::Build(map, 1);
    if (matrixParameters.GetMatrixType() == "Elasticity2D" ||
        matrixParameters.GetMatrixType() == "Elasticity3D") {
      nullspace = Pr->BuildNullspace();
      A->SetFixedBlockSize((matrixParameters.GetMatrixType() == "Elasticity2D") ? 2 : 3);

    } else {
      nullspace->putScalar(one);
    }
  } else {
    RCP<const Map> map = (mapFile.empty() ? Teuchos::null : Utils2::ReadMap(mapFile, xpetraParameters.GetLib(), comm));
    comm->barrier();

    A = Utils::Read(matrixFile, map);
    comm->barrier();

    coordinates = Utils2::ReadMultiVector(coordFile, map);

    nullspace = MultiVectorFactory::Build(map, 1);
    nullspace->putScalar(one);
  }
  RCP<const Map> map = A->getRowMap();

  comm->barrier();
  tm = Teuchos::null;

  fancyout << "Galeri complete.\n========================================================" << std::endl;

  // =========================================================================
  // Preconditioner construction
  // =========================================================================
  comm->barrier();
  tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1.5 - MueLu read XML")));
  ParameterListInterpreter mueLuFactory(xmlFileName, *comm);

  comm->barrier();
  tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 2 - MueLu Setup")));

  RCP<Hierarchy> H = mueLuFactory.CreateHierarchy();

  H->GetLevel(0)->Set("A",           A);
  H->GetLevel(0)->Set("Nullspace",   nullspace);
  H->GetLevel(0)->Set("Coordinates", coordinates);

  mueLuFactory.SetupHierarchy(*H);

  comm->barrier();
  tm = Teuchos::null;

  // =========================================================================
  // System solution (Ax = b)
  // =========================================================================
  comm->barrier();
  tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3 - LHS and RHS initialization")));

  RCP<Vector> X = VectorFactory::Build(map);
  RCP<Vector> B = VectorFactory::Build(map);

  {
    // we set seed for reproducibility
    X->setSeed(846930886);
    X->randomize();
    A->apply(*X, *B, Teuchos::NO_TRANS, one, zero);

    Teuchos::Array<Teuchos::ScalarTraits<double>::magnitudeType> norms(1);
    B->norm2(norms);
    B->scale(1.0/norms[0]);
    X->putScalar(zero);
  }
  tm = Teuchos::null;

  if (writeMatricesOPT > -2)
    H->Write(writeMatricesOPT, writeMatricesOPT);

  comm->barrier();
  if (amgAsSolver) {
    tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 4 - Fixed Point Solve")));

    H->IsPreconditioner(false);
    H->Iterate(*B, 25, *X);

  } else if (amgAsPrecond) {
#ifdef HAVE_MUELU_BELOS
    tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Belos Solve")));
    // Operator and Multivector type that will be used with Belos
    typedef MultiVector          MV;
    typedef Belos::OperatorT<MV> OP;
    H->IsPreconditioner(true);

    // Define Operator and Preconditioner
    Teuchos::RCP<OP> belosOp   = Teuchos::rcp(new Belos::XpetraOp<SC, LO, GO, NO, LMO>(A)); // Turns a Xpetra::Matrix object into a Belos operator
    Teuchos::RCP<OP> belosPrec = Teuchos::rcp(new Belos::MueLuOp<SC, LO, GO, NO, LMO>(H));  // Turns a MueLu::Hierarchy object into a Belos operator

    // Construct a Belos LinearProblem object
    RCP< Belos::LinearProblem<SC, MV, OP> > belosProblem = rcp(new Belos::LinearProblem<SC, MV, OP>(belosOp, X, B));
    belosProblem->setLeftPrec(belosPrec);

    bool set = belosProblem->setProblem();
    if (set == false) {
      fancyout << "\nERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
      return EXIT_FAILURE;
    }

    // Belos parameter list
    int maxIts = 2000;
    Teuchos::ParameterList belosList;
    belosList.set("Maximum Iterations",    maxIts); // Maximum number of iterations allowed
    belosList.set("Convergence Tolerance", tol);    // Relative convergence tolerance requested
    belosList.set("Verbosity",             Belos::Errors + Belos::Warnings + Belos::StatusTestDetails);
    belosList.set("Output Frequency",      1);
    belosList.set("Output Style",          Belos::Brief);

    // Create an iterative solver manager
    RCP< Belos::SolverManager<SC, MV, OP> > solver;
    if (krylovMethod == "cg") {
      solver = rcp(new Belos::BlockCGSolMgr<SC, MV, OP>(belosProblem, rcp(&belosList, false)));
    } else if (krylovMethod == "gmres") {
      solver = rcp(new Belos::BlockGmresSolMgr<SC, MV, OP>(belosProblem, rcp(&belosList, false)));
    } else {
      TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::RuntimeError, "Invalid Krylov method.  Options are \"cg\" or \" gmres\".");
    }

    // Perform solve
    Belos::ReturnType ret = Belos::Unconverged;
    try {
      ret = solver->solve();

      // Get the number of iterations for this solve.
      fancyout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl;

    } catch(...) {
      fancyout << std::endl << "ERROR:  Belos threw an error! " << std::endl;
    }

    // Check convergence
    if (ret != Belos::Converged)
      fancyout << std::endl << "ERROR:  Belos did not converge! " << std::endl;
    else
      fancyout << std::endl << "SUCCESS:  Belos converged!" << std::endl;
#endif //ifdef HAVE_MUELU_BELOS
  }
  comm->barrier();
  tm = Teuchos::null;
  globalTimeMonitor = Teuchos::null;

  if (printTimings) {
    TimeMonitor::summarize(A->getRowMap()->getComm().ptr(), std::cout, false, true, false, Teuchos::Union);
    //MueLu::MutuallyExclusiveTime<MueLu::BaseClass>::PrintParentChildPairs();
  }

  return 0;
} //main
int main(int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::TimeMonitor;

  // =========================================================================
  // MPI initialization using Teuchos
  // =========================================================================
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);

  // =========================================================================
  // Convenient definitions
  // =========================================================================
  typedef Teuchos::ScalarTraits<SC> STS;
  SC zero = STS::zero(), one = STS::one();

  bool success = false;
  bool verbose = true;
  try {
    RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
    int numProc = comm->getSize();
    int myRank  = comm->getRank();

    RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
    Teuchos::FancyOStream& out = *fancy;
    out.setOutputToRootOnly(0);

    // =========================================================================
    // Parameters initialization
    // =========================================================================
    Teuchos::CommandLineProcessor clp(false);
    Xpetra::Parameters xpetraParameters(clp);

    switch (clp.parse(argc,argv)) {
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
      case Teuchos::CommandLineProcessor::PARSE_ERROR:
      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
    }

    Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

    const int numLists = 1;
    std::vector<std::string> dirList;
    dirList.push_back("Convergence/Laplace2D/");

    bool failed = false;
    for (int k = 0; k < numLists; k++) {
      const std::string& dirName = dirList[k];
      std::string problemFile = dirName + "problem.xml";

      ParameterList galeriParameters;
      Teuchos::updateParametersFromXmlFileAndBroadcast(problemFile, Teuchos::Ptr<Teuchos::ParameterList>(&galeriParameters), *comm);
      if (!galeriParameters.isParameter("mz"))
        galeriParameters.set<int>("mz", -1);

      // =========================================================================
      // Problem construction (copy-paste from Driver.cpp)
      // =========================================================================
      RCP<Matrix>       A;
      RCP<Map>          map;
      RCP<MultiVector>  nullspace, coordinates;

      // Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g.,
      //                                 d1  d2  d3
      //                                 d4  d5  d6
      //                                 d7  d8  d9
      //                                 d10 d11 d12
      // A perfect distribution is only possible when the #processors is a perfect square.
      // This *will* result in "strip" distribution if the #processors is a prime number or if the factors are very different in
      // size. For example, np=14 will give a 7-by-2 distribution.
      // If you don't want Galeri to do this, specify mx or my on the galeriParameters.
      std::string matrixType = galeriParameters.get<std::string>("matrixType");

      // Create map and coordinates
      // In the future, we hope to be able to first create a Galeri problem, and then request map and coordinates from it
      // At the moment, however, things are fragile as we hope that the Problem uses same map and coordinates inside
      if (matrixType == "Laplace1D") {
        map = Galeri::Xpetra::CreateMap<LO, GO, Node>(lib, "Cartesian1D", comm, galeriParameters);
        coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("1D", map, galeriParameters);

      } else if (matrixType == "Laplace2D" || matrixType == "Star2D" ||
                 matrixType == "BigStar2D" || matrixType == "Elasticity2D") {
        map = Galeri::Xpetra::CreateMap<LO, GO, Node>(lib, "Cartesian2D", comm, galeriParameters);
        coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("2D", map, galeriParameters);

      } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") {
        map = Galeri::Xpetra::CreateMap<LO, GO, Node>(lib, "Cartesian3D", comm, galeriParameters);
        coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("3D", map, galeriParameters);
      }

      // Expand map to do multiple DOF per node for block problems
      if (matrixType == "Elasticity2D")
        map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 2);
      if (matrixType == "Elasticity3D")
        map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 3);

#if 0
      out << "========================================================\n" << xpetraParameters << galeriParameters;
      out << "Processor subdomains in x direction: " << galeriParameters.get<GO>("mx") << std::endl
          << "Processor subdomains in y direction: " << galeriParameters.get<GO>("my") << std::endl
          << "Processor subdomains in z direction: " << galeriParameters.get<GO>("mz") << std::endl
          << "========================================================" << std::endl;
#endif

      RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
          Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(matrixType, map, galeriParameters);
      A = Pr->BuildMatrix();

      if (matrixType == "Elasticity2D" ||
          matrixType == "Elasticity3D") {
        nullspace = Pr->BuildNullspace();
        A->SetFixedBlockSize((matrixType == "Elasticity2D") ? 2 : 3);

      } else {
        nullspace = MultiVectorFactory::Build(map, 1);
        Teuchos::ArrayRCP<SC> nsData = nullspace->getDataNonConst(0);
        for (int i = 0; i < nsData.size(); i++)
          nsData[i] = one;
      }

      // =========================================================================
      // Run different configurations
      // =========================================================================
      Teuchos::ArrayRCP<std::string> fileList = MueLuTests::TestHelpers::GetFileList(dirList[k],
            (numProc == 1 ? std::string(".xml") : std::string("_np" + Teuchos::toString(numProc) + ".xml")));

      RCP<MultiVector> X = MultiVectorFactory::Build(map, 1);
      RCP<MultiVector> B = MultiVectorFactory::Build(map, 1);

      for (int i = 0; i < fileList.size(); i++) {
        if (fileList[i] == "problem.xml")
          continue;

        // Set seed
        Utilities::SetRandomSeed(*comm);

        // Reset (potentially) cached value of the estimate
        A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());

        std::string xmlFile = dirName + fileList[i];

        ParameterList paramList;
        Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFile, Teuchos::Ptr<Teuchos::ParameterList>(&paramList), *comm);

        std::string    solveType = paramList.get<std::string>   ("solver", "standalone");
        double         goldRate  = paramList.get<double>        ("convergence rate");
        ParameterList& mueluList = paramList.sublist            ("MueLu");

        TEUCHOS_TEST_FOR_EXCEPTION(solveType != "standalone" && solveType != "cg" && solveType != "gmres", MueLu::Exceptions::RuntimeError,
                                   "Unknown solver type \"" << solveType << "\"");
        bool           isPrec    = !(solveType == "standalone");

        if (!mueluList.isParameter("verbosity"))
          mueluList.set("verbosity", "none");

#ifndef HAVE_MUELU_BELOS
        if (isPrec)
          out << xmlFile << ": skipped (Belos is not enabled)" << std::endl;
#endif

        // =========================================================================
        // Preconditioner construction
        // =========================================================================
        RCP<Hierarchy> H;
        try {
          ParameterListInterpreter mueluFactory(mueluList);

          H = mueluFactory.CreateHierarchy();

          H->GetLevel(0)->Set("A",           A);
          H->GetLevel(0)->Set("Nullspace",   nullspace);
          H->GetLevel(0)->Set("Coordinates", coordinates);

          mueluFactory.SetupHierarchy(*H);

        } catch (Teuchos::ExceptionBase& e) {
          std::string msg = e.what();
          msg = msg.substr(msg.find_last_of('\n')+1);

          out << "Caught exception: " << msg << std::endl;

          if (msg == "Zoltan interface is not available" ||
              msg == "Zoltan2 interface is not available") {

            if (myRank == 0)
              out << xmlFile << ": skipped (missing library)" << std::endl;

            continue;
          }
        }

        // Set X, B
        {
          // TODO: do multiple vectors simultaneously to average

          // we set seed for reproducibility
          Utilities::SetRandomSeed(*comm);
          X->randomize();
          A->apply(*X, *B, Teuchos::NO_TRANS, one, zero);

          Teuchos::Array<STS::magnitudeType> norms(1);
          B->norm2(norms);
          B->scale(one/norms[0]);
          X->putScalar(zero);
        }

        const int    maxIts = 100;
        const double tol    = 1e-12;

        H->IsPreconditioner(isPrec);
        if (isPrec == false) {
          MueLu::ReturnType ret = H->Iterate(*B, *X, std::pair<LO,SC>(maxIts, tol));

          double rate = H->GetRate();

          if (abs(rate-goldRate) < 0.02) {
            out << xmlFile << ": passed (" <<
                (ret == MueLu::Converged ? "converged, " : "unconverged, ") <<
                "expected rate = " << goldRate << ", real rate = " << rate <<
                (ret == MueLu::Converged ? "" : " (after " + Teuchos::toString(maxIts) + " iterations)")
                << ")" << std::endl;
          } else {
            out << xmlFile << ": failed (" <<
                (ret == MueLu::Converged ? "converged, " : "unconverged, ") <<
                "expected rate = " << goldRate << ", real rate = " << rate <<
                (ret == MueLu::Converged ? "" : " (after " + Teuchos::toString(maxIts) + " iterations)")
                << ")" << std::endl;
            failed = true;
          }

        } else {
#ifdef HAVE_MUELU_BELOS
          // Operator and Multivector type that will be used with Belos
          typedef MultiVector          MV;
          typedef Belos::OperatorT<MV> OP;

          // Define Operator and Preconditioner
          RCP<OP> belosOp   = rcp(new Belos::XpetraOp<SC, LO, GO, NO>(A)); // Turns a Xpetra::Matrix object into a Belos operator
          RCP<OP> belosPrec = rcp(new Belos::MueLuOp <SC, LO, GO, NO>(H)); // Turns a MueLu::Hierarchy object into a Belos operator

          // Construct a Belos LinearProblem object
          RCP< Belos::LinearProblem<SC, MV, OP> > belosProblem = rcp(new Belos::LinearProblem<SC, MV, OP>(belosOp, X, B));
          belosProblem->setRightPrec(belosPrec);

          bool set = belosProblem->setProblem();
          if (set == false) {
            out << "\nERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
            return EXIT_FAILURE;
          }

          // Belos parameter list
          ParameterList belosList;
          belosList.set("Maximum Iterations",    maxIts); // Maximum number of iterations allowed
          belosList.set("Convergence Tolerance", tol);    // Relative convergence tolerance requested
#if 1
          belosList.set("Verbosity",             Belos::Errors + Belos::Warnings);
#else
          belosList.set("Verbosity",             Belos::Errors + Belos::Warnings + Belos::StatusTestDetails);
          belosList.set("Output Frequency",      1);
          belosList.set("Output Style",          Belos::Brief);
#endif

          // Belos custom test to store residuals

          // Create an iterative solver manager
          RCP<Belos::SolverManager<SC, MV, OP> > solver;
          if (solveType == "cg")
            solver = rcp(new Belos::PseudoBlockCGSolMgr<SC, MV, OP>(belosProblem, rcp(&belosList, false)));
          else if (solveType == "gmres")
            solver = rcp(new Belos::BlockGmresSolMgr   <SC, MV, OP>(belosProblem, rcp(&belosList, false)));
          RCP<Belos::MyStatusTest<SC, MV, OP> > status = rcp(new Belos::MyStatusTest<SC, MV, OP>(tol));
          solver->setDebugStatusTest(status);

          // Perform solve
          Belos::ReturnType ret = Belos::Unconverged;
          try {
            ret = solver->solve();

            double rate = status->rate();

            if (abs(rate-goldRate) < 0.02) {
              out << xmlFile << ": passed (" <<
                  (ret == Belos::Converged ? "converged, " : "unconverged, ") <<
                  "expected rate = " << goldRate << ", real rate = " << rate <<
                  (ret == Belos::Converged ? "" : " (after " + Teuchos::toString(maxIts) + " iterations)")
                  << ")" << std::endl;
            } else {
              out << xmlFile << ": failed (" <<
                  (ret == Belos::Converged ? "converged, " : "unconverged, ") <<
                  "expected rate = " << goldRate << ", real rate = " << rate <<
                  (ret == Belos::Converged ? "" : " (after " + Teuchos::toString(maxIts) + " iterations)")
                  << ")" << std::endl;
              failed = true;
            }

          } catch(...) {
            out << xmlFile << ": failed (exception)" << std::endl;
            failed = true;
          }
#endif //ifdef HAVE_MUELU_BELOS
        }
      }
    }
    success = !failed;

    out << std::endl << "End Result: TEST " << (failed ? "FAILED" : "PASSED") << std::endl;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
void ConstructData(const std::string& matrixType, Teuchos::ParameterList& galeriList,
                   Xpetra::UnderlyingLib lib, Teuchos::RCP<const Teuchos::Comm<int> >& comm,
                   Teuchos::RCP<Xpetra::Matrix      <Scalar,LocalOrdinal,GlobalOrdinal,Node> >& A,
                   Teuchos::RCP<const Xpetra::Map   <LocalOrdinal,GlobalOrdinal, Node> >&       map,
                   Teuchos::RCP<Xpetra::MultiVector <Scalar,LocalOrdinal,GlobalOrdinal,Node> >& coordinates,
                   Teuchos::RCP<Xpetra::MultiVector <Scalar,LocalOrdinal,GlobalOrdinal,Node> >& nullspace) {
#include <MueLu_UseShortNames.hpp>
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::ArrayRCP;
  using Teuchos::RCP;
  using Teuchos::TimeMonitor;

  // Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g.,
  //                                 d1  d2  d3
  //                                 d4  d5  d6
  //                                 d7  d8  d9
  //                                 d10 d11 d12
  // A perfect distribution is only possible when the #processors is a perfect square.
  // This *will* result in "strip" distribution if the #processors is a prime number or if the factors are very different in
  // size. For example, np=14 will give a 7-by-2 distribution.
  // If you don't want Galeri to do this, specify mx or my on the galeriList.

  // Create map and coordinates
  // In the future, we hope to be able to first create a Galeri problem, and then request map and coordinates from it
  // At the moment, however, things are fragile as we hope that the Problem uses same map and coordinates inside
  if (matrixType == "Laplace1D") {
    map = Galeri::Xpetra::CreateMap<LO, GO, Node>(lib, "Cartesian1D", comm, galeriList);
    coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("1D", map, galeriList);

  } else if (matrixType == "Laplace2D" || matrixType == "Star2D" ||
             matrixType == "BigStar2D" || matrixType == "Elasticity2D") {
    map = Galeri::Xpetra::CreateMap<LO, GO, Node>(lib, "Cartesian2D", comm, galeriList);
    coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("2D", map, galeriList);

  } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") {
    map = Galeri::Xpetra::CreateMap<LO, GO, Node>(lib, "Cartesian3D", comm, galeriList);
    coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("3D", map, galeriList);
  }

  // Expand map to do multiple DOF per node for block problems
  if (matrixType == "Elasticity2D")
    map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 2);
  if (matrixType == "Elasticity3D")
    map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 3);

  if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") {
    // Our default test case for elasticity: all boundaries of a square/cube have Neumann b.c. except left which has Dirichlet
    galeriList.set("right boundary" , "Neumann");
    galeriList.set("bottom boundary", "Neumann");
    galeriList.set("top boundary"   , "Neumann");
    galeriList.set("front boundary" , "Neumann");
    galeriList.set("back boundary"  , "Neumann");
  }

  RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
      Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(matrixType, map, galeriList);
  A = Pr->BuildMatrix();

  if (matrixType == "Elasticity2D" ||
      matrixType == "Elasticity3D") {
    nullspace = Pr->BuildNullspace();
    A->SetFixedBlockSize((matrixType == "Elasticity2D") ? 2 : 3);
  }
}