示例#1
0
  TEUCHOS_UNIT_TEST(SaPFactory_kokkos, EpetraVsTpetra)
  {
#   include "MueLu_UseShortNames.hpp"
    MueLu::VerboseObject::SetDefaultOStream(Teuchos::rcpFromRef(out));

    out << "version: " << MueLu::Version() << std::endl;
    out << "Compare results of Epetra and Tpetra" << std::endl;
    out << "for 3 level AMG solver using smoothed aggregation with" << std::endl;
    out << "one SGS sweep on each multigrid level as pre- and postsmoother" << std::endl;

    RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

    typedef Teuchos::ScalarTraits<SC> STS;
    SC zero = STS::zero(), one = STS::one();

    Array<STS::magnitudeType> results(2);

    // run test only on 1 proc
    if(comm->getSize() == 1) {
      Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;

      // run Epetra and Tpetra test
      for (int run = 0; run < 2; run++) { //TODO: create a subfunction instead or Tuple of UnderlyingLib
        if (run == 0) lib = Xpetra::UseEpetra;
        else          lib = Xpetra::UseTpetra;

        // generate problem
        LO maxLevels = 3;
        LO its       = 10;
        GO nEle      = 63;
        const RCP<const Map> map = MapFactory::Build(lib, nEle, 0, comm);
        Teuchos::ParameterList matrixParameters;
        matrixParameters.set("nx", nEle);

        RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
          Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>("Laplace1D", map, matrixParameters);
        RCP<Matrix> Op = Pr->BuildMatrix();

        // build nullspace
        RCP<MultiVector> nullSpace = MultiVectorFactory::Build(map,1);
        nullSpace->putScalar(one);
        Array<STS::magnitudeType> norms(1);
        nullSpace->norm1(norms);
        if (comm->getRank() == 0)
          out << "||NS|| = " << norms[0] << std::endl;

        // fill hierarchy
        RCP<Hierarchy> H = rcp( new Hierarchy() );
        H->setDefaultVerbLevel(Teuchos::VERB_HIGH);

        RCP<Level> Finest = H->GetLevel();
        Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
        Finest->Set("A",Op);                      // set fine level matrix
        Finest->Set("Nullspace",nullSpace);       // set null space information for finest level

        // define transfer operators
        RCP<CoupledAggregationFactory> CoupledAggFact = rcp(new CoupledAggregationFactory());
        CoupledAggFact->SetMinNodesPerAggregate(3);
        CoupledAggFact->SetMaxNeighAlreadySelected(0);
        CoupledAggFact->SetOrdering("natural");
        CoupledAggFact->SetPhase3AggCreation(0.5);

        RCP<TentativePFactory> Ptentfact = rcp(new TentativePFactory());
        RCP<SaPFactory>        Pfact = rcp( new SaPFactory());
        RCP<Factory>      Rfact = rcp( new TransPFactory() );
        RCP<RAPFactory>        Acfact = rcp( new RAPFactory() );
        H->SetMaxCoarseSize(1);

        // setup smoothers
        Teuchos::ParameterList smootherParamList;
        smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel");
        smootherParamList.set("relaxation: sweeps", (LO) 1);
        smootherParamList.set("relaxation: damping factor", (SC) 1.0);
        RCP<SmootherPrototype> smooProto = rcp( new TrilinosSmoother("RELAXATION", smootherParamList) );
        RCP<SmootherFactory> SmooFact = rcp( new SmootherFactory(smooProto) );
        Acfact->setVerbLevel(Teuchos::VERB_HIGH);

        RCP<SmootherFactory> coarseSolveFact = rcp(new SmootherFactory(smooProto, Teuchos::null));

        FactoryManager M;
        M.SetFactory("P", Pfact);
        M.SetFactory("R", Rfact);
        M.SetFactory("A", Acfact);
        M.SetFactory("Ptent", Ptentfact);
        M.SetFactory("Aggregates", CoupledAggFact);
        M.SetFactory("Smoother", SmooFact);
        M.SetFactory("CoarseSolver", coarseSolveFact);

        H->Setup(M, 0, maxLevels);

        // test some basic multigrid data
        RCP<Level> coarseLevel = H->GetLevel(1);
        TEST_EQUALITY(coarseLevel->IsRequested("A",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel->IsRequested("P",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel->IsRequested("PreSmoother",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel->IsRequested("PostSmoother",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel->IsRequested("R",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel->IsAvailable("A",MueLu::NoFactory::get()), true);
        TEST_EQUALITY(coarseLevel->IsAvailable("P",MueLu::NoFactory::get()), true);
        TEST_EQUALITY(coarseLevel->IsAvailable("PreSmoother",MueLu::NoFactory::get()), true);
        TEST_EQUALITY(coarseLevel->IsAvailable("PostSmoother",MueLu::NoFactory::get()), true);
        TEST_EQUALITY(coarseLevel->IsAvailable("R",MueLu::NoFactory::get()), true);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("A",MueLu::NoFactory::get()), MueLu::Final);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("P",MueLu::NoFactory::get()), MueLu::Final);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("PreSmoother",MueLu::NoFactory::get()), MueLu::Final);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("PostSmoother",MueLu::NoFactory::get()), MueLu::Final);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("R",MueLu::NoFactory::get()), MueLu::Final);
        TEST_EQUALITY(coarseLevel->IsRequested("P",Pfact.get()), false);
        TEST_EQUALITY(coarseLevel->IsRequested("P",Ptentfact.get()), false);
        TEST_EQUALITY(coarseLevel->IsRequested("PreSmoother",SmooFact.get()), false);
        TEST_EQUALITY(coarseLevel->IsRequested("PostSmoother",SmooFact.get()), false);
        TEST_EQUALITY(coarseLevel->IsRequested("R",Rfact.get()), false);
        TEST_EQUALITY(coarseLevel->IsRequested("A",Acfact.get()), false);
        TEST_EQUALITY(coarseLevel->IsAvailable("P",Pfact.get()), false);
        TEST_EQUALITY(coarseLevel->IsAvailable("P",Ptentfact.get()), false);
        TEST_EQUALITY(coarseLevel->IsAvailable("PreSmoother",SmooFact.get()), false);
        TEST_EQUALITY(coarseLevel->IsAvailable("PostSmoother",SmooFact.get()), false);
        TEST_EQUALITY(coarseLevel->IsAvailable("R",Rfact.get()), false);
        TEST_EQUALITY(coarseLevel->IsAvailable("A",Acfact.get()), false);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("P",Pfact.get()), 0);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("P",Ptentfact.get()), 0);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("PreSmoother",SmooFact.get()), 0);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("PostSmoother",SmooFact.get()), 0);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("R",Rfact.get()), 0);
        TEST_EQUALITY(coarseLevel->GetKeepFlag("A",Acfact.get()), 0);
        RCP<Matrix> P1 = coarseLevel->Get< RCP<Matrix> >("P");
        RCP<Matrix> R1 = coarseLevel->Get< RCP<Matrix> >("R");
        TEST_EQUALITY(P1->getGlobalNumRows(), 63);
        TEST_EQUALITY(P1->getGlobalNumCols(), 21);
        TEST_EQUALITY(R1->getGlobalNumRows(), 21);
        TEST_EQUALITY(R1->getGlobalNumCols(), 63);
        RCP<Level> coarseLevel2 = H->GetLevel(2);
        TEST_EQUALITY(coarseLevel2->IsRequested("A",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel2->IsRequested("P",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel2->IsRequested("R",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel2->IsRequested("PreSmoother",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel2->IsRequested("PostSmoother",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel2->IsAvailable("A",MueLu::NoFactory::get()), true);
        TEST_EQUALITY(coarseLevel2->IsAvailable("P",MueLu::NoFactory::get()), true);
        TEST_EQUALITY(coarseLevel2->IsAvailable("PreSmoother",MueLu::NoFactory::get()), true);
        TEST_EQUALITY(coarseLevel2->IsAvailable("PostSmoother",MueLu::NoFactory::get()), false);
        TEST_EQUALITY(coarseLevel2->IsAvailable("R",MueLu::NoFactory::get()), true);
        TEST_EQUALITY(coarseLevel2->GetKeepFlag("A",MueLu::NoFactory::get()), MueLu::Final);
        TEST_EQUALITY(coarseLevel2->GetKeepFlag("P",MueLu::NoFactory::get()), MueLu::Final);
        TEST_EQUALITY(coarseLevel2->GetKeepFlag("PreSmoother",MueLu::NoFactory::get()), MueLu::Final);
        TEST_EQUALITY(coarseLevel2->GetKeepFlag("PostSmoother",MueLu::NoFactory::get()), 0);
        TEST_EQUALITY(coarseLevel2->GetKeepFlag("R",MueLu::NoFactory::get()), MueLu::Final);
        TEST_EQUALITY(coarseLevel2->IsRequested("P",Pfact.get()), false);
        TEST_EQUALITY(coarseLevel2->IsRequested("P",Ptentfact.get()), false);
        TEST_EQUALITY(coarseLevel2->IsRequested("R",Rfact.get()), false);
        TEST_EQUALITY(coarseLevel2->IsAvailable("P",Pfact.get()), false);
        TEST_EQUALITY(coarseLevel2->IsAvailable("P",Ptentfact.get()), false);
        TEST_EQUALITY(coarseLevel2->IsAvailable("PreSmoother",SmooFact.get()), false);
        TEST_EQUALITY(coarseLevel2->IsAvailable("PostSmoother",SmooFact.get()), false);
        TEST_EQUALITY(coarseLevel2->IsAvailable("R",Rfact.get()), false);
        TEST_EQUALITY(coarseLevel2->GetKeepFlag("P",Pfact.get()), 0);
        TEST_EQUALITY(coarseLevel2->GetKeepFlag("P",Ptentfact.get()), 0);
        TEST_EQUALITY(coarseLevel2->GetKeepFlag("PreSmoother",SmooFact.get()), 0);
        TEST_EQUALITY(coarseLevel2->GetKeepFlag("PostSmoother",SmooFact.get()), 0);
        TEST_EQUALITY(coarseLevel2->GetKeepFlag("R",Rfact.get()), 0);
        RCP<Matrix> P2 = coarseLevel2->Get< RCP<Matrix> >("P");
        RCP<Matrix> R2 = coarseLevel2->Get< RCP<Matrix> >("R");
        TEST_EQUALITY(P2->getGlobalNumRows(), 21);
        TEST_EQUALITY(P2->getGlobalNumCols(), 7);
        TEST_EQUALITY(R2->getGlobalNumRows(), 7);
        TEST_EQUALITY(R2->getGlobalNumCols(), 21);

        Teuchos::RCP<Xpetra::Matrix<Scalar,LO,GO> > PtentTPtent = Xpetra::MatrixMatrix<Scalar,LO,GO>::Multiply(*P1,true,*P1,false,out);
        TEST_EQUALITY(PtentTPtent->getGlobalMaxNumRowEntries()-3<1e-12, true);
        TEST_EQUALITY(P1->getGlobalMaxNumRowEntries()-2<1e-12, true);
        TEST_EQUALITY(P2->getGlobalMaxNumRowEntries()-2<1e-12, true);

        // Define RHS
        RCP<MultiVector> X = MultiVectorFactory::Build(map,1);
        RCP<MultiVector> RHS = MultiVectorFactory::Build(map,1);

        X->putScalar(1.0);
        X->norm2(norms);
        if (comm->getRank() == 0)
          out << "||X_true|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << norms[0] << std::endl;

        Op->apply(*X,*RHS,Teuchos::NO_TRANS,(SC)1.0,(SC)0.0);

        // Use AMG directly as an iterative method
        {
          X->putScalar( (SC) 0.0);

          H->Iterate(*RHS,*X,its);

          X->norm2(norms);
          if (comm->getRank() == 0)
            out << "||X_" << std::setprecision(2) << its << "|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << norms[0] << std::endl;
          results[run] = norms[0];
        }
      }

      TEST_EQUALITY(results[0] - results[1] < 1e-10, true); // check results of EPETRA vs TPETRA
    } // comm->getSize == 1

  } //SaPFactory_EpetraVsTpetra
Teuchos::RCP<Vector> runExample(std::vector<size_t> stridingInfo, LocalOrdinal stridedBlockId, GlobalOrdinal offset) {
    using Teuchos::RCP;
    using Teuchos::rcp;

    RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
    RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
    out->setOutputToRootOnly(0);
    *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

    // Timing
    Teuchos::Time myTime("global");
    Teuchos::TimeMonitor Mt(myTime);

#ifndef HAVE_TEUCHOS_LONG_LONG_INT
    *out << "Warning: scaling test was not compiled with long long int support" << std::endl;
#endif

    // custom parameters
    LO maxLevels = 4;

    GO maxCoarseSize=1; //FIXME clp doesn't like long long int
    std::string aggOrdering = "natural";
    int minPerAgg=3;
    int maxNbrAlreadySelected=0;

    ////////////////////////////////////////////////////////////////////////////////////////
    // prepare redistribution of matrix (parallelization)
    int globalNumDofs = 7020;
    int nProcs = comm->getSize();
    int nDofsPerNode = 2;

    int nLocalDofs = (int) globalNumDofs / nProcs;
    nLocalDofs = nLocalDofs - (nLocalDofs % nDofsPerNode);
    int nCumulatedDofs = 0;
    sumAll(comm,nLocalDofs, nCumulatedDofs);

    if(comm->getRank() == nProcs-1) {
        nLocalDofs += globalNumDofs - nCumulatedDofs;
    }

    std::cout << "PROC: " << comm->getRank() << " numLocalDofs=" << nLocalDofs << std::endl;

    ////////////////////////////////////////////////////////////////////////////////////////
    // read in problem
    Epetra_Map emap (globalNumDofs, nLocalDofs, 0, *Xpetra::toEpetra(comm));
    Epetra_CrsMatrix * ptrA = 0;
    Epetra_Vector * ptrf = 0;
    Epetra_MultiVector* ptrNS = 0;

    std::cout << "Reading matrix market file" << std::endl;
    EpetraExt::MatrixMarketFileToCrsMatrix("stru2d_A.txt",emap,emap,emap,ptrA);
    EpetraExt::MatrixMarketFileToVector("stru2d_b.txt",emap,ptrf);
    EpetraExt::MatrixMarketFileToMultiVector( "stru2d_ns.txt", emap, ptrNS);
    RCP<Epetra_CrsMatrix> epA = Teuchos::rcp(ptrA);
    RCP<Epetra_Vector> epv = Teuchos::rcp(ptrf);
    RCP<Epetra_MultiVector> epNS = Teuchos::rcp(ptrNS);

    ////////////////////////////////////////////
    // Epetra_CrsMatrix -> Xpetra::Matrix
    RCP<CrsMatrix> exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(epA));
    RCP<CrsMatrixWrap> crsOp = Teuchos::rcp(new CrsMatrixWrap(exA));
    RCP<Matrix> Op = Teuchos::rcp_dynamic_cast<Matrix>(crsOp);
    Op->SetFixedBlockSize(nDofsPerNode);

    // Epetra_Vector -> Xpetra::Vector
    RCP<Vector> xRhs = Teuchos::rcp(new Xpetra::EpetraVector(epv));

    RCP<MultiVector> xNS = Teuchos::rcp(new Xpetra::EpetraMultiVector(epNS));

    // Epetra_Map -> Xpetra::Map
    const RCP< const Map> map = Xpetra::toXpetra(emap);

    ////////////////////////////////////////////
    // create new MueLu hierarchy
    RCP<Hierarchy> H = rcp ( new Hierarchy() );
    H->setDefaultVerbLevel(Teuchos::VERB_HIGH);
    H->SetMaxCoarseSize(maxCoarseSize);

    // build finest Level
    RCP<MueLu::Level> Finest = H->GetLevel();
    Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
    Finest->Set("A",Op);
    Finest->Set("Nullspace",xNS);

    // prepare CoalesceDropFactory
    RCP<CoalesceDropFactory> dropFact = rcp(new CoalesceDropFactory());
    //dropFact->SetVariableBlockSize();

    // prepare aggregation strategy
    RCP<CoupledAggregationFactory> CoupledAggFact = rcp(new CoupledAggregationFactory());
    CoupledAggFact->SetFactory("Graph", dropFact);
    *out << "========================= Aggregate option summary  =========================" << std::endl;
    *out << "min DOFs per aggregate :                " << minPerAgg << std::endl;
    *out << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl;
    CoupledAggFact->SetMinNodesPerAggregate(minPerAgg); //TODO should increase if run anything other than 1D
    CoupledAggFact->SetMaxNeighAlreadySelected(maxNbrAlreadySelected);
    std::transform(aggOrdering.begin(), aggOrdering.end(), aggOrdering.begin(), ::tolower);
    if (aggOrdering == "natural") {
        *out << "aggregate ordering :                    NATURAL" << std::endl;
        CoupledAggFact->SetOrdering(MueLu::AggOptions::NATURAL);
    } else if (aggOrdering == "random") {
        *out << "aggregate ordering :                    RANDOM" << std::endl;
        CoupledAggFact->SetOrdering(MueLu::AggOptions::RANDOM);
    } else if (aggOrdering == "graph") {
        *out << "aggregate ordering :                    GRAPH" << std::endl;
        CoupledAggFact->SetOrdering(MueLu::AggOptions::GRAPH);
    } else {
        std::string msg = "main: bad aggregation option """ + aggOrdering + """.";
        throw(MueLu::Exceptions::RuntimeError(msg));
    }
    CoupledAggFact->SetPhase3AggCreation(0.5);
    *out << "=============================================================================" << std::endl;

    // build transfer operators
    RCP<TentativePFactory> TentPFact = rcp(new TentativePFactory());

    /*TentPFact->setStridingData(stridingInfo);
    TentPFact->setStridedBlockId(stridedBlockId);
    TentPFact->setDomainMapOffset(offset);*/

    RCP<CoarseMapFactory> coarseMapFact = Teuchos::rcp(new CoarseMapFactory());
    coarseMapFact->setStridingData(stridingInfo);
    coarseMapFact->setStridedBlockId(stridedBlockId);
    coarseMapFact->setDomainMapOffset(offset);

    RCP<SaPFactory> Pfact  = rcp( new SaPFactory() );
    //RCP<PgPFactory> Pfact  = rcp( new PgPFactory() );
    //RCP<TentativePFactory> Pfact  = rcp( new TentativePFactory() );
    RCP<Factory>   Rfact  = rcp( new TransPFactory() );
    //RCP<Factory>   Rfact  = rcp( new GenericRFactory() );

    // RAP Factory
    RCP<RAPFactory> Acfact = rcp( new RAPFactory() );
    Acfact->setVerbLevel(Teuchos::VERB_HIGH);

    // register aggregation export factory in RAPFactory
    //RCP<MueLu::AggregationExportFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node, LocalMatOps> > aggExpFact = rcp(new MueLu::AggregationExportFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node, LocalMatOps>());
    //aggExpFact->SetParameter("Output filename","aggs_level%LEVELID_proc%PROCID.out");
    //Acfact->AddTransferFactory(aggExpFact);

    // build level smoothers
    RCP<SmootherPrototype> smooProto;
    std::string ifpackType;
    Teuchos::ParameterList ifpackList;
    ifpackList.set("relaxation: sweeps", (LO) 1);
    ifpackList.set("relaxation: damping factor", (SC) 1.0); // 0.7
    ifpackType = "RELAXATION";
    ifpackList.set("relaxation: type", "Symmetric Gauss-Seidel");

    smooProto = Teuchos::rcp( new TrilinosSmoother(ifpackType, ifpackList) );
    RCP<SmootherFactory> SmooFact;
    if (maxLevels > 1)
        SmooFact = rcp( new SmootherFactory(smooProto) );

    // create coarsest smoother
    RCP<SmootherPrototype> coarsestSmooProto;
    std::string type = "";
    Teuchos::ParameterList coarsestSmooList;
#if defined(HAVE_AMESOS_SUPERLU)
    coarsestSmooProto = Teuchos::rcp( new DirectSolver("Superlu", coarsestSmooList) );
#else
    coarsestSmooProto = Teuchos::rcp( new DirectSolver("Klu", coarsestSmooList) );
#endif
    RCP<SmootherFactory> coarsestSmooFact;
    coarsestSmooFact = rcp(new SmootherFactory(coarsestSmooProto, Teuchos::null));

    FactoryManager M;
    //M.SetFactory("Graph", dropFact);
    //M.SetFactory("UnAmalgamationInfo", dropFact);
    M.SetFactory("Aggregates", CoupledAggFact);
    M.SetFactory("Ptent", TentPFact);
    M.SetFactory("P", Pfact);
    M.SetFactory("R", Rfact);
    M.SetFactory("A", Acfact);
    M.SetFactory("Smoother", SmooFact);
    M.SetFactory("CoarseSolver", coarsestSmooFact);
    M.SetFactory("CoarseMap", coarseMapFact);

    H->Setup(M, 0, maxLevels);

    RCP<Vector> xLsg = VectorFactory::Build(map);

    // Use AMG directly as an iterative method
    {
        xLsg->putScalar( (SC) 0.0);

        H->Iterate(*xRhs,10,*xLsg);

        //xLsg->describe(*out,Teuchos::VERB_EXTREME);
    }

    //
    // Solve Ax = b using AMG as a preconditioner in AztecOO
    //
    {
        RCP<Epetra_Vector> X = rcp(new Epetra_Vector(epv->Map()));
        X->PutScalar(0.0);
        Epetra_LinearProblem epetraProblem(epA.get(), X.get(), epv.get());

        AztecOO aztecSolver(epetraProblem);
        aztecSolver.SetAztecOption(AZ_solver, AZ_cg);

        MueLu::EpetraOperator aztecPrec(H);
        aztecSolver.SetPrecOperator(&aztecPrec);

        int maxIts = 50;
        double tol = 1e-8;

        aztecSolver.Iterate(maxIts, tol);
    }

    return xLsg;
}
示例#3
0
int main(int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>

  using Teuchos::RCP; // reference count pointers
  using Teuchos::rcp;
  using Teuchos::TimeMonitor;

  // =========================================================================
  // MPI initialization using Teuchos
  // =========================================================================
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  int MyPID   = comm->getRank();
  int NumProc = comm->getSize();

  const Teuchos::RCP<Epetra_Comm> epComm = Teuchos::rcp_const_cast<Epetra_Comm>(Xpetra::toEpetra(comm));

  // =========================================================================
  // Convenient definitions
  // =========================================================================
  //SC zero = Teuchos::ScalarTraits<SC>::zero(), one = Teuchos::ScalarTraits<SC>::one();

  // Instead of checking each time for rank, create a rank 0 stream
  RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  Teuchos::FancyOStream& fancyout = *fancy;
  fancyout.setOutputToRootOnly(0);

  // =========================================================================
  // Parameters initialization
  // =========================================================================
  Teuchos::CommandLineProcessor clp(false);
  GO nx = 100, ny = 100;
  GO maxCoarseSize = 10;
  LO maxLevels = 4;
  clp.setOption("nx",                   &nx,              "mesh size in x direction");
  clp.setOption("ny",                   &ny,              "mesh size in y direction");
  clp.setOption("maxCoarseSize",        &maxCoarseSize,   "maximum coarse size");
  clp.setOption("maxLevels",            &maxLevels,       "maximum number of multigrid levels");
  int mgridSweeps = 1; clp.setOption("mgridSweeps", &mgridSweeps, "number of multigrid sweeps within Multigrid solver.");
  std::string printTimings = "no";   clp.setOption("timings", &printTimings,     "print timings to screen [yes/no]");
  double tol               = 1e-12;  clp.setOption("tol",                   &tol,              "solver convergence tolerance");

  switch (clp.parse(argc,argv)) {
    case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
    case Teuchos::CommandLineProcessor::PARSE_ERROR:
    case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
    case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
  }

  // =========================================================================
  // Problem construction
  // =========================================================================
  RCP<TimeMonitor> globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time"))), tm;

  comm->barrier();
  tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build")));

  Teuchos::ParameterList GaleriList;
  GaleriList.set("nx", nx);
  GaleriList.set("ny", ny);
  GaleriList.set("mx", epComm->NumProc());
  GaleriList.set("my", 1);
  GaleriList.set("lx", 1.0); // length of x-axis
  GaleriList.set("ly", 1.0); // length of y-axis
  GaleriList.set("diff", 1e-5);
  GaleriList.set("conv", 1.0);

  // create map
  Teuchos::RCP<Epetra_Map> epMap = Teuchos::rcp(Galeri::CreateMap("Cartesian2D", *epComm, GaleriList));

  // create coordinates
  Teuchos::RCP<Epetra_MultiVector> epCoord = Teuchos::rcp(Galeri::CreateCartesianCoordinates("2D", epMap.get(), GaleriList));

  // create matrix
  Teuchos::RCP<Epetra_CrsMatrix> epA = Teuchos::rcp(Galeri::CreateCrsMatrix("Recirc2D", epMap.get(), GaleriList));

  // Epetra -> Xpetra
  Teuchos::RCP<CrsMatrix> exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(epA));
  Teuchos::RCP<CrsMatrixWrap> exAWrap = Teuchos::rcp(new CrsMatrixWrap(exA));

  RCP<Matrix> A = Teuchos::rcp_dynamic_cast<Matrix>(exAWrap);
  int numPDEs = 1;
  A->SetFixedBlockSize(numPDEs);

  // set rhs and solution vector
  RCP<Epetra_Vector> B = Teuchos::rcp(new Epetra_Vector(*epMap));
  RCP<Epetra_Vector> X = Teuchos::rcp(new Epetra_Vector(*epMap));
  B->PutScalar(1.0);
  X->PutScalar(0.0);

  // Epetra -> Xpetra
  RCP<Vector> xB = Teuchos::rcp(new Xpetra::EpetraVector(B));
  RCP<Vector> xX = Teuchos::rcp(new Xpetra::EpetraVector(X));

  xX->setSeed(100);
  xX->randomize();

  // build null space vector
  RCP<const Map> map = A->getRowMap();
  RCP<MultiVector> nullspace = MultiVectorFactory::Build(map, numPDEs);

  for (int i=0; i<numPDEs; ++i) {
    Teuchos::ArrayRCP<Scalar> nsValues = nullspace->getDataNonConst(i);
    int numBlocks = nsValues.size() / numPDEs;
    for (int j=0; j< numBlocks; ++j) {
      nsValues[j*numPDEs + i] = 1.0;
    }
  }

  comm->barrier();
  tm = Teuchos::null;

  fancyout << "========================================================\nGaleri complete.\n========================================================" << std::endl;

  // =========================================================================
  // Preconditioner construction
  // =========================================================================
  comm->barrier();
  tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1.5 - MueLu read XML")));

  RCP<Hierarchy> H = rcp ( new Hierarchy() );
  H->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  H->SetMaxCoarseSize(maxCoarseSize);
  H->setlib(Xpetra::UseEpetra);

  // build finest Level
  RCP<MueLu::Level> Finest = H->GetLevel();
  Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  Finest->setlib(Xpetra::UseEpetra);
  Finest->Set("A",A);
  Finest->Set("Nullspace",nullspace);

  // create factories for transfer operators
  RCP<TentativePFactory> PFact = Teuchos::rcp(new TentativePFactory());
  RCP<TransPFactory>     RFact = Teuchos::rcp(new TransPFactory());
  RFact->SetFactory("P", PFact);

  // build level smoothers
  // use symmetric Gauss-Seidel both for fine and coarse level smoother
  RCP<SmootherPrototype> smooProto;
  std::string ifpackType;
  Teuchos::ParameterList ifpackList;
  ifpackList.set("relaxation: sweeps", (LO) 1);
  ifpackList.set("relaxation: damping factor", (SC) 1.0);
  ifpackType = "RELAXATION";
  ifpackList.set("relaxation: type", "Symmetric Gauss-Seidel");

  smooProto = Teuchos::rcp( new TrilinosSmoother(ifpackType, ifpackList) );
  RCP<SmootherFactory> SmooFact;
  if (maxLevels > 1)
    SmooFact = rcp( new SmootherFactory(smooProto) );

  // design multigrid hierarchy
  FactoryManager M;
  M.SetFactory("P", PFact);
  M.SetFactory("R", RFact);
  M.SetFactory("Nullspace", PFact);
  M.SetFactory("Smoother", SmooFact);
  M.SetFactory("CoarseSolver", SmooFact);

  H->Setup(M, 0, maxLevels);

  comm->barrier();
  tm = Teuchos::null;

  // =========================================================================
  // System solution (Ax = b)
  // =========================================================================

    //
    // generate exact solution using a direct solver
    //
    RCP<Epetra_Vector> exactLsgVec = rcp(new Epetra_Vector(X->Map()));
    {
      fancyout << "========================================================\nCalculate exact solution." << std::endl;
      tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3 - direct solve")));
      exactLsgVec->PutScalar(0.0);
      exactLsgVec->Update(1.0,*X,1.0);
      Epetra_LinearProblem epetraProblem(epA.get(), exactLsgVec.get(), B.get());

      Amesos amesosFactory;
      RCP<Amesos_BaseSolver> rcp_directSolver = Teuchos::rcp(amesosFactory.Create("Amesos_Klu", epetraProblem));
      rcp_directSolver->SymbolicFactorization();
      rcp_directSolver->NumericFactorization();
      rcp_directSolver->Solve();

      comm->barrier();
      tm = Teuchos::null;
    }

    //
    // Solve Ax = b using AMG as a preconditioner in AztecOO
    //

    RCP<Epetra_Vector> precLsgVec = rcp(new Epetra_Vector(X->Map()));
    {
      fancyout << "========================================================\nUse multigrid hierarchy as preconditioner within CG." << std::endl;
      tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 4 - AMG as preconditioner")));

      precLsgVec->PutScalar(0.0);
      precLsgVec->Update(1.0,*X,1.0);
      Epetra_LinearProblem epetraProblem(epA.get(), precLsgVec.get(), B.get());

      AztecOO aztecSolver(epetraProblem);
      aztecSolver.SetAztecOption(AZ_solver, AZ_gmres);

      MueLu::EpetraOperator aztecPrec(H);
      aztecSolver.SetPrecOperator(&aztecPrec);

      int maxIts = 100;
      //double tol2 = 1e-8;

      aztecSolver.Iterate(maxIts, tol);

      comm->barrier();
      tm = Teuchos::null;
    }

    //////////////////

    // use multigrid hierarchy as solver
    RCP<Vector> mgridLsgVec = VectorFactory::Build(map);
    mgridLsgVec->putScalar(0.0);
    {
      fancyout << "========================================================\nUse multigrid hierarchy as solver." << std::endl;
      tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Multigrid Solve")));
      mgridLsgVec->update(1.0,*xX,1.0);
      H->IsPreconditioner(false);
      H->Iterate(*xB, mgridSweeps, *mgridLsgVec);
      comm->barrier();
      tm = Teuchos::null;
    }

    //////////////////

    fancyout << "========================================================\nExport results.\n========================================================" << std::endl;
    ofstream myfile;
    std::stringstream ss; ss << "example" << MyPID << ".txt";
    myfile.open (ss.str().c_str());

    //////////////////

    // loop over all procs
    for (int iproc=0; iproc < NumProc; iproc++) {
      if (MyPID==iproc) {
        int NumVectors1 = 2;
        int NumMyElements1 = epCoord->Map(). NumMyElements();
        int MaxElementSize1 = epCoord->Map().MaxElementSize();
        int * FirstPointInElementList1 = NULL;
        if (MaxElementSize1!=1) FirstPointInElementList1 = epCoord->Map().FirstPointInElementList();
        double ** A_Pointers = epCoord->Pointers();

        if (MyPID==0) {
          myfile.width(8);
          myfile <<  "#     MyPID"; myfile << "    ";
          myfile.width(12);
          if (MaxElementSize1==1)
            myfile <<  "GID  ";
          else
            myfile <<  "     GID/Point";
          for (int j = 0; j < NumVectors1 ; j++)
          {
            myfile.width(20);
            myfile <<  "Value  ";
          }
          myfile << std::endl;
        }
        for (int i=0; i < NumMyElements1; i++) {
          for (int ii=0; ii< epCoord->Map().ElementSize(i); ii++) {
            int iii;
            myfile.width(10);
            myfile <<  MyPID; myfile << "    ";
            myfile.width(10);
            if (MaxElementSize1==1) {
              if(epCoord->Map().GlobalIndicesInt())
              {
                int * MyGlobalElements1 = epCoord->Map().MyGlobalElements();
                myfile << MyGlobalElements1[i] << "    ";
              }

              iii = i;
            }
            else {
              if(epCoord->Map().GlobalIndicesInt())
              {

                int * MyGlobalElements1 = epCoord->Map().MyGlobalElements();
                myfile <<  MyGlobalElements1[i]<< "/" << ii << "    ";
              }

              iii = FirstPointInElementList1[i]+ii;
            }
            for (int j = 0; j < NumVectors1 ; j++)
            {
              myfile.width(20);
              myfile <<  A_Pointers[j][iii];
            }

            myfile.precision(18); // set high precision for output

            // add solution vector entry
            myfile.width(25); myfile << (*exactLsgVec)[iii];

            // add preconditioned solution vector entry
            myfile.width(25); myfile << (*precLsgVec)[iii];

            Teuchos::ArrayRCP<SC> mgridLsgVecData = mgridLsgVec->getDataNonConst(0);
            myfile.width(25); myfile << mgridLsgVecData[iii];


            myfile.precision(6); // set default precision
            myfile << std::endl;
          }
        } // end loop over all lines on current proc
        myfile << std::flush;

        // syncronize procs
        comm->barrier();
        comm->barrier();
        comm->barrier();

      } // end myProc
    }

    ////////////
    myfile.close();

  comm->barrier();
  tm = Teuchos::null;
  globalTimeMonitor = Teuchos::null;

  if (printTimings == "yes") {
    TimeMonitor::summarize(A->getRowMap()->getComm().ptr(), std::cout, false, true, false, Teuchos::Union);
  }

  return 0;
} //main
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP;
  using Teuchos::rcp;

  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);
  //
  RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  out->setOutputToRootOnly(0);
  *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

  // Timing
  Teuchos::Time myTime("global");
  Teuchos::TimeMonitor MM(myTime);

#ifndef HAVE_TEUCHOS_LONG_LONG_INT
  *out << "Warning: scaling test was not compiled with long long int support" << std::endl;
#endif

  // custom parameters
  LocalOrdinal maxLevels = 3;

  GlobalOrdinal maxCoarseSize=1; //FIXME clp doesn't like long long int

  int globalNumDofs = 8898;  // used for the maps
  int nDofsPerNode = 3;      // used for generating the fine level null-space

  // build strided maps
  // striding information: 2 velocity dofs and 1 pressure dof = 3 dofs per node
  std::vector<size_t> stridingInfo;
  stridingInfo.push_back(2);
  stridingInfo.push_back(1);

  /////////////////////////////////////// build strided maps
  // build strided maps:
  // xstridedfullmap: full map (velocity and pressure dof gids), continous
  // xstridedvelmap: only velocity dof gid maps (i.e. 0,1,3,4,6,7...)
  // xstridedpremap: only pressure dof gid maps (i.e. 2,5,8,...)
  Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;
  RCP<StridedMap> xstridedfullmap = StridedMapFactory::Build(lib,globalNumDofs,0,stridingInfo,comm,-1);
  RCP<StridedMap> xstridedvelmap  = StridedMapFactory::Build(xstridedfullmap,0);
  RCP<StridedMap> xstridedpremap  = StridedMapFactory::Build(xstridedfullmap,1);

  /////////////////////////////////////// transform Xpetra::Map objects to Epetra
  // this is needed for AztecOO
  const RCP<const Epetra_Map> fullmap = Teuchos::rcpFromRef(Xpetra::toEpetra(*xstridedfullmap));
  RCP<const Epetra_Map>       velmap  = Teuchos::rcpFromRef(Xpetra::toEpetra(*xstridedvelmap));
  RCP<const Epetra_Map>       premap  = Teuchos::rcpFromRef(Xpetra::toEpetra(*xstridedpremap));

  /////////////////////////////////////// import problem matrix and RHS from files (-> Epetra)

  // read in problem
  Epetra_CrsMatrix * ptrA = 0;
  Epetra_Vector * ptrf = 0;
  Epetra_MultiVector* ptrNS = 0;

  *out << "Reading matrix market file" << std::endl;

  EpetraExt::MatrixMarketFileToCrsMatrix("A5932_re1000.txt",*fullmap,*fullmap,*fullmap,ptrA);
  EpetraExt::MatrixMarketFileToVector("b5932_re1000.txt",*fullmap,ptrf);
  //EpetraExt::MatrixMarketFileToCrsMatrix("/home/wiesner/promotion/trilinos/fc16-debug/packages/muelu/test/navierstokes/A5932_re1000.txt",*fullmap,*fullmap,*fullmap,ptrA);
  //EpetraExt::MatrixMarketFileToVector("/home/wiesner/promotion/trilinos/fc16-debug/packages/muelu/test/navierstokes/b5932_re1000.txt",*fullmap,ptrf);

  RCP<Epetra_CrsMatrix> epA = Teuchos::rcp(ptrA);
  RCP<Epetra_Vector> epv = Teuchos::rcp(ptrf);
  RCP<Epetra_MultiVector> epNS = Teuchos::rcp(ptrNS);


  /////////////////////////////////////// split system into 2x2 block system

  *out << "Split matrix into 2x2 block matrix" << std::endl;

  // split fullA into A11,..., A22
  Teuchos::RCP<Epetra_CrsMatrix> A11;
  Teuchos::RCP<Epetra_CrsMatrix> A12;
  Teuchos::RCP<Epetra_CrsMatrix> A21;
  Teuchos::RCP<Epetra_CrsMatrix> A22;

  if(MueLuTests::SplitMatrix2x2(epA,*velmap,*premap,A11,A12,A21,A22)==false)
    *out << "Problem with splitting matrix"<< std::endl;

  /////////////////////////////////////// transform Epetra objects to Xpetra (needed for MueLu)

  // build Xpetra objects from Epetra_CrsMatrix objects
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA11 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A11));
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA12 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A12));
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA21 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A21));
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA22 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A22));

  /////////////////////////////////////// generate MapExtractor object

  std::vector<Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > > xmaps;
  xmaps.push_back(xstridedvelmap);
  xmaps.push_back(xstridedpremap);

  Teuchos::RCP<const Xpetra::MapExtractor<Scalar,LocalOrdinal,GlobalOrdinal,Node> > map_extractor = Xpetra::MapExtractorFactory<Scalar,LocalOrdinal,GlobalOrdinal>::Build(xstridedfullmap,xmaps);

  /////////////////////////////////////// build blocked transfer operator
  // using the map extractor
  Teuchos::RCP<Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > bOp = Teuchos::rcp(new Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal>(map_extractor,map_extractor,10));
  bOp->setMatrix(0,0,xA11);
  bOp->setMatrix(0,1,xA12);
  bOp->setMatrix(1,0,xA21);
  bOp->setMatrix(1,1,xA22);

  bOp->fillComplete();

  //////////////////////////////////////////////////// create Hierarchy
  RCP<Hierarchy> H = rcp ( new Hierarchy() );
  H->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  //H->setDefaultVerbLevel(Teuchos::VERB_NONE);
  H->SetMaxCoarseSize(maxCoarseSize);

  //////////////////////////////////////////////////////// finest Level
  RCP<MueLu::Level> Finest = H->GetLevel();
  Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  Finest->Set("A",Teuchos::rcp_dynamic_cast<Matrix>(bOp));

  /////////////////////////////////////////////// define subblocks of A
  // make A11 block and A22 block available as variable "A" generated
  // by A11Fact and A22Fact
  RCP<SubBlockAFactory> A11Fact = rcp(new SubBlockAFactory());
  A11Fact->SetFactory("A",MueLu::NoFactory::getRCP());
  A11Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
  A11Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
  RCP<SubBlockAFactory> A22Fact = rcp(new SubBlockAFactory());
  A22Fact->SetFactory("A",MueLu::NoFactory::getRCP());
  A22Fact->SetParameter("block row",Teuchos::ParameterEntry(1));
  A22Fact->SetParameter("block col",Teuchos::ParameterEntry(1));

  ////////////////////////////////////////// prepare null space for A11
  RCP<MultiVector> nullspace11 = MultiVectorFactory::Build(xstridedvelmap, 2);  // this is a 2D standard null space

  for (int i=0; i<nDofsPerNode-1; ++i) {
    Teuchos::ArrayRCP<Scalar> nsValues = nullspace11->getDataNonConst(i);
    int numBlocks = nsValues.size() / (nDofsPerNode - 1);
    for (int j=0; j< numBlocks; ++j) {
      nsValues[j*(nDofsPerNode - 1) + i] = 1.0;
    }
  }

  Finest->Set("Nullspace1",nullspace11);

  ///////////////////////////////////////// define CoalesceDropFactory and Aggregation for A11
  // set up amalgamation for A11. Note: we're using a default null space factory (Teuchos::null)
  RCP<AmalgamationFactory> amalgFact11 = rcp(new AmalgamationFactory());
  amalgFact11->SetFactory("A", A11Fact);

  amalgFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);
  RCP<CoalesceDropFactory> dropFact11 = rcp(new CoalesceDropFactory());
  dropFact11->SetFactory("A", A11Fact);
  dropFact11->SetFactory("UnAmalgamationInfo", amalgFact11);
  dropFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);
  RCP<UncoupledAggregationFactory> CoupledAggFact11 = rcp(new UncoupledAggregationFactory());
  CoupledAggFact11->SetFactory("Graph", dropFact11);
  CoupledAggFact11->SetMinNodesPerAggregate(9);
  CoupledAggFact11->SetMaxNeighAlreadySelected(2);
  CoupledAggFact11->SetOrdering(MueLu::AggOptions::NATURAL);
  //CoupledAggFact11->SetPhase3AggCreation(0.5);

  ///////////////////////////////////////// define transfer ops for A11
#if 0
  // use PG-AMG
  RCP<PgPFactory> P11Fact = rcp(new PgPFactory());

  RCP<GenericRFactory> R11Fact = rcp(new GenericRFactory());
  Teuchos::RCP<NullspaceFactory> nspFact11 = Teuchos::rcp(new NullspaceFactory("Nullspace1",P11tentFact));

  Teuchos::RCP<NullspaceFactory> nspFact11 = Teuchos::rcp(new NullspaceFactory("Nullspace1"));

  RCP<CoarseMapFactory> coarseMapFact11 = Teuchos::rcp(new CoarseMapFactory());
  coarseMapFact11->setStridingData(stridingInfo);
  coarseMapFact11->setStridedBlockId(0);

  //////////////////////////////// define factory manager for (1,1) block
  RCP<FactoryManager> M11 = rcp(new FactoryManager());
  M11->SetFactory("A", A11Fact);
  M11->SetFactory("P", P11Fact);
  M11->SetFactory("R", R11Fact);
  M11->SetFactory("Aggregates", CoupledAggFact11);
  M11->SetFactory("UnAmalgamationInfo", amalgFact11);
  M11->SetFactory("Nullspace", nspFact11);
  // M11->SetFactory("Ptent", P11tentFact);
  M11->SetFactory("CoarseMap", coarseMapFact11);
#else
  RCP<TentativePFactory> P11Fact = rcp(new TentativePFactory());

  RCP<TransPFactory> R11Fact = rcp(new TransPFactory());

  Teuchos::RCP<NullspaceFactory> nspFact11 = Teuchos::rcp(new NullspaceFactory("Nullspace1"));
  nspFact11->SetFactory("Nullspace1",P11Fact);

  RCP<CoarseMapFactory> coarseMapFact11 = Teuchos::rcp(new CoarseMapFactory());
  coarseMapFact11->setStridingData(stridingInfo);
  coarseMapFact11->setStridedBlockId(0);

  //////////////////////////////// define factory manager for (1,1) block
  RCP<FactoryManager> M11 = rcp(new FactoryManager());
  M11->SetFactory("A", A11Fact);
  M11->SetFactory("P", P11Fact);
  M11->SetFactory("R", R11Fact);
  M11->SetFactory("Aggregates", CoupledAggFact11);
  M11->SetFactory("UnAmalgamationInfo", amalgFact11);
  M11->SetFactory("Nullspace", nspFact11);
  // M11->SetFactory("Ptent", P11Fact);
  M11->SetFactory("CoarseMap", coarseMapFact11);
#endif
  M11->SetIgnoreUserData(true);               // always use data from factories defined in factory manager

  ////////////////////////////////////////// prepare null space for A22
  RCP<MultiVector> nullspace22 = MultiVectorFactory::Build(xstridedpremap, 1);  // this is a 2D standard null space
  Teuchos::ArrayRCP<Scalar> nsValues22 = nullspace22->getDataNonConst(0);
  for (int j=0; j< nsValues22.size(); ++j) {
    nsValues22[j] = 1.0;
  }

  Finest->Set("Nullspace2",nullspace22);

  ///////////////////////////////////////// define transfer ops for A22
#if 0
  // use PGAMG
  RCP<AmalgamationFactory> amalgFact22 = rcp(new AmalgamationFactory(A22Fact));
  RCP<TentativePFactory> P22tentFact = rcp(new TentativePFactory(CoupledAggFact11, amalgFact22));

  RCP<SaPFactory> P22Fact = rcp(new SaPFactory(P22tentFact));

  //RCP<GenericRFactory> R22Fact = rcp(new GenericRFactory(P22Fact));
  RCP<TransPFactory> R22Fact = rcp(new TransPFactory(P22Fact));

  Teuchos::RCP<NullspaceFactory> nspFact22 = Teuchos::rcp(new NullspaceFactory("Nullspace2",P22tentFact));
  RCP<CoarseMapFactory> coarseMapFact22 = Teuchos::rcp(new CoarseMapFactory(CoupledAggFact11, nspFact22));
  coarseMapFact22->setStridingData(stridingInfo);
  coarseMapFact22->setStridedBlockId(1);

  //////////////////////////////// define factory manager for (2,2) block
  RCP<FactoryManager> M22 = rcp(new FactoryManager());
  M22->SetFactory("A", A22Fact);
  M22->SetFactory("P", P22Fact);
  M22->SetFactory("R", R22Fact);
  M22->SetFactory("Aggregates", AggFact22);
  M22->SetFactory("Nullspace", nspFact22);
  M22->SetFactory("Ptent", P22tentFact);
  M22->SetFactory("CoarseMap", coarseMapFact22);
  M22->SetIgnoreUserData(true);               // always use data from factories defined in factory manager

#else
  // use TentativePFactory
  RCP<AmalgamationFactory> amalgFact22 = rcp(new AmalgamationFactory());
  RCP<TentativePFactory> P22Fact = rcp(new TentativePFactory()); // check me (fed with A22) wrong column GIDS!!!

  RCP<TransPFactory> R22Fact = rcp(new TransPFactory());

  Teuchos::RCP<NullspaceFactory> nspFact22 = Teuchos::rcp(new NullspaceFactory("Nullspace2"));
  nspFact22->SetFactory("Nullspace2", P22Fact);
  RCP<CoarseMapFactory> coarseMapFact22 = Teuchos::rcp(new CoarseMapFactory());
  coarseMapFact22->setStridingData(stridingInfo);
  coarseMapFact22->setStridedBlockId(1);

  //////////////////////////////// define factory manager for (2,2) block
  RCP<FactoryManager> M22 = rcp(new FactoryManager());
  M22->SetFactory("A", A22Fact);
  M22->SetFactory("P", P22Fact);
  M22->SetFactory("R", R22Fact);
  M22->SetFactory("Aggregates", CoupledAggFact11);
  M22->SetFactory("Nullspace", nspFact22);
  M11->SetFactory("UnAmalgamationInfo", amalgFact22);
  M22->SetFactory("Ptent", P22Fact);
  M22->SetFactory("CoarseMap", coarseMapFact22);
  M22->SetIgnoreUserData(true);               // always use data from factories defined in factory manager
#endif

  /////////////////////////////////////////// define blocked transfer ops
  RCP<BlockedPFactory> PFact = rcp(new BlockedPFactory());
  PFact->AddFactoryManager(M11);
  PFact->AddFactoryManager(M22);

  RCP<GenericRFactory> RFact = rcp(new GenericRFactory());
  RFact->SetFactory("P", PFact);

  RCP<Factory> AcFact = rcp(new BlockedRAPFactory());
  AcFact->SetFactory("P", PFact);
  AcFact->SetFactory("R", RFact);

  *out << "Creating Simple Smoother" << std::endl;

  //////////////////////////////////////////////////////////////////////
  // Smoothers

  RCP<SubBlockAFactory> A00Fact = Teuchos::rcp(new SubBlockAFactory());
  A00Fact->SetFactory("A",MueLu::NoFactory::getRCP());
  A00Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
  A00Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
  std::string ifpackTypePredictSmoother;
  Teuchos::ParameterList ifpackListPredictSmoother;
  ifpackListPredictSmoother.set("relaxation: sweeps", (LocalOrdinal) 1);
  ifpackListPredictSmoother.set("relaxation: damping factor", (Scalar) 0.5);
  ifpackTypePredictSmoother = "RELAXATION";
  ifpackListPredictSmoother.set("relaxation: type", "Gauss-Seidel");
  RCP<SmootherPrototype> smoProtoPredict     = rcp( new TrilinosSmoother(ifpackTypePredictSmoother, ifpackListPredictSmoother, 0) );
  smoProtoPredict->SetFactory("A", A00Fact);
  RCP<SmootherFactory> SmooPredictFact = rcp( new SmootherFactory(smoProtoPredict) );

  RCP<FactoryManager> MPredict = rcp(new FactoryManager());
  MPredict->SetFactory("A",                 A00Fact);         // SchurComplement operator for correction step (defined as "A")
  MPredict->SetFactory("Smoother",          SmooPredictFact);    // solver/smoother for correction step
  MPredict->SetFactory("PreSmoother",               SmooPredictFact);
  MPredict->SetFactory("PostSmoother",              SmooPredictFact);
  MPredict->SetIgnoreUserData(true);               // always use data from factories defined in factory manager

  ////////////////////////////////////////////////
  // SchurComp
  // create SchurComp factory (SchurComplement smoother is provided by local FactoryManager)
  Teuchos::RCP<SchurComplementFactory> SFact = Teuchos::rcp(new SchurComplementFactory());
  SFact->SetParameter("omega", Teuchos::ParameterEntry(0.8));
  SFact->SetParameter("lumping", Teuchos::ParameterEntry(true));
  SFact->SetFactory("A", MueLu::NoFactory::getRCP()); // 2x2 blocked operator

  // define SchurComplement solver
  std::string ifpackTypeSchurSmoother;
  ifpackTypeSchurSmoother = "RELAXATION";
  Teuchos::ParameterList ifpackListSchurSmoother;
  ifpackListSchurSmoother.set("relaxation: sweeps", (LocalOrdinal) 10);
  ifpackListSchurSmoother.set("relaxation: damping factor", (Scalar) 0.8);
  ifpackListSchurSmoother.set("relaxation: type", "Gauss-Seidel");
  RCP<SmootherPrototype> smoProtoSC     = rcp( new TrilinosSmoother(ifpackTypeSchurSmoother, ifpackListSchurSmoother, 0) );
  smoProtoSC->SetFactory("A", SFact); // explicitely use SchurComplement matrix as input for smoother
  RCP<SmootherFactory> SmooSCFact = rcp( new SmootherFactory(smoProtoSC) );

  // setup local factory manager for SchurComplementFactory
  Teuchos::RCP<FactoryManager> MSchur = Teuchos::rcp(new FactoryManager());
  MSchur->SetFactory("A", SFact);              // SchurCompFactory as generating factory for SchurComp equation
  MSchur->SetFactory("Smoother", SmooSCFact);
  MSchur->SetIgnoreUserData(true);


  /////////////////////////////////////////////////////
  // create smoother prototype

  RCP<SimpleSmoother> smootherPrototype     = rcp( new SimpleSmoother() );
  smootherPrototype->SetParameter("Sweeps", Teuchos::ParameterEntry(3));
  smootherPrototype->SetParameter("Damping factor", Teuchos::ParameterEntry(0.6));

  smootherPrototype->AddFactoryManager(MPredict,0);    // set temporary factory manager for prediction step
  smootherPrototype->AddFactoryManager(MSchur,1);      // set temporary factory manager for correction step
  smootherPrototype->SetFactory("A", MueLu::NoFactory::getRCP());

  /////////////////////////////////////////////////////
  // create smoother factories
  RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(smootherPrototype) );                // pre and postsmoothing with SIMPLE on the finest and intermedium levels
  RCP<SmootherFactory>   coarseSmootherFact    = rcp( new SmootherFactory(smootherPrototype, Teuchos::null) ); // only presmoothing on finest level (we do not want to run two SIMPLE iterations on the coarsest level)

  // main factory manager
  FactoryManager M;
  M.SetFactory("A",            AcFact);
  M.SetFactory("P",            PFact);
  M.SetFactory("R",            RFact);
  M.SetFactory("Smoother",     smootherFact); // TODO fix me
  M.SetFactory("CoarseSolver", coarseSmootherFact);

  //////////////////////////////////// setup multigrid

  H->Setup(M,0,maxLevels);

  Finest->print(*out);

  RCP<Level> coarseLevel = H->GetLevel(1);
  coarseLevel->print(*out);

  //RCP<Level> coarseLevel2 = H->GetLevel(2);
  //coarseLevel2->print(*out);

  RCP<MultiVector> xLsg = MultiVectorFactory::Build(xstridedfullmap,1);

  // Use AMG directly as an iterative method
#if 0
  {
    xLsg->putScalar( (SC) 0.0);

    // Epetra_Vector -> Xpetra::Vector
    RCP<Vector> xRhs = Teuchos::rcp(new Xpetra::EpetraVector(epv));

    // calculate initial (absolute) residual
    Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> norms(1);
    xRhs->norm2(norms);
    *out << "||x_0|| = " << norms[0] << std::endl;

    // apply ten multigrid iterations
    H->Iterate(*xRhs,100,*xLsg);


    // calculate and print residual
    RCP<MultiVector> xTmp = MultiVectorFactory::Build(xstridedfullmap,1);
    bOp->apply(*xLsg,*xTmp,Teuchos::NO_TRANS,(SC)1.0,(SC)0.0);
    xRhs->update((SC)-1.0,*xTmp,(SC)1.0);
    xRhs->norm2(norms);
    *out << "||x|| = " << norms[0] << std::endl;
  }
#endif

  //
  // Solve Ax = b using AMG as a preconditioner in AztecOO
  //
  {
    RCP<Epetra_Vector> X = rcp(new Epetra_Vector(epv->Map()));
    X->PutScalar(0.0);
    Epetra_LinearProblem epetraProblem(epA.get(), X.get(), epv.get());

    AztecOO aztecSolver(epetraProblem);
    aztecSolver.SetAztecOption(AZ_solver, AZ_gmres);

    MueLu::EpetraOperator aztecPrec(H);
    aztecSolver.SetPrecOperator(&aztecPrec);

    int maxIts = 50;
    double tol = 1e-8;

    aztecSolver.Iterate(maxIts, tol);
  }

  return EXIT_SUCCESS;
}
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

    using Teuchos::RCP;
    using Teuchos::rcp;
    using Teuchos::rcpFromRef;
    using namespace MueLuTests;

    Teuchos::oblackholestream blackhole;
    Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

    bool success = false;
    bool verbose = true;
    try {
        RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
        RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
        out->setOutputToRootOnly(0);
        *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

        // Timing
        Teuchos::Time myTime("global");
        Teuchos::TimeMonitor MM(myTime);

        // read in some command line parameters
        Teuchos::CommandLineProcessor clp(false);

        int rebalanceBlocks = 1;
        clp.setOption("rebalanceBlocks",       &rebalanceBlocks,     "rebalance blocks (1=yes, else=no)");

        switch (clp.parse(argc,argv)) {
        case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:
            return EXIT_SUCCESS;
            break;
        case Teuchos::CommandLineProcessor::PARSE_ERROR:
        case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION:
            return EXIT_FAILURE;
            break;
        case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:
            break;
        }


#if defined(HAVE_MPI) && defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MUELU_ISORROPIA)
#ifndef HAVE_XPETRA_INT_LONG_LONG
        *out << "Warning: scaling test was not compiled with long long int support" << std::endl;


        // custom parameters
        LocalOrdinal maxLevels = 3;

        GlobalOrdinal maxCoarseSize=1; //FIXME clp doesn't like long long int

        int globalNumDofs = 1500;  // used for the maps
        int nDofsPerNode = 3;      // used for generating the fine level null-space

        // build strided maps
        // striding information: 2 velocity dofs and 1 pressure dof = 3 dofs per node
        std::vector<size_t> stridingInfo;
        stridingInfo.push_back(2);
        stridingInfo.push_back(1);

        /////////////////////////////////////// build strided maps
        // build strided maps:
        // xstridedfullmap: full map (velocity and pressure dof gids), continous
        // xstridedvelmap: only velocity dof gid maps (i.e. 0,1,3,4,6,7...)
        // xstridedpremap: only pressure dof gid maps (i.e. 2,5,8,...)
        Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;
        RCP<const StridedMap> xstridedfullmap = StridedMapFactory::Build(lib,globalNumDofs,0,stridingInfo,comm,-1);
        RCP<const StridedMap> xstridedvelmap  = StridedMapFactory::Build(xstridedfullmap,0);
        RCP<const StridedMap> xstridedpremap  = StridedMapFactory::Build(xstridedfullmap,1);

        /////////////////////////////////////// transform Xpetra::Map objects to Epetra
        // this is needed for AztecOO
        const RCP<const Epetra_Map> fullmap = rcpFromRef(Xpetra::toEpetra(*xstridedfullmap));
        RCP<const Epetra_Map>       velmap  = rcpFromRef(Xpetra::toEpetra(*xstridedvelmap));
        RCP<const Epetra_Map>       premap  = rcpFromRef(Xpetra::toEpetra(*xstridedpremap));

        /////////////////////////////////////// import problem matrix and RHS from files (-> Epetra)

        // read in problem
        Epetra_CrsMatrix * ptrA = 0;
        Epetra_Vector * ptrf = 0;
        Epetra_MultiVector* ptrNS = 0;

        *out << "Reading matrix market file" << std::endl;

        EpetraExt::MatrixMarketFileToCrsMatrix("A_re1000_5932.txt",*fullmap,*fullmap,*fullmap,ptrA);
        EpetraExt::MatrixMarketFileToVector("b_re1000_5932.txt",*fullmap,ptrf);
        //EpetraExt::MatrixMarketFileToCrsMatrix("/home/tobias/promotion/trilinos/fc17-dyn/packages/muelu/test/navierstokes/A_re1000_5932.txt",*fullmap,*fullmap,*fullmap,ptrA);
        //EpetraExt::MatrixMarketFileToVector("/home/tobias/promotion/trilinos/fc17-dyn/packages/muelu/test/navierstokes/b_re1000_5932.txt",*fullmap,ptrf);

        RCP<Epetra_CrsMatrix> epA = Teuchos::rcp(ptrA);
        RCP<Epetra_Vector> epv = Teuchos::rcp(ptrf);
        RCP<Epetra_MultiVector> epNS = Teuchos::rcp(ptrNS);


        /////////////////////////////////////// split system into 2x2 block system

        *out << "Split matrix into 2x2 block matrix" << std::endl;

        // split fullA into A11,..., A22
        Teuchos::RCP<Epetra_CrsMatrix> A11;
        Teuchos::RCP<Epetra_CrsMatrix> A12;
        Teuchos::RCP<Epetra_CrsMatrix> A21;
        Teuchos::RCP<Epetra_CrsMatrix> A22;

        if(SplitMatrix2x2(epA,*velmap,*premap,A11,A12,A21,A22)==false)
            *out << "Problem with splitting matrix"<< std::endl;

        /////////////////////////////////////// transform Epetra objects to Xpetra (needed for MueLu)

        // build Xpetra objects from Epetra_CrsMatrix objects
        Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA11 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A11));
        Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA12 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A12));
        Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA21 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A21));
        Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA22 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A22));

        /////////////////////////////////////// generate MapExtractor object

        std::vector<Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > > xmaps;
        xmaps.push_back(xstridedvelmap);
        xmaps.push_back(xstridedpremap);

        Teuchos::RCP<const Xpetra::MapExtractor<Scalar,LocalOrdinal,GlobalOrdinal,Node> > map_extractor = Xpetra::MapExtractorFactory<Scalar,LocalOrdinal,GlobalOrdinal>::Build(xstridedfullmap,xmaps);

        /////////////////////////////////////// build blocked transfer operator
        // using the map extractor
        Teuchos::RCP<Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > bOp = Teuchos::rcp(new Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal>(map_extractor,map_extractor,10));
        bOp->setMatrix(0,0,xA11);
        bOp->setMatrix(0,1,xA12);
        bOp->setMatrix(1,0,xA21);
        bOp->setMatrix(1,1,xA22);

        bOp->fillComplete();

        //////////////////////////////////////////////////// create Hierarchy
        RCP<Hierarchy> H = rcp ( new Hierarchy() );
        H->setDefaultVerbLevel(Teuchos::VERB_HIGH);
        //H->setDefaultVerbLevel(Teuchos::VERB_NONE);
        H->SetMaxCoarseSize(maxCoarseSize);

        //////////////////////////////////////////////////////// finest Level
        RCP<MueLu::Level> Finest = H->GetLevel();
        Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
        Finest->Set("A",Teuchos::rcp_dynamic_cast<Matrix>(bOp));


        ////////////////////////////////////////// prepare null space for A11
        RCP<MultiVector> nullspace11 = MultiVectorFactory::Build(xstridedvelmap, 2);  // this is a 2D standard null space

        for (int i=0; i<nDofsPerNode-1; ++i) {
            Teuchos::ArrayRCP<Scalar> nsValues = nullspace11->getDataNonConst(i);
            int numBlocks = nsValues.size() / (nDofsPerNode - 1);
            for (int j=0; j< numBlocks; ++j) {
                nsValues[j*(nDofsPerNode - 1) + i] = 1.0;
            }
        }

        Finest->Set("Nullspace1",nullspace11);

        ////////////////////////////////////////// prepare null space for A22
        RCP<MultiVector> nullspace22 = MultiVectorFactory::Build(xstridedpremap, 1);  // this is a 2D standard null space
        Teuchos::ArrayRCP<Scalar> nsValues22 = nullspace22->getDataNonConst(0);
        for (int j=0; j< nsValues22.size(); ++j) {
            nsValues22[j] = 1.0;
        }

        Finest->Set("Nullspace2",nullspace22);


        /////////////////////////////////////////// define rebalanced block AC factory
        // This is the main factory for "A" and defines the input for
        //   - the SubBlockAFactory objects
        //   - the rebalanced block Ac factory
        RCP<RebalanceBlockAcFactory> RebalancedAcFact = rcp(new RebalanceBlockAcFactory());

        /////////////////////////////////////////// define non-rebalanced blocked transfer ops
        RCP<BlockedPFactory> PFact = rcp(new BlockedPFactory()); // use row map index base from bOp
        RCP<GenericRFactory> RFact = rcp(new GenericRFactory());
        RFact->SetFactory("P", PFact);

        // non-rebalanced block coarse matrix factory
        // output is non-rebalanced coarse block matrix Ac
        // used as input for rebalanced block coarse factory RebalancedAcFact
        RCP<Factory> AcFact = rcp(new BlockedRAPFactory());
        AcFact->SetFactory("A", MueLu::NoFactory::getRCP());
        AcFact->SetFactory("P", PFact);  // use non-rebalanced block prolongator as input
        AcFact->SetFactory("R", RFact);  // use non-rebalanced block restrictor as input

        // define matrix sub-blocks of possibly rebalanced block matrix A
        // These are used as input for
        //   - the sub blocks of the transfer operators
        RCP<SubBlockAFactory> A11Fact = Teuchos::rcp(new SubBlockAFactory());
        A11Fact->SetFactory("A",MueLu::NoFactory::getRCP());
        A11Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
        A11Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
        RCP<SubBlockAFactory> A22Fact = Teuchos::rcp(new SubBlockAFactory());
        A22Fact->SetFactory("A",MueLu::NoFactory::getRCP());
        A22Fact->SetParameter("block row",Teuchos::ParameterEntry(1));
        A22Fact->SetParameter("block col",Teuchos::ParameterEntry(1));

        /////////////////////////////////////////// define rebalancing factories
        // define sub blocks of the coarse non-rebalanced block matrix Ac
        // input is the block operator generated by AcFact
        RCP<SubBlockAFactory> rebA11Fact = Teuchos::rcp(new SubBlockAFactory());
        rebA11Fact->SetFactory("A",AcFact);
        rebA11Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
        rebA11Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
        RCP<SubBlockAFactory> rebA22Fact = Teuchos::rcp(new SubBlockAFactory());
        rebA22Fact->SetFactory("A",AcFact);
        rebA22Fact->SetParameter("block row",Teuchos::ParameterEntry(1));
        rebA22Fact->SetParameter("block col",Teuchos::ParameterEntry(1));

        // define rebalancing factory for coarse block matrix A(1,1)
        RCP<AmalgamationFactory> rebAmalgFact11 = rcp(new AmalgamationFactory());
        rebAmalgFact11->SetFactory("A", rebA11Fact);
        rebAmalgFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

        RCP<MueLu::IsorropiaInterface<LO, GO, NO> > isoInterface1 = rcp(new MueLu::IsorropiaInterface<LO, GO, NO>());
        isoInterface1->SetFactory("A", rebA11Fact);
        isoInterface1->SetFactory("UnAmalgamationInfo", rebAmalgFact11);

        RCP<MueLu::RepartitionInterface<LO, GO, NO> > repInterface1 = rcp(new MueLu::RepartitionInterface<LO, GO, NO>());
        repInterface1->SetFactory("A", rebA11Fact);
        repInterface1->SetFactory("AmalgamatedPartition", isoInterface1);

        // Repartitioning (creates "Importer" from "Partition")
        RCP<Factory> RepartitionFact = rcp(new RepartitionFactory());
        {
            Teuchos::ParameterList paramList;
            paramList.set("repartition: min rows per proc", 200);
            paramList.set("repartition: max imbalance", 1.3);
            if(rebalanceBlocks == 1)
                paramList.set("repartition: start level",1);
            else
                paramList.set("repartition: start level",10); // supress rebalancing
            RepartitionFact->SetParameterList(paramList);
        }
        RepartitionFact->SetFactory("A", rebA11Fact);
        RepartitionFact->SetFactory("Partition", repInterface1);

        // define rebalancing factory for coarse block matrix A(1,1)
        RCP<AmalgamationFactory> rebAmalgFact22 = rcp(new AmalgamationFactory());
        rebAmalgFact22->SetFactory("A", rebA22Fact);
        rebAmalgFact22->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

        RCP<MueLu::RepartitionInterface<LO, GO, NO> > repInterface2 = rcp(new MueLu::RepartitionInterface<LO, GO, NO>());
        repInterface2->SetFactory("A", rebA22Fact);
        repInterface2->SetFactory("AmalgamatedPartition", isoInterface1);

        // second repartition factory
        RCP<Factory> RepartitionFact2 = rcp(new RepartitionFactory());
        {
            Teuchos::ParameterList paramList;
            paramList.set("repartition: min rows per proc", 100);
            paramList.set("repartition: max imbalance", 1.2);
            if(rebalanceBlocks == 1)
                paramList.set("repartition: start level",1);
            else
                paramList.set("repartition: start level",10); // supress rebalancing
            RepartitionFact2->SetParameterList(paramList);
        }
        RepartitionFact2->SetFactory("A", rebA22Fact);
        RepartitionFact2->SetFactory("Partition", repInterface2); // this is not valid

        ////////////////////////////////////////// build non-rebalanced matrix blocks
        // build factories for transfer operator P(1,1) and R(1,1)
        RCP<AmalgamationFactory> amalgFact11 = rcp(new AmalgamationFactory());
        amalgFact11->SetFactory("A", A11Fact);
        amalgFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

        RCP<CoalesceDropFactory> dropFact11 = rcp(new CoalesceDropFactory());
        dropFact11->SetFactory("A", A11Fact);
        dropFact11->SetFactory("UnAmalgamationInfo", amalgFact11);
        dropFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

        RCP<UncoupledAggregationFactory> UncoupledAggFact11 = rcp(new UncoupledAggregationFactory());
        UncoupledAggFact11->SetFactory("Graph", dropFact11);
        UncoupledAggFact11->SetMinNodesPerAggregate(9);
        UncoupledAggFact11->SetMaxNeighAlreadySelected(2);
        UncoupledAggFact11->SetOrdering("natural");

        RCP<CoarseMapFactory> coarseMapFact11 = Teuchos::rcp(new CoarseMapFactory());
        coarseMapFact11->setStridingData(stridingInfo);
        coarseMapFact11->setStridedBlockId(0);

        RCP<TentativePFactory> P11Fact = rcp(new TentativePFactory());
        RCP<TransPFactory> R11Fact = rcp(new TransPFactory());

        Teuchos::RCP<NullspaceFactory> nspFact11 = Teuchos::rcp(new NullspaceFactory("Nullspace1"));
        nspFact11->SetFactory("Nullspace1",P11Fact); // pick "Nullspace1" from Finest level

        //////////////////////////////// define factory manager for (1,1) block
        RCP<FactoryManager> M11 = rcp(new FactoryManager());
        M11->SetFactory("A", A11Fact);  // rebalanced fine-level block operator
        M11->SetFactory("P", P11Fact);  // non-rebalanced transfer operator block P(1,1)
        M11->SetFactory("R", R11Fact);  // non-rebalanced transfer operator block R(1,1)
        M11->SetFactory("Aggregates", UncoupledAggFact11);
        M11->SetFactory("Graph", dropFact11);
        M11->SetFactory("DofsPerNode", dropFact11);
        M11->SetFactory("UnAmalgamationInfo", amalgFact11);
        M11->SetFactory("Nullspace", nspFact11); // TODO check me?
        M11->SetFactory("CoarseMap", coarseMapFact11);
        M11->SetIgnoreUserData(true);               // always use data from factories defined in factory manager

        ////////////////////////////////////////// build non-rebalanced matrix blocks
        // build factories for transfer operator P(2,2) and R(2,2)
        RCP<AmalgamationFactory> amalgFact22 = rcp(new AmalgamationFactory());
        RCP<TentativePFactory> P22Fact = rcp(new TentativePFactory());
        RCP<TransPFactory> R22Fact = rcp(new TransPFactory());

        // connect null space and tentative PFactory
        Teuchos::RCP<NullspaceFactory> nspFact22 = Teuchos::rcp(new NullspaceFactory("Nullspace2"));
        nspFact22->SetFactory("Nullspace2", P22Fact); // define null space generated by P22Fact as null space for coarse level (non-rebalanced)

        RCP<CoarseMapFactory> coarseMapFact22 = Teuchos::rcp(new CoarseMapFactory());
        coarseMapFact22->setStridingData(stridingInfo);
        coarseMapFact22->setStridedBlockId(1);

        //////////////////////////////// define factory manager for (2,2) block
        RCP<FactoryManager> M22 = rcp(new FactoryManager());
        M22->SetFactory("A", A22Fact); // rebalanced fine-level block operator
        M22->SetFactory("P", P22Fact); // non-rebalanced transfer operator P(2,2)
        M22->SetFactory("R", R22Fact); // non-rebalanced transfer operator R(2,2)
        M22->SetFactory("Aggregates", UncoupledAggFact11); // aggregates from block (1,1)
        M22->SetFactory("Nullspace", nspFact22);
        M22->SetFactory("UnAmalgamationInfo", amalgFact22);
        M22->SetFactory("Ptent", P22Fact);
        M22->SetFactory("CoarseMap", coarseMapFact22);
        M22->SetIgnoreUserData(true);

        /////////////////////////////////////////// define rebalanced blocked transfer ops
        //////////////////////////////// define factory manager for (1,1) block
        RCP<FactoryManager> rebM11 = rcp(new FactoryManager());
        rebM11->SetFactory("A", AcFact ); // important: must be a 2x2 block A Factory
        rebM11->SetFactory("Importer", RepartitionFact);
        rebM11->SetFactory("Nullspace", nspFact11);
        //rebM11->SetIgnoreUserData(true);

        RCP<FactoryManager> rebM22 = rcp(new FactoryManager());
        rebM22->SetFactory("A", AcFact ); // important: must be a 2x2 block A Factory
        rebM22->SetFactory("Importer", RepartitionFact2); // use dummy repartitioning factory
        rebM22->SetFactory("Nullspace", nspFact22);

        // Reordering of the transfer operators
        RCP<RebalanceBlockInterpolationFactory> RebalancedBlockPFact = rcp(new RebalanceBlockInterpolationFactory());
        RebalancedBlockPFact->SetFactory("P", PFact); // use non-rebalanced block P operator as input
        RebalancedBlockPFact->AddFactoryManager(rebM11);
        RebalancedBlockPFact->AddFactoryManager(rebM22);

        RCP<RebalanceBlockRestrictionFactory> RebalancedBlockRFact = rcp(new RebalanceBlockRestrictionFactory());
        //RebalancedBlockRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction")));
        RebalancedBlockRFact->SetFactory("R", RFact); // non-rebalanced block P operator
        RebalancedBlockRFact->AddFactoryManager(rebM11);
        RebalancedBlockRFact->AddFactoryManager(rebM22);

        ///////////////////////////////////////// initialize non-rebalanced block transfer operators
        // output are the non-rebalanced block transfer operators used as input in AcFact to build
        // the non-rebalanced coarse level block matrix Ac
        PFact->AddFactoryManager(M11);  // use non-rebalanced information from sub block factory manager M11
        PFact->AddFactoryManager(M22);  // use non-rebalanced information from sub block factory manager M22

        ///////////////////////////////////////// initialize rebalanced coarse block AC factory
        RebalancedAcFact->SetFactory("A", AcFact);   // use non-rebalanced block operator as input
        RebalancedAcFact->AddFactoryManager(rebM11);
        RebalancedAcFact->AddFactoryManager(rebM22);

        //////////////////////////////////////////////////////////////////////
        // Smoothers

        //Another factory manager for braes sarazin smoother
        //Schur Complement Factory, using the factory to generate AcFact
        SC omega = 1.3;
        RCP<SchurComplementFactory> SFact = Teuchos::rcp(new SchurComplementFactory());
        SFact->SetParameter("omega", Teuchos::ParameterEntry(omega));
        SFact->SetFactory("A", MueLu::NoFactory::getRCP()); // this finally be the rebalanced block operator!

        //Smoother Factory, using SFact as a factory for A
        std::string ifpackSCType;
        Teuchos::ParameterList ifpackSCList;
        ifpackSCList.set("relaxation: sweeps", (LocalOrdinal) 3);
        ifpackSCList.set("relaxation: damping factor", (Scalar) 1.0);
        ifpackSCType = "RELAXATION";
        ifpackSCList.set("relaxation: type", "Gauss-Seidel");
        RCP<SmootherPrototype> smoProtoSC     = rcp( new TrilinosSmoother(ifpackSCType, ifpackSCList, 0) );
        smoProtoSC->SetFactory("A", SFact);
        RCP<SmootherFactory> SmooSCFact = rcp( new SmootherFactory(smoProtoSC) );

        RCP<BraessSarazinSmoother> smootherPrototype     = rcp( new BraessSarazinSmoother() );
        smootherPrototype->SetParameter("Sweeps", Teuchos::ParameterEntry(3));
        smootherPrototype->SetParameter("Damping factor", Teuchos::ParameterEntry(omega));
        smootherPrototype->SetFactory("A",MueLu::NoFactory::getRCP());
        RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(smootherPrototype) );

        RCP<BraessSarazinSmoother> coarseSolverPrototype = rcp( new BraessSarazinSmoother() );
        coarseSolverPrototype->SetParameter("Sweeps", Teuchos::ParameterEntry(3));
        coarseSolverPrototype->SetParameter("Damping factor", Teuchos::ParameterEntry(omega));
        coarseSolverPrototype->SetFactory("A",MueLu::NoFactory::getRCP());
        RCP<SmootherFactory>   coarseSolverFact      = rcp( new SmootherFactory(coarseSolverPrototype, Teuchos::null) );

        RCP<FactoryManager> MB = rcp(new FactoryManager());
        MB->SetFactory("A",     SFact);
        MB->SetFactory("Smoother",    SmooSCFact);
        MB->SetIgnoreUserData(true);               // always use data from factories defined in factory manager
        smootherPrototype->AddFactoryManager(MB,0);
        coarseSolverPrototype->AddFactoryManager(MB,0);


        ////////////////////////////////////////// define main factory manager
        FactoryManager M;
        M.SetFactory("A",            RebalancedAcFact);     // rebalance block AC Factory using importer
        M.SetFactory("P",            RebalancedBlockPFact); // rebalance prolongator using non-balanced Ac
        M.SetFactory("R",            RebalancedBlockRFact); // rebalance restrictor and null space using non-balanced Ac
        M.SetFactory("Smoother",     smootherFact);
        M.SetFactory("PreSmoother",     smootherFact);
        M.SetFactory("PostSmoother",     smootherFact);
        M.SetFactory("CoarseSolver", coarseSolverFact);

        H->Setup(M,0,maxLevels);

        /**out << std::endl;
         *out << "print content of multigrid levels:" << std::endl;

         Finest->print(*out);

         RCP<Level> coarseLevel = H->GetLevel(1);
         coarseLevel->print(*out);

         RCP<Level> coarseLevel2 = H->GetLevel(2);
         coarseLevel2->print(*out);*/

        RCP<MultiVector> xLsg = MultiVectorFactory::Build(xstridedfullmap,1);

        // Use AMG directly as an iterative method
#if 0
        {
            xLsg->putScalar( (SC) 0.0);

            // Epetra_Vector -> Xpetra::Vector
            RCP<Vector> xRhs = Teuchos::rcp(new Xpetra::EpetraVector(epv));

            // calculate initial (absolute) residual
            Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> norms(1);
            xRhs->norm2(norms);
            *out << "||x_0|| = " << norms[0] << std::endl;

            // apply ten multigrid iterations
            H->Iterate(*xRhs,*xLsg,100);


            // calculate and print residual
            RCP<MultiVector> xTmp = MultiVectorFactory::Build(xstridedfullmap,1);
            bOp->apply(*xLsg,*xTmp,Teuchos::NO_TRANS,(SC)1.0,(SC)0.0);
            xRhs->update((SC)-1.0,*xTmp,(SC)1.0);
            xRhs->norm2(norms);
            *out << "||x|| = " << norms[0] << std::endl;
        }
#endif

        //
        // Solve Ax = b using AMG as a preconditioner in AztecOO
        //
        {
            RCP<Epetra_Vector> X = rcp(new Epetra_Vector(epv->Map()));
            X->PutScalar(0.0);
            Epetra_LinearProblem epetraProblem(epA.get(), X.get(), epv.get());

            AztecOO aztecSolver(epetraProblem);
            aztecSolver.SetAztecOption(AZ_solver, AZ_gmres);

            MueLu::EpetraOperator aztecPrec(H);
            aztecSolver.SetPrecOperator(&aztecPrec);

            int maxIts = 50;
            double tol = 1e-8;

            aztecSolver.Iterate(maxIts, tol);
        }

#endif // end ifndef HAVE_LONG_LONG_INT
#endif // #if defined(HAVE_MPI) && defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MUELU_ISORROPIA)
        success = true;
    }
    TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

    return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
int main(int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>

  using Teuchos::RCP;
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);

  typedef Teuchos::ScalarTraits<SC> ST;

  bool success = false;
  bool verbose = true;
  try {
    RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
    //
    // Parameters
    //

    Teuchos::CommandLineProcessor clp(false);
    Galeri::Xpetra::Parameters<GO> matrixParameters(clp, 8748);
    Xpetra::Parameters xpetraParameters(clp);

    bool optRecycling           = true;  clp.setOption("recycling",             "no-recycling",             &optRecycling,           "Enable recycling of the multigrid preconditioner");

    /* DO NOT WORK YET
       bool optRecyclingRAPpattern = true;  clp.setOption("recycling-rap-pattern", "no-recycling-rap-pattern", &optRecyclingRAPpattern, "Enable recycling of Ac=RAP pattern");
       bool optRecyclingAPpattern  = false; clp.setOption("recycling-ap-pattern",  "no-recycling-ap-pattern",  &optRecyclingAPpattern,  "Enable recycling of AP pattern");
       */
    bool optRecyclingRAPpattern = false;
    bool optRecyclingAPpattern  = false;

    switch (clp.parse(argc, argv)) {
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
      case Teuchos::CommandLineProcessor::PARSE_ERROR:
      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
    }

    // option dependencies
    if (optRecycling == false) {
      optRecyclingRAPpattern = false;
      optRecyclingAPpattern  = false;
    }

    //
    // Construct the problems
    //

    RCP<const Map> map = MapFactory::Build(xpetraParameters.GetLib(), matrixParameters.GetNumGlobalElements(), 0, comm);
    Teuchos::RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
      Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList());
    RCP<Matrix> A1 = Pr->BuildMatrix();

    RCP<Matrix> A2 = Pr->BuildMatrix(); // TODO: generate another problem would be more meaningful (ex: scale A1)

    //
    // First solve
    //

    FactoryManager M;

    Hierarchy H(A1);

    if (optRecycling) {
      // Configuring "Keep" options before running the first setup.

      // Note: "Keep" flags should not be set on the default factories provided by the FactoryManager because in the current
      // implementation of FactoryManager, those factories are freed and reallocated between Hierarchy::Setup() calls (see FactoryManager::Clean() and Hierarchy::Setup()).
      // So we define our own factories here.

      // AGGREGATES:
      // Note: aggregates are only used to build Ptent, so it is not useful to keep them. Keeping Ptent is enough.
      // RCP<Factory> AggFact   = rcp(new CoupledAggregationFactory());
      // M.SetFactory("Aggregates", AggFact);
      // H.Keep("Aggregates", AggFact.get());

      // PTENT:
      RCP<Factory> PtentFact = rcp(new TentativePFactory());
      M.SetFactory("Ptent", PtentFact);
      H.Keep("P",           PtentFact.get());
    }

    RCP<Factory> AcFact = rcp(new RAPFactory());
    M.SetFactory("A", AcFact);

    if (optRecyclingRAPpattern) {
      H.Keep("RAP graph", AcFact.get());
    }
    if (optRecyclingAPpattern) {
      H.Keep("AP graph", AcFact.get());
    }
    //

    H.Setup(M);

    {
      RCP<Vector> X = VectorFactory::Build(map);
      RCP<Vector> B = VectorFactory::Build(map);

      X->putScalar((Scalar) 0.0);
      B->setSeed(846930886); B->randomize();

      int nIts = 9;
      H.Iterate(*B, *X, nIts);

      ST::magnitudeType residualNorms = Utilities::ResidualNorm(*A1, *X, *B)[0];
      if (comm->getRank() == 0)
        std::cout << "||Residual|| = " << residualNorms << std::endl;
    }

    //
    // Second solve
    //

    std::cout << "Status of the preconditioner between runs:" << std::endl;
    H.print(*getFancyOStream(Teuchos::rcpFromRef(std::cout)), MueLu::High);

    // Change the problem
    RCP<Level> finestLevel = H.GetLevel(0);
    finestLevel->Set("A", A2);

    if (optRecycling) {
      // Optional: this makes sure that the aggregates are never requested (and built) during the second run.
      // If someone request the aggregates, an exception will be thrown.
      M.SetFactory("Aggregates", MueLu::NoFactory::getRCP());
    }

    // Redo the setup
    H.Setup(M);

    {
      RCP<Vector> X = VectorFactory::Build(map);
      RCP<Vector> B = VectorFactory::Build(map);

      X->putScalar((Scalar) 0.0);
      B->setSeed(846930886); B->randomize();

      int nIts = 9;
      H.Iterate(*B, *X, nIts);

      ST::magnitudeType residualNorms = Utilities::ResidualNorm(*A2, *X, *B)[0];
      if (comm->getRank() == 0)
        std::cout << "||Residual|| = " << residualNorms << std::endl;
    }

    //
    // Clean-up
    //

    // Remove kept data from the preconditioner. This will force recomputation on future runs. "Keep" flags are also removed.

    if (optRecycling) {
      //if aggregates explicitly kept: H.Delete("Aggregates", M.GetFactory("Aggregates").get());
      H.Delete("P",           M.GetFactory("Ptent").get());
    }
    if (optRecyclingRAPpattern) {
      H.Delete("RAP graph", M.GetFactory("A").get());
    }
    if (optRecyclingAPpattern) {
      H.Delete("AP graph", M.GetFactory("A").get());
    }

    std::cout << "Status of the preconditioner at the end:" << std::endl;
    H.print(*getFancyOStream(Teuchos::rcpFromRef(std::cout)), MueLu::High);

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP;
  using Teuchos::rcp;

  //
  // MPI initialization
  //

  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, &blackhole);

  bool success = false;
  bool verbose = true;
  try {
    RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

    //
    // Process command line arguments
    //
    Teuchos::CommandLineProcessor  clp(false);
    Galeri::Xpetra::Parameters<GO> matrixParameters(clp, 81); // manage parameters of the test case
    Xpetra::Parameters             xpetraParameters(clp);     // manage parameters of xpetra

    switch (clp.parse(argc,argv)) {
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS;
      case Teuchos::CommandLineProcessor::PARSE_ERROR:
      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE;
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:          break;
      default:;
    }

    if (comm->getRank() == 0) std::cout << xpetraParameters << matrixParameters;

    //
    // Setup test case (Ax = b)
    //

    // Linear Algebra Library
    Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

    // Distribution
    RCP<const Map> map = MapFactory::Build(lib, matrixParameters.GetNumGlobalElements(), 0, comm);

    // Matrix
    RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
      Galeri::Xpetra::BuildProblem<SC, LO, GO, Map, CrsMatrixWrap, MultiVector>(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList());
    RCP<Matrix> A = Pr->BuildMatrix();

    // User defined nullspace
    RCP<MultiVector> nullSpace = VectorFactory::Build(map,1); nullSpace->putScalar((SC) 1.0);

    // Define B
    RCP<Vector> X = VectorFactory::Build(map,1);
    RCP<Vector> B = VectorFactory::Build(map,1);
    X->setSeed(846930886);
    X->randomize();
    A->apply(*X, *B, Teuchos::NO_TRANS, (SC)1.0, (SC)0.0);

    // X = 0
    X->putScalar((SC) 0.0);

    //
    // Create a multigrid configuration
    //

    // Transfer operators
    RCP<TentativePFactory> TentativePFact = rcp( new TentativePFactory() );
    RCP<SaPFactory>        SaPFact        = rcp( new SaPFactory() );
    RCP<TransPFactory>     RFact          = rcp( new TransPFactory());

    FactoryManager M;
    M.SetFactory("Ptent", TentativePFact);
    M.SetFactory("P",     SaPFact);
    M.SetFactory("R",     RFact);

    M.SetFactory("Smoother", Teuchos::null);      //skips smoother setup
    M.SetFactory("CoarseSolver", Teuchos::null);  //skips coarsest solve setup

    //
    // Multigrid setup phase
    //

    int startLevel = 0;
    int maxLevels = 10;

    std::cout << "=============== Setup transfers only ====================" << std::endl;

    Hierarchy H;
    H.SetDefaultVerbLevel(MueLu::Medium);

    RCP<Level> finestLevel = H.GetLevel();
    finestLevel->Set("A", A);
    finestLevel->Set("Nullspace", nullSpace);

    // Indicate which Hierarchy operators we want to keep
    H.Keep("P", SaPFact.get());  //SaPFact is the generating factory for P.
    H.Keep("R", RFact.get());    //RFact is the generating factory for R.
    H.Keep("Ptent", TentativePFact.get());  //SaPFact is the generating factory for P.

    H.Setup(M,startLevel,maxLevels);

    std::cout << "=============== Setup smoothers only ====================" << std::endl;

    // Create a new A.
    RCP<Matrix> newA = Pr->BuildMatrix();
    finestLevel->Set("A", newA);

    // Create Gauss-Seidel smoother.
    std::string ifpackType = "RELAXATION";
    Teuchos::ParameterList ifpackList;
    ifpackList.set("relaxation: sweeps", (LO) 3);
    ifpackList.set("relaxation: damping factor", (SC) 1.0);
    RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(ifpackType, ifpackList));

    M.SetFactory("Smoother", rcp(new SmootherFactory(smootherPrototype)));

    // Create coarsest solver.
    RCP<SmootherPrototype> coarseSolverPrototype = rcp( new DirectSolver() );
    RCP<SmootherFactory>   coarseSolverFact      = rcp( new SmootherFactory(coarseSolverPrototype, Teuchos::null) );
    M.SetFactory("CoarseSolver", coarseSolverFact);

    // Note that we pass the number of levels back in.
    H.Setup(M,startLevel, H.GetNumLevels());

    std::cout << "=============== Solve ====================" << std::endl;

    //
    // Solve Ax = B
    //

    LO nIts = 9;
    H.Iterate(*B, *X, nIts);

    //
    // Print relative residual norm
    //

    Teuchos::ScalarTraits<SC>::magnitudeType residualNorms = Utilities::ResidualNorm(*A, *X, *B)[0];
    if (comm->getRank() == 0) {
      std::ios::fmtflags f(std::cout.flags());
      std::cout << "||Residual|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(20) << residualNorms << std::endl;
       std::cout.flags(f);
    }

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
示例#8
0
  TEUCHOS_UNIT_TEST(GenericRFactory, SymmetricProblem)
  {
    out << "version: " << MueLu::Version() << std::endl;
    RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

    // generate problem
    LO maxLevels = 3;
    LO nEle = 63;
    const RCP<const Map> map = MapFactory::Build(TestHelpers::Parameters::getLib(), nEle, 0, comm);
    Teuchos::ParameterList matrixParameters;
    matrixParameters.set("nx",nEle);

    RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
      Galeri::Xpetra::BuildProblem<SC, LO, GO, Map, CrsMatrixWrap, MultiVector>("Laplace1D", map, matrixParameters);
    RCP<Matrix> Op = Pr->BuildMatrix();

    // build nullspace
    RCP<MultiVector> nullSpace = MultiVectorFactory::Build(map,1);
    nullSpace->putScalar( (SC) 1.0);
    Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> norms(1);
    nullSpace->norm1(norms);
    if (comm->getRank() == 0)
      out << "||NS|| = " << norms[0] << std::endl;

    // fill hierarchy
    RCP<Hierarchy> H = rcp( new Hierarchy() );
    H->setDefaultVerbLevel(Teuchos::VERB_HIGH);

    RCP<Level> Finest = H->GetLevel();
    Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
    Finest->Set("A",Op);                      // set fine level matrix
    Finest->Set("Nullspace",nullSpace);       // set null space information for finest level

    // define transfer operators
    RCP<CoupledAggregationFactory> CoupledAggFact = rcp(new CoupledAggregationFactory());
    CoupledAggFact->SetMinNodesPerAggregate(3);
    CoupledAggFact->SetMaxNeighAlreadySelected(0);
    CoupledAggFact->SetOrdering("natural");
    CoupledAggFact->SetPhase3AggCreation(0.5);

    RCP<SaPFactory>         Pfact = rcp( new SaPFactory());
    RCP<Factory>           Rfact = rcp( new GenericRFactory() );
    H->SetMaxCoarseSize(1);

    // setup smoothers
    Teuchos::ParameterList smootherParamList;
    smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel");
    smootherParamList.set("relaxation: sweeps", (LO) 1);
    smootherParamList.set("relaxation: damping factor", (SC) 1.0);
    RCP<SmootherPrototype> smooProto = rcp( new TrilinosSmoother("RELAXATION", smootherParamList) );
    RCP<SmootherFactory> SmooFact = rcp( new SmootherFactory(smooProto) );
    //Acfact->setVerbLevel(Teuchos::VERB_HIGH);

    RCP<SmootherFactory> coarseSolveFact = rcp(new SmootherFactory(smooProto, Teuchos::null));

    FactoryManager M;
    M.SetFactory("P", Pfact);
    M.SetFactory("R", Rfact);
    M.SetFactory("Aggregates", CoupledAggFact);
    M.SetFactory("Smoother", SmooFact);
    M.SetFactory("CoarseSolver", coarseSolveFact);

    H->Setup(M, 0, maxLevels);

    RCP<Level> coarseLevel = H->GetLevel(1);
    RCP<Matrix> P1 = coarseLevel->Get< RCP<Matrix> >("P");
    RCP<Matrix> R1 = coarseLevel->Get< RCP<Matrix> >("R");
    RCP<Level> coarseLevel2 = H->GetLevel(2);
    RCP<Matrix> P2 = coarseLevel2->Get< RCP<Matrix> >("P");
    RCP<Matrix> R2 = coarseLevel2->Get< RCP<Matrix> >("R");

    TEST_EQUALITY(Finest->IsAvailable("PreSmoother"), true);
    TEST_EQUALITY(Finest->IsAvailable("PostSmoother"), true);
    TEST_EQUALITY(coarseLevel->IsAvailable("PreSmoother"), true);
    TEST_EQUALITY(coarseLevel->IsAvailable("PostSmoother"), true);
    TEST_EQUALITY(coarseLevel2->IsAvailable("PreSmoother"), true);
    TEST_EQUALITY(coarseLevel2->IsAvailable("PostSmoother"), false);

    // test some basic multgrid data
    TEST_EQUALITY(P1->getGlobalNumEntries(), R1->getGlobalNumEntries());
    TEST_EQUALITY(P1->getGlobalNumRows(), R1->getGlobalNumCols());
    TEST_EQUALITY(P1->getGlobalNumCols(), R1->getGlobalNumRows());
    TEST_EQUALITY(P2->getGlobalNumEntries(), R2->getGlobalNumEntries());
    TEST_EQUALITY(P2->getGlobalNumRows(), R2->getGlobalNumCols());
    TEST_EQUALITY(P2->getGlobalNumCols(), R2->getGlobalNumRows());


    //RCP<Teuchos::FancyOStream> fos = getFancyOStream(Teuchos::rcpFromRef(cout));

    // since A is chosen symmetric, it is P^T = R
    // check P^T * P = R * P
    // note: the Epetra matrix-matrix multiplication using implicit transpose is buggy in parallel case
    //       (for multiplication of a square matrix with a rectangular matrix)
    //       however it seems to work for two rectangular matrices
    Teuchos::RCP<Xpetra::Matrix<Scalar,LO,GO> > RP = MueLu::Utils<Scalar,LO,GO>::Multiply(*R1,false,*P1,false,out);
    Teuchos::RCP<Xpetra::Matrix<Scalar,LO,GO> > PtP = MueLu::Utils<Scalar,LO,GO>::Multiply(*P1,true,*P1,false,out);

    RCP<Vector> x = VectorFactory::Build(RP->getDomainMap());
    RCP<Vector> bRP  = VectorFactory::Build(RP->getRangeMap());
    RCP<Vector> bPtP = VectorFactory::Build(PtP->getRangeMap());

    x->randomize();
    RP->apply(*x,*bRP);
    PtP->apply(*x,*bPtP);

    TEST_EQUALITY(bRP->norm1() - bPtP->norm1() < 1e-12, true);

    Teuchos::RCP<Xpetra::Matrix<Scalar,LO,GO> > RP2 = MueLu::Utils<Scalar,LO,GO>::Multiply(*R2,false,*P2,false,out);
    Teuchos::RCP<Xpetra::Matrix<Scalar,LO,GO> > PtP2 = MueLu::Utils<Scalar,LO,GO>::Multiply(*P2,true,*P2,false,out);

    x = VectorFactory::Build(RP2->getDomainMap());
    bRP  = VectorFactory::Build(RP2->getRangeMap());
    bPtP = VectorFactory::Build(PtP2->getRangeMap());

    x->randomize();
    RP2->apply(*x,*bRP);
    PtP2->apply(*x,*bPtP);

    TEST_EQUALITY(bRP->norm1() - bPtP->norm1() < 1e-12, true);


    //R1->describe(*fos,Teuchos::VERB_EXTREME);

    /*RCP<CrsMatrixWrap> crsP1 = rcp_dynamic_cast<CrsMatrixWrap>(P1);
      RCP<CrsMatrix> crsMat = crsP1->getCrsMatrix();
      RCP<Xpetra::EpetraCrsMatrix> epcrsMat = rcp_dynamic_cast<Xpetra::EpetraCrsMatrix>(crsMat);
      RCP<Epetra_CrsMatrix> epMat = epcrsMat->getEpetra_CrsMatrixNonConst();
      EpetraExt::RowMatrixToMatrixMarketFile( "Test.mat", *epMat);*/

    //P1->describe(*fos,Teuchos::VERB_EXTREME);

    //R1->getRangeMap()->describe(*fos,Teuchos::VERB_EXTREME);
    //P1->describe(*fos,Teuchos::VERB_EXTREME);
    //R1->describe(*fos,Teuchos::VERB_EXTREME);



  }
示例#9
0
int main(int argc, char *argv[]) {
  using Teuchos::RCP;

  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  out->setOutputToRootOnly(0);
  *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

  // Timing
  Teuchos::Time myTime("global");
  Teuchos::TimeMonitor M(myTime);

#ifndef HAVE_TEUCHOS_LONG_LONG_INT
  *out << "Warning: scaling test was not compiled with long long int support" << std::endl;
#endif

  // custom parameters
  LO maxLevels = 10;
  LO its=40;
  std::string smooType="sgs";
  int sweeps=1;
  int maxCoarseSize=1;  //FIXME clp doesn't like long long int
  std::string aggOrdering = "natural";
  int minPerAgg=2;
  int maxNbrAlreadySelected=0;

  // read in problem
  Epetra_Map emap(10201,0,*Xpetra::toEpetra(comm));
  Epetra_CrsMatrix * ptrA = 0;
  Epetra_Vector * ptrf = 0;

  std::cout << "Reading matrix market file" << std::endl;
  EpetraExt::MatrixMarketFileToCrsMatrix("Condif2Mat.mat",emap,emap,emap,ptrA);
  EpetraExt::MatrixMarketFileToVector("Condif2Rhs.mat",emap,ptrf);
  RCP<Epetra_CrsMatrix> epA = Teuchos::rcp(ptrA);
  RCP<Epetra_Vector> epv = Teuchos::rcp(ptrf);

  // Epetra_CrsMatrix -> Xpetra::Matrix
  RCP<Xpetra::CrsMatrix<double, int, int> > exA = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(epA));
  RCP<Xpetra::CrsMatrixWrap<double, int, int> > crsOp = Teuchos::rcp(new Xpetra::CrsMatrixWrap<double, int, int>(exA));
  RCP<Xpetra::Matrix<double, int, int> > Op = Teuchos::rcp_dynamic_cast<Xpetra::Matrix<double, int, int> >(crsOp);

  // Epetra_Vector -> Xpetra::Vector
  RCP<Xpetra::Vector<double,int,int> > xRhs = Teuchos::rcp(new Xpetra::EpetraVector(epv));

  // Epetra_Map -> Xpetra::Map
  const RCP< const Xpetra::Map<int, int> > map = Xpetra::toXpetra(emap);

  // build nullspace
  RCP<MultiVector> nullSpace = MultiVectorFactory::Build(map,1);
  nullSpace->putScalar( (SC) 1.0);
  /*for (size_t i=0; i<nullSpace->getLocalLength(); i++) {
    Teuchos::ArrayRCP< Scalar > data0 = nullSpace->getDataNonConst(0);
    Teuchos::ArrayRCP< Scalar > data1 = nullSpace->getDataNonConst(1);

    GlobalOrdinal gid = map->getGlobalElement(Teuchos::as<LocalOrdinal>(i));
    if(gid % 2 == 0) {
      data0[i] = 1.0; data1[i] = 0.0;
    }
    else {
      data0[i] = 0.0; data1[i] = 1.0;
    }
  }*/

  RCP<MueLu::Hierarchy<SC,LO,GO,NO,LMO> > H = rcp ( new Hierarchy() );
  H->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  H->SetMaxCoarseSize((GO) maxCoarseSize);;

  // build finest Level
  RCP<MueLu::Level> Finest = H->GetLevel();
  Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  Finest->Set("A",Op);
  Finest->Set("Nullspace",nullSpace);

  RCP<CoupledAggregationFactory> CoupledAggFact = rcp(new CoupledAggregationFactory());
  *out << "========================= Aggregate option summary  =========================" << std::endl;
  *out << "min DOFs per aggregate :                " << minPerAgg << std::endl;
  *out << "min # of root nbrs already aggregated : " << maxNbrAlreadySelected << std::endl;
  CoupledAggFact->SetMinNodesPerAggregate(minPerAgg);  //TODO should increase if run anything other than 1D
  CoupledAggFact->SetMaxNeighAlreadySelected(maxNbrAlreadySelected);
  std::transform(aggOrdering.begin(), aggOrdering.end(), aggOrdering.begin(), ::tolower);
  if (aggOrdering == "natural") {
       *out << "aggregate ordering :                    NATURAL" << std::endl;
       CoupledAggFact->SetOrdering(MueLu::AggOptions::NATURAL);
  } else if (aggOrdering == "random") {
       *out << "aggregate ordering :                    RANDOM" << std::endl;
       CoupledAggFact->SetOrdering(MueLu::AggOptions::RANDOM);
  } else if (aggOrdering == "graph") {
       *out << "aggregate ordering :                    GRAPH" << std::endl;
       CoupledAggFact->SetOrdering(MueLu::AggOptions::GRAPH);
  } else {
    std::string msg = "main: bad aggregation option """ + aggOrdering + """.";
    throw(MueLu::Exceptions::RuntimeError(msg));
  }
  CoupledAggFact->SetPhase3AggCreation(0.5);
  *out << "=============================================================================" << std::endl;

  // build transfer operators
  RCP<TentativePFactory> TentPFact = rcp(new TentativePFactory(CoupledAggFact));

  *out << " afer TentativePFactory " << std::endl;
  //RCP<TentativePFactory> Pfact = rcp(new TentativePFactory(CoupledAggFact));
  //RCP<Factory>          Rfact = rcp( new TransPFactory(Pfact));
  //RCP<SaPFactory>       Pfact = rcp( new SaPFactory(TentPFact) );
  //RCP<Factory>         Rfact = rcp( new TransPFactory(Pfact));
  RCP<ThresholdAFilterFactory> Afiltered = rcp(new ThresholdAFilterFactory("A",NULL,0.0005));
  RCP<PgPFactory>       Pfact = rcp( new PgPFactory(TentPFact,Afiltered) );
  RCP<Factory>         Rfact = rcp( new GenericRFactory(Pfact));
  RCP<RAPFactory>       Acfact = rcp( new RAPFactory(Pfact, Rfact) );
  Acfact->setVerbLevel(Teuchos::VERB_HIGH);

  *out << " after ACFactory " << std::endl;

  // build level smoothers

  RCP<SmootherPrototype> smooProto;
  std::string ifpackType;
  Teuchos::ParameterList ifpackList;
  ifpackList.set("relaxation: sweeps", (LO) sweeps);
  ifpackList.set("relaxation: damping factor", (SC) 0.9); // 0.7
    ifpackType = "RELAXATION";
    ifpackList.set("relaxation: type", "Gauss-Seidel");


  smooProto = Teuchos::rcp( new TrilinosSmoother(Xpetra::UseEpetra, ifpackType, ifpackList) );
  RCP<SmootherFactory> SmooFact;
  if (maxLevels > 1)
    SmooFact = rcp( new SmootherFactory(smooProto) );

  Teuchos::ParameterList status;

#if 0  // both variants are equivalent
  // fill FactoryManager object by hand
  FactoryManager Manager;
  Manager.SetFactory("A",            Acfact);
  Manager.SetFactory("P",            Pfact);
  Manager.SetFactory("R",            Rfact);
  Manager.SetFactory("Smoother",     SmooFact);
  Manager.SetFactory("CoarseSolver", Teuchos::null);

  status = H->Setup(Manager);
#else
  // use FullPopulate (short, but outdated?)
  status = H->FullPopulate(*Pfact,*Rfact,*Acfact,*SmooFact,0,maxLevels);
#endif

  H->SetCoarsestSolver(*SmooFact,MueLu::PRE);


  *out  << "======================\n Multigrid statistics \n======================" << std::endl;
  status.print(*out,Teuchos::ParameterList::PrintOptions().indent(2));

  /**********************************************************************************/
  /* SOLVE PROBLEM                                                                  */
  /**********************************************************************************/

  RCP<MultiVector> x = MultiVectorFactory::Build(map,1);
  RCP<Xpetra::MultiVector<double,int,int> > rhs = Teuchos::rcp_dynamic_cast<Xpetra::MultiVector<double,int,int> >(xRhs);//(new Xpetra::EpetraVector(epv));

  // Use AMG directly as an iterative method
  {
    x->putScalar( (SC) 0.0);

    H->Iterate(*rhs,its,*x);

    //x->describe(*out,Teuchos::VERB_EXTREME);
  }

  RCP<Level> coarseLevel = H->GetLevel(1);
  coarseLevel->print(*out);
  RCP<Level> coarseLevel2 = H->GetLevel(1);
  coarseLevel2->print(*out);


  //M.summarize();

  return EXIT_SUCCESS;
}
  TEUCHOS_UNIT_TEST(MultiVectorTransferFactory, ThreeLevels)
  {
    out << "version: " << MueLu::Version() << std::endl;

    out << "Tests usage on a three-level hierarchy." << std::endl;

    GO nx = 199;
    RCP<Matrix> A = TestHelpers::TestFactory<SC, LO, GO, NO, LMO>::Build1DPoisson(nx);


    // Set up three level hierarchy.
    RCP<Hierarchy> H = rcp( new Hierarchy() );
    H->setDefaultVerbLevel(Teuchos::VERB_HIGH);

    RCP<Level> fineLevel = H->GetLevel();
    fineLevel->setDefaultVerbLevel(Teuchos::VERB_HIGH);
    fineLevel->Set("A",A);                       // set fine level matrix
    RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A->getRowMap(),1);
    nullSpace->putScalar( (SC) 1.0);
    fineLevel->Set("Nullspace",nullSpace);       // set null space information for finest level

    RCP<CoupledAggregationFactory> CoupledAggFact = rcp(new CoupledAggregationFactory());
    CoupledAggFact->SetMinNodesPerAggregate(3);
    CoupledAggFact->SetMaxNeighAlreadySelected(0);
    CoupledAggFact->SetOrdering(MueLu::AggOptions::NATURAL);
    CoupledAggFact->SetPhase3AggCreation(0.5);

    RCP<TentativePFactory> PFact  = rcp(new TentativePFactory()); //just using plain aggregation
    RCP<Factory>           RFact  = rcp(new TransPFactory());
    RCP<RAPFactory>        AcFact = rcp(new RAPFactory());
    H->SetMaxCoarseSize(1);

    Teuchos::ParameterList smootherParamList;
    smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel");
    smootherParamList.set("relaxation: sweeps", (LO) 1);
    smootherParamList.set("relaxation: damping factor", (SC) 1.0);
    RCP<SmootherPrototype> smooProto = rcp( new TrilinosSmoother("RELAXATION", smootherParamList) );
    RCP<SmootherFactory> SmooFact = rcp( new SmootherFactory(smooProto) );
    AcFact->setVerbLevel(Teuchos::VERB_HIGH);

    FactoryManager M;
    M.SetFactory("Aggregates", CoupledAggFact);
    M.SetFactory("P", PFact);
    M.SetFactory("Ptent", PFact); // for nullspace
    M.SetFactory("R", RFact);
    M.SetFactory("A", AcFact);
    M.SetFactory("Smoother", SmooFact);
    M.SetFactory("CoarseSolver", SmooFact); // This line avoid dependency to Amesos/Amesos2 for this test.

    //set up the transfer factory
    RCP<MultiVector> fineOnes = MultiVectorFactory::Build(A->getRowMap(),1);
    fineOnes->putScalar(1.0);
    fineLevel->Set("onesVector",fineOnes);
    RCP<MueLu::MultiVectorTransferFactory<SC, LO, GO, NO, LMO> > mvtf = rcp(new MueLu::MultiVectorTransferFactory<SC, LO, GO, NO, LMO>("onesVector"));
    mvtf->SetFactory("R",RFact);
    M.SetFactory("onesVector",mvtf);
    AcFact->AddTransferFactory(mvtf);

    int maxLevels = 3;
    H->Setup(M, 0, maxLevels);

/*
    //FIXME we probably need to do some requests....
    coarseLevel.Request("onesVector",mvtf.get());
    coarseLevel.Request("R",RFact.get());
    coarseLevel.Request("P",TentativePFact.get());
*/

/*
    RCP<MultiVector> coarseOnes = coarseLevel.Get<RCP<MultiVector> >("onesVector",mvtf.get());
    Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> vn(1);
    coarseOnes->norm2(vn);

    TEST_FLOATING_EQUALITY(vn[0]*vn[0],((SC)fineOnes->getGlobalLength()),1e-12);
*/
  } // ThreeLevels
示例#11
0
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP; using Teuchos::rcp;
  using Teuchos::TimeMonitor;

  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  bool success = false;
  bool verbose = true;
  try {
    RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
    RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
    out->setOutputToRootOnly(0);
    *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

    //#ifndef HAVE_XPETRA_INT_LONG_LONG
    *out << "Warning: scaling test was not compiled with long long int support" << std::endl;
    //#endif

    /**********************************************************************************/
    /* SET TEST PARAMETERS                                                            */
    /**********************************************************************************/
    // Note: use --help to list available options.
    Teuchos::CommandLineProcessor clp(false);

    Xpetra::Parameters xpetraParameters(clp);             // manage parameters of xpetra

    switch (clp.parse(argc,argv)) {
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
      case Teuchos::CommandLineProcessor::PARSE_ERROR:
      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
    }

    //RCP<TimeMonitor> globalTimeMonitor = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time")));

    xpetraParameters.check();

    Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

    if (comm->getRank() == 0) {
      std::cout << xpetraParameters;
      // TODO: print custom parameters // Or use paramList::print()!
    }

    /**********************************************************************************/
    /* CREATE INITIAL MATRIX                                                          */
    /**********************************************************************************/
    RCP<const Map> bigMap;
    RCP<const Map> map1;
    RCP<const Map> map2;
    GO numElements = 500;
    GO numElements1 = 400;
    GO numElements2 = 100;

    //bigMap = MapFactory::Build(Xpetra::UseEpetra, numElements,  0, comm); // ok this is the problem :-)
    std::vector<size_t> stridingInfo;
    stridingInfo.push_back(1);
    map1   = StridedMapFactory::Build(lib, numElements1, 0, stridingInfo, comm, -1);
    map2   = StridedMapFactory::Build(lib, numElements2, numElements1, stridingInfo, comm, -1);

    std::vector<GlobalOrdinal> localGids; // vector with all local GIDs on cur proc
    Teuchos::ArrayView< const GlobalOrdinal > map1eleList = map1->getNodeElementList(); // append all local gids from map1 and map2
    localGids.insert(localGids.end(), map1eleList.begin(), map1eleList.end());
    Teuchos::ArrayView< const GlobalOrdinal > map2eleList = map2->getNodeElementList();
    localGids.insert(localGids.end(), map2eleList.begin(), map2eleList.end());
    Teuchos::ArrayView<GlobalOrdinal> eleList(&localGids[0],localGids.size());
    bigMap = StridedMapFactory::Build(lib, numElements, eleList, 0, stridingInfo, comm); // create full big map (concatenation of map1 and map2)

    std::vector<Teuchos::RCP<const Map> > maps;
    maps.push_back(map1); maps.push_back(map2);

    Teuchos::RCP<const Xpetra::MapExtractor<Scalar, LO, GO, Node> > mapExtractor = Xpetra::MapExtractorFactory<Scalar,LO,GO,Node>::Build(bigMap, maps);

    RCP<CrsMatrixWrap> Op11 = MueLuTests::GenerateProblemMatrix(map1,2,-1,-1);
    RCP<CrsMatrixWrap> Op22 = MueLuTests::GenerateProblemMatrix(map2,3,-2,-1);

    /*Op11->describe(*out,Teuchos::VERB_EXTREME);
      Op22->describe(*out,Teuchos::VERB_EXTREME);*/

    // build blocked operator
    Teuchos::RCP<Xpetra::BlockedCrsMatrix<Scalar,LO,GO,Node> > bOp = Teuchos::rcp(new Xpetra::BlockedCrsMatrix<Scalar,LO,GO>(mapExtractor,mapExtractor,10));

    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > crsMat11 = Op11->getCrsMatrix();
    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > crsMat22 = Op22->getCrsMatrix();
    bOp->setMatrix(0,0,crsMat11);
    bOp->setMatrix(1,1,crsMat22);

    bOp->fillComplete();

    // build hierarchy
    Hierarchy H;
    H.SetMaxCoarseSize(50);
    RCP<Level> levelOne = H.GetLevel();
    levelOne->Set("A", Teuchos::rcp_dynamic_cast<Matrix>(bOp)); // set blocked operator

    RCP<SubBlockAFactory> A11Fact = Teuchos::rcp(new SubBlockAFactory());
    A11Fact->SetFactory("A",MueLu::NoFactory::getRCP());
    A11Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
    A11Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
    RCP<SubBlockAFactory> A22Fact = Teuchos::rcp(new SubBlockAFactory());
    A22Fact->SetFactory("A",MueLu::NoFactory::getRCP());
    A22Fact->SetParameter("block row",Teuchos::ParameterEntry(1));
    A22Fact->SetParameter("block col",Teuchos::ParameterEntry(1));

    RCP<TentativePFactory> P11Fact = rcp(new TentativePFactory());
    RCP<TransPFactory> R11Fact = rcp(new TransPFactory());

    RCP<TentativePFactory> P22TentFact = rcp(new TentativePFactory());
    RCP<PgPFactory> P22Fact = rcp(new PgPFactory());
    RCP<GenericRFactory> R22Fact = rcp(new GenericRFactory());

    std::string ifpackType;
    Teuchos::ParameterList ifpackList;
    ifpackList.set("relaxation: sweeps", (LO) 5);
    ifpackList.set("relaxation: damping factor", (SC) 1.0);
    ifpackType = "RELAXATION";
    ifpackList.set("relaxation: type", "Symmetric Gauss-Seidel");
    RCP<SmootherPrototype> smoProto11     = rcp( new TrilinosSmoother(ifpackType, ifpackList, 0) );
    smoProto11->SetFactory("A", A11Fact);
    RCP<SmootherPrototype> smoProto22     = rcp( new TrilinosSmoother(ifpackType, ifpackList, 0) );
    smoProto22->SetFactory("A", A22Fact);

    //RCP<SmootherPrototype> smoProto11     = rcp( new DirectSolver("", Teuchos::ParameterList(), A11Fact) );
    //RCP<SmootherPrototype> smoProto22     = rcp( new DirectSolver("", Teuchos::ParameterList(), A22Fact) );

    RCP<SmootherFactory> Smoo11Fact = rcp( new SmootherFactory(smoProto11) );
    RCP<SmootherFactory> Smoo22Fact = rcp( new SmootherFactory(smoProto22) );

    RCP<FactoryManager> M11 = rcp(new FactoryManager());
    M11->SetFactory("A", A11Fact);
    M11->SetFactory("P", P11Fact);
    M11->SetFactory("Ptent", P11Fact); //for Nullspace
    M11->SetFactory("R", R11Fact);
    M11->SetFactory("Smoother", Smoo11Fact);
    M11->SetIgnoreUserData(true);

    RCP<FactoryManager> M22 = rcp(new FactoryManager());
    M22->SetFactory("A", A22Fact);
    M22->SetFactory("P", P22Fact);
    M22->SetFactory("R", R22Fact);
    M22->SetFactory("Ptent", P22TentFact); //for both P22 and Nullspace
    M22->SetFactory("Smoother", Smoo22Fact);
    M22->SetIgnoreUserData(true);

    RCP<BlockedPFactory> PFact = rcp(new BlockedPFactory());
    PFact->AddFactoryManager(M11);
    PFact->AddFactoryManager(M22);

    RCP<GenericRFactory> RFact = rcp(new GenericRFactory());

    RCP<Factory> AcFact = rcp(new BlockedRAPFactory());

    // Smoothers
    RCP<BlockedGaussSeidelSmoother> smootherPrototype     = rcp( new BlockedGaussSeidelSmoother() );
    smootherPrototype->SetParameter("Sweeps", Teuchos::ParameterEntry(2));
    smootherPrototype->SetParameter("Damping factor", Teuchos::ParameterEntry(1.0));
    smootherPrototype->AddFactoryManager(M11,0);
    smootherPrototype->AddFactoryManager(M22,1);
    RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(smootherPrototype) );

    // Coarse grid correction
    RCP<BlockedGaussSeidelSmoother> coarseSolverPrototype = rcp( new BlockedGaussSeidelSmoother() );
    coarseSolverPrototype->AddFactoryManager(M11,0);
    coarseSolverPrototype->AddFactoryManager(M22,1);
    RCP<SmootherFactory>   coarseSolverFact      = rcp( new SmootherFactory(coarseSolverPrototype, Teuchos::null) );

    // main factory manager
    FactoryManager M;
    M.SetFactory("A",            AcFact);
    M.SetFactory("P",            PFact);
    M.SetFactory("R",            RFact);
    M.SetFactory("Smoother",     smootherFact); // TODO fix me
    M.SetFactory("CoarseSolver", coarseSolverFact);

    H.SetVerbLevel(MueLu::Test);
    H.Setup(M);

    std::cout << "main AcFact = " << AcFact.get() << std::endl;
    RCP<Level> l0 = H.GetLevel(0);
    RCP<Level> l1 = H.GetLevel(1);
    RCP<Level> l2 = H.GetLevel(2);

    l0->print(*out,Teuchos::VERB_EXTREME);
    l1->print(*out,Teuchos::VERB_EXTREME);
    l2->print(*out,Teuchos::VERB_EXTREME);

    // Define B
    RCP<Vector> X = VectorFactory::Build(bigMap,1);
    RCP<Vector> B = VectorFactory::Build(bigMap,1);
    X->setSeed(846930886);
    X->randomize();
    bOp->apply(*X, *B, Teuchos::NO_TRANS, (SC)1.0, (SC)0.0);

    // X = 0
    X->putScalar((SC) 0.0);

    LO nIts = 9;
    H.Iterate(*B, *X, nIts);

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
示例#12
0
文件: Emin.cpp 项目: yunkb/trilinos
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);
  RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

  /**********************************************************************************/
  /* SET TEST PARAMETERS                                                            */
  /**********************************************************************************/
  // Note: use --help to list available options.
  Teuchos::CommandLineProcessor clp(false);

  // Default is Laplace1D with nx = 8748.
  // It's a nice size for 1D and perfect aggregation. (6561=3^8)
  //Nice size for 1D and perfect aggregation on small numbers of processors. (8748=4*3^7)
  Galeri::Xpetra::Parameters<GO> matrixParameters(clp, 8748); // manage parameters of the test case
  Xpetra::Parameters xpetraParameters(clp);             // manage parameters of xpetra

  // custom parameters
  int nSmoothers=2;
  LO maxLevels = 3;
  LO its=10;
  std::string coarseSolver="ifpack2";
  // std::string coarseSolver="amesos2";
  int pauseForDebugger=0;
  clp.setOption("nSmoothers",&nSmoothers,"number of Gauss-Seidel smoothers in the MergedSmootehrs");
  clp.setOption("maxLevels",&maxLevels,"maximum number of levels allowed. If 1, then a MergedSmoother is used on the coarse grid");
  clp.setOption("its",&its,"number of multigrid cycles");
  clp.setOption("coarseSolver",&coarseSolver,"amesos2 or ifpack2 (Tpetra specific. Ignored for Epetra)");
  clp.setOption("debug",&pauseForDebugger,"pause to attach debugger");

  switch (clp.parse(argc,argv)) {
    case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
    case Teuchos::CommandLineProcessor::PARSE_ERROR:
    case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
    case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
  }

  matrixParameters.check();
  xpetraParameters.check();
  // TODO: check custom parameters

  if (comm->getRank() == 0) {
    // matrixParameters.print();
    // xpetraParameters.print();
    // TODO: print custom parameters
  }

  if (pauseForDebugger) {
    Utils::PauseForDebugger();
  }

  /**********************************************************************************/
  /* CREATE INITIAL MATRIX                                                          */
  /**********************************************************************************/
  const RCP<const Map> map = MapFactory::Build(xpetraParameters.GetLib(), matrixParameters.GetNumGlobalElements(), 0, comm);
  RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
      Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); //TODO: Matrix vs. CrsMatrixWrap
  RCP<Matrix> Op = Pr->BuildMatrix();

#ifdef NEUMANN
  // Tranform matrix to Neumann b.c.
  // Essentially, we need to update two diagonal elements

  // TODO: calls to getLocalRowView not really needed

  Op->resumeFill();

  Teuchos::ArrayView<const LO> indices;
  Teuchos::ArrayView<const SC> values;
  Teuchos::Array<SC> newValues(2, 0.0);

  size_t myRank = Op->getRowMap()->getComm()->getRank();
  size_t nCpus  = Op->getRowMap()->getComm()->getSize();
  if (myRank == 0) { // JG TODO: can we use rowMap->isNodeLocalElement(0) instead for more genericity?
    //LO firstRow = 0;
    newValues[0] = 1.0; newValues[1] = -1.0;
    Op->getLocalRowView(0, indices, values);
    Op->replaceLocalValues(0, indices, newValues);
  }
  if (myRank == nCpus-1) { // JG TODO: can we use rowMap->isNodeLocalElement(lastRow) instead for more genericity?
    LO lastRow = Op->getNodeNumRows()-1;
    newValues[0] = -1.0; newValues[1] = 1.0;
    Op->getLocalRowView(lastRow, indices, values);
    Op->replaceLocalValues(lastRow, indices, newValues);
  }

  Op->fillComplete();
#endif // NEUMANN

  /**********************************************************************************/
  /*                                                                                */
  /**********************************************************************************/

  RCP<MultiVector> nullSpace = MultiVectorFactory::Build(map,1);
  nullSpace->putScalar( (SC) 1.0);
  Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> norms(1);
  nullSpace->norm1(norms);
  if (comm->getRank() == 0)
    std::cout << "||NS|| = " << norms[0] << std::endl;

  RCP<MueLu::Hierarchy<SC,LO,GO,NO,LMO> > H = rcp( new Hierarchy() );
  H->SetDefaultVerbLevel(MueLu::Extreme);
  H->IsPreconditioner(false);

  RCP<MueLu::Level> Finest = H->GetLevel();
  Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  Finest->Set("A", Op);
  Finest->Set("Nullspace", nullSpace);

  FactoryManager M;

  M.SetFactory("Aggregates", rcp(new CoupledAggregationFactory()));
  M.SetFactory("Ptent",      rcp(new TentativePFactory()));
  M.SetFactory("P",          rcp(new SaPFactory()));

#ifdef EMIN
  // Energy-minimization
  RCP<PatternFactory> PatternFact = rcp(new PatternFactory());
#if 0
  PatternFact->SetFactory("P", M.GetFactory("Ptent"));
#else
  PatternFact->SetFactory("P", M.GetFactory("P"));
#endif
  M.SetFactory("Ppattern",   PatternFact);

  RCP<EminPFactory> EminPFact = rcp(new EminPFactory());
  EminPFact->SetFactory("P", M.GetFactory("Ptent"));
  M.SetFactory("P",          EminPFact);

  RCP<NullspacePresmoothFactory> NullPreFact = rcp(new NullspacePresmoothFactory());
  NullPreFact->SetFactory("Nullspace", M.GetFactory("Nullspace"));
  M.SetFactory("Nullspace",  NullPreFact);
#endif

  RCP<SmootherPrototype> smooProto = gimmeMergedSmoother(nSmoothers, xpetraParameters.GetLib(), coarseSolver, comm->getRank());
  M.SetFactory("Smoother",   rcp(new SmootherFactory(smooProto)));

  Teuchos::ParameterList status;

  RCP<SmootherPrototype> coarseProto;
  if (maxLevels != 1)
    coarseProto = gimmeCoarseProto(xpetraParameters.GetLib(), coarseSolver, comm->getRank());
  else
    coarseProto = gimmeMergedSmoother(nSmoothers, xpetraParameters.GetLib(), coarseSolver, comm->getRank());

  if (coarseProto == Teuchos::null)
    return EXIT_FAILURE;

#ifdef NEUMANN
  // Use coarse level projection solver
  RCP<SmootherPrototype> projectedSolver = rcp(new ProjectorSmoother(coarseProto));
  RCP<SmootherFactory> coarseSolveFact   = rcp(new SmootherFactory(projectedSolver));
#else
  RCP<SmootherFactory> coarseSolveFact   = rcp(new SmootherFactory(coarseProto));
#endif
  M.SetFactory("CoarseSolver", coarseSolveFact);

  H->EnableGraphDumping("graph.dot", 2);

  H->Setup(M, 0, maxLevels);
  //if (comm->getRank() == 0) {
  //  std::cout  << "======================\n Multigrid statistics \n======================" << std::endl;
  //  status.print(std::cout,Teuchos::ParameterList::PrintOptions().indent(2));
  //}

  // Define RHS
  RCP<MultiVector> X = MultiVectorFactory::Build(map,1);
  RCP<MultiVector> RHS = MultiVectorFactory::Build(map,1);

  X->setSeed(846930886);
  X->randomize();
  X->norm2(norms);
  if (comm->getRank() == 0)
    std::cout << "||X_true|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << norms[0] << std::endl;


  Op->apply(*X,*RHS,Teuchos::NO_TRANS,(SC)1.0,(SC)0.0);

  // Use AMG directly as an iterative method
  {
    X->putScalar( (SC) 0.0);

    H->Iterate(*RHS,its,*X);

    X->norm2(norms);
    if (comm->getRank() == 0)
      std::cout << "||X_" << std::setprecision(2) << its << "|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(10) << norms[0] << std::endl;
  }

  return EXIT_SUCCESS;

}
示例#13
0
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP; using Teuchos::rcp;
  using Teuchos::TimeMonitor;
  //using Galeri::Xpetra::CreateCartesianCoordinates;

  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, &blackhole);

  // USER GUIDE // define communicator
  RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  // USER GUIDE // create fancy output stream
  RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  out->setOutputToRootOnly(0);
  *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

  // out->setOutputToRootOnly(-1);
  // out->precision(12);

  //FIXME we need a HAVE_MUELU_LONG_LONG_INT option
  //
  // NOTE (mfh 11 Aug 2015) I just added a HAVE_XPETRA_INT_LONG_LONG option.
  //
  #ifndef HAVE_XPETRA_INT_LONG_LONG
  *out << "Warning: scaling test was not compiled with long long int support" << std::endl;
  #endif

  //
  // SET TEST PARAMETERS
  //
  // Note: use --help to list available options.
  Teuchos::CommandLineProcessor clp(false);

  // Default is Laplace1D with nx = 8748.
  // It's a nice size for 1D and perfect aggregation. (6561 = 3^8)
  //Nice size for 1D and perfect aggregation on small numbers of processors. (8748 = 4*3^7)
  Galeri::Xpetra::Parameters<GO> matrixParameters(clp, 8748); // manage parameters of the test case
  Xpetra::Parameters xpetraParameters(clp);                   // manage parameters of xpetra

  // Custom command line parameters
  // - Debug
  int optDebug   = 0;                     clp.setOption("debug",          &optDebug,              "pause to attach debugger");
  int optDump    = 0;                     clp.setOption("dump",           &optDump,               "write matrix to file");
  int optTimings = 0;                     clp.setOption("timings",        &optTimings,            "print timings to screen");

  // - Levels
  LO  optMaxLevels     = 10;              clp.setOption("maxLevels",      &optMaxLevels,          "maximum number of levels allowed");
  int optMaxCoarseSize = 50;              clp.setOption("maxCoarseSize",  &optMaxCoarseSize,      "maximum #dofs in coarse operator"); //FIXME clp doesn't like long long int

  // - Smoothed-Aggregation
  Scalar optSaDamping = 4./3;             clp.setOption("saDamping",      &optSaDamping,          "prolongator damping factor");

  // - Aggregation
  std::string optAggOrdering = "natural"; clp.setOption("aggOrdering",    &optAggOrdering,        "aggregation ordering strategy (natural, random, graph)");
  int optMinPerAgg = 2;                   clp.setOption("minPerAgg",      &optMinPerAgg,          "minimum #DOFs per aggregate");
  int optMaxNbrSel = 0;                   clp.setOption("maxNbrSel",      &optMaxNbrSel,          "maximum # of nbrs allowed to be in other aggregates");

  // - R
  int optExplicitR = 1;                   clp.setOption("explicitR",      &optExplicitR,          "restriction will be explicitly stored as transpose of prolongator");

  // - Smoothers
  std::string optSmooType = "sgs";        clp.setOption("smooType",       &optSmooType,           "smoother type ('l1-sgs', 'sgs 'or 'cheby')");
  int optSweeps = 2;                      clp.setOption("sweeps",         &optSweeps,             "sweeps to be used in SGS (or Chebyshev degree)");

  // - Repartitioning
#if defined(HAVE_MPI) && defined(HAVE_MUELU_ZOLTAN)
  int optRepartition = 1;                 clp.setOption("repartition",    &optRepartition,        "enable repartitioning (0=no repartitioning, 1=Zoltan RCB, 2=Isorropia+Zoltan PHG");
  LO optMinRowsPerProc = 2000;            clp.setOption("minRowsPerProc", &optMinRowsPerProc,     "min #rows allowable per proc before repartitioning occurs");
  double optNnzImbalance = 1.2;           clp.setOption("nnzImbalance",   &optNnzImbalance,       "max allowable nonzero imbalance before repartitioning occurs");
#else
  int optRepartition = 0;
#endif // HAVE_MPI && HAVE_MUELU_ZOLTAN

  // - Solve
  int    optFixPoint = 1;                 clp.setOption("fixPoint",       &optFixPoint,           "apply multigrid as solver");
  int    optPrecond  = 1;                 clp.setOption("precond",        &optPrecond,            "apply multigrid as preconditioner");
  LO     optIts      = 10;                clp.setOption("its",            &optIts,                "number of multigrid cycles");
  double optTol      = 1e-7;              clp.setOption("tol",            &optTol,                "stopping tolerance for Krylov method");

  switch (clp.parse(argc, argv)) {
  case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
  case Teuchos::CommandLineProcessor::PARSE_ERROR:
  case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
  case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
  }

  RCP<TimeMonitor> globalTimeMonitor = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time")));

  if (optDebug) {
    Utils::PauseForDebugger();
  }

  matrixParameters.check();
  xpetraParameters.check();
  // TODO: check custom parameters
  std::transform(optSmooType.begin(), optSmooType.end(), optSmooType.begin(), ::tolower);
  Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

  if (comm->getRank() == 0) {
    std::cout << xpetraParameters << matrixParameters;
    // TODO: print custom parameters // Or use paramList::print()!
  }

  //
  // CREATE INITIAL MATRIX                                                          */
  //
  RCP<const Map> map;
  RCP<Matrix> A;

  RCP<MultiVector> coordinates;
  {
    TimeMonitor tm(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build"));

    map = MapFactory::Build(lib, matrixParameters.GetNumGlobalElements(), 0, comm);
    Teuchos::RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
        Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); //TODO: Matrix vs. CrsMatrixWrap
    A = Pr->BuildMatrix();

    if (matrixParameters.GetMatrixType() == "Laplace1D") {
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC, LO, GO, Map, MultiVector>("1D", map, matrixParameters.GetParameterList());
    }
    else if (matrixParameters.GetMatrixType() == "Laplace2D") {
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC, LO, GO, Map, MultiVector>("2D", map, matrixParameters.GetParameterList());
    }
    else if (matrixParameters.GetMatrixType() == "Laplace3D") {
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC, LO, GO, Map, MultiVector>("3D", map, matrixParameters.GetParameterList());
    }
  }

  //
  //
  //

  // dump matrix to file
  if (optDump) {
    std::string fileName = "Amat.mm";
    Utils::Write(fileName, *A);
  }

  // USER GUIDE   // define near null space
  RCP<MultiVector> nullspace = MultiVectorFactory::Build(map, 1);
  nullspace->putScalar( (SC) 1.0);
  // USER GUIDE   //
  Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> norms(1);

  nullspace->norm1(norms);
  if (comm->getRank() == 0)
    std::cout << "||NS|| = " << norms[0] << std::endl;

  // USER GUIDE   // create new hierarchy
  RCP<MueLu::Hierarchy<SC, LO, GO, NO> > H;
  // USER GUIDE   //

  //
  //
  // SETUP
  //
  //

  {
    TimeMonitor tm(*TimeMonitor::getNewTimer("ScalingTest: 2 - MueLu Setup"));

    //
    // Hierarchy
    //

    // USER GUIDE     // instantiate new Hierarchy object
    H = rcp(new Hierarchy());
    H->setDefaultVerbLevel(Teuchos::VERB_HIGH);
    H->SetMaxCoarseSize((GO) optMaxCoarseSize);
    // USER GUIDE     //

    //
    // Finest level
    //

    // USER GUIDE     // create a fine level object
    RCP<Level> Finest = H->GetLevel();
    Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
    Finest->Set("A",           A);
    Finest->Set("Nullspace",   nullspace);
    Finest->Set("Coordinates", coordinates); //FIXME: XCoordinates, YCoordinates, ..
    // USER GUIDE //

    //
    // FactoryManager
    //

    // USER GUIDE     // define a factory manager
    FactoryManager M;
    // USER GUIDE     //

    //
    //
    // Aggregation
    //

    {
      RCP<UncoupledAggregationFactory> AggregationFact = rcp(new UncoupledAggregationFactory());
      *out << "========================= Aggregate option summary =========================" << std::endl;
      *out << "min DOFs per aggregate :                " << optMinPerAgg << std::endl;
      *out << "min # of root nbrs already aggregated : " << optMaxNbrSel << std::endl;
      AggregationFact->SetMinNodesPerAggregate(optMinPerAgg);  //TODO should increase if run anything othpermRFacter than 1D
      AggregationFact->SetMaxNeighAlreadySelected(optMaxNbrSel);
      std::transform(optAggOrdering.begin(), optAggOrdering.end(), optAggOrdering.begin(), ::tolower);
      if (optAggOrdering == "natural" || optAggOrdering == "random" || optAggOrdering == "graph") {
        *out << "aggregate ordering :                    " << optAggOrdering << std::endl;
        AggregationFact->SetOrdering(optAggOrdering);
      } else {
        std::string msg = "main: bad aggregation option """ + optAggOrdering + """.";
        throw(MueLu::Exceptions::RuntimeError(msg));
      }
      //AggregationFact->SetPhase3AggCreation(0.5);
      M.SetFactory("Aggregates", AggregationFact);

    *out << "=============================================================================" << std::endl;
    }

    //
    // Transfer
    //

    {
      //
      // Non rebalanced factories
      //

      // USER GUIDE       // declare some factories (potentially overwrite default factories)
      RCP<SaPFactory> PFact = rcp(new SaPFactory());
      PFact->SetParameter("sa: damping factor", ParameterEntry(optSaDamping));

      RCP<Factory>    RFact = rcp(new TransPFactory());

      RCP<RAPFactory> AFact = rcp(new RAPFactory());
      AFact->setVerbLevel(Teuchos::VERB_HIGH);
      // USER GUIDE       //

      if (!optExplicitR) {
        H->SetImplicitTranspose(true);
        ParameterList Aclist = *(AFact->GetValidParameterList());
        Aclist.set("transpose: use implicit", true);
        AFact->SetParameterList(Aclist);
        if (comm->getRank() == 0) std::cout << "\n\n* ***** USING IMPLICIT RESTRICTION OPERATOR ***** *\n" << std::endl;
      }

      //
      // Repartitioning (if needed)
      //

      if (optRepartition == 0) {
        // No repartitioning

        // USER GUIDE         // configure factory manager
        M.SetFactory("P", PFact);
        M.SetFactory("R", RFact);
        M.SetFactory("A", AFact);
        // USER GUIDE         //

      } else {
#if defined(HAVE_MPI) && defined(HAVE_MUELU_ZOLTAN)
        // Repartitioning

        // The Factory Manager will be configured to return the rebalanced versions of P, R, A by default.
        // Everytime we want to use the non-rebalanced versions, we need to explicitly define the generating factory.
        RFact->SetFactory("P", PFact);
        //
        AFact->SetFactory("P", PFact);
        AFact->SetFactory("R", RFact);

        // Transfer coordinates
        RCP<CoordinatesTransferFactory> TransferCoordinatesFact = rcp(new CoordinatesTransferFactory());
        AFact->AddTransferFactory(TransferCoordinatesFact); // FIXME REMOVE

        // Compute partition (creates "Partition" object)
        if(optRepartition == 1) { // use plain Zoltan Interface

        } else if (optRepartition == 2) { // use Isorropia + Zoltan interface

        }

        // Repartitioning (creates "Importer" from "Partition")
        RCP<Factory> RepartitionFact = rcp(new RepartitionFactory());
        {
          Teuchos::ParameterList paramList;
          paramList.set("repartition: min rows per proc", optMinRowsPerProc);
          paramList.set("repartition: max imbalance", optNnzImbalance);
          RepartitionFact->SetParameterList(paramList);
        }
        RepartitionFact->SetFactory("A", AFact);

        if(optRepartition == 1) {
          RCP<Factory> ZoltanFact = rcp(new ZoltanInterface());
          ZoltanFact->SetFactory("A", AFact);
          ZoltanFact->SetFactory("Coordinates", TransferCoordinatesFact);
          RepartitionFact->SetFactory("Partition", ZoltanFact);
        }
        else if(optRepartition == 2) {
#if defined(HAVE_MPI) && defined(HAVE_MUELU_ISORROPIA)
          RCP<MueLu::IsorropiaInterface<LO, GO, NO> > isoInterface = rcp(new MueLu::IsorropiaInterface<LO, GO, NO>());
          isoInterface->SetFactory("A", AFact);
          // we don't need Coordinates here!
          RepartitionFact->SetFactory("Partition", isoInterface);
#else
          if (comm->getRank() == 0)
            std::cout << "Please recompile Trilinos with Isorropia support enabled." << std::endl;
          return EXIT_FAILURE;
#endif
        }


        // Reordering of the transfer operators
        RCP<Factory> RebalancedPFact = rcp(new RebalanceTransferFactory());
        RebalancedPFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Interpolation")));
        RebalancedPFact->SetFactory("P", PFact);
        RebalancedPFact->SetFactory("Coordinates", TransferCoordinatesFact);
        RebalancedPFact->SetFactory("Nullspace", M.GetFactory("Ptent")); // TODO

        RCP<Factory> RebalancedRFact = rcp(new RebalanceTransferFactory());
        RebalancedRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction")));
        RebalancedRFact->SetFactory("R", RFact);

        // Compute Ac from rebalanced P and R
        RCP<Factory> RebalancedAFact = rcp(new RebalanceAcFactory());
        RebalancedAFact->SetFactory("A", AFact);

        // Configure FactoryManager
        M.SetFactory("A", RebalancedAFact);
        M.SetFactory("P", RebalancedPFact);
        M.SetFactory("R", RebalancedRFact);
        M.SetFactory("Nullspace",   RebalancedPFact);
        M.SetFactory("Coordinates", RebalancedPFact);
        M.SetFactory("Importer",    RepartitionFact);

#else
        TEUCHOS_TEST_FOR_EXCEPT(true);
#endif
      } // optRepartition

    } // Transfer

    //
    // Smoothers
    //

    {
      // USER GUIDE       // define smoother object
      std::string ifpackType;
      Teuchos::ParameterList ifpackList;
      ifpackList.set("relaxation: sweeps", (LO) optSweeps);
      ifpackList.set("relaxation: damping factor", (SC) 1.0);
      if (optSmooType == "sgs") {
        ifpackType = "RELAXATION";
        ifpackList.set("relaxation: type", "Symmetric Gauss-Seidel");
      }
      // USER GUIDE       //
      else if (optSmooType == "l1-sgs") {
        ifpackType = "RELAXATION";
        ifpackList.set("relaxation: type", "Symmetric Gauss-Seidel");
        ifpackList.set("relaxation: use l1", true);
      } else if (optSmooType == "cheby") {
        ifpackType = "CHEBYSHEV";
        ifpackList.set("chebyshev: degree", (LO) optSweeps);

        if (matrixParameters.GetMatrixType() == "Laplace1D") {
          ifpackList.set("chebyshev: ratio eigenvalue", (SC) 3);
        }
        else if (matrixParameters.GetMatrixType() == "Laplace2D") {
          ifpackList.set("chebyshev: ratio eigenvalue", (SC) 7);
        }
        else if (matrixParameters.GetMatrixType() == "Laplace3D") {
          ifpackList.set("chebyshev: ratio eigenvalue", (SC) 20);
        }
        // ifpackList.set("chebyshev: max eigenvalue", (double) -1.0);
        // ifpackList.set("chebyshev: min eigenvalue", (double) 1.0);
      }

      // USER GUIDE       // create smoother factory
      RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(ifpackType, ifpackList));
      M.SetFactory("Smoother", rcp(new SmootherFactory(smootherPrototype)));
      // USER GUIDE       //
    }

    //
    // Setup preconditioner
    //

    // USER GUIDE     // setup multigrid hierarchy
    int startLevel = 0;
    H->Setup(M, startLevel, optMaxLevels);
    // USER GUIDE     //

  } // end of Setup TimeMonitor

  /*{ // some debug output
    // print out content of levels
    std::cout << "FINAL CONTENT of multigrid levels" << std::endl;
    for(LO l = 0; l < H->GetNumLevels(); l++) {
      RCP<Level> coarseLevel = H->GetLevel(l);
      coarseLevel->print(*out);
    }
    std::cout << "END FINAL CONTENT of multigrid levels" << std::endl;
  } // end debug output*/

  //
  //
  // SOLVE
  //
  //

  // USER GUIDE   // Define X, B
  RCP<MultiVector> X = MultiVectorFactory::Build(map, 1);
  RCP<MultiVector> B = MultiVectorFactory::Build(map, 1);

  X->setSeed(846930886);
  X->randomize();
  A->apply(*X, *B, Teuchos::NO_TRANS, (SC)1.0, (SC)0.0);
  B->norm2(norms);
  B->scale(1.0/norms[0]);
  // USER GUIDE   //

  //
  // Use AMG directly as an iterative method
  //

  if (optFixPoint) {

    X->putScalar( (SC) 0.0);

    TimeMonitor tm(*TimeMonitor::getNewTimer("ScalingTest: 3 - Fixed Point Solve"));

    H->IsPreconditioner(false);
    H->Iterate(*B, *X, optIts);

  } // optFixedPt

  //
  // Use AMG as a preconditioner in Belos
  //

#ifdef HAVE_MUELU_BELOS

  if (optPrecond) {

    RCP<TimeMonitor> tm;
    tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Belos Solve")));

    // USER GUIDE     // Operator and Multivector type that will be used with Belos
    typedef MultiVector          MV;
    typedef Belos::OperatorT<MV> OP;
    H->IsPreconditioner(true);

    // Define Operator and Preconditioner
    Teuchos::RCP<OP> belosOp   = Teuchos::rcp(new Belos::XpetraOp<SC, LO, GO, NO>(A)); // Turns a Xpetra::Operator object into a Belos operator
    Teuchos::RCP<OP> belosPrec = Teuchos::rcp(new Belos::MueLuOp<SC, LO, GO, NO>(H));  // Turns a MueLu::Hierarchy object into a Belos operator

    // Construct a Belos LinearProblem object
    RCP< Belos::LinearProblem<SC, MV, OP> > belosProblem = rcp(new Belos::LinearProblem<SC, MV, OP>(belosOp, X, B));
    belosProblem->setLeftPrec(belosPrec);

    bool set = belosProblem->setProblem();
    if (set == false) {
      if (comm->getRank() == 0)
        std::cout << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
      return EXIT_FAILURE;
    }
    // USER GUIDE     //

    // USER GUIDE     // Belos parameter list
    int maxIts = 100;
    Teuchos::ParameterList belosList;
    belosList.set("Maximum Iterations",    maxIts); // Maximum number of iterations allowed
    belosList.set("Convergence Tolerance", optTol);    // Relative convergence tolerance requested
    //belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::TimingDetails + Belos::StatusTestDetails);
    belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails);
    belosList.set("Output Frequency", 1);
    belosList.set("Output Style", Belos::Brief);

    // Create an iterative solver manager
    RCP< Belos::SolverManager<SC, MV, OP> > solver = rcp(new Belos::BlockCGSolMgr<SC, MV, OP>(belosProblem, rcp(&belosList, false)));
    // USER GUIDE     //

    // Perform solve
    Belos::ReturnType ret = Belos::Unconverged;
    try {
      {
        TimeMonitor tm2(*TimeMonitor::getNewTimer("ScalingTest: 5bis - Belos Internal Solve"));
        // USER GUIDE         // solve linear system
        ret = solver->solve();
        // USER GUIDE         //
      } // end of TimeMonitor

      // Get the number of iterations for this solve.
      if (comm->getRank() == 0)
        std::cout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl;

      // Compute actual residuals.
      int numrhs = 1;
      std::vector<double> actual_resids( numrhs ); //TODO: double?
      std::vector<double> rhs_norm( numrhs );
      RCP<MultiVector> resid = MultiVectorFactory::Build(map, numrhs);

      typedef Belos::OperatorTraits<SC, MV, OP>  OPT;
      typedef Belos::MultiVecTraits<SC, MV>     MVT;

      OPT::Apply( *belosOp, *X, *resid );
      MVT::MvAddMv( -1.0, *resid, 1.0, *B, *resid );
      MVT::MvNorm( *resid, actual_resids );
      MVT::MvNorm( *B, rhs_norm );
      *out<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl;
      for ( int i = 0; i<numrhs; i++) {
        double actRes = actual_resids[i]/rhs_norm[i];
        *out<<"Problem "<<i<<" : \t"<< actRes <<std::endl;
        //if (actRes > tol) { badRes = true; }
      }

    } //try

    catch(...) {
      if (comm->getRank() == 0)
        std::cout << std::endl << "ERROR:  Belos threw an error! " << std::endl;
    }

    // USER GUIDE     // Check convergence
    if (ret != Belos::Converged) {
      if (comm->getRank() == 0) std::cout << std::endl << "ERROR:  Belos did not converge! " << std::endl;
    } else {
      if (comm->getRank() == 0) std::cout << std::endl << "SUCCESS:  Belos converged!" << std::endl;
    }
    // USER GUIDE     //
    tm = Teuchos::null;

  } //if (optPrecond)

#endif // HAVE_MUELU_BELOS

  //
  // Timer final summaries
  //

  globalTimeMonitor = Teuchos::null; // stop this timer before summary

  if (optTimings)
    TimeMonitor::summarize();

  //

  return EXIT_SUCCESS;
}
示例#14
0
int main(int argc, char *argv[]) {

  // RCPs
  using Teuchos::RCP;
  using Teuchos::rcp;

  // MPI initialization using Teuchos
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

  // predetermined DOF and parameter info
  // for case 1
  int nAcousticDOFs = 126;
  int nPaddedDOFs   = 3*nAcousticDOFs;
  int nElasticDOFs  = 243;
  int nTotalDOFs    = nPaddedDOFs+nElasticDOFs;
  int nDOFsPerNode  = 3;
  int nTotalNodes   = nTotalDOFs/nDOFsPerNode;
  int nnzeros_stiff = 12869;
  int nnzeros_damp  = 98;
  int nnzeros_mass  = 5635;
  int maxDOFsPerRow = 100;
  double freq = 11.0;
  double h=0.5;
  int nx=3;
  int ny=3;
  int nz=23;
  // for case 2
  /*int nAcousticDOFs = 600;
  int nPaddedDOFs   = 3*nAcousticDOFs;
  int nElasticDOFs  = 1425;
  int nTotalDOFs    = nPaddedDOFs+nElasticDOFs;
  int nDOFsPerNode  = 3;
  int nTotalNodes   = nTotalDOFs/nDOFsPerNode;
  int nnzeros_stiff = 94557;
  int nnzeros_damp  = 338;
  int nnzeros_mass  = 39715;
  int maxDOFsPerRow = 100;
  double freq = 22.0;
  double h=0.25;
  int nx=5;
  int ny=5;
  int nz=43;*/
  // for case 3
  /*int nAcousticDOFs = 3564;
  int nPaddedDOFs   = 3*nAcousticDOFs;
  int nElasticDOFs  = 9477;
  int nTotalDOFs    = nPaddedDOFs+nElasticDOFs;
  int nDOFsPerNode  = 3;
  int nTotalNodes   = nTotalDOFs/nDOFsPerNode;
  int nnzeros_stiff = 719287;
  int nnzeros_damp  = 1250;
  int nnzeros_mass  = 296875;
  int maxDOFsPerRow = 100;
  double freq = 44.0;
  double h=0.125;
  int nx=9;
  int ny=9;
  int nz=83;*/

  // Some constants
  SC omega  = (SC) 2.0*M_PI*freq;
  SC omega2 = omega*omega;
  SC one(1.0,0.0);
  SC ii(0.0,1.0);

  // Construct a Map that puts approximately the same number of mesh nodes per processor
  RCP<const Tpetra::Map<LO, GO, NO> > map = Tpetra::createUniformContigMap<LO, GO>(nTotalNodes, comm);
  // Tpetra map into Xpetra map
  RCP<const Map> xmap = Xpetra::toXpetra(map);
  // Map takes constant number of DOFs per node
  xmap = MapFactory::Build(xmap,nDOFsPerNode);
  map = Xpetra::toTpetra(xmap);

  // Create a CrsMatrix using the map
  // K - only stiffness matrix
  // M - only mass matrix
  // A - original Helmholtz operator
  // S - shifted Helmholtz operator
  RCP<TCRS> K = rcp(new TCRS(map,maxDOFsPerRow));
  RCP<TCRS> M = rcp(new TCRS(map,maxDOFsPerRow));
  RCP<TCRS> A = rcp(new TCRS(map,maxDOFsPerRow));
  RCP<TCRS> S = rcp(new TCRS(map,maxDOFsPerRow));

  // Read data from .txt files and put into appropriate matrices
  // Note: must pad acoustic DOFs with identity matrix
  // stiffness matrix
  std::ifstream matfile_stiff;
  matfile_stiff.open("coupled_stiff.txt");
  for (int i = 0; i < nnzeros_stiff; i++) {
    int current_row, current_column;
    double current_value;
    matfile_stiff >> current_row >> current_column >> current_value ;
    SC cpx_current_value(current_value,0.0);
    if(current_row < nAcousticDOFs)    { current_row    = current_row*3;                             }
    else                               { current_row    = current_row-nAcousticDOFs+nPaddedDOFs;     }
    if(current_column < nAcousticDOFs) { current_column = current_column*3;                          }
    else                               { current_column = current_column-nAcousticDOFs+nPaddedDOFs;  }
    if(map->isNodeGlobalElement(current_row)==true) {
      K->insertGlobalValues(current_row,
			    Teuchos::tuple<GO> (current_column),
			    Teuchos::tuple<SC> (cpx_current_value));
    }
  }
  // pad with identity
  for(int i = 0; i < nAcousticDOFs; i++) {
    if(map->isNodeGlobalElement(3*i+1)==true) {
      K->insertGlobalValues(3*i+1,
			    Teuchos::tuple<GO> (3*i+1),
			    Teuchos::tuple<SC> (one));
    }
    if(map->isNodeGlobalElement(3*i+2)==true) {
      K->insertGlobalValues(3*i+2,
			    Teuchos::tuple<GO> (3*i+2),
			    Teuchos::tuple<SC> (one));
    }
  }
  // damping matrix - lump into stiffness matrix
  std::ifstream matfile_damp;
  matfile_damp.open("coupled_damp.txt");
  for (int i = 0; i < nnzeros_damp; i++) {
    int current_row, current_column;
    double current_value;
    matfile_damp >> current_row >> current_column >> current_value ;
    SC cpx_current_value(current_value,0.0);
    if(current_row < nAcousticDOFs)    { current_row    = current_row*3;                             }
    else                               { current_row    = current_row-nAcousticDOFs+nPaddedDOFs;     }
    if(current_column < nAcousticDOFs) { current_column = current_column*3;                          }
    else                               { current_column = current_column-nAcousticDOFs+nPaddedDOFs;  }
    if(map->isNodeGlobalElement(current_row)==true) {
      K->insertGlobalValues(current_row,
			    Teuchos::tuple<GO> (current_column),
			    Teuchos::tuple<SC> (ii*omega*cpx_current_value));
    }
  }
  // mass matrix
  std::ifstream matfile_mass;
  matfile_mass.open("coupled_mass.txt");
  for (int i = 0; i < nnzeros_mass; i++) {
    int current_row, current_column;
    double current_value;
    matfile_mass >> current_row >> current_column >> current_value ;
    SC cpx_current_value(current_value,0.0);
    if(current_row < nAcousticDOFs)    { current_row    = current_row*3;                             }
    else                               { current_row    = current_row-nAcousticDOFs+nPaddedDOFs;     }
    if(current_column < nAcousticDOFs) { current_column = current_column*3;                          }
    else                               { current_column = current_column-nAcousticDOFs+nPaddedDOFs;  }
    if(map->isNodeGlobalElement(current_row)==true) {
      M->insertGlobalValues(current_row,
			     Teuchos::tuple<GO> (current_column),
			     Teuchos::tuple<SC> (cpx_current_value));
    }
  }
  // pad with identity
  for(int i = 0; i < nAcousticDOFs; i++) {
    if(map->isNodeGlobalElement(3*i+1)==true) {
      M->insertGlobalValues(3*i+1,
			    Teuchos::tuple<GO> (3*i+1),
			    Teuchos::tuple<SC> (one));
    }
    if(map->isNodeGlobalElement(3*i+2)==true) {
      M->insertGlobalValues(3*i+2,
			    Teuchos::tuple<GO> (3*i+2),
			    Teuchos::tuple<SC> (one));
    }
  }
  // Complete fill
  K->fillComplete();
  M->fillComplete();

  // Turn Tpetra::CrsMatrix into MueLu::Matrix
  RCP<XCRS> mueluK_ = rcp(new XTCRS(K));
  RCP<XMAT> mueluK  = rcp(new XWRAP(mueluK_));
  RCP<XCRS> mueluM_ = rcp(new XTCRS(M));
  RCP<XMAT> mueluM  = rcp(new XWRAP(mueluM_));
  mueluK->SetFixedBlockSize(nDOFsPerNode);
  mueluM->SetFixedBlockSize(nDOFsPerNode);
  // combine to make Helmholtz and shifted Laplace operators
  RCP<XMAT> mueluA, mueluS;
  SC shift1(1.0,0.5);
  MueLu::Utils2<SC,LO,GO,NO,LMO>::TwoMatrixAdd(mueluK, false, (SC) 1.0, mueluM, false, -omega2, mueluA);
  MueLu::Utils2<SC,LO,GO,NO,LMO>::TwoMatrixAdd(mueluK, false, (SC) 1.0, mueluM, false, -shift1*omega2, mueluS);
  mueluA->fillComplete();
  mueluS->fillComplete();
  xmap=mueluA->getDomainMap();
  map=Xpetra::toTpetra(xmap);

  // MultiVector of coordinates
  // NOTE: must be of size equal to the number of DOFs!
  RCP<XMV> coordinates;
  coordinates = MultiVectorFactory::Build(xmap, 3);
  for(int k=0; k<nz; k++) {
    for(int j=0; j<ny; j++) {
      for(int i=0; i<nx; i++) {
	int curidx = i+nx*j+nx*ny*k;
        int curidx0 = curidx*3+0;
        int curidx1 = curidx*3+1;
        int curidx2 = curidx*3+2;
	SC ih = (SC) (i*h);
	SC jh = (SC) (j*h);
	SC kh = (SC) (k*h);
	if(xmap->isNodeGlobalElement(curidx0)==true) {
	  coordinates->replaceGlobalValue(curidx0,0,ih);
	  coordinates->replaceGlobalValue(curidx0,1,jh);
	  coordinates->replaceGlobalValue(curidx0,2,kh);
	}
	if(xmap->isNodeGlobalElement(curidx1)==true) {
	  coordinates->replaceGlobalValue(curidx1,0,ih);
	  coordinates->replaceGlobalValue(curidx1,1,jh);
	  coordinates->replaceGlobalValue(curidx1,2,kh);
	}
	if(xmap->isNodeGlobalElement(curidx2)==true) {
	  coordinates->replaceGlobalValue(curidx2,0,ih);
	  coordinates->replaceGlobalValue(curidx2,1,jh);
	  coordinates->replaceGlobalValue(curidx2,2,kh);
	}
      }
    }
  }

  // Multigrid Hierarchy
  RCP<Hierarchy> H = rcp(new Hierarchy(mueluK));
  FactoryManager Manager;

  // Prolongation/Restriction
  RCP<TPFactory>  TentPFact    = rcp(  new TPFactory       );
  RCP<SaPFactory> Pfact        = rcp(  new SaPFactory      );
  RCP<GRFactory>  Rfact        = rcp(  new GRFactory       );
  RCP<RAPShiftFactory> Acfact  = rcp(  new RAPShiftFactory );
  RCP<RBMFactory> RBMfact      = rcp(  new RBMFactory(3)   );
  RBMfact->setLastAcousticDOF(nPaddedDOFs);

  // Smoothers
  RCP<SmootherPrototype> smooProto;
  std::string ifpack2Type;
  Teuchos::ParameterList ifpack2List;
  // Krylov smoother
  /*ifpack2Type = "KRYLOV";
  ifpack2List.set("krylov: iteration type",1);
  ifpack2List.set("krylov: number of iterations",4);
  ifpack2List.set("krylov: residual tolerance",1e-6);
  ifpack2List.set("krylov: block size",1);
  ifpack2List.set("krylov: zero starting solution",true);
  ifpack2List.set("krylov: preconditioner type",1);*/
  // Additive Schwarz smoother
  //ifpack2Type = "SCHWARZ";
  //ifpack2List.set("schwarz: compute condest", false);
  //ifpack2List.set("schwarz: combine mode", "Add"); // use string mode for this
  //ifpack2List.set("schwarz: reordering type", "none");
  //ifpack2List.set("schwarz: filter singletons", false);
  //ifpack2List.set("schwarz: overlap level", 0);
  // ILUT smoother
  //ifpack2Type = "ILUT";
  //ifpack2List.set("fact: ilut level-of-fill", (double)1.0);
  //ifpack2List.set("fact: absolute threshold", (double)0.0);
  //ifpack2List.set("fact: relative threshold", (double)1.0);
  //ifpack2List.set("fact: relax value", (double)0.0);
  // Gauss-Seidel smoother
  ifpack2Type = "RELAXATION";
  ifpack2List.set("relaxation: sweeps", (LO) 4);
  ifpack2List.set("relaxation: damping factor", (SC) 1.0); // 0.7
  ifpack2List.set("relaxation: type", "Gauss-Seidel");

  smooProto = Teuchos::rcp( new Ifpack2Smoother(ifpack2Type,ifpack2List) );
  RCP<SmootherFactory> SmooFact;
  LO maxLevels = 6;
  if (maxLevels > 1)
    SmooFact = rcp( new SmootherFactory(smooProto) );

  // create coarsest smoother
  RCP<SmootherPrototype> coarsestSmooProto;
  // Direct Solver
  std::string type = "";
  Teuchos::ParameterList coarsestSmooList;
#if defined(HAVE_AMESOS_SUPERLU)
  coarsestSmooProto = rcp( new DirectSolver("Superlu",coarsestSmooList) );
#else
  coarsestSmooProto = rcp( new DirectSolver("Klu",coarsestSmooList) );
#endif
  RCP<SmootherFactory> coarsestSmooFact = rcp(new SmootherFactory(coarsestSmooProto, Teuchos::null));

  RCP<XMV> nullspace;
  RBMfact->BuildRBM(mueluA,coordinates,nullspace);

  // determine shifts for RAPShiftFactory
  std::vector<SC> shifts;
  for(int i=0; i<maxLevels; i++) {
    double alpha=1.0;
    double beta=0.5+((double) i)*0.2;
    SC shift(alpha,beta);
    shifts.push_back(-shift*omega2);
  }
  Acfact->SetShifts(shifts);

  // Set factory managers for prolongation/restriction
  Manager.SetFactory("P", Pfact);
  Manager.SetFactory("R", Rfact);
  Manager.SetFactory("Ptent", TentPFact);
  H->Keep("P", Pfact.get());
  H->Keep("R", Rfact.get());
  H->Keep("Ptent", TentPFact.get());
  H->GetLevel(0)->Set("Nullspace",nullspace);
  H->SetImplicitTranspose(true);
  H->Setup(Manager, 0, maxLevels);
  H->Delete("Smoother");
  H->Delete("CoarseSolver");
  H->print(*getFancyOStream(Teuchos::rcpFromRef(std::cout)), MueLu::High);

  // Set factories for smoothing and coarse grid
  Manager.SetFactory("Smoother", SmooFact);
  Manager.SetFactory("CoarseSolver", coarsestSmooFact);
  Manager.SetFactory("A", Acfact);
  Manager.SetFactory("K", Acfact);
  Manager.SetFactory("M", Acfact);
  H->GetLevel(0)->Set("A",mueluS);
  H->GetLevel(0)->Set("K",mueluK);
  H->GetLevel(0)->Set("M",mueluM);
  H->Setup(Manager, 0, H->GetNumLevels());

  // right hand side and left hand side vectors
  RCP<TVEC> X = Tpetra::createVector<SC,LO,GO,NO>(map);
  RCP<TVEC> B = Tpetra::createVector<SC,LO,GO,NO>(map);
  X->putScalar((SC) 0.0);
  if(comm->getRank()==0)
    B->replaceGlobalValue(0, 1.0);

  // Define Operator and Preconditioner
  RCP<OP> belosOp   = rcp(new Belos::XpetraOp<SC,LO,GO,NO,LMO>(mueluA));   // Turns a Xpetra::Matrix object into a Belos operator
  RCP<OP> belosPrec = rcp(new Belos::MueLuOp<SC,LO,GO,NO,LMO>(H));         // Turns a MueLu::Hierarchy object into a Belos operator

  // Construct a Belos LinearProblem object
  RCP<Problem> belosProblem = rcp(new Problem(belosOp,X,B));
  belosProblem->setRightPrec(belosPrec);
  bool set = belosProblem->setProblem();
  if (set == false) {
    if(comm->getRank()==0) {
      std::cout << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
    }
    return EXIT_FAILURE;
  }

  // Belos parameter list
  int maxIts = 100;
  double tol = 1e-6;
  Teuchos::ParameterList belosList;
  belosList.set("Maximum Iterations",    maxIts); // Maximum number of iterations allowed
  belosList.set("Convergence Tolerance", tol);    // Relative convergence tolerance requested
  belosList.set("Flexible Gmres", false);         // set flexible GMRES on

  // Create a FGMRES solver manager
  RCP<BelosSolver> solver = rcp( new BelosGMRES(belosProblem, rcp(&belosList, false)) );

  // Perform solve
  Belos::ReturnType ret = solver->solve();

  // print solution entries
  //using Teuchos::VERB_EXTREME;
  //Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::getFancyOStream( Teuchos::rcpFromRef(std::cerr) );
  //X->describe(*out,VERB_EXTREME);

  // Get the number of iterations for this solve.
  if(comm->getRank()==0) {
    std::cout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl;
  }
  // Compute actual residuals.
  int numrhs=1;
  bool badRes = false;
  std::vector<double> actual_resids(numrhs);
  std::vector<double> rhs_norm(numrhs);
  RCP<TMV> resid = Tpetra::createMultiVector<SC,LO,GO,NO>(map, numrhs);
  OPT::Apply(*belosOp, *X, *resid);
  MVT::MvAddMv(-1.0, *resid, 1.0, *B, *resid);
  MVT::MvNorm(*resid, actual_resids);
  MVT::MvNorm(*B, rhs_norm);
  if(comm->getRank()==0) {
    std::cout<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl;
  }
  for (int i = 0; i < numrhs; i++) {
    double actRes = abs(actual_resids[i])/rhs_norm[i];
    if(comm->getRank()==0) {
      std::cout <<"Problem " << i << " : \t" << actRes <<std::endl;
    }
    if (actRes > tol) { badRes = true; }
  }

  // Check convergence
  if (ret != Belos::Converged || badRes) {
    if(comm->getRank()==0) {
      std::cout << std::endl << "ERROR:  Belos did not converge! " << std::endl;
    }
    return EXIT_FAILURE;
  }
  if(comm->getRank()==0) {
    std::cout << std::endl << "SUCCESS:  Belos converged!" << std::endl;
  }

  return EXIT_SUCCESS;
}