int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using namespace MueLuTests;

#ifdef __GNUC__
#warning Navier2DBlocked_test based tests are disabled on 12/11/2013 due to some thrown exception
#endif
  return EXIT_SUCCESS;


  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);
  //
  RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  out->setOutputToRootOnly(0);
  *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

  // Timing
  Teuchos::Time myTime("global");
  Teuchos::TimeMonitor MM(myTime);

  // read in some command line parameters
  Teuchos::CommandLineProcessor clp(false);

  int rebalanceBlock0 = 1;      clp.setOption("rebalanceBlock0",       &rebalanceBlock0,     "rebalance block 0 (1=yes, else=no)");
  int rebalanceBlock1 = 1;      clp.setOption("rebalanceBlock1",       &rebalanceBlock1,     "rebalance block 1 (1=yes, else=no)");

  switch (clp.parse(argc,argv)) {
    case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
    case Teuchos::CommandLineProcessor::PARSE_ERROR:
    case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
    case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
  }


#if defined(HAVE_MPI) && defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MUELU_ISORROPIA)
#ifndef HAVE_TEUCHOS_LONG_LONG_INT
  *out << "Warning: scaling test was not compiled with long long int support" << std::endl;


  // custom parameters
  LocalOrdinal maxLevels = 3;

  GlobalOrdinal maxCoarseSize=1; //FIXME clp doesn't like long long int

  int globalNumDofs = 8898;  // used for the maps
  int nDofsPerNode = 3;      // used for generating the fine level null-space

  // build strided maps
  // striding information: 2 velocity dofs and 1 pressure dof = 3 dofs per node
  std::vector<size_t> stridingInfo;
  stridingInfo.push_back(2);
  stridingInfo.push_back(1);

  /////////////////////////////////////// build strided maps
  // build strided maps:
  // xstridedfullmap: full map (velocity and pressure dof gids), continous
  // xstridedvelmap: only velocity dof gid maps (i.e. 0,1,3,4,6,7...)
  // xstridedpremap: only pressure dof gid maps (i.e. 2,5,8,...)
  Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;
  RCP<StridedMap> xstridedfullmap = StridedMapFactory::Build(lib,globalNumDofs,0,stridingInfo,comm,-1);
  RCP<StridedMap> xstridedvelmap  = StridedMapFactory::Build(xstridedfullmap,0);
  RCP<StridedMap> xstridedpremap  = StridedMapFactory::Build(xstridedfullmap,1);

  /////////////////////////////////////// transform Xpetra::Map objects to Epetra
  // this is needed for AztecOO
  const RCP<const Epetra_Map> fullmap = rcpFromRef(Xpetra::toEpetra(*xstridedfullmap));
  RCP<const Epetra_Map>       velmap  = rcpFromRef(Xpetra::toEpetra(*xstridedvelmap));
  RCP<const Epetra_Map>       premap  = rcpFromRef(Xpetra::toEpetra(*xstridedpremap));

  /////////////////////////////////////// import problem matrix and RHS from files (-> Epetra)

  // read in problem
  Epetra_CrsMatrix * ptrA = 0;
  Epetra_Vector * ptrf = 0;
  Epetra_MultiVector* ptrNS = 0;

  *out << "Reading matrix market file" << std::endl;

  EpetraExt::MatrixMarketFileToCrsMatrix("A5932_re1000.txt",*fullmap,*fullmap,*fullmap,ptrA);
  EpetraExt::MatrixMarketFileToVector("b5932_re1000.txt",*fullmap,ptrf);
  //EpetraExt::MatrixMarketFileToCrsMatrix("/home/tobias/promotion/trilinos/fc17-dyn/packages/muelu/test/navierstokes/A5932_re1000.txt",*fullmap,*fullmap,*fullmap,ptrA);
  //EpetraExt::MatrixMarketFileToVector("/home/tobias/promotion/trilinos/fc17-dyn/packages/muelu/test/navierstokes/b5932_re1000.txt",*fullmap,ptrf);

  RCP<Epetra_CrsMatrix> epA = Teuchos::rcp(ptrA);
  RCP<Epetra_Vector> epv = Teuchos::rcp(ptrf);
  RCP<Epetra_MultiVector> epNS = Teuchos::rcp(ptrNS);


  /////////////////////////////////////// split system into 2x2 block system

  *out << "Split matrix into 2x2 block matrix" << std::endl;

  // split fullA into A11,..., A22
  Teuchos::RCP<Epetra_CrsMatrix> A11;
  Teuchos::RCP<Epetra_CrsMatrix> A12;
  Teuchos::RCP<Epetra_CrsMatrix> A21;
  Teuchos::RCP<Epetra_CrsMatrix> A22;

  if(SplitMatrix2x2(epA,*velmap,*premap,A11,A12,A21,A22)==false)
    *out << "Problem with splitting matrix"<< std::endl;

  /////////////////////////////////////// transform Epetra objects to Xpetra (needed for MueLu)

  // build Xpetra objects from Epetra_CrsMatrix objects
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA11 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A11));
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA12 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A12));
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA21 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A21));
  Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA22 = Teuchos::rcp(new Xpetra::EpetraCrsMatrix(A22));

  /////////////////////////////////////// generate MapExtractor object

  std::vector<Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > > xmaps;
  xmaps.push_back(xstridedvelmap);
  xmaps.push_back(xstridedpremap);

  Teuchos::RCP<const Xpetra::MapExtractor<Scalar,LocalOrdinal,GlobalOrdinal,Node> > map_extractor = Xpetra::MapExtractorFactory<Scalar,LocalOrdinal,GlobalOrdinal>::Build(xstridedfullmap,xmaps);

  /////////////////////////////////////// build blocked transfer operator
  // using the map extractor
  Teuchos::RCP<Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > bOp = Teuchos::rcp(new Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal>(map_extractor,map_extractor,10));
  bOp->setMatrix(0,0,xA11);
  bOp->setMatrix(0,1,xA12);
  bOp->setMatrix(1,0,xA21);
  bOp->setMatrix(1,1,xA22);

  bOp->fillComplete();

  //////////////////////////////////////////////////// create Hierarchy
  RCP<Hierarchy> H = rcp ( new Hierarchy() );
  H->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  //H->setDefaultVerbLevel(Teuchos::VERB_NONE);
  H->SetMaxCoarseSize(maxCoarseSize);

  //////////////////////////////////////////////////////// finest Level
  RCP<MueLu::Level> Finest = H->GetLevel();
  Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
  Finest->Set("A",Teuchos::rcp_dynamic_cast<Matrix>(bOp));


  ////////////////////////////////////////// prepare null space for A11
  RCP<MultiVector> nullspace11 = MultiVectorFactory::Build(xstridedvelmap, 2);  // this is a 2D standard null space

  for (int i=0; i<nDofsPerNode-1; ++i) {
    Teuchos::ArrayRCP<Scalar> nsValues = nullspace11->getDataNonConst(i);
    int numBlocks = nsValues.size() / (nDofsPerNode - 1);
    for (int j=0; j< numBlocks; ++j) {
      nsValues[j*(nDofsPerNode - 1) + i] = 1.0;
    }
  }

  Finest->Set("Nullspace1",nullspace11);

  ////////////////////////////////////////// prepare null space for A22
  RCP<MultiVector> nullspace22 = MultiVectorFactory::Build(xstridedpremap, 1);  // this is a 2D standard null space
  Teuchos::ArrayRCP<Scalar> nsValues22 = nullspace22->getDataNonConst(0);
  for (int j=0; j< nsValues22.size(); ++j) {
    nsValues22[j] = 1.0;
  }

  Finest->Set("Nullspace2",nullspace22);


  /////////////////////////////////////////// define rebalanced block AC factory
  // This is the main factory for "A" and defines the input for
  //   - the SubBlockAFactory objects
  //   - the rebalanced block Ac factory
  RCP<RebalanceBlockAcFactory> RebalancedAcFact = rcp(new RebalanceBlockAcFactory());

  /////////////////////////////////////////// define non-rebalanced blocked transfer ops
  RCP<BlockedPFactory> PFact = rcp(new BlockedPFactory()); // use row map index base from bOp
  RCP<GenericRFactory> RFact = rcp(new GenericRFactory());
  RFact->SetFactory("P", PFact);

  // non-rebalanced block coarse matrix factory
  // output is non-rebalanced coarse block matrix Ac
  // used as input for rebalanced block coarse factory RebalancedAcFact
  RCP<Factory> AcFact = rcp(new BlockedRAPFactory());
  AcFact->SetFactory("A", MueLu::NoFactory::getRCP());
  AcFact->SetFactory("P", PFact);  // use non-rebalanced block prolongator as input
  AcFact->SetFactory("R", RFact);  // use non-rebalanced block restrictor as input

  // define matrix sub-blocks of possibly rebalanced block matrix A
  // These are used as input for
  //   - the sub blocks of the transfer operators
  RCP<SubBlockAFactory> A11Fact = Teuchos::rcp(new SubBlockAFactory());
  A11Fact->SetFactory("A",MueLu::NoFactory::getRCP());
  A11Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
  A11Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
  RCP<SubBlockAFactory> A22Fact = Teuchos::rcp(new SubBlockAFactory());
  A22Fact->SetFactory("A",MueLu::NoFactory::getRCP());
  A22Fact->SetParameter("block row",Teuchos::ParameterEntry(1));
  A22Fact->SetParameter("block col",Teuchos::ParameterEntry(1));

  /////////////////////////////////////////// define rebalancing factories
  // define sub blocks of the coarse non-rebalanced block matrix Ac
  // input is the block operator generated by AcFact
  RCP<SubBlockAFactory> rebA11Fact = Teuchos::rcp(new SubBlockAFactory());
  rebA11Fact->SetFactory("A",AcFact);
  rebA11Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
  rebA11Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
  RCP<SubBlockAFactory> rebA22Fact = Teuchos::rcp(new SubBlockAFactory());
  rebA22Fact->SetFactory("A",AcFact);
  rebA22Fact->SetParameter("block row",Teuchos::ParameterEntry(1));
  rebA22Fact->SetParameter("block col",Teuchos::ParameterEntry(1));

  // define rebalancing factory for coarse block matrix A(1,1)
  RCP<AmalgamationFactory> rebAmalgFact11 = rcp(new AmalgamationFactory());
  rebAmalgFact11->SetFactory("A", rebA11Fact);
  rebAmalgFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

  RCP<MueLu::IsorropiaInterface<LO, GO, NO, LMO> > isoInterface1 = rcp(new MueLu::IsorropiaInterface<LO, GO, NO, LMO>());
  isoInterface1->SetFactory("A", rebA11Fact);
  isoInterface1->SetFactory("UnAmalgamationInfo", rebAmalgFact11);

  RCP<MueLu::RepartitionInterface<LO, GO, NO, LMO> > repInterface1 = rcp(new MueLu::RepartitionInterface<LO, GO, NO, LMO>());
  repInterface1->SetFactory("A", rebA11Fact);
  repInterface1->SetFactory("AmalgamatedPartition", isoInterface1);
  repInterface1->SetFactory("UnAmalgamationInfo", rebAmalgFact11);

  // Repartitioning (creates "Importer" from "Partition")
  RCP<Factory> RepartitionFact = rcp(new RepartitionFactory());
  {
    Teuchos::ParameterList paramList;
    paramList.set("minRowsPerProcessor", 200);
    paramList.set("nonzeroImbalance", 1.3);
    if(rebalanceBlock0 == 1)
      paramList.set("startLevel",1);
    else
      paramList.set("startLevel",10); // supress rebalancing
    RepartitionFact->SetParameterList(paramList);
  }
  RepartitionFact->SetFactory("A", rebA11Fact);
  RepartitionFact->SetFactory("Partition", repInterface1);

  // define rebalancing factory for coarse block matrix A(1,1)
  RCP<AmalgamationFactory> rebAmalgFact22 = rcp(new AmalgamationFactory());
  rebAmalgFact22->SetFactory("A", rebA22Fact);
  rebAmalgFact22->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

  RCP<MueLu::IsorropiaInterface<LO, GO, NO, LMO> > isoInterface2 = rcp(new MueLu::IsorropiaInterface<LO, GO, NO, LMO>());
  isoInterface2->SetFactory("A", rebA22Fact);
  isoInterface2->SetFactory("UnAmalgamationInfo", rebAmalgFact22);

  RCP<MueLu::RepartitionInterface<LO, GO, NO, LMO> > repInterface2 = rcp(new MueLu::RepartitionInterface<LO, GO, NO, LMO>());
  repInterface2->SetFactory("A", rebA22Fact);
  repInterface2->SetFactory("AmalgamatedPartition", isoInterface2);
  repInterface2->SetFactory("UnAmalgamationInfo", rebAmalgFact22);

  // second repartition factory
  RCP<Factory> RepartitionFact2 = rcp(new RepartitionFactory());
  {
    Teuchos::ParameterList paramList;
    paramList.set("minRowsPerProcessor", 100);
    paramList.set("nonzeroImbalance", 1.2);
    if(rebalanceBlock1 == 1)
      paramList.set("startLevel",1);
    else
      paramList.set("startLevel",10); // supress rebalancing
    RepartitionFact2->SetParameterList(paramList);
  }
  RepartitionFact2->SetFactory("A", rebA22Fact);
  RepartitionFact2->SetFactory("Partition", repInterface2); // this is not valid

  ////////////////////////////////////////// build non-rebalanced matrix blocks
  // build factories for transfer operator P(1,1) and R(1,1)
  RCP<AmalgamationFactory> amalgFact11 = rcp(new AmalgamationFactory());
  amalgFact11->SetFactory("A", A11Fact);
  amalgFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

  RCP<CoalesceDropFactory> dropFact11 = rcp(new CoalesceDropFactory());
  dropFact11->SetFactory("A", A11Fact);
  dropFact11->SetFactory("UnAmalgamationInfo", amalgFact11);
  dropFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

  RCP<UncoupledAggregationFactory> UncoupledAggFact11 = rcp(new UncoupledAggregationFactory());
  UncoupledAggFact11->SetFactory("Graph", dropFact11);
  UncoupledAggFact11->SetMinNodesPerAggregate(9);
  UncoupledAggFact11->SetMaxNeighAlreadySelected(2);
  UncoupledAggFact11->SetOrdering(MueLu::AggOptions::NATURAL);

  RCP<CoarseMapFactory> coarseMapFact11 = Teuchos::rcp(new CoarseMapFactory());
  coarseMapFact11->setStridingData(stridingInfo);
  coarseMapFact11->setStridedBlockId(0);

  RCP<TentativePFactory> P11Fact = rcp(new TentativePFactory());
  RCP<TransPFactory> R11Fact = rcp(new TransPFactory());

  Teuchos::RCP<NullspaceFactory> nspFact11 = Teuchos::rcp(new NullspaceFactory("Nullspace1"));
  nspFact11->SetFactory("Nullspace1",P11Fact); // pick "Nullspace1" from Finest level

  //////////////////////////////// define factory manager for (1,1) block
  RCP<FactoryManager> M11 = rcp(new FactoryManager());
  M11->SetFactory("A", A11Fact);  // rebalanced fine-level block operator
  M11->SetFactory("P", P11Fact);  // non-rebalanced transfer operator block P(1,1)
  M11->SetFactory("R", R11Fact);  // non-rebalanced transfer operator block R(1,1)
  M11->SetFactory("Aggregates", UncoupledAggFact11);
  M11->SetFactory("Graph", dropFact11);
  M11->SetFactory("DofsPerNode", dropFact11);
  M11->SetFactory("UnAmalgamationInfo", amalgFact11);
  M11->SetFactory("Nullspace", nspFact11); // TODO check me?
  M11->SetFactory("CoarseMap", coarseMapFact11);
  M11->SetIgnoreUserData(true);               // always use data from factories defined in factory manager

  ////////////////////////////////////////// build non-rebalanced matrix blocks
  // build factories for transfer operator P(2,2) and R(2,2)
  RCP<AmalgamationFactory> amalgFact22 = rcp(new AmalgamationFactory());
  RCP<TentativePFactory> P22Fact = rcp(new TentativePFactory());
  RCP<TransPFactory> R22Fact = rcp(new TransPFactory());

  /*XXX*/
  RCP<CoalesceDropFactory> dropFact22 = rcp(new CoalesceDropFactory());
  dropFact22->SetFactory("A", A22Fact);
  dropFact22->SetFactory("UnAmalgamationInfo", amalgFact22);
  dropFact22->setDefaultVerbLevel(Teuchos::VERB_EXTREME);

  /*XXX*/
  RCP<UncoupledAggregationFactory> UncoupledAggFact22 = rcp(new UncoupledAggregationFactory());
  UncoupledAggFact22->SetFactory("Graph", dropFact22);
  UncoupledAggFact22->SetMinNodesPerAggregate(6);
  UncoupledAggFact22->SetMaxNeighAlreadySelected(2);
  UncoupledAggFact22->SetOrdering(MueLu::AggOptions::NATURAL);

  // connect null space and tentative PFactory
  Teuchos::RCP<NullspaceFactory> nspFact22 = Teuchos::rcp(new NullspaceFactory("Nullspace2"));
  nspFact22->SetFactory("Nullspace2", P22Fact); // define null space generated by P22Fact as null space for coarse level (non-rebalanced)

  RCP<CoarseMapFactory> coarseMapFact22 = Teuchos::rcp(new CoarseMapFactory());
  coarseMapFact22->setStridingData(stridingInfo);
  coarseMapFact22->setStridedBlockId(1);

  //////////////////////////////// define factory manager for (2,2) block
  RCP<FactoryManager> M22 = rcp(new FactoryManager());
  M22->SetFactory("A", A22Fact); // rebalanced fine-level block operator
  M22->SetFactory("P", P22Fact); // non-rebalanced transfer operator P(2,2)
  M22->SetFactory("R", R22Fact); // non-rebalanced transfer operator R(2,2)
  M22->SetFactory("Aggregates", UncoupledAggFact22 /* UncoupledAggFact11 *//*XXX*/); // aggregates from block (1,1)
  M22->SetFactory("Graph", dropFact22);
  M22->SetFactory("DofsPerNode", dropFact22);
  M22->SetFactory("Nullspace", nspFact22); // TODO check me
  M22->SetFactory("UnAmalgamationInfo", amalgFact22);
  M22->SetFactory("Ptent", P22Fact);
  M22->SetFactory("CoarseMap", coarseMapFact22);
  M22->SetIgnoreUserData(true);

  /////////////////////////////////////////// define rebalanced blocked transfer ops
  //////////////////////////////// define factory manager for (1,1) block
  RCP<FactoryManager> rebM11 = rcp(new FactoryManager());
  rebM11->SetFactory("A", AcFact ); // important: must be a 2x2 block A Factory
  //rebM11->SetFactory("P", PFact); // use non-rebalanced block P operator as input
  //rebM11->SetFactory("R", RFact); // use non-rebalanced block R operator as input
  rebM11->SetFactory("Importer", RepartitionFact);
  rebM11->SetFactory("Nullspace", nspFact11);
  //rebM11->SetIgnoreUserData(true);

  RCP<FactoryManager> rebM22 = rcp(new FactoryManager());
  rebM22->SetFactory("A", AcFact ); // important: must be a 2x2 block A Factory
  rebM22->SetFactory("Importer", RepartitionFact2); // use dummy repartitioning factory
  rebM22->SetFactory("Nullspace", nspFact22);

  // Reordering of the transfer operators
  RCP<RebalanceBlockInterpolationFactory> RebalancedBlockPFact = rcp(new RebalanceBlockInterpolationFactory());
  RebalancedBlockPFact->SetFactory("P", PFact); // use non-rebalanced block P operator as input
  RebalancedBlockPFact->AddFactoryManager(rebM11);
  RebalancedBlockPFact->AddFactoryManager(rebM22);

  RCP<RebalanceBlockRestrictionFactory> RebalancedBlockRFact = rcp(new RebalanceBlockRestrictionFactory());
  //RebalancedBlockRFact->SetParameter("type", Teuchos::ParameterEntry(std::string("Restriction")));
  RebalancedBlockRFact->SetFactory("R", RFact); // non-rebalanced block P operator
  RebalancedBlockRFact->AddFactoryManager(rebM11);
  RebalancedBlockRFact->AddFactoryManager(rebM22);

  ///////////////////////////////////////// initialize non-rebalanced block transfer operators
  // output are the non-rebalanced block transfer operators used as input in AcFact to build
  // the non-rebalanced coarse level block matrix Ac
  PFact->AddFactoryManager(M11);  // use non-rebalanced information from sub block factory manager M11
  PFact->AddFactoryManager(M22);  // use non-rebalanced information from sub block factory manager M22

  ///////////////////////////////////////// initialize rebalanced coarse block AC factory
  RebalancedAcFact->SetFactory("A", AcFact);   // use non-rebalanced block operator as input
  RebalancedAcFact->AddFactoryManager(rebM11);
  RebalancedAcFact->AddFactoryManager(rebM22);

  //////////////////////////////////////////////////////////////////////
  // Smoothers

  //Another factory manager for braes sarazin smoother
  //Schur Complement Factory, using the factory to generate AcFact
    SC omega = 1.3;
    RCP<SchurComplementFactory> SFact = Teuchos::rcp(new SchurComplementFactory());
    SFact->SetParameter("omega", Teuchos::ParameterEntry(omega));
    SFact->SetFactory("A", MueLu::NoFactory::getRCP()); // this finally be the rebalanced block operator!

    //Smoother Factory, using SFact as a factory for A
    std::string ifpackSCType;
    Teuchos::ParameterList ifpackSCList;
    ifpackSCList.set("relaxation: sweeps", (LocalOrdinal) 3);
    ifpackSCList.set("relaxation: damping factor", (Scalar) 1.0);
    ifpackSCType = "RELAXATION";
    ifpackSCList.set("relaxation: type", "Gauss-Seidel");
    RCP<SmootherPrototype> smoProtoSC     = rcp( new TrilinosSmoother(ifpackSCType, ifpackSCList, 0) );
    smoProtoSC->SetFactory("A", SFact);
    RCP<SmootherFactory> SmooSCFact = rcp( new SmootherFactory(smoProtoSC) );

    RCP<BraessSarazinSmoother> smootherPrototype     = rcp( new BraessSarazinSmoother() );
    smootherPrototype->SetParameter("Sweeps", Teuchos::ParameterEntry(3));
    smootherPrototype->SetParameter("Damping factor", Teuchos::ParameterEntry(omega));
    smootherPrototype->SetFactory("A",MueLu::NoFactory::getRCP());
  RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(smootherPrototype) );

  RCP<BraessSarazinSmoother> coarseSolverPrototype = rcp( new BraessSarazinSmoother() );
  coarseSolverPrototype->SetParameter("Sweeps", Teuchos::ParameterEntry(3));
  coarseSolverPrototype->SetParameter("Damping factor", Teuchos::ParameterEntry(omega));

  coarseSolverPrototype->SetFactory("A",MueLu::NoFactory::getRCP());
  RCP<SmootherFactory>   coarseSolverFact      = rcp( new SmootherFactory(coarseSolverPrototype, Teuchos::null) );

  RCP<FactoryManager> MB = rcp(new FactoryManager());
  MB->SetFactory("A",     SFact);
  MB->SetFactory("Smoother",    SmooSCFact);
  MB->SetIgnoreUserData(true);               // always use data from factories defined in factory manager
  smootherPrototype->AddFactoryManager(MB,0);
  coarseSolverPrototype->AddFactoryManager(MB,0);


  ////////////////////////////////////////// define main factory manager
  FactoryManager M;
  M.SetFactory("A",            RebalancedAcFact);     // rebalance block AC Factory using importer
  M.SetFactory("P",            RebalancedBlockPFact); // rebalance prolongator using non-balanced Ac
  M.SetFactory("R",            RebalancedBlockRFact); // rebalance restrictor and null space using non-balanced Ac
  M.SetFactory("Smoother",     smootherFact);
  M.SetFactory("PreSmoother",     smootherFact);
  M.SetFactory("PostSmoother",     smootherFact);
  M.SetFactory("CoarseSolver", coarseSolverFact);

  H->Setup(M,0,maxLevels);

  /**out << std::endl;
  *out << "print content of multigrid levels:" << std::endl;

  Finest->print(*out);

  RCP<Level> coarseLevel = H->GetLevel(1);
  coarseLevel->print(*out);

  RCP<Level> coarseLevel2 = H->GetLevel(2);
  coarseLevel2->print(*out);*/

  RCP<MultiVector> xLsg = MultiVectorFactory::Build(xstridedfullmap,1);

  // Use AMG directly as an iterative method
#if 0
  {
    xLsg->putScalar( (SC) 0.0);

    // Epetra_Vector -> Xpetra::Vector
    RCP<Vector> xRhs = Teuchos::rcp(new Xpetra::EpetraVector(epv));

    // calculate initial (absolute) residual
    Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> norms(1);
    xRhs->norm2(norms);
    *out << "||x_0|| = " << norms[0] << std::endl;

    // apply ten multigrid iterations
    H->Iterate(*xRhs,100,*xLsg);


    // calculate and print residual
    RCP<MultiVector> xTmp = MultiVectorFactory::Build(xstridedfullmap,1);
    bOp->apply(*xLsg,*xTmp,Teuchos::NO_TRANS,(SC)1.0,(SC)0.0);
    xRhs->update((SC)-1.0,*xTmp,(SC)1.0);
    xRhs->norm2(norms);
    *out << "||x|| = " << norms[0] << std::endl;
  }
#endif

  //
  // Solve Ax = b using AMG as a preconditioner in AztecOO
  //
  {
    RCP<Epetra_Vector> X = rcp(new Epetra_Vector(epv->Map()));
    X->PutScalar(0.0);
    Epetra_LinearProblem epetraProblem(epA.get(), X.get(), epv.get());

    AztecOO aztecSolver(epetraProblem);
    aztecSolver.SetAztecOption(AZ_solver, AZ_gmres);

    MueLu::EpetraOperator aztecPrec(H);
    aztecSolver.SetPrecOperator(&aztecPrec);

    int maxIts = 50;
    double tol = 1e-8;

    aztecSolver.Iterate(maxIts, tol);
  }

#endif // end ifndef HAVE_LONG_LONG_INT
#endif // #if defined(HAVE_MPI) && defined(HAVE_MUELU_ZOLTAN) && defined(HAVE_MUELU_ISORROPIA)
   return EXIT_SUCCESS;
}
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP;
  using Teuchos::rcp;
  using namespace MueLuTests;
  using namespace Teuchos;

  typedef Xpetra::StridedMap<int,int>        StridedMap;
  typedef Xpetra::StridedMapFactory<int,int> StridedMapFactory;

  oblackholestream blackhole;
  GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  bool success = false;
  bool verbose = true;
  try {
    RCP<const Comm<int> > comm = DefaultComm<int>::getComm();
    RCP<FancyOStream> out = fancyOStream(rcpFromRef(std::cout));
    out->setOutputToRootOnly(0);
    *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

    // Timing
    Time myTime("global");
    TimeMonitor MM(myTime);

#ifndef HAVE_XPETRA_INT_LONG_LONG
    *out << "Warning: scaling test was not compiled with long long int support" << std::endl;
#endif

    // read in input parameters

    // default parameters
    LO BS_nSweeps = 100;
    Scalar BS_omega = 1.7;
    LO SC_nSweeps = 1;
    Scalar SC_omega = 1.0;
    int SC_bUseDirectSolver = 0;

    // Note: use --help to list available options.
    CommandLineProcessor clp(false);
    clp.setOption("BraessSarazin_sweeps",&BS_nSweeps,"number of sweeps with BraessSarazin smoother");
    clp.setOption("BraessSarazin_omega", &BS_omega,  "scaling factor for BraessSarazin smoother");
    clp.setOption("SchurComp_sweeps",    &SC_nSweeps,"number of sweeps for BraessSarazin internal SchurComp solver/smoother (GaussSeidel)");
    clp.setOption("SchurComp_omega",     &SC_omega,  "damping parameter for BraessSarazin internal SchurComp solver/smoother (GaussSeidel)");
    clp.setOption("SchurComp_solver",    &SC_bUseDirectSolver,  "if 1: use direct solver for SchurComp equation, otherwise use GaussSeidel smoother (=default)");

    switch (clp.parse(argc,argv)) {
      case CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
      case CommandLineProcessor::PARSE_ERROR:
      case CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
      case CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
    }

    int globalNumDofs = 1500;  // used for the maps
    //int nDofsPerNode = 3;      // used for generating the fine level null-space

    // build strided maps
    // striding information: 2 velocity dofs and 1 pressure dof = 3 dofs per node
    std::vector<size_t> stridingInfo;
    stridingInfo.push_back(2);
    stridingInfo.push_back(1);

    /////////////////////////////////////// build strided maps
    // build strided maps:
    // xstridedfullmap: full map (velocity and pressure dof gids), continous
    // xstridedvelmap: only velocity dof gid maps (i.e. 0,1,3,4,6,7...)
    // xstridedpremap: only pressure dof gid maps (i.e. 2,5,8,...)
    Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;
    RCP<StridedMap> xstridedfullmap = StridedMapFactory::Build(lib,globalNumDofs,0,stridingInfo,comm,-1);
    RCP<StridedMap> xstridedvelmap  = StridedMapFactory::Build(xstridedfullmap,0);
    RCP<StridedMap> xstridedpremap  = StridedMapFactory::Build(xstridedfullmap,1);

    /////////////////////////////////////// transform Xpetra::Map objects to Epetra
    // this is needed for our splitting routine
    const RCP<const Epetra_Map> fullmap = rcpFromRef(Xpetra::toEpetra(*xstridedfullmap));
    RCP<const Epetra_Map>       velmap  = rcpFromRef(Xpetra::toEpetra(*xstridedvelmap));
    RCP<const Epetra_Map>       premap  = rcpFromRef(Xpetra::toEpetra(*xstridedpremap));

    /////////////////////////////////////// import problem matrix and RHS from files (-> Epetra)

    // read in problem
    Epetra_CrsMatrix * ptrA = 0;
    Epetra_Vector * ptrf = 0;
    Epetra_MultiVector* ptrNS = 0;

    *out << "Reading matrix market file" << std::endl;

    EpetraExt::MatrixMarketFileToCrsMatrix("A_re1000_5932.txt",*fullmap,*fullmap,*fullmap,ptrA);
    EpetraExt::MatrixMarketFileToVector("b_re1000_5932.txt",*fullmap,ptrf);

    RCP<Epetra_CrsMatrix> epA = rcp(ptrA);
    RCP<Epetra_Vector> epv = rcp(ptrf);
    RCP<Epetra_MultiVector> epNS = rcp(ptrNS);


    /////////////////////////////////////// split system into 2x2 block system

    *out << "Split matrix into 2x2 block matrix" << std::endl;

    // split fullA into A11,..., A22
    RCP<Epetra_CrsMatrix> A11;
    RCP<Epetra_CrsMatrix> A12;
    RCP<Epetra_CrsMatrix> A21;
    RCP<Epetra_CrsMatrix> A22;

    if(SplitMatrix2x2(epA,*velmap,*premap,A11,A12,A21,A22)==false)
      *out << "Problem with splitting matrix"<< std::endl;

    /////////////////////////////////////// transform Epetra objects to Xpetra (needed for MueLu)

    // build Xpetra objects from Epetra_CrsMatrix objects
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA11 = rcp(new Xpetra::EpetraCrsMatrix(A11));
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA12 = rcp(new Xpetra::EpetraCrsMatrix(A12));
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA21 = rcp(new Xpetra::EpetraCrsMatrix(A21));
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA22 = rcp(new Xpetra::EpetraCrsMatrix(A22));

    /////////////////////////////////////// generate MapExtractor object

    std::vector<RCP<const Xpetra::Map<LO,GO,Node> > > xmaps;

    xmaps.push_back(xstridedvelmap);
    xmaps.push_back(xstridedpremap);

    RCP<const Xpetra::MapExtractor<Scalar,LO,GO,Node> > map_extractor = Xpetra::MapExtractorFactory<Scalar,LO,GO>::Build(xstridedfullmap,xmaps);

    /////////////////////////////////////// build blocked transfer operator
    // using the map extractor
    RCP<Xpetra::BlockedCrsMatrix<Scalar,LO,GO,Node> > bOp = rcp(new Xpetra::BlockedCrsMatrix<Scalar,LO,GO>(map_extractor,map_extractor,10));
    bOp->setMatrix(0,0,xA11);
    bOp->setMatrix(0,1,xA12);
    bOp->setMatrix(1,0,xA21);
    bOp->setMatrix(1,1,xA22);

    bOp->fillComplete();
    //////////////////////////////////////////////////////// finest Level
    RCP<MueLu::Level> Finest = rcp(new Level());
    Finest->setDefaultVerbLevel(VERB_NONE);
    Finest->Set("A",rcp_dynamic_cast<Matrix>(bOp));

    ///////////////////////////////////
    // Test Braess Sarazin Smoother as a solver

    *out << "Test: Creating Braess Sarazin Smoother" << std::endl;
    *out << "Test: Omega for BraessSarazin = " << BS_omega << std::endl;
    *out << "Test: Number of sweeps for BraessSarazin = " << BS_nSweeps << std::endl;
    *out << "Test: Omega for Schur Complement solver= " << SC_omega << std::endl;
    *out << "Test: Number of Schur Complement solver= " << SC_nSweeps << std::endl;
    *out << "Test: Setting up Braess Sarazin Smoother" << std::endl;

    // define BraessSarazin Smoother with BS_nSweeps and BS_omega as scaling factor
    // AFact_ = null (= default) for the 2x2 blocked operator
    RCP<BraessSarazinSmoother> BraessSarazinSm = rcp( new BraessSarazinSmoother() );
    BraessSarazinSm->SetParameter("Sweeps", Teuchos::ParameterEntry(BS_nSweeps));
    BraessSarazinSm->SetParameter("Damping factor", Teuchos::ParameterEntry(BS_omega));

    RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(BraessSarazinSm) );

    /*note that omega must be the same in the SchurComplementFactory and in the BraessSarazinSmoother*/
    // define SchurComplement Factory
    // SchurComp gets a RCP to AFact_ which has to be the 2x2 blocked operator
    // and the scaling/damping factor omega that is used for BraessSarazin
    // It stores the resulting SchurComplement operator as "A" generated by the SchurComplementFactory
    // Instead of F^{-1} it uses the approximation \hat{F}^{-1} with \hat{F} = diag(F)
    RCP<SchurComplementFactory> SFact = rcp(new SchurComplementFactory());
    SFact->SetParameter("omega", ParameterEntry(BS_omega));
    SFact->SetFactory("A",MueLu::NoFactory::getRCP());

    // define smoother/solver for BraessSarazin
    RCP<SmootherPrototype> smoProtoSC = null;
    if(SC_bUseDirectSolver != 1) {
      //Smoother Factory, using SFact as a factory for A
      std::string ifpackSCType;
      ParameterList ifpackSCList;
      ifpackSCList.set("relaxation: sweeps", SC_nSweeps );
      ifpackSCList.set("relaxation: damping factor", SC_omega );
      ifpackSCType = "RELAXATION";
      ifpackSCList.set("relaxation: type", "Gauss-Seidel");
      smoProtoSC     = rcp( new TrilinosSmoother(ifpackSCType, ifpackSCList, 0) );
      smoProtoSC->SetFactory("A", SFact);
    }
    else {
      ParameterList ifpackDSList;
      std::string ifpackDSType;
      smoProtoSC     = rcp( new DirectSolver(ifpackDSType,ifpackDSList) ); smoProtoSC->SetFactory("A", SFact);
    }

    RCP<SmootherFactory> SmooSCFact = rcp( new SmootherFactory(smoProtoSC) );

    // define temporary FactoryManager that is used as input for BraessSarazin smoother
    RCP<FactoryManager> MB = rcp(new FactoryManager());
    MB->SetFactory("A",                 SFact);         // SchurComplement operator for correction step (defined as "A")
    MB->SetFactory("Smoother",          SmooSCFact);    // solver/smoother for correction step
    MB->SetFactory("PreSmoother",               SmooSCFact);
    MB->SetFactory("PostSmoother",              SmooSCFact);
    MB->SetIgnoreUserData(true);               // always use data from factories defined in factory manager
    BraessSarazinSm->AddFactoryManager(MB,0);  // set temporary factory manager in BraessSarazin smoother

    // setup main factory manager
    RCP<FactoryManager> M = rcp(new FactoryManager());
    M->SetFactory("A",               MueLu::NoFactory::getRCP()); // this is the 2x2 blocked operator
    M->SetFactory("Smoother",        smootherFact);               // BraessSarazin block smoother
    M->SetFactory("PreSmoother",     smootherFact);
    M->SetFactory("PostSmoother",    smootherFact);

    MueLu::SetFactoryManager SFMCoarse(Finest, M);
    Finest->Request(MueLu::TopSmootherFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>(M, "Smoother"));

    // call setup (= extract blocks and extract diagonal of F)
    BraessSarazinSm->Setup(*Finest);

    RCP<MultiVector> xtest = MultiVectorFactory::Build(xstridedfullmap,1);
    xtest->putScalar( (SC) 0.0);

    RCP<Vector> xR = rcp(new Xpetra::EpetraVector(epv));
    // calculate initial (absolute) residual
    Array<ScalarTraits<SC>::magnitudeType> norms(1);

    xR->norm2(norms);
    *out << "Test: ||x_0|| = " << norms[0] << std::endl;
    *out << "Test: Applying Braess-Sarazin Smoother" << std::endl;
    *out << "Test: START DATA" << std::endl;
    *out << "iterations\tVelocity_residual\tPressure_residual" << std::endl;
    BraessSarazinSm->Apply(*xtest,*xR);
    xtest->norm2(norms);
    *out << "Test: ||x_1|| = " << norms[0] << std::endl;

    Array<ScalarTraits<double>::magnitudeType> test = MueLu::Utils<double, int, int>::ResidualNorm(*bOp, *xtest, *xR);
    *out << "residual norm: " << test[0] << std::endl;

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
int main(int argc, char *argv[]) {
#if defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_EPETRAEXT)
  typedef double Scalar;
  typedef int LocalOrdinal;
  typedef int GlobalOrdinal;
  typedef LocalOrdinal LO;
  typedef GlobalOrdinal GO;
  typedef Xpetra::EpetraNode Node;
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP;
  using Teuchos::rcp;

  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  bool success = false;
  bool verbose = true;
  try {
    //
    RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
    RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
    out->setOutputToRootOnly(0);
    *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

    // Timing
    Teuchos::Time myTime("global");
    Teuchos::TimeMonitor MM(myTime);

    // custom parameters
    LocalOrdinal maxLevels = 3;

    GlobalOrdinal maxCoarseSize=1; //FIXME clp doesn't like long long int

    int globalNumDofs = 1500;  // used for the maps
    int nDofsPerNode = 3;      // used for generating the fine level null-space

    // build strided maps
    // striding information: 2 velocity dofs and 1 pressure dof = 3 dofs per node
    std::vector<size_t> stridingInfo;
    stridingInfo.push_back(2);
    stridingInfo.push_back(1);

    /////////////////////////////////////// build strided maps
    // build strided maps:
    // xstridedfullmap: full map (velocity and pressure dof gids), continous
    // xstridedvelmap: only velocity dof gid maps (i.e. 0,1,3,4,6,7...)
    // xstridedpremap: only pressure dof gid maps (i.e. 2,5,8,...)
    Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;
    RCP<StridedMap> xstridedfullmap = StridedMapFactory::Build(lib,globalNumDofs,0,stridingInfo,comm,-1);
    RCP<StridedMap> xstridedvelmap  = StridedMapFactory::Build(xstridedfullmap,0);
    RCP<StridedMap> xstridedpremap  = StridedMapFactory::Build(xstridedfullmap,1);

    /////////////////////////////////////// transform Xpetra::Map objects to Epetra
    // this is needed for AztecOO
    const RCP<const Epetra_Map> fullmap = Teuchos::rcpFromRef(Xpetra::toEpetra(*xstridedfullmap));
    RCP<const Epetra_Map>       velmap  = Teuchos::rcpFromRef(Xpetra::toEpetra(*xstridedvelmap));
    RCP<const Epetra_Map>       premap  = Teuchos::rcpFromRef(Xpetra::toEpetra(*xstridedpremap));

    /////////////////////////////////////// import problem matrix and RHS from files (-> Epetra)

    // read in problem
    Epetra_CrsMatrix * ptrA = 0;
    Epetra_Vector * ptrf = 0;
    Epetra_MultiVector* ptrNS = 0;

    *out << "Reading matrix market file" << std::endl;

    EpetraExt::MatrixMarketFileToCrsMatrix("A_re1000_5932.txt",*fullmap,*fullmap,*fullmap,ptrA);
    EpetraExt::MatrixMarketFileToVector("b_re1000_5932.txt",*fullmap,ptrf);

    RCP<Epetra_CrsMatrix> epA = Teuchos::rcp(ptrA);
    RCP<Epetra_Vector> epv = Teuchos::rcp(ptrf);
    RCP<Epetra_MultiVector> epNS = Teuchos::rcp(ptrNS);


    /////////////////////////////////////// split system into 2x2 block system

    *out << "Split matrix into 2x2 block matrix" << std::endl;

    // split fullA into A11,..., A22
    Teuchos::RCP<Epetra_CrsMatrix> A11;
    Teuchos::RCP<Epetra_CrsMatrix> A12;
    Teuchos::RCP<Epetra_CrsMatrix> A21;
    Teuchos::RCP<Epetra_CrsMatrix> A22;

    if(MueLuTests::SplitMatrix2x2(epA,*velmap,*premap,A11,A12,A21,A22)==false)
      *out << "Problem with splitting matrix"<< std::endl;

    /////////////////////////////////////// transform Epetra objects to Xpetra (needed for MueLu)

    // build Xpetra objects from Epetra_CrsMatrix objects
    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA11 = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT<GlobalOrdinal,Node>(A11));
    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA12 = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT<GlobalOrdinal,Node>(A12));
    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA21 = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT<GlobalOrdinal,Node>(A21));
    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > xA22 = Teuchos::rcp(new Xpetra::EpetraCrsMatrixT<GlobalOrdinal,Node>(A22));

    /////////////////////////////////////// generate MapExtractor object

    std::vector<Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > > xmaps;
    xmaps.push_back(xstridedvelmap);
    xmaps.push_back(xstridedpremap);

    Teuchos::RCP<const Xpetra::MapExtractor<Scalar,LocalOrdinal,GlobalOrdinal,Node> > map_extractor = Xpetra::MapExtractorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(xstridedfullmap,xmaps);

    /////////////////////////////////////// build blocked transfer operator
    // using the map extractor
    Teuchos::RCP<Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > bOp = Teuchos::rcp(new Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>(map_extractor,map_extractor,10));
    bOp->setMatrix(0,0,xA11);
    bOp->setMatrix(0,1,xA12);
    bOp->setMatrix(1,0,xA21);
    bOp->setMatrix(1,1,xA22);

    bOp->fillComplete();

    //////////////////////////////////////////////////// create Hierarchy
    RCP<Hierarchy> H = rcp ( new Hierarchy() );
    H->setDefaultVerbLevel(Teuchos::VERB_HIGH);
    //H->setDefaultVerbLevel(Teuchos::VERB_NONE);
    H->SetMaxCoarseSize(maxCoarseSize);

    //////////////////////////////////////////////////////// finest Level
    RCP<MueLu::Level> Finest = H->GetLevel();
    Finest->setDefaultVerbLevel(Teuchos::VERB_HIGH);
    Finest->Set("A",Teuchos::rcp_dynamic_cast<Matrix>(bOp));

    /////////////////////////////////////////////// define subblocks of A
    // make A11 block and A22 block available as variable "A" generated
    // by A11Fact and A22Fact
    RCP<SubBlockAFactory> A11Fact = rcp(new SubBlockAFactory());
    A11Fact->SetFactory("A",MueLu::NoFactory::getRCP());
    A11Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
    A11Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
    RCP<SubBlockAFactory> A22Fact = rcp(new SubBlockAFactory());
    A22Fact->SetFactory("A",MueLu::NoFactory::getRCP());
    A22Fact->SetParameter("block row",Teuchos::ParameterEntry(1));
    A22Fact->SetParameter("block col",Teuchos::ParameterEntry(1));

    ////////////////////////////////////////// prepare null space for A11
    RCP<MultiVector> nullspace11 = MultiVectorFactory::Build(xstridedvelmap, 2);  // this is a 2D standard null space

    for (int i=0; i<nDofsPerNode-1; ++i) {
      Teuchos::ArrayRCP<Scalar> nsValues = nullspace11->getDataNonConst(i);
      int numBlocks = nsValues.size() / (nDofsPerNode - 1);
      for (int j=0; j< numBlocks; ++j) {
        nsValues[j*(nDofsPerNode - 1) + i] = 1.0;
      }
    }

    Finest->Set("Nullspace1",nullspace11);

    ///////////////////////////////////////// define CoalesceDropFactory and Aggregation for A11
    // set up amalgamation for A11. Note: we're using a default null space factory (Teuchos::null)
    RCP<AmalgamationFactory> amalgFact11 = rcp(new AmalgamationFactory());
    amalgFact11->SetFactory("A", A11Fact);

    amalgFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);
    RCP<CoalesceDropFactory> dropFact11 = rcp(new CoalesceDropFactory());
    dropFact11->SetFactory("A", A11Fact);
    dropFact11->SetFactory("UnAmalgamationInfo", amalgFact11);
    dropFact11->setDefaultVerbLevel(Teuchos::VERB_EXTREME);
    RCP<UncoupledAggregationFactory> CoupledAggFact11 = rcp(new UncoupledAggregationFactory());
    CoupledAggFact11->SetFactory("Graph", dropFact11);
    CoupledAggFact11->SetMinNodesPerAggregate(9);
    CoupledAggFact11->SetMaxNeighAlreadySelected(2);
    CoupledAggFact11->SetOrdering("natural");
    //CoupledAggFact11->SetPhase3AggCreation(0.5);

    ///////////////////////////////////////// define transfer ops for A11
#if 0
    // use PG-AMG
    RCP<PgPFactory> P11Fact = rcp(new PgPFactory());

    RCP<GenericRFactory> R11Fact = rcp(new GenericRFactory());
    Teuchos::RCP<NullspaceFactory> nspFact11 = Teuchos::rcp(new NullspaceFactory("Nullspace1",P11tentFact));

    Teuchos::RCP<NullspaceFactory> nspFact11 = Teuchos::rcp(new NullspaceFactory("Nullspace1"));

    RCP<CoarseMapFactory> coarseMapFact11 = Teuchos::rcp(new CoarseMapFactory());
    coarseMapFact11->setStridingData(stridingInfo);
    coarseMapFact11->setStridedBlockId(0);

    //////////////////////////////// define factory manager for (1,1) block
    RCP<FactoryManager> M11 = rcp(new FactoryManager());
    M11->SetFactory("A", A11Fact);
    M11->SetFactory("P", P11Fact);
    M11->SetFactory("R", R11Fact);
    M11->SetFactory("Aggregates", CoupledAggFact11);
    M11->SetFactory("UnAmalgamationInfo", amalgFact11);
    M11->SetFactory("Nullspace", nspFact11);
    // M11->SetFactory("Ptent", P11tentFact);
    M11->SetFactory("CoarseMap", coarseMapFact11);
#else
    RCP<TentativePFactory> P11Fact = rcp(new TentativePFactory());

    RCP<TransPFactory> R11Fact = rcp(new TransPFactory());

    Teuchos::RCP<NullspaceFactory> nspFact11 = Teuchos::rcp(new NullspaceFactory("Nullspace1"));
    nspFact11->SetFactory("Nullspace1",P11Fact);

    RCP<CoarseMapFactory> coarseMapFact11 = Teuchos::rcp(new CoarseMapFactory());
    coarseMapFact11->setStridingData(stridingInfo);
    coarseMapFact11->setStridedBlockId(0);

    //////////////////////////////// define factory manager for (1,1) block
    RCP<FactoryManager> M11 = rcp(new FactoryManager());
    M11->SetFactory("A", A11Fact);
    M11->SetFactory("P", P11Fact);
    M11->SetFactory("R", R11Fact);
    M11->SetFactory("Aggregates", CoupledAggFact11);
    M11->SetFactory("UnAmalgamationInfo", amalgFact11);
    M11->SetFactory("Nullspace", nspFact11);
    // M11->SetFactory("Ptent", P11Fact);
    M11->SetFactory("CoarseMap", coarseMapFact11);
#endif
    M11->SetIgnoreUserData(true);               // always use data from factories defined in factory manager

    ////////////////////////////////////////// prepare null space for A22
    RCP<MultiVector> nullspace22 = MultiVectorFactory::Build(xstridedpremap, 1);  // this is a 2D standard null space
    Teuchos::ArrayRCP<Scalar> nsValues22 = nullspace22->getDataNonConst(0);
    for (int j=0; j< nsValues22.size(); ++j) {
      nsValues22[j] = 1.0;
    }

    Finest->Set("Nullspace2",nullspace22);

    ///////////////////////////////////////// define transfer ops for A22
#if 0
    // use PGAMG
    RCP<AmalgamationFactory> amalgFact22 = rcp(new AmalgamationFactory(A22Fact));
    RCP<TentativePFactory> P22tentFact = rcp(new TentativePFactory(CoupledAggFact11, amalgFact22));

    RCP<SaPFactory> P22Fact = rcp(new SaPFactory(P22tentFact));

    //RCP<GenericRFactory> R22Fact = rcp(new GenericRFactory(P22Fact));
    RCP<TransPFactory> R22Fact = rcp(new TransPFactory(P22Fact));

    Teuchos::RCP<NullspaceFactory> nspFact22 = Teuchos::rcp(new NullspaceFactory("Nullspace2",P22tentFact));
    RCP<CoarseMapFactory> coarseMapFact22 = Teuchos::rcp(new CoarseMapFactory(CoupledAggFact11, nspFact22));
    coarseMapFact22->setStridingData(stridingInfo);
    coarseMapFact22->setStridedBlockId(1);

    //////////////////////////////// define factory manager for (2,2) block
    RCP<FactoryManager> M22 = rcp(new FactoryManager());
    M22->SetFactory("A", A22Fact);
    M22->SetFactory("P", P22Fact);
    M22->SetFactory("R", R22Fact);
    M22->SetFactory("Aggregates", AggFact22);
    M22->SetFactory("Nullspace", nspFact22);
    M22->SetFactory("Ptent", P22tentFact);
    M22->SetFactory("CoarseMap", coarseMapFact22);
    M22->SetIgnoreUserData(true);               // always use data from factories defined in factory manager

#else
    // use TentativePFactory
    RCP<AmalgamationFactory> amalgFact22 = rcp(new AmalgamationFactory());
    RCP<TentativePFactory> P22Fact = rcp(new TentativePFactory()); // check me (fed with A22) wrong column GIDS!!!

    RCP<TransPFactory> R22Fact = rcp(new TransPFactory());

    Teuchos::RCP<NullspaceFactory> nspFact22 = Teuchos::rcp(new NullspaceFactory("Nullspace2"));
    nspFact22->SetFactory("Nullspace2", P22Fact);
    RCP<CoarseMapFactory> coarseMapFact22 = Teuchos::rcp(new CoarseMapFactory());
    coarseMapFact22->setStridingData(stridingInfo);
    coarseMapFact22->setStridedBlockId(1);

    //////////////////////////////// define factory manager for (2,2) block
    RCP<FactoryManager> M22 = rcp(new FactoryManager());
    M22->SetFactory("A", A22Fact);
    M22->SetFactory("P", P22Fact);
    M22->SetFactory("R", R22Fact);
    M22->SetFactory("Aggregates", CoupledAggFact11);
    M22->SetFactory("Nullspace", nspFact22);
    M11->SetFactory("UnAmalgamationInfo", amalgFact22);
    M22->SetFactory("Ptent", P22Fact);
    M22->SetFactory("CoarseMap", coarseMapFact22);
    M22->SetIgnoreUserData(true);               // always use data from factories defined in factory manager
#endif

    /////////////////////////////////////////// define blocked transfer ops
    RCP<BlockedPFactory> PFact = rcp(new BlockedPFactory());
    PFact->AddFactoryManager(M11);
    PFact->AddFactoryManager(M22);

    RCP<GenericRFactory> RFact = rcp(new GenericRFactory());
    RFact->SetFactory("P", PFact);

    RCP<Factory> AcFact = rcp(new BlockedRAPFactory());
    AcFact->SetFactory("P", PFact);
    AcFact->SetFactory("R", RFact);

    *out << "Creating Simple Smoother" << std::endl;

    //////////////////////////////////////////////////////////////////////
    // Smoothers

    RCP<SubBlockAFactory> A00Fact = Teuchos::rcp(new SubBlockAFactory());
    A00Fact->SetFactory("A",MueLu::NoFactory::getRCP());
    A00Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
    A00Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
    std::string ifpackTypePredictSmoother;
    Teuchos::ParameterList ifpackListPredictSmoother;
    ifpackListPredictSmoother.set("relaxation: sweeps", (LocalOrdinal) 1);
    ifpackListPredictSmoother.set("relaxation: damping factor", (Scalar) 0.5);
    ifpackTypePredictSmoother = "RELAXATION";
    ifpackListPredictSmoother.set("relaxation: type", "Gauss-Seidel");
    RCP<SmootherPrototype> smoProtoPredict     = rcp( new TrilinosSmoother(ifpackTypePredictSmoother, ifpackListPredictSmoother, 0) );
    smoProtoPredict->SetFactory("A", A00Fact);
    RCP<SmootherFactory> SmooPredictFact = rcp( new SmootherFactory(smoProtoPredict) );

    RCP<FactoryManager> MPredict = rcp(new FactoryManager());
    MPredict->SetFactory("A",                 A00Fact);         // SchurComplement operator for correction step (defined as "A")
    MPredict->SetFactory("Smoother",          SmooPredictFact);    // solver/smoother for correction step
    MPredict->SetFactory("PreSmoother",               SmooPredictFact);
    MPredict->SetFactory("PostSmoother",              SmooPredictFact);
    MPredict->SetIgnoreUserData(true);               // always use data from factories defined in factory manager

    ////////////////////////////////////////////////
    // SchurComp
    // create SchurComp factory (SchurComplement smoother is provided by local FactoryManager)
    Teuchos::RCP<SchurComplementFactory> SFact = Teuchos::rcp(new SchurComplementFactory());
    SFact->SetParameter("omega", Teuchos::ParameterEntry(0.8));
    SFact->SetParameter("lumping", Teuchos::ParameterEntry(true));
    SFact->SetFactory("A", MueLu::NoFactory::getRCP()); // 2x2 blocked operator

    // define SchurComplement solver
    std::string ifpackTypeSchurSmoother;
    ifpackTypeSchurSmoother = "RELAXATION";
    Teuchos::ParameterList ifpackListSchurSmoother;
    ifpackListSchurSmoother.set("relaxation: sweeps", (LocalOrdinal) 10);
    ifpackListSchurSmoother.set("relaxation: damping factor", (Scalar) 0.8);
    ifpackListSchurSmoother.set("relaxation: type", "Gauss-Seidel");
    RCP<SmootherPrototype> smoProtoSC     = rcp( new TrilinosSmoother(ifpackTypeSchurSmoother, ifpackListSchurSmoother, 0) );
    smoProtoSC->SetFactory("A", SFact); // explicitely use SchurComplement matrix as input for smoother
    RCP<SmootherFactory> SmooSCFact = rcp( new SmootherFactory(smoProtoSC) );

    // setup local factory manager for SchurComplementFactory
    Teuchos::RCP<FactoryManager> MSchur = Teuchos::rcp(new FactoryManager());
    MSchur->SetFactory("A", SFact);              // SchurCompFactory as generating factory for SchurComp equation
    MSchur->SetFactory("Smoother", SmooSCFact);
    MSchur->SetIgnoreUserData(true);


    /////////////////////////////////////////////////////
    // create smoother prototype

    RCP<SimpleSmoother> smootherPrototype     = rcp( new SimpleSmoother() );
    smootherPrototype->SetParameter("Sweeps", Teuchos::ParameterEntry(3));
    smootherPrototype->SetParameter("Damping factor", Teuchos::ParameterEntry(0.6));

    smootherPrototype->AddFactoryManager(MPredict,0);    // set temporary factory manager for prediction step
    smootherPrototype->AddFactoryManager(MSchur,1);      // set temporary factory manager for correction step
    smootherPrototype->SetFactory("A", MueLu::NoFactory::getRCP());

    /////////////////////////////////////////////////////
    // create smoother factories
    RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(smootherPrototype) );                // pre and postsmoothing with SIMPLE on the finest and intermedium levels
    RCP<SmootherFactory>   coarseSmootherFact    = rcp( new SmootherFactory(smootherPrototype, Teuchos::null) ); // only presmoothing on finest level (we do not want to run two SIMPLE iterations on the coarsest level)

    // main factory manager
    FactoryManager M;
    M.SetFactory("A",            AcFact);
    M.SetFactory("P",            PFact);
    M.SetFactory("R",            RFact);
    M.SetFactory("Smoother",     smootherFact); // TODO fix me
    M.SetFactory("CoarseSolver", coarseSmootherFact);

    //////////////////////////////////// setup multigrid

    H->Setup(M,0,maxLevels);

    Finest->print(*out);

    RCP<Level> coarseLevel = H->GetLevel(1);
    coarseLevel->print(*out);

    //RCP<Level> coarseLevel2 = H->GetLevel(2);
    //coarseLevel2->print(*out);

    RCP<MultiVector> xLsg = MultiVectorFactory::Build(xstridedfullmap,1);

    // Use AMG directly as an iterative method
#if 0
    {
      xLsg->putScalar( (SC) 0.0);

      // Epetra_Vector -> Xpetra::Vector
      RCP<Vector> xRhs = Teuchos::rcp(new Xpetra::EpetraVector(epv));

      // calculate initial (absolute) residual
      Teuchos::Array<Teuchos::ScalarTraits<SC>::magnitudeType> norms(1);
      xRhs->norm2(norms);
      *out << "||x_0|| = " << norms[0] << std::endl;

      // apply ten multigrid iterations
      H->Iterate(*xRhs,*xLsg,100);


      // calculate and print residual
      RCP<MultiVector> xTmp = MultiVectorFactory::Build(xstridedfullmap,1);
      bOp->apply(*xLsg,*xTmp,Teuchos::NO_TRANS,(SC)1.0,(SC)0.0);
      xRhs->update((SC)-1.0,*xTmp,(SC)1.0);
      xRhs->norm2(norms);
      *out << "||x|| = " << norms[0] << std::endl;
    }
#endif

    //
    // Solve Ax = b using AMG as a preconditioner in AztecOO
    //
    {
      RCP<Epetra_Vector> X = rcp(new Epetra_Vector(epv->Map()));
      X->PutScalar(0.0);
      Epetra_LinearProblem epetraProblem(epA.get(), X.get(), epv.get());

      AztecOO aztecSolver(epetraProblem);
      aztecSolver.SetAztecOption(AZ_solver, AZ_gmres);

      MueLu::EpetraOperator aztecPrec(H);
      aztecSolver.SetPrecOperator(&aztecPrec);

      int maxIts = 50;
      double tol = 1e-8;

      aztecSolver.Iterate(maxIts, tol);
    }

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
#else
  std::cout << "Epetra (and/or EpetraExt) are not available. Skip test." << std::endl;
  return EXIT_SUCCESS;
#endif // #if defined(HAVE_MUELU_SERIAL) && defined(HAVE_MUELU_EPETRA)
}
Example #4
0
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP; using Teuchos::rcp;
  using Teuchos::TimeMonitor;

  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  bool success = false;
  bool verbose = true;
  try {
    RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
    RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
    out->setOutputToRootOnly(0);
    *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

    //#ifndef HAVE_XPETRA_INT_LONG_LONG
    *out << "Warning: scaling test was not compiled with long long int support" << std::endl;
    //#endif

    /**********************************************************************************/
    /* SET TEST PARAMETERS                                                            */
    /**********************************************************************************/
    // Note: use --help to list available options.
    Teuchos::CommandLineProcessor clp(false);

    Xpetra::Parameters xpetraParameters(clp);             // manage parameters of xpetra

    switch (clp.parse(argc,argv)) {
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
      case Teuchos::CommandLineProcessor::PARSE_ERROR:
      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
    }

    //RCP<TimeMonitor> globalTimeMonitor = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time")));

    xpetraParameters.check();

    Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

    if (comm->getRank() == 0) {
      std::cout << xpetraParameters;
      // TODO: print custom parameters // Or use paramList::print()!
    }

    /**********************************************************************************/
    /* CREATE INITIAL MATRIX                                                          */
    /**********************************************************************************/
    RCP<const Map> bigMap;
    RCP<const Map> map1;
    RCP<const Map> map2;
    GO numElements = 500;
    GO numElements1 = 400;
    GO numElements2 = 100;

    //bigMap = MapFactory::Build(Xpetra::UseEpetra, numElements,  0, comm); // ok this is the problem :-)
    std::vector<size_t> stridingInfo;
    stridingInfo.push_back(1);
    map1   = StridedMapFactory::Build(lib, numElements1, 0, stridingInfo, comm, -1);
    map2   = StridedMapFactory::Build(lib, numElements2, numElements1, stridingInfo, comm, -1);

    std::vector<GlobalOrdinal> localGids; // vector with all local GIDs on cur proc
    Teuchos::ArrayView< const GlobalOrdinal > map1eleList = map1->getNodeElementList(); // append all local gids from map1 and map2
    localGids.insert(localGids.end(), map1eleList.begin(), map1eleList.end());
    Teuchos::ArrayView< const GlobalOrdinal > map2eleList = map2->getNodeElementList();
    localGids.insert(localGids.end(), map2eleList.begin(), map2eleList.end());
    Teuchos::ArrayView<GlobalOrdinal> eleList(&localGids[0],localGids.size());
    bigMap = StridedMapFactory::Build(lib, numElements, eleList, 0, stridingInfo, comm); // create full big map (concatenation of map1 and map2)

    std::vector<Teuchos::RCP<const Map> > maps;
    maps.push_back(map1); maps.push_back(map2);

    Teuchos::RCP<const Xpetra::MapExtractor<Scalar, LO, GO, Node> > mapExtractor = Xpetra::MapExtractorFactory<Scalar,LO,GO,Node>::Build(bigMap, maps);

    RCP<CrsMatrixWrap> Op11 = MueLuTests::GenerateProblemMatrix(map1,2,-1,-1);
    RCP<CrsMatrixWrap> Op22 = MueLuTests::GenerateProblemMatrix(map2,3,-2,-1);

    /*Op11->describe(*out,Teuchos::VERB_EXTREME);
      Op22->describe(*out,Teuchos::VERB_EXTREME);*/

    // build blocked operator
    Teuchos::RCP<Xpetra::BlockedCrsMatrix<Scalar,LO,GO,Node> > bOp = Teuchos::rcp(new Xpetra::BlockedCrsMatrix<Scalar,LO,GO>(mapExtractor,mapExtractor,10));

    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > crsMat11 = Op11->getCrsMatrix();
    Teuchos::RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > crsMat22 = Op22->getCrsMatrix();
    bOp->setMatrix(0,0,crsMat11);
    bOp->setMatrix(1,1,crsMat22);

    bOp->fillComplete();

    // build hierarchy
    Hierarchy H;
    H.SetMaxCoarseSize(50);
    RCP<Level> levelOne = H.GetLevel();
    levelOne->Set("A", Teuchos::rcp_dynamic_cast<Matrix>(bOp)); // set blocked operator

    RCP<SubBlockAFactory> A11Fact = Teuchos::rcp(new SubBlockAFactory());
    A11Fact->SetFactory("A",MueLu::NoFactory::getRCP());
    A11Fact->SetParameter("block row",Teuchos::ParameterEntry(0));
    A11Fact->SetParameter("block col",Teuchos::ParameterEntry(0));
    RCP<SubBlockAFactory> A22Fact = Teuchos::rcp(new SubBlockAFactory());
    A22Fact->SetFactory("A",MueLu::NoFactory::getRCP());
    A22Fact->SetParameter("block row",Teuchos::ParameterEntry(1));
    A22Fact->SetParameter("block col",Teuchos::ParameterEntry(1));

    RCP<TentativePFactory> P11Fact = rcp(new TentativePFactory());
    RCP<TransPFactory> R11Fact = rcp(new TransPFactory());

    RCP<TentativePFactory> P22TentFact = rcp(new TentativePFactory());
    RCP<PgPFactory> P22Fact = rcp(new PgPFactory());
    RCP<GenericRFactory> R22Fact = rcp(new GenericRFactory());

    std::string ifpackType;
    Teuchos::ParameterList ifpackList;
    ifpackList.set("relaxation: sweeps", (LO) 5);
    ifpackList.set("relaxation: damping factor", (SC) 1.0);
    ifpackType = "RELAXATION";
    ifpackList.set("relaxation: type", "Symmetric Gauss-Seidel");
    RCP<SmootherPrototype> smoProto11     = rcp( new TrilinosSmoother(ifpackType, ifpackList, 0) );
    smoProto11->SetFactory("A", A11Fact);
    RCP<SmootherPrototype> smoProto22     = rcp( new TrilinosSmoother(ifpackType, ifpackList, 0) );
    smoProto22->SetFactory("A", A22Fact);

    //RCP<SmootherPrototype> smoProto11     = rcp( new DirectSolver("", Teuchos::ParameterList(), A11Fact) );
    //RCP<SmootherPrototype> smoProto22     = rcp( new DirectSolver("", Teuchos::ParameterList(), A22Fact) );

    RCP<SmootherFactory> Smoo11Fact = rcp( new SmootherFactory(smoProto11) );
    RCP<SmootherFactory> Smoo22Fact = rcp( new SmootherFactory(smoProto22) );

    RCP<FactoryManager> M11 = rcp(new FactoryManager());
    M11->SetFactory("A", A11Fact);
    M11->SetFactory("P", P11Fact);
    M11->SetFactory("Ptent", P11Fact); //for Nullspace
    M11->SetFactory("R", R11Fact);
    M11->SetFactory("Smoother", Smoo11Fact);
    M11->SetIgnoreUserData(true);

    RCP<FactoryManager> M22 = rcp(new FactoryManager());
    M22->SetFactory("A", A22Fact);
    M22->SetFactory("P", P22Fact);
    M22->SetFactory("R", R22Fact);
    M22->SetFactory("Ptent", P22TentFact); //for both P22 and Nullspace
    M22->SetFactory("Smoother", Smoo22Fact);
    M22->SetIgnoreUserData(true);

    RCP<BlockedPFactory> PFact = rcp(new BlockedPFactory());
    PFact->AddFactoryManager(M11);
    PFact->AddFactoryManager(M22);

    RCP<GenericRFactory> RFact = rcp(new GenericRFactory());

    RCP<Factory> AcFact = rcp(new BlockedRAPFactory());

    // Smoothers
    RCP<BlockedGaussSeidelSmoother> smootherPrototype     = rcp( new BlockedGaussSeidelSmoother() );
    smootherPrototype->SetParameter("Sweeps", Teuchos::ParameterEntry(2));
    smootherPrototype->SetParameter("Damping factor", Teuchos::ParameterEntry(1.0));
    smootherPrototype->AddFactoryManager(M11,0);
    smootherPrototype->AddFactoryManager(M22,1);
    RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(smootherPrototype) );

    // Coarse grid correction
    RCP<BlockedGaussSeidelSmoother> coarseSolverPrototype = rcp( new BlockedGaussSeidelSmoother() );
    coarseSolverPrototype->AddFactoryManager(M11,0);
    coarseSolverPrototype->AddFactoryManager(M22,1);
    RCP<SmootherFactory>   coarseSolverFact      = rcp( new SmootherFactory(coarseSolverPrototype, Teuchos::null) );

    // main factory manager
    FactoryManager M;
    M.SetFactory("A",            AcFact);
    M.SetFactory("P",            PFact);
    M.SetFactory("R",            RFact);
    M.SetFactory("Smoother",     smootherFact); // TODO fix me
    M.SetFactory("CoarseSolver", coarseSolverFact);

    H.SetVerbLevel(MueLu::Test);
    H.Setup(M);

    std::cout << "main AcFact = " << AcFact.get() << std::endl;
    RCP<Level> l0 = H.GetLevel(0);
    RCP<Level> l1 = H.GetLevel(1);
    RCP<Level> l2 = H.GetLevel(2);

    l0->print(*out,Teuchos::VERB_EXTREME);
    l1->print(*out,Teuchos::VERB_EXTREME);
    l2->print(*out,Teuchos::VERB_EXTREME);

    // Define B
    RCP<Vector> X = VectorFactory::Build(bigMap,1);
    RCP<Vector> B = VectorFactory::Build(bigMap,1);
    X->setSeed(846930886);
    X->randomize();
    bOp->apply(*X, *B, Teuchos::NO_TRANS, (SC)1.0, (SC)0.0);

    // X = 0
    X->putScalar((SC) 0.0);

    LO nIts = 9;
    H.Iterate(*B, *X, nIts);

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}