int main(int argc, char *argv[])
{

#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);

  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  int commRank = Teuchos::GlobalMPISession::getRank();

  Comm.Barrier(); // set breakpoint here to allow debugger attachment to other MPI processes than the one you automatically attached to.

  Teuchos::CommandLineProcessor cmdp(false,true); // false: don't throw exceptions; true: do return errors for unrecognized options

  // problem parameters:
  int spaceDim = 2;
  double Re = 40;
  bool steady = false;
  string problemChoice = "TaylorGreen";
  int numRefs = 1;
  int p = 2, delta_p = 2;
  int numXElems = 1;
  int numTElems = 1;
  int numSlabs = 1;
  bool useConformingTraces = false;
  string solverChoice = "KLU";
  string multigridStrategyString = "W-cycle";
  bool useCondensedSolve = false;
  bool useConjugateGradient = true;
  bool logFineOperator = false;
  // double solverTolerance = 1e-8;
  double nonlinearTolerance = 1e-5;
  // int maxLinearIterations = 10000;
  int maxNonlinearIterations = 20;
  int cgMaxIterations = 10000;
  double cgTol = 1e-8;
  bool computeL2Error = false;
  bool exportSolution = false;
  bool saveSolution = false;
  bool loadSolution = false;
  int loadRef = 0;
  int loadDirRef = 0;
  string norm = "Graph";
  string rootDir = ".";
  string tag="";
  cmdp.setOption("spaceDim", &spaceDim, "spatial dimension");
  cmdp.setOption("Re", &Re, "Re");
  cmdp.setOption("steady", "transient", &steady, "use steady incompressible Navier-Stokes");
  cmdp.setOption("problem", &problemChoice, "Kovasznay, TaylorGreen");
  cmdp.setOption("polyOrder",&p,"polynomial order for field variable u");
  cmdp.setOption("delta_p", &delta_p, "test space polynomial order enrichment");
  cmdp.setOption("numRefs",&numRefs,"number of refinements");
  cmdp.setOption("numXElems",&numXElems,"number of elements in x direction");
  cmdp.setOption("numTElems",&numTElems,"number of elements in t direction");
  cmdp.setOption("numSlabs",&numSlabs,"number of time slabs to use");
  cmdp.setOption("norm", &norm, "norm");
  cmdp.setOption("conformingTraces", "nonconformingTraces", &useConformingTraces, "use conforming traces");
  cmdp.setOption("solver", &solverChoice, "KLU, SuperLU, MUMPS, GMG-Direct, GMG-ILU, GMG-IC");
  cmdp.setOption("multigridStrategy", &multigridStrategyString, "Multigrid strategy: V-cycle, W-cycle, Full, or Two-level");
  cmdp.setOption("useCondensedSolve", "useStandardSolve", &useCondensedSolve);
  cmdp.setOption("CG", "GMRES", &useConjugateGradient);
  cmdp.setOption("logFineOperator", "dontLogFineOperator", &logFineOperator);
  // cmdp.setOption("solverTolerance", &solverTolerance, "iterative solver tolerance");
  cmdp.setOption("nonlinearTolerance", &nonlinearTolerance, "nonlinear solver tolerance");
  // cmdp.setOption("maxLinearIterations", &maxLinearIterations, "maximum number of iterations for linear solver");
  cmdp.setOption("maxNonlinearIterations", &maxNonlinearIterations, "maximum number of iterations for Newton solver");
  cmdp.setOption("exportDir", &rootDir, "export directory");
  cmdp.setOption("computeL2Error", "skipL2Error", &computeL2Error, "compute L2 error");
  cmdp.setOption("exportSolution", "skipExport", &exportSolution, "export solution to HDF5");
  cmdp.setOption("saveSolution", "skipSave", &saveSolution, "save mesh and solution to HDF5");
  cmdp.setOption("loadSolution", "skipLoad", &loadSolution, "load mesh and solution from HDF5");
  cmdp.setOption("loadRef", &loadRef, "load refinement number");
  cmdp.setOption("loadDirRef", &loadDirRef, "which refinement directory to load from");
  cmdp.setOption("tag", &tag, "output tag");

  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
  {
#ifdef HAVE_MPI
    MPI_Finalize();
#endif
    return -1;
  }

  map<string, Teuchos::RCP<IncompressibleProblem>> problems;
  problems["ManufacturedSolution"] = Teuchos::rcp(new IncompressibleManufacturedSolution(steady, Re, numXElems));
  problems["Kovasznay"] = Teuchos::rcp(new KovasznayProblem(steady, Re));
  problems["TaylorGreen"] = Teuchos::rcp(new TaylorGreenProblem(steady, Re, numXElems, numSlabs));
  problems["Cylinder"] = Teuchos::rcp(new CylinderProblem(steady, Re, numSlabs));
  problems["SquareCylinder"] = Teuchos::rcp(new SquareCylinderProblem(steady, Re, numSlabs));
  Teuchos::RCP<IncompressibleProblem> problem = problems.at(problemChoice);

  // if (commRank == 0)
  // {
  //   Solver::printAvailableSolversReport();
  //   cout << endl;
  // }
  Teuchos::RCP<Time> totalTimer = Teuchos::TimeMonitor::getNewCounter("Total Time");
  totalTimer->start(true);

  for (; problem->currentStep() < problem->numSlabs(); problem->advanceStep())
  {
    if (problem->numSlabs() > 1 && commRank == 0 && !steady)
      cout << "Solving time slab [" << problem->currentT0() << ", " << problem->currentT1() << "]" << endl;

    ostringstream problemName;
    string isSteady = "Steady";
    if (!steady)
      isSteady = "Transient";
    problemName << isSteady << problemChoice << spaceDim << "D_slab" << problem->currentStep() << "_" << norm << "_" << Re << "_p" << p << "_" << solverChoice;
    if (tag != "")
      problemName << "_" << tag;
    ostringstream saveDir;
    saveDir << problemName.str() << "_ref" << loadRef;

    int success = mkdir((rootDir+"/"+saveDir.str()).c_str(), S_IRWXU | S_IRWXG);

    string dataFileLocation = rootDir + "/" + saveDir.str() + "/" + saveDir.str() + ".data";
    string exportName = saveDir.str();

    ostringstream loadDir;
    loadDir << problemName.str() << "_ref" << loadDirRef;
    string loadFilePrefix = "";
    if (loadSolution)
    {
      loadFilePrefix = rootDir + "/" + loadDir.str() + "/" + saveDir.str();
      if (commRank == 0) cout << "Loading previous solution " << loadFilePrefix << endl;
    }
    // ostringstream saveDir;
    // saveDir << problemName.str() << "_ref" << loadRef;
    string saveFilePrefix = rootDir + "/" + saveDir.str() + "/" + problemName.str();
    if (saveSolution && commRank == 0) cout << "Saving to " << saveFilePrefix << endl;

    Teuchos::ParameterList parameters;
    parameters.set("spaceDim", spaceDim);
    parameters.set("steady", steady);
    parameters.set("mu", 1./Re);
    parameters.set("useConformingTraces", useConformingTraces);
    parameters.set("fieldPolyOrder", p);
    parameters.set("delta_p", delta_p);
    parameters.set("numTElems", numTElems);
    parameters.set("norm", norm);
    parameters.set("savedSolutionAndMeshPrefix", loadFilePrefix);
    SpaceTimeIncompressibleFormulationPtr form = Teuchos::rcp(new SpaceTimeIncompressibleFormulation(problem, parameters));

    MeshPtr mesh = form->solutionUpdate()->mesh();
    vector<MeshPtr> meshesCoarseToFine;
    MeshPtr k0Mesh = Teuchos::rcp( new Mesh (mesh->getTopology()->deepCopy(), form->bf(), 1, delta_p) );
    meshesCoarseToFine.push_back(k0Mesh);
    meshesCoarseToFine.push_back(mesh);
    // mesh->registerObserver(k0Mesh);

    // Set up boundary conditions
    problem->setBCs(form);

    // Set up solution
    SolutionPtr solutionUpdate = form->solutionUpdate();
    SolutionPtr solutionBackground = form->solutionBackground();
    // dynamic_cast<AnalyticalIncompressibleProblem*>(problem.get())->projectExactSolution(solutionBackground);

    RefinementStrategyPtr refStrategy = form->getRefinementStrategy();
    Teuchos::RCP<HDF5Exporter> exporter;
    if (exportSolution)
      exporter = Teuchos::rcp(new HDF5Exporter(mesh,exportName, rootDir));

    Teuchos::RCP<Time> solverTime = Teuchos::TimeMonitor::getNewCounter("Solve Time");
    map<string, SolverPtr> solvers;
    solvers["KLU"] = Solver::getSolver(Solver::KLU, true);
#if defined(HAVE_AMESOS_SUPERLUDIST) || defined(HAVE_AMESOS2_SUPERLUDIST)
    solvers["SuperLUDist"] = Solver::getSolver(Solver::SuperLUDist, true);
#endif
#ifdef HAVE_AMESOS_MUMPS
    solvers["MUMPS"] = Solver::getSolver(Solver::MUMPS, true);
#endif
    bool useStaticCondensation = false;

    GMGOperator::MultigridStrategy multigridStrategy;
    if (multigridStrategyString == "Two-level")
    {
      multigridStrategy = GMGOperator::TWO_LEVEL;
    }
    else if (multigridStrategyString == "W-cycle")
    {
      multigridStrategy = GMGOperator::W_CYCLE;
    }
    else if (multigridStrategyString == "V-cycle")
    {
      multigridStrategy = GMGOperator::V_CYCLE;
    }
    else if (multigridStrategyString == "Full-V")
    {
      multigridStrategy = GMGOperator::FULL_MULTIGRID_V;
    }
    else if (multigridStrategyString == "Full-W")
    {
      multigridStrategy = GMGOperator::FULL_MULTIGRID_W;
    }
    else
    {
      TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "unrecognized multigrid strategy");
    }

    ofstream dataFile(dataFileLocation);
    dataFile << "ref\t " << "elements\t " << "dofs\t " << "energy\t " << "l2\t " << "solvetime\t" << "iterations\t " << endl;

    // {
    //   // ostringstream saveFile;
    //   // saveFile << saveFilePrefix << "_ref" << -1;
    //   // form->save(saveFile.str());
    //   exporter->exportSolution(solutionBackground, -1);
    //   if (commRank == 0)
    //     cout << "Done exporting" << endl;
    // }

    for (int refIndex=loadRef; refIndex <= numRefs; refIndex++)
    {
      double l2Update = 1e10;
      int iterCount = 0;
      solverTime->start(true);
      Teuchos::RCP<GMGSolver> gmgSolver;
      if (solverChoice[0] == 'G')
      {
        // gmgSolver = Teuchos::rcp( new GMGSolver(solutionUpdate, k0Mesh, maxLinearIterations, solverTolerance, Solver::getDirectSolver(true), useStaticCondensation));
        bool reuseFactorization = true;
        SolverPtr coarseSolver = Solver::getDirectSolver(reuseFactorization);
        gmgSolver = Teuchos::rcp(new GMGSolver(solutionUpdate, meshesCoarseToFine, cgMaxIterations, cgTol, multigridStrategy, coarseSolver, useCondensedSolve));
        gmgSolver->setUseConjugateGradient(useConjugateGradient);
        int azOutput = 20; // print residual every 20 CG iterations
        gmgSolver->setAztecOutput(azOutput);
        gmgSolver->gmgOperator()->setNarrateOnRankZero(logFineOperator,"finest GMGOperator");

        // gmgSolver->setAztecOutput(azOutput);
        // if (solverChoice == "GMG-Direct")
        //   gmgSolver->gmgOperator()->setSchwarzFactorizationType(GMGOperator::Direct);
        // if (solverChoice == "GMG-ILU")
        //   gmgSolver->gmgOperator()->setSchwarzFactorizationType(GMGOperator::ILU);
        // if (solverChoice == "GMG-IC")
        //   gmgSolver->gmgOperator()->setSchwarzFactorizationType(GMGOperator::IC);
      }
      while (l2Update > nonlinearTolerance && iterCount < maxNonlinearIterations)
      {
        if (solverChoice[0] == 'G')
          solutionUpdate->solve(gmgSolver);
        else
          solutionUpdate->condensedSolve(solvers[solverChoice]);

        // Compute L2 norm of update
        double u1L2Update = solutionUpdate->L2NormOfSolutionGlobal(form->u(1)->ID());
        double u2L2Update = solutionUpdate->L2NormOfSolutionGlobal(form->u(2)->ID());
        l2Update = sqrt(u1L2Update*u1L2Update + u2L2Update*u2L2Update);
        if (commRank == 0)
          cout << "Nonlinear Update:\t " << l2Update << endl;

        form->updateSolution();
        iterCount++;
      }
      double solveTime = solverTime->stop();

      double energyError = solutionUpdate->energyErrorTotal();
      double l2Error = 0;
      if (computeL2Error)
      {
        l2Error = problem->computeL2Error(form, solutionBackground);
      }
      if (commRank == 0)
      {
        cout << "Refinement: " << refIndex
          << " \tElements: " << mesh->numActiveElements()
          << " \tDOFs: " << mesh->numGlobalDofs()
          << " \tEnergy Error: " << energyError
          << " \tL2 Error: " << l2Error
          << " \tSolve Time: " << solveTime
          << " \tTotal Time: " << totalTimer->totalElapsedTime(true)
          // << " \tIteration Count: " << iterationCount
          << endl;
        dataFile << refIndex
          << " " << mesh->numActiveElements()
          << " " << mesh->numGlobalDofs()
          << " " << energyError
          << " " << l2Error
          << " " << solveTime
          << " " << totalTimer->totalElapsedTime(true)
          // << " " << iterationCount
          << endl;
      }

      if (exportSolution)
        exporter->exportSolution(solutionBackground, refIndex);

      if (saveSolution)
      {
        ostringstream saveFile;
        saveFile << saveFilePrefix << "_ref" << refIndex;
        form->save(saveFile.str());
      }

      if (refIndex != numRefs)
      {
        // k0Mesh = Teuchos::rcp( new Mesh (mesh->getTopology()->deepCopy(), form->bf(), 1, delta_p) );
        // meshesCoarseToFine.push_back(k0Mesh);
        refStrategy->refine();
        meshesCoarseToFine.push_back(mesh);
      }
    }
    dataFile.close();
  }
  double totalTime = totalTimer->stop();
  if (commRank == 0)
    cout << "Total time = " << totalTime << endl;

  return 0;
}
int main(int argc, char *argv[])
{

#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);

  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  int commRank = Teuchos::GlobalMPISession::getRank();

  Comm.Barrier(); // set breakpoint here to allow debugger attachment to other MPI processes than the one you automatically attached to.

  Teuchos::CommandLineProcessor cmdp(false,true); // false: don't throw exceptions; true: do return errors for unrecognized options

  // problem parameters:
  int spaceDim = 2;
  double epsilon = 1e-2;
  int numRefs = 0;
  int k = 2, delta_k = 2;
  int numXElems = 1;
  bool useConformingTraces = true;
  string solverChoice = "KLU";
  string coarseSolverChoice = "KLU"; // often this beats SuperLU_Dist as coarse solver (true on BG/Q with 6000 3D elements on 256 ranks)
  double solverTolerance = 1e-6;
  string norm = "CoupledRobust";
  cmdp.setOption("spaceDim", &spaceDim, "spatial dimension");
  cmdp.setOption("polyOrder",&k,"polynomial order for field variable u");
  cmdp.setOption("delta_k", &delta_k, "test space polynomial order enrichment");
  cmdp.setOption("numRefs",&numRefs,"number of refinements");
  cmdp.setOption("numXElems",&numXElems,"number of elements in x direction");
  cmdp.setOption("epsilon", &epsilon, "epsilon");
  cmdp.setOption("norm", &norm, "norm");
  cmdp.setOption("conformingTraces", "nonconformingTraces", &useConformingTraces, "use conforming traces");
  cmdp.setOption("coarseSolver", &coarseSolverChoice, "KLU, SuperLU");
  cmdp.setOption("solver", &solverChoice, "KLU, SuperLU, MUMPS, GMG-Direct, GMG-ILU, GMG-IC");
  cmdp.setOption("solverTolerance", &solverTolerance, "iterative solver tolerance");

  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
  {
#ifdef HAVE_MPI
    MPI_Finalize();
#endif
    return -1;
  }

  FunctionPtr beta;
  FunctionPtr beta_x = Function::constant(1);
  FunctionPtr beta_y = Function::constant(2);
  FunctionPtr beta_z = Function::constant(3);
  if (spaceDim == 1)
    beta = beta_x;
  else if (spaceDim == 2)
    beta = Function::vectorize(beta_x, beta_y);
  else if (spaceDim == 3)
    beta = Function::vectorize(beta_x, beta_y, beta_z);

  ConvectionDiffusionFormulation form(spaceDim, useConformingTraces, beta, epsilon);

  // Define right hand side
  RHSPtr rhs = RHS::rhs();

  // Set up boundary conditions
  BCPtr bc = BC::bc();
  VarPtr uhat = form.uhat();
  VarPtr tc = form.tc();
  SpatialFilterPtr inflowX = SpatialFilter::matchingX(-1);
  SpatialFilterPtr inflowY = SpatialFilter::matchingY(-1);
  SpatialFilterPtr inflowZ = SpatialFilter::matchingZ(-1);
  SpatialFilterPtr outflowX = SpatialFilter::matchingX(1);
  SpatialFilterPtr outflowY = SpatialFilter::matchingY(1);
  SpatialFilterPtr outflowZ = SpatialFilter::matchingZ(1);
  FunctionPtr zero = Function::zero();
  FunctionPtr one = Function::constant(1);
  FunctionPtr x = Function::xn(1);
  FunctionPtr y = Function::yn(1);
  FunctionPtr z = Function::zn(1);
  if (spaceDim == 1)
  {
    bc->addDirichlet(tc, inflowX, -one);
    bc->addDirichlet(uhat, outflowX, zero);
  }
  if (spaceDim == 2)
  {
    bc->addDirichlet(tc, inflowX, -1*.5*(one-y));
    bc->addDirichlet(uhat, outflowX, zero);
    bc->addDirichlet(tc, inflowY, -2*.5*(one-x));
    bc->addDirichlet(uhat, outflowY, zero);
  }
  if (spaceDim == 3)
  {
    bc->addDirichlet(tc, inflowX, -1*.25*(one-y)*(one-z));
    bc->addDirichlet(uhat, outflowX, zero);
    bc->addDirichlet(tc, inflowY, -2*.25*(one-x)*(one-z));
    bc->addDirichlet(uhat, outflowY, zero);
    bc->addDirichlet(tc, inflowZ, -3*.25*(one-x)*(one-y));
    bc->addDirichlet(uhat, outflowZ, zero);
  }

  // Build mesh
  vector<double> x0 = vector<double>(spaceDim,-1.0);
  double width = 2.0;
  vector<double> dimensions;
  vector<int> elementCounts;
  for (int d=0; d<spaceDim; d++)
  {
    dimensions.push_back(width);
    elementCounts.push_back(numXElems);
  }
  MeshPtr mesh = MeshFactory::rectilinearMesh(form.bf(), dimensions, elementCounts, k+1, delta_k, x0);
  MeshPtr k0Mesh = Teuchos::rcp( new Mesh (mesh->getTopology()->deepCopy(), form.bf(), 1, delta_k) );
  mesh->registerObserver(k0Mesh);

  // Set up solution
  SolutionPtr soln = Solution::solution(form.bf(), mesh, bc, rhs, form.ip(norm));

  double threshold = 0.20;
  RefinementStrategy refStrategy(soln, threshold);

  ostringstream refName;
  refName << "confusion" << spaceDim << "D_" << norm << "_" << epsilon << "_k" << k << "_" << solverChoice;
  // HDF5Exporter exporter(mesh,refName.str());

  Teuchos::RCP<Time> solverTime = Teuchos::TimeMonitor::getNewCounter("Solve Time");

  if (commRank == 0)
    Solver::printAvailableSolversReport();
  map<string, SolverPtr> solvers;
  solvers["KLU"] = Solver::getSolver(Solver::KLU, true);
  SolverPtr superluSolver = Solver::getSolver(Solver::SuperLUDist, true);
  solvers["SuperLU"] = superluSolver;
  
  int maxIters = 2000;
  bool useStaticCondensation = false;
  int azOutput = 20; // print residual every 20 CG iterations

  ofstream dataFile(refName.str()+".txt");
  dataFile << "ref\t " << "elements\t " << "dofs\t " << "error\t " << "solvetime\t" << "iterations\t " << endl;
  for (int refIndex=0; refIndex <= numRefs; refIndex++)
  {
    solverTime->start(true);
    Teuchos::RCP<GMGSolver> gmgSolver;
    if (solverChoice[0] == 'G')
    {
      gmgSolver = Teuchos::rcp( new GMGSolver(soln, k0Mesh, maxIters, solverTolerance, solvers[coarseSolverChoice], useStaticCondensation));
      
      gmgSolver->setAztecOutput(azOutput);
      if (solverChoice == "GMG-Direct")
        gmgSolver->gmgOperator().setSchwarzFactorizationType(GMGOperator::Direct);
      if (solverChoice == "GMG-ILU")
        gmgSolver->gmgOperator().setSchwarzFactorizationType(GMGOperator::ILU);
      if (solverChoice == "GMG-IC")
        gmgSolver->gmgOperator().setSchwarzFactorizationType(GMGOperator::IC);
      soln->solve(gmgSolver);
    }
    else
      soln->condensedSolve(solvers[solverChoice]);
    double solveTime = solverTime->stop();

    double energyError = soln->energyErrorTotal();
    if (commRank == 0)
    {
      // if (refIndex > 0)
      // refStrategy.printRefinementStatistics(refIndex-1);
      if (solverChoice[0] == 'G')
      {
        cout << "Refinement: " << refIndex
             << " \tElements: " << mesh->numActiveElements()
             << " \tDOFs: " << mesh->numGlobalDofs()
             << " \tEnergy Error: " << energyError
             << " \tSolve Time: " << solveTime
             << " \tIteration Count: " << gmgSolver->iterationCount()
             << endl;
        dataFile << refIndex
                 << " " << mesh->numActiveElements()
                 << " " << mesh->numGlobalDofs()
                 << " " << energyError
                 << " " << solveTime
                 << " " << gmgSolver->iterationCount()
                 << endl;
      }
      else
      {
        cout << "Refinement: " << refIndex
             << " \tElements: " << mesh->numActiveElements()
             << " \tDOFs: " << mesh->numGlobalDofs()
             << " \tEnergy Error: " << energyError
             << " \tSolve Time: " << solveTime
             << endl;
        dataFile << refIndex
                 << " " << mesh->numActiveElements()
                 << " " << mesh->numGlobalDofs()
                 << " " << energyError
                 << " " << solveTime
                 << endl;
      }
    }

    // exporter.exportSolution(soln, refIndex);

    if (refIndex != numRefs)
      refStrategy.refine();
  }
  dataFile.close();

  return 0;
}