int main(int argc, char *argv[])
{
  int rank = 0;
#ifdef HAVE_MPI
  // TODO: figure out the right thing to do here...
  // may want to modify argc and argv before we make the following call:
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  rank=mpiSession.getRank();
#else
#endif
  bool useLineSearch = false;

  int pToAdd = 2; // for optimal test function approximation
  int pToAddForStreamFunction = 2;
  double nonlinearStepSize = 1.0;
  double dt = 0.5;
  double nonlinearRelativeEnergyTolerance = 0.015; // used to determine convergence of the nonlinear solution
  //  double nonlinearRelativeEnergyTolerance = 0.15; // used to determine convergence of the nonlinear solution
  double eps = 1.0/64.0; // width of ramp up to 1.0 for top BC;  eps == 0 ==> soln not in H1
  // epsilon above is chosen to match our initial 16x16 mesh, to avoid quadrature errors.
  //  double eps = 0.0; // John Evans's problem: not in H^1
  bool enforceLocalConservation = false;
  bool enforceOneIrregularity = true;
  bool reportPerCellErrors  = true;
  bool useMumps = true;

  int horizontalCells, verticalCells;

  int maxIters = 50; // for nonlinear steps

  vector<double> ReValues;

  // usage: polyOrder [numRefinements]
  // parse args:
  if (argc < 6)
  {
    cout << "Usage: NavierStokesCavityFlowContinuationFixedMesh fieldPolyOrder hCells vCells energyErrorGoal Re0 [Re1 ...]\n";
    return -1;
  }
  int polyOrder = atoi(argv[1]);
  horizontalCells = atoi(argv[2]);
  verticalCells = atoi(argv[3]);
  double energyErrorGoal = atof(argv[4]);
  for (int i=5; i<argc; i++)
  {
    ReValues.push_back(atof(argv[i]));
  }
  if (rank == 0)
  {
    cout << "L^2 order: " << polyOrder << endl;
    cout << "initial mesh size: " << horizontalCells << " x " << verticalCells << endl;
    cout << "energy error goal: " << energyErrorGoal << endl;
    cout << "Reynolds number values for continuation:\n";
    for (int i=0; i<ReValues.size(); i++)
    {
      cout << ReValues[i] << ", ";
    }
    cout << endl;
  }

  FieldContainer<double> quadPoints(4,2);

  quadPoints(0,0) = 0.0; // x1
  quadPoints(0,1) = 0.0; // y1
  quadPoints(1,0) = 1.0;
  quadPoints(1,1) = 0.0;
  quadPoints(2,0) = 1.0;
  quadPoints(2,1) = 1.0;
  quadPoints(3,0) = 0.0;
  quadPoints(3,1) = 1.0;

  // define meshes:
  int H1Order = polyOrder + 1;
  bool useTriangles = false;
  bool meshHasTriangles = useTriangles;

  double minL2Increment = 1e-8;

  // get variable definitions:
  VarFactory varFactory = VGPStokesFormulation::vgpVarFactory();
  u1 = varFactory.fieldVar(VGP_U1_S);
  u2 = varFactory.fieldVar(VGP_U2_S);
  sigma11 = varFactory.fieldVar(VGP_SIGMA11_S);
  sigma12 = varFactory.fieldVar(VGP_SIGMA12_S);
  sigma21 = varFactory.fieldVar(VGP_SIGMA21_S);
  sigma22 = varFactory.fieldVar(VGP_SIGMA22_S);
  p = varFactory.fieldVar(VGP_P_S);

  u1hat = varFactory.traceVar(VGP_U1HAT_S);
  u2hat = varFactory.traceVar(VGP_U2HAT_S);
  t1n = varFactory.fluxVar(VGP_T1HAT_S);
  t2n = varFactory.fluxVar(VGP_T2HAT_S);

  v1 = varFactory.testVar(VGP_V1_S, HGRAD);
  v2 = varFactory.testVar(VGP_V2_S, HGRAD);
  tau1 = varFactory.testVar(VGP_TAU1_S, HDIV);
  tau2 = varFactory.testVar(VGP_TAU2_S, HDIV);
  q = varFactory.testVar(VGP_Q_S, HGRAD);

  FunctionPtr u1_0 = Teuchos::rcp( new U1_0(eps) );
  FunctionPtr u2_0 = Teuchos::rcp( new U2_0 );
  FunctionPtr zero = Function::zero();
  ParameterFunctionPtr Re_param = ParameterFunction::parameterFunction(1);
  VGPNavierStokesProblem problem = VGPNavierStokesProblem(Re_param,quadPoints,
                                   horizontalCells,verticalCells,
                                   H1Order, pToAdd,
                                   u1_0, u2_0,  // BC for u
                                   zero, zero); // zero forcing function
  SolutionPtr solution = problem.backgroundFlow();
  SolutionPtr solnIncrement = problem.solutionIncrement();

  Teuchos::RCP<Mesh> mesh = problem.mesh();
  mesh->registerSolution(solution);
  mesh->registerSolution(solnIncrement);

  ///////////////////////////////////////////////////////////////////////////

  // define bilinear form for stream function:
  VarFactory streamVarFactory;
  VarPtr phi_hat = streamVarFactory.traceVar("\\widehat{\\phi}");
  VarPtr psin_hat = streamVarFactory.fluxVar("\\widehat{\\psi}_n");
  VarPtr psi_1 = streamVarFactory.fieldVar("\\psi_1");
  VarPtr psi_2 = streamVarFactory.fieldVar("\\psi_2");
  VarPtr phi = streamVarFactory.fieldVar("\\phi");
  VarPtr q_s = streamVarFactory.testVar("q_s", HGRAD);
  VarPtr v_s = streamVarFactory.testVar("v_s", HDIV);
  BFPtr streamBF = Teuchos::rcp( new BF(streamVarFactory) );
  streamBF->addTerm(psi_1, q_s->dx());
  streamBF->addTerm(psi_2, q_s->dy());
  streamBF->addTerm(-psin_hat, q_s);

  streamBF->addTerm(psi_1, v_s->x());
  streamBF->addTerm(psi_2, v_s->y());
  streamBF->addTerm(phi, v_s->div());
  streamBF->addTerm(-phi_hat, v_s->dot_normal());

  Teuchos::RCP<Mesh> streamMesh, overkillMesh;

  streamMesh = MeshFactory::buildQuadMesh(quadPoints, horizontalCells, verticalCells,
                                          streamBF, H1Order+pToAddForStreamFunction,
                                          H1Order+pToAdd+pToAddForStreamFunction, useTriangles);

  mesh->registerObserver(streamMesh); // will refine streamMesh in the same way as mesh.

  map<int, double> dofsToL2error; // key: numGlobalDofs, value: total L2error compared with overkill
  vector< VarPtr > fields;
  fields.push_back(u1);
  fields.push_back(u2);
  fields.push_back(sigma11);
  fields.push_back(sigma12);
  fields.push_back(sigma21);
  fields.push_back(sigma22);
  fields.push_back(p);

  if (rank == 0)
  {
    cout << "Starting mesh has " << horizontalCells << " x " << verticalCells << " elements and ";
    cout << mesh->numGlobalDofs() << " total dofs.\n";
    cout << "polyOrder = " << polyOrder << endl;
    cout << "pToAdd = " << pToAdd << endl;
    cout << "eps for top BC = " << eps << endl;

    if (useTriangles)
    {
      cout << "Using triangles.\n";
    }
    if (enforceLocalConservation)
    {
      cout << "Enforcing local conservation.\n";
    }
    else
    {
      cout << "NOT enforcing local conservation.\n";
    }
    if (enforceOneIrregularity)
    {
      cout << "Enforcing 1-irregularity.\n";
    }
    else
    {
      cout << "NOT enforcing 1-irregularity.\n";
    }
  }

  ////////////////////   CREATE BCs   ///////////////////////
  SpatialFilterPtr entireBoundary = Teuchos::rcp( new SpatialFilterUnfiltered );

  FunctionPtr u1_prev = Function::solution(u1,solution);
  FunctionPtr u2_prev = Function::solution(u2,solution);

  FunctionPtr u1hat_prev = Function::solution(u1hat,solution);
  FunctionPtr u2hat_prev = Function::solution(u2hat,solution);


  ////////////////////   SOLVE & REFINE   ///////////////////////

  FunctionPtr vorticity = Teuchos::rcp( new PreviousSolutionFunction(solution, - u1->dy() + u2->dx() ) );
  //  FunctionPtr vorticity = Teuchos::rcp( new PreviousSolutionFunction(solution,sigma12 - sigma21) );
  RHSPtr streamRHS = RHS::rhs();
  streamRHS->addTerm(vorticity * q_s);
  ((PreviousSolutionFunction*) vorticity.get())->setOverrideMeshCheck(true);
  ((PreviousSolutionFunction*) u1_prev.get())->setOverrideMeshCheck(true);
  ((PreviousSolutionFunction*) u2_prev.get())->setOverrideMeshCheck(true);

  BCPtr streamBC = BC::bc();
  //  streamBC->addDirichlet(psin_hat, entireBoundary, u0_cross_n);
  streamBC->addDirichlet(phi_hat, entireBoundary, zero);
  //  streamBC->addZeroMeanConstraint(phi);

  IPPtr streamIP = Teuchos::rcp( new IP );
  streamIP->addTerm(q_s);
  streamIP->addTerm(q_s->grad());
  streamIP->addTerm(v_s);
  streamIP->addTerm(v_s->div());
  SolutionPtr streamSolution = Teuchos::rcp( new Solution( streamMesh, streamBC, streamRHS, streamIP ) );

  if (enforceLocalConservation)
  {
    FunctionPtr zero = Function::zero();
    solution->lagrangeConstraints()->addConstraint(u1hat->times_normal_x() + u2hat->times_normal_y()==zero);
    solnIncrement->lagrangeConstraints()->addConstraint(u1hat->times_normal_x() + u2hat->times_normal_y()==zero);
  }

  if (true)
  {
    FunctionPtr u1_incr = Function::solution(u1, solnIncrement);
    FunctionPtr u2_incr = Function::solution(u2, solnIncrement);
    FunctionPtr sigma11_incr = Function::solution(sigma11, solnIncrement);
    FunctionPtr sigma12_incr = Function::solution(sigma12, solnIncrement);
    FunctionPtr sigma21_incr = Function::solution(sigma21, solnIncrement);
    FunctionPtr sigma22_incr = Function::solution(sigma22, solnIncrement);
    FunctionPtr p_incr = Function::solution(p, solnIncrement);

    FunctionPtr l2_incr = u1_incr * u1_incr + u2_incr * u2_incr + p_incr * p_incr
                          + sigma11_incr * sigma11_incr + sigma12_incr * sigma12_incr
                          + sigma21_incr * sigma21_incr + sigma22_incr * sigma22_incr;

    double energyThreshold = 0.20;
    Teuchos::RCP< RefinementStrategy > refinementStrategy = Teuchos::rcp( new RefinementStrategy( solnIncrement, energyThreshold ));

    for (int i=0; i<ReValues.size(); i++)
    {
      double Re = ReValues[i];
      Re_param->setValue(Re);
      if (rank==0) cout << "Solving with Re = " << Re << ":\n";
      double energyErrorTotal;
      do
      {
        double incr_norm;
        do
        {
          problem.iterate(useLineSearch);
          incr_norm = sqrt(l2_incr->integrate(problem.mesh()));
          if (rank==0)
          {
            cout << "\x1B[2K"; // Erase the entire current line.
            cout << "\x1B[0E"; // Move to the beginning of the current line.
            cout << "Iteration: " << problem.iterationCount() << "; L^2(incr) = " << incr_norm;
            flush(cout);
          }
        }
        while ((incr_norm > minL2Increment ) && (problem.iterationCount() < maxIters));
        if (rank==0) cout << endl;
        problem.setIterationCount(1); // 1 means reuse background flow (which we must, given that we want continuation in Re...)
        energyErrorTotal = solnIncrement->energyErrorTotal(); //solution->energyErrorTotal();
        if (energyErrorTotal > energyErrorGoal)
        {
          refinementStrategy->refine(false);
        }
        if (rank==0)
        {
          cout << "Energy error: " << energyErrorTotal << endl;
        }
      }
      while (energyErrorTotal > energyErrorGoal);
    }
  }

  double energyErrorTotal = solution->energyErrorTotal();
  double incrementalEnergyErrorTotal = solnIncrement->energyErrorTotal();
  if (rank == 0)
  {
    cout << "final mesh has " << mesh->numActiveElements() << " elements and " << mesh->numGlobalDofs() << " dofs.\n";
    cout << "energy error: " << energyErrorTotal << endl;
    cout << "  (Incremental solution's energy error is " << incrementalEnergyErrorTotal << ".)\n";
  }

  FunctionPtr u1_sq = u1_prev * u1_prev;
  FunctionPtr u_dot_u = u1_sq + (u2_prev * u2_prev);
  FunctionPtr u_mag = Teuchos::rcp( new SqrtFunction( u_dot_u ) );
  FunctionPtr u_div = Teuchos::rcp( new PreviousSolutionFunction(solution, u1->dx() + u2->dy() ) );
  FunctionPtr massFlux = Teuchos::rcp( new PreviousSolutionFunction(solution, u1hat->times_normal_x() + u2hat->times_normal_y()) );

  // check that the zero mean pressure is being correctly imposed:
  FunctionPtr p_prev = Teuchos::rcp( new PreviousSolutionFunction(solution,p) );
  double p_avg = p_prev->integrate(mesh);
  if (rank==0)
    cout << "Integral of pressure: " << p_avg << endl;

  // integrate massFlux over each element (a test):
  // fake a new bilinear form so we can integrate against 1
  VarPtr testOne = varFactory.testVar("1",CONSTANT_SCALAR);
  BFPtr fakeBF = Teuchos::rcp( new BF(varFactory) );
  LinearTermPtr massFluxTerm = massFlux * testOne;

  CellTopoPtrLegacy quadTopoPtr = Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData<shards::Quadrilateral<4> >() ));
  DofOrderingFactory dofOrderingFactory(fakeBF);
  int fakeTestOrder = H1Order;
  DofOrderingPtr testOrdering = dofOrderingFactory.testOrdering(fakeTestOrder, *quadTopoPtr);

  int testOneIndex = testOrdering->getDofIndex(testOne->ID(),0);
  vector< ElementTypePtr > elemTypes = mesh->elementTypes(); // global element types
  map<int, double> massFluxIntegral; // cellID -> integral
  double maxMassFluxIntegral = 0.0;
  double totalMassFlux = 0.0;
  double totalAbsMassFlux = 0.0;
  double maxCellMeasure = 0;
  double minCellMeasure = 1;
  for (vector< ElementTypePtr >::iterator elemTypeIt = elemTypes.begin(); elemTypeIt != elemTypes.end(); elemTypeIt++)
  {
    ElementTypePtr elemType = *elemTypeIt;
    vector< ElementPtr > elems = mesh->elementsOfTypeGlobal(elemType);
    vector<GlobalIndexType> cellIDs;
    for (int i=0; i<elems.size(); i++)
    {
      cellIDs.push_back(elems[i]->cellID());
    }
    FieldContainer<double> physicalCellNodes = mesh->physicalCellNodesGlobal(elemType);
    BasisCachePtr basisCache = Teuchos::rcp( new BasisCache(elemType,mesh,polyOrder) ); // enrich by trial space order
    basisCache->setPhysicalCellNodes(physicalCellNodes,cellIDs,true); // true: create side caches
    FieldContainer<double> cellMeasures = basisCache->getCellMeasures();
    FieldContainer<double> fakeRHSIntegrals(elems.size(),testOrdering->totalDofs());
    massFluxTerm->integrate(fakeRHSIntegrals,testOrdering,basisCache,true); // true: force side evaluation
    //      cout << "fakeRHSIntegrals:\n" << fakeRHSIntegrals;
    for (int i=0; i<elems.size(); i++)
    {
      int cellID = cellIDs[i];
      // pick out the ones for testOne:
      massFluxIntegral[cellID] = fakeRHSIntegrals(i,testOneIndex);
    }
    // find the largest:
    for (int i=0; i<elems.size(); i++)
    {
      int cellID = cellIDs[i];
      maxMassFluxIntegral = max(abs(massFluxIntegral[cellID]), maxMassFluxIntegral);
    }
    for (int i=0; i<elems.size(); i++)
    {
      int cellID = cellIDs[i];
      maxCellMeasure = max(maxCellMeasure,cellMeasures(i));
      minCellMeasure = min(minCellMeasure,cellMeasures(i));
      maxMassFluxIntegral = max(abs(massFluxIntegral[cellID]), maxMassFluxIntegral);
      totalMassFlux += massFluxIntegral[cellID];
      totalAbsMassFlux += abs( massFluxIntegral[cellID] );
    }
  }
  if (rank==0)
  {
    cout << "largest mass flux: " << maxMassFluxIntegral << endl;
    cout << "total mass flux: " << totalMassFlux << endl;
    cout << "sum of mass flux absolute value: " << totalAbsMassFlux << endl;
    cout << "largest h: " << sqrt(maxCellMeasure) << endl;
    cout << "smallest h: " << sqrt(minCellMeasure) << endl;
    cout << "ratio of largest / smallest h: " << sqrt(maxCellMeasure) / sqrt(minCellMeasure) << endl;
  }
  if (rank == 0)
  {
    cout << "phi ID: " << phi->ID() << endl;
    cout << "psi1 ID: " << psi_1->ID() << endl;
    cout << "psi2 ID: " << psi_2->ID() << endl;

    cout << "streamMesh has " << streamMesh->numActiveElements() << " elements.\n";
    cout << "solving for approximate stream function...\n";
  }

  streamSolution->solve(useMumps);
  energyErrorTotal = streamSolution->energyErrorTotal();
  if (rank == 0)
  {
    cout << "...solved.\n";
    cout << "Stream mesh has energy error: " << energyErrorTotal << endl;
  }

  if (rank==0)
  {
    solution->writeToVTK("nsCavitySoln.vtk");
    if (! meshHasTriangles )
    {
      massFlux->writeBoundaryValuesToMATLABFile(solution->mesh(), "massFlux.dat");
      u_mag->writeValuesToMATLABFile(solution->mesh(), "u_mag.m");
      u_div->writeValuesToMATLABFile(solution->mesh(), "u_div.m");
      solution->writeFieldsToFile(u1->ID(), "u1.m");
      solution->writeFluxesToFile(u1hat->ID(), "u1_hat.dat");
      solution->writeFieldsToFile(u2->ID(), "u2.m");
      solution->writeFluxesToFile(u2hat->ID(), "u2_hat.dat");
      solution->writeFieldsToFile(p->ID(), "p.m");
      streamSolution->writeFieldsToFile(phi->ID(), "phi.m");

      streamSolution->writeFluxesToFile(phi_hat->ID(), "phi_hat.dat");
      streamSolution->writeFieldsToFile(psi_1->ID(), "psi1.m");
      streamSolution->writeFieldsToFile(psi_2->ID(), "psi2.m");
      vorticity->writeValuesToMATLABFile(streamMesh, "vorticity.m");

      FunctionPtr ten = Teuchos::rcp( new ConstantScalarFunction(10) );
      ten->writeBoundaryValuesToMATLABFile(solution->mesh(), "skeleton.dat");
      cout << "wrote files: u_mag.m, u_div.m, u1.m, u1_hat.dat, u2.m, u2_hat.dat, p.m, phi.m, vorticity.m.\n";
    }
    else
    {
      solution->writeToFile(u1->ID(), "u1.dat");
      solution->writeToFile(u2->ID(), "u2.dat");
      solution->writeToFile(u2->ID(), "p.dat");
      cout << "wrote files: u1.dat, u2.dat, p.dat\n";
    }

    FieldContainer<double> points = pointGrid(0, 1, 0, 1, 100);
    FieldContainer<double> pointData = solutionData(points, streamSolution, phi);
    GnuPlotUtil::writeXYPoints("phi_patch_navierStokes_cavity.dat", pointData);
    set<double> patchContourLevels = diagonalContourLevels(pointData,1);
    vector<string> patchDataPath;
    patchDataPath.push_back("phi_patch_navierStokes_cavity.dat");
    GnuPlotUtil::writeContourPlotScript(patchContourLevels, patchDataPath, "lidCavityNavierStokes.p");

    GnuPlotUtil::writeExactMeshSkeleton("lid_navierStokes_continuation_adaptive", mesh, 2);

    writePatchValues(0, 1, 0, 1, streamSolution, phi, "phi_patch.m");
    writePatchValues(0, .1, 0, .1, streamSolution, phi, "phi_patch_detail.m");
    writePatchValues(0, .01, 0, .01, streamSolution, phi, "phi_patch_minute_detail.m");
    writePatchValues(0, .001, 0, .001, streamSolution, phi, "phi_patch_minute_minute_detail.m");
  }

  return 0;
}
int main(int argc, char *argv[])
{
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  int rank=mpiSession.getRank();
  int numProcs=mpiSession.getNProc();
  int spaceDim = 2;
#ifdef HAVE_MPI
  choice::MpiArgs args( argc, argv );
#else
  choice::Args args(argc, argv );
#endif
  int minPolyOrder = args.Input<int>("--minPolyOrder", "L^2 (field) minimum polynomial order",0);
  int maxPolyOrder = args.Input<int>("--maxPolyOrder", "L^2 (field) maximum polynomial order",1);
  int minLogElements = args.Input<int>("--minLogElements", "base 2 log of the minimum number of elements in one mesh direction", 0);
  int maxLogElements = args.Input<int>("--maxLogElements", "base 2 log of the maximum number of elements in one mesh direction", 4);
  double Re = args.Input<double>("--Re", "Reynolds number", 40);
//  bool outputStiffnessMatrix = args.Input<bool>("--writeFinalStiffnessToDisk", "write the final stiffness matrix to disk.", false);
  bool computeMaxConditionNumber = args.Input<bool>("--computeMaxConditionNumber", "compute the maximum Gram matrix condition number for final mesh.", false);
  int maxIters = args.Input<int>("--maxIters", "maximum number of Newton-Raphson iterations to take to try to match tolerance", 50);
  double minL2Increment = args.Input<double>("--NRtol", "Newton-Raphson tolerance, L^2 norm of increment", 1e-12);
  string normChoice = args.Input<string>("--norm", "norm choice: graph, compliantGraph, stokesGraph, or stokesCompliantGraph", "graph");

  bool useCondensedSolve = args.Input<bool>("--useCondensedSolve", "use static condensation", true);

  double dt = args.Input<double>("--timeStep", "time step (0 for none)", 0);

  double zmcRho = args.Input<double>("--zmcRho", "zero-mean constraint rho (stabilization parameter)", -1);

//  string replayFile = args.Input<string>("--replayFile", "file with refinement history to replay", "");
//  string saveFile = args.Input<string>("--saveReplay", "file to which to save refinement history", "");

  args.Process();

  int pToAdd = 2; // for optimal test function approximation
  bool useLineSearch = false;
  bool computeRelativeErrors = true; // we'll say false when one of the exact solution components is 0
  bool useEnrichedTraces = true; // enriched traces are the right choice, mathematically speaking
  BasisFactory::basisFactory()->setUseEnrichedTraces(useEnrichedTraces);

  // parse args:
  bool useTriangles = false, useGraphNorm = false, useCompliantNorm = false, useStokesCompliantNorm = false, useStokesGraphNorm = false;

  if (normChoice=="graph")
  {
    useGraphNorm = true;
  }
  else if (normChoice=="compliantGraph")
  {
    useCompliantNorm = true;
  }
  else if (normChoice=="stokesGraph")
  {
    useStokesGraphNorm = true;
  }
  else if (normChoice=="stokesCompliantGraph")
  {
    useStokesCompliantNorm = true;
  }
  else
  {
    if (rank==0) cout << "unknown norm choice.  Exiting.\n";
    exit(-1);
  }

  bool artificialTimeStepping = (dt > 0);

  if (rank == 0)
  {
    cout << "pToAdd = " << pToAdd << endl;
    cout << "useTriangles = "    << (useTriangles   ? "true" : "false") << "\n";
    cout << "norm = " << normChoice << endl;
  }

  // define Kovasznay domain:
  FieldContainer<double> quadPointsKovasznay(4,2);
  // domain from Cockburn Kanschat for Stokes:
  quadPointsKovasznay(0,0) = -0.5; // x1
  quadPointsKovasznay(0,1) =  0.0; // y1
  quadPointsKovasznay(1,0) =  1.5;
  quadPointsKovasznay(1,1) =  0.0;
  quadPointsKovasznay(2,0) =  1.5;
  quadPointsKovasznay(2,1) =  2.0;
  quadPointsKovasznay(3,0) = -0.5;
  quadPointsKovasznay(3,1) =  2.0;

  // Domain from Evans Hughes for Navier-Stokes:
//  quadPointsKovasznay(0,0) =  0.0; // x1
//  quadPointsKovasznay(0,1) = -0.5; // y1
//  quadPointsKovasznay(1,0) =  1.0;
//  quadPointsKovasznay(1,1) = -0.5;
//  quadPointsKovasznay(2,0) =  1.0;
//  quadPointsKovasznay(2,1) =  0.5;
//  quadPointsKovasznay(3,0) =  0.0;
//  quadPointsKovasznay(3,1) =  0.5;

//  double Re = 10.0;  // Cockburn Kanschat Stokes
//  double Re = 40.0; // Evans Hughes Navier-Stokes
//  double Re = 1000.0;

  string formulationTypeStr = "vgp";

  FunctionPtr u1_exact, u2_exact, p_exact;

  int numCellsFineMesh = 20; // for computing a zero-mean pressure
  int H1OrderFineMesh = 5;

//  VGPNavierStokesProblem(double Re, Intrepid::FieldContainer<double> &quadPoints, int horizontalCells,
//                         int verticalCells, int H1Order, int pToAdd,
//                         TFunctionPtr<double> u1_0, TFunctionPtr<double> u2_0, TFunctionPtr<double> f1, TFunctionPtr<double> f2,
//                         bool enrichVelocity = false, bool enhanceFluxes = false)
  
  FunctionPtr zero = Function::zero();
  VGPNavierStokesProblem zeroProblem = VGPNavierStokesProblem(Re, quadPointsKovasznay,
                                       numCellsFineMesh, numCellsFineMesh,
                                       H1OrderFineMesh, pToAdd,
                                       zero, zero, zero, useCompliantNorm || useStokesCompliantNorm);

  VarFactoryPtr varFactory = VGPStokesFormulation::vgpVarFactory();
  VarPtr u1_vgp = varFactory->fieldVar(VGP_U1_S);
  VarPtr u2_vgp = varFactory->fieldVar(VGP_U2_S);
  VarPtr sigma11_vgp = varFactory->fieldVar(VGP_SIGMA11_S);
  VarPtr sigma12_vgp = varFactory->fieldVar(VGP_SIGMA12_S);
  VarPtr sigma21_vgp = varFactory->fieldVar(VGP_SIGMA21_S);
  VarPtr sigma22_vgp = varFactory->fieldVar(VGP_SIGMA22_S);
  VarPtr p_vgp = varFactory->fieldVar(VGP_P_S);


  VarPtr v1_vgp = varFactory->testVar(VGP_V1_S, HGRAD);
  VarPtr v2_vgp = varFactory->testVar(VGP_V2_S, HGRAD);

//  if (rank==0) {
//    cout << "bilinear form with zero background flow:\n";
//    zeroProblem.bf()->printTrialTestInteractions();
//  }

  VGPStokesFormulation stokesForm(1/Re);

  NavierStokesFormulation::setKovasznay(Re, zeroProblem.mesh(), u1_exact, u2_exact, p_exact);

//  if (rank==0) cout << "Stokes bilinearForm: " << stokesForm.bf()->displayString() << endl;
  
  map< string, string > convergenceDataForMATLAB; // key: field file name

  for (int polyOrder = minPolyOrder; polyOrder <= maxPolyOrder; polyOrder++)
  {
    int H1Order = polyOrder + 1;

    int numCells1D = pow(2.0,minLogElements);

    if (rank==0)
    {
      cout << "L^2 order: " << polyOrder << endl;
      cout << "Re = " << Re << endl;
    }

    int kovasznayCubatureEnrichment = 20; // 20 is better than 10 for accurately measuring error on the coarser meshes.

    vector< VGPNavierStokesProblem > problems;
    do
    {
      VGPNavierStokesProblem problem = VGPNavierStokesProblem(Re,quadPointsKovasznay,
                                       numCells1D,numCells1D,
                                       H1Order, pToAdd,
                                       u1_exact, u2_exact, p_exact, useCompliantNorm || useStokesCompliantNorm);

      problem.backgroundFlow()->setCubatureEnrichmentDegree(kovasznayCubatureEnrichment);
      problem.solutionIncrement()->setCubatureEnrichmentDegree(kovasznayCubatureEnrichment);

      problem.backgroundFlow()->setZeroMeanConstraintRho(zmcRho);
      problem.solutionIncrement()->setZeroMeanConstraintRho(zmcRho);

      FunctionPtr dt_inv;

      if (artificialTimeStepping)
      {
        //    // LHS gets u_inc / dt:
        BFPtr bf = problem.bf();
        dt_inv = ParameterFunction::parameterFunction(1.0 / dt); //Teuchos::rcp( new ConstantScalarFunction(1.0 / dt, "\\frac{1}{dt}") );
        bf->addTerm(-dt_inv * u1_vgp, v1_vgp);
        bf->addTerm(-dt_inv * u2_vgp, v2_vgp);
        problem.setIP( bf->graphNorm() ); // graph norm has changed...
      }
      else
      {
        dt_inv = Function::zero();
      }

      problems.push_back(problem);
      if ( useCompliantNorm )
      {
        problem.setIP(problem.vgpNavierStokesFormulation()->scaleCompliantGraphNorm(dt_inv));
      }
      else if (useStokesCompliantNorm)
      {
        VGPStokesFormulation stokesForm(1.0); // pretend Re = 1 in the graph norm
        problem.setIP(stokesForm.scaleCompliantGraphNorm());
      }
      else if (useStokesGraphNorm)
      {
        VGPStokesFormulation stokesForm(1.0); // pretend Re = 1 in the graph norm
        problem.setIP(stokesForm.graphNorm());
      }
      else if (! useGraphNorm )
      {
        // then use the naive:
        problem.setIP(problem.bf()->naiveNorm(spaceDim));
      }
      if (rank==0)
      {
        cout << numCells1D << " x " << numCells1D << ": " << problem.mesh()->numGlobalDofs() << " dofs " << endl;
      }
      numCells1D *= 2;
    }
    while (pow(2.0,maxLogElements) >= numCells1D);

    // note that rhs and bilinearForm aren't really going to be right here, since they
    // involve a background flow which varies over the various problems...
    HConvergenceStudy study(problems[0].exactSolution(),
                            problems[0].mesh()->bilinearForm(),
                            problems[0].exactSolution()->rhs(),
                            problems[0].backgroundFlow()->bc(),
                            problems[0].bf()->graphNorm(),
                            minLogElements, maxLogElements,
                            H1Order, pToAdd, false, useTriangles, false);
    study.setReportRelativeErrors(computeRelativeErrors);
    study.setCubatureDegreeForExact(kovasznayCubatureEnrichment);

    vector< SolutionPtr > solutions;
    numCells1D = pow(2.0,minLogElements);
    for (vector< VGPNavierStokesProblem >::iterator problem = problems.begin();
         problem != problems.end(); problem++)
    {
      SolutionPtr solnIncrement = problem->solutionIncrement();
      FunctionPtr u1_incr = Function::solution(u1_vgp, solnIncrement);
      FunctionPtr u2_incr = Function::solution(u2_vgp, solnIncrement);
      FunctionPtr sigma11_incr = Function::solution(sigma11_vgp, solnIncrement);
      FunctionPtr sigma12_incr = Function::solution(sigma12_vgp, solnIncrement);
      FunctionPtr sigma21_incr = Function::solution(sigma21_vgp, solnIncrement);
      FunctionPtr sigma22_incr = Function::solution(sigma22_vgp, solnIncrement);
      FunctionPtr p_incr = Function::solution(p_vgp, solnIncrement);

//      LinearTermPtr rhsLT = problem->backgroundFlow()->rhs()->linearTerm();
//      if (rank==0) cout << "bilinearForm: " << problems[0].mesh()->bilinearForm()->displayString() << endl;
//      if (rank==0) cout << "RHS: " << rhsLT->displayString() << endl;
      
      FunctionPtr l2_incr = u1_incr * u1_incr + u2_incr * u2_incr + p_incr * p_incr
                            + sigma11_incr * sigma11_incr + sigma12_incr * sigma12_incr
                            + sigma21_incr * sigma21_incr + sigma22_incr * sigma22_incr;
      double weight = 1.0;
      do
      {
        weight = problem->iterate(useLineSearch, useCondensedSolve);

        LinearTermPtr rhsLT = problem->backgroundFlow()->rhs()->linearTerm();
        RieszRep rieszRep(problem->backgroundFlow()->mesh(), problem->backgroundFlow()->ip(), rhsLT);
        rieszRep.computeRieszRep();
        double costFunction = rieszRep.getNorm();
        double incr_norm = sqrt(l2_incr->integrate(problem->mesh()));

        if (rank==0)
        {
          cout << setprecision(6) << scientific;
          cout << "\x1B[2K"; // Erase the entire current line.
          cout << "\x1B[0E"; // Move to the beginning of the current line.
          cout << "Iteration: " << problem->iterationCount() << "; L^2(incr) = " << incr_norm;
          flush(cout);
//          cout << setprecision(6) << scientific;
//          cout << "Took " << weight << "-weighted step for " << numCells1D;
//          cout << " x " << numCells1D << " mesh: " << problem->iterationCount();
//          cout << setprecision(6) << fixed;
//          cout << " iterations; cost function " << costFunction << endl;
        }
      }
      while ((sqrt(l2_incr->integrate(problem->mesh())) > minL2Increment ) && (problem->iterationCount() < maxIters) && (weight != 0));

      if (rank==0) cout << endl;

      solutions.push_back( problem->backgroundFlow() );

      // set the IP to the naive norm for clearer comparison with the best approximation energy error
//      problem->backgroundFlow()->setIP(problem->bf()->naiveNorm());

//      double energyError = problem->backgroundFlow()->energyErrorTotal();
//      if (rank==0) {
//        cout << setprecision(6) << fixed;
//        cout << numCells1D << " x " << numCells1D << ": " << problem->iterationCount();
//        cout << " iterations; actual energy error " << energyError << endl;
//      }
      numCells1D *= 2;
    }

    study.setSolutions(solutions);


    for (int i=0; i<=maxLogElements-minLogElements; i++)
    {
      SolutionPtr bestApproximation = study.bestApproximations()[i];
      VGPNavierStokesFormulation nsFormBest = VGPNavierStokesFormulation(Re, bestApproximation);
      SpatialFilterPtr entireBoundary = Teuchos::rcp( new SpatialFilterUnfiltered ); // SpatialFilterUnfiltered returns true everywhere
      Teuchos::RCP<ExactSolution<double>> exact = nsFormBest.exactSolution(u1_exact, u2_exact, p_exact, entireBoundary);
//      bestApproximation->setIP( nsFormBest.bf()->naiveNorm() );
//      bestApproximation->setRHS( exact->rhs() );

      // use backgroundFlow's IP so that they're comparable
      IPPtr ip = problems[i].backgroundFlow()->ip();
      LinearTermPtr rhsLT = exact->rhs()->linearTerm();
      RieszRep rieszRep(bestApproximation->mesh(), ip, rhsLT);
      rieszRep.computeRieszRep();

      double bestCostFunction = rieszRep.getNorm();
      if (rank==0)
        cout << "best energy error (measured according to the actual solution's test space IP): " << bestCostFunction << endl;
    }

    map< int, double > energyNormWeights;
    if (useCompliantNorm)
    {
      energyNormWeights[u1_vgp->ID()] = 1.0; // should be 1/h
      energyNormWeights[u2_vgp->ID()] = 1.0; // should be 1/h
      energyNormWeights[sigma11_vgp->ID()] = Re; // 1/mu
      energyNormWeights[sigma12_vgp->ID()] = Re; // 1/mu
      energyNormWeights[sigma21_vgp->ID()] = Re; // 1/mu
      energyNormWeights[sigma22_vgp->ID()] = Re; // 1/mu
      if (Re < 1)   // assuming we're using the experimental small Re thing
      {
        energyNormWeights[p_vgp->ID()] = Re;
      }
      else
      {
        energyNormWeights[p_vgp->ID()] = 1.0;
      }
    }
    else
    {
      energyNormWeights[u1_vgp->ID()] = 1.0;
      energyNormWeights[u2_vgp->ID()] = 1.0;
      energyNormWeights[sigma11_vgp->ID()] = 1.0;
      energyNormWeights[sigma12_vgp->ID()] = 1.0;
      energyNormWeights[sigma21_vgp->ID()] = 1.0;
      energyNormWeights[sigma22_vgp->ID()] = 1.0;
      energyNormWeights[p_vgp->ID()] = 1.0;
    }
    vector<double> bestEnergy = study.weightedL2Error(energyNormWeights,true);
    vector<double> solnEnergy = study.weightedL2Error(energyNormWeights,false);

    map<int, double> velocityWeights;
    velocityWeights[u1_vgp->ID()] = 1.0;
    velocityWeights[u2_vgp->ID()] = 1.0;
    vector<double> bestVelocityError = study.weightedL2Error(velocityWeights,true);
    vector<double> solnVelocityError = study.weightedL2Error(velocityWeights,false);

    map<int, double> pressureWeight;
    pressureWeight[p_vgp->ID()] = 1.0;
    vector<double> bestPressureError = study.weightedL2Error(pressureWeight,true);
    vector<double> solnPressureError = study.weightedL2Error(pressureWeight,false);

    if (rank==0)
    {
      cout << setw(25);
      cout << "Solution Energy Error:" << setw(25) << "Best Energy Error:" << endl;
      cout << scientific << setprecision(1);
      for (int i=0; i<bestEnergy.size(); i++)
      {
        cout << setw(25) << solnEnergy[i] << setw(25) << bestEnergy[i] << endl;
      }
      cout << setw(25);
      cout << "Solution Velocity Error:" << setw(25) << "Best Velocity Error:" << endl;
      cout << scientific << setprecision(1);
      for (int i=0; i<bestEnergy.size(); i++)
      {
        cout << setw(25) << solnVelocityError[i] << setw(25) << bestVelocityError[i] << endl;
      }
      cout << setw(25);
      cout << "Solution Pressure Error:" << setw(25) << "Best Pressure Error:" << endl;
      cout << scientific << setprecision(1);
      for (int i=0; i<bestEnergy.size(); i++)
      {
        cout << setw(25) << solnPressureError[i] << setw(25) << bestPressureError[i] << endl;
      }

      vector< string > tableHeaders;
      vector< vector<double> > dataTable;
      vector< double > meshWidths;
      for (int i=minLogElements; i<=maxLogElements; i++)
      {
        double width = pow(2.0,i);
        meshWidths.push_back(width);
      }

      tableHeaders.push_back("mesh_width");
      dataTable.push_back(meshWidths);
      tableHeaders.push_back("soln_energy_error");
      dataTable.push_back(solnEnergy);
      tableHeaders.push_back("best_energy_error");
      dataTable.push_back(bestEnergy);

      tableHeaders.push_back("soln_velocity_error");
      dataTable.push_back(solnVelocityError);
      tableHeaders.push_back("best_velocity_error");
      dataTable.push_back(bestVelocityError);

      tableHeaders.push_back("soln_pressure_error");
      dataTable.push_back(solnPressureError);
      tableHeaders.push_back("best_pressure_error");
      dataTable.push_back(bestPressureError);

      ostringstream fileNameStream;
      fileNameStream << "nsStudy_Re" << Re << "k" << polyOrder << "_results.dat";

      DataIO::outputTableToFile(tableHeaders,dataTable,fileNameStream.str());
    }

    if (rank == 0)
    {
      cout << study.TeXErrorRateTable();
      vector<int> primaryVariables;
      stokesForm.primaryTrialIDs(primaryVariables);
      vector<int> fieldIDs,traceIDs;
      vector<string> fieldFileNames;
      stokesForm.trialIDs(fieldIDs,traceIDs,fieldFileNames);
      cout << "******** Best Approximation comparison: ********\n";
      cout << study.TeXBestApproximationComparisonTable(primaryVariables);

      ostringstream filePathPrefix;
      filePathPrefix << "navierStokes/" << formulationTypeStr << "_p" << polyOrder << "_velpressure";
      study.TeXBestApproximationComparisonTable(primaryVariables,filePathPrefix.str());
      filePathPrefix.str("");
      filePathPrefix << "navierStokes/" << formulationTypeStr << "_p" << polyOrder << "_all";
      study.TeXBestApproximationComparisonTable(fieldIDs);

      for (int i=0; i<fieldIDs.size(); i++)
      {
        int fieldID = fieldIDs[i];
        int traceID = traceIDs[i];
        string fieldName = fieldFileNames[i];
        ostringstream filePathPrefix;
        filePathPrefix << "navierStokes/" << fieldName << "_p" << polyOrder;
        bool writeMATLABplotData = false;
        study.writeToFiles(filePathPrefix.str(),fieldID,traceID, writeMATLABplotData);
      }

      for (int i=0; i<primaryVariables.size(); i++)
      {
        string convData = study.convergenceDataMATLAB(primaryVariables[i], minPolyOrder);
        cout << convData;
        convergenceDataForMATLAB[fieldFileNames[i]] += convData;
      }

      filePathPrefix.str("");
      filePathPrefix << "navierStokes/" << formulationTypeStr << "_p" << polyOrder << "_numDofs";
      cout << study.TeXNumGlobalDofsTable();
    }
    if (computeMaxConditionNumber)
    {
      for (int i=minLogElements; i<=maxLogElements; i++)
      {
        SolutionPtr soln = study.getSolution(i);
        ostringstream fileNameStream;
        fileNameStream << "nsStudy_maxConditionIPMatrix_" << i << ".dat";
        IPPtr ip = Teuchos::rcp( dynamic_cast< IP* >(soln->ip().get()), false );
        bool jacobiScalingTrue = true;
        double maxConditionNumber = MeshUtilities::computeMaxLocalConditionNumber(ip, soln->mesh(), jacobiScalingTrue, fileNameStream.str());
        if (rank==0)
        {
          cout << "max Gram matrix condition number estimate for logElements " << i << ": "  << maxConditionNumber << endl;
          cout << "putative worst-conditioned Gram matrix written to: " << fileNameStream.str() << "." << endl;
        }
      }
    }
  }
  if (rank==0)
  {
    ostringstream filePathPrefix;
    filePathPrefix << "navierStokes/" << formulationTypeStr << "_";
    for (map<string,string>::iterator convIt = convergenceDataForMATLAB.begin(); convIt != convergenceDataForMATLAB.end(); convIt++)
    {
      string fileName = convIt->first + ".m";
      string data = convIt->second;
      fileName = filePathPrefix.str() + fileName;
      ofstream fout(fileName.c_str());
      fout << data;
      fout.close();
    }
  }

}
Exemple #3
0
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
#endif
  int commRank = Teuchos::GlobalMPISession::getRank();
  int numProcs = Teuchos::GlobalMPISession::getNProc();

  // {
  // // 1D tests
  //   CellTopoPtrLegacy line_2 = Teuchos::rcp( new shards::CellTopology(shards::getCellTopologyData<shards::Line<2> >() ) );

  // // let's draw a line
  //   vector<double> v0 = makeVertex(0);
  //   vector<double> v1 = makeVertex(1);
  //   vector<double> v2 = makeVertex(2);

  //   vector< vector<double> > vertices;
  //   vertices.push_back(v0);
  //   vertices.push_back(v1);
  //   vertices.push_back(v2);

  //   vector<unsigned> line1VertexList;
  //   vector<unsigned> line2VertexList;
  //   line1VertexList.push_back(0);
  //   line1VertexList.push_back(1);
  //   line2VertexList.push_back(1);
  //   line2VertexList.push_back(2);

  //   vector< vector<unsigned> > elementVertices;
  //   elementVertices.push_back(line1VertexList);
  //   elementVertices.push_back(line2VertexList);

  //   vector< CellTopoPtrLegacy > cellTopos;
  //   cellTopos.push_back(line_2);
  //   cellTopos.push_back(line_2);
  //   MeshGeometryPtr meshGeometry = Teuchos::rcp( new MeshGeometry(vertices, elementVertices, cellTopos) );

  //   MeshTopologyPtr meshTopology = Teuchos::rcp( new MeshTopology(meshGeometry) );

  //   FunctionPtr x = Function::xn(1);
  //   FunctionPtr function = x;
  //   FunctionPtr fbdr = Function::restrictToCellBoundary(function);
  //   vector<FunctionPtr> functions;
  //   functions.push_back(function);
  //   functions.push_back(function);
  //   vector<string> functionNames;
  //   functionNames.push_back("function1");
  //   functionNames.push_back("function2");

  //   // {
  //   //     HDF5Exporter exporter(mesh, "function1", false);
  //   //     exporter.exportFunction(function, "function1");
  //   // }
  //   // {
  //   //     HDF5Exporter exporter(mesh, "boundary1", false);
  //   //     exporter.exportFunction(fbdr, "boundary1");
  //   // }
  //   // {
  //   //     HDF5Exporter exporter(mesh, "functions1", false);
  //   //     exporter.exportFunction(functions, functionNames);
  //   // }
  // }
  {
    // 2D tests
    // CellTopoPtrLegacy quad_4 = Teuchos::rcp( new shards::CellTopology(shards::getCellTopologyData<shards::Quadrilateral<4> >() ) );
    // CellTopoPtrLegacy tri_3 = Teuchos::rcp( new shards::CellTopology(shards::getCellTopologyData<shards::Triangle<3> >() ) );
    CellTopoPtr quad_4 = CellTopology::quad();
    CellTopoPtr tri_3 = CellTopology::triangle();

    // let's draw a little house
    vector<double> v0 = makeVertex(-1,0);
    vector<double> v1 = makeVertex(1,0);
    vector<double> v2 = makeVertex(1,2);
    vector<double> v3 = makeVertex(-1,2);
    vector<double> v4 = makeVertex(0.0,3);

    vector< vector<double> > vertices;
    vertices.push_back(v0);
    vertices.push_back(v1);
    vertices.push_back(v2);
    vertices.push_back(v3);
    vertices.push_back(v4);

    vector<unsigned> quadVertexList;
    quadVertexList.push_back(0);
    quadVertexList.push_back(1);
    quadVertexList.push_back(2);
    quadVertexList.push_back(3);

    vector<unsigned> triVertexList;
    triVertexList.push_back(3);
    triVertexList.push_back(2);
    triVertexList.push_back(4);

    vector< vector<unsigned> > elementVertices;
    elementVertices.push_back(quadVertexList);
    elementVertices.push_back(triVertexList);

    // vector< CellTopoPtrLegacy > cellTopos;
    vector< CellTopoPtr> cellTopos;
    cellTopos.push_back(quad_4);
    cellTopos.push_back(tri_3);
    MeshGeometryPtr meshGeometry = Teuchos::rcp( new MeshGeometry(vertices, elementVertices, cellTopos) );

    MeshTopologyPtr meshTopology = Teuchos::rcp( new MeshTopology(meshGeometry) );

    ////////////////////   DECLARE VARIABLES   ///////////////////////
    // define test variables
    VarFactoryPtr vf = VarFactory::varFactory();
    VarPtr tau = vf->testVar("tau", HDIV);
    VarPtr v = vf->testVar("v", HGRAD);

    // define trial variables
    VarPtr uhat = vf->traceVar("uhat");
    VarPtr fhat = vf->fluxVar("fhat");
    VarPtr u = vf->fieldVar("u");
    VarPtr sigma = vf->fieldVar("sigma", VECTOR_L2);

    ////////////////////   DEFINE BILINEAR FORM   ///////////////////////
    BFPtr bf = Teuchos::rcp( new BF(vf) );
    // tau terms:
    bf->addTerm(sigma, tau);
    bf->addTerm(u, tau->div());
    bf->addTerm(-uhat, tau->dot_normal());

    // v terms:
    bf->addTerm( sigma, v->grad() );
    bf->addTerm( fhat, v);

    ////////////////////   DEFINE INNER PRODUCT(S)   ///////////////////////
    IPPtr ip = bf->graphNorm();

    ////////////////////   SPECIFY RHS   ///////////////////////
    RHSPtr rhs = RHS::rhs();
    FunctionPtr one = Function::constant(1.0);
    rhs->addTerm( one * v );

    ////////////////////   CREATE BCs   ///////////////////////
    BCPtr bc = BC::bc();
    FunctionPtr zero = Function::zero();
    SpatialFilterPtr entireBoundary = SpatialFilter::allSpace();
    bc->addDirichlet(uhat, entireBoundary, zero);

    ////////////////////   SOLVE & REFINE   ///////////////////////

    // Output solution
    Intrepid::FieldContainer<GlobalIndexType> savedCellPartition;
    Teuchos::RCP<Epetra_FEVector> savedLHSVector;

    {
      ////////////////////   BUILD MESH   ///////////////////////
      int H1Order = 4, pToAdd = 2;
      Teuchos::RCP<Mesh> mesh = Teuchos::rcp( new Mesh (meshTopology, bf, H1Order, pToAdd) );

      Teuchos::RCP<Solution> solution = Teuchos::rcp( new Solution(mesh, bc, rhs, ip) );
      solution->solve(false);
      RefinementStrategy refinementStrategy( solution, 0.2);
      HDF5Exporter exporter(mesh, "Poisson");
      // exporter.exportSolution(solution, vf, 0, 2, cellIDToSubdivision(mesh, 4));
      exporter.exportSolution(solution, 0, 2);
      mesh->saveToHDF5("MeshSave.h5");
      solution->saveToHDF5("SolnSave.h5");
      solution->save("PoissonProblem");
      // int numRefs = 1;
      // for (int ref = 1; ref <= numRefs; ref++)
      // {
      //   refinementStrategy.refine(commRank==0);
      //   solution->solve(false);
      //   mesh->saveToHDF5("MeshSave.h5");
      //   solution->saveToHDF5("SolnSave.h5");
      //   exporter.exportSolution(solution, vf, ref, 2, cellIDToSubdivision(mesh, 4));
      // }
      mesh->globalDofAssignment()->getPartitions(savedCellPartition);
      savedLHSVector = solution->getLHSVector();
    }
    {
      SolutionPtr loadedSolution = Solution::load(bf, "PoissonProblem");
      HDF5Exporter exporter(loadedSolution->mesh(), "ProblemLoaded");
      // exporter.exportSolution(loadedSolution, vf, 0, 2, cellIDToSubdivision(loadedSolution->mesh(), 4));
      exporter.exportSolution(loadedSolution, 0, 2);
    }
    // {
    //   MeshPtr loadedMesh = MeshFactory::loadFromHDF5(bf, "Test0.h5");
    //   Teuchos::RCP<Solution> loadedSolution = Teuchos::rcp( new Solution(loadedMesh, bc, rhs, ip) );
    //   loadedSolution->solve(false);
    //   HDF5Exporter exporter(loadedMesh, "MeshLoaded");
    //   exporter.exportSolution(loadedSolution, vf, 0, 2, cellIDToSubdivision(loadedMesh, 4));
    // }
    {
      MeshPtr loadedMesh = MeshFactory::loadFromHDF5(bf, "MeshSave.h5");
      Intrepid::FieldContainer<GlobalIndexType> loadedCellPartition;
      loadedMesh->globalDofAssignment()->getPartitions(loadedCellPartition);
      if (loadedCellPartition.size() != savedCellPartition.size())
      {
        cout << "Error: the loaded partition has different size/shape than the saved one.\n";
        cout << "loaded size: " << loadedCellPartition.size() << "; saved size: " << savedCellPartition.size() << endl;
      }
      else
      {
        bool partitionsMatch = true;
        for (int i=0; i<loadedCellPartition.size(); i++)
        {
          if (loadedCellPartition[i] != savedCellPartition[i])
          {
            partitionsMatch = false;
            break;
          }
        }
        if (partitionsMatch) cout << "Saved and loaded cell partitions match!\n";
        else
        {
          cout << "Saved and loaded cell partitions differ.\n";
          cout << "saved:\n" << savedCellPartition;
          cout << "loaded:\n" << loadedCellPartition;
        }
      }
      Teuchos::RCP<Solution> loadedSolution = Teuchos::rcp( new Solution(loadedMesh, bc, rhs, ip) );
      loadedSolution->loadFromHDF5("SolnSave.h5");

      Teuchos::RCP<Epetra_FEVector> loadedLHSVector = loadedSolution->getLHSVector();
      if (loadedLHSVector->Map().MinLID() != savedLHSVector->Map().MinLID())
      {
        cout << "On rank " << commRank << ", loaded min LID = " << loadedLHSVector->Map().MinLID();
        cout << ", but saved min LID = " << savedLHSVector->Map().MinLID() << endl;
      }
      else if (loadedLHSVector->Map().MaxLID() != savedLHSVector->Map().MaxLID())
      {
        cout << "On rank " << commRank << ", loaded max LID = " << loadedLHSVector->Map().MaxLID();
        cout << ", but saved max LID = " << savedLHSVector->Map().MaxLID() << endl;
      }
      else
      {
        bool globalIDsMatch = true;
        for (int lid = loadedLHSVector->Map().MinLID(); lid <= loadedLHSVector->Map().MaxLID(); lid++)
        {
          if (loadedLHSVector->Map().GID(lid) != savedLHSVector->Map().GID(lid))
          {
            globalIDsMatch = false;
          }
        }
        if (! globalIDsMatch)
        {
          cout << "On rank " << commRank << ", loaded and saved solution vector maps differ in their global IDs.\n";
        }
        else
        {
          cout << "On rank " << commRank << ", loaded and saved solution vector maps match in their global IDs.\n";
        }

        bool entriesMatch = true;
        double tol = 1e-16;
        if (loadedLHSVector->Map().MinLID() != loadedLHSVector->Map().MaxLID())
        {
          for (int lid = loadedLHSVector->Map().MinLID(); lid <= loadedLHSVector->Map().MaxLID(); lid++)
          {
            double loadedValue = (*loadedLHSVector)[0][lid];
            double savedValue = (*savedLHSVector)[0][lid];
            double diff = abs( loadedValue - savedValue );
            if (diff > tol)
            {
              entriesMatch = false;
              cout << "On rank " << commRank << ", loaded and saved solution vectors differ in entry with lid " << lid;
              cout << "; loaded value = " << loadedValue << "; saved value = " << savedValue << ".\n";
            }
          }
          if (entriesMatch)
          {
            cout << "On rank " << commRank << ", loaded and saved solution vectors match!\n";
          }
          else
          {
            cout << "On rank " << commRank << ", loaded and saved solution vectors do not match.\n";
          }
        }
      }

      HDF5Exporter exporter(loadedMesh, "SolutionLoaded");
      // exporter.exportSolution(loadedSolution, vf, 0, 2, cellIDToSubdivision(loadedMesh, 4));
      exporter.exportSolution(loadedSolution, 0, 2);
    }
  }

  // {
  // // 3D tests
  //   CellTopoPtrLegacy hex = Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData<shards::Hexahedron<8> >() ));

  // // let's draw a little box
  //   vector<double> v0 = makeVertex(0,0,0);
  //   vector<double> v1 = makeVertex(1,0,0);
  //   vector<double> v2 = makeVertex(1,1,0);
  //   vector<double> v3 = makeVertex(0,1,0);
  //   vector<double> v4 = makeVertex(0,0,1);
  //   vector<double> v5 = makeVertex(1,0,1);
  //   vector<double> v6 = makeVertex(1,1,1);
  //   vector<double> v7 = makeVertex(0,1,1);

  //   vector< vector<double> > vertices;
  //   vertices.push_back(v0);
  //   vertices.push_back(v1);
  //   vertices.push_back(v2);
  //   vertices.push_back(v3);
  //   vertices.push_back(v4);
  //   vertices.push_back(v5);
  //   vertices.push_back(v6);
  //   vertices.push_back(v7);

  //   vector<unsigned> hexVertexList;
  //   hexVertexList.push_back(0);
  //   hexVertexList.push_back(1);
  //   hexVertexList.push_back(2);
  //   hexVertexList.push_back(3);
  //   hexVertexList.push_back(4);
  //   hexVertexList.push_back(5);
  //   hexVertexList.push_back(6);
  //   hexVertexList.push_back(7);

  //   // vector<unsigned> triVertexList;
  //   // triVertexList.push_back(2);
  //   // triVertexList.push_back(3);
  //   // triVertexList.push_back(4);

  //   vector< vector<unsigned> > elementVertices;
  //   elementVertices.push_back(hexVertexList);
  //   // elementVertices.push_back(triVertexList);

  //   vector< CellTopoPtrLegacy > cellTopos;
  //   cellTopos.push_back(hex);
  //   // cellTopos.push_back(tri_3);
  //   MeshGeometryPtr meshGeometry = Teuchos::rcp( new MeshGeometry(vertices, elementVertices, cellTopos) );

  //   MeshTopologyPtr meshTopology = Teuchos::rcp( new MeshTopology(meshGeometry) );

  //   FunctionPtr x = Function::xn(1);
  //   FunctionPtr y = Function::yn(1);
  //   FunctionPtr z = Function::zn(1);
  //   FunctionPtr function = x + y + z;
  //   FunctionPtr fbdr = Function::restrictToCellBoundary(function);
  //   FunctionPtr vect = Function::vectorize(x, y, z);
  //   vector<FunctionPtr> functions;
  //   functions.push_back(function);
  //   functions.push_back(vect);
  //   vector<string> functionNames;
  //   functionNames.push_back("function");
  //   functionNames.push_back("vect");

  //   // {
  //   //     HDF5Exporter exporter(mesh, "function3", false);
  //   //     exporter.exportFunction(function, "function3");
  //   // }
  //   // {
  //   //     HDF5Exporter exporter(mesh, "boundary3", false);
  //   //     exporter.exportFunction(fbdr, "boundary3");
  //   // }
  //   // {
  //   //     HDF5Exporter exporter(mesh, "vect3", false);
  //   //     exporter.exportFunction(vect, "vect3");
  //   // }
  //   // {
  //   //     HDF5Exporter exporter(mesh, "functions3", false);
  //   //     exporter.exportFunction(functions, functionNames);
  //   // }
  // }
}
int main(int argc, char *argv[])
{
#ifdef ENABLE_INTEL_FLOATING_POINT_EXCEPTIONS
  cout << "NOTE: enabling floating point exceptions for divide by zero.\n";
  _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~_MM_MASK_INVALID);
#endif

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  int rank = Teuchos::GlobalMPISession::getRank();

  Teuchos::CommandLineProcessor cmdp(false,true); // false: don't throw exceptions; true: do return errors for unrecognized options

  const static double PI  = 3.141592653589793238462;

  bool useCondensedSolve = true; // condensed solve not yet compatible with minimum rule meshes

  int k = 2; // poly order for u in every direction, including temporal
  int numCells = 32; // in x, y
  int numTimeCells = 1;
  int numTimeSlabs = -1;
  int numFrames = 201;
  int delta_k = 3;   // test space enrichment: should be 3 for 3D
  int maxRefinements = 0; // maximum # of refinements on each time slab
  bool useMumpsIfAvailable  = true;
  bool useConstantConvection = false;
  double refinementTolerance = 0.1;

  int checkPointFrequency = 50; // output solution and mesh every 50 time slabs

  int previousSolutionTimeSlabNumber = -1;
  string previousSolutionFile = "";
  string previousMeshFile = "";

  cmdp.setOption("polyOrder",&k,"polynomial order for field variable u");
  cmdp.setOption("delta_k", &delta_k, "test space polynomial order enrichment");

  cmdp.setOption("numCells",&numCells,"number of cells in x and y directions");
  cmdp.setOption("numTimeCells",&numTimeCells,"number of time axis cells");
  cmdp.setOption("numTimeSlabs",&numTimeSlabs,"number of time slabs");
  cmdp.setOption("numFrames",&numFrames,"number of frames for export");

  cmdp.setOption("useConstantConvection", "useVariableConvection", &useConstantConvection);

  cmdp.setOption("useCondensedSolve", "useUncondensedSolve", &useCondensedSolve, "use static condensation to reduce the size of the global solve");
  cmdp.setOption("useMumps", "useKLU", &useMumpsIfAvailable, "use MUMPS (if available)");

  cmdp.setOption("refinementTolerance", &refinementTolerance, "relative error beyond which to stop refining");
  cmdp.setOption("maxRefinements", &maxRefinements, "maximum # of refinements on each time slab");

  cmdp.setOption("previousSlabNumber", &previousSolutionTimeSlabNumber, "time slab number of previous solution");
  cmdp.setOption("previousSolution", &previousSolutionFile, "file with previous solution");
  cmdp.setOption("previousMesh", &previousMeshFile, "file with previous mesh");

  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
  {
#ifdef HAVE_MPI
    MPI_Finalize();
#endif
    return -1;
  }

  int H1Order = k + 1;

  VarFactory varFactory;
  // traces:
  VarPtr qHat = varFactory.fluxVar("\\widehat{q}");

  // fields:
  VarPtr u = varFactory.fieldVar("u", L2);

  // test functions:
  VarPtr v = varFactory.testVar("v", HGRAD);

  FunctionPtr x = Function::xn(1);
  FunctionPtr y = Function::yn(1);

  FunctionPtr c;
  if (useConstantConvection)
  {
    c = Function::vectorize(Function::constant(0.5), Function::constant(0.5), Function::constant(1.0));
  }
  else
  {
    c = Function::vectorize(y-0.5, 0.5-x, Function::constant(1.0));
  }
  FunctionPtr n = Function::normal();

  BFPtr bf = Teuchos::rcp( new BF(varFactory) );

  bf->addTerm( u, c * v->grad());
  bf->addTerm(qHat, v);

  double width = 2.0, height = 2.0;
  int horizontalCells = numCells, verticalCells = numCells;
  int depthCells = numTimeCells;
  double x0 = -0.5;
  double y0 = -0.5;
  double t0 = 0;

  double totalTime = 2.0 * PI;

  vector<double> frameTimes;
  for (int i=0; i<numFrames; i++)
  {
    frameTimes.push_back((totalTime*i) / (numFrames-1));
  }

  if (numTimeSlabs==-1)
  {
    // want the number of grid points in temporal direction to be about 2000.  The temporal length is 2 * PI
    numTimeSlabs = (int) 2000 / k;
  }
  double timeLengthPerSlab = totalTime / numTimeSlabs;

  if (rank==0)
  {
    cout << "solving on " << numCells << " x " << numCells << " x " << numTimeCells << " mesh " << "of order " << k << ".\n";
    cout << "numTimeSlabs: " << numTimeSlabs << endl;
  }

  SpatialFilterPtr inflowFilter  = Teuchos::rcp( new InflowFilterForClockwisePlanarRotation (x0,x0+width,y0,y0+height,0.5,0.5));

  vector<double> dimensions;
  dimensions.push_back(width);
  dimensions.push_back(height);
  dimensions.push_back(timeLengthPerSlab);

  vector<int> elementCounts(3);
  elementCounts[0] = horizontalCells;
  elementCounts[1] = verticalCells;
  elementCounts[2] = depthCells;

  vector<double> origin(3);
  origin[0] = x0;
  origin[1] = y0;
  origin[2] = t0;

  Teuchos::RCP<Solver> solver = Teuchos::rcp( new KluSolver );

#ifdef HAVE_AMESOS_MUMPS
  if (useMumpsIfAvailable) solver = Teuchos::rcp( new MumpsSolver );
#endif

//  double errorPercentage = 0.5; // for mesh refinements: ask to refine elements that account for 80% of the error in each step
//  Teuchos::RCP<RefinementStrategy> refinementStrategy;
//  refinementStrategy = Teuchos::rcp( new ErrorPercentageRefinementStrategy( soln, errorPercentage ));

  if (maxRefinements != 0)
  {
    cout << "Warning: maxRefinements is not 0, but the slice exporter implicitly assumes there won't be any refinements.\n";
  }

  MeshPtr mesh;

  MeshPtr prevMesh;
  SolutionPtr prevSoln;

  mesh = MeshFactory::rectilinearMesh(bf, dimensions, elementCounts, H1Order, delta_k, origin);

  if (rank==0) cout << "Initial mesh has " << mesh->getTopology()->activeCellCount() << " active (leaf) cells " << "and " << mesh->globalDofCount() << " degrees of freedom.\n";

  FunctionPtr sideParity = Function::sideParity();

  int lastFrameOutputted = -1;

  SolutionPtr soln;

  IPPtr ip;
  ip = bf->graphNorm();

  FunctionPtr u0 = Teuchos::rcp( new Cone_U0(0.0, 0.25, 0.1, 1.0, false) );

  BCPtr bc = BC::bc();
  bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary.
  bc->addDirichlet(qHat, SpatialFilter::matchingZ(t0), u0);

  MeshPtr initialMesh = mesh;

  int startingSlabNumber;
  if (previousSolutionTimeSlabNumber != -1)
  {
    startingSlabNumber = previousSolutionTimeSlabNumber + 1;

    if (rank==0) cout << "Loading mesh from " << previousMeshFile << endl;

    prevMesh = MeshFactory::loadFromHDF5(bf, previousMeshFile);
    prevSoln = Solution::solution(mesh, bc, RHS::rhs(), ip); // include BC and IP objects for sake of condensed dof interpreter setup...
    prevSoln->setUseCondensedSolve(useCondensedSolve);

    if (rank==0) cout << "Loading solution from " << previousSolutionFile << endl;
    prevSoln->loadFromHDF5(previousSolutionFile);

    double tn = (previousSolutionTimeSlabNumber+1) * timeLengthPerSlab;
    origin[2] = tn;
    mesh = MeshFactory::rectilinearMesh(bf, dimensions, elementCounts, H1Order, delta_k, origin);

    FunctionPtr q_prev = Function::solution(qHat, prevSoln);
    FunctionPtr q_transfer = Teuchos::rcp( new MeshTransferFunction(-q_prev, prevMesh, mesh, tn) ); // negate because the normals go in opposite directions

    bc = BC::bc();
    bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary.
    bc->addDirichlet(qHat, SpatialFilter::matchingZ(tn), q_transfer);

    double t_slab_final = (previousSolutionTimeSlabNumber+1) * timeLengthPerSlab;
    int frameOrdinal = 0;

    while (frameTimes[frameOrdinal] < t_slab_final)
    {
      lastFrameOutputted = frameOrdinal++;
    }
  }
  else
  {
    startingSlabNumber = 0;
  }


#ifdef HAVE_EPETRAEXT_HDF5
  ostringstream dir_name;
  dir_name << "spacetime_slice_convectingCone_k" << k << "_startSlab" << startingSlabNumber;
  map<GlobalIndexType,GlobalIndexType> cellMap;
  MeshPtr meshSlice = MeshTools::timeSliceMesh(initialMesh, 0, cellMap, H1Order);
  HDF5Exporter sliceExporter(meshSlice,dir_name.str());
#endif

  soln = Solution::solution(mesh, bc, RHS::rhs(), ip);
  soln->setUseCondensedSolve(useCondensedSolve);

  for(int timeSlab = startingSlabNumber; timeSlab<numTimeSlabs; timeSlab++)
  {
    double energyThreshold = 0.2; // for mesh refinements: ask to refine elements that account for 80% of the error in each step
    Teuchos::RCP<RefinementStrategy> refinementStrategy;
    refinementStrategy = Teuchos::rcp( new RefinementStrategy( soln, energyThreshold ));

    FunctionPtr u_spacetime = Function::solution(u, soln);

    double relativeEnergyError;
    int refNumber = 0;

//    {
//      // DEBUGGING: just to try running the time slicing:
//      double t_slab_final = (timeStep+1) * timeLengthPerSlab;
//      int frameOrdinal = lastFrameOutputted + 1;
//      while (frameTimes[frameOrdinal] < t_slab_final) {
//        FunctionPtr u_spacetime = Function::solution(u, soln);
//        ostringstream dir_name;
//        dir_name << "spacetime_slice_convectingCone_k" << k;
//        MeshTools::timeSliceExport(dir_name.str(), mesh, u_spacetime, frameTimes[frameOrdinal], "u_slice");
//
//        cout << "Exported frame " << frameOrdinal << ", t=" << frameTimes[frameOrdinal] << endl;
//        frameOrdinal++;
//      }
//    }

    do
    {
      soln->solve(solver);
      soln->reportTimings();

#ifdef HAVE_EPETRAEXT_HDF5
      ostringstream dir_name;
      dir_name << "spacetime_convectingCone_k" << k << "_t" << timeSlab;
      HDF5Exporter exporter(soln->mesh(),dir_name.str());
      exporter.exportSolution(soln, varFactory);

      if (rank==0) cout << "Exported HDF solution for time slab to directory " << dir_name.str() << endl;
//      string u_name = "u_spacetime";
//      exporter.exportFunction(u_spacetime, u_name);

      ostringstream file_name;
      file_name << dir_name.str();

      bool saveSolutionAndMeshForThisSlab = ((timeSlab + 1) % checkPointFrequency == 0); // +1 so that first output is nth, not first
      if (saveSolutionAndMeshForThisSlab)
      {
        dir_name << ".soln";
        soln->saveToHDF5(dir_name.str());
        if (rank==0) cout << endl << "wrote " << dir_name.str() << endl;

        file_name << ".mesh";
        soln->mesh()->saveToHDF5(file_name.str());
      }
#endif
      FunctionPtr u_soln = Function::solution(u, soln);

      double solnNorm = u_soln->l2norm(mesh);

      double energyError = soln->energyErrorTotal();
      relativeEnergyError = energyError / solnNorm;

      if (rank==0)
      {
        cout << "Relative energy error for refinement " << refNumber++ << ": " << relativeEnergyError << endl;
      }

      if ((relativeEnergyError > refinementTolerance) && (refNumber < maxRefinements))
      {
        refinementStrategy->refine();
        if (rank==0)
        {
          cout << "After refinement, mesh has " << mesh->getTopology()->activeCellCount() << " active (leaf) cells " << "and " << mesh->globalDofCount() << " degrees of freedom.\n";
        }
      }

    }
    while ((relativeEnergyError > refinementTolerance) && (refNumber < maxRefinements));

    double t_slab_final = (timeSlab+1) * timeLengthPerSlab;
    int frameOrdinal = lastFrameOutputted + 1;
    vector<double> timesForSlab;
    while (frameTimes[frameOrdinal] < t_slab_final)
    {
      double t = frameTimes[frameOrdinal];
      if (rank==0) cout << "exporting t=" << t << " on slab " << timeSlab << endl;
      FunctionPtr sliceFunction = MeshTools::timeSliceFunction(mesh, cellMap, u_spacetime, t);
      sliceExporter.exportFunction(sliceFunction, "u_slice", t);
      lastFrameOutputted = frameOrdinal++;
    }

    // set up next mesh/solution:
    FunctionPtr q_prev = Function::solution(qHat, soln);

//    cout << "Error in setup of q_prev: simple solution doesn't know about the map from the previous time slab to the current one. (TODO: fix this.)\n";

    double tn = (timeSlab+1) * timeLengthPerSlab;
    origin[2] = tn;
    mesh = MeshFactory::rectilinearMesh(bf, dimensions, elementCounts, H1Order, delta_k, origin);

    FunctionPtr q_transfer = Teuchos::rcp( new MeshTransferFunction(-q_prev, soln->mesh(), mesh, tn) ); // negate because the normals go in opposite directions

    bc = BC::bc();
    bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary.
    bc->addDirichlet(qHat, SpatialFilter::matchingZ(tn), q_transfer);

    // IMPORTANT: now that we are ready to step to next soln, nullify BC.  If we do not do this, then we have an RCP chain
    //            that extends back to the first time slab, effectively a memory leak.
    soln->setBC(BC::bc());

    soln = Solution::solution(mesh, bc, RHS::rhs(), ip);
    soln->setUseCondensedSolve(useCondensedSolve);
  }

  return 0;
}