int main(int argc, char *argv[]) {

#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  choice::MpiArgs args( argc, argv );
#else
  choice::Args args( argc, argv );
#endif
  int rank = Teuchos::GlobalMPISession::getRank();
  int numProcs = Teuchos::GlobalMPISession::getNProc();

  int nCells = args.Input<int>("--nCells", "num cells",2);
  int numSteps = args.Input<int>("--numSteps", "num NR steps",20);

  int polyOrder = 0;
  
  // define our manufactured solution or problem bilinear form:
  bool useTriangles = false;
  
  int pToAdd = 1;

  args.Process();

  int H1Order = polyOrder + 1;
  
  ////////////////////////////////////////////////////////////////////
  // DEFINE VARIABLES 
  ////////////////////////////////////////////////////////////////////
  
  // new-style bilinear form definition
  VarFactory varFactory;
  VarPtr fn = varFactory.fluxVar("\\widehat{\\beta_n_u}");
  VarPtr u = varFactory.fieldVar("u");
  
  VarPtr v = varFactory.testVar("v",HGRAD);
  BFPtr bf = Teuchos::rcp( new BF(varFactory) ); // initialize bilinear form
  
  ////////////////////////////////////////////////////////////////////
  // CREATE MESH 
  ////////////////////////////////////////////////////////////////////
  
  // create a pointer to a new mesh:
  Teuchos::RCP<Mesh> mesh = MeshUtilities::buildUnitQuadMesh(nCells , bf, H1Order, H1Order+pToAdd);
  
  ////////////////////////////////////////////////////////////////////
  // INITIALIZE BACKGROUND FLOW FUNCTIONS
  ////////////////////////////////////////////////////////////////////
  BCPtr nullBC = Teuchos::rcp((BC*)NULL); RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL); IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  SolutionPtr backgroundFlow = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );
  SolutionPtr solnPerturbation = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );
  
  vector<double> e1(2),e2(2);
  e1[0] = 1; e2[1] = 1;
  
  FunctionPtr u_prev = Teuchos::rcp( new PreviousSolutionFunction(backgroundFlow, u) );
  FunctionPtr beta = e1 * u_prev + Teuchos::rcp( new ConstantVectorFunction( e2 ) );
  
  ////////////////////////////////////////////////////////////////////
  // DEFINE BILINEAR FORM
  ////////////////////////////////////////////////////////////////////
  
  // v:
  bf->addTerm( -u, beta * v->grad());
  bf->addTerm( fn, v);

  ////////////////////////////////////////////////////////////////////
  // DEFINE RHS
  ////////////////////////////////////////////////////////////////////

  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );
  FunctionPtr u_prev_squared_div2 = 0.5 * u_prev * u_prev;  
  rhs->addTerm((e1 * u_prev_squared_div2 + e2 * u_prev) * v->grad());

  // ==================== SET INITIAL GUESS ==========================

  mesh->registerSolution(backgroundFlow);
  FunctionPtr zero = Function::constant(0.0);
  FunctionPtr u0 = Teuchos::rcp( new U0 );
  FunctionPtr n = Teuchos::rcp( new UnitNormalFunction );
  //  FunctionPtr parity = Teuchos::rcp(new SideParityFunction);

  FunctionPtr u0_squared_div_2 = 0.5 * u0 * u0;

  map<int, Teuchos::RCP<Function> > functionMap;
  functionMap[u->ID()] = u0;
  //  functionMap[fn->ID()] = -(e1 * u0_squared_div_2 + e2 * u0) * n * parity;
  backgroundFlow->projectOntoMesh(functionMap);

  // ==================== END SET INITIAL GUESS ==========================

  ////////////////////////////////////////////////////////////////////
  // DEFINE INNER PRODUCT
  ////////////////////////////////////////////////////////////////////

  IPPtr ip = Teuchos::rcp( new IP );
  ip->addTerm( v );
  ip->addTerm(v->grad());
  //  ip->addTerm( beta * v->grad() ); // omitting term to make IP non-dependent on u

  ////////////////////////////////////////////////////////////////////
  // DEFINE DIRICHLET BC
  ////////////////////////////////////////////////////////////////////

  SpatialFilterPtr outflowBoundary = Teuchos::rcp( new TopBoundary);
  SpatialFilterPtr inflowBoundary = Teuchos::rcp( new NegatedSpatialFilter(outflowBoundary) );
  Teuchos::RCP<BCEasy> inflowBC = Teuchos::rcp( new BCEasy );
  inflowBC->addDirichlet(fn,inflowBoundary, 
                         ( e1 * u0_squared_div_2 + e2 * u0) * n );
  
  ////////////////////////////////////////////////////////////////////
  // CREATE SOLUTION OBJECT
  ////////////////////////////////////////////////////////////////////

  Teuchos::RCP<Solution> solution = Teuchos::rcp(new Solution(mesh, inflowBC, rhs, ip));
  mesh->registerSolution(solution); solution->setCubatureEnrichmentDegree(10);

  ////////////////////////////////////////////////////////////////////
  // HESSIAN BIT + CHECKS ON GRADIENT + HESSIAN
  ////////////////////////////////////////////////////////////////////

  VarFactory hessianVars = varFactory.getBubnovFactory(VarFactory::BUBNOV_TRIAL);
  VarPtr du = hessianVars.test(u->ID());
  //  BFPtr hessianBF = Teuchos::rcp( new BF(hessianVars) ); // initialize bilinear form

  FunctionPtr du_current  = Teuchos::rcp( new PreviousSolutionFunction(solution, u) );

  FunctionPtr fnhat = Teuchos::rcp(new PreviousSolutionFunction(solution,fn));
  LinearTermPtr residual = Teuchos::rcp(new LinearTerm);// residual
  residual->addTerm(fnhat*v,true);
  residual->addTerm( - (e1 * (u_prev_squared_div2) + e2 * (u_prev)) * v->grad(),true);

  LinearTermPtr Bdu = Teuchos::rcp(new LinearTerm);// residual
  Bdu->addTerm( - du_current*(beta*v->grad()));

  Teuchos::RCP<RieszRep> riesz = Teuchos::rcp(new RieszRep(mesh, ip, residual));
  Teuchos::RCP<RieszRep> duRiesz = Teuchos::rcp(new RieszRep(mesh, ip, Bdu));
  riesz->computeRieszRep();
  FunctionPtr e_v = Teuchos::rcp(new RepFunction(v,riesz));
  e_v->writeValuesToMATLABFile(mesh, "e_v.m");
  FunctionPtr posErrPart = Teuchos::rcp(new PositivePart(e_v->dx()));
  //  hessianBF->addTerm(e_v->dx()*u,du); 
  //  hessianBF->addTerm(posErrPart*u,du); 
  //  Teuchos::RCP<NullFilter> nullFilter = Teuchos::rcp(new NullFilter);
  //  Teuchos::RCP<HessianFilter> hessianFilter = Teuchos::rcp(new HessianFilter(hessianBF));

  Teuchos::RCP< LineSearchStep > LS_Step = Teuchos::rcp(new LineSearchStep(riesz));

  double NL_residual = 9e99;
  for (int i = 0;i<numSteps;i++){
    // write matrix to file and then resollve without hessian
    /*
    solution->setFilter(hessianFilter);           
    stringstream oss;
    oss << "hessianMatrix" << i << ".dat";
    solution->setWriteMatrixToFile(true,oss.str());      
    solution->solve(false);

    solution->setFilter(nullFilter);
    oss.str(""); // clear
    oss << "stiffnessMatrix" << i << ".dat";
    solution->setWriteMatrixToFile(false,oss.str());      
    */

    solution->solve(false); // do one solve to initialize things...   
    double stepLength = 1.0;
    stepLength = LS_Step->stepSize(backgroundFlow,solution, NL_residual);

    //      solution->setWriteMatrixToFile(true,"stiffness.dat");    

    backgroundFlow->addSolution(solution,stepLength);
    NL_residual = LS_Step->getNLResidual();
    if (rank==0){
      cout << "NL residual after adding = " << NL_residual << " with step size " << stepLength << endl;    
    }

    double fd_gradient;
    for (int dofIndex = 0;dofIndex<mesh->numGlobalDofs();dofIndex++){
      TestingUtilities::initializeSolnCoeffs(solnPerturbation);
      TestingUtilities::setSolnCoeffForGlobalDofIndex(solnPerturbation,1.0,dofIndex);
      fd_gradient = FiniteDifferenceUtilities::finiteDifferenceGradient(mesh, riesz, backgroundFlow, dofIndex);
      
      // CHECK GRADIENT
      LinearTermPtr b_u =  bf->testFunctional(solnPerturbation);
      map<int,FunctionPtr> NL_err_rep_map;

      NL_err_rep_map[v->ID()] = Teuchos::rcp(new RepFunction(v,riesz));
      FunctionPtr gradient = b_u->evaluate(NL_err_rep_map, TestingUtilities::isFluxOrTraceDof(mesh,dofIndex)); // use boundary part only if flux or trace
      double grad;
      if (TestingUtilities::isFluxOrTraceDof(mesh,dofIndex)){
	grad = gradient->integralOfJump(mesh,10);
      }else{
	grad = gradient->integrate(mesh,10);
      }
      double fdgrad = fd_gradient;
      double diff = grad-fdgrad;
      if (abs(diff)>1e-6 && i>0){
	cout << "Found difference of " << diff << ", " << " with fd val = " << fdgrad << " and gradient = " << grad << " in dof " << dofIndex << ", isTraceDof = " << TestingUtilities::isFluxOrTraceDof(mesh,dofIndex) << endl;
      }
    }
  }
  
  VTKExporter exporter(solution, mesh, varFactory);
  if (rank==0){
    exporter.exportSolution("qopt");
    cout << endl;
  }

  return 0;
}
// Consistency test for LinearTerm integration: for a LinearTerm a = b + c, checks
// (via checkLTSumConsistency) that integrating a agrees with integrating b and c
// separately. Covers plain test/trial variable sums first, then terms built from
// a projected Solution that mix boundary-only and volume contributions.
//
// Returns true when every check passes; each failing check prints its own
// diagnostic to cout and the remaining checks still run.
bool LinearTermTests::testIntegration()
{
  bool success = true;

  //  VarPtr v1, v2, v3; // HGRAD members (test variables)
  //  VarPtr q1, q2, q3; // HDIV members (test variables)
  //  VarPtr u1, u2, u3; // L2 members (trial variables)
  //  VarPtr u1_hat, u2_hat; // trace variables
  //  VarPtr u3_hat_n; // flux variable
  //
  //  FunctionPtr sine_x;

  if ( ! checkLTSumConsistency(1 * v1, 1 * v2, testOrder, basisCache) )
  {
    cout << "(v1 + v2)->integrate not consistent with sum of summands integration.\n";
    success = false;
  }

  if ( ! checkLTSumConsistency(sine_x * v1, 1 * v2, testOrder, basisCache) )
  {
    cout << "(sine_x * v1 + v2)->integrate not consistent with sum of summands integration.\n";
    success = false;
  }

  if ( ! checkLTSumConsistency(1 * q1->div(), 1 * q2->x(), testOrder, basisCache) )
  {
    cout << "(q1->div() + q2->x())->integrate not consistent with sum of summands integration.\n";
    success = false;
  }

  if ( ! checkLTSumConsistency(1 * u1, 1 * u2, trialOrder, basisCache) )
  {
    cout << "(u1 + u2)->integrate not consistent with sum of summands integration.\n";
    success = false;
  }

  if ( ! checkLTSumConsistency(1 * u1, sine_x * u2, trialOrder, basisCache) )
  {
    cout << "(u1 + sine_x * u2)->integrate not consistent with sum of summands integration.\n";
    success = false;
  }

  // now, same thing, but with boundary-value-only functions in the mix:
  // this next is a fairly complex test; may want to add a more granular one above...
  IPPtr ip = Teuchos::rcp(new IP);
  RHSPtr rhs = RHS::rhs();
  BCPtr bc = BC::bc();
  SolutionPtr solution = Teuchos::rcp( new Solution(mesh,bc,rhs,ip) );
  // project some functions onto solution, so that something interesting is there:
  FunctionPtr u1_proj = sine_x;
  FunctionPtr u2_proj = cos_y;
  FunctionPtr u3_proj = u1_proj * u2_proj;
  map<int, FunctionPtr> solnToProject;
  solnToProject[u1->ID()] = u1_proj;
  solnToProject[u2->ID()] = u2_proj;
  solnToProject[u3->ID()] = u3_proj;
  solnToProject[u1_hat->ID()] = u1_proj;
  solnToProject[u2_hat->ID()] = u2_proj;
  // u3_hat_n isn't too much like a 'real' bilinear form, in that u3 itself is a scalar
  // this is just a test, so I'm not worried about it...
  solnToProject[u3_hat_n->ID()] = u3_proj;

  solution->projectOntoMesh(solnToProject);

  LinearTermPtr bfTestFunctional = bf->testFunctional(solution);

  // Terms of bf, for reference (per the original author's notes):
  // bf->addTerm(u1, q1->x());
  // bf->addTerm(u2, q1->y());
  // bf->addTerm(u3, v1);

//  bf->addTerm(u1_hat, q1->dot_normal());
//  bf->addTerm(u3_hat_n, v1);

  // Hand-built volume part of the test functional.
  LinearTermPtr testFunctionalNoBoundaryValues = u1_proj * q1->x() + u2_proj * q1->y() + u3_proj * v1;

  // Hand-built boundary part; only u1_hat and u3_hat_n enter it.
  FunctionPtr u1_hat_prev = Teuchos::rcp( new PreviousSolutionFunction<double>(solution, u1_hat) );
  FunctionPtr u3_hat_prev = Teuchos::rcp( new PreviousSolutionFunction<double>(solution, u3_hat_n) );
  LinearTermPtr testFunctionalBoundaryValues = u1_hat_prev * q1->dot_normal() + u3_hat_prev * v1;

  // Each of the three checks below prints a distinct message so a failure can be
  // attributed to the specific decomposition that broke.
  if ( ! checkLTSumConsistency(testFunctionalNoBoundaryValues, testFunctionalBoundaryValues,
                               testOrder, basisCache) )
  {
    cout << "(testFunctionalNoBoundaryValues + testFunctionalBoundaryValues)->integrate not consistent with sum of summands integration.\n";
    success = false;
  }

  if ( ! checkLTSumConsistency(testFunctionalBoundaryValues, bfTestFunctional - testFunctionalBoundaryValues,
                               testOrder, basisCache) )
  {
    cout << "bfTestFunctional->integrate (split as boundary values + remainder) not consistent with sum of summands integration.\n";
    success = false;
  }

  if ( ! checkLTSumConsistency(testFunctionalNoBoundaryValues, bfTestFunctional - testFunctionalNoBoundaryValues,
                               testOrder, basisCache) )
  {
    cout << "bfTestFunctional->integrate (split as non-boundary values + remainder) not consistent with sum of summands integration.\n";
    success = false;
  }

  return success;
}
int main(int argc, char *argv[])
{
  // Process command line arguments
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  int rank=mpiSession.getRank();
  int numProcs=mpiSession.getNProc();
#else
  int rank = 0;
  int numProcs = 1;
#endif

  ////////////////////   DECLARE VARIABLES   ///////////////////////
  // define test variables
  VarFactory varFactory;
  VarPtr v = varFactory.testVar("v", HGRAD);

  // define trial variables
  VarPtr beta_n_u_hat = varFactory.fluxVar("\\widehat{\\beta \\cdot n }");
  VarPtr u = varFactory.fieldVar("u");

  FunctionPtr beta = Teuchos::rcp(new Beta());

  ////////////////////   BUILD MESH   ///////////////////////
  BFPtr confusionBF = Teuchos::rcp( new BF(varFactory) );
  // define nodes for mesh
  FieldContainer<double> meshBoundary(4,2);

  meshBoundary(0,0) = -1.0; // x1
  meshBoundary(0,1) = -1.0; // y1
  meshBoundary(1,0) =  1.0;
  meshBoundary(1,1) = -1.0;
  meshBoundary(2,0) =  1.0;
  meshBoundary(2,1) =  1.0;
  meshBoundary(3,0) = -1.0;
  meshBoundary(3,1) =  1.0;

  int horizontalCells = 32, verticalCells = 32;

  // create a pointer to a new mesh:
  Teuchos::RCP<Mesh> mesh = Mesh::buildQuadMesh(meshBoundary, horizontalCells, verticalCells,
                            confusionBF, H1Order, H1Order+pToAdd);

  ////////////////////////////////////////////////////////////////////
  // INITIALIZE FLOW FUNCTIONS
  ////////////////////////////////////////////////////////////////////

  BCPtr nullBC = Teuchos::rcp((BC*)NULL);
  RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL);
  IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  SolutionPtr prevTimeFlow = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );
  SolutionPtr flowResidual = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );

  FunctionPtr u_prev_time = Teuchos::rcp( new PreviousSolutionFunction(prevTimeFlow, u) );

  ////////////////////   DEFINE BILINEAR FORM   ///////////////////////
  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );
  FunctionPtr invDt = Teuchos::rcp(new ScalarParamFunction(1.0/dt));

  // v terms:
  confusionBF->addTerm( beta * u, - v->grad() );
  confusionBF->addTerm( beta_n_u_hat, v);

  confusionBF->addTerm( u, invDt*v );
  rhs->addTerm( u_prev_time * invDt * v );

  ////////////////////   SPECIFY RHS   ///////////////////////
  FunctionPtr f = Teuchos::rcp( new ConstantScalarFunction(0.0) );
  rhs->addTerm( f * v ); // obviously, with f = 0 adding this term is not necessary!

  ////////////////////   DEFINE INNER PRODUCT(S)   ///////////////////////
  // robust test norm
  IPPtr ip = confusionBF->graphNorm();
  // IPPtr ip = Teuchos::rcp(new IP);
  // ip->addTerm(v);
  // ip->addTerm(invDt*v - beta*v->grad());

  ////////////////////   CREATE BCs   ///////////////////////
  Teuchos::RCP<BCEasy> bc = Teuchos::rcp( new BCEasy );
  SpatialFilterPtr inflowBoundary = Teuchos::rcp( new InflowSquareBoundary(beta) );
  FunctionPtr u0 = Teuchos::rcp( new ConstantScalarFunction(0) );
  FunctionPtr n = Teuchos::rcp( new UnitNormalFunction );

  bc->addDirichlet(beta_n_u_hat, inflowBoundary, beta*n*u0);

  Teuchos::RCP<Solution> solution = Teuchos::rcp( new Solution(mesh, bc, rhs, ip) );

  // ==================== Register Solutions ==========================
  mesh->registerSolution(solution);
  mesh->registerSolution(prevTimeFlow);
  mesh->registerSolution(flowResidual);

  // ==================== SET INITIAL GUESS ==========================
  FunctionPtr u_init = Teuchos::rcp(new InitialCondition());
  map<int, Teuchos::RCP<Function> > functionMap;
  functionMap[u->ID()]      = u_init;

  prevTimeFlow->projectOntoMesh(functionMap);

  ////////////////////   SOLVE & REFINE   ///////////////////////
  // if (enforceLocalConservation) {
  //   // FunctionPtr parity = Teuchos::rcp<Function>( new SideParityFunction );
  //   // LinearTermPtr conservedQuantity = Teuchos::rcp<LinearTerm>( new LinearTerm(parity, beta_n_u_minus_sigma_n) );
  //   LinearTermPtr conservedQuantity = Teuchos::rcp<LinearTerm>( new LinearTerm(1.0, beta_n_u_minus_sigma_n) );
  //   LinearTermPtr sourcePart = Teuchos::rcp<LinearTerm>( new LinearTerm(invDt, u) );
  //   conservedQuantity->addTerm(sourcePart, true);
  //   solution->lagrangeConstraints()->addConstraint(conservedQuantity == u_prev_time * invDt);
  // }

  int timestepCount = 0;
  double time_tol = 1e-8;
  double L2_time_residual = 1e9;
  while((L2_time_residual > time_tol) && (timestepCount < numTimeSteps))
  {
    solution->solve(false);
    // Subtract solutions to get residual
    flowResidual->setSolution(solution);
    flowResidual->addSolution(prevTimeFlow, -1.0);
    L2_time_residual = flowResidual->L2NormOfSolutionGlobal(u->ID());

    if (rank == 0)
    {
      cout << endl << "Timestep: " << timestepCount << ", dt = " << dt << ", Time residual = " << L2_time_residual << endl;

      stringstream outfile;
      outfile << "rotatingCylinder_" << timestepCount;
      solution->writeToVTK(outfile.str(), 5);

      // Check local conservation
      FunctionPtr flux = Teuchos::rcp( new PreviousSolutionFunction(solution, beta_n_u_hat) );
      FunctionPtr source = Teuchos::rcp( new PreviousSolutionFunction(flowResidual, u) );
      source = invDt * source;
      Teuchos::Tuple<double, 3> fluxImbalances = checkConservation(flux, source, varFactory, mesh);
      cout << "Mass flux: Largest Local = " << fluxImbalances[0]
           << ", Global = " << fluxImbalances[1] << ", Sum Abs = " << fluxImbalances[2] << endl;
    }

    prevTimeFlow->setSolution(solution); // reset previous time solution to current time sol
    timestepCount++;
  }

  return 0;
}
// Kovasznay-flow driver: steady incompressible Navier-Stokes discretized with DPG,
// linearized about a background flow and iterated Newton-style, with adaptive mesh
// refinement between nonlinear solves.
//
// Required options: --numRefs (refinement steps), --norm (0 = graph; other values
// currently leave the inner product empty).
// Optional: --conserve, --Re (default 40), --maxIterations (default 20),
// --polyOrder (default 2), --deltaP (default 2).
// Rank 0 exports "kovasznay_<k>" after each refinement level. Returns 0.
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  choice::MpiArgs args( argc, argv );
#else
  choice::Args args( argc, argv );
#endif
  int commRank = Teuchos::GlobalMPISession::getRank();
  int numProcs = Teuchos::GlobalMPISession::getNProc();

  // Required arguments
  int numRefs = args.Input<int>("--numRefs", "number of refinement steps");
  int norm = args.Input<int>("--norm", "0 = graph\n    1 = robust\n    2 = coupled robust");

  // Optional arguments (have defaults)
  bool enforceLocalConservation = args.Input<bool>("--conserve", "enforce local conservation", false);
  // Explicit <double>: with a bare integer default the value type would be deduced
  // as int, which rules out non-integer Reynolds numbers on the command line.
  double Re = args.Input<double>("--Re", "Reynolds number", 40.0);
  double nu = 1./Re;
  double lambda = Re/2.-sqrt(Re*Re/4+4*pi*pi);
  int maxNewtonIterations = args.Input<int>("--maxIterations", "maximum number of Newton iterations", 20);
  int polyOrder = args.Input<int>("--polyOrder", "polynomial order for field variables", 2);
  int deltaP = args.Input<int>("--deltaP", "how much to enrich test space", 2);
  // string saveFile = args.Input<string>("--meshSaveFile", "file to which to save refinement history", "");
  // string replayFile = args.Input<string>("--meshLoadFile", "file with refinement history to replay", "");
  args.Process();

  // if (commRank==0)
  // {
  //   cout << "saveFile is " << saveFile << endl;
  //   cout << "loadFile is " << replayFile << endl;
  // }

  ////////////////////   PROBLEM DEFINITIONS   ///////////////////////
  int H1Order = polyOrder+1;

  ////////////////////   DECLARE VARIABLES   ///////////////////////
  // define test variables
  VarFactory varFactory;
  // VarPtr tau11 = varFactory.testVar("tau11", HGRAD);
  // VarPtr tau12 = varFactory.testVar("tau12", HGRAD);
  // VarPtr tau22 = varFactory.testVar("tau22", HGRAD);
  VarPtr tau1 = varFactory.testVar("tau1", HDIV);
  VarPtr tau2 = varFactory.testVar("tau2", HDIV);
  VarPtr v1 = varFactory.testVar("v1", HGRAD);
  VarPtr v2 = varFactory.testVar("v2", HGRAD);
  VarPtr q = varFactory.testVar("q", HGRAD);

  // define trial variables
  VarPtr u1 = varFactory.fieldVar("u1");
  VarPtr u2 = varFactory.fieldVar("u2");
  // VarPtr sigma11 = varFactory.fieldVar("sigma11");
  // VarPtr sigma12 = varFactory.fieldVar("sigma12");
  // VarPtr sigma22 = varFactory.fieldVar("sigma22");
  VarPtr sigma1 = varFactory.fieldVar("sigma1", VECTOR_L2);
  VarPtr sigma2 = varFactory.fieldVar("sigma2", VECTOR_L2);
  VarPtr u1hat = varFactory.traceVar("u1hat");
  VarPtr u2hat = varFactory.traceVar("u2hat");
  VarPtr t1hat = varFactory.fluxVar("t1hat");
  VarPtr t2hat = varFactory.fluxVar("t2hat");
  VarPtr p = varFactory.fieldVar("p");

  ////////////////////   BUILD MESH   ///////////////////////
  BFPtr bf = Teuchos::rcp( new BF(varFactory) );

  // define nodes for mesh: rectangle [xmin,xmax] x [ymin,ymax]
  FieldContainer<double> meshBoundary(4,2);
  double xmin = -0.5;
  double xmax =  1.0;
  double ymin = -0.5;
  double ymax =  1.5;

  meshBoundary(0,0) =  xmin; // x1
  meshBoundary(0,1) =  ymin; // y1
  meshBoundary(1,0) =  xmax;
  meshBoundary(1,1) =  ymin;
  meshBoundary(2,0) =  xmax;
  meshBoundary(2,1) =  ymax;
  meshBoundary(3,0) =  xmin;
  meshBoundary(3,1) =  ymax;

  int horizontalCells = 6, verticalCells = 8;

  // create a pointer to a new mesh:
  Teuchos::RCP<Mesh> mesh = Mesh::buildQuadMesh(meshBoundary, horizontalCells, verticalCells,
                            bf, H1Order, H1Order+deltaP);

  ////////////////////////////////////////////////////////////////////
  // INITIALIZE BACKGROUND FLOW FUNCTIONS
  ////////////////////////////////////////////////////////////////////

  BCPtr nullBC = Teuchos::rcp((BC*)NULL);
  RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL);
  IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  // backgroundFlow accumulates the nonlinear iterate; `solution` (below) is the update.
  SolutionPtr backgroundFlow = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );

  vector<double> e1(2); // (1,0)
  e1[0] = 1;
  vector<double> e2(2); // (0,1)
  e2[1] = 1;

  FunctionPtr u1_prev = Function::solution(u1, backgroundFlow);
  FunctionPtr u2_prev = Function::solution(u2, backgroundFlow);
  FunctionPtr sigma1_prev = Function::solution(sigma1, backgroundFlow);
  FunctionPtr sigma2_prev = Function::solution(sigma2, backgroundFlow);
  FunctionPtr p_prev = Function::solution(p, backgroundFlow);
  // FunctionPtr sigma11_prev = Function::solution(sigma11, backgroundFlow);
  // FunctionPtr sigma12_prev = Function::solution(sigma12, backgroundFlow);
  // FunctionPtr sigma22_prev = Function::solution(sigma22, backgroundFlow);

  FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
  FunctionPtr one = Teuchos::rcp( new ConstantScalarFunction(1.0) );
  FunctionPtr u1Exact     = Teuchos::rcp( new ExactU1(lambda) );
  FunctionPtr u2Exact     = Teuchos::rcp( new ExactU2(lambda) );
  // FunctionPtr beta = e1 * u1_prev + e2 * u2_prev;

  // ==================== SET INITIAL GUESS ==========================
  // Only the velocity components are projected; the other fields are not set here.
  map<int, Teuchos::RCP<Function> > functionMap;
  functionMap[u1->ID()] = u1Exact;
  functionMap[u2->ID()] = u2Exact;
  // functionMap[sigma1->ID()] = Function::vectorize(zero,zero);
  // functionMap[sigma2->ID()] = Function::vectorize(zero,zero);
  // functionMap[p->ID()] = zero;

  backgroundFlow->projectOntoMesh(functionMap);

  ////////////////////   DEFINE BILINEAR FORM   ///////////////////////

  // // stress equation
  bf->addTerm( 1./nu*sigma1, tau1 );
  bf->addTerm( 1./nu*sigma2, tau2 );
  bf->addTerm( u1, tau1->div() );
  bf->addTerm( u2, tau2->div() );
  bf->addTerm( -u1hat, tau1->dot_normal() );
  bf->addTerm( -u2hat, tau2->dot_normal() );
  // bf->addTerm( 1./(2*nu)*sigma11, tau11 );
  // bf->addTerm( 1./(2*nu)*sigma12, tau12 );
  // bf->addTerm( 1./(2*nu)*sigma12, tau12 );
  // bf->addTerm( 1./(2*nu)*sigma22, tau22 );
  // bf->addTerm( u1, tau11->dx() );
  // bf->addTerm( u1, tau12->dy() );
  // bf->addTerm( u2, tau12->dx() );
  // bf->addTerm( u2, tau22->dy() );
  // bf->addTerm( -u1hat, tau11->times_normal_x() );
  // bf->addTerm( -u1hat, tau12->times_normal_y() );
  // bf->addTerm( -u2hat, tau12->times_normal_x() );
  // bf->addTerm( -u2hat, tau22->times_normal_y() );

  // momentum equation: linearization of -(u (x) u) : grad(v) about (u1_prev, u2_prev).
  // Each cross term appears once per test direction, so the v1 row tests with
  // v1->dx()/v1->dy() and the v2 row with v2->dx()/v2->dy().
  bf->addTerm( -2.*u1_prev*u1, v1->dx() );
  bf->addTerm( -u2_prev*u1, v1->dy() );
  bf->addTerm( -u1_prev*u2, v1->dy() );
  bf->addTerm( -u2_prev*u1, v2->dx() );
  bf->addTerm( -u1_prev*u2, v2->dx() ); // was v1->dy(), duplicating the term two lines up
  bf->addTerm( -2.*u2_prev*u2, v2->dy() );
  bf->addTerm( -p, v1->dx() );
  bf->addTerm( -p, v2->dy() );
  // bf->addTerm( sigma11, v1->dx() );
  // bf->addTerm( sigma12, v1->dy() );
  // bf->addTerm( sigma12, v2->dx() );
  // bf->addTerm( sigma22, v2->dy() );
  bf->addTerm( sigma1, v1->grad() );
  bf->addTerm( sigma2, v2->grad() );
  bf->addTerm( t1hat, v1);
  bf->addTerm( t2hat, v2);

  // continuity equation
  bf->addTerm( -u1, q->dx() );
  bf->addTerm( -u2, q->dy() );
  bf->addTerm( u1hat, q->times_normal_x() );
  bf->addTerm( u2hat, q->times_normal_y() );

  ////////////////////   SPECIFY RHS   ///////////////////////
  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );

  // stress equation
  rhs->addTerm( -u1_prev * tau1->div() );
  rhs->addTerm( -u2_prev * tau2->div() );

  // momentum equation: mirrors the convection terms of the bilinear form term by term.
  // NOTE(review): the factor 2 on the diagonal terms (and the doubled cross terms)
  // makes this the linearized operator applied to the background flow rather than
  // the plain nonlinear residual -- confirm this is intended.
  rhs->addTerm( 2.*u1_prev*u1_prev * v1->dx() );
  rhs->addTerm( u2_prev*u1_prev    * v1->dy() );
  rhs->addTerm( u1_prev*u2_prev    * v1->dy() );
  rhs->addTerm( u2_prev*u1_prev    * v2->dx() );
  rhs->addTerm( u1_prev*u2_prev    * v2->dx() ); // was v1->dy(), same copy-paste slip as in bf
  rhs->addTerm( 2.*u2_prev*u2_prev * v2->dy() );
  // rhs->addTerm( p_prev             * v1->dx() );
  // rhs->addTerm( p_prev             * v2->dy() );
  // rhs->addTerm( -sigma1_prev       * v1->grad() );
  // rhs->addTerm( -sigma2_prev       * v2->grad() );

  // rhs->addTerm( -sigma11_prev * v1->dx() );
  // rhs->addTerm( -sigma12_prev * v1->dy() );
  // rhs->addTerm( -sigma12_prev * v2->dx() );
  // rhs->addTerm( -sigma22_prev * v2->dy() );

  // continuity equation
  rhs->addTerm( u1_prev * q->dx() );
  rhs->addTerm( u2_prev * q->dy() );

  ////////////////////   DEFINE INNER PRODUCT(S)   ///////////////////////
  // Only norm == 0 (graph norm) is implemented; for any other value ip stays an
  // IP with no terms added.
  IPPtr ip = Teuchos::rcp(new IP);
  if (norm == 0)
  {
    ip = bf->graphNorm();
  }
  else if (norm == 1)
  {
    // ip = bf->l2Norm();
  }

  ////////////////////   CREATE BCs   ///////////////////////
  Teuchos::RCP<BCEasy> bc = Teuchos::rcp( new BCEasy );
  // Teuchos::RCP<PenaltyConstraints> pc = Teuchos::rcp( new PenaltyConstraints );
  // Boundary filters are built from the mesh extents so they cannot drift apart
  // from the geometry defined above.
  SpatialFilterPtr left = Teuchos::rcp( new ConstantXBoundary(xmin) );
  SpatialFilterPtr right = Teuchos::rcp( new ConstantXBoundary(xmax) );
  SpatialFilterPtr bottom = Teuchos::rcp( new ConstantYBoundary(ymin) );
  SpatialFilterPtr top = Teuchos::rcp( new ConstantYBoundary(ymax) );
  // Exact velocity imposed on the traces along all four sides.
  bc->addDirichlet(u1hat, left, u1Exact);
  bc->addDirichlet(u2hat, left, u2Exact);
  bc->addDirichlet(u1hat, right, u1Exact);
  bc->addDirichlet(u2hat, right, u2Exact);
  bc->addDirichlet(u1hat, bottom, u1Exact);
  bc->addDirichlet(u2hat, bottom, u2Exact);
  bc->addDirichlet(u1hat, top, u1Exact);
  bc->addDirichlet(u2hat, top, u2Exact);

  // zero mean constraint on pressure
  bc->addZeroMeanConstraint(p);

  // pc->addConstraint(u1hat*u2hat-t1hat == zero, top);
  // pc->addConstraint(u2hat*u2hat-t2hat == zero, top);

  Teuchos::RCP<Solution> solution = Teuchos::rcp( new Solution(mesh, bc, rhs, ip) );
  // solution->setFilter(pc);

  // if (enforceLocalConservation) {
  //   solution->lagrangeConstraints()->addConstraint(u1hat->times_normal_x() + u2hat->times_normal_y() == zero);
  // }

  // ==================== Register Solutions ==========================
  mesh->registerSolution(solution);
  mesh->registerSolution(backgroundFlow);

  // Teuchos::RCP< RefinementHistory > refHistory = Teuchos::rcp( new RefinementHistory );
  // mesh->registerObserver(refHistory);

  ////////////////////   SOLVE & REFINE   ///////////////////////
  double energyThreshold = 0.2; // for mesh refinements
  RefinementStrategy refinementStrategy( solution, energyThreshold );
  VTKExporter exporter(backgroundFlow, mesh, varFactory);
  // Export the projected initial guess as level 0.
  stringstream outfile;
  outfile << "kovasznay" << "_" << 0;
  exporter.exportSolution(outfile.str());

  double nonlinearRelativeEnergyTolerance = 1e-5; // used to determine convergence of the nonlinear solution
  for (int refIndex=0; refIndex<=numRefs; refIndex++)
  {
    // Nonlinear iteration on the current mesh: stop when the L2 norm of the
    // velocity update is small enough or the iteration cap is reached.
    double L2Update = 1e10;
    int iterCount = 0;
    while (L2Update > nonlinearRelativeEnergyTolerance && iterCount < maxNewtonIterations)
    {
      solution->solve(false);
      double u1L2Update = solution->L2NormOfSolutionGlobal(u1->ID());
      double u2L2Update = solution->L2NormOfSolutionGlobal(u2->ID());
      L2Update = sqrt(u1L2Update*u1L2Update + u2L2Update*u2L2Update);

      // Check local conservation
      if (commRank == 0)
      {
        cout << "L2 Norm of Update = " << L2Update << endl;

        // if (saveFile.length() > 0) {
        //   std::ostringstream oss;
        //   oss << string(saveFile) << refIndex ;
        //   cout << "on refinement " << refIndex << " saving mesh file to " << oss.str() << endl;
        //   refHistory->saveToFile(oss.str());
        // }
      }

      // line search algorithm (currently a fixed full step)
      double alpha = 1.0;
      backgroundFlow->addSolution(solution, alpha);
      iterCount++;
    }

    if (commRank == 0)
    {
      stringstream outfile;
      outfile << "kovasznay" << "_" << refIndex+1;
      exporter.exportSolution(outfile.str());
    }

    // No refinement after the final level.
    if (refIndex < numRefs)
      refinementStrategy.refine(commRank==0); // print to console on commRank 0
  }

  return 0;
}
Exemple #5
0
// Driver for a convection-diffusion ("confusion") DPG problem on a unit quad mesh.
//
// As written, the program reads its command-line options, builds the bilinear
// form, mesh, test-space inner products, RHS and BCs, creates a Solution plus a
// second "background flow" Solution registered with the mesh, p-refines cells
// 1 and 3, and then returns 0.
//
// NOTE(review): the unconditional `return 0;` right after that p-refinement
// makes everything below it (pre-refinements, adaptive loop, final solve,
// conditioning check, VTK export) unreachable. The existing comment on
// registerSolution ("to trigger issue with p-refinements") suggests this is a
// deliberate reproduction case — confirm before removing the early return.
int main(int argc, char *argv[]) {
 
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  choice::MpiArgs args( argc, argv );
#else
  choice::Args args( argc, argv );
#endif
  int rank = Teuchos::GlobalMPISession::getRank();
  int numProcs = Teuchos::GlobalMPISession::getNProc();
  
  // Command-line options; the last argument of each Input call is the default.
  int nCells = args.Input<int>("--nCells", "num cells",2);  
  int numRefs = args.Input<int>("--numRefs","num adaptive refinements",0);
  int numPreRefs = args.Input<int>("--numPreRefs","num preemptive adaptive refinements",0);
  int order = args.Input<int>("--order","order of approximation",2);
  double eps = args.Input<double>("--epsilon","diffusion parameter",1e-2);
  // NOTE(review): this flag is spelled with a single leading dash, unlike every
  // other flag in this function — confirm whether that is intended.
  double energyThreshold = args.Input<double>("-energyThreshold","energy thresh for adaptivity", .5);
  double rampHeight = args.Input<double>("--rampHeight","ramp height at x = 2", 0.0);
  double ipSwitch = args.Input<double>("--ipSwitch","point at which to switch to graph norm", 0.0); // default to 0 to remain on robust norm
  bool useAnisotropy = args.Input<bool>("--useAnisotropy","aniso flag ", false);

  int H1Order = order+1; 
  int pToAdd = args.Input<int>("--pToAdd","test space enrichment", 2);

  FunctionPtr zero = Function::constant(0.0);
  FunctionPtr one = Function::constant(1.0);
  FunctionPtr n = Teuchos::rcp( new UnitNormalFunction );
  // Unit coordinate vectors e1 = (1,0), e2 = (0,1).
  vector<double> e1,e2;
  e1.push_back(1.0);e1.push_back(0.0);
  e2.push_back(0.0);e2.push_back(1.0);

  ////////////////////   DECLARE VARIABLES   ///////////////////////
  // define test variables
  VarFactory varFactory; 
  VarPtr tau = varFactory.testVar("\\tau", HDIV);
  VarPtr v = varFactory.testVar("v", HGRAD);
  
  // define trial variables
  VarPtr uhat = varFactory.traceVar("\\widehat{u}");
  VarPtr beta_n_u_minus_sigma_n = varFactory.fluxVar("\\widehat{\\beta \\cdot n u - \\sigma_{n}}");
  VarPtr u = varFactory.fieldVar("u");
  VarPtr sigma1 = varFactory.fieldVar("\\sigma_1");
  VarPtr sigma2 = varFactory.fieldVar("\\sigma_2");

  // Constant convection vector beta = (1,0).
  vector<double> beta;
  beta.push_back(1.0);
  beta.push_back(0.0);
  
  ////////////////////   DEFINE BILINEAR FORM   ///////////////////////

  BFPtr confusionBF = Teuchos::rcp( new BF(varFactory) );
  // tau terms:
  confusionBF->addTerm(sigma1 / eps, tau->x());
  confusionBF->addTerm(sigma2 / eps, tau->y());
  confusionBF->addTerm(u, tau->div());
  confusionBF->addTerm(uhat, -tau->dot_normal());
  
  // v terms:
  confusionBF->addTerm( sigma1, v->dx() );
  confusionBF->addTerm( sigma2, v->dy() );
  confusionBF->addTerm( -u, beta * v->grad() );
  confusionBF->addTerm( beta_n_u_minus_sigma_n, v);

  // first order term with magnitude alpha
  // (the bilinear-form term is commented out; alpha is still used in restLT below)
  double alpha = 0.0;
  //  confusionBF->addTerm(alpha * u, v);

  ////////////////////   BUILD MESH   ///////////////////////


  // create a pointer to a new mesh:
  Teuchos::RCP<Mesh> mesh = MeshUtilities::buildUnitQuadMesh(nCells,confusionBF, H1Order, H1Order+pToAdd);
  mesh->setPartitionPolicy(Teuchos::rcp(new ZoltanMeshPartitionPolicy("HSFC")));  
  MeshInfo meshInfo(mesh); // gets info like cell measure, etc

  ////////////////////   DEFINE INNER PRODUCT(S)   ///////////////////////
  // `ip` starts as an empty IP and is reassigned to the IPSwitcher built below.
  IPPtr ip = Teuchos::rcp(new IP);

  /*
   // robust test norm
  FunctionPtr C_h = Teuchos::rcp( new EpsilonScaling(eps) );  
  FunctionPtr invH = Teuchos::rcp(new InvHScaling);
  FunctionPtr invSqrtH = Teuchos::rcp(new InvSqrtHScaling);
  FunctionPtr sqrtH = Teuchos::rcp(new SqrtHScaling);
  FunctionPtr hSwitch = Teuchos::rcp(new HSwitch(ipSwitch,mesh));
  ip->addTerm(hSwitch*sqrt(eps) * v->grad() );
  ip->addTerm(hSwitch*beta * v->grad() );
  ip->addTerm(hSwitch*tau->div() );
  
  // graph norm
  ip->addTerm( (one-hSwitch)*((1.0/eps) * tau + v->grad()));
  ip->addTerm( (one-hSwitch)*(beta * v->grad() - tau->div()));

  // regularizing terms
  ip->addTerm(C_h/sqrt(eps) * tau );    
  ip->addTerm(invSqrtH*v);
  */

   // robust test norm
  IPPtr robIP = Teuchos::rcp(new IP);
  FunctionPtr C_h = Teuchos::rcp( new EpsilonScaling(eps) );  
  FunctionPtr invH = Teuchos::rcp(new InvHScaling);
  FunctionPtr invSqrtH = Teuchos::rcp(new InvSqrtHScaling);
  FunctionPtr sqrtH = Teuchos::rcp(new SqrtHScaling);
  FunctionPtr hSwitch = Teuchos::rcp(new HSwitch(ipSwitch,mesh));
  robIP->addTerm(sqrt(eps) * v->grad() );
  robIP->addTerm(beta * v->grad() );
  robIP->addTerm(tau->div() );
  // regularizing terms
  robIP->addTerm(C_h/sqrt(eps) * tau );    
  robIP->addTerm(invSqrtH*v);

  // Graph norm of the bilinear form, with one extra term on v.
  IPPtr graphIP = confusionBF->graphNorm();
  graphIP->addTerm(invSqrtH*v);
  //  graphIP->addTerm(C_h/sqrt(eps) * tau );    
  IPPtr switchIP = Teuchos::rcp(new IPSwitcher(robIP,graphIP,ipSwitch)); // rob IP for h>ipSwitch mesh size, graph norm o/w
  ip = switchIP;
    
  // Linear terms later evaluated on the Riesz error representation to split
  // the error into x / y / remaining contributions (see xErr, yErr, restErr;
  // that code is currently unreachable because of the early return below).
  LinearTermPtr vVecLT = Teuchos::rcp(new LinearTerm);
  LinearTermPtr tauVecLT = Teuchos::rcp(new LinearTerm);
  vVecLT->addTerm(sqrt(eps)*v->grad());
  tauVecLT->addTerm(C_h/sqrt(eps)*tau);

  LinearTermPtr restLT = Teuchos::rcp(new LinearTerm);
  restLT->addTerm(alpha*v);
  restLT->addTerm(invSqrtH*v);
  restLT = restLT + beta * v->grad();
  restLT = restLT + tau->div();

  ////////////////////   SPECIFY RHS   ///////////////////////

  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );
  FunctionPtr f = zero;
  //  f = one;
  rhs->addTerm( f * v ); // obviously, with f = 0 adding this term is not necessary!

  ////////////////////   CREATE BCs   ///////////////////////
  Teuchos::RCP<BCEasy> bc = Teuchos::rcp( new BCEasy );

  SpatialFilterPtr Inflow = Teuchos::rcp(new LeftInflow);
  SpatialFilterPtr wallBoundary = Teuchos::rcp(new WallBoundary);//MeshUtilities::rampBoundary(rampHeight);
  SpatialFilterPtr freeStream = Teuchos::rcp(new FreeStreamBoundary);

  // uhat = 1 on the wall filter; zero flux on the inflow and free-stream filters.
  bc->addDirichlet(uhat, wallBoundary, one);
  //  bc->addDirichlet(uhat, wallBoundary, Teuchos::rcp(new WallSmoothBC(eps)));
  bc->addDirichlet(beta_n_u_minus_sigma_n, Inflow, zero);
  bc->addDirichlet(beta_n_u_minus_sigma_n, freeStream, zero);

  ////////////////////   SOLVE & REFINE   ///////////////////////

  Teuchos::RCP<Solution> solution;
  solution = Teuchos::rcp( new Solution(mesh, bc, rhs, ip) );
  // Second Solution with null BC/RHS/IP, registered with the mesh and filled
  // by projecting the constant 3.14 onto the field variable u.
  BCPtr nullBC = Teuchos::rcp((BC*)NULL); RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL); IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  SolutionPtr backgroundFlow = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );  
  mesh->registerSolution(backgroundFlow); // to trigger issue with p-refinements
  map<int, Teuchos::RCP<Function> > functionMap; functionMap[u->ID()] = Function::constant(3.14);
  backgroundFlow->projectOntoMesh(functionMap);

  // Cell selection by geometry is commented out below; instead cells 1 and 3
  // are hard-coded and p-refined.
  vector<int> ids;
  /*
  for (int i = 0;i<mesh->numActiveElements();i++){
    bool cellIDset = false;
    int cellID = mesh->activeElements()[i]->cellID();
    int elemOrder = mesh->cellPolyOrder(cellID)-1;
    FieldContainer<double> vv(4,2); mesh->verticesForCell(vv, cellID);
    bool vertexOnWall = false; bool vertexAtSingularity = false;
    for (int j = 0;j<4;j++){
      if ((abs(vv(j,0)-.5) + abs(vv(j,1)))<1e-10){
	vertexAtSingularity = true;     
	cellIDset = true;
      }
    }	
    if (!vertexAtSingularity && elemOrder<2 && !cellIDset ){
      ids.push_back(cellID);
      cout << "celliD = " << cellID << endl;
    }
  }
  */
  ids.push_back(1);
  ids.push_back(3);
  mesh->pRefine(ids); // to put order = 1

  // NOTE(review): unconditional early exit — every statement from here to the
  // end of main is unreachable as written.
  return 0;
  
  // Residual functional rhs - b(solution, .), and its Riesz representation
  // in the test norm `ip`.
  LinearTermPtr residual = rhs->linearTermCopy();
  residual->addTerm(-confusionBF->testFunctional(solution));  
  RieszRepPtr rieszResidual = Teuchos::rcp(new RieszRep(mesh, ip, residual));
  rieszResidual->computeRieszRep();
  FunctionPtr e_v = Teuchos::rcp(new RepFunction(v,rieszResidual));
  FunctionPtr e_tau = Teuchos::rcp(new RepFunction(tau,rieszResidual));
  map<int,FunctionPtr> errRepMap;
  errRepMap[v->ID()] = e_v;
  errRepMap[tau->ID()] = e_tau;
  FunctionPtr errTau = tauVecLT->evaluate(errRepMap,false);
  FunctionPtr errV = vVecLT->evaluate(errRepMap,false);
  FunctionPtr errRest = restLT->evaluate(errRepMap,false);
  // Squared x- and y-direction error contributions, and the squared remainder.
  FunctionPtr xErr = (errTau->x())*(errTau->x()) + (errV->dx())*(errV->dx());
  FunctionPtr yErr = (errTau->y())*(errTau->y()) + (errV->dy())*(errV->dy());
  FunctionPtr restErr = errRest*errRest;

  RefinementStrategy refinementStrategy( solution, energyThreshold );    

  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  //                     PRE REFINEMENTS 
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////  

  if (rank==0){
    cout << "Number of pre-refinements = " << numPreRefs << endl;
  }
  // Runs numPreRefs+1 passes; the last pass only collects cells and does not refine.
  for (int i =0;i<=numPreRefs;i++){   
    vector<ElementPtr> elems = mesh->activeElements();
    vector<ElementPtr>::iterator elemIt;
    vector<int> wallCells;    
    for (elemIt=elems.begin();elemIt != elems.end();elemIt++){
      int cellID = (*elemIt)->cellID();
      int numSides = mesh->getElement(cellID)->numSides();
      FieldContainer<double> vertices(numSides,2); //for quads

      mesh->verticesForCell(vertices, cellID);
      bool cellIDset = false;	
      for (int j = 0;j<numSides;j++){ 	
	if ((abs(vertices(j,0)-.5)<1e-7) && (abs(vertices(j,1))<1e-7) && !cellIDset){ // if at singularity, i.e. if a vertex is (0.5,0); cellIDset keeps each cell from being added twice
	  wallCells.push_back(cellID);
	  cellIDset = true;
	}
      }
    }
    if (i<numPreRefs){
      refinementStrategy.refineCells(wallCells);
    }
  }

  double minSideLength = meshInfo.getMinCellSideLength() ;
  double minCellMeasure = meshInfo.getMinCellMeasure() ;
  if (rank==0){
    cout << "after prerefs, sqrt min cell measure = " << sqrt(minCellMeasure) << ", min side length = " << minSideLength << endl;
  }

  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

  VTKExporter exporter(solution, mesh, varFactory);

  // Adaptive loop: compute directional error indicators, refine, adjust p, re-solve.
  for (int refIndex=0;refIndex<numRefs;refIndex++){
    if (rank==0){
      cout << "on ref index " << refIndex << endl;
    }    
    rieszResidual->computeRieszRep(); // in preparation to get anisotropy    

    vector<int> cellIDs;
    refinementStrategy.getCellsAboveErrorThreshhold(cellIDs);

    map<int,double> energyError = solution->energyError();  

    map<int,double> xErrMap = xErr->cellIntegrals(cellIDs,mesh,5,true);
    map<int,double> yErrMap = yErr->cellIntegrals(cellIDs,mesh,5,true);
    map<int,double> restErrMap = restErr->cellIntegrals(cellIDs,mesh,5,true);    
    // Diagnostic loop: `err` is computed but only consumed by the commented-out print.
    // NOTE(review): operator[] on the maps inserts a zero entry for any cell ID
    // not already present — confirm that is harmless for the refine() call below.
    for (vector<ElementPtr>::iterator elemIt = mesh->activeElements().begin();elemIt!=mesh->activeElements().end();elemIt++){
      int cellID = (*elemIt)->cellID();
      double err = xErrMap[cellID]+ yErrMap[cellID] + restErrMap[cellID];
      //      if (rank==0)
	//      cout << "err thru LT = " << sqrt(err) << ", while energy err = " << energyError[cellID] << endl;
    }

    /*
    map<int,double> ratio,xErr,yErr;
    vector<ElementPtr> elems = mesh->activeElements();
    for (vector<ElementPtr>::iterator elemIt = elems.begin();elemIt!=elems.end();elemIt++){
      int cellID = (*elemIt)->cellID();
      ratio[cellID] = 0.0;
      xErr[cellID] = 0.0;
      yErr[cellID] = 0.0;
      if (std::find(cellIDs.begin(),cellIDs.end(),cellID)!=cellIDs.end()){ // if this cell is above energy thresh
	ratio[cellID] = yErrMap[cellID]/xErrMap[cellID];
	xErr[cellID] = xErrMap[cellID];
	yErr[cellID] = yErrMap[cellID];
      }
    }   
    FunctionPtr ratioFxn = Teuchos::rcp(new EnergyErrorFunction(ratio));
    FunctionPtr xErrFxn = Teuchos::rcp(new EnergyErrorFunction(xErr));
    FunctionPtr yErrFxn = Teuchos::rcp(new EnergyErrorFunction(yErr));
    exporter.exportFunction(ratioFxn, string("ratio")+oss.str());
    exporter.exportFunction(xErrFxn, string("xErr")+oss.str());
    exporter.exportFunction(yErrFxn, string("yErr")+oss.str());
    */
    if (useAnisotropy){
      refinementStrategy.refine(rank==0,xErrMap,yErrMap); //anisotropic refinements
    }else{
      refinementStrategy.refine(rank==0); // no anisotropy
    }

    // Collect every active cell that does NOT have a vertex at (0.5,0) and whose
    // elemOrder (cellPolyOrder - 1) is below 2, then p-refine those cells.
    // Cells touching (0.5,0) are left alone.
    vector<int> ids;
    for (int i = 0;i<mesh->numActiveElements();i++){
      int cellID = mesh->activeElements()[i]->cellID();
      int elemOrder = mesh->cellPolyOrder(cellID)-1;
      FieldContainer<double> vv(4,2); mesh->verticesForCell(vv, cellID);
      bool vertexOnWall = false; bool vertexAtSingularity = false;
      for (int j = 0;j<4;j++){
	if ((abs(vv(j,0)-.5) + abs(vv(j,1)))<1e-10)
	  vertexAtSingularity = true;
      }	
      if (!vertexAtSingularity && elemOrder<2){
	ids.push_back(cellID);
      }
    }
    mesh->pRefine(ids); // to put order = 1
    /*
      if (elemOrder>1){
	if (vertexAtSingularity){
	  vector<int> ids;
	  ids.push_back(cellID);
	  mesh->pRefine(ids,1-(elemOrder-1)); // to put order = 1
	  //	  mesh->pRefine(ids); // to put order = 1
	  if (rank==0)
	    cout << "p unrefining elem with elemOrder = " << elemOrder << endl;
	}
      }else{
	if (!vertexAtSingularity){
	  vector<int> ids;
	  ids.push_back(cellID);	    
	  mesh->pRefine(ids,2-elemOrder);
	}	  
      }
      */



    double minSideLength = meshInfo.getMinCellSideLength() ;
    if (rank==0)
      cout << "minSideLength is " << minSideLength << endl;

    solution->condensedSolve();
    // oss is only consumed by the commented-out export calls above.
    std::ostringstream oss;
    oss << refIndex;
    
  }

  // final solve on final mesh
  solution->setWriteMatrixToFile(true,"K.mat");
  solution->condensedSolve();

  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
  //                                          CHECK CONDITIONING 
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////

  // Find the active cell whose local inner-product matrix has the largest
  // condition number and write that matrix to "ipMat.mat".
  bool checkConditioning = true;
  if (checkConditioning){
    double minSideLength = meshInfo.getMinCellSideLength() ;
    StandardAssembler assembler(solution);
    double maxCond = 0.0;
    int maxCellID = 0;
    for (int i = 0;i<mesh->numActiveElements();i++){
      int cellID = mesh->getActiveElement(i)->cellID();
      FieldContainer<double> ipMat = assembler.getIPMatrix(mesh->getElement(cellID));
      double cond = SerialDenseWrapper::getMatrixConditionNumber(ipMat);
      if (cond>maxCond){
	maxCond = cond;
	maxCellID = cellID;
      }
    }
    if (rank==0){
      cout << "cell ID  " << maxCellID << " has minCellLength " << minSideLength << " and condition estimate " << maxCond << endl;
    }
    string ipMatName = string("ipMat.mat");
    ElementPtr maxCondElem = mesh->getElement(maxCellID);
    FieldContainer<double> ipMat = assembler.getIPMatrix(maxCondElem);
    SerialDenseWrapper::writeMatrixToMatlabFile(ipMatName,ipMat);   
  }
  ////////////////////   print to file   ///////////////////////
  
  if (rank==0){
    exporter.exportSolution(string("robustIP"));
    cout << endl;
  }
 
  return 0;
} 
// Driver that checks a candidate trial-space pre-image for the Stokes (VGP)
// test function (q=1, tau=0, v=0).
//
// It builds the Stokes bilinear form and a test inner product, projects a
// hand-written candidate solution onto the mesh, computes the Riesz
// representation of the resulting test functional, and prints L2 norms of the
// recovered test functions along with a few integrals of the functional
// against simple test functions.
//
// `mesh` and `integralOverMesh` are not declared in this function; they must
// come from elsewhere in the file.
int main(int argc, char *argv[]) {
  // TODO: figure out the right thing to do here...
  // may want to modify argc and argv before we make the following call:
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  int rank=mpiSession.getRank();
  int numProcs=mpiSession.getNProc();
  
#ifdef HAVE_MPI
  choice::MpiArgs args( argc, argv );
#else
  choice::Args args(argc, argv );
#endif
  
  int polyOrder, pToAdd;
  try {
    // read args:
    // --polyOrder has no default value here; --delta_p defaults to 2.
    polyOrder = args.Input<int>("--polyOrder", "L^2 (field) polynomial order");
    pToAdd = args.Input<int>("--delta_p", "delta p for test enrichment", 2);
    args.Process();
  } catch ( choice::ArgException& e )
  {
    // Argument errors terminate the program with exit status 0.
    exit(0);
  }
  
  int H1Order = polyOrder + 1;
  
  bool useCompliantGraphNorm = false;   // weights to improve conditioning of the local problems
  bool useExtendedPrecisionForOptimalTestInversion = false;

  /////////////////////////// "VGP_CONFORMING" VERSION ///////////////////////

  // fluxes and traces:
  VarPtr u1hat, u2hat, t1n, t2n;
  // fields for SGP:
  VarPtr phi, p, sigma11, sigma12, sigma21, sigma22;
  // fields specific to VGP:
  VarPtr u1, u2;
  
  BFPtr stokesBF;
  IPPtr qoptIP;
  
  double mu = 1;
  
  FunctionPtr h = Teuchos::rcp( new hFunction() );
  
  // Test variables first, then traces, fluxes and fields; the registration
  // order with varFactory is kept exactly as written.
  VarPtr tau1,tau2,v1,v2,q;
  VarFactory varFactory;
  tau1 = varFactory.testVar("\\tau_1", HDIV);
  tau2 = varFactory.testVar("\\tau_2", HDIV);
  v1 = varFactory.testVar("v_1", HGRAD);
  v2 = varFactory.testVar("v_2", HGRAD);
  q = varFactory.testVar("q", HGRAD);
  
  u1hat = varFactory.traceVar("\\widehat{u}_1");
  u2hat = varFactory.traceVar("\\widehat{u}_2");
  
  t1n = varFactory.fluxVar("\\widehat{t_{1n}}");
  t2n = varFactory.fluxVar("\\widehat{t_{2n}}");
  // With the compliant graph norm the velocity fields are declared in HGRAD;
  // otherwise the default field space is used.
  if (!useCompliantGraphNorm) {
    u1 = varFactory.fieldVar("u_1");
    u2 = varFactory.fieldVar("u_2");
  } else {
    u1 = varFactory.fieldVar("u_1", HGRAD);
    u2 = varFactory.fieldVar("u_2", HGRAD);
  }
  sigma11 = varFactory.fieldVar("\\sigma_11");
  sigma12 = varFactory.fieldVar("\\sigma_12");
  sigma21 = varFactory.fieldVar("\\sigma_21");
  sigma22 = varFactory.fieldVar("\\sigma_22");
  p = varFactory.fieldVar("p");
  
  stokesBF = Teuchos::rcp( new BF(varFactory) );  
  // tau1 terms:
  stokesBF->addTerm(u1,tau1->div());
  stokesBF->addTerm(sigma11,tau1->x()); // (sigma1, tau1)
  stokesBF->addTerm(sigma12,tau1->y());
  stokesBF->addTerm(-u1hat, tau1->dot_normal());
  
  // tau2 terms:
  stokesBF->addTerm(u2, tau2->div());
  stokesBF->addTerm(sigma21,tau2->x()); // (sigma2, tau2)
  stokesBF->addTerm(sigma22,tau2->y());
  stokesBF->addTerm(-u2hat, tau2->dot_normal());
  
  // v1:
  stokesBF->addTerm(mu * sigma11,v1->dx()); // (mu sigma1, grad v1) 
  stokesBF->addTerm(mu * sigma12,v1->dy());
  stokesBF->addTerm( - p, v1->dx() );
  stokesBF->addTerm( -t1n, v1);
  
  // v2:
  stokesBF->addTerm(mu * sigma21,v2->dx()); // (mu sigma2, grad v2)
  stokesBF->addTerm(mu * sigma22,v2->dy());
  stokesBF->addTerm( -p, v2->dy());
  stokesBF->addTerm( -t2n, v2);
  
  // q:
  stokesBF->addTerm(-u1,q->dx()); // (-u, grad q)
  stokesBF->addTerm(-u2,q->dy());
  stokesBF->addTerm(u1hat->times_normal_x() + u2hat->times_normal_y(), q);
  
  if (rank==0)
    stokesBF->printTrialTestInteractions();
  
  stokesBF->setUseExtendedPrecisionSolveForOptimalTestFunctions(useExtendedPrecisionForOptimalTestInversion);

  // `mesh` is assigned, not declared, here — it is a variable defined outside this function.
  mesh = MeshFactory::quadMesh(stokesBF, H1Order, pToAdd);
  
  ////////////////////   CREATE BCs   ///////////////////////
  BCPtr bc = BC::bc();
  
  ////////////////////   CREATE RHS   ///////////////////////
  RHSPtr rhs = RHS::rhs(); // zero for now...
  
  IPPtr ip;
  
  qoptIP = Teuchos::rcp(new IP());
      
  // Either the hand-built, h-weighted norm or the bilinear form's own graph norm.
  if (useCompliantGraphNorm) {
    qoptIP->addTerm( mu * v1->dx() + tau1->x() ); // sigma11
    qoptIP->addTerm( mu * v1->dy() + tau1->y() ); // sigma12
    qoptIP->addTerm( mu * v2->dx() + tau2->x() ); // sigma21
    qoptIP->addTerm( mu * v2->dy() + tau2->y() ); // sigma22
    qoptIP->addTerm( mu * v1->dx() + mu * v2->dy() );   // pressure
    qoptIP->addTerm( h * tau1->div() - h * q->dx() );   // u1
    qoptIP->addTerm( h * tau2->div() - h * q->dy());    // u2
    
    qoptIP->addTerm( (mu / h) * v1 );
    qoptIP->addTerm( (mu / h) * v2 );
    qoptIP->addTerm( q );
    qoptIP->addTerm( tau1 );
    qoptIP->addTerm( tau2 );
  } else { // standard graph norm, then
    qoptIP = stokesBF->graphNorm();
  }

  ip = qoptIP;
  
  if (rank==0) 
    ip->printInteractions();
  
  // aim is just to answer one simple question:
  // have I figured out a trial-space preimage for optimal test function (q=1, tau=0, v=0)?
  
  // Solution is built from the mesh alone; bc and rhs above are not passed to it.
  SolutionPtr soln = Teuchos::rcp(new Solution(mesh));
  
  FunctionPtr x = Function::xn();
  FunctionPtr y = Function::yn();
  
  // u1 = u1_hat = x / 2
  FunctionPtr u1_exact = x / 2;
  
  // u2 = u2_hat = y / 2
  FunctionPtr u2_exact = y / 2;
  
  // sigma = 0.5 * I
  FunctionPtr sigma11_exact = Function::constant(0.5);
  FunctionPtr sigma22_exact = Function::constant(0.5);
  
  // tn_hat = 0.5 * n
  FunctionPtr n = Function::normal();
  FunctionPtr t1n_exact = n->x() / 2;
  FunctionPtr t2n_exact = n->y() / 2;
  
  // Candidate pre-image, keyed by trial variable ID.
  map<int, FunctionPtr > exact_soln;
  exact_soln[u1->ID()] = u1_exact;
  exact_soln[u1hat->ID()] = u1_exact;
  exact_soln[u2->ID()] = u2_exact;
  exact_soln[u2hat->ID()] = u2_exact;
  exact_soln[sigma11->ID()] = sigma11_exact;
  exact_soln[sigma22->ID()] = sigma22_exact;
  exact_soln[t1n->ID()] = t1n_exact;
  exact_soln[t2n->ID()] = t2n_exact;
  
  exact_soln[p->ID()] = Function::zero();
  exact_soln[sigma12->ID()] = Function::zero();
  exact_soln[sigma21->ID()] = Function::zero();
  
  soln->projectOntoMesh(exact_soln);
  
  // Test functional induced by the projected solution, and its Riesz
  // representation in the norm `ip`.
  LinearTermPtr soln_functional = stokesBF->testFunctional(soln);
  
  RieszRepPtr rieszRep = Teuchos::rcp( new RieszRep(mesh, ip, soln_functional) );
  
  rieszRep->computeRieszRep();
  
  // get test functions:
  FunctionPtr q_fxn = Teuchos::rcp( new RepFunction(q, rieszRep) );
  FunctionPtr v1_fxn = Teuchos::rcp( new RepFunction(v1, rieszRep) );
  FunctionPtr v2_fxn = Teuchos::rcp( new RepFunction(v2, rieszRep) );
  FunctionPtr tau1_fxn = Teuchos::rcp( new RepFunction(tau1, rieszRep) );
  FunctionPtr tau2_fxn = Teuchos::rcp( new RepFunction(tau2, rieszRep) );
  
  // These prints are not guarded by rank, unlike the interaction printouts above.
  cout << "L2 norm of (q-1) : " << (q_fxn - 1)->l2norm(mesh) << endl;
  cout << "L2 norm of (v1) : " << (v1_fxn)->l2norm(mesh) << endl;
  cout << "L2 norm of (v2) : " << (v2_fxn)->l2norm(mesh) << endl;
  cout << "L2 norm of (tau1) : " << (tau1_fxn)->l2norm(mesh) << endl;
  cout << "L2 norm of (tau2) : " << (tau2_fxn)->l2norm(mesh) << endl;
  
  VTKExporter exporter(soln, mesh, varFactory);
  exporter.exportSolution("conservationPreimage", H1Order*2);

  cout << "Checking that the soln_functional is what I expect:\n";
  
  FunctionPtr xyVector = Function::vectorize(x, y);
  
  cout << "With v1 = x, integral: " << integralOverMesh(soln_functional, v1, x) << endl;
  cout << "With v2 = y, integral: " << integralOverMesh(soln_functional, v2, y) << endl;
  cout << "With tau1=(x,y), integral: " << integralOverMesh(soln_functional, tau1, xyVector) << endl;
  cout << "With tau2=(x,y), integral: " << integralOverMesh(soln_functional, tau2, xyVector) << endl;
  cout << "With q   =x, integral: " << integralOverMesh(soln_functional, q, x) << endl;
  
  cout << "(Expect 0s all around, except for q, where we expect (1,x) == 0.5.)\n";
  return 0;
}
// Driver for a (pseudo-)transient convection-diffusion DPG solve with an
// optional local-conservation constraint and a per-timestep mass-flux check.
//
// For each of numRefs+1 mesh levels it time-steps until either the L2 norm of
// the change between consecutive time solutions drops below time_tol or
// numTimeSteps steps have been taken, writes VTK output from rank 0, reports
// per-cell mass-flux statistics, and (except on the last level) refines the mesh.
//
// numRefs, H1Order, pToAdd, epsilon, transient, enforceLocalConservation and
// numTimeSteps are not declared in this function; they must be defined
// elsewhere in the file.
//
// argv[1], if present, overrides numRefs.
int main(int argc, char *argv[]) {
  // Process command line arguments
  // NOTE(review): the count is parsed with atof; if numRefs is an integer,
  // atoi/strtol would express the intent more directly.
  if (argc > 1)
    numRefs = atof(argv[1]);
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  int rank=mpiSession.getRank();
  int numProcs=mpiSession.getNProc();
#else
  int rank = 0;
  int numProcs = 1;
#endif

  FunctionPtr beta = Teuchos::rcp(new Beta());

  ////////////////////////////////////////////////////////////////////
  // DEFINE VARIABLES 
  ////////////////////////////////////////////////////////////////////
  // test variables
  VarFactory varFactory; 
  VarPtr tau = varFactory.testVar("\\tau", HDIV);
  VarPtr v = varFactory.testVar("v", HGRAD);

  // trial variables
  VarPtr uhat = varFactory.traceVar("\\widehat{u}");
  VarPtr beta_n_u_minus_sigma_n = varFactory.fluxVar("\\widehat{\\beta \\cdot n u - \\sigma_{n}}");
  VarPtr u = varFactory.fieldVar("u");
  VarPtr sigma1 = varFactory.fieldVar("\\sigma_1");
  VarPtr sigma2 = varFactory.fieldVar("\\sigma_2");

  ////////////////////////////////////////////////////////////////////
  // CREATE MESH 
  ////////////////////////////////////////////////////////////////////

  BFPtr confusionBF = Teuchos::rcp( new BF(varFactory) );

  // Corner coordinates of the rectangular domain [0,4] x [-2,2].
  FieldContainer<double> meshBoundary(4,2);

  meshBoundary(0,0) =  0.0; // x1
  meshBoundary(0,1) = -2.0; // y1
  meshBoundary(1,0) =  4.0;
  meshBoundary(1,1) = -2.0;
  meshBoundary(2,0) =  4.0;
  meshBoundary(2,1) =  2.0;
  meshBoundary(3,0) =  0.0;
  meshBoundary(3,1) =  2.0;

  int horizontalCells = 4, verticalCells = 4;

  // create a pointer to a new mesh:
  Teuchos::RCP<Mesh> mesh = Mesh::buildQuadMesh(meshBoundary, horizontalCells, verticalCells,
      confusionBF, H1Order, H1Order+pToAdd, false);

  ////////////////////////////////////////////////////////////////////
  // INITIALIZE BACKGROUND FLOW FUNCTIONS
  ////////////////////////////////////////////////////////////////////

  // prevTimeFlow holds the previous time-step solution; flowResidual holds the
  // difference between the current and previous solutions.
  BCPtr nullBC = Teuchos::rcp((BC*)NULL);
  RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL);
  IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  SolutionPtr prevTimeFlow = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );  
  SolutionPtr flowResidual = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );  

  FunctionPtr u_prev_time = Teuchos::rcp( new PreviousSolutionFunction(prevTimeFlow, u) );

  // ==================== SET INITIAL GUESS ==========================
  double u_free = 0.0;
  double sigma1_free = 0.0;
  double sigma2_free = 0.0;
  map<int, Teuchos::RCP<Function> > functionMap;
  functionMap[u->ID()] = Teuchos::rcp( new ConstantScalarFunction(u_free) );
  functionMap[sigma1->ID()] = Teuchos::rcp( new ConstantScalarFunction(sigma1_free) );
  functionMap[sigma2->ID()] = Teuchos::rcp( new ConstantScalarFunction(sigma2_free) );

  prevTimeFlow->projectOntoMesh(functionMap);
  // ==================== END SET INITIAL GUESS ==========================

  ////////////////////////////////////////////////////////////////////
  // DEFINE BILINEAR FORM
  ////////////////////////////////////////////////////////////////////

  // tau terms:
  confusionBF->addTerm(sigma1 / epsilon, tau->x());
  confusionBF->addTerm(sigma2 / epsilon, tau->y());
  confusionBF->addTerm(u, tau->div());
  confusionBF->addTerm(-uhat, tau->dot_normal());

  // v terms:
  confusionBF->addTerm( sigma1, v->dx() );
  confusionBF->addTerm( sigma2, v->dy() );
  confusionBF->addTerm( beta * u, - v->grad() );
  confusionBF->addTerm( beta_n_u_minus_sigma_n, v);

  ////////////////////////////////////////////////////////////////////
  // TIMESTEPPING TERMS
  ////////////////////////////////////////////////////////////////////
  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );

  double dt = 0.25;
  FunctionPtr invDt = Teuchos::rcp(new ScalarParamFunction(1.0/dt));    
  if (rank==0){
    cout << "Timestep dt = " << dt << endl;
  }
  // Adds (u/dt, v) to the bilinear form and (u_prev/dt, v) to the RHS.
  if (transient)
  {
    confusionBF->addTerm( u, invDt*v );
    rhs->addTerm( u_prev_time * invDt * v );
  }

  ////////////////////////////////////////////////////////////////////
  // DEFINE INNER PRODUCT
  ////////////////////////////////////////////////////////////////////

  // Three norms are built here; only robIP is passed to the Solution below.

  // mathematician's norm
  IPPtr mathIP = Teuchos::rcp(new IP());
  mathIP->addTerm(tau);
  mathIP->addTerm(tau->div());

  mathIP->addTerm(v);
  mathIP->addTerm(v->grad());

  // quasi-optimal norm
  IPPtr qoptIP = Teuchos::rcp(new IP);
  qoptIP->addTerm( v );
  qoptIP->addTerm( tau / epsilon + v->grad() );
  qoptIP->addTerm( beta * v->grad() - tau->div() );

  // robust test norm
  IPPtr robIP = Teuchos::rcp(new IP);
  FunctionPtr ip_scaling = Teuchos::rcp( new EpsilonScaling(epsilon) ); 
  // The L2 terms on v are used only without local conservation; with it, a
  // zero-mean term on v is added instead (see the end of this section).
  if (!enforceLocalConservation)
  {
    robIP->addTerm( ip_scaling * v );
    if (transient)
      robIP->addTerm( invDt * v );
  }
  robIP->addTerm( sqrt(epsilon) * v->grad() );
  // Weight these two terms for inflow
  FunctionPtr ip_weight = Teuchos::rcp( new IPWeight() );
  robIP->addTerm( ip_weight * beta * v->grad() );
  robIP->addTerm( ip_weight * tau->div() );
  robIP->addTerm( ip_scaling/sqrt(epsilon) * tau );
  if (enforceLocalConservation)
    robIP->addZeroMeanTerm( v );

  ////////////////////////////////////////////////////////////////////
  // DEFINE RHS
  ////////////////////////////////////////////////////////////////////

  FunctionPtr f = Teuchos::rcp( new ConstantScalarFunction(0.0) );
  rhs->addTerm( f * v ); // obviously, with f = 0 adding this term is not necessary!

  ////////////////////////////////////////////////////////////////////
  // DEFINE BC
  ////////////////////////////////////////////////////////////////////

  Teuchos::RCP<BCEasy> bc = Teuchos::rcp( new BCEasy );
  // Teuchos::RCP<PenaltyConstraints> pc = Teuchos::rcp( new PenaltyConstraints );
  SpatialFilterPtr lBoundary = Teuchos::rcp( new LeftBoundary );
  SpatialFilterPtr tbBoundary = Teuchos::rcp( new TopBottomBoundary );
  SpatialFilterPtr rBoundary = Teuchos::rcp( new RightBoundary );
  FunctionPtr u0 = Teuchos::rcp( new ZeroBC );
  FunctionPtr u_inlet = Teuchos::rcp( new InletBC );
  // FunctionPtr n = Teuchos::rcp( new UnitNormalFunction );
  // Flux conditions on the left and top/bottom filters; trace condition on the right.
  bc->addDirichlet(beta_n_u_minus_sigma_n, lBoundary, u_inlet);
  bc->addDirichlet(beta_n_u_minus_sigma_n, tbBoundary, u0);
  bc->addDirichlet(uhat, rBoundary, u0);
  // pc->addConstraint(beta_n_u_minus_sigma_n - uhat == u0, rBoundary);

  ////////////////////////////////////////////////////////////////////
  // CREATE SOLUTION OBJECT
  ////////////////////////////////////////////////////////////////////
  Teuchos::RCP<Solution> solution = Teuchos::rcp( new Solution(mesh, bc, rhs, robIP) );
  // solution->setFilter(pc);

  // ==================== Enforce Local Conservation ==================
  if (enforceLocalConservation) {
    if (transient)
    {
      // Constraint: u/dt - flux == u_prev/dt
      FunctionPtr conserved_rhs = u_prev_time * invDt;
      LinearTermPtr conserved_quantity = invDt * u;
      LinearTermPtr flux_part = Teuchos::rcp(new LinearTerm(-1.0, beta_n_u_minus_sigma_n));
      conserved_quantity->addTerm(flux_part, true);
      // conserved_quantity = conserved_quantity - beta_n_u_minus_sigma_n;
      solution->lagrangeConstraints()->addConstraint(conserved_quantity == conserved_rhs);
    }
    else
    {
      // Steady case: flux == 0
      FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
      solution->lagrangeConstraints()->addConstraint(beta_n_u_minus_sigma_n == zero);
    }
  }

  // ==================== Register Solutions ==========================
  mesh->registerSolution(solution);
  mesh->registerSolution(prevTimeFlow); // u_t(i-1)
  mesh->registerSolution(flowResidual); // difference between current and previous time solutions

  double energyThreshold = 0.25; // for mesh refinements
  Teuchos::RCP<RefinementStrategy> refinementStrategy;
  refinementStrategy = Teuchos::rcp(new RefinementStrategy(solution,energyThreshold));

  ////////////////////////////////////////////////////////////////////
  // PSEUDO-TIME SOLVE STRATEGY 
  ////////////////////////////////////////////////////////////////////

  double time_tol = 1e-8;
  for (int refIndex=0; refIndex<=numRefs; refIndex++)
  {
    double L2_time_residual = 1e7; // large initial value so the loop is entered
    int timestepCount = 0;
    // A steady solve needs exactly one pass through the loop below.
    if (!transient)
      numTimeSteps = 1;
    while((L2_time_residual > time_tol) && (timestepCount < numTimeSteps))
    {
      solution->solve(false);
      // subtract solutions to get residual
      flowResidual->setSolution(solution); // start from the current time solution...
      flowResidual->addSolution(prevTimeFlow, -1.0); // ...and subtract the previous one
      double L2u = flowResidual->L2NormOfSolutionGlobal(u->ID());
      double L2sigma1 = flowResidual->L2NormOfSolutionGlobal(sigma1->ID());
      double L2sigma2 = flowResidual->L2NormOfSolutionGlobal(sigma2->ID());
      L2_time_residual = sqrt(L2u*L2u + L2sigma1*L2sigma1 + L2sigma2*L2sigma2);

      if (rank == 0)
      {
        // Report progress from rank 0 only, like every other console message
        // in this function, so the line is not repeated once per MPI process.
        cout << endl << "Timestep: " << timestepCount << ", dt = " << dt << ", Time residual = " << L2_time_residual << endl;

        stringstream outfile;
        if (transient)
          outfile << "TransientConfusion_" << refIndex << "_" << timestepCount;
        else
          outfile << "TransientConfusion_" << refIndex;
        solution->writeToVTK(outfile.str(), 5);
      }

      //////////////////////////////////////////////////////////////////////////
      // Check conservation by testing against one
      //////////////////////////////////////////////////////////////////////////
      VarPtr testOne = varFactory.testVar("1", CONSTANT_SCALAR);
      // Create a fake bilinear form for the testing
      BFPtr fakeBF = Teuchos::rcp( new BF(varFactory) );
      // Define our mass flux
      FunctionPtr flux_current_time = Teuchos::rcp( new PreviousSolutionFunction(solution, beta_n_u_minus_sigma_n) );
      FunctionPtr delta_u = Teuchos::rcp( new PreviousSolutionFunction(flowResidual, u) );
      LinearTermPtr surfaceFlux = -1.0 * flux_current_time * testOne;
      LinearTermPtr volumeChange = invDt * delta_u * testOne;
      LinearTermPtr massFluxTerm;
      // Transient: only the volume-change term is integrated (adding the
      // surface flux is commented out). Steady: only the surface flux.
      if (transient)
      {
        massFluxTerm = volumeChange;
        // massFluxTerm->addTerm(surfaceFlux);
      }
      else
      {
        massFluxTerm = surfaceFlux;
      }
      // cout << "surface case = " << surfaceFlux->summands()[0].first->boundaryValueOnly() << " volume case = " << volumeChange->summands()[0].first->boundaryValueOnly() << endl;

      // FunctionPtr massFlux= Teuchos::rcp( new PreviousSolutionFunction(solution, beta_n_u_minus_sigma_n) );
      // LinearTermPtr massFluxTerm = massFlux * testOne;

      Teuchos::RCP<shards::CellTopology> quadTopoPtr = Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData<shards::Quadrilateral<4> >() ));
      DofOrderingFactory dofOrderingFactory(fakeBF);
      int fakeTestOrder = H1Order;
      DofOrderingPtr testOrdering = dofOrderingFactory.testOrdering(fakeTestOrder, *quadTopoPtr);

      // Index of the constant test function's single dof within the test ordering.
      int testOneIndex = testOrdering->getDofIndex(testOne->ID(),0);
      vector< ElementTypePtr > elemTypes = mesh->elementTypes(); // global element types
      map<int, double> massFluxIntegral; // cellID -> integral
      double maxMassFluxIntegral = 0.0;
      double totalMassFlux = 0.0;
      double totalAbsMassFlux = 0.0;
      for (vector< ElementTypePtr >::iterator elemTypeIt = elemTypes.begin(); elemTypeIt != elemTypes.end(); elemTypeIt++) 
      {
        ElementTypePtr elemType = *elemTypeIt;
        vector< ElementPtr > elems = mesh->elementsOfTypeGlobal(elemType);
        vector<int> cellIDs;
        for (int i=0; i<elems.size(); i++) {
          cellIDs.push_back(elems[i]->cellID());
        }
        FieldContainer<double> physicalCellNodes = mesh->physicalCellNodesGlobal(elemType);
        BasisCachePtr basisCache = Teuchos::rcp( new BasisCache(elemType,mesh) );
        basisCache->setPhysicalCellNodes(physicalCellNodes,cellIDs,true); // true: create side caches
        FieldContainer<double> cellMeasures = basisCache->getCellMeasures();
        FieldContainer<double> fakeRHSIntegrals(elems.size(),testOrdering->totalDofs());
        massFluxTerm->integrate(fakeRHSIntegrals,testOrdering,basisCache,true); // true: force side evaluation
        for (int i=0; i<elems.size(); i++) {
          int cellID = cellIDs[i];
          // pick out the ones for testOne:
          massFluxIntegral[cellID] = fakeRHSIntegrals(i,testOneIndex);
        }
        // Track the largest magnitude and accumulate the signed and absolute
        // totals in a single pass over this element type's cells.
        for (int i=0; i<elems.size(); i++) {
          int cellID = cellIDs[i];
          maxMassFluxIntegral = max(abs(massFluxIntegral[cellID]), maxMassFluxIntegral);
          totalMassFlux += massFluxIntegral[cellID];
          totalAbsMassFlux += abs( massFluxIntegral[cellID] );
        }
      }

      // Print results from processor with rank 0
      if (rank == 0)
      {
        cout << "largest mass flux: " << maxMassFluxIntegral << endl;
        cout << "total mass flux: " << totalMassFlux << endl;
        cout << "sum of mass flux absolute value: " << totalAbsMassFlux << endl;
      }

      prevTimeFlow->setSolution(solution); // reset previous time solution to current time sol
      timestepCount++;
    }

    // Refine after every level except the last.
    if (refIndex < numRefs){
      if (rank==0){
        cout << "Performing refinement number " << refIndex << endl;
      }     
      refinementStrategy->refine(rank==0);    
      // RESET solution every refinement - make sure discretization error doesn't creep in
      // prevTimeFlow->projectOntoMesh(functionMap);
    }
  }

  return 0;
}
/**
 * Driver for the cylinder ("Hemker") flow problem with adaptive mesh refinement.
 *
 * Builds a velocity / pressure / stress formulation on the mesh returned by
 * MeshFactory::shiftedHemkerMesh, linearized about `backgroundFlow`. The
 * convective momentum terms are commented out below, so only the viscous
 * (1/Re), pressure and continuity terms are active.
 *
 * Command line (parsed through choice::Args / choice::MpiArgs):
 *   --numRefs             number of adaptive refinement steps (no default)
 *   --norm                test norm selector (no default); only 0 (graph norm)
 *                         is implemented, other values trigger a warning
 *   --uniformRefinements  uniform refinements applied before solving (default 0)
 *   --conserve            add the local conservation constraint (default false)
 *   --r                   cylinder radius (default 0.6)
 *   --Re                  Reynolds number, parsed as an integer (default 1)
 *   --maxIterations       maximum Newton iterations per refinement level (default 1)
 *   --polyOrder           polynomial order for field variables (default 2)
 *   --deltaP              test-space enrichment (default 2)
 *
 * Files written by rank 0:
 *   stokeshemker_err.txt   one line per Newton iteration: global dof count,
 *                          energy error and the three flux imbalances
 *   stokeshemker_flux.txt  one line per Newton iteration: flux through each of
 *                          the five element-side sets cellFace0..cellFace4
 *   stokeshemker<uniformRefinements>_<refIndex>  export of the background flow
 *                          after each refinement level
 *
 * @return 0 on completion.
 */
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  choice::MpiArgs args( argc, argv );
#else
  choice::Args args( argc, argv );
#endif
  int commRank = Teuchos::GlobalMPISession::getRank();
  int numProcs = Teuchos::GlobalMPISession::getNProc();

  // Required arguments
  int numRefs = args.Input<int>("--numRefs", "number of refinement steps");
  int norm = args.Input<int>("--norm", "0 = graph\n    1 = robust\n    2 = coupled robust");

  // Optional arguments (have defaults)
  int uniformRefinements = args.Input("--uniformRefinements", "number of uniform refinements", 0);
  bool enforceLocalConservation = args.Input<bool>("--conserve", "enforce local conservation", false);
  double radius = args.Input("--r", "cylinder radius", 0.6);
  int Re = args.Input("--Re", "Reynolds number", 1);
  int maxNewtonIterations = args.Input("--maxIterations", "maximum number of Newton iterations", 1);
  int polyOrder = args.Input("--polyOrder", "polynomial order for field variables", 2);
  int deltaP = args.Input("--deltaP", "how much to enrich test space", 2);
  // string saveFile = args.Input<string>("--meshSaveFile", "file to which to save refinement history", "");
  // string replayFile = args.Input<string>("--meshLoadFile", "file with refinement history to replay", "");
  args.Process();

  ////////////////////   PROBLEM DEFINITIONS   ///////////////////////
  int H1Order = polyOrder+1;

  ////////////////////   DECLARE VARIABLES   ///////////////////////
  // define test variables
  VarFactory varFactory;
  VarPtr tau1 = varFactory.testVar("tau1", HDIV);
  VarPtr tau2 = varFactory.testVar("tau2", HDIV);
  VarPtr v1 = varFactory.testVar("v1", HGRAD);
  VarPtr v2 = varFactory.testVar("v2", HGRAD);
  VarPtr vc = varFactory.testVar("vc", HGRAD);

  // define trial variables
  VarPtr u1 = varFactory.fieldVar("u1");
  VarPtr u2 = varFactory.fieldVar("u2");
  VarPtr p = varFactory.fieldVar("p");
  VarPtr u1hat = varFactory.traceVar("u1hat");
  VarPtr u2hat = varFactory.traceVar("u2hat");
  VarPtr t1hat = varFactory.fluxVar("t1hat");
  VarPtr t2hat = varFactory.fluxVar("t2hat");
  VarPtr sigma1 = varFactory.fieldVar("sigma1", VECTOR_L2);
  VarPtr sigma2 = varFactory.fieldVar("sigma2", VECTOR_L2);

  ////////////////////   BUILD MESH   ///////////////////////
  BFPtr bf = Teuchos::rcp( new BF(varFactory) );

  // create a pointer to a new mesh:
  Teuchos::RCP<Mesh> mesh = MeshFactory::shiftedHemkerMesh(-1, 3, 2, radius, bf, H1Order, deltaP);

  ////////////////////////////////////////////////////////////////////
  // INITIALIZE BACKGROUND FLOW FUNCTIONS
  ////////////////////////////////////////////////////////////////////

  // backgroundFlow only stores the accumulated Newton iterate; it is never
  // solved itself, hence the null BC / RHS / IP.
  BCPtr nullBC = Teuchos::rcp((BC*)NULL);
  RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL);
  IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  SolutionPtr backgroundFlow = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );

  vector<double> e1(2); // (1,0)
  e1[0] = 1;
  vector<double> e2(2); // (0,1)
  e2[1] = 1;

  FunctionPtr u1_prev = Function::solution(u1, backgroundFlow);
  FunctionPtr u2_prev = Function::solution(u2, backgroundFlow);
  FunctionPtr sigma1_prev = Function::solution(sigma1, backgroundFlow);
  FunctionPtr sigma2_prev = Function::solution(sigma2, backgroundFlow);

  FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
  FunctionPtr one = Teuchos::rcp( new ConstantScalarFunction(1.0) );
  // beta is only referenced by the commented-out convective terms below.
  FunctionPtr beta = e1 * u1_prev + e2 * u2_prev;

  // ==================== SET INITIAL GUESS ==========================
  // Initial iterate: u = (1,0), zero stresses, zero pressure.
  map<int, Teuchos::RCP<Function> > functionMap;
  functionMap[u1->ID()] = one;
  functionMap[u2->ID()] = zero;
  functionMap[sigma1->ID()] = Function::vectorize(zero,zero);
  functionMap[sigma2->ID()] = Function::vectorize(zero,zero);
  functionMap[p->ID()] = zero;

  backgroundFlow->projectOntoMesh(functionMap);

  ////////////////////   DEFINE BILINEAR FORM   ///////////////////////

  // stress equation
  bf->addTerm( sigma1, tau1 );
  bf->addTerm( sigma2, tau2 );
  bf->addTerm( u1, tau1->div() );
  bf->addTerm( u2, tau2->div() );
  bf->addTerm( -u1hat, tau1->dot_normal() );
  bf->addTerm( -u2hat, tau2->dot_normal() );

  // momentum equation (convective terms disabled)
  // bf->addTerm( Function::xPart(sigma1_prev)*u1, v1 );
  // bf->addTerm( Function::yPart(sigma1_prev)*u2, v1 );
  // bf->addTerm( Function::xPart(sigma2_prev)*u1, v2 );
  // bf->addTerm( Function::yPart(sigma2_prev)*u2, v2 );
  // bf->addTerm( beta*sigma1, v1);
  // bf->addTerm( beta*sigma2, v2);
  bf->addTerm( 1./Re*sigma1, v1->grad() );
  bf->addTerm( 1./Re*sigma2, v2->grad() );
  bf->addTerm( t1hat, v1);
  bf->addTerm( t2hat, v2);
  bf->addTerm( -p, v1->dx() );
  bf->addTerm( -p, v2->dy() );

  // continuity equation
  bf->addTerm( -u1, vc->dx() );
  bf->addTerm( -u2, vc->dy() );
  bf->addTerm( u1hat, vc->times_normal_x() );
  bf->addTerm( u2hat, vc->times_normal_y() );

  ////////////////////   SPECIFY RHS   ///////////////////////
  // The RHS carries the field terms of the form evaluated at the background
  // flow with opposite sign, so each solve yields an update to backgroundFlow.
  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );

  // stress equation
  rhs->addTerm( -sigma1_prev * tau1 );
  rhs->addTerm( -sigma2_prev * tau2 );
  rhs->addTerm( -u1_prev * tau1->div() );
  rhs->addTerm( -u2_prev * tau2->div() );

  // momentum equation
  // rhs->addTerm( -beta*sigma1_prev * v1 );
  // rhs->addTerm( -beta*sigma2_prev * v2 );
  rhs->addTerm( -1./Re*sigma1_prev * v1->grad() );
  rhs->addTerm( -1./Re*sigma2_prev * v2->grad() );

  // continuity equation
  rhs->addTerm( u1_prev * vc->dx() );
  rhs->addTerm( u2_prev * vc->dy() );

  ////////////////////   DEFINE INNER PRODUCT(S)   ///////////////////////
  IPPtr ip = Teuchos::rcp(new IP);
  if (norm == 0)
  {
    ip = bf->graphNorm();
  }
  else
  {
    // Only the graph norm is implemented. For any other selector `ip` stays
    // the freshly constructed IP above, to which no terms have been added.
    // Say so explicitly instead of continuing silently.
    // ip = bf->l2Norm();
    if (commRank == 0)
    {
      cout << "Warning: --norm " << norm << " is not implemented; "
           << "the test-space inner product has no terms. Use --norm 0 (graph norm)." << endl;
    }
  }

  ////////////////////   CREATE BCs   ///////////////////////
  // Velocity traces: BoundaryU1 profile at x=-1 and x=3, zero on y=+-1 and on
  // the cylinder surface.
  Teuchos::RCP<BCEasy> bc = Teuchos::rcp( new BCEasy );
  SpatialFilterPtr left = Teuchos::rcp( new ConstantXBoundary(-1) );
  SpatialFilterPtr right = Teuchos::rcp( new ConstantXBoundary(3) );
  SpatialFilterPtr top = Teuchos::rcp( new ConstantYBoundary(1) );
  SpatialFilterPtr bottom = Teuchos::rcp( new ConstantYBoundary(-1) );
  SpatialFilterPtr circle = Teuchos::rcp( new CircleBoundary(radius) );
  FunctionPtr boundaryU1 = Teuchos::rcp( new BoundaryU1 );
  bc->addDirichlet(u1hat, left, boundaryU1);
  bc->addDirichlet(u2hat, left, zero);
  bc->addDirichlet(u1hat, right, boundaryU1);
  bc->addDirichlet(u2hat, right, zero);
  bc->addDirichlet(u1hat, top, zero);
  bc->addDirichlet(u2hat, top, zero);
  bc->addDirichlet(u1hat, bottom, zero);
  bc->addDirichlet(u2hat, bottom, zero);
  bc->addDirichlet(u1hat, circle, zero);
  bc->addDirichlet(u2hat, circle, zero);

  // zero mean constraint on pressure
  bc->addZeroMeanConstraint(p);

  Teuchos::RCP<Solution> solution = Teuchos::rcp( new Solution(mesh, bc, rhs, ip) );

  if (enforceLocalConservation)
  {
    // Constrain the normal velocity trace u_hat . n to zero via Lagrange constraints.
    solution->lagrangeConstraints()->addConstraint(u1hat->times_normal_x() + u2hat->times_normal_y() == zero);
  }

  // ==================== Register Solutions ==========================
  mesh->registerSolution(solution);
  mesh->registerSolution(backgroundFlow);

  // Teuchos::RCP< RefinementHistory > refHistory = Teuchos::rcp( new RefinementHistory );
  // mesh->registerObserver(refHistory);

  ////////////////////   SOLVE & REFINE   ///////////////////////
  double energyThreshold = 0.2; // for mesh refinements
  RefinementStrategy refinementStrategy( solution, energyThreshold );
  VTKExporter exporter(backgroundFlow, mesh, varFactory);
  ofstream errOut;
  ofstream fluxOut;
  if (commRank == 0)
  {
    // Only rank 0 writes the log files.
    errOut.open("stokeshemker_err.txt");
    fluxOut.open("stokeshemker_flux.txt");
  }
  errOut.precision(15);
  fluxOut.precision(15);

  // (element, side index) pairs over which the flux is integrated.
  // NOTE(review): the element IDs are hard-coded and looked up before any
  // refinement, so they presumably match the initial shiftedHemkerMesh
  // numbering - confirm if the mesh factory changes.
  vector< pair<ElementPtr, int> > cellFace0;
  vector< pair<ElementPtr, int> > cellFace1;
  vector< pair<ElementPtr, int> > cellFace2;
  vector< pair<ElementPtr, int> > cellFace3;
  vector< pair<ElementPtr, int> > cellFace4;
  cellFace0.push_back(make_pair(mesh->getElement(12), 3));
  cellFace0.push_back(make_pair(mesh->getElement(13), 3));
  cellFace0.push_back(make_pair(mesh->getElement(14), 3));
  cellFace0.push_back(make_pair(mesh->getElement(15), 3));
  cellFace1.push_back(make_pair(mesh->getElement(12), 1));
  cellFace1.push_back(make_pair(mesh->getElement(13), 1));
  cellFace1.push_back(make_pair(mesh->getElement(14), 1));
  cellFace1.push_back(make_pair(mesh->getElement(15), 1));
  cellFace2.push_back(make_pair(mesh->getElement(11), 1));
  cellFace2.push_back(make_pair(mesh->getElement(2 ), 0));
  cellFace2.push_back(make_pair(mesh->getElement(5 ), 2));
  cellFace2.push_back(make_pair(mesh->getElement(16), 1));
  cellFace3.push_back(make_pair(mesh->getElement(9 ), 3));
  cellFace3.push_back(make_pair(mesh->getElement(8 ), 3));
  cellFace3.push_back(make_pair(mesh->getElement(19), 3));
  cellFace3.push_back(make_pair(mesh->getElement(18), 3));
  cellFace4.push_back(make_pair(mesh->getElement(9 ), 1));
  cellFace4.push_back(make_pair(mesh->getElement(8 ), 1));
  cellFace4.push_back(make_pair(mesh->getElement(19), 1));
  cellFace4.push_back(make_pair(mesh->getElement(18), 1));

  // // for loading refinement history
  // if (replayFile.length() > 0) {
  //   RefinementHistory refHistory;
  //   replayFile = replayFile;
  //   refHistory.loadFromFile(replayFile);
  //   refHistory.playback(mesh);
  //   int numElems = mesh->numActiveElements();
  //   if (commRank==0){
  //     double minSideLength = meshInfo.getMinCellSideLength() ;
  //     cout << "after replay, num elems = " << numElems << " and min side length = " << minSideLength << endl;
  //   }
  // }

  for (int i = 0; i < uniformRefinements; i++)
    refinementStrategy.hRefineUniformly(mesh);

  double nonlinearRelativeEnergyTolerance = 1e-5; // used to determine convergence of the nonlinear solution
  // One pass per refinement level; the final pass (refIndex == numRefs) solves
  // and exports but does not refine again.
  for (int refIndex=0; refIndex<=numRefs; refIndex++)
  {
    double L2Update = 1e10;
    int iterCount = 0;
    // Newton loop: stop when the L2 norm of the (u1,u2) update drops below the
    // tolerance or the iteration cap is reached.
    while (L2Update > nonlinearRelativeEnergyTolerance && iterCount < maxNewtonIterations)
    {
      solution->solve(false);
      double u1L2Update = solution->L2NormOfSolutionGlobal(u1->ID());
      double u2L2Update = solution->L2NormOfSolutionGlobal(u2->ID());
      L2Update = sqrt(u1L2Update*u1L2Update + u2L2Update*u2L2Update);
      double energy_error = solution->energyErrorTotal();

      // Check local conservation
      if (commRank == 0)
      {
        FunctionPtr n = Function::normal();
        // Velocity traces of the current update `solution`. Named distinctly so
        // they no longer shadow the background-flow u1_prev / u2_prev above.
        // NOTE(review): these come from `solution`, not `backgroundFlow` -
        // confirm that measuring the update's traces is intended.
        FunctionPtr u1hat_update = Function::solution(u1hat, solution);
        FunctionPtr u2hat_update = Function::solution(u2hat, solution);
        FunctionPtr flux = u1hat_update*n->x() + u2hat_update*n->y();
        Teuchos::Tuple<double, 3> fluxImbalances = checkConservation(flux, zero, mesh);
        // cout << "Mass flux: Largest Local = " << fluxImbalances[0]
        //   << ", Global = " << fluxImbalances[1] << ", Sum Abs = " << fluxImbalances[2] << endl;

        errOut << mesh->numGlobalDofs() << " " << energy_error << " "
               << fluxImbalances[0] << " " << fluxImbalances[1] << " " << fluxImbalances[2] << endl;

        double massFlux0 = computeFluxOverElementSides(u1hat_update, mesh, cellFace0);
        double massFlux1 = computeFluxOverElementSides(u1hat_update, mesh, cellFace1);
        double massFlux2 = computeFluxOverElementSides(u1hat_update, mesh, cellFace2);
        double massFlux3 = computeFluxOverElementSides(u1hat_update, mesh, cellFace3);
        double massFlux4 = computeFluxOverElementSides(u1hat_update, mesh, cellFace4);
        fluxOut << massFlux0 << " " << massFlux1 << " " << massFlux2 << " " << massFlux3 << " " << massFlux4 << " " << endl;
        cout << "Total mass flux = " << massFlux0 << " " << massFlux1 << " " << massFlux2 << " " << massFlux3 << " " << massFlux4 << " " << endl;

        // if (saveFile.length() > 0) {
        //   std::ostringstream oss;
        //   oss << string(saveFile) << refIndex ;
        //   cout << "on refinement " << refIndex << " saving mesh file to " << oss.str() << endl;
        //   refHistory->saveToFile(oss.str());
        // }
      }

      // line search algorithm (disabled): the full step alpha = 1 is always taken
      double alpha = 1.0;
      // bool useLineSearch = false;
      // int posEnrich = 5; // amount of enriching of grid points on which to ensure positivity
      // if (useLineSearch){ // to enforce positivity of density rho
      //   double lineSearchFactor = .5; double eps = .001; // arbitrary
      //   FunctionPtr rhoTemp = Function::solution(rho,backgroundFlow) + alpha*Function::solution(rho,solution) - Function::constant(eps);
      //   FunctionPtr eTemp = Function::solution(e,backgroundFlow) + alpha*Function::solution(e,solution) - Function::constant(eps);
      //   bool rhoIsPositive = rhoTemp->isPositive(mesh,posEnrich);
      //   bool eIsPositive = eTemp->isPositive(mesh,posEnrich);
      //   int iter = 0; int maxIter = 20;
      //   while (!(rhoIsPositive && eIsPositive) && iter < maxIter){
      //     alpha = alpha*lineSearchFactor;
      //     rhoTemp = Function::solution(rho,backgroundFlow) + alpha*Function::solution(rho,solution);
      //     eTemp = Function::solution(e,backgroundFlow) + alpha*Function::solution(e,solution);
      //     rhoIsPositive = rhoTemp->isPositive(mesh,posEnrich);
      //     eIsPositive = eTemp->isPositive(mesh,posEnrich);
      //     iter++;
      //   }
      //   if (commRank==0 && alpha < 1.0){
      //     cout << "line search factor alpha = " << alpha << endl;
      //   }
      // }

      // Accumulate the update into the background flow.
      backgroundFlow->addSolution(solution, alpha, false, true);
      iterCount++;
      // if (commRank == 0)
      //   cout << "L2 Norm of Update = " << L2Update << endl;
    }

    if (commRank == 0)
    {
      cout << endl;

      // Export the accumulated background flow for this refinement level.
      stringstream outfile;
      outfile << "stokeshemker" << uniformRefinements << "_" << refIndex;
      exporter.exportSolution(outfile.str());
    }

    if (refIndex < numRefs)
      refinementStrategy.refine(commRank==0); // print to console on commRank 0
  }
  if (commRank == 0)
  {
    errOut.close();
    fluxOut.close();
  }

  return 0;
}
// Driver for a scalar convection problem with constant velocity beta = (1,0)
// on the rectangle [0,4] x [-2,2], run either steady or with implicit time
// stepping, followed by adaptive refinement.
//
// Command line (parsed through choice::Args / choice::MpiArgs):
//   --numRefs    number of adaptive refinement steps (no default)
//   --conserve   enforce local conservation via Lagrange constraints (no default)
//   --steady     run steady rather than transient (no default)
//   --dt         time step (default 0.25)
//   --nt         maximum number of time steps per refinement level (default 20)
//   --halfWidth  half width of the inlet profile (default 1.0)
//
// Rank 0 exports "Convection_<refIndex>" (steady) or
// "TransientConvection_<refIndex>-<timestep>" (transient) and prints the flux
// imbalances returned by checkConservation.
//
// NOTE: halfWidth, H1Order and pToAdd are not declared in this function; they
// must be declared elsewhere in the file (not visible from here).
// Returns 0 on completion.
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  choice::MpiArgs args( argc, argv );
#else
  choice::Args args( argc, argv );
#endif
  int commRank = Teuchos::GlobalMPISession::getRank();
  int numProcs = Teuchos::GlobalMPISession::getNProc();

  // Required arguments
  int numRefs = args.Input<int>("--numRefs", "number of refinement steps");
  bool enforceLocalConservation = args.Input<bool>("--conserve", "enforce local conservation");
  bool steady = args.Input<bool>("--steady", "run steady rather than transient");

  // Optional arguments (have defaults)
  double dt = args.Input("--dt", "time step", 0.25);
  int numTimeSteps = args.Input("--nt", "number of time steps", 20);
  // halfWidth is assigned, not declared, here: it lives outside this function.
  halfWidth = args.Input("--halfWidth", "half width of inlet profile", 1.0);
  args.Process();

  ////////////////////   DECLARE VARIABLES   ///////////////////////
  // define test variables
  VarFactory varFactory;
  VarPtr v = varFactory.testVar("v", HGRAD);

  // define trial variables
  VarPtr beta_n_u_hat = varFactory.fluxVar("\\widehat{\\beta \\cdot n }");
  VarPtr u = varFactory.fieldVar("u");

  // constant convection velocity beta = (1, 0)
  vector<double> beta;
  beta.push_back(1.0);
  beta.push_back(0.0);

  ////////////////////   BUILD MESH   ///////////////////////
  BFPtr bf = Teuchos::rcp( new BF(varFactory) );
  // define nodes for mesh: corners of the rectangle [0,4] x [-2,2]
  FieldContainer<double> meshBoundary(4,2);

  meshBoundary(0,0) =  0.0; // x1
  meshBoundary(0,1) = -2.0; // y1
  meshBoundary(1,0) =  4.0;
  meshBoundary(1,1) = -2.0;
  meshBoundary(2,0) =  4.0;
  meshBoundary(2,1) =  2.0;
  meshBoundary(3,0) =  0.0;
  meshBoundary(3,1) =  2.0;

  int horizontalCells = 8, verticalCells = 8;

  // create a pointer to a new mesh:
  // (H1Order and pToAdd come from outside this function)
  Teuchos::RCP<Mesh> mesh = Mesh::buildQuadMesh(meshBoundary, horizontalCells, verticalCells,
                            bf, H1Order, H1Order+pToAdd);

  ////////////////////////////////////////////////////////////////////
  // INITIALIZE FLOW FUNCTIONS
  ////////////////////////////////////////////////////////////////////

  // prevTimeFlow holds the previous time level and flowResidual the difference
  // between consecutive time levels; neither is solved, hence the null BC/RHS/IP.
  BCPtr nullBC = Teuchos::rcp((BC*)NULL);
  RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL);
  IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  SolutionPtr prevTimeFlow = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );
  SolutionPtr flowResidual = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );

  FunctionPtr u_prev_time = Teuchos::rcp( new PreviousSolutionFunction(prevTimeFlow, u) );

  ////////////////////   DEFINE BILINEAR FORM   ///////////////////////
  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );
  FunctionPtr invDt = Teuchos::rcp(new ScalarParamFunction(1.0/dt));

  // v terms:
  bf->addTerm( beta * u, - v->grad() );
  bf->addTerm( beta_n_u_hat, v);

  if (!steady)
  {
    // Time derivative: u/dt goes into the form, u_prev_time/dt onto the RHS.
    bf->addTerm( u, invDt*v );
    rhs->addTerm( u_prev_time * invDt * v );
  }

  ////////////////////   SPECIFY RHS   ///////////////////////
  FunctionPtr f = Teuchos::rcp( new ConstantScalarFunction(0.0) );
  rhs->addTerm( f * v ); // obviously, with f = 0 adding this term is not necessary!

  ////////////////////   DEFINE INNER PRODUCT(S)   ///////////////////////
  IPPtr ip = bf->graphNorm();
  // ip->addTerm(v);
  // ip->addTerm(beta*v->grad());

  ////////////////////   CREATE BCs   ///////////////////////
  // Dirichlet data on the flux variable over LeftBoundary: minus the InletBC profile.
  Teuchos::RCP<BCEasy> bc = Teuchos::rcp( new BCEasy );
  SpatialFilterPtr lBoundary = Teuchos::rcp( new LeftBoundary );
  FunctionPtr u1 = Teuchos::rcp( new InletBC );
  bc->addDirichlet(beta_n_u_hat, lBoundary, -u1);

  Teuchos::RCP<Solution> solution = Teuchos::rcp( new Solution(mesh, bc, rhs, ip) );

  // ==================== Register Solutions ==========================
  mesh->registerSolution(solution);
  mesh->registerSolution(prevTimeFlow);
  mesh->registerSolution(flowResidual);

  // ==================== SET INITIAL GUESS ==========================
  // u_free is not used anywhere below in this function.
  double u_free = 0.0;
  map<int, Teuchos::RCP<Function> > functionMap;
  // functionMap[u->ID()]      = Teuchos::rcp( new ConInletBC
  functionMap[u->ID()]      = Teuchos::rcp( new InletBC );

  // Initial condition for the time stepping: project InletBC onto u.
  prevTimeFlow->projectOntoMesh(functionMap);

  ////////////////////   SOLVE & REFINE   ///////////////////////
  if (enforceLocalConservation)
  {
    if (steady)
    {
      // Steady case: constrain the flux variable to zero.
      FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
      solution->lagrangeConstraints()->addConstraint(beta_n_u_hat == zero);
    }
    else
    {
      // Transient case: constrain beta_n_u_hat + u/dt to equal u_prev_time/dt.
      // FunctionPtr parity = Teuchos::rcp<Function>( new SideParityFunction );
      // LinearTermPtr conservedQuantity = Teuchos::rcp<LinearTerm>( new LinearTerm(parity, beta_n_u_minus_sigma_n) );
      LinearTermPtr conservedQuantity = Teuchos::rcp<LinearTerm>( new LinearTerm(1.0, beta_n_u_hat) );
      LinearTermPtr sourcePart = Teuchos::rcp<LinearTerm>( new LinearTerm(invDt, u) );
      conservedQuantity->addTerm(sourcePart, true);
      solution->lagrangeConstraints()->addConstraint(conservedQuantity == u_prev_time * invDt);
    }
  }

  double energyThreshold = 0.2; // for mesh refinements
  RefinementStrategy refinementStrategy( solution, energyThreshold );
  VTKExporter exporter(solution, mesh, varFactory);

  // One pass per refinement level; the last pass (refIndex == numRefs) solves
  // but does not refine again.
  for (int refIndex=0; refIndex<=numRefs; refIndex++)
  {
    if (steady)
    {
      solution->solve(false);

      if (commRank == 0)
      {
        stringstream outfile;
        outfile << "Convection_" << refIndex;
        exporter.exportSolution(outfile.str());

        // Check local conservation
        FunctionPtr flux = Teuchos::rcp( new PreviousSolutionFunction(solution, beta_n_u_hat) );
        FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
        Teuchos::Tuple<double, 3> fluxImbalances = checkConservation(flux, zero, varFactory, mesh);
        cout << "Mass flux: Largest Local = " << fluxImbalances[0]
             << ", Global = " << fluxImbalances[1] << ", Sum Abs = " << fluxImbalances[2] << endl;
      }
    }
    else
    {
      // The step counter and residual are reset for every refinement level.
      int timestepCount = 0;
      double time_tol = 1e-8;
      double L2_time_residual = 1e9;
      // cout << L2_time_residual <<" "<< time_tol << timestepCount << numTimeSteps << endl;
      // Step until the change in u between time levels is at most time_tol in
      // L2 or numTimeSteps steps have been taken.
      while((L2_time_residual > time_tol) && (timestepCount < numTimeSteps))
      {
        solution->solve(false);
        // Subtract solutions to get residual
        flowResidual->setSolution(solution);
        flowResidual->addSolution(prevTimeFlow, -1.0);
        L2_time_residual = flowResidual->L2NormOfSolutionGlobal(u->ID());

        if (commRank == 0)
        {
          cout << endl << "Timestep: " << timestepCount << ", dt = " << dt << ", Time residual = " << L2_time_residual << endl;

          stringstream outfile;
          outfile << "TransientConvection_" << refIndex << "-" << timestepCount;
          exporter.exportSolution(outfile.str());

          // Check local conservation
          // source = -(u - u_prev_time)/dt, built from flowResidual
          FunctionPtr flux = Teuchos::rcp( new PreviousSolutionFunction(solution, beta_n_u_hat) );
          FunctionPtr source = Teuchos::rcp( new PreviousSolutionFunction(flowResidual, u) );
          source = -invDt * source;
          Teuchos::Tuple<double, 3> fluxImbalances = checkConservation(flux, source, varFactory, mesh);
          cout << "Mass flux: Largest Local = " << fluxImbalances[0]
               << ", Global = " << fluxImbalances[1] << ", Sum Abs = " << fluxImbalances[2] << endl;
        }

        prevTimeFlow->setSolution(solution); // reset previous time solution to current time sol
        timestepCount++;
      }
    }

    if (refIndex < numRefs)
      refinementStrategy.refine(commRank==0); // print to console on commRank 0
  }

  return 0;
}
/**
 * Driver for a viscous Burgers-type problem on the unit quad mesh, solved by
 * repeated linearized solves with a line-search step size, plus an
 * experimental (unfinished) Hessian filter.
 *
 * Positional command-line arguments (all optional, parsed with atoi):
 *   argv[1]  nCells      cells passed to buildUnitQuadMesh (default 2)
 *   argv[2]  numSteps    number of nonlinear steps (default 20)
 *   argv[3]  useHessian  nonzero enables the Hessian filter (default 0)
 *   argv[4]  thresh      parsed and echoed, but not otherwise used in this
 *                        function (default numSteps)
 *
 * Files written:
 *   e_v.m                                 values of the error representation function
 *   hessianStiffness.dat / stiffness.dat  name handed to setWriteMatrixToFile,
 *                                         chosen by useHessian
 *   Burgers.txt                           nonlinear residual per step (rank 0 only)
 *   Burgers.vtu, burgers.dat              final background flow and uhat fluxes (rank 0 only)
 *
 * @return 0 on completion.
 */
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
    Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
    int rank=mpiSession.getRank();
    int numProcs=mpiSession.getNProc();
#else
    int rank = 0;
    int numProcs = 1;
#endif
    int polyOrder = 2;

    // define our manufactured solution or problem bilinear form:
    double epsilon = 1e-3;
    bool useTriangles = false;

    int pToAdd = 2;
    int nCells = 2;
    if ( argc > 1)
    {
        nCells = atoi(argv[1]);
        if (rank==0)
        {
            cout << "numCells = " << nCells << endl;
        }
    }
    int numSteps = 20;
    if ( argc > 2)
    {
        numSteps = atoi(argv[2]);
        if (rank==0)
        {
            cout << "num NR steps = " << numSteps << endl;
        }
    }
    int useHessian = 0; // defaults to "not use"
    if ( argc > 3)
    {
        useHessian = atoi(argv[3]);
        if (rank==0)
        {
            cout << "useHessian = " << useHessian << endl;
        }
    }

    int thresh = numSteps; // threshhold for when to apply linesearch/hessian
    if ( argc > 4)
    {
        thresh = atoi(argv[4]);
        if (rank==0)
        {
            cout << "thresh = " << thresh << endl;
        }
    }

    int H1Order = polyOrder + 1;

    double energyThreshold = 0.2; // for mesh refinements
    double nonlinearStepSize = 0.5;
    double nonlinearRelativeEnergyTolerance = 1e-8; // used to determine convergence of the nonlinear solution

    ////////////////////////////////////////////////////////////////////
    // DEFINE VARIABLES
    ////////////////////////////////////////////////////////////////////

    // new-style bilinear form definition
    VarFactory varFactory;
    VarPtr uhat = varFactory.traceVar("\\widehat{u}");
    VarPtr beta_n_u_minus_sigma_hat = varFactory.fluxVar("\\widehat{\\beta_n u - \\sigma_n}");
    VarPtr u = varFactory.fieldVar("u");
    VarPtr sigma1 = varFactory.fieldVar("\\sigma_1");
    VarPtr sigma2 = varFactory.fieldVar("\\sigma_2");

    VarPtr tau = varFactory.testVar("\\tau",HDIV);
    VarPtr v = varFactory.testVar("v",HGRAD);
    BFPtr bf = Teuchos::rcp( new BF(varFactory) ); // initialize bilinear form

    ////////////////////////////////////////////////////////////////////
    // CREATE MESH
    ////////////////////////////////////////////////////////////////////

    // create a pointer to a new mesh:
    Teuchos::RCP<Mesh> mesh = MeshUtilities::buildUnitQuadMesh(nCells, bf, H1Order, H1Order+pToAdd);
    mesh->setPartitionPolicy(Teuchos::rcp(new ZoltanMeshPartitionPolicy("HSFC")));

    ////////////////////////////////////////////////////////////////////
    // INITIALIZE BACKGROUND FLOW FUNCTIONS
    ////////////////////////////////////////////////////////////////////
    // backgroundFlow stores the accumulated nonlinear iterate; it is never
    // solved itself, hence the null BC / RHS / IP.
    BCPtr nullBC = Teuchos::rcp((BC*)NULL);
    RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL);
    IPPtr nullIP = Teuchos::rcp((IP*)NULL);
    SolutionPtr backgroundFlow = Teuchos::rcp(new Solution(mesh, nullBC,
                                 nullRHS, nullIP) );

    vector<double> e1(2); // (1,0)
    e1[0] = 1;
    vector<double> e2(2); // (0,1)
    e2[1] = 1;

    // Linearized convection velocity: beta = (u_prev, 1).
    FunctionPtr u_prev = Teuchos::rcp( new PreviousSolutionFunction(backgroundFlow, u) );
    FunctionPtr beta = e1 * u_prev + Teuchos::rcp( new ConstantVectorFunction( e2 ) );

    ////////////////////////////////////////////////////////////////////
    // DEFINE BILINEAR FORM
    ////////////////////////////////////////////////////////////////////

    // tau parts:
    // 1/eps (sigma, tau)_K + (u, div tau)_K - (u_hat, tau_n)_dK
    bf->addTerm(sigma1 / epsilon, tau->x());
    bf->addTerm(sigma2 / epsilon, tau->y());
    bf->addTerm(u, tau->div());
    bf->addTerm( - uhat, tau->dot_normal() );

    // v:
    // (sigma, grad v)_K - (sigma_hat_n, v)_dK - (u, beta dot grad v) + (u_hat * n dot beta, v)_dK
    bf->addTerm( sigma1, v->dx() );
    bf->addTerm( sigma2, v->dy() );
    bf->addTerm( -u, beta * v->grad());
    bf->addTerm( beta_n_u_minus_sigma_hat, v);

    // ==================== SET INITIAL GUESS ==========================
    mesh->registerSolution(backgroundFlow);
    FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
    FunctionPtr u0 = Teuchos::rcp( new U0 );

    map<int, Teuchos::RCP<Function> > functionMap;
    functionMap[u->ID()] = u0;
    functionMap[sigma1->ID()] = zero;
    functionMap[sigma2->ID()] = zero;

    backgroundFlow->projectOntoMesh(functionMap);
    // ==================== END SET INITIAL GUESS ==========================

    ////////////////////////////////////////////////////////////////////
    // DEFINE INNER PRODUCT
    ////////////////////////////////////////////////////////////////////
    // function to scale the squared guy by epsilon/h
    FunctionPtr epsilonOverHScaling = Teuchos::rcp( new EpsilonScaling(epsilon) );
    IPPtr ip = Teuchos::rcp( new IP );
    ip->addTerm( epsilonOverHScaling * (1.0/sqrt(epsilon))* tau);
    ip->addTerm( tau->div());
    //  ip->addTerm( epsilonOverHScaling * v );
    ip->addTerm( v );
    ip->addTerm( sqrt(epsilon) * v->grad() );
    ip->addTerm(v->grad());
    //  ip->addTerm( beta * v->grad() );

    ////////////////////////////////////////////////////////////////////
    // DEFINE RHS
    ////////////////////////////////////////////////////////////////////
    RHSPtr rhs = RHS::rhs();
    FunctionPtr u_prev_squared_div2 = 0.5 * u_prev * u_prev;

    rhs->addTerm((e1 * u_prev_squared_div2 + e2 * u_prev) * v->grad() - u_prev * tau->div());

    ////////////////////////////////////////////////////////////////////
    // DEFINE DIRICHLET BC
    ////////////////////////////////////////////////////////////////////
    // Inflow is everything that TopBoundary does not match; the flux there is
    // set to (u0^2/2, u0) . n.
    FunctionPtr n = Teuchos::rcp( new UnitNormalFunction );
    SpatialFilterPtr outflowBoundary = Teuchos::rcp( new TopBoundary);
    SpatialFilterPtr inflowBoundary = Teuchos::rcp( new NegatedSpatialFilter(outflowBoundary) );
    BCPtr inflowBC = BC::bc();
    FunctionPtr u0_squared_div_2 = 0.5 * u0 * u0;
    inflowBC->addDirichlet(beta_n_u_minus_sigma_hat,inflowBoundary,
                           ( e1 * u0_squared_div_2 + e2 * u0) * n );

    ////////////////////////////////////////////////////////////////////
    // CREATE SOLUTION OBJECT
    ////////////////////////////////////////////////////////////////////
    Teuchos::RCP<Solution> solution = Teuchos::rcp(new Solution(mesh, inflowBC, rhs, ip));
    mesh->registerSolution(solution);

    ////////////////////////////////////////////////////////////////////
    // WARNING: UNFINISHED HESSIAN BIT
    ////////////////////////////////////////////////////////////////////
    VarFactory hessianVars = varFactory.getBubnovFactory(VarFactory::BUBNOV_TRIAL);
    VarPtr du = hessianVars.test(u->ID());
    BFPtr hessianBF = Teuchos::rcp( new BF(hessianVars) ); // initialize bilinear form
    //  FunctionPtr e_v = Function::constant(1.0); // dummy error rep function for now - should do nothing

    FunctionPtr u_current  = Teuchos::rcp( new PreviousSolutionFunction(solution, u) );

    FunctionPtr sig1_prev = Teuchos::rcp( new PreviousSolutionFunction(solution, sigma1) );
    FunctionPtr sig2_prev = Teuchos::rcp( new PreviousSolutionFunction(solution, sigma2) );
    FunctionPtr sig_prev = (e1*sig1_prev + e2*sig2_prev);
    FunctionPtr fnhat = Teuchos::rcp(new PreviousSolutionFunction(solution,beta_n_u_minus_sigma_hat));
    FunctionPtr uhat_prev = Teuchos::rcp(new PreviousSolutionFunction(solution,uhat));
    LinearTermPtr residual = Teuchos::rcp(new LinearTerm);// residual
    residual->addTerm(fnhat*v - (e1 * (u_prev_squared_div2 - sig1_prev) + e2 * (u_prev - sig2_prev)) * v->grad());
    residual->addTerm((1/epsilon)*sig_prev * tau + u_prev * tau->div() - uhat_prev*tau->dot_normal());

    // u-dependent part of the linearized operator applied to the current update
    LinearTermPtr Bdu = Teuchos::rcp(new LinearTerm);
    Bdu->addTerm( u_current*tau->div() - u_current*(beta*v->grad()));

    Teuchos::RCP<RieszRep> riesz = Teuchos::rcp(new RieszRep(mesh, ip, residual));
    // duRiesz is constructed but not used further in this function.
    Teuchos::RCP<RieszRep> duRiesz = Teuchos::rcp(new RieszRep(mesh, ip, Bdu));
    riesz->computeRieszRep();
    FunctionPtr e_v = Teuchos::rcp(new RepFunction(v,riesz));
    e_v->writeValuesToMATLABFile(mesh, "e_v.m");
    FunctionPtr posErrPart = Teuchos::rcp(new PositivePart(e_v->dx()));
    hessianBF->addTerm(e_v->dx()*u,du);
    //  hessianBF->addTerm(posErrPart*u,du);
    Teuchos::RCP<HessianFilter> hessianFilter = Teuchos::rcp(new HessianFilter(hessianBF));

    if (useHessian)
    {
        solution->setWriteMatrixToFile(true,"hessianStiffness.dat");
    }
    else
    {
        solution->setWriteMatrixToFile(true,"stiffness.dat");
    }

    Teuchos::RCP< LineSearchStep > LS_Step = Teuchos::rcp(new LineSearchStep(riesz));

    // Residual log. Only rank 0 writes to it, so only rank 0 opens it;
    // otherwise every rank would truncate the same file.
    ofstream out;
    if (rank==0)
    {
        out.open("Burgers.txt");
    }
    double NL_residual = 9e99;
    for (int i = 0; i<numSteps; i++)
    {
        solution->solve(false); // do one solve to initialize things...
        double stepLength = LS_Step->stepSize(backgroundFlow,solution, NL_residual);
        if (useHessian)
        {
            // Installed after the solve above, so the filter is in place from
            // the next solve onwards.
            solution->setFilter(hessianFilter);
        }
        backgroundFlow->addSolution(solution,stepLength);
        NL_residual = LS_Step->getNLResidual();
        if (rank==0)
        {
            cout << "NL residual after adding = " << NL_residual << " with step size " << stepLength << endl;
            out << NL_residual << endl; // saves initial NL error
        }
    }
    if (rank==0)
    {
        out.close();
    }


    ////////////////////////////////////////////////////////////////////
    // DEFINE REFINEMENT STRATEGY
    ////////////////////////////////////////////////////////////////////
    Teuchos::RCP<RefinementStrategy> refinementStrategy;
    refinementStrategy = Teuchos::rcp(new RefinementStrategy(solution,energyThreshold));

    // With numRefs == 0 the solve/refine loop below does not execute.
    int numRefs = 0;

    Teuchos::RCP<NonlinearStepSize> stepSize = Teuchos::rcp(new NonlinearStepSize(nonlinearStepSize));
    Teuchos::RCP<NonlinearSolveStrategy> solveStrategy;
    solveStrategy = Teuchos::rcp( new NonlinearSolveStrategy(backgroundFlow, solution, stepSize,
                                  nonlinearRelativeEnergyTolerance));

    ////////////////////////////////////////////////////////////////////
    // SOLVE
    ////////////////////////////////////////////////////////////////////

    for (int refIndex=0; refIndex<numRefs; refIndex++)
    {
        solveStrategy->solve(rank==0);       // print to console on rank 0
        refinementStrategy->refine(rank==0); // print to console on rank 0
    }
    //  solveStrategy->solve(rank==0);

    if (rank==0)
    {
        backgroundFlow->writeToVTK("Burgers.vtu",min(H1Order+1,4));
        solution->writeFluxesToFile(uhat->ID(), "burgers.dat");
        cout << "wrote solution files" << endl;
    }

    return 0;
}
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  int rank=mpiSession.getRank();
  int numProcs=mpiSession.getNProc();
#else
  int rank = 0;
  int numProcs = 1;
#endif
  int polyOrder = 3;
  int pToAdd = 2; // for tests

  // define our manufactured solution or problem bilinear form:
  bool useTriangles = false;

  FieldContainer<double> meshPoints(4,2);

  meshPoints(0,0) = 0.0; // x1
  meshPoints(0,1) = 0.0; // y1
  meshPoints(1,0) = 1.0;
  meshPoints(1,1) = 0.0;
  meshPoints(2,0) = 1.0;
  meshPoints(2,1) = 1.0;
  meshPoints(3,0) = 0.0;
  meshPoints(3,1) = 1.0;

  int H1Order = polyOrder + 1;
  int horizontalCells = 4, verticalCells = 4;

  double energyThreshold = 0.2; // for mesh refinements
  double nonlinearStepSize = 0.5;
  double nonlinearRelativeEnergyTolerance = 1e-8; // used to determine convergence of the nonlinear solution

  ////////////////////////////////////////////////////////////////////
  // DEFINE VARIABLES
  ////////////////////////////////////////////////////////////////////

  // new-style bilinear form definition
  VarFactory varFactory;
  VarPtr fhat = varFactory.fluxVar("\\widehat{f}");
  VarPtr u = varFactory.fieldVar("u");

  VarPtr v = varFactory.testVar("v",HGRAD);
  BFPtr bf = Teuchos::rcp( new BF(varFactory) ); // initialize bilinear form

  ////////////////////////////////////////////////////////////////////
  // CREATE MESH
  ////////////////////////////////////////////////////////////////////

  // create a pointer to a new mesh:
  Teuchos::RCP<Mesh> mesh = Mesh::buildQuadMesh(meshPoints, horizontalCells,
                            verticalCells, bf, H1Order,
                            H1Order+pToAdd, useTriangles);
  mesh->setPartitionPolicy(Teuchos::rcp(new ZoltanMeshPartitionPolicy("HSFC")));

  ////////////////////////////////////////////////////////////////////
  // INITIALIZE BACKGROUND FLOW FUNCTIONS
  ////////////////////////////////////////////////////////////////////
  BCPtr nullBC = Teuchos::rcp((BC*)NULL);
  RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL);
  IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  SolutionPtr backgroundFlow = Teuchos::rcp(new Solution(mesh, nullBC,
                               nullRHS, nullIP) );

  vector<double> e1(2); // (1,0)
  e1[0] = 1;
  vector<double> e2(2); // (0,1)
  e2[1] = 1;

  FunctionPtr u_prev = Teuchos::rcp( new PreviousSolutionFunction(backgroundFlow, u) );
  FunctionPtr beta = e1 * u_prev + Teuchos::rcp( new ConstantVectorFunction( e2 ) );

  ////////////////////////////////////////////////////////////////////
  // DEFINE BILINEAR FORM
  ////////////////////////////////////////////////////////////////////

  // v:
  // (sigma, grad v)_K - (sigma_hat_n, v)_dK - (u, beta dot grad v) + (u_hat * n dot beta, v)_dK
  bf->addTerm( -u, beta * v->grad());
  bf->addTerm( fhat, v);

  // ==================== SET INITIAL GUESS ==========================
  mesh->registerSolution(backgroundFlow);
  FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
  FunctionPtr u0 = Teuchos::rcp( new U0 );

  map<int, Teuchos::RCP<Function> > functionMap;
  functionMap[u->ID()] = u0;

  backgroundFlow->projectOntoMesh(functionMap);
  // ==================== END SET INITIAL GUESS ==========================

  ////////////////////////////////////////////////////////////////////
  // DEFINE INNER PRODUCT
  ////////////////////////////////////////////////////////////////////
  IPPtr ip = Teuchos::rcp( new IP );
  ip->addTerm( v );
  ip->addTerm( beta * v->grad() );

  ////////////////////////////////////////////////////////////////////
  // DEFINE RHS
  ////////////////////////////////////////////////////////////////////
  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );
  FunctionPtr u_prev_squared_div2 = 0.5 * u_prev * u_prev;
  rhs->addTerm( (e1 * u_prev_squared_div2 + e2 * u_prev) * v->grad());

  ////////////////////////////////////////////////////////////////////
  // DEFINE DIRICHLET BC
  ////////////////////////////////////////////////////////////////////
  Teuchos::RCP<BCEasy> inflowBC = Teuchos::rcp( new BCEasy );

  // Create spatial filters
  SpatialFilterPtr bottomBoundary = Teuchos::rcp( new BottomBoundary );
  SpatialFilterPtr leftBoundary = Teuchos::rcp( new LeftBoundary );
  SpatialFilterPtr rightBoundary = Teuchos::rcp( new LeftBoundary );

  // Create BCs
  FunctionPtr n = Teuchos::rcp( new UnitNormalFunction );
  FunctionPtr u0_squared_div_2 = 0.5 * u0 * u0;
  SimpleFunction* u0Ptr = static_cast<SimpleFunction *>(u0.get());
  double u0Left = u0Ptr->value(0,0);
  double u0Right = u0Ptr->value(1.0,0);
  FunctionPtr leftVal = Teuchos::rcp( new ConstantScalarFunction( -0.5*u0Left*u0Left ) );
  FunctionPtr rightVal = Teuchos::rcp( new ConstantScalarFunction( 0.5*u0Right*u0Right ) );
  inflowBC->addDirichlet(fhat, bottomBoundary, -u0 );
  inflowBC->addDirichlet(fhat, leftBoundary, leftVal );
  inflowBC->addDirichlet(fhat, rightBoundary, rightVal );

  ////////////////////////////////////////////////////////////////////
  // CREATE SOLUTION OBJECT
  ////////////////////////////////////////////////////////////////////
  Teuchos::RCP<Solution> solution = Teuchos::rcp(new Solution(mesh, inflowBC, rhs, ip));
  mesh->registerSolution(solution);

  if (enforceLocalConservation)
  {
    FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
    solution->lagrangeConstraints()->addConstraint(fhat == zero);
  }

  ////////////////////////////////////////////////////////////////////
  // DEFINE REFINEMENT STRATEGY
  ////////////////////////////////////////////////////////////////////
  Teuchos::RCP<RefinementStrategy> refinementStrategy;
  refinementStrategy = Teuchos::rcp(new RefinementStrategy(solution,energyThreshold));

  ////////////////////////////////////////////////////////////////////
  // SOLVE
  ////////////////////////////////////////////////////////////////////

  for (int refIndex=0; refIndex<=numRefs; refIndex++)
  {
    double L2Update = 1e7;
    int iterCount = 0;
    while (L2Update > nonlinearRelativeEnergyTolerance && iterCount < maxNewtonIterations)
    {
      solution->solve();
      L2Update = solution->L2NormOfSolutionGlobal(u->ID());
      cout << "L2 Norm of Update = " << L2Update << endl;
      // backgroundFlow->clear();
      backgroundFlow->addSolution(solution, newtonStepSize);
      iterCount++;
    }
    cout << endl;

    // check conservation
    VarPtr testOne = varFactory.testVar("1", CONSTANT_SCALAR);
    // Create a fake bilinear form for the testing
    BFPtr fakeBF = Teuchos::rcp( new BF(varFactory) );
    // Define our mass flux
    FunctionPtr massFlux = Teuchos::rcp( new PreviousSolutionFunction(solution, fhat) );
    LinearTermPtr massFluxTerm = massFlux * testOne;

    Teuchos::RCP<shards::CellTopology> quadTopoPtr = Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData<shards::Quadrilateral<4> >() ));
    DofOrderingFactory dofOrderingFactory(fakeBF);
    int fakeTestOrder = H1Order;
    DofOrderingPtr testOrdering = dofOrderingFactory.testOrdering(fakeTestOrder, *quadTopoPtr);

    int testOneIndex = testOrdering->getDofIndex(testOne->ID(),0);
    vector< ElementTypePtr > elemTypes = mesh->elementTypes(); // global element types
    map<int, double> massFluxIntegral; // cellID -> integral
    double maxMassFluxIntegral = 0.0;
    double totalMassFlux = 0.0;
    double totalAbsMassFlux = 0.0;
    for (vector< ElementTypePtr >::iterator elemTypeIt = elemTypes.begin(); elemTypeIt != elemTypes.end(); elemTypeIt++)
    {
      ElementTypePtr elemType = *elemTypeIt;
      vector< ElementPtr > elems = mesh->elementsOfTypeGlobal(elemType);
      vector<int> cellIDs;
      for (int i=0; i<elems.size(); i++)
      {
        cellIDs.push_back(elems[i]->cellID());
      }
      FieldContainer<double> physicalCellNodes = mesh->physicalCellNodesGlobal(elemType);
      BasisCachePtr basisCache = Teuchos::rcp( new BasisCache(elemType,mesh) );
      basisCache->setPhysicalCellNodes(physicalCellNodes,cellIDs,true); // true: create side caches
      FieldContainer<double> cellMeasures = basisCache->getCellMeasures();
      FieldContainer<double> fakeRHSIntegrals(elems.size(),testOrdering->totalDofs());
      massFluxTerm->integrate(fakeRHSIntegrals,testOrdering,basisCache,true); // true: force side evaluation
      for (int i=0; i<elems.size(); i++)
      {
        int cellID = cellIDs[i];
        // pick out the ones for testOne:
        massFluxIntegral[cellID] = fakeRHSIntegrals(i,testOneIndex);
      }
      // find the largest:
      for (int i=0; i<elems.size(); i++)
      {
        int cellID = cellIDs[i];
        maxMassFluxIntegral = max(abs(massFluxIntegral[cellID]), maxMassFluxIntegral);
      }
      for (int i=0; i<elems.size(); i++)
      {
        int cellID = cellIDs[i];
        maxMassFluxIntegral = max(abs(massFluxIntegral[cellID]), maxMassFluxIntegral);
        totalMassFlux += massFluxIntegral[cellID];
        totalAbsMassFlux += abs( massFluxIntegral[cellID] );
      }
    }
    if (rank==0)
    {
      cout << endl;
      cout << "largest mass flux: " << maxMassFluxIntegral << endl;
      cout << "total mass flux: " << totalMassFlux << endl;
      cout << "sum of mass flux absolute value: " << totalAbsMassFlux << endl;
      cout << endl;

      stringstream outfile;
      outfile << "burgers_" << refIndex;
      backgroundFlow->writeToVTK(outfile.str(), 5);
    }

    if (refIndex < numRefs)
      refinementStrategy->refine(rank==0); // print to console on rank 0
  }

  return 0;
}