int main(int argc, char *argv[]) {
  // Process command line arguments
  if (argc > 1)
    numRefs = atof(argv[1]);
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  int rank=mpiSession.getRank();
  int numProcs=mpiSession.getNProc();
#else
  int rank = 0;
  int numProcs = 1;
#endif

  FunctionPtr beta = Teuchos::rcp(new Beta());

  ////////////////////////////////////////////////////////////////////
  // DEFINE VARIABLES 
  ////////////////////////////////////////////////////////////////////
  // test variables
  VarFactory varFactory; 
  VarPtr tau = varFactory.testVar("\\tau", HDIV);
  VarPtr v = varFactory.testVar("v", HGRAD);

  // trial variables
  VarPtr uhat = varFactory.traceVar("\\widehat{u}");
  VarPtr beta_n_u_minus_sigma_n = varFactory.fluxVar("\\widehat{\\beta \\cdot n u - \\sigma_{n}}");
  VarPtr u = varFactory.fieldVar("u");
  VarPtr sigma1 = varFactory.fieldVar("\\sigma_1");
  VarPtr sigma2 = varFactory.fieldVar("\\sigma_2");

  ////////////////////////////////////////////////////////////////////
  // CREATE MESH 
  ////////////////////////////////////////////////////////////////////

  BFPtr confusionBF = Teuchos::rcp( new BF(varFactory) );

  FieldContainer<double> meshBoundary(4,2);

  meshBoundary(0,0) =  0.0; // x1
  meshBoundary(0,1) = -2.0; // y1
  meshBoundary(1,0) =  4.0;
  meshBoundary(1,1) = -2.0;
  meshBoundary(2,0) =  4.0;
  meshBoundary(2,1) =  2.0;
  meshBoundary(3,0) =  0.0;
  meshBoundary(3,1) =  2.0;

  int horizontalCells = 4, verticalCells = 4;

  // create a pointer to a new mesh:
  Teuchos::RCP<Mesh> mesh = Mesh::buildQuadMesh(meshBoundary, horizontalCells, verticalCells,
      confusionBF, H1Order, H1Order+pToAdd, false);

  ////////////////////////////////////////////////////////////////////
  // INITIALIZE BACKGROUND FLOW FUNCTIONS
  ////////////////////////////////////////////////////////////////////

  BCPtr nullBC = Teuchos::rcp((BC*)NULL);
  RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL);
  IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  SolutionPtr prevTimeFlow = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );  
  SolutionPtr flowResidual = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );  

  FunctionPtr u_prev_time = Teuchos::rcp( new PreviousSolutionFunction(prevTimeFlow, u) );

  // ==================== SET INITIAL GUESS ==========================
  double u_free = 0.0;
  double sigma1_free = 0.0;
  double sigma2_free = 0.0;
  map<int, Teuchos::RCP<Function> > functionMap;
  functionMap[u->ID()] = Teuchos::rcp( new ConstantScalarFunction(u_free) );
  functionMap[sigma1->ID()] = Teuchos::rcp( new ConstantScalarFunction(sigma1_free) );
  functionMap[sigma2->ID()] = Teuchos::rcp( new ConstantScalarFunction(sigma2_free) );

  prevTimeFlow->projectOntoMesh(functionMap);
  // ==================== END SET INITIAL GUESS ==========================

  ////////////////////////////////////////////////////////////////////
  // DEFINE BILINEAR FORM
  ////////////////////////////////////////////////////////////////////

  // tau terms:
  confusionBF->addTerm(sigma1 / epsilon, tau->x());
  confusionBF->addTerm(sigma2 / epsilon, tau->y());
  confusionBF->addTerm(u, tau->div());
  confusionBF->addTerm(-uhat, tau->dot_normal());

  // v terms:
  confusionBF->addTerm( sigma1, v->dx() );
  confusionBF->addTerm( sigma2, v->dy() );
  confusionBF->addTerm( beta * u, - v->grad() );
  confusionBF->addTerm( beta_n_u_minus_sigma_n, v);

  ////////////////////////////////////////////////////////////////////
  // TIMESTEPPING TERMS
  ////////////////////////////////////////////////////////////////////
  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );

  double dt = 0.25;
  FunctionPtr invDt = Teuchos::rcp(new ScalarParamFunction(1.0/dt));    
  if (rank==0){
    cout << "Timestep dt = " << dt << endl;
  }
  if (transient)
  {
    confusionBF->addTerm( u, invDt*v );
    rhs->addTerm( u_prev_time * invDt * v );
  }

  ////////////////////////////////////////////////////////////////////
  // DEFINE INNER PRODUCT
  ////////////////////////////////////////////////////////////////////

  // mathematician's norm
  IPPtr mathIP = Teuchos::rcp(new IP());
  mathIP->addTerm(tau);
  mathIP->addTerm(tau->div());

  mathIP->addTerm(v);
  mathIP->addTerm(v->grad());

  // quasi-optimal norm
  IPPtr qoptIP = Teuchos::rcp(new IP);
  qoptIP->addTerm( v );
  qoptIP->addTerm( tau / epsilon + v->grad() );
  qoptIP->addTerm( beta * v->grad() - tau->div() );

  // robust test norm
  IPPtr robIP = Teuchos::rcp(new IP);
  FunctionPtr ip_scaling = Teuchos::rcp( new EpsilonScaling(epsilon) ); 
  if (!enforceLocalConservation)
  {
    robIP->addTerm( ip_scaling * v );
    if (transient)
      robIP->addTerm( invDt * v );
  }
  robIP->addTerm( sqrt(epsilon) * v->grad() );
  // Weight these two terms for inflow
  FunctionPtr ip_weight = Teuchos::rcp( new IPWeight() );
  robIP->addTerm( ip_weight * beta * v->grad() );
  robIP->addTerm( ip_weight * tau->div() );
  robIP->addTerm( ip_scaling/sqrt(epsilon) * tau );
  if (enforceLocalConservation)
    robIP->addZeroMeanTerm( v );

  ////////////////////////////////////////////////////////////////////
  // DEFINE RHS
  ////////////////////////////////////////////////////////////////////

  FunctionPtr f = Teuchos::rcp( new ConstantScalarFunction(0.0) );
  rhs->addTerm( f * v ); // obviously, with f = 0 adding this term is not necessary!

  ////////////////////////////////////////////////////////////////////
  // DEFINE BC
  ////////////////////////////////////////////////////////////////////

  Teuchos::RCP<BCEasy> bc = Teuchos::rcp( new BCEasy );
  // Teuchos::RCP<PenaltyConstraints> pc = Teuchos::rcp( new PenaltyConstraints );
  SpatialFilterPtr lBoundary = Teuchos::rcp( new LeftBoundary );
  SpatialFilterPtr tbBoundary = Teuchos::rcp( new TopBottomBoundary );
  SpatialFilterPtr rBoundary = Teuchos::rcp( new RightBoundary );
  FunctionPtr u0 = Teuchos::rcp( new ZeroBC );
  FunctionPtr u_inlet = Teuchos::rcp( new InletBC );
  // FunctionPtr n = Teuchos::rcp( new UnitNormalFunction );
  bc->addDirichlet(beta_n_u_minus_sigma_n, lBoundary, u_inlet);
  bc->addDirichlet(beta_n_u_minus_sigma_n, tbBoundary, u0);
  bc->addDirichlet(uhat, rBoundary, u0);
  // pc->addConstraint(beta_n_u_minus_sigma_n - uhat == u0, rBoundary);

  ////////////////////////////////////////////////////////////////////
  // CREATE SOLUTION OBJECT
  ////////////////////////////////////////////////////////////////////
  Teuchos::RCP<Solution> solution = Teuchos::rcp( new Solution(mesh, bc, rhs, robIP) );
  // solution->setFilter(pc);

  // ==================== Enforce Local Conservation ==================
  if (enforceLocalConservation) {
    if (transient)
    {
      FunctionPtr conserved_rhs = u_prev_time * invDt;
      LinearTermPtr conserved_quantity = invDt * u;
      LinearTermPtr flux_part = Teuchos::rcp(new LinearTerm(-1.0, beta_n_u_minus_sigma_n));
      conserved_quantity->addTerm(flux_part, true);
      // conserved_quantity = conserved_quantity - beta_n_u_minus_sigma_n;
      solution->lagrangeConstraints()->addConstraint(conserved_quantity == conserved_rhs);
    }
    else
    {
      FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
      solution->lagrangeConstraints()->addConstraint(beta_n_u_minus_sigma_n == zero);
    }
  }

  // ==================== Register Solutions ==========================
  mesh->registerSolution(solution);
  mesh->registerSolution(prevTimeFlow); // u_t(i-1)
  mesh->registerSolution(flowResidual); // u_t(i-1)

  double energyThreshold = 0.25; // for mesh refinements
  Teuchos::RCP<RefinementStrategy> refinementStrategy;
  refinementStrategy = Teuchos::rcp(new RefinementStrategy(solution,energyThreshold));

  ////////////////////////////////////////////////////////////////////
  // PSEUDO-TIME SOLVE STRATEGY 
  ////////////////////////////////////////////////////////////////////

  double time_tol = 1e-8;
  for (int refIndex=0; refIndex<=numRefs; refIndex++)
  {
    double L2_time_residual = 1e7;
    int timestepCount = 0;
    if (!transient)
      numTimeSteps = 1;
    while((L2_time_residual > time_tol) && (timestepCount < numTimeSteps))
    {
      solution->solve(false);
      // subtract solutions to get residual
      flowResidual->setSolution(solution); // reset previous time solution to current time sol
      flowResidual->addSolution(prevTimeFlow, -1.0);       
      double L2u = flowResidual->L2NormOfSolutionGlobal(u->ID());
      double L2sigma1 = flowResidual->L2NormOfSolutionGlobal(sigma1->ID());
      double L2sigma2 = flowResidual->L2NormOfSolutionGlobal(sigma2->ID());
      L2_time_residual = sqrt(L2u*L2u + L2sigma1*L2sigma1 + L2sigma2*L2sigma2);
      cout << endl << "Timestep: " << timestepCount << ", dt = " << dt << ", Time residual = " << L2_time_residual << endl;    	

      if (rank == 0)
      {
        stringstream outfile;
        if (transient)
          outfile << "TransientConfusion_" << refIndex << "_" << timestepCount;
        else
          outfile << "TransientConfusion_" << refIndex;
        solution->writeToVTK(outfile.str(), 5);
      }

      //////////////////////////////////////////////////////////////////////////
      // Check conservation by testing against one
      //////////////////////////////////////////////////////////////////////////
      VarPtr testOne = varFactory.testVar("1", CONSTANT_SCALAR);
      // Create a fake bilinear form for the testing
      BFPtr fakeBF = Teuchos::rcp( new BF(varFactory) );
      // Define our mass flux
      FunctionPtr flux_current_time = Teuchos::rcp( new PreviousSolutionFunction(solution, beta_n_u_minus_sigma_n) );
      FunctionPtr delta_u = Teuchos::rcp( new PreviousSolutionFunction(flowResidual, u) );
      LinearTermPtr surfaceFlux = -1.0 * flux_current_time * testOne;
      LinearTermPtr volumeChange = invDt * delta_u * testOne;
      LinearTermPtr massFluxTerm;
      if (transient)
      {
        massFluxTerm = volumeChange;
        // massFluxTerm->addTerm(surfaceFlux);
      }
      else
      {
        massFluxTerm = surfaceFlux;
      }
      // cout << "surface case = " << surfaceFlux->summands()[0].first->boundaryValueOnly() << " volume case = " << volumeChange->summands()[0].first->boundaryValueOnly() << endl;

      // FunctionPtr massFlux= Teuchos::rcp( new PreviousSolutionFunction(solution, beta_n_u_minus_sigma_n) );
      // LinearTermPtr massFluxTerm = massFlux * testOne;

      Teuchos::RCP<shards::CellTopology> quadTopoPtr = Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData<shards::Quadrilateral<4> >() ));
      DofOrderingFactory dofOrderingFactory(fakeBF);
      int fakeTestOrder = H1Order;
      DofOrderingPtr testOrdering = dofOrderingFactory.testOrdering(fakeTestOrder, *quadTopoPtr);

      int testOneIndex = testOrdering->getDofIndex(testOne->ID(),0);
      vector< ElementTypePtr > elemTypes = mesh->elementTypes(); // global element types
      map<int, double> massFluxIntegral; // cellID -> integral
      double maxMassFluxIntegral = 0.0;
      double totalMassFlux = 0.0;
      double totalAbsMassFlux = 0.0;
      for (vector< ElementTypePtr >::iterator elemTypeIt = elemTypes.begin(); elemTypeIt != elemTypes.end(); elemTypeIt++) 
      {
        ElementTypePtr elemType = *elemTypeIt;
        vector< ElementPtr > elems = mesh->elementsOfTypeGlobal(elemType);
        vector<int> cellIDs;
        for (int i=0; i<elems.size(); i++) {
          cellIDs.push_back(elems[i]->cellID());
        }
        FieldContainer<double> physicalCellNodes = mesh->physicalCellNodesGlobal(elemType);
        BasisCachePtr basisCache = Teuchos::rcp( new BasisCache(elemType,mesh) );
        basisCache->setPhysicalCellNodes(physicalCellNodes,cellIDs,true); // true: create side caches
        FieldContainer<double> cellMeasures = basisCache->getCellMeasures();
        FieldContainer<double> fakeRHSIntegrals(elems.size(),testOrdering->totalDofs());
        massFluxTerm->integrate(fakeRHSIntegrals,testOrdering,basisCache,true); // true: force side evaluation
        for (int i=0; i<elems.size(); i++) {
          int cellID = cellIDs[i];
          // pick out the ones for testOne:
          massFluxIntegral[cellID] = fakeRHSIntegrals(i,testOneIndex);
        }
        // find the largest:
        for (int i=0; i<elems.size(); i++) {
          int cellID = cellIDs[i];
          maxMassFluxIntegral = max(abs(massFluxIntegral[cellID]), maxMassFluxIntegral);
        }
        for (int i=0; i<elems.size(); i++) {
          int cellID = cellIDs[i];
          maxMassFluxIntegral = max(abs(massFluxIntegral[cellID]), maxMassFluxIntegral);
          totalMassFlux += massFluxIntegral[cellID];
          totalAbsMassFlux += abs( massFluxIntegral[cellID] );
        }
      }

      // Print results from processor with rank 0
      if (rank == 0)
      {
        cout << "largest mass flux: " << maxMassFluxIntegral << endl;
        cout << "total mass flux: " << totalMassFlux << endl;
        cout << "sum of mass flux absolute value: " << totalAbsMassFlux << endl;
      }

      prevTimeFlow->setSolution(solution); // reset previous time solution to current time sol
      timestepCount++;
    }

    if (refIndex < numRefs){
      if (rank==0){
        cout << "Performing refinement number " << refIndex << endl;
      }     
      refinementStrategy->refine(rank==0);    
      // RESET solution every refinement - make sure discretization error doesn't creep in
      // prevTimeFlow->projectOntoMesh(functionMap);
    }
  }

  return 0;
}
int main(int argc, char *argv[])
{
  // Process command line arguments
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  int rank=mpiSession.getRank();
  int numProcs=mpiSession.getNProc();
#else
  int rank = 0;
  int numProcs = 1;
#endif

  ////////////////////   DECLARE VARIABLES   ///////////////////////
  // define test variables
  VarFactory varFactory;
  VarPtr v = varFactory.testVar("v", HGRAD);

  // define trial variables
  VarPtr beta_n_u_hat = varFactory.fluxVar("\\widehat{\\beta \\cdot n }");
  VarPtr u = varFactory.fieldVar("u");

  FunctionPtr beta = Teuchos::rcp(new Beta());

  ////////////////////   BUILD MESH   ///////////////////////
  BFPtr confusionBF = Teuchos::rcp( new BF(varFactory) );
  // define nodes for mesh
  FieldContainer<double> meshBoundary(4,2);

  meshBoundary(0,0) = -1.0; // x1
  meshBoundary(0,1) = -1.0; // y1
  meshBoundary(1,0) =  1.0;
  meshBoundary(1,1) = -1.0;
  meshBoundary(2,0) =  1.0;
  meshBoundary(2,1) =  1.0;
  meshBoundary(3,0) = -1.0;
  meshBoundary(3,1) =  1.0;

  int horizontalCells = 32, verticalCells = 32;

  // create a pointer to a new mesh:
  Teuchos::RCP<Mesh> mesh = Mesh::buildQuadMesh(meshBoundary, horizontalCells, verticalCells,
                            confusionBF, H1Order, H1Order+pToAdd);

  ////////////////////////////////////////////////////////////////////
  // INITIALIZE FLOW FUNCTIONS
  ////////////////////////////////////////////////////////////////////

  BCPtr nullBC = Teuchos::rcp((BC*)NULL);
  RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL);
  IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  SolutionPtr prevTimeFlow = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );
  SolutionPtr flowResidual = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );

  FunctionPtr u_prev_time = Teuchos::rcp( new PreviousSolutionFunction(prevTimeFlow, u) );

  ////////////////////   DEFINE BILINEAR FORM   ///////////////////////
  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );
  FunctionPtr invDt = Teuchos::rcp(new ScalarParamFunction(1.0/dt));

  // v terms:
  confusionBF->addTerm( beta * u, - v->grad() );
  confusionBF->addTerm( beta_n_u_hat, v);

  confusionBF->addTerm( u, invDt*v );
  rhs->addTerm( u_prev_time * invDt * v );

  ////////////////////   SPECIFY RHS   ///////////////////////
  FunctionPtr f = Teuchos::rcp( new ConstantScalarFunction(0.0) );
  rhs->addTerm( f * v ); // obviously, with f = 0 adding this term is not necessary!

  ////////////////////   DEFINE INNER PRODUCT(S)   ///////////////////////
  // robust test norm
  IPPtr ip = confusionBF->graphNorm();
  // IPPtr ip = Teuchos::rcp(new IP);
  // ip->addTerm(v);
  // ip->addTerm(invDt*v - beta*v->grad());

  ////////////////////   CREATE BCs   ///////////////////////
  Teuchos::RCP<BCEasy> bc = Teuchos::rcp( new BCEasy );
  SpatialFilterPtr inflowBoundary = Teuchos::rcp( new InflowSquareBoundary(beta) );
  FunctionPtr u0 = Teuchos::rcp( new ConstantScalarFunction(0) );
  FunctionPtr n = Teuchos::rcp( new UnitNormalFunction );

  bc->addDirichlet(beta_n_u_hat, inflowBoundary, beta*n*u0);

  Teuchos::RCP<Solution> solution = Teuchos::rcp( new Solution(mesh, bc, rhs, ip) );

  // ==================== Register Solutions ==========================
  mesh->registerSolution(solution);
  mesh->registerSolution(prevTimeFlow);
  mesh->registerSolution(flowResidual);

  // ==================== SET INITIAL GUESS ==========================
  FunctionPtr u_init = Teuchos::rcp(new InitialCondition());
  map<int, Teuchos::RCP<Function> > functionMap;
  functionMap[u->ID()]      = u_init;

  prevTimeFlow->projectOntoMesh(functionMap);

  ////////////////////   SOLVE & REFINE   ///////////////////////
  // if (enforceLocalConservation) {
  //   // FunctionPtr parity = Teuchos::rcp<Function>( new SideParityFunction );
  //   // LinearTermPtr conservedQuantity = Teuchos::rcp<LinearTerm>( new LinearTerm(parity, beta_n_u_minus_sigma_n) );
  //   LinearTermPtr conservedQuantity = Teuchos::rcp<LinearTerm>( new LinearTerm(1.0, beta_n_u_minus_sigma_n) );
  //   LinearTermPtr sourcePart = Teuchos::rcp<LinearTerm>( new LinearTerm(invDt, u) );
  //   conservedQuantity->addTerm(sourcePart, true);
  //   solution->lagrangeConstraints()->addConstraint(conservedQuantity == u_prev_time * invDt);
  // }

  int timestepCount = 0;
  double time_tol = 1e-8;
  double L2_time_residual = 1e9;
  while((L2_time_residual > time_tol) && (timestepCount < numTimeSteps))
  {
    solution->solve(false);
    // Subtract solutions to get residual
    flowResidual->setSolution(solution);
    flowResidual->addSolution(prevTimeFlow, -1.0);
    L2_time_residual = flowResidual->L2NormOfSolutionGlobal(u->ID());

    if (rank == 0)
    {
      cout << endl << "Timestep: " << timestepCount << ", dt = " << dt << ", Time residual = " << L2_time_residual << endl;

      stringstream outfile;
      outfile << "rotatingCylinder_" << timestepCount;
      solution->writeToVTK(outfile.str(), 5);

      // Check local conservation
      FunctionPtr flux = Teuchos::rcp( new PreviousSolutionFunction(solution, beta_n_u_hat) );
      FunctionPtr source = Teuchos::rcp( new PreviousSolutionFunction(flowResidual, u) );
      source = invDt * source;
      Teuchos::Tuple<double, 3> fluxImbalances = checkConservation(flux, source, varFactory, mesh);
      cout << "Mass flux: Largest Local = " << fluxImbalances[0]
           << ", Global = " << fluxImbalances[1] << ", Sum Abs = " << fluxImbalances[2] << endl;
    }

    prevTimeFlow->setSolution(solution); // reset previous time solution to current time sol
    timestepCount++;
  }

  return 0;
}
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  choice::MpiArgs args( argc, argv );
#else
  choice::Args args( argc, argv );
#endif
  int commRank = Teuchos::GlobalMPISession::getRank();
  int numProcs = Teuchos::GlobalMPISession::getNProc();

  // Required arguments
  int numRefs = args.Input<int>("--numRefs", "number of refinement steps");
  bool enforceLocalConservation = args.Input<bool>("--conserve", "enforce local conservation");
  bool steady = args.Input<bool>("--steady", "run steady rather than transient");

  // Optional arguments (have defaults)
  double dt = args.Input("--dt", "time step", 0.25);
  int numTimeSteps = args.Input("--nt", "number of time steps", 20);
  halfWidth = args.Input("--halfWidth", "half width of inlet profile", 1.0);
  args.Process();

  ////////////////////   DECLARE VARIABLES   ///////////////////////
  // define test variables
  VarFactory varFactory;
  VarPtr v = varFactory.testVar("v", HGRAD);

  // define trial variables
  VarPtr beta_n_u_hat = varFactory.fluxVar("\\widehat{\\beta \\cdot n }");
  VarPtr u = varFactory.fieldVar("u");

  vector<double> beta;
  beta.push_back(1.0);
  beta.push_back(0.0);

  ////////////////////   BUILD MESH   ///////////////////////
  BFPtr bf = Teuchos::rcp( new BF(varFactory) );
  // define nodes for mesh
  FieldContainer<double> meshBoundary(4,2);

  meshBoundary(0,0) =  0.0; // x1
  meshBoundary(0,1) = -2.0; // y1
  meshBoundary(1,0) =  4.0;
  meshBoundary(1,1) = -2.0;
  meshBoundary(2,0) =  4.0;
  meshBoundary(2,1) =  2.0;
  meshBoundary(3,0) =  0.0;
  meshBoundary(3,1) =  2.0;

  int horizontalCells = 8, verticalCells = 8;

  // create a pointer to a new mesh:
  Teuchos::RCP<Mesh> mesh = Mesh::buildQuadMesh(meshBoundary, horizontalCells, verticalCells,
                            bf, H1Order, H1Order+pToAdd);

  ////////////////////////////////////////////////////////////////////
  // INITIALIZE FLOW FUNCTIONS
  ////////////////////////////////////////////////////////////////////

  BCPtr nullBC = Teuchos::rcp((BC*)NULL);
  RHSPtr nullRHS = Teuchos::rcp((RHS*)NULL);
  IPPtr nullIP = Teuchos::rcp((IP*)NULL);
  SolutionPtr prevTimeFlow = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );
  SolutionPtr flowResidual = Teuchos::rcp(new Solution(mesh, nullBC, nullRHS, nullIP) );

  FunctionPtr u_prev_time = Teuchos::rcp( new PreviousSolutionFunction(prevTimeFlow, u) );

  ////////////////////   DEFINE BILINEAR FORM   ///////////////////////
  Teuchos::RCP<RHSEasy> rhs = Teuchos::rcp( new RHSEasy );
  FunctionPtr invDt = Teuchos::rcp(new ScalarParamFunction(1.0/dt));

  // v terms:
  bf->addTerm( beta * u, - v->grad() );
  bf->addTerm( beta_n_u_hat, v);

  if (!steady)
  {
    bf->addTerm( u, invDt*v );
    rhs->addTerm( u_prev_time * invDt * v );
  }

  ////////////////////   SPECIFY RHS   ///////////////////////
  FunctionPtr f = Teuchos::rcp( new ConstantScalarFunction(0.0) );
  rhs->addTerm( f * v ); // obviously, with f = 0 adding this term is not necessary!

  ////////////////////   DEFINE INNER PRODUCT(S)   ///////////////////////
  IPPtr ip = bf->graphNorm();
  // ip->addTerm(v);
  // ip->addTerm(beta*v->grad());

  ////////////////////   CREATE BCs   ///////////////////////
  Teuchos::RCP<BCEasy> bc = Teuchos::rcp( new BCEasy );
  SpatialFilterPtr lBoundary = Teuchos::rcp( new LeftBoundary );
  FunctionPtr u1 = Teuchos::rcp( new InletBC );
  bc->addDirichlet(beta_n_u_hat, lBoundary, -u1);

  Teuchos::RCP<Solution> solution = Teuchos::rcp( new Solution(mesh, bc, rhs, ip) );

  // ==================== Register Solutions ==========================
  mesh->registerSolution(solution);
  mesh->registerSolution(prevTimeFlow);
  mesh->registerSolution(flowResidual);

  // ==================== SET INITIAL GUESS ==========================
  double u_free = 0.0;
  map<int, Teuchos::RCP<Function> > functionMap;
  // functionMap[u->ID()]      = Teuchos::rcp( new ConInletBC
  functionMap[u->ID()]      = Teuchos::rcp( new InletBC );

  prevTimeFlow->projectOntoMesh(functionMap);

  ////////////////////   SOLVE & REFINE   ///////////////////////
  if (enforceLocalConservation)
  {
    if (steady)
    {
      FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
      solution->lagrangeConstraints()->addConstraint(beta_n_u_hat == zero);
    }
    else
    {
      // FunctionPtr parity = Teuchos::rcp<Function>( new SideParityFunction );
      // LinearTermPtr conservedQuantity = Teuchos::rcp<LinearTerm>( new LinearTerm(parity, beta_n_u_minus_sigma_n) );
      LinearTermPtr conservedQuantity = Teuchos::rcp<LinearTerm>( new LinearTerm(1.0, beta_n_u_hat) );
      LinearTermPtr sourcePart = Teuchos::rcp<LinearTerm>( new LinearTerm(invDt, u) );
      conservedQuantity->addTerm(sourcePart, true);
      solution->lagrangeConstraints()->addConstraint(conservedQuantity == u_prev_time * invDt);
    }
  }

  double energyThreshold = 0.2; // for mesh refinements
  RefinementStrategy refinementStrategy( solution, energyThreshold );
  VTKExporter exporter(solution, mesh, varFactory);

  for (int refIndex=0; refIndex<=numRefs; refIndex++)
  {
    if (steady)
    {
      solution->solve(false);

      if (commRank == 0)
      {
        stringstream outfile;
        outfile << "Convection_" << refIndex;
        exporter.exportSolution(outfile.str());

        // Check local conservation
        FunctionPtr flux = Teuchos::rcp( new PreviousSolutionFunction(solution, beta_n_u_hat) );
        FunctionPtr zero = Teuchos::rcp( new ConstantScalarFunction(0.0) );
        Teuchos::Tuple<double, 3> fluxImbalances = checkConservation(flux, zero, varFactory, mesh);
        cout << "Mass flux: Largest Local = " << fluxImbalances[0]
             << ", Global = " << fluxImbalances[1] << ", Sum Abs = " << fluxImbalances[2] << endl;
      }
    }
    else
    {
      int timestepCount = 0;
      double time_tol = 1e-8;
      double L2_time_residual = 1e9;
      // cout << L2_time_residual <<" "<< time_tol << timestepCount << numTimeSteps << endl;
      while((L2_time_residual > time_tol) && (timestepCount < numTimeSteps))
      {
        solution->solve(false);
        // Subtract solutions to get residual
        flowResidual->setSolution(solution);
        flowResidual->addSolution(prevTimeFlow, -1.0);
        L2_time_residual = flowResidual->L2NormOfSolutionGlobal(u->ID());

        if (commRank == 0)
        {
          cout << endl << "Timestep: " << timestepCount << ", dt = " << dt << ", Time residual = " << L2_time_residual << endl;

          stringstream outfile;
          outfile << "TransientConvection_" << refIndex << "-" << timestepCount;
          exporter.exportSolution(outfile.str());

          // Check local conservation
          FunctionPtr flux = Teuchos::rcp( new PreviousSolutionFunction(solution, beta_n_u_hat) );
          FunctionPtr source = Teuchos::rcp( new PreviousSolutionFunction(flowResidual, u) );
          source = -invDt * source;
          Teuchos::Tuple<double, 3> fluxImbalances = checkConservation(flux, source, varFactory, mesh);
          cout << "Mass flux: Largest Local = " << fluxImbalances[0]
               << ", Global = " << fluxImbalances[1] << ", Sum Abs = " << fluxImbalances[2] << endl;
        }

        prevTimeFlow->setSolution(solution); // reset previous time solution to current time sol
        timestepCount++;
      }
    }

    if (refIndex < numRefs)
      refinementStrategy.refine(commRank==0); // print to console on commRank 0
  }

  return 0;
}