int main(int argc, char *argv[])
{
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, 0);
  
  int spaceDim = 2;
  int meshWidth = 2;
  bool conformingTraces = true;
  int H1Order = 2, delta_k = 3;
  double domainWidth = 1.0e-3;
  bool diagScaling = false;
  double h = domainWidth / meshWidth;
  double weight = h / 4.0; // ratio of area of square with sidelength h to its perimeter
  
  double sigma_weight = 1.0; // h / 4.0; // sigma = sigma_weight * u->grad()

  Space uHatSpace = conformingTraces ? HGRAD : L2;
  
  VarFactoryPtr vf = VarFactory::varFactory();
  
  // fields
  VarPtr u = vf->fieldVar("u");
  VarPtr sigma = vf->fieldVar("sigma", VECTOR_L2);
  
  // traces
  VarPtr u_hat = vf->traceVar("u_hat", uHatSpace);
  VarPtr sigma_n = vf->fluxVar("sigma_n");
  
  // tests
  VarPtr v = vf->testVar("v", HGRAD);
  VarPtr tau = vf->testVar("tau", HDIV);
  
  BFPtr bf = BF::bf(vf);
  
// standard BF:
//  bf->addTerm(sigma, v->grad());
//  bf->addTerm(sigma_n, v);
//  
//  bf->addTerm(sigma, tau);
//  bf->addTerm(u, tau->div());
//  bf->addTerm(-u_hat, tau->dot_normal());
  
  // weighted BF:
  bf->addTerm(sigma, v->grad());
  bf->addTerm(weight * sigma_n, v);
  
  bf->addTerm(sigma, tau);
  bf->addTerm(sigma_weight * u, tau->div());
  bf->addTerm(- sigma_weight * weight * u_hat, tau->dot_normal());
  
  IPPtr ip = IP::ip();
// standard IP:
  ip->addTerm(tau + v->grad());
  ip->addTerm(tau->div());
  ip->addTerm(v);
  ip->addTerm(tau);
  
  // weighted IP:
//  ip->addTerm(tau + v->grad());
//  ip->addTerm(sigma_weight * tau->div());
//  ip->addTerm(max(sigma_weight,1e-3) * v);
//  ip->addTerm(sigma_weight * weight * tau);
  
  BCPtr bc = BC::bc();
  bc->addDirichlet(u_hat, SpatialFilter::allSpace(), Function::zero());
  
  RHSPtr rhs = RHS::rhs();
  rhs->addTerm(1.0 * sigma_weight * v);
  
  vector<double> dimensions(spaceDim,domainWidth);
  vector<int> elementCounts(spaceDim,meshWidth);
  
  MeshPtr mesh = MeshFactory::rectilinearMesh(bf, dimensions, elementCounts, H1Order, delta_k);
  
  SolutionPtr soln = Solution::solution(mesh, bc, rhs, ip);
  
  soln->setUseCondensedSolve(true);
  soln->initializeLHSVector();
  soln->initializeStiffnessAndLoad();
  soln->populateStiffnessAndLoad();
  
  Teuchos::RCP<Epetra_RowMatrix> stiffness = soln->getStiffnessMatrix();
  
  double condNumber = conditionNumberLAPACK(*stiffness, diagScaling);
  
  cout << "condest (1-norm): " << condNumber << endl;
  
  return 0;
}
int main(int argc, char *argv[])
{
#ifdef ENABLE_INTEL_FLOATING_POINT_EXCEPTIONS
  cout << "NOTE: enabling floating point exceptions for divide by zero.\n";
  _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~_MM_MASK_INVALID);
#endif

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  int rank = Teuchos::GlobalMPISession::getRank();

#ifdef HAVE_MPI
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
  //cout << "rank: " << rank << " of " << numProcs << endl;
#else
  Epetra_SerialComm Comm;
#endif

  Comm.Barrier(); // set breakpoint here to allow debugger attachment to other MPI processes than the one you automatically attached to.

  Teuchos::CommandLineProcessor cmdp(false,true); // false: don't throw exceptions; true: do return errors for unrecognized options

  double minTol = 1e-8;

  bool use3D = false;
  int refCount = 10;

  int k = 4; // poly order for field variables
  int delta_k = use3D ? 3 : 2;   // test space enrichment
  int k_coarse = 0;

  bool useMumps = true;
  bool useGMGSolver = true;

  bool enforceOneIrregularity = true;
  bool useStaticCondensation = false;
  bool conformingTraces = false;
  bool useDiagonalScaling = false; // of the global stiffness matrix in GMGSolver

  bool printRefinementDetails = false;

  bool useWeightedGraphNorm = true; // graph norm scaled according to units, more or less

  int numCells = 2;

  int AztecOutputLevel = 1;
  int gmgMaxIterations = 10000;
  int smootherOverlap = 0;
  double relativeTol = 1e-6;
  double D = 1.0; // characteristic length scale

  cmdp.setOption("polyOrder",&k,"polynomial order for field variable u");
  cmdp.setOption("delta_k", &delta_k, "test space polynomial order enrichment");
  cmdp.setOption("k_coarse", &k_coarse, "polynomial order for field variables on coarse mesh");
  cmdp.setOption("numRefs",&refCount,"number of refinements");
  cmdp.setOption("D", &D, "domain dimension");
  cmdp.setOption("useConformingTraces", "useNonConformingTraces", &conformingTraces);
  cmdp.setOption("enforceOneIrregularity", "dontEnforceOneIrregularity", &enforceOneIrregularity);

  cmdp.setOption("smootherOverlap", &smootherOverlap, "overlap for smoother");

  cmdp.setOption("printRefinementDetails", "dontPrintRefinementDetails", &printRefinementDetails);
  cmdp.setOption("azOutput", &AztecOutputLevel, "Aztec output level");
  cmdp.setOption("numCells", &numCells, "number of cells in the initial mesh");
  cmdp.setOption("useScaledGraphNorm", "dontUseScaledGraphNorm", &useWeightedGraphNorm);
//  cmdp.setOption("gmgTol", &gmgTolerance, "tolerance for GMG convergence");
  cmdp.setOption("relativeTol", &relativeTol, "Energy error-relative tolerance for iterative solver.");
  cmdp.setOption("gmgMaxIterations", &gmgMaxIterations, "tolerance for GMG convergence");

  bool enhanceUField = false;
  cmdp.setOption("enhanceUField", "dontEnhanceUField", &enhanceUField);
  cmdp.setOption("useStaticCondensation", "dontUseStaticCondensation", &useStaticCondensation);

  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
  {
#ifdef HAVE_MPI
    MPI_Finalize();
#endif
    return -1;
  }

  double width = D, height = D, depth = D;

  VarFactory varFactory;
  // fields:
  VarPtr u = varFactory.fieldVar("u", L2);
  VarPtr sigma = varFactory.fieldVar("\\sigma", VECTOR_L2);

  FunctionPtr n = Function::normal();
  // traces:
  VarPtr u_hat;

  if (conformingTraces)
  {
    u_hat = varFactory.traceVar("\\widehat{u}", u);
  }
  else
  {
    cout << "Note: using non-conforming traces.\n";
    u_hat = varFactory.traceVar("\\widehat{u}", u, L2);
  }
  VarPtr sigma_n_hat = varFactory.fluxVar("\\widehat{\\sigma}_{n}", sigma * n);

  // test functions:
  VarPtr tau = varFactory.testVar("\\tau", HDIV);
  VarPtr v = varFactory.testVar("v", HGRAD);

  BFPtr poissonBF = Teuchos::rcp( new BF(varFactory) );
  FunctionPtr alpha = Function::constant(1); // viscosity

  // tau terms:
  poissonBF->addTerm(sigma / alpha, tau);
  poissonBF->addTerm(-u, tau->div()); // (sigma1, tau1)
  poissonBF->addTerm(u_hat, tau * n);

  // v terms:
  poissonBF->addTerm(- sigma, v->grad()); // (mu sigma1, grad v1)
  poissonBF->addTerm( sigma_n_hat, v);

  int horizontalCells = numCells, verticalCells = numCells, depthCells = numCells;

  vector<double> domainDimensions;
  domainDimensions.push_back(width);
  domainDimensions.push_back(height);

  vector<int> elementCounts;
  elementCounts.push_back(horizontalCells);
  elementCounts.push_back(verticalCells);

  if (use3D)
  {
    domainDimensions.push_back(depth);
    elementCounts.push_back(depthCells);
  }

  MeshPtr mesh, k0Mesh;

  int H1Order = k + 1;
  int H1Order_coarse = k_coarse + 1;
  if (!use3D)
  {
    Teuchos::ParameterList pl;

    map<int,int> trialOrderEnhancements;

    if (enhanceUField)
    {
      trialOrderEnhancements[u->ID()] = 1;
    }

    BFPtr poissonBilinearForm = poissonBF;

    pl.set("useMinRule", true);
    pl.set("bf",poissonBilinearForm);
    pl.set("H1Order", H1Order);
    pl.set("delta_k", delta_k);
    pl.set("horizontalElements", horizontalCells);
    pl.set("verticalElements", verticalCells);
    pl.set("divideIntoTriangles", false);
    pl.set("useConformingTraces", conformingTraces);
    pl.set("trialOrderEnhancements", &trialOrderEnhancements);
    pl.set("x0",(double)0);
    pl.set("y0",(double)0);
    pl.set("width", width);
    pl.set("height",height);

    mesh = MeshFactory::quadMesh(pl);

    pl.set("H1Order", H1Order_coarse);
    k0Mesh = MeshFactory::quadMesh(pl);

  }
  else
  {
    mesh = MeshFactory::rectilinearMesh(poissonBF, domainDimensions, elementCounts, H1Order, delta_k);
    k0Mesh = MeshFactory::rectilinearMesh(poissonBF, domainDimensions, elementCounts, H1Order_coarse, delta_k);
  }

  mesh->registerObserver(k0Mesh); // ensure that the k0 mesh refinements track those of the solution mesh

  RHSPtr rhs = RHS::rhs(); // zero
  FunctionPtr sin_pi_x = Teuchos::rcp( new Sin_ax(PI/D) );
  FunctionPtr sin_pi_y = Teuchos::rcp( new Sin_ay(PI/D) );
  FunctionPtr u_exact = sin_pi_x * sin_pi_y;
  FunctionPtr f = -(2.0 * PI * PI / (D * D)) * sin_pi_x * sin_pi_y;
  rhs->addTerm( f * v );

  BCPtr bc = BC::bc();
  SpatialFilterPtr boundary = SpatialFilter::allSpace();

  bc->addDirichlet(u_hat, boundary, u_exact);

  IPPtr graphNorm;

  FunctionPtr h = Teuchos::rcp( new hFunction() );

  if (useWeightedGraphNorm)
  {
    graphNorm = IP::ip();
    graphNorm->addTerm( tau->div() ); // u
    graphNorm->addTerm( (h / alpha) * tau - h * v->grad() ); // sigma
    graphNorm->addTerm( v ); // boundary term (adjoint to u)
    graphNorm->addTerm( h * tau );

//    // new effort, with the idea that the test norm should be considered in reference space, basically
//    graphNorm = IP::ip();
//    graphNorm->addTerm( tau->div() ); // u
//    graphNorm->addTerm( tau / h - v->grad() ); // sigma
//    graphNorm->addTerm( v / h ); // boundary term (adjoint to u)
//    graphNorm->addTerm( tau / h );
  }
  else
  {
    map<int, double> trialWeights; // on the squared terms in the trial space norm
    trialWeights[u->ID()] = 1.0 / (D * D);
    trialWeights[sigma->ID()] = 1.0;
    graphNorm = poissonBF->graphNorm(trialWeights, 1.0); // 1.0: weight on the L^2 terms
  }

  SolutionPtr solution = Solution::solution(mesh, bc, rhs, graphNorm);
  solution->setUseCondensedSolve(useStaticCondensation);

  mesh->registerSolution(solution); // sign up for projection of old solution onto refined cells.

  double energyThreshold = 0.2;
  RefinementStrategy refinementStrategy( solution, energyThreshold );

  refinementStrategy.setReportPerCellErrors(true);
  refinementStrategy.setEnforceOneIrregularity(enforceOneIrregularity);

  Teuchos::RCP<Solver> coarseSolver, fineSolver;
  if (useMumps)
  {
#ifdef HAVE_AMESOS_MUMPS
    coarseSolver = Teuchos::rcp( new MumpsSolver(512, true) );
#else
    cout << "useMumps=true, but MUMPS is not available!\n";
    exit(0);
#endif
  }
  else
  {
    coarseSolver = Teuchos::rcp( new KluSolver );
  }
  GMGSolver* gmgSolver;

  if (useGMGSolver)
  {
    double tol = relativeTol;
    int maxIters = gmgMaxIterations;
    BCPtr zeroBCs = bc->copyImposingZero();
    gmgSolver = new GMGSolver(zeroBCs, k0Mesh, graphNorm, mesh, solution->getDofInterpreter(),
                              solution->getPartitionMap(), maxIters, tol, coarseSolver,
                              useStaticCondensation);

    gmgSolver->setAztecOutput(AztecOutputLevel);
    gmgSolver->setUseConjugateGradient(true);
    gmgSolver->gmgOperator()->setSmootherType(GMGOperator::IFPACK_ADDITIVE_SCHWARZ);
    gmgSolver->gmgOperator()->setSmootherOverlap(smootherOverlap);

    fineSolver = Teuchos::rcp( gmgSolver );
  }
  else
  {
    fineSolver = coarseSolver;
  }

//  if (rank==0) cout << "experimentally starting by solving with MUMPS on the fine mesh.\n";
//  solution->solve( Teuchos::rcp( new MumpsSolver) );

  solution->solve(fineSolver);

#ifdef HAVE_EPETRAEXT_HDF5
  ostringstream dir_name;
  dir_name << "poissonCavityFlow_k" << k;
  HDF5Exporter exporter(mesh,dir_name.str());
  exporter.exportSolution(solution,varFactory,0);
#endif

#ifdef HAVE_AMESOS_MUMPS
  if (useMumps) coarseSolver = Teuchos::rcp( new MumpsSolver(512, true) );
#endif

  solution->reportTimings();
  if (useGMGSolver) gmgSolver->gmgOperator()->reportTimings();
  for (int refIndex=0; refIndex < refCount; refIndex++)
  {
    double energyError = solution->energyErrorTotal();
    GlobalIndexType numFluxDofs = mesh->numFluxDofs();
    if (rank==0)
    {
      cout << "Before refinement " << refIndex << ", energy error = " << energyError;
      cout << " (using " << numFluxDofs << " trace degrees of freedom)." << endl;
    }
    bool printToConsole = printRefinementDetails && (rank==0);
    refinementStrategy.refine(printToConsole);

    if (useStaticCondensation)
    {
      CondensedDofInterpreter* condensedDofInterpreter = dynamic_cast<CondensedDofInterpreter*>(solution->getDofInterpreter().get());
      if (condensedDofInterpreter != NULL)
      {
        condensedDofInterpreter->reinitialize();
      }
    }

    GlobalIndexType fineDofs = mesh->globalDofCount();
    GlobalIndexType coarseDofs = k0Mesh->globalDofCount();
    if (rank==0)
    {
      cout << "After refinement, coarse mesh has " << k0Mesh->numActiveElements() << " elements and " << coarseDofs << " dofs.\n";
      cout << "  Fine mesh has " << mesh->numActiveElements() << " elements and " << fineDofs << " dofs.\n";
    }

    if (!use3D)
    {
      ostringstream fineMeshLocation, coarseMeshLocation;
      fineMeshLocation << "poissonFineMesh_k" << k << "_ref" << refIndex;
      GnuPlotUtil::writeComputationalMeshSkeleton(fineMeshLocation.str(), mesh, true); // true: label cells
      coarseMeshLocation << "poissonCoarseMesh_k" << k << "_ref" << refIndex;
      GnuPlotUtil::writeComputationalMeshSkeleton(coarseMeshLocation.str(), k0Mesh, true); // true: label cells
    }

    if (useGMGSolver)   // create fresh fineSolver now that the meshes have changed:
    {
#ifdef HAVE_AMESOS_MUMPS
      if (useMumps) coarseSolver = Teuchos::rcp( new MumpsSolver(512, true) );
#endif
      double tol = max(relativeTol * energyError, minTol);
      int maxIters = gmgMaxIterations;
      BCPtr zeroBCs = bc->copyImposingZero();
      gmgSolver = new GMGSolver(zeroBCs, k0Mesh, graphNorm, mesh, solution->getDofInterpreter(),
                                solution->getPartitionMap(), maxIters, tol, coarseSolver, useStaticCondensation);
      gmgSolver->setAztecOutput(AztecOutputLevel);
      gmgSolver->setUseDiagonalScaling(useDiagonalScaling);
      fineSolver = Teuchos::rcp( gmgSolver );
    }

    solution->solve(fineSolver);
    solution->reportTimings();
    if (useGMGSolver) gmgSolver->gmgOperator()->reportTimings();

#ifdef HAVE_EPETRAEXT_HDF5
    exporter.exportSolution(solution,varFactory,refIndex+1);
#endif
  }
  double energyErrorTotal = solution->energyErrorTotal();

  GlobalIndexType numFluxDofs = mesh->numFluxDofs();
  GlobalIndexType numGlobalDofs = mesh->numGlobalDofs();
  if (rank==0)
  {
    cout << "Final mesh has " << mesh->numActiveElements() << " elements and " << numFluxDofs << " trace dofs (";
    cout << numGlobalDofs << " total dofs, including fields).\n";
    cout << "Final energy error: " << energyErrorTotal << endl;
  }

#ifdef HAVE_EPETRAEXT_HDF5
  exporter.exportSolution(solution,varFactory,0);
#endif

  if (!use3D)
  {
    GnuPlotUtil::writeComputationalMeshSkeleton("poissonRefinedMesh", mesh, true);
  }

  coarseSolver = Teuchos::rcp((Solver*) NULL); // without this when useMumps = true and running on one rank, we see a crash on exit, which may have to do with MPI being finalized before coarseSolver is deleted.

  return 0;
}
int main(int argc, char *argv[])
{
#ifdef ENABLE_INTEL_FLOATING_POINT_EXCEPTIONS
  cout << "NOTE: enabling floating point exceptions for divide by zero.\n";
  _MM_SET_EXCEPTION_MASK(_MM_GET_EXCEPTION_MASK() & ~_MM_MASK_INVALID);
#endif

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  int rank = Teuchos::GlobalMPISession::getRank();

  Teuchos::CommandLineProcessor cmdp(false,true); // false: don't throw exceptions; true: do return errors for unrecognized options

  const static double PI  = 3.141592653589793238462;

  bool useCondensedSolve = true; // condensed solve not yet compatible with minimum rule meshes

  int k = 2; // poly order for u in every direction, including temporal
  int numCells = 32; // in x, y
  int numTimeCells = 1;
  int numTimeSlabs = -1;
  int numFrames = 201;
  int delta_k = 3;   // test space enrichment: should be 3 for 3D
  int maxRefinements = 0; // maximum # of refinements on each time slab
  bool useMumpsIfAvailable  = true;
  bool useConstantConvection = false;
  double refinementTolerance = 0.1;

  int checkPointFrequency = 50; // output solution and mesh every 50 time slabs

  int previousSolutionTimeSlabNumber = -1;
  string previousSolutionFile = "";
  string previousMeshFile = "";

  cmdp.setOption("polyOrder",&k,"polynomial order for field variable u");
  cmdp.setOption("delta_k", &delta_k, "test space polynomial order enrichment");

  cmdp.setOption("numCells",&numCells,"number of cells in x and y directions");
  cmdp.setOption("numTimeCells",&numTimeCells,"number of time axis cells");
  cmdp.setOption("numTimeSlabs",&numTimeSlabs,"number of time slabs");
  cmdp.setOption("numFrames",&numFrames,"number of frames for export");

  cmdp.setOption("useConstantConvection", "useVariableConvection", &useConstantConvection);

  cmdp.setOption("useCondensedSolve", "useUncondensedSolve", &useCondensedSolve, "use static condensation to reduce the size of the global solve");
  cmdp.setOption("useMumps", "useKLU", &useMumpsIfAvailable, "use MUMPS (if available)");

  cmdp.setOption("refinementTolerance", &refinementTolerance, "relative error beyond which to stop refining");
  cmdp.setOption("maxRefinements", &maxRefinements, "maximum # of refinements on each time slab");

  cmdp.setOption("previousSlabNumber", &previousSolutionTimeSlabNumber, "time slab number of previous solution");
  cmdp.setOption("previousSolution", &previousSolutionFile, "file with previous solution");
  cmdp.setOption("previousMesh", &previousMeshFile, "file with previous mesh");

  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
  {
#ifdef HAVE_MPI
    MPI_Finalize();
#endif
    return -1;
  }

  int H1Order = k + 1;

  VarFactory varFactory;
  // traces:
  VarPtr qHat = varFactory.fluxVar("\\widehat{q}");

  // fields:
  VarPtr u = varFactory.fieldVar("u", L2);

  // test functions:
  VarPtr v = varFactory.testVar("v", HGRAD);

  FunctionPtr x = Function::xn(1);
  FunctionPtr y = Function::yn(1);

  FunctionPtr c;
  if (useConstantConvection)
  {
    c = Function::vectorize(Function::constant(0.5), Function::constant(0.5), Function::constant(1.0));
  }
  else
  {
    c = Function::vectorize(y-0.5, 0.5-x, Function::constant(1.0));
  }
  FunctionPtr n = Function::normal();

  BFPtr bf = Teuchos::rcp( new BF(varFactory) );

  bf->addTerm( u, c * v->grad());
  bf->addTerm(qHat, v);

  double width = 2.0, height = 2.0;
  int horizontalCells = numCells, verticalCells = numCells;
  int depthCells = numTimeCells;
  double x0 = -0.5;
  double y0 = -0.5;
  double t0 = 0;

  double totalTime = 2.0 * PI;

  vector<double> frameTimes;
  for (int i=0; i<numFrames; i++)
  {
    frameTimes.push_back((totalTime*i) / (numFrames-1));
  }

  if (numTimeSlabs==-1)
  {
    // want the number of grid points in temporal direction to be about 2000.  The temporal length is 2 * PI
    numTimeSlabs = (int) 2000 / k;
  }
  double timeLengthPerSlab = totalTime / numTimeSlabs;

  if (rank==0)
  {
    cout << "solving on " << numCells << " x " << numCells << " x " << numTimeCells << " mesh " << "of order " << k << ".\n";
    cout << "numTimeSlabs: " << numTimeSlabs << endl;
  }

  SpatialFilterPtr inflowFilter  = Teuchos::rcp( new InflowFilterForClockwisePlanarRotation (x0,x0+width,y0,y0+height,0.5,0.5));

  vector<double> dimensions;
  dimensions.push_back(width);
  dimensions.push_back(height);
  dimensions.push_back(timeLengthPerSlab);

  vector<int> elementCounts(3);
  elementCounts[0] = horizontalCells;
  elementCounts[1] = verticalCells;
  elementCounts[2] = depthCells;

  vector<double> origin(3);
  origin[0] = x0;
  origin[1] = y0;
  origin[2] = t0;

  Teuchos::RCP<Solver> solver = Teuchos::rcp( new KluSolver );

#ifdef HAVE_AMESOS_MUMPS
  if (useMumpsIfAvailable) solver = Teuchos::rcp( new MumpsSolver );
#endif

//  double errorPercentage = 0.5; // for mesh refinements: ask to refine elements that account for 80% of the error in each step
//  Teuchos::RCP<RefinementStrategy> refinementStrategy;
//  refinementStrategy = Teuchos::rcp( new ErrorPercentageRefinementStrategy( soln, errorPercentage ));

  if (maxRefinements != 0)
  {
    cout << "Warning: maxRefinements is not 0, but the slice exporter implicitly assumes there won't be any refinements.\n";
  }

  MeshPtr mesh;

  MeshPtr prevMesh;
  SolutionPtr prevSoln;

  mesh = MeshFactory::rectilinearMesh(bf, dimensions, elementCounts, H1Order, delta_k, origin);

  if (rank==0) cout << "Initial mesh has " << mesh->getTopology()->activeCellCount() << " active (leaf) cells " << "and " << mesh->globalDofCount() << " degrees of freedom.\n";

  FunctionPtr sideParity = Function::sideParity();

  int lastFrameOutputted = -1;

  SolutionPtr soln;

  IPPtr ip;
  ip = bf->graphNorm();

  FunctionPtr u0 = Teuchos::rcp( new Cone_U0(0.0, 0.25, 0.1, 1.0, false) );

  BCPtr bc = BC::bc();
  bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary.
  bc->addDirichlet(qHat, SpatialFilter::matchingZ(t0), u0);

  MeshPtr initialMesh = mesh;

  int startingSlabNumber;
  if (previousSolutionTimeSlabNumber != -1)
  {
    startingSlabNumber = previousSolutionTimeSlabNumber + 1;

    if (rank==0) cout << "Loading mesh from " << previousMeshFile << endl;

    prevMesh = MeshFactory::loadFromHDF5(bf, previousMeshFile);
    prevSoln = Solution::solution(mesh, bc, RHS::rhs(), ip); // include BC and IP objects for sake of condensed dof interpreter setup...
    prevSoln->setUseCondensedSolve(useCondensedSolve);

    if (rank==0) cout << "Loading solution from " << previousSolutionFile << endl;
    prevSoln->loadFromHDF5(previousSolutionFile);

    double tn = (previousSolutionTimeSlabNumber+1) * timeLengthPerSlab;
    origin[2] = tn;
    mesh = MeshFactory::rectilinearMesh(bf, dimensions, elementCounts, H1Order, delta_k, origin);

    FunctionPtr q_prev = Function::solution(qHat, prevSoln);
    FunctionPtr q_transfer = Teuchos::rcp( new MeshTransferFunction(-q_prev, prevMesh, mesh, tn) ); // negate because the normals go in opposite directions

    bc = BC::bc();
    bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary.
    bc->addDirichlet(qHat, SpatialFilter::matchingZ(tn), q_transfer);

    double t_slab_final = (previousSolutionTimeSlabNumber+1) * timeLengthPerSlab;
    int frameOrdinal = 0;

    while (frameTimes[frameOrdinal] < t_slab_final)
    {
      lastFrameOutputted = frameOrdinal++;
    }
  }
  else
  {
    startingSlabNumber = 0;
  }


#ifdef HAVE_EPETRAEXT_HDF5
  ostringstream dir_name;
  dir_name << "spacetime_slice_convectingCone_k" << k << "_startSlab" << startingSlabNumber;
  map<GlobalIndexType,GlobalIndexType> cellMap;
  MeshPtr meshSlice = MeshTools::timeSliceMesh(initialMesh, 0, cellMap, H1Order);
  HDF5Exporter sliceExporter(meshSlice,dir_name.str());
#endif

  soln = Solution::solution(mesh, bc, RHS::rhs(), ip);
  soln->setUseCondensedSolve(useCondensedSolve);

  for(int timeSlab = startingSlabNumber; timeSlab<numTimeSlabs; timeSlab++)
  {
    double energyThreshold = 0.2; // for mesh refinements: ask to refine elements that account for 80% of the error in each step
    Teuchos::RCP<RefinementStrategy> refinementStrategy;
    refinementStrategy = Teuchos::rcp( new RefinementStrategy( soln, energyThreshold ));

    FunctionPtr u_spacetime = Function::solution(u, soln);

    double relativeEnergyError;
    int refNumber = 0;

//    {
//      // DEBUGGING: just to try running the time slicing:
//      double t_slab_final = (timeStep+1) * timeLengthPerSlab;
//      int frameOrdinal = lastFrameOutputted + 1;
//      while (frameTimes[frameOrdinal] < t_slab_final) {
//        FunctionPtr u_spacetime = Function::solution(u, soln);
//        ostringstream dir_name;
//        dir_name << "spacetime_slice_convectingCone_k" << k;
//        MeshTools::timeSliceExport(dir_name.str(), mesh, u_spacetime, frameTimes[frameOrdinal], "u_slice");
//
//        cout << "Exported frame " << frameOrdinal << ", t=" << frameTimes[frameOrdinal] << endl;
//        frameOrdinal++;
//      }
//    }

    do
    {
      soln->solve(solver);
      soln->reportTimings();

#ifdef HAVE_EPETRAEXT_HDF5
      ostringstream dir_name;
      dir_name << "spacetime_convectingCone_k" << k << "_t" << timeSlab;
      HDF5Exporter exporter(soln->mesh(),dir_name.str());
      exporter.exportSolution(soln, varFactory);

      if (rank==0) cout << "Exported HDF solution for time slab to directory " << dir_name.str() << endl;
//      string u_name = "u_spacetime";
//      exporter.exportFunction(u_spacetime, u_name);

      ostringstream file_name;
      file_name << dir_name.str();

      bool saveSolutionAndMeshForThisSlab = ((timeSlab + 1) % checkPointFrequency == 0); // +1 so that first output is nth, not first
      if (saveSolutionAndMeshForThisSlab)
      {
        dir_name << ".soln";
        soln->saveToHDF5(dir_name.str());
        if (rank==0) cout << endl << "wrote " << dir_name.str() << endl;

        file_name << ".mesh";
        soln->mesh()->saveToHDF5(file_name.str());
      }
#endif
      FunctionPtr u_soln = Function::solution(u, soln);

      double solnNorm = u_soln->l2norm(mesh);

      double energyError = soln->energyErrorTotal();
      relativeEnergyError = energyError / solnNorm;

      if (rank==0)
      {
        cout << "Relative energy error for refinement " << refNumber++ << ": " << relativeEnergyError << endl;
      }

      if ((relativeEnergyError > refinementTolerance) && (refNumber < maxRefinements))
      {
        refinementStrategy->refine();
        if (rank==0)
        {
          cout << "After refinement, mesh has " << mesh->getTopology()->activeCellCount() << " active (leaf) cells " << "and " << mesh->globalDofCount() << " degrees of freedom.\n";
        }
      }

    }
    while ((relativeEnergyError > refinementTolerance) && (refNumber < maxRefinements));

    double t_slab_final = (timeSlab+1) * timeLengthPerSlab;
    int frameOrdinal = lastFrameOutputted + 1;
    vector<double> timesForSlab;
    while (frameTimes[frameOrdinal] < t_slab_final)
    {
      double t = frameTimes[frameOrdinal];
      if (rank==0) cout << "exporting t=" << t << " on slab " << timeSlab << endl;
      FunctionPtr sliceFunction = MeshTools::timeSliceFunction(mesh, cellMap, u_spacetime, t);
      sliceExporter.exportFunction(sliceFunction, "u_slice", t);
      lastFrameOutputted = frameOrdinal++;
    }

    // set up next mesh/solution:
    FunctionPtr q_prev = Function::solution(qHat, soln);

//    cout << "Error in setup of q_prev: simple solution doesn't know about the map from the previous time slab to the current one. (TODO: fix this.)\n";

    double tn = (timeSlab+1) * timeLengthPerSlab;
    origin[2] = tn;
    mesh = MeshFactory::rectilinearMesh(bf, dimensions, elementCounts, H1Order, delta_k, origin);

    FunctionPtr q_transfer = Teuchos::rcp( new MeshTransferFunction(-q_prev, soln->mesh(), mesh, tn) ); // negate because the normals go in opposite directions

    bc = BC::bc();
    bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary.
    bc->addDirichlet(qHat, SpatialFilter::matchingZ(tn), q_transfer);

    // IMPORTANT: now that we are ready to step to next soln, nullify BC.  If we do not do this, then we have an RCP chain
    //            that extends back to the first time slab, effectively a memory leak.
    soln->setBC(BC::bc());

    soln = Solution::solution(mesh, bc, RHS::rhs(), ip);
    soln->setUseCondensedSolve(useCondensedSolve);
  }

  return 0;
}