bool CVProjectUtil::RunStudiomdl(
	const std::string &file )
	const std::string game( Prefix() + "/game/" + Game() );
	const std::string src( ModelSrc( file ) );

	const std::string cmd( std::string( "studiomdl -game \"" ) + game + std::string( "\" \"" ) + src + "\"" );

	FILE *cmdp( _popen( cmd.c_str(), "rt" ) );
	if ( !cmdp )
		merr << "Couldn't execute studiomdl command: " << cmd << std::endl;

		return false;

	char buf[ BUFSIZ ];

	while ( !feof( cmdp ) )
		if ( fgets( buf, BUFSIZ, cmdp ) == NULL )

		minfo << "studiomdl: " << buf;
	minfo << std::endl;

	_pclose( cmdp );

	return true;
void process_command_line(int argc, char*argv[], std::string& xml_file)
  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("xml_file", &xml_file, "XML Parameters file");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    throw std::runtime_error("Error parsing command-line.");
// Example driver. From the visible code: registers the "num-threads",
// "verbose" and "filename" options, returns -1 if parsing is not
// PARSE_SUCCESSFUL, prints version and communicator information on rank 0,
// and constructs a KokkosClassic::TBBNode from a parameter list holding
// "Num Threads".
//
// NOTE(review): this block has more opening than closing braces and some
// statements look orphaned; lines appear to be missing from this copy.
// Confirm against the original file before relying on the control flow.
int main(int argc, char *argv[]) {
  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv);

  // Get example parameters from command-line processor
  int numThreads = -1;
  std::string filename("bcsstk14.hb");
  int verbose = 1;
  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("num-threads",&numThreads,"Number of threads.");
  cmdp.setOption("verbose",&verbose,"Verbose (zero for silent).");
  cmdp.setOption("filename",&filename,"Filename for Harwell-Boeing test matrix.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1;
  // NOTE(review): the closing brace of the `if` above is not present here.

  // Say hello, print some communicator info
  Teuchos::RCP<const Teuchos::Comm<int> > comm = Teuchos::createMpiComm<int>(Teuchos::opaqueWrapper<MPI_Comm>(MPI_COMM_WORLD));
  if (comm->getRank() == 0) {
    std::cout << "\n" << Tpetra::version() << std::endl << std::endl;
    // NOTE(review): argv[0] and filename are printed with no separator.
    std::cout << argv[0] << filename << std::endl;
    std::cout << "Comm info: " << *comm;

  // Node type used for this example; configured only with "Num Threads".
  typedef KokkosClassic::TBBNode Node;
  Teuchos::ParameterList params;
  params.set<int>("Num Threads",numThreads);
  Teuchos::RCP<Node> node = Teuchos::rcp(new Node(params));

  if (comm->getRank() == 0) {
    typedef KokkosClassic::DefaultKernels<double,int,Node>::SparseOps DSM;
    KokkosClassic::CrsMatrix<double,int,Node,DSM> *mat = NULL;
    // NOTE(review): both messages below are emitted unconditionally as
    // written; presumably a condition selecting between them is missing.
    std::cout << "Using Kokkos first-touch matrix objects." << std::endl;
    std::cout << "Not using Kokkos first-touch matrix objects." << std::endl;

  // Read Tpetra::CrsMatrix from file
  // NOTE(review): no assignment to A is visible before it is dereferenced
  // below; presumably the call that reads `filename` is missing here.
  Teuchos::RCP< Tpetra::CrsMatrix<double,int,int,Node> > A;
  if (comm->getRank() == 0 && verbose) {
    std::cout << std::endl << A->description() << std::endl << std::endl;


  return 0;
// Example driver. From the visible code: registers the "num-threads",
// "verbose" and "filename" options, returns -1 if parsing is not
// PARSE_SUCCESSFUL, prints version and communicator information on rank 0,
// and constructs a KokkosClassic::DoNotUse::TPINode from a parameter list
// holding "Num Threads".
//
// NOTE(review): this block has more opening than closing braces; lines
// appear to be missing from this copy. Confirm against the original file.
int main(int argc, char *argv[]) {
  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv);

  // Get example parameters from command-line processor
  int numThreads = -1;
  std::string filename("bcsstk14.hb");
  int verbose = 1;
  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("num-threads",&numThreads,"Number of threads.");
  cmdp.setOption("verbose",&verbose,"Verbose (zero for silent).");
  cmdp.setOption("filename",&filename,"Filename for Harwell-Boeing test matrix.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1;
  // NOTE(review): the closing brace of the `if` above is not present here.

  // Say hello, print some communicator info
  Teuchos::RCP<const Teuchos::Comm<int> > comm =
    Teuchos::rcp (new Teuchos::MpiComm<int> (MPI_COMM_WORLD));
  if (comm->getRank () == 0) {
    std::cout << "\n" << Tpetra::version() << std::endl << std::endl;
    // NOTE(review): argv[0] and filename are printed with no separator.
    std::cout << argv[0] << filename << std::endl;
    std::cout << "Comm info: " << *comm;

  // Node type used for this example; configured only with "Num Threads".
  typedef KokkosClassic::DoNotUse::TPINode Node;
  Teuchos::ParameterList params;
  params.set<int>("Num Threads",numThreads);
  Teuchos::RCP<Node> node = Teuchos::rcp(new Node(params));

  // Read Tpetra::CrsMatrix from file
  // NOTE(review): no assignment to A is visible before it is dereferenced
  // below; presumably the call that reads `filename` is missing here.
  Teuchos::RCP< Tpetra::CrsMatrix<double,int,int,Node> > A;
  if (comm->getRank() == 0 && verbose) {
    std::cout << std::endl << A->description() << std::endl << std::endl;


  return 0;
/// Parses the command line and stores the value of the "xml_file" option.
///
/// Only PARSE_HELP_PRINTED and PARSE_ERROR are acted upon; any other parse
/// result (including unrecognized options) is let through silently.
///
/// @param printedHelp  Set to true when the parser reports that help was
///                     printed. It is left untouched otherwise, so callers
///                     should initialize it before the call.
/// @param xml_file     In/out: receives the "xml_file" option value if given.
/// @param argc         Argument count, forwarded to the parser.
/// @param argv         Argument vector, forwarded to the parser.
/// @throws std::runtime_error if the parser returns PARSE_ERROR.
void
process_command_line (bool& printedHelp,
                      std::string& xml_file,
                      int argc,
                      char* argv[])
{
  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("xml_file", &xml_file, "XML Parameters file");
  const auto result = cmdp.parse (argc, argv);

  // mfh 21 Apr 2016: By ignoring options that this executable doesn't
  // recognize, we can pass them through to (e.g.,) Kokkos.

  if (result == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) {
    printedHelp = true; // not an error to ask for help
  }
  else if (result == Teuchos::CommandLineProcessor::PARSE_ERROR) {
    throw std::runtime_error ("Error parsing command-line.");
  }
}
// Example driver: parses the "M", "N", "verbose" and "device" options,
// prints version and communicator information on rank 0, and constructs a
// Kokkos::ThrustGPUNode configured with the requested "Device Number".
//
// Returns -1 when the command line does not parse successfully, 0 otherwise.
int main(int argc, char *argv[]) {
  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession(&argc,&argv);

  // Get example parameters from command-line processor
  int M = 10000;
  int N = 1000;
  int verbose = 1;
  int device = 0;
  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("M",&M,"Global matrix num rows.");
  cmdp.setOption("N",&N,"Global matrix num cols.");
  cmdp.setOption("verbose",&verbose,"Verbose (zero for silent).");
  cmdp.setOption("device",&device,"CUDA device number.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1;
  }

  // Say hello, print some communicator info
  Teuchos::RCP<const Teuchos::Comm<int> > comm = Teuchos::createMpiComm<int>(Teuchos::opaqueWrapper<MPI_Comm>(MPI_COMM_WORLD));
  if (comm->getRank() == 0) {
    std::cout << "\n" << Tpetra::version() << std::endl << std::endl;
    std::cout << argv[0] << ", M == " << M << ", N == " << N << std::endl;
    std::cout << "Comm info: " << *comm;
  }

  // Node type used for this example; configured only with "Device Number".
  typedef Kokkos::ThrustGPUNode Node;
  Teuchos::ParameterList params;
  params.set<int>("Device Number",device);
  Teuchos::RCP<Node> node = Teuchos::rcp(new Node(params));

  // NOTE(review): `verbose`, `blackhole` and `node` are not used after this
  // point in the visible code; the part of the example that consumes them
  // may be missing. Confirm against the original file.

  return 0;
}
// Driver for a Poisson test problem. From the visible code it:
//   1. reads solver/mesh options from the command line,
//   2. declares field, trace, flux and test variables and a bilinear form
//      (poissonBF),
//   3. builds a fine mesh and a coarse ("k0") mesh,
//   4. sets up RHS, Dirichlet BC and a test-space norm (graphNorm),
//   5. creates coarse/fine solvers (optionally a GMGSolver), and
//   6. loops refCount times reporting energy error and mesh sizes.
//
// NOTE(review): this copy has no function braces, several `if` statements
// have lost their braces/`else` branches, the `#ifdef` blocks are not
// terminated, and at least one call is truncated. The notes below flag the
// places where the visible text cannot be what was intended; confirm every
// one against the original file.
int main(int argc, char *argv[])
  // NOTE(review): the message announces floating point exceptions, but no
  // call enabling them is visible here.
  cout << "NOTE: enabling floating point exceptions for divide by zero.\n";

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  int rank = Teuchos::GlobalMPISession::getRank();

  // NOTE(review): both Comm declarations below sit inside the same #ifdef
  // with no visible #else/#endif; presumably the serial one is the #else arm.
#ifdef HAVE_MPI
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
  //cout << "rank: " << rank << " of " << numProcs << endl;
  Epetra_SerialComm Comm;

  Comm.Barrier(); // set breakpoint here to allow debugger attachment to other MPI processes than the one you automatically attached to.

  Teuchos::CommandLineProcessor cmdp(false,true); // false: don't throw exceptions; true: do return errors for unrecognized options

  // Lower bound applied to the per-refinement GMG tolerance (see the
  // max(relativeTol * energyError, minTol) expression in the loop).
  double minTol = 1e-8;

  // Hard-coded; not exposed as a command-line option in the visible code.
  bool use3D = false;
  int refCount = 10;

  int k = 4; // poly order for field variables
  int delta_k = use3D ? 3 : 2;   // test space enrichment
  int k_coarse = 0;

  bool useMumps = true;
  bool useGMGSolver = true;

  bool enforceOneIrregularity = true;
  bool useStaticCondensation = false;
  bool conformingTraces = false;
  bool useDiagonalScaling = false; // of the global stiffness matrix in GMGSolver

  bool printRefinementDetails = false;

  bool useWeightedGraphNorm = true; // graph norm scaled according to units, more or less

  int numCells = 2;

  int AztecOutputLevel = 1;
  int gmgMaxIterations = 10000;
  int smootherOverlap = 0;
  double relativeTol = 1e-6;
  double D = 1.0; // characteristic length scale

  // Command-line overrides for the defaults above.
  cmdp.setOption("polyOrder",&k,"polynomial order for field variable u");
  cmdp.setOption("delta_k", &delta_k, "test space polynomial order enrichment");
  cmdp.setOption("k_coarse", &k_coarse, "polynomial order for field variables on coarse mesh");
  cmdp.setOption("numRefs",&refCount,"number of refinements");
  cmdp.setOption("D", &D, "domain dimension");
  cmdp.setOption("useConformingTraces", "useNonConformingTraces", &conformingTraces);
  cmdp.setOption("enforceOneIrregularity", "dontEnforceOneIrregularity", &enforceOneIrregularity);

  cmdp.setOption("smootherOverlap", &smootherOverlap, "overlap for smoother");

  cmdp.setOption("printRefinementDetails", "dontPrintRefinementDetails", &printRefinementDetails);
  cmdp.setOption("azOutput", &AztecOutputLevel, "Aztec output level");
  cmdp.setOption("numCells", &numCells, "number of cells in the initial mesh");
  cmdp.setOption("useScaledGraphNorm", "dontUseScaledGraphNorm", &useWeightedGraphNorm);
//  cmdp.setOption("gmgTol", &gmgTolerance, "tolerance for GMG convergence");
  cmdp.setOption("relativeTol", &relativeTol, "Energy error-relative tolerance for iterative solver.");
  // NOTE(review): the help text below describes a tolerance, but the option
  // sets gmgMaxIterations; looks like a copy/paste of the gmgTol help string.
  cmdp.setOption("gmgMaxIterations", &gmgMaxIterations, "tolerance for GMG convergence");

  bool enhanceUField = false;
  cmdp.setOption("enhanceUField", "dontEnhanceUField", &enhanceUField);
  cmdp.setOption("useStaticCondensation", "dontUseStaticCondensation", &useStaticCondensation);

  // Abort with -1 on any parse result other than PARSE_SUCCESSFUL.
  // NOTE(review): the #ifdef below has no visible body or #endif; presumably
  // it wrapped an MPI shutdown call before the return.
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
#ifdef HAVE_MPI
    return -1;

  double width = D, height = D, depth = D;

  VarFactory varFactory;
  // fields:
  VarPtr u = varFactory.fieldVar("u", L2);
  VarPtr sigma = varFactory.fieldVar("\\sigma", VECTOR_L2);

  FunctionPtr n = Function::normal();
  // traces:
  VarPtr u_hat;

  // NOTE(review): as written u_hat is assigned twice and the message always
  // prints; presumably the last two statements were the `else` branch.
  if (conformingTraces)
    u_hat = varFactory.traceVar("\\widehat{u}", u);
    cout << "Note: using non-conforming traces.\n";
    u_hat = varFactory.traceVar("\\widehat{u}", u, L2);
  VarPtr sigma_n_hat = varFactory.fluxVar("\\widehat{\\sigma}_{n}", sigma * n);

  // test functions:
  VarPtr tau = varFactory.testVar("\\tau", HDIV);
  VarPtr v = varFactory.testVar("v", HGRAD);

  BFPtr poissonBF = Teuchos::rcp( new BF(varFactory) );
  FunctionPtr alpha = Function::constant(1); // viscosity

  // tau terms:
  poissonBF->addTerm(sigma / alpha, tau);
  poissonBF->addTerm(-u, tau->div()); // (sigma1, tau1)
  poissonBF->addTerm(u_hat, tau * n);

  // v terms:
  poissonBF->addTerm(- sigma, v->grad()); // (mu sigma1, grad v1)
  poissonBF->addTerm( sigma_n_hat, v);

  int horizontalCells = numCells, verticalCells = numCells, depthCells = numCells;

  // NOTE(review): nothing visible fills these two vectors before they are
  // passed to MeshFactory::rectilinearMesh below.
  vector<double> domainDimensions;

  vector<int> elementCounts;

  // NOTE(review): this `if` has no visible body; as written it would govern
  // the declaration that follows.
  if (use3D)

  MeshPtr mesh, k0Mesh;

  int H1Order = k + 1;
  int H1Order_coarse = k_coarse + 1;
  // 2D path: build fine and coarse quad meshes from a shared parameter list.
  if (!use3D)
    Teuchos::ParameterList pl;

    map<int,int> trialOrderEnhancements;

    if (enhanceUField)
      trialOrderEnhancements[u->ID()] = 1;

    BFPtr poissonBilinearForm = poissonBF;

    pl.set("useMinRule", true);
    pl.set("H1Order", H1Order);
    pl.set("delta_k", delta_k);
    pl.set("horizontalElements", horizontalCells);
    pl.set("verticalElements", verticalCells);
    pl.set("divideIntoTriangles", false);
    pl.set("useConformingTraces", conformingTraces);
    pl.set("trialOrderEnhancements", &trialOrderEnhancements);
    pl.set("width", width);

    mesh = MeshFactory::quadMesh(pl);

    // Same parameters, but with the coarse polynomial order.
    pl.set("H1Order", H1Order_coarse);
    k0Mesh = MeshFactory::quadMesh(pl);

    // NOTE(review): as written these overwrite the quad meshes built above;
    // presumably they were the `else` (3D) branch.
    mesh = MeshFactory::rectilinearMesh(poissonBF, domainDimensions, elementCounts, H1Order, delta_k);
    k0Mesh = MeshFactory::rectilinearMesh(poissonBF, domainDimensions, elementCounts, H1Order_coarse, delta_k);

  mesh->registerObserver(k0Mesh); // ensure that the k0 mesh refinements track those of the solution mesh

  // NOTE(review): the "zero" remark predates the addTerm(f * v) call below.
  RHSPtr rhs = RHS::rhs(); // zero
  // presumably sin(pi*x/D) and sin(pi*y/D) — verify Sin_ax / Sin_ay semantics
  FunctionPtr sin_pi_x = Teuchos::rcp( new Sin_ax(PI/D) );
  FunctionPtr sin_pi_y = Teuchos::rcp( new Sin_ay(PI/D) );
  FunctionPtr u_exact = sin_pi_x * sin_pi_y;
  FunctionPtr f = -(2.0 * PI * PI / (D * D)) * sin_pi_x * sin_pi_y;
  rhs->addTerm( f * v );

  // Dirichlet data: u_hat is set to u_exact on the `boundary` filter.
  BCPtr bc = BC::bc();
  SpatialFilterPtr boundary = SpatialFilter::allSpace();

  bc->addDirichlet(u_hat, boundary, u_exact);

  IPPtr graphNorm;

  FunctionPtr h = Teuchos::rcp( new hFunction() );

  // NOTE(review): as written graphNorm is built term by term and then
  // replaced by poissonBF->graphNorm(...) at the end of this section;
  // presumably the trialWeights part was the `else` branch.
  if (useWeightedGraphNorm)
    graphNorm = IP::ip();
    graphNorm->addTerm( tau->div() ); // u
    graphNorm->addTerm( (h / alpha) * tau - h * v->grad() ); // sigma
    graphNorm->addTerm( v ); // boundary term (adjoint to u)
    graphNorm->addTerm( h * tau );

//    // new effort, with the idea that the test norm should be considered in reference space, basically
//    graphNorm = IP::ip();
//    graphNorm->addTerm( tau->div() ); // u
//    graphNorm->addTerm( tau / h - v->grad() ); // sigma
//    graphNorm->addTerm( v / h ); // boundary term (adjoint to u)
//    graphNorm->addTerm( tau / h );
    map<int, double> trialWeights; // on the squared terms in the trial space norm
    trialWeights[u->ID()] = 1.0 / (D * D);
    trialWeights[sigma->ID()] = 1.0;
    graphNorm = poissonBF->graphNorm(trialWeights, 1.0); // 1.0: weight on the L^2 terms

  SolutionPtr solution = Solution::solution(mesh, bc, rhs, graphNorm);

  mesh->registerSolution(solution); // sign up for projection of old solution onto refined cells.

  double energyThreshold = 0.2;
  RefinementStrategy refinementStrategy( solution, energyThreshold );


  // NOTE(review): as written coarseSolver is always replaced by KluSolver
  // and the "not available" message always prints; presumably a
  // preprocessor guard / `else` around the MUMPS path is missing.
  Teuchos::RCP<Solver> coarseSolver, fineSolver;
  if (useMumps)
    coarseSolver = Teuchos::rcp( new MumpsSolver(512, true) );
    cout << "useMumps=true, but MUMPS is not available!\n";
    coarseSolver = Teuchos::rcp( new KluSolver );
  // Not initialized here; only assigned inside the useGMGSolver branches.
  GMGSolver* gmgSolver;

  if (useGMGSolver)
    double tol = relativeTol;
    int maxIters = gmgMaxIterations;
    BCPtr zeroBCs = bc->copyImposingZero();
    // NOTE(review): the constructor call below is truncated after
    // `coarseSolver,` — compare with the complete call inside the loop.
    gmgSolver = new GMGSolver(zeroBCs, k0Mesh, graphNorm, mesh, solution->getDofInterpreter(),
                              solution->getPartitionMap(), maxIters, tol, coarseSolver,


    // The RCP created here wraps the raw gmgSolver pointer.
    fineSolver = Teuchos::rcp( gmgSolver );
    // NOTE(review): presumably the `else` branch (no GMG: use coarseSolver).
    fineSolver = coarseSolver;

//  if (rank==0) cout << "experimentally starting by solving with MUMPS on the fine mesh.\n";
//  solution->solve( Teuchos::rcp( new MumpsSolver) );


  // Output directory name is derived from the polynomial order k.
  ostringstream dir_name;
  dir_name << "poissonCavityFlow_k" << k;
  HDF5Exporter exporter(mesh,dir_name.str());

  if (useMumps) coarseSolver = Teuchos::rcp( new MumpsSolver(512, true) );

  // NOTE(review): no solve call is visible before timings are reported.
  if (useGMGSolver) gmgSolver->gmgOperator()->reportTimings();
  // Refinement loop: report error, (presumably) refine, rebuild the solver.
  // NOTE(review): no refine/solve/export call is visible inside the loop even
  // though the messages say "Before refinement" / "After refinement".
  for (int refIndex=0; refIndex < refCount; refIndex++)
    double energyError = solution->energyErrorTotal();
    GlobalIndexType numFluxDofs = mesh->numFluxDofs();
    if (rank==0)
      cout << "Before refinement " << refIndex << ", energy error = " << energyError;
      cout << " (using " << numFluxDofs << " trace degrees of freedom)." << endl;
    bool printToConsole = printRefinementDetails && (rank==0);

    if (useStaticCondensation)
      CondensedDofInterpreter* condensedDofInterpreter = dynamic_cast<CondensedDofInterpreter*>(solution->getDofInterpreter().get());
      // NOTE(review): the body of this null check is not present here.
      if (condensedDofInterpreter != NULL)

    GlobalIndexType fineDofs = mesh->globalDofCount();
    GlobalIndexType coarseDofs = k0Mesh->globalDofCount();
    if (rank==0)
      cout << "After refinement, coarse mesh has " << k0Mesh->numActiveElements() << " elements and " << coarseDofs << " dofs.\n";
      cout << "  Fine mesh has " << mesh->numActiveElements() << " elements and " << fineDofs << " dofs.\n";

    // 2D only: write both mesh skeletons, named by k and refinement index.
    if (!use3D)
      ostringstream fineMeshLocation, coarseMeshLocation;
      fineMeshLocation << "poissonFineMesh_k" << k << "_ref" << refIndex;
      GnuPlotUtil::writeComputationalMeshSkeleton(fineMeshLocation.str(), mesh, true); // true: label cells
      coarseMeshLocation << "poissonCoarseMesh_k" << k << "_ref" << refIndex;
      GnuPlotUtil::writeComputationalMeshSkeleton(coarseMeshLocation.str(), k0Mesh, true); // true: label cells

    if (useGMGSolver)   // create fresh fineSolver now that the meshes have changed:
      if (useMumps) coarseSolver = Teuchos::rcp( new MumpsSolver(512, true) );
      // Tolerance scales with the current energy error, floored at minTol.
      double tol = max(relativeTol * energyError, minTol);
      int maxIters = gmgMaxIterations;
      BCPtr zeroBCs = bc->copyImposingZero();
      gmgSolver = new GMGSolver(zeroBCs, k0Mesh, graphNorm, mesh, solution->getDofInterpreter(),
                                solution->getPartitionMap(), maxIters, tol, coarseSolver, useStaticCondensation);
      fineSolver = Teuchos::rcp( gmgSolver );

    if (useGMGSolver) gmgSolver->gmgOperator()->reportTimings();

  // Final summary, printed on rank 0.
  double energyErrorTotal = solution->energyErrorTotal();

  GlobalIndexType numFluxDofs = mesh->numFluxDofs();
  GlobalIndexType numGlobalDofs = mesh->numGlobalDofs();
  if (rank==0)
    cout << "Final mesh has " << mesh->numActiveElements() << " elements and " << numFluxDofs << " trace dofs (";
    cout << numGlobalDofs << " total dofs, including fields).\n";
    cout << "Final energy error: " << energyErrorTotal << endl;


  if (!use3D)
    GnuPlotUtil::writeComputationalMeshSkeleton("poissonRefinedMesh", mesh, true);

  coarseSolver = Teuchos::rcp((Solver*) NULL); // without this when useMumps = true and running on one rank, we see a crash on exit, which may have to do with MPI being finalized before coarseSolver is deleted.

  return 0;
// Eigenvalue example driver (LOBPCG solver manager). From the visible code:
// reads a stiffness matrix K (required) and optionally a mass matrix M from
// files named on the command line, builds an Anasazi::BasicEigenproblem,
// solves it with Anasazi::LOBPCGSolMgr, and prints the eigenvalues with
// their scaled direct residuals when --verbose is set on process 0.
// Returns -1 on a bad command line, a missing K filename, or a failed
// setProblem(); returns 0 otherwise.
//
// NOTE(review): in this copy most closing braces are missing and the
// #ifdef blocks have no #else/#endif; the notes below mark where the
// visible text cannot be what was intended. Confirm against the original.
int main(int argc, char *argv[]) {
  bool haveM = false;

  // NOTE(review): this guard tests EPETRA_MPI while the later ones test
  // HAVE_MPI; both Comm declarations sit inside it with no visible #else.
#ifdef EPETRA_MPI  
  // Initialize MPI  
  // NOTE(review): no MPI_Init call is visible here despite the comment above.
  Epetra_MpiComm Comm( MPI_COMM_WORLD );  
  Epetra_SerialComm Comm;  
  int MyPID = Comm.MyPID();

  // Solver defaults; each can be overridden from the command line below.
  int nev = 5;
  int blockSize = 5;
  int maxIterations = 1000;
  double tol = 1.0e-8;
  bool verbose=false, locking=false, fullOrtho=true;
  std::string k_filename = "";
  std::string m_filename = "";
  std::string which = "SM";
  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("nev",&nev,"Number of eigenvalues to compute.");
  cmdp.setOption("blocksize",&blockSize,"Block size used in LOBPCG.");
  cmdp.setOption("maxiters",&maxIterations,"Maximum number of iterations used in LOBPCG.");
  cmdp.setOption("tol",&tol,"Convergence tolerance requested for computed eigenvalues.");
  cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
  cmdp.setOption("locking","nolocking",&locking,"Use locking of converged eigenvalues.");
  cmdp.setOption("fullortho","nofullortho",&fullOrtho,"Use full orthogonalization.");
  cmdp.setOption("sort",&which,"Targetted eigenvalues (SM,LM,SR,or LR).");
  cmdp.setOption("K-filename",&k_filename,"Filename and path of the stiffness matrix.");
  cmdp.setOption("M-filename",&m_filename,"Filename and path of the mass matrix.");
  // NOTE(review): the #ifdef HAVE_MPI lines in the two error paths below have
  // no visible body or #endif; presumably each wrapped an MPI shutdown call.
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
#ifdef HAVE_MPI
    return -1;
  // K is mandatory; M is optional and switches to the generalized problem.
  if (k_filename=="") {
    cout << "The matrix K must be supplied through an input file!!!" << endl;
#ifdef HAVE_MPI
    return -1;
  if (m_filename!="") {
    haveM = true;
  //******************Set up the problem to be solved*********************
  // *****Read in matrix from file******
  Teuchos::RCP<Epetra_Map> Map;
  Teuchos::RCP<Epetra_CrsMatrix> K, M;
  EpetraExt::readEpetraLinearSystem( k_filename, Comm, &K, &Map );

  // Reading M passes &Map again, so Map is written by this call as well.
  if (haveM) {
    EpetraExt::readEpetraLinearSystem( m_filename, Comm, &M, &Map );
  // Start the block Davidson iteration 
  // NOTE(review): the line above says "block Davidson", but the solver
  // manager constructed below is Anasazi::LOBPCGSolMgr.
  //  Variables used for the LOBPCG Method
  // Set verbosity level
  int verbosity = Anasazi::Errors + Anasazi::Warnings;
  if (verbose) {
    verbosity += Anasazi::FinalSummary + Anasazi::TimingDetails;
  // Create parameter list to pass into solver
  Teuchos::ParameterList MyPL;
  MyPL.set( "Verbosity", verbosity );
  MyPL.set( "Which", which );
  MyPL.set( "Block Size", blockSize );
  MyPL.set( "Maximum Iterations", maxIterations );
  MyPL.set( "Convergence Tolerance", tol );
  MyPL.set( "Use Locking", locking );
  // Locking tolerance is one tenth of the convergence tolerance.
  MyPL.set( "Locking Tolerance", tol/10 );
  MyPL.set( "Full Ortho", fullOrtho );

  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef Anasazi::MultiVecTraits<double, MV> MVT;
  typedef Anasazi::OperatorTraits<double, MV, OP> OPT;
  // Create the eigenproblem to be solved.
  // NOTE(review): ivec is handed to the eigenproblem without any visible
  // initialization of its entries.
  Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(*Map, blockSize) );
  Teuchos::RCP<Anasazi::BasicEigenproblem<double, MV, OP> > MyProblem;
  // Generalized problem (K, M) when M was supplied, standard problem otherwise.
  if (haveM) {
    MyProblem = Teuchos::rcp( new Anasazi::BasicEigenproblem<double, MV, OP>( K, M, ivec ) );
  else {
    MyProblem = Teuchos::rcp( new Anasazi::BasicEigenproblem<double, MV, OP>( K, ivec ) );
  // Inform the eigenproblem that (K,M) is Hermitian
  // NOTE(review): no call follows the comment above in this copy.

  // Set the number of eigenvalues requested 
  MyProblem->setNEV( nev );

  // Inform the eigenproblem that you are finished passing it information
  bool boolret = MyProblem->setProblem();
  if (boolret != true) {
    if (verbose && MyPID == 0) {
      cout << "Anasazi::BasicEigenproblem::setProblem() returned with error." << endl;
#ifdef HAVE_MPI
    MPI_Finalize() ;
    return -1;

  // Initialize the LOBPCG solver
  Anasazi::LOBPCGSolMgr<double, MV, OP> MySolverMgr(MyProblem, MyPL);
  // Solve the problem to the specified tolerances or length
  Anasazi::ReturnType returnCode = MySolverMgr.solve();
  // A non-converged solve is only reported; execution continues.
  if (returnCode != Anasazi::Converged && MyPID==0 && verbose) {
    cout << "Anasazi::EigensolverMgr::solve() returned unconverged." << endl;
  // Get the eigenvalues and eigenvectors from the eigenproblem
  Anasazi::Eigensolution<double,MV> sol = MyProblem->getSolution();
  std::vector<Anasazi::Value<double> > evals = sol.Evals;
  Teuchos::RCP<MV> evecs = sol.Evecs;
  std::vector<int> index = sol.index;
  int numev = sol.numVecs;

  if (numev > 0) {
    // Compute residuals.
    Teuchos::LAPACK<int,double> lapack;
    std::vector<double> normEV(numev);
    // Get storage
    Teuchos::RCP<Epetra_MultiVector> Kevecs, Mevecs;
    // B carries the real parts of the eigenvalues on its diagonal.
    Teuchos::SerialDenseMatrix<int,double> B(numev,numev);
    for (int i=0; i<numev; i++) {B(i,i) = evals[i].realpart;}
    // Compute K*evecs
    Kevecs = Teuchos::rcp(new Epetra_MultiVector(*Map,numev) );
    OPT::Apply( *K, *evecs, *Kevecs );

    // Compute M*evecs
    if (haveM) {
      Mevecs = Teuchos::rcp(new Epetra_MultiVector(*Map,numev) );
      OPT::Apply( *M, *evecs, *Mevecs );
    else {
      // No mass matrix: reuse the eigenvectors themselves in place of M*evecs.
      Mevecs = evecs;

    // Compute K*evecs - lambda*M*evecs and its norm
    MVT::MvTimesMatAddMv( -1.0, *Mevecs, B, 1.0, *Kevecs );
    MVT::MvNorm( *Kevecs, normEV );
    // Scale the norms by the eigenvalue
    // NOTE(review): there is no guard here against an eigenvalue whose real
    // part is zero.
    for (int i=0; i<numev; i++) {
      normEV[i] /= Teuchos::ScalarTraits<double>::magnitude( evals[i].realpart );
    // Output computed eigenvalues and their direct residuals
    if (verbose && MyPID==0) {
      cout.setf(std::ios_base::right, std::ios_base::adjustfield);
      cout<<endl<< "Actual Residuals"<<endl;
      cout<< std::setw(16) << "Real Part"
        << std::setw(20) << "Direct Residual"<< endl;
      for (int i=0; i<numev; i++) {
        cout<< std::setw(16) << evals[i].realpart 
          << std::setw(20) << normEV[i] << endl;
  // NOTE(review): unlike the earlier error path, this MPI_Finalize() has no
  // visible HAVE_MPI guard, and no matching MPI_Init is visible.
  MPI_Finalize() ;
  return 0;
} // end LOBPCGEpetraExFile.cpp
int main(int argc, char *argv[]) {

  // Initialize MPI
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
  Epetra_SerialComm Comm;

  bool testFailed;
  bool boolret;
  int MyPID = Comm.MyPID();

  bool verbose = true;
  bool debug = false;
  std::string which("SM");

  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
  cmdp.setOption("debug","nodebug",&debug,"Print debugging information.");
  cmdp.setOption("sort",&which,"Targetted eigenvalues (SM,LM,SR,LR,SI,or LI).");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
#ifdef HAVE_MPI
    return -1;

  typedef double ScalarType;
  typedef Teuchos::ScalarTraits<ScalarType>          ScalarTypeTraits;
  typedef ScalarTypeTraits::magnitudeType            MagnitudeType;
  typedef Epetra_MultiVector                         MV;
  typedef Epetra_Operator                            OP;
  typedef Anasazi::MultiVecTraits<ScalarType,MV>     MVTraits;
  typedef Anasazi::OperatorTraits<ScalarType,MV,OP>  OpTraits;

  //  Dimension of the matrix
  int nx = 10;        // Discretization points in any one direction.
  int NumGlobalElements = nx*nx;  // Size of matrix nx*nx

  // Construct a Map that puts approximately the same number of
  // equations on each processor.

  Epetra_Map Map(NumGlobalElements, 0, Comm);

  // Get update list and number of local equations from newly created Map.

  int NumMyElements = Map.NumMyElements();

  std::vector<int> MyGlobalElements(NumMyElements);

  // Create an integer vector NumNz that is used to build the Petra Matrix.
  // NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation
  // on this processor
  std::vector<int> NumNz(NumMyElements);

  /* We are building a matrix of block structure:

      | T -I          |
      |-I  T -I       |
      |   -I  T       |
      |        ...  -I|
      |           -I T|

   where each block is dimension nx by nx and the matrix is on the order of
   nx*nx.  The block T is a tridiagonal matrix.

  for (int i=0; i<NumMyElements; i++) {
    if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1 ||
        MyGlobalElements[i] == nx-1 || MyGlobalElements[i] == nx*(nx-1) ) {
      NumNz[i] = 3;
    else if (MyGlobalElements[i] < nx || MyGlobalElements[i] > nx*(nx-1) ||
             MyGlobalElements[i]%nx == 0 || (MyGlobalElements[i]+1)%nx == 0) {
      NumNz[i] = 4;
    else {
      NumNz[i] = 5;

  // Create an Epetra_Matrix

  Teuchos::RCP<Epetra_CrsMatrix> A = Teuchos::rcp( new Epetra_CrsMatrix(Copy, Map, &NumNz[0]) );

  // Diffusion coefficient, can be set by user.
  // When rho*h/2 <= 1, the discrete convection-diffusion operator has real eigenvalues.
  // When rho*h/2 > 1, the operator has complex eigenvalues.
  double rho = 2*(nx+1);

  // Compute coefficients for discrete convection-diffution operator
  const double one = 1.0;
  std::vector<double> Values(4);
  std::vector<int> Indices(4);
  double h = one /(nx+1);
  double h2 = h*h;
  double c = 5.0e-01*rho/ h;
  Values[0] = -one/h2 - c; Values[1] = -one/h2 + c; Values[2] = -one/h2; Values[3]= -one/h2;
  double diag = 4.0 / h2;
  int NumEntries, info;

  for (int i=0; i<NumMyElements; i++)
    if (MyGlobalElements[i]==0)
      Indices[0] = 1;
      Indices[1] = nx;
      NumEntries = 2;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    else if (MyGlobalElements[i] == nx*(nx-1))
      Indices[0] = nx*(nx-1)+1;
      Indices[1] = nx*(nx-2);
      NumEntries = 2;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    else if (MyGlobalElements[i] == nx-1)
      Indices[0] = nx-2;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
      Indices[0] = 2*nx-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
    else if (MyGlobalElements[i] == NumGlobalElements-1)
      Indices[0] = NumGlobalElements-2;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
      Indices[0] = nx*(nx-1)-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
    else if (MyGlobalElements[i] < nx)
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    else if (MyGlobalElements[i] > nx*(nx-1))
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      NumEntries = 3;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    else if (MyGlobalElements[i]%nx == 0)
      Indices[0] = MyGlobalElements[i]+1;
      Indices[1] = MyGlobalElements[i]-nx;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      assert( info==0 );
    else if ((MyGlobalElements[i]+1)%nx == 0)
      Indices[0] = MyGlobalElements[i]-nx;
      Indices[1] = MyGlobalElements[i]+nx;
      NumEntries = 2;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      assert( info==0 );
      Indices[0] = MyGlobalElements[i]-1;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      Indices[3] = MyGlobalElements[i]+nx;
      NumEntries = 4;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert( info==0 );
    // Put in the diagonal entry
    info = A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, &MyGlobalElements[i]);
    assert( info==0 );

  // Finish up
  info = A->FillComplete();
  assert( info==0 );
  A->SetTracebackMode(1); // Shutdown Epetra Warning tracebacks

  // Start the block Davidson iteration
  //  Variables used for the Generalized Davidson Method
  int nev = 4;
  int blockSize = 1;
  int maxDim = 50;
  int restartDim = 10;
  int maxRestarts = 500;
  double tol = 1e-10;

  // Set verbosity level
  int verbosity = Anasazi::Errors + Anasazi::Warnings;
  if (verbose) {
    verbosity += Anasazi::FinalSummary + Anasazi::TimingDetails;
  if (debug) {
    verbosity += Anasazi::Debug;
  // Create parameter list to pass into solver manager
  Teuchos::ParameterList MyPL;
  MyPL.set( "Verbosity", verbosity );
  MyPL.set( "Which", which );
  MyPL.set( "Block Size", blockSize );
  MyPL.set( "Maximum Subspace Dimension", maxDim);
  MyPL.set( "Restart Dimension", restartDim);
  MyPL.set( "Maximum Restarts", maxRestarts );
  MyPL.set( "Convergence Tolerance", tol );
  MyPL.set( "Relative Convergence Tolerance", true );
  MyPL.set( "Initial Guess", "User" );

  // Create an Epetra_MultiVector for an initial vector to start the solver.
  // Note:  This needs to have the same number of columns as the blocksize.
  Teuchos::RCP<Epetra_MultiVector> ivec = Teuchos::rcp( new Epetra_MultiVector(Map, blockSize) );

  // Create the eigenproblem.
  Teuchos::RCP<Anasazi::BasicEigenproblem<double, MV, OP> > MyProblem = Teuchos::rcp(
    new Anasazi::BasicEigenproblem<double,MV,OP>() );

  // Inform the eigenproblem that the operator A is non-Hermitian

  // Set the number of eigenvalues requested
  MyProblem->setNEV( nev );

  // Inform the eigenproblem that you are finishing passing it information
  boolret = MyProblem->setProblem();
  if (boolret != true) {
    if (verbose && MyPID == 0) {
      std::cout << "Anasazi::BasicEigenproblem::setProblem() returned with error." << std::endl;
#ifdef HAVE_MPI
    MPI_Finalize() ;
    return -1;

  // Initialize the Block Arnoldi solver
  Anasazi::GeneralizedDavidsonSolMgr<double, MV, OP> MySolverMgr(MyProblem, MyPL);

  // Solve the problem to the specified tolerances or length
  Anasazi::ReturnType returnCode = MySolverMgr.solve();
  testFailed = false;
  if (returnCode != Anasazi::Converged && MyPID==0 && verbose) {
    testFailed = true;

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Anasazi::Eigensolution<ScalarType,MV> sol = MyProblem->getSolution();
  std::vector<Anasazi::Value<ScalarType> > evals = sol.Evals;
  Teuchos::RCP<MV> evecs = sol.Evecs;
  std::vector<int> index = sol.index;
  int numev = sol.numVecs;

  // Output computed eigenvalues and their direct residuals
  if (verbose && MyPID==0) {
    int numritz = (int)evals.size();
    std::cout.setf(std::ios_base::right, std::ios_base::adjustfield);
    std::cout<<std::endl<< "Computed Ritz Values"<< std::endl;
    std::cout<< std::setw(16) << "Real Part"
        << std::setw(16) << "Imag Part"
        << std::endl;
    for (int i=0; i<numritz; i++) {
      std::cout<< std::setw(16) << evals[i].realpart
          << std::setw(16) << evals[i].imagpart
          << std::endl;

  if (numev > 0) {
    // Compute residuals.
    Teuchos::LAPACK<int,double> lapack;
    std::vector<double> normA(numev);

    // The problem is non-Hermitian.
    int i=0;
    std::vector<int> curind(1);
    std::vector<double> resnorm(1), tempnrm(1);
    Teuchos::RCP<MV> tempAevec;
    Teuchos::RCP<const MV> evecr, eveci;
    Epetra_MultiVector Aevec(Map,numev);

    // Compute A*evecs
    OpTraits::Apply( *A, *evecs, Aevec );

    Teuchos::SerialDenseMatrix<int,double> Breal(1,1), Bimag(1,1);
    while (i<numev) {
      if (index[i]==0) {
        // Get a view of the current eigenvector (evecr)
        curind[0] = i;
        evecr = MVTraits::CloneView( *evecs, curind );

        // Get a copy of A*evecr
        tempAevec = MVTraits::CloneCopy( Aevec, curind );

        // Compute A*evecr - lambda*evecr
        Breal(0,0) = evals[i].realpart;
        MVTraits::MvTimesMatAddMv( -1.0, *evecr, Breal, 1.0, *tempAevec );

        // Compute the norm of the residual and increment counter
        MVTraits::MvNorm( *tempAevec, resnorm );
        normA[i] = resnorm[0] / Teuchos::ScalarTraits<MagnitudeType>::magnitude( evals[i].realpart );
      } else {
        // Get a view of the real part of the eigenvector (evecr)
        curind[0] = i;
        evecr = MVTraits::CloneView( *evecs, curind );

        // Get a copy of A*evecr
        tempAevec = MVTraits::CloneCopy( Aevec, curind );

        // Get a view of the imaginary part of the eigenvector (eveci)
        curind[0] = i+1;
        eveci = MVTraits::CloneView( *evecs, curind );

        // Set the eigenvalue into Breal and Bimag
        Breal(0,0) = evals[i].realpart;
        Bimag(0,0) = evals[i].imagpart;

        // Compute A*evecr - evecr*lambdar + eveci*lambdai
        MVTraits::MvTimesMatAddMv( -1.0, *evecr, Breal, 1.0, *tempAevec );
        MVTraits::MvTimesMatAddMv( 1.0, *eveci, Bimag, 1.0, *tempAevec );
        MVTraits::MvNorm( *tempAevec, tempnrm );

        // Get a copy of A*eveci
        tempAevec = MVTraits::CloneCopy( Aevec, curind );

        // Compute A*eveci - eveci*lambdar - evecr*lambdai
        MVTraits::MvTimesMatAddMv( -1.0, *evecr, Bimag, 1.0, *tempAevec );
        MVTraits::MvTimesMatAddMv( -1.0, *eveci, Breal, 1.0, *tempAevec );
        MVTraits::MvNorm( *tempAevec, resnorm );

        // Compute the norms and scale by magnitude of eigenvalue
        normA[i] = lapack.LAPY2( tempnrm[0], resnorm[0] ) /
          lapack.LAPY2( evals[i].realpart, evals[i].imagpart );
        normA[i+1] = normA[i];


    // Output computed eigenvalues and their direct residuals
    if (verbose && MyPID==0) {
      std::cout.setf(std::ios_base::right, std::ios_base::adjustfield);
      std::cout<<std::endl<< "Actual Residuals"<<std::endl;
      std::cout<< std::setw(16) << "Real Part"
          << std::setw(16) << "Imag Part"
          << std::setw(20) << "Direct Residual"<< std::endl;
      for (int j=0; j<numev; j++) {
        std::cout<< std::setw(16) << evals[j].realpart
            << std::setw(16) << evals[j].imagpart
            << std::setw(20) << normA[j] << std::endl;
        if ( normA[j] > tol ) {
          testFailed = true;


  if (testFailed) {
    if (verbose && MyPID==0) {
      std::cout << "End Result: TEST FAILED" << std::endl;
    return -1;
  // Default return value
  if (verbose && MyPID==0) {
    std::cout << "End Result: TEST PASSED" << std::endl;

  return 0;
int main(int argc, char *argv[])
  int ierr = 0, i;

#ifdef HAVE_MPI
  Epetra_MpiComm Comm( MPI_COMM_WORLD );
  Epetra_SerialComm Comm;

  bool success = false;
  bool verbose = true;
  try {
    //int myRank = Comm.MyPID();

    //int numGlobalElements = 10000000;
    int numGlobalElements = 100;

    Teuchos::CommandLineProcessor cmdp(false,true);
    cmdp.setOption("numGlobalElements",&numGlobalElements,"Global problem size.");
    if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
      throw -1;

    Epetra_Map Map(numGlobalElements, 0, Comm);

    int NumMyElements = Map.NumMyElements();

    std::vector<int> MyGlobalElements(NumMyElements);

    int NumNz = 3;
    // std::vector<int> NumNz(NumMyElements);
    // for (i=0; i<NumMyElements; i++)
    //     if (MyGlobalElements[i]==0 || MyGlobalElements[i] == numGlobalElements-1)
    //       NumNz[i] = 2;
    //     else
    //       NumNz[i] = 3;
    //  Epetra_CrsMatrix A(Copy, Map, &NumNz[0]);

    PrintMemoryUsage("Initial memory usage", "epetra-init.heap");

    Epetra_CrsMatrix A(Copy, Map, NumNz);

    PrintMemoryUsage("Memory after CrsMatrix constructor", "epetra-after-ctor.heap");

    std::vector<double> Values(2);
    Values[0] = -1.0; Values[1] = -1.0;
    std::vector<int> Indices(2);
    double two = 2.0;
    int NumEntries;

    for (i=0; i<NumMyElements; i++) {

      if (MyGlobalElements[i]==0) {
        Indices[0] = 1;
        NumEntries = 1;
      } else if (MyGlobalElements[i] == numGlobalElements-1) {
        Indices[0] = numGlobalElements-2;
        NumEntries = 1;
      } else {
        Indices[0] = MyGlobalElements[i]-1;
        Indices[1] = MyGlobalElements[i]+1;
        NumEntries = 2;

      ierr = A.InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);

      // Put in the diagonal entry
      ierr = A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i]);

    PrintMemoryUsage("Memory after InsertGlobalValues()", "epetra-after-insert.heap");

    ierr = A.FillComplete();
    assert(ierr == 0);

    PrintMemoryUsage("Memory after FillComplete()", "epetra-after-fillcomplete.heap");


    success = true;
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef HAVE_MPI

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
// Driver whose output names are prefixed "confusion": parses command-line
// options, declares test/trial variables, assembles a bilinear form and inner
// products, registers Dirichlet data on uhat, builds a mesh, runs numRefs
// refinement passes and exports the solution through HDF5Exporter.
// Returns -1 when command-line parsing fails, 0 otherwise.
// NOTE(review): in this excerpt the closing braces and the #else/#endif lines
// of the preprocessor conditionals are not visible; confirm the block
// structure against the original file before relying on the notes below.
int main(int argc, char *argv[]) {
#ifdef HAVE_MPI
  Teuchos::GlobalMPISession mpiSession(&argc, &argv,0);
  int rank=mpiSession.getRank();
  int numProcs=mpiSession.getNProc();
  // NOTE(review): second declaration of rank/numProcs -- presumably the
  // non-MPI (#else) branch; the #else/#endif are not visible here.
  int rank = 0;
  int numProcs = 1;
  // Compile-time switches; none of these four are exposed on the command line.
  bool useCompliantGraphNorm = false;
  bool enforceOneIrregularity = true;
  bool writeStiffnessMatrices = false;
  bool writeWorstCaseGramMatrices = false;
  int numRefs = 10;
  // problem parameters:
  double eps = 1e-8;
  // NOTE(review): no visible statement fills beta_const before it is used in
  // the bilinear form and inner product below -- confirm.
  vector<double> beta_const;
  int k = 2, delta_k = 2;
  Teuchos::CommandLineProcessor cmdp(false,true); // false: don't throw exceptions; true: do return errors for unrecognized options
  cmdp.setOption("polyOrder",&k,"polynomial order for field variable u");
  cmdp.setOption("delta_k", &delta_k, "test space polynomial order enrichment");
  cmdp.setOption("numRefs",&numRefs,"number of refinements");
  cmdp.setOption("eps", &eps, "epsilon");
  // Abort with -1 when the command line cannot be parsed.
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
#ifdef HAVE_MPI
    return -1;
  // The order handed to the mesh factory is one above the field order k.
  int H1Order = k + 1;
  // Echo the run configuration on rank 0 only.
  if (rank==0) {
    string normChoice = useCompliantGraphNorm ? "unit-compliant graph norm" : "standard graph norm";
    cout << "Using " << normChoice << "." << endl;
    cout << "eps = " << eps << endl;
    cout << "numRefs = " << numRefs << endl;
    cout << "p = " << k << endl;
  ////////////////////   DECLARE VARIABLES   ///////////////////////
  // define test variables
  VarFactory varFactory; 
  VarPtr tau = varFactory.testVar("\\tau", HDIV);
  VarPtr v = varFactory.testVar("v", HGRAD);
  // define trial variables
  VarPtr uhat = varFactory.traceVar("\\widehat{u}");
  VarPtr beta_n_u_minus_sigma_n = varFactory.fluxVar("\\widehat{\\beta \\cdot n u - \\sigma_{n}}");
  // The field variable u is declared with HGRAD only for the compliant graph norm.
  VarPtr u;
  if (useCompliantGraphNorm) {
    u = varFactory.fieldVar("u",HGRAD);
  } else {
    u = varFactory.fieldVar("u");
  VarPtr sigma1 = varFactory.fieldVar("\\sigma_1");
  VarPtr sigma2 = varFactory.fieldVar("\\sigma_2");
  ////////////////////   DEFINE BILINEAR FORM   ///////////////////////
  BFPtr confusionBF = Teuchos::rcp( new BF(varFactory) );
  // tau terms:
  confusionBF->addTerm(sigma1 / eps, tau->x());
  confusionBF->addTerm(sigma2 / eps, tau->y());
  confusionBF->addTerm(u, tau->div());
  confusionBF->addTerm(-uhat, tau->dot_normal());
  // v terms:
  confusionBF->addTerm( sigma1, v->dx() );
  confusionBF->addTerm( sigma2, v->dy() );
  confusionBF->addTerm( beta_const * u, - v->grad() );
  confusionBF->addTerm( beta_n_u_minus_sigma_n, v);
  ////////////////////   DEFINE INNER PRODUCT(S)   ///////////////////////
  // mathematician's norm
  // NOTE(review): no terms are added to mathIP in the visible code and it is
  // not referenced again below.
  IPPtr mathIP = Teuchos::rcp(new IP());


  // quasi-optimal norm
  IPPtr qoptIP = Teuchos::rcp(new IP);
  if (!useCompliantGraphNorm) {
    qoptIP->addTerm( tau / eps + v->grad() );
    qoptIP->addTerm( beta_const * v->grad() - tau->div() );
    qoptIP->addTerm( v );
  } else {
    FunctionPtr h = Teuchos::rcp( new hFunction );
    // here, we're aiming at optimality in 1/h^2 |u|^2 + 1/eps^2 |sigma|^2
    qoptIP->addTerm( tau + eps * v->grad() );
    qoptIP->addTerm( h * beta_const * v->grad() - tau->div() );
  ////////////////////   SPECIFY RHS   ///////////////////////
  RHSPtr rhs = RHS::rhs();
  FunctionPtr f = Teuchos::rcp( new ConstantScalarFunction(0.0) );
  rhs->addTerm( f * v ); // obviously, with f = 0 adding this term is not necessary!

  ////////////////////   CREATE BCs   ///////////////////////
  BCPtr bc = BC::bc();
  SpatialFilterPtr inflowBoundary = Teuchos::rcp( new InflowSquareBoundary );
  SpatialFilterPtr outflowBoundary = Teuchos::rcp( new OutflowSquareBoundary );
  FunctionPtr u0 = Teuchos::rcp( new U0 );
  // The same u0 is imposed on uhat on both the outflow and inflow boundaries.
  bc->addDirichlet(uhat, outflowBoundary, u0);

  bc->addDirichlet(uhat, inflowBoundary, u0);
//  Teuchos::RCP<PenaltyConstraints> pc = Teuchos::rcp(new PenaltyConstraints);
//  pc->addConstraint(uhat==u0,inflowBoundary);

  ////////////////////   BUILD MESH   ///////////////////////
  // create a new mesh on a single-cell, unit square domain
  Teuchos::RCP<Mesh> mesh = MeshFactory::quadMeshMinRule(confusionBF, H1Order, delta_k);
  ////////////////////   SOLVE & REFINE   ///////////////////////
  Teuchos::RCP<Solution> solution = Teuchos::rcp( new Solution(mesh, bc, rhs, qoptIP) );
//  solution->setFilter(pc);
  double energyThreshold = 0.2; // for mesh refinements
  // Hard-coded choice between the two RefinementStrategy constructors below.
  bool useRieszRepBasedRefStrategy = true;
  if (rank==0) {
    if (useRieszRepBasedRefStrategy) {
      cout << "using RieszRep-based refinement strategy.\n";
    } else {
      cout << "using solution-based refinement strategy.\n";
  Teuchos::RCP<RefinementStrategy> refinementStrategy;
  if (!useRieszRepBasedRefStrategy) {
    refinementStrategy = Teuchos::rcp( new RefinementStrategy( solution, energyThreshold ) );
  } else {
    LinearTermPtr residual = confusionBF->testFunctional(solution) - rhs->linearTerm();
    refinementStrategy = Teuchos::rcp( new RefinementStrategy( mesh, residual, qoptIP, energyThreshold ) );
  // Refinement loop: optional matrix dumps, then one refine() call per pass.
  // NOTE(review): no solve call is visible inside this loop -- confirm.
  for (int refIndex=0; refIndex<numRefs; refIndex++){
    if (writeStiffnessMatrices) {
      string stiffnessFile = fileNameForRefinement("confusion_stiffness", refIndex);
      solution->setWriteMatrixToFile(true, stiffnessFile);
    if (writeWorstCaseGramMatrices) {
      string gramFile = fileNameForRefinement("confusion_gram", refIndex);
      bool jacobiScaling = true;
      double condNum = MeshUtilities::computeMaxLocalConditionNumber(qoptIP, mesh, jacobiScaling, gramFile);
      if (rank==0) {
        cout << "estimated worst-case Gram matrix condition number: " << condNum << endl;
        cout << "putative worst-case Gram matrix written to file " << gramFile << endl;
    if (refIndex == numRefs-1) { // write out second-to-last mesh
      if (rank==0)
        GnuPlotUtil::writeComputationalMeshSkeleton("confusionMesh", mesh, true);
    refinementStrategy->refine(rank==0); // print to console on rank 0
  // Same optional dumps once more, indexed with numRefs.
  if (writeStiffnessMatrices) {
    string stiffnessFile = fileNameForRefinement("confusion_stiffness", numRefs);
    solution->setWriteMatrixToFile(true, stiffnessFile);
  if (writeWorstCaseGramMatrices) {
    string gramFile = fileNameForRefinement("confusion_gram", numRefs);
    bool jacobiScaling = true;
    double condNum = MeshUtilities::computeMaxLocalConditionNumber(qoptIP, mesh, jacobiScaling, gramFile);
    if (rank==0) {
      cout << "estimated worst-case Gram matrix condition number: " << condNum << endl;
      cout << "putative worst-case Gram matrix written to file " << gramFile << endl;
  // one more solve on the final refined mesh:
  // NOTE(review): the solve call announced above is not visible in this excerpt.
  // The export directory name embeds the value of eps.
  ostringstream dir_name;
  dir_name << "confusion_eps" << eps;
  HDF5Exporter exporter(mesh,dir_name.str());
  exporter.exportSolution(solution, varFactory, 0);
  if (rank==0) cout << "wrote solution to " << dir_name.str() << endl;

  return 0;
// Driver that reads an XML input file named by --infile, extracts the "dim"
// and "domain" attributes, and constructs (then immediately deletes) an
// MxProblem of the requested dimension. The remaining sections (Epetra test,
// crab cavity, photonic-crystal cavity, sphere) sit behind `#if 0`.
// Returns -1 when command-line parsing fails, 0 otherwise.
// NOTE(review): closing braces and the #else/#endif lines matching the
// preprocessor conditionals are not visible in this excerpt, so the exact
// extent of each disabled section must be confirmed against the original file.
int main(int argc, char* argv[]) {

#ifdef HAVE_MPI
  MPI::Init(argc, argv);
  // Communicator wrapper handed to every MxProblem constructed below.
  RCP<MxComm> myComm = rcp(new MxComm());

// Disabled alternative communicator set-up using the Epetra comm classes.
#if 0
#ifdef HAVE_MPI
  MPI::Init(argc, argv);
  //MPI_Init(argc, argv);
  Epetra_MpiComm myComm(MPI_COMM_WORLD);
  Epetra_SerialComm myComm;

// input file method
#if 1

  std::string inFile;

  Teuchos::CommandLineProcessor cmdp(false, true);
  cmdp.setOption("infile", &inFile, "XML format input file.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1;

  // NOTE(review): only a message is visible for the missing-file case; no
  // return/exit is shown here -- confirm.
  if (inFile == "") {
    std::cout << "Please specify an input file using --infile=your_file.mx\n";

  // now read the input file with trilinos XML reader
  Teuchos::XMLObject xmlObj(Teuchos::FileInputSource(inFile).getObject());

  // get simulation dimension
  // Values outside 1..3 fall back to 3.
  int dim = atoi(MxUtil::XML::getAttr("dim", xmlObj).c_str());
  if (dim < 1 or dim > 3) {
    std::cout << "Simulation dimension invalid or not given, using 3D.\n";
    dim = 3;

  // get simulation type
  // Anything other than "frequency" or "time" falls back to "frequency".
  std::string domain = MxUtil::XML::getAttr("domain", xmlObj).c_str();
  if (domain != "frequency" and domain != "time") {
    std::cout << "Simulation domain invalid or not given, using frequency-domain.\n";
    domain = "frequency";

  // create problem
  // Each case constructs the problem object and deletes it straight away.
  // NOTE(review): no `break` statements are visible between the cases; if they
  // are really absent, dim == 1 would also run the 2-D and 3-D cases -- confirm.
  MxProblem<1> * prob1d;
  MxProblem<2> * prob2d;
  MxProblem<3> * prob3d;
  switch (dim) {
    case 1:
      prob1d = new MxProblem<1>(xmlObj, myComm);
      delete prob1d;
    case 2:
      prob2d = new MxProblem<2>(xmlObj, myComm);
      delete prob2d;
    case 3:
      prob3d = new MxProblem<3>(xmlObj, myComm);
      delete prob3d;

// Disabled scratch code follows.
#if 0

  // epetra stuff test
  MxMap map(10, 0, myComm);
  Epetra_CrsMatrix mat(Copy, map, 0);
  int ind = 2;
  double val = 0;
  mat.InsertGlobalValues(1, 1, &val, &ind);
  ind = 3;
  val = 4;
  mat.InsertGlobalValues(1, 1, &val, &ind);
  mat.FillComplete(map, map);

  Epetra_Vector myvec(map);

  std::cout << myvec;
  mat.Apply(myvec, myvec);
  std::cout << myvec;

  Epetra_CrsMatrix copy(mat);

  // NOTE(review): `mat` is printed twice; `copy` is never printed.
  std::cout << mat;
  std::cout << mat;

  //throw 1;

  typedef MxDimVector<double, 3> vecd3;
  typedef MxDimVector<int, 3> veci3;

  vecd3 midPt(0);

// Crab cavity set-up (disabled).
#if 0
  //std::cout << "Crab cavity setup:\n";

  int crabNumCells = 4;
  double crabCellLen = 2.0 * 0.0192; //meters
  double crabCavRad = 0.04719;
  double crabIrisRad = 0.015;
  double crabCavRho = 0.0136;
  double crabIrisRho = 0.00331;

  int crabCellRes = 40;
  int padCells = 2;
  int cnx, cny, cnz;
  double clx, cly, clz;
  double cox, coy, coz;

  // Grid spacing: one cavity cell is divided into crabCellRes grid cells.
  double crabDelta = crabCellLen / double(crabCellRes);

  // z extent: all cavity cells plus padCells on each side, centred on zero.
  cnz = crabNumCells * crabCellRes + 2 * padCells;
  clz = double(cnz) * crabDelta;
  coz = -0.5 * clz;

  // x/y extent: twice the padded cavity radius in grid cells, centred on zero.
  cny = cnx = 2 * (int(ceil(crabCavRad / crabDelta)) + padCells);
  cly = clx = double(cnx) * crabDelta;
  coy = cox = -0.5 * clx;
  veci3 crabN; crabN[0] = cnx; crabN[1] = cny; crabN[2] = cnz;
  vecd3 crabL; crabL[0] = clx; crabL[1] = cly; crabL[2] = clz;
  vecd3 crabO; crabO[0] = cox; crabO[1] = coy; crabO[2] = coz;
  MxGrid<3> crabGrid(crabO, crabN, crabL, &myComm);

  MxCrabCav crabCav(midPt, crabNumCells, crabCellLen, crabIrisRad, crabCavRad, crabIrisRho, crabCavRho);


  // Solver options for the crab cavity run.
  Teuchos::ParameterList crabList;
  crabList.set("geo-mg : levels", 1);
  crabList.set("geo-mg : smoothers : sweeps", 5);
  crabList.set("amg : smoothers : sweeps", 1);
  crabList.set("amg : smoothers : type", "Chebyshev");
  crabList.set("eigensolver : output", 2);
  crabList.set("eigensolver : nev", 15);
  crabList.set("eigensolver : tol", 1.e-8);
  crabList.set("eigensolver : block size", 2);
  crabList.set("eigensolver : num blocks", 30);
  crabList.set("eigensolver : spectrum", "LM");
  crabList.set("wave operator : invert", true);
  crabList.set("wave operator : invert : tol", 1.e-10);
  //crabList.set("wave operator : invert : shift", 1000.0);
  crabList.set("wave operator : invert : max basis size", 40);

  // NOTE(review): crabGrid and crabCav are not passed to crabSim in the
  // visible code -- presumably set-up calls are missing here; confirm.
  MxEMSim<dim> crabSim;

  MxSolver<dim> * solver;
  solver = new MxSolver<dim>(&crabSim, crabList);

  delete solver;

  //return 1;

// optimized phc cavity
#if 0
  double rodRad = 0.003175; // meters
  const int numRods = 24;
  // Rod centre coordinates (x in rodx, y in rody), one entry per rod.
  double rodx[numRods] = {0.0158406582694, 0.0551748491968, 0.0209567636489, 
                          0.0384658321918, 0.00792032913471, 0.0338604938991,
                          0.00477355412058, 0.00485955186622, -0.00792032913471,
                          -0.0213143552977, -0.0161832095283, -0.0336062803256,
                          -0.0158406582694, -0.0551748491968, -0.0209567636489,
                          -0.0384658321918, -0.00792032913471, -0.0338604938991,
                          -0.00477355412058, -0.00485955186622, 0.00792032913471,
                          0.0213143552977, 0.0161832095283, 0.0336062803256};
  double rody[numRods] = {0.0, -0.00724351649877, 0.006587367621, 
                    0.0165969314144, 0.013718412474, 0.044161062805,
                    0.0214427735115, 0.041610853563, 0.013718412474,
                    0.0514045793038, 0.0148554058905, 0.0250139221487,
                    1.9399211446e-18, 0.00724351649877, -0.006587367621,
                    -0.0165969314144, -0.013718412474, -0.044161062805,
                    -0.0214427735115, -0.041610853563, -0.013718412474,
                    -0.0514045793038, -0.0148554058905, -0.0250139221487};

  // One heap-allocated cylinder along zhat per rod; freed in the loop at the
  // end of this section.
  // NOTE(review): nothing visible adds the rods to rodsShape -- confirm.
  std::vector<MxShape<3> *> rods;
  MxShapeUnion<3> rodsShape;
  vecd3 rodPos;
  vecd3 zhat(0); zhat[2] = 1.0;
  for (int i = 0; i < numRods; i++) {
    rodPos[0] = rodx[i];
    rodPos[1] = rody[i];
    rodPos[2] = 0.0;
    rods.push_back(new MxCylinder(rodPos, zhat, rodRad));

  // Diagonal permittivity tensor: 9.3, 9.3, 11.5.
  MxDimMatrix<double, 3> sapphEps(0);
  sapphEps(0, 0) = 9.3;
  sapphEps(1, 1) = 9.3;
  sapphEps(2, 2) = 11.5;

  MxDielectric<3> phcDiel;
  phcDiel.add(&rodsShape, sapphEps);

  // conducting cavity
  double cavLen = 0.019624116824498831;
  double cavRad = 0.1;
  MxCylinder cavCyl(0, zhat, cavRad);
  MxSlab<3> cavCaps(0, zhat, cavLen);
  // NOTE(review): cavCyl and cavCaps are not added to phcCav in the visible code.
  MxShapeIntersection<3> phcCav;

  // setup grid
  // Spacing chosen so one rod diameter spans rodDiaCells grid cells.
  int rodDiaCells = 6;
  int pad = 2;
  double delta = 2.0 * rodRad / double(rodDiaCells);

  veci3 phcN;
  phcN[0] = phcN[1] = int(2.0 * cavRad / delta) + 2 * pad;
  phcN[2] = int(cavLen / delta) + 2 * pad;

  vecd3 phcL;
  phcL[0] = phcL[1] = delta * double(phcN[0]);
  phcL[2] = delta * double(phcN[2]);

  // Origin placed so the grid is centred on zero in every direction.
  vecd3 phcO;
  phcO[0] = phcO[1] = -0.5 * phcL[0];
  phcO[2] = -0.5 * phcL[2];

  MxGrid<3> phcGrid(phcO, phcN, phcL, &myComm);

  // Solver options for the photonic-crystal cavity run.
  Teuchos::ParameterList phcList;
  phcList.set("geo-mg : levels", 1);
  phcList.set("geo-mg : smoothers : sweeps", 5);
  phcList.set("eigensolver : output", 2);
  phcList.set("eigensolver : nev", 15);
  phcList.set("eigensolver : tol", 1.e-8);
  phcList.set("eigensolver : block size", 1);
  phcList.set("eigensolver : num blocks", 30);
  phcList.set("eigensolver : spectrum", "LM");
  phcList.set("wave operator : invert", true);
  phcList.set("wave operator : invert : tol", 1.e-8);
  //phcList.set("wave operator : invert : shift", 1000.0);
  phcList.set("wave operator : invert : max basis size", 40);

  MxEMSim<dim> phcSim;

  MxSolver<dim> * solver;
  solver = new MxSolver<dim>(&phcSim, phcList);

  delete solver;

  // Release the cylinders allocated with new above.
  for (int i = 0; i < numRods; i++)
    delete rods[i];


// Sphere / ellipsoid set-up (disabled).
#if 0
  double sphR = 0.37;
  int sphN = 64;
  MxEllipsoid ell(0.0, sphR);

  MxGrid<3> sphGrid(-0.5, sphN, 1.0, &myComm);

  // Full symmetric permittivity tensor with non-zero off-diagonal entries.
  MxDimMatrix<double, 3> rotSapphEps(0);
  rotSapphEps(0, 0) = 10.225;
  rotSapphEps(1, 1) = 10.225;
  rotSapphEps(2, 2) = 9.95;
  rotSapphEps(0, 1) = rotSapphEps(1, 0) = -0.825;
  rotSapphEps(0, 2) = rotSapphEps(2, 0) = -0.67360967926537398;
  rotSapphEps(1, 2) = rotSapphEps(2, 1) = 0.67360967926537398;

  MxDielectric<3> phcDiel;
  phcDiel.add(&ell, rotSapphEps);

  vecd3 ell2Loc(0); ell2Loc[0] = 0.6;
  vecd3 ell3Loc(0); ell3Loc[0] = 0.3; ell3Loc[2] = 0.3;
  MxEllipsoid ell2(ell2Loc, sphR);
  MxEllipsoid ell3(ell3Loc, sphR);

  // NOTE(review): the three composite shapes below are declared but nothing
  // visible adds members to them.
  MxShapeUnion<3> shUnion;

  MxShapeIntersection<3> shInt;

  MxShapeSubtract<3> shSub;

  MxDielectric<3> dielEll;
  MxDimMatrix<double, 3> epsEll(vecd3(10.0)); // isotropic eps = 10
  dielEll.add(&ell, epsEll);

  // Solver options for the sphere run.
  Teuchos::ParameterList sphList;
  sphList.set("geo-mg : levels", 1);
  sphList.set("geo-mg : smoothers : sweeps", 4);
  sphList.set("eigensolver : output", 2);
  sphList.set("eigensolver : nev", 12);
  sphList.set("eigensolver : tol", 1.e-8);
  sphList.set("eigensolver : block size", 1);
  sphList.set("eigensolver : num blocks", 30);
  sphList.set("eigensolver : spectrum", "LM");
  sphList.set("wave operator : invert", true);
  sphList.set("wave operator : invert : tol", 1.e-8);
  //sphList.set("wave operator : invert : shift", -0.1);
  sphList.set("wave operator : invert : shift", 1.0);
  sphList.set("wave operator : invert : max basis size", 40);

  MxEMSim<dim> sphSim;

  MxSolver<dim> * solver;
  solver = new MxSolver<dim>(&sphSim, sphList);

  delete solver;


// NOTE(review): nothing is visible inside this conditional -- presumably the
// MPI shutdown call; confirm.
#ifdef HAVE_MPI

  return 0;

// Example #13
// Time-stepping driver whose output files are prefixed "convectingCone":
// parses command-line options, builds a bilinear form in u and qHat with a
// theta-weighted convection term, creates two Solution objects that alternate
// between time steps, and exports selected steps (VTK and/or HDF5, depending
// on USE_VTK / USE_HDF5).
// Returns -1 when command-line parsing fails, 0 otherwise.
// NOTE(review): braces, `else` keywords, #else/#endif lines and several
// statements are not visible in this excerpt; the notes below mark the places
// where the visible code is evidently incomplete.
int main(int argc, char *argv[])
  // NOTE(review): only the message is visible; no call that actually enables
  // floating point exceptions appears here.
  cout << "NOTE: enabling floating point exceptions for divide by zero.\n";

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  int rank = Teuchos::GlobalMPISession::getRank();

  Teuchos::CommandLineProcessor cmdp(false,true); // false: don't throw exceptions; true: do return errors for unrecognized options

  bool useCondensedSolve = false; // condensed solve not yet compatible with minimum rule meshes

  int numGridPoints = 32; // in x,y -- idea is to keep the overall order of approximation constant
  int k = 4; // poly order for u
  double theta = 0.5;
  int numTimeSteps = 2000;
  int numCells = -1; // in x, y (-1 so we can set a default if unset from the command line.)
  int numFrames = 50;
  int delta_k = 2;   // test space enrichment: should be 2 for 2D
  bool useMumpsIfAvailable  = true;
  bool convertSolutionsToVTK = false; // when true assumes we've already run with precisely the same options, except without VTK support (so we have a bunch of .soln files)
  bool usePeriodicBCs = false;
  bool useConstantConvection = false;

  cmdp.setOption("polyOrder",&k,"polynomial order for field variable u");
  cmdp.setOption("delta_k", &delta_k, "test space polynomial order enrichment");

  cmdp.setOption("numCells",&numCells,"number of cells in x and y directions");
  cmdp.setOption("theta",&theta,"theta weight for time-stepping");
  cmdp.setOption("numTimeSteps",&numTimeSteps,"number of time steps");
  cmdp.setOption("numFrames",&numFrames,"number of frames for export");

  // Boolean options take a true-name / false-name pair.
  cmdp.setOption("usePeriodicBCs", "useDirichletBCs", &usePeriodicBCs);
  cmdp.setOption("useConstantConvection", "useVariableConvection", &useConstantConvection);

  cmdp.setOption("useCondensedSolve", "useUncondensedSolve", &useCondensedSolve, "use static condensation to reduce the size of the global solve");
  cmdp.setOption("useMumps", "useKLU", &useMumpsIfAvailable, "use MUMPS (if available)");
  cmdp.setOption("convertPreComputedSolutionsToVTK", "computeSolutions", &convertSolutionsToVTK);

  // Abort with -1 when the command line cannot be parsed.
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
#ifdef HAVE_MPI
    return -1;

  bool saveSolutionFiles = true;

  // Default mesh width when --numCells was not given.
  if (numCells==-1) numCells = numGridPoints / k;

  if (rank==0)
    cout << "solving on " << numCells << " x " << numCells << " mesh " << "of order " << k << ".\n";

  set<int> timeStepsToExport;

  // NOTE(review): integer division by (numFrames - 1); numFrames == 1 would
  // divide by zero.
  int timeStepsPerFrame = numTimeSteps / (numFrames - 1);
  if (timeStepsPerFrame==0) timeStepsPerFrame = 1;
  // NOTE(review): the loop body is not visible -- presumably it records n in
  // timeStepsToExport; confirm.
  for (int n=0; n<numTimeSteps; n += timeStepsPerFrame)

  int H1Order = k + 1;

  const static double PI  = 3.141592653589793238462;

  // Time-step size: 2*PI divided evenly over numTimeSteps.
  double dt = 2 * PI / numTimeSteps;

  VarFactory varFactory;
  // traces:
  VarPtr qHat = varFactory.fluxVar("\\widehat{q}");

  // fields:
  VarPtr u = varFactory.fieldVar("u", L2);

  // test functions:
  VarPtr v = varFactory.testVar("v", HGRAD);

  FunctionPtr x = Function::xn(1);
  FunctionPtr y = Function::yn(1);

  // Convection field c: constant (0.5, 0.5) or the variable field (y-0.5, 0.5-x).
  // NOTE(review): the `else` separating the two assignments is not visible.
  FunctionPtr c;
  if (useConstantConvection)
    c = Function::vectorize(Function::constant(0.5), Function::constant(0.5));
    c = Function::vectorize(y-0.5, 0.5-x);
//  FunctionPtr c = Function::vectorize(y, x);
  FunctionPtr n = Function::normal();

  BFPtr bf = Teuchos::rcp( new BF(varFactory) );

  // Bilinear form: u/dt against v, -theta*u against c.grad(v), plus the flux qHat against v.
  bf->addTerm(u / dt, v);
  bf->addTerm(- theta * u, c * v->grad());
//  bf->addTerm(theta * u_hat, (c * n) * v);
  bf->addTerm(qHat, v);

  // Default domain: 2 x 2 square with lower-left corner (-0.5, -0.5).
  double width = 2.0, height = 2.0;
  int horizontalCells = numCells, verticalCells = numCells;
  double x0 = -0.5;
  double y0 = -0.5;

  // Periodic runs use the unit square with lower-left corner at the origin.
  if (usePeriodicBCs)
    x0 = 0.0;
    y0 = 0.0;
    width = 1.0;
    height = 1.0;

  BCPtr bc = BC::bc();

  SpatialFilterPtr inflowFilter  = Teuchos::rcp( new InflowFilterForClockwisePlanarRotation (x0,x0+width,y0,y0+height,0.5,0.5));

  // Either a zero Dirichlet condition on qHat at the inflow boundary, or
  // periodic identifications in x and y.
  // NOTE(review): the `else` between the two alternatives is not visible.
  vector< PeriodicBCPtr > periodicBCs;
  if (! usePeriodicBCs)
    //  bc->addDirichlet(u_hat, SpatialFilter::allSpace(), Function::zero());
    bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary.
    periodicBCs.push_back(PeriodicBC::xIdentification(x0, x0+width));
    periodicBCs.push_back(PeriodicBC::yIdentification(y0, y0+height));

  MeshPtr mesh = MeshFactory::quadMeshMinRule(bf, H1Order, delta_k, width, height,
                 horizontalCells, verticalCells, false, x0, y0, periodicBCs);

  // Initial condition.
  FunctionPtr u0 = Teuchos::rcp( new Cone_U0(0.0, 0.25, 0.1, 1.0, usePeriodicBCs) );

  // Right-hand side for the first step, built from the initial condition u0.
  RHSPtr initialRHS = RHS::rhs();
  initialRHS->addTerm(u0 / dt * v);
  initialRHS->addTerm((1-theta) * u0 * c * v->grad());

  IPPtr ip;
//  ip = Teuchos::rcp( new IP );
//  ip->addTerm(v);
//  ip->addTerm(c * v->grad());
  ip = bf->graphNorm();

  // create two Solution objects; we'll switch between these for time steps
  SolutionPtr soln0 = Solution::solution(mesh, bc, initialRHS, ip);
  FunctionPtr u_soln0 = Function::solution(u, soln0);
  FunctionPtr qHat_soln0 = Function::solution(qHat, soln0);

  // rhs1 feeds soln1 from soln0's field solution.
  RHSPtr rhs1 = RHS::rhs();
  rhs1->addTerm(u_soln0 / dt * v);
  rhs1->addTerm((1-theta) * u_soln0 * c * v->grad());

  SolutionPtr soln1 = Solution::solution(mesh, bc, rhs1, ip);
  FunctionPtr u_soln1 = Function::solution(u, soln1);
  FunctionPtr qHat_soln1 = Function::solution(qHat, soln1);

  // NOTE(review): the swap of initialRHS for rhs2 described in the comment
  // below is not visible in this excerpt.
  RHSPtr rhs2 = RHS::rhs(); // after the first solve on soln0, we'll swap out initialRHS for rhs2
  rhs2->addTerm(u_soln1 / dt * v);
  rhs2->addTerm((1-theta) * u_soln1 * c * v->grad());

  // KluSolver by default; replaced by MumpsSolver when the flag is set.
  Teuchos::RCP<Solver> solver = Teuchos::rcp( new KluSolver );

  if (useMumpsIfAvailable) solver = Teuchos::rcp( new MumpsSolver );

//  double energyErrorSum = 0;

  // Common prefix for the per-frame output file names.
  ostringstream filePrefix;
  filePrefix << "convectingCone_k" << k << "_t";
  int frameNumber = 0;

#ifdef USE_HDF5
  ostringstream dir_name;
  dir_name << "convectingCone_k" << k;
  HDF5Exporter exporter(mesh,dir_name.str());

#ifdef USE_VTK
  VTKExporter soln0Exporter(soln0,mesh,varFactory);
  VTKExporter soln1Exporter(soln1,mesh,varFactory);

  // Conversion-only mode: walk the frame file names.
  // NOTE(review): the read/export statements of this branch are not visible.
  if (convertSolutionsToVTK)
#ifdef USE_VTK
    if (rank==0)
      cout << "Converting .soln files to VTK.\n";
      // This loop variable shadows the outer frameNumber declared above.
      for (int frameNumber=0; frameNumber<=numFrames; frameNumber++)
        ostringstream filename;
        filename << filePrefix.str() << frameNumber << ".soln";
        filename << filePrefix.str() << frameNumber;
    if (rank==0) cout << "Driver was built without USE_VTK defined.  This must be defined to convert solution files to VTK files.\n";

  // Export of the initial state when step 0 is in the export set.
  if (timeStepsToExport.find(0) != timeStepsToExport.end())
    map<int,FunctionPtr> solnMap;
    solnMap[u->ID()] = u0; // project field variables
    // NOTE(review): the projection call between these two messages is not visible.
    if (rank==0) cout << "About to project initial solution onto mesh.\n";
    if (rank==0) cout << "...projected initial solution onto mesh.\n";
    ostringstream filename;
    filename << filePrefix.str() << frameNumber++;
    if (rank==0) cout << "About to export initial solution.\n";
#ifdef USE_VTK
    if (rank==0) soln0Exporter.exportFields(filename.str());
#ifdef USE_HDF5
    exporter.exportSolution(soln0, varFactory,0);
    // NOTE(review): only the file name and the message are visible; the
    // statement that writes the .soln file is not shown.
    if (saveSolutionFiles)
      if (rank==0)
        filename << ".soln";
        cout << endl << "wrote " << filename.str() << endl;
    if (rank==0) cout << "...exported initial solution.\n";

  if (rank==0) cout << "About to solve initial time step.\n";
  // first time step:
  soln0->setReportTimingResults(true); // added to gain insight into why MPI blocks in some cases on the server...
  if (useCondensedSolve) soln0->condensedSolve(solver);
  else soln0->solve(solver);
//  energyErrorSum += soln0->energyErrorTotal();
  if (rank==0) cout << "Solved initial time step.\n";

  // Export after the first step when step 1 is in the export set.
  if (timeStepsToExport.find(1) != timeStepsToExport.end())
    ostringstream filename;
    filename << filePrefix.str() << frameNumber++;
#ifdef USE_VTK
    if (rank==0) soln0Exporter.exportFields(filename.str());
#ifdef USE_HDF5
    exporter.exportSolution(soln0, varFactory);
    if (saveSolutionFiles)
      if (rank==0)
        filename << ".soln";
        cout << endl << "wrote " << filename.str() << endl;

  bool reportTimings = false;

  // Remaining time steps: odd n solves soln1, even n solves soln0.
  for (int n=1; n<numTimeSteps; n++)
    bool odd = (n%2)==1;
    SolutionPtr soln_n = odd ? soln1 : soln0;
    // NOTE(review): both arms call solve(solver), whereas the first time step
    // above uses condensedSolve(solver) when useCondensedSolve is set --
    // confirm whether this is intentional.
    if (useCondensedSolve) soln_n->solve(solver);
    else soln_n->solve(solver);
    if (reportTimings)
      if (rank==0) cout << "time step " << n << ", timing report:\n";
    if (rank==0)
      cout << "\x1B[2K"; // Erase the entire current line.
      cout << "\x1B[0E"; // Move to the beginning of the current line.
      cout << "Solved time step: " << n;
    // Export when step n+1 is in the export set.
    if (timeStepsToExport.find(n+1)!=timeStepsToExport.end())
      ostringstream filename;
      filename << filePrefix.str() << frameNumber++;
#ifdef USE_VTK
      // NOTE(review): the exporter calls selected by `odd` are not visible here.
      if (rank==0)
        if (odd)
#ifdef USE_HDF5
      double t = n * dt;
      // NOTE(review): the `else` between the two export calls is not visible.
      if (odd)
        exporter.exportSolution(soln1, varFactory, t);
        exporter.exportSolution(soln0, varFactory, t);
      if (saveSolutionFiles)
        if (rank==0)
          filename << ".soln";
          if (odd)
          cout << endl << "wrote " << filename.str() << endl;
//    energyErrorSum += soln_n->energyErrorTotal();

//  if (rank==0) cout << "energy error, sum over all time steps: " << energyErrorSum << endl;

  return 0;
// Driver for the space-time convecting-cone example.  Parses command-line
// options, builds a bilinear form in the flux qHat, the field u and the test
// function v, then solves one time slab after another, exporting each slab's
// solution plus time slices of u at the requested frame times.
//
// Returns 0 on completion and -1 when the command line fails to parse.
int main(int argc, char *argv[])
  // NOTE(review): the message announces floating point exceptions, but no statement
  // visible in this function enables them -- confirm.
  cout << "NOTE: enabling floating point exceptions for divide by zero.\n";

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  int rank = Teuchos::GlobalMPISession::getRank();

  Teuchos::CommandLineProcessor cmdp(false,true); // false: don't throw exceptions; true: do return errors for unrecognized options

  const static double PI  = 3.141592653589793238462;

  // Defaults for the command-line options registered with cmdp below.
  bool useCondensedSolve = true; // condensed solve not yet compatible with minimum rule meshes

  int k = 2; // poly order for u in every direction, including temporal
  int numCells = 32; // in x, y
  int numTimeCells = 1;
  // -1 is the sentinel tested below: the slab count is then derived from k.
  int numTimeSlabs = -1;
  int numFrames = 201;
  int delta_k = 3;   // test space enrichment: should be 3 for 3D
  int maxRefinements = 0; // maximum # of refinements on each time slab
  bool useMumpsIfAvailable  = true;
  bool useConstantConvection = false;
  double refinementTolerance = 0.1;

  int checkPointFrequency = 50; // output solution and mesh every 50 time slabs

  // -1 is the sentinel tested below for "no previous solution supplied".
  int previousSolutionTimeSlabNumber = -1;
  string previousSolutionFile = "";
  string previousMeshFile = "";

  cmdp.setOption("polyOrder",&k,"polynomial order for field variable u");
  cmdp.setOption("delta_k", &delta_k, "test space polynomial order enrichment");

  cmdp.setOption("numCells",&numCells,"number of cells in x and y directions");
  cmdp.setOption("numTimeCells",&numTimeCells,"number of time axis cells");
  cmdp.setOption("numTimeSlabs",&numTimeSlabs,"number of time slabs");
  cmdp.setOption("numFrames",&numFrames,"number of frames for export");

  cmdp.setOption("useConstantConvection", "useVariableConvection", &useConstantConvection);

  cmdp.setOption("useCondensedSolve", "useUncondensedSolve", &useCondensedSolve, "use static condensation to reduce the size of the global solve");
  cmdp.setOption("useMumps", "useKLU", &useMumpsIfAvailable, "use MUMPS (if available)");

  cmdp.setOption("refinementTolerance", &refinementTolerance, "relative error beyond which to stop refining");
  cmdp.setOption("maxRefinements", &maxRefinements, "maximum # of refinements on each time slab");

  cmdp.setOption("previousSlabNumber", &previousSolutionTimeSlabNumber, "time slab number of previous solution");
  cmdp.setOption("previousSolution", &previousSolutionFile, "file with previous solution");
  cmdp.setOption("previousMesh", &previousMeshFile, "file with previous mesh");

  // A command line that does not parse cleanly ends the program with -1.
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
#ifdef HAVE_MPI
    return -1;

  // The H1 order handed to the mesh factory is one more than the field order k.
  int H1Order = k + 1;

  VarFactory varFactory;
  // traces:
  VarPtr qHat = varFactory.fluxVar("\\widehat{q}");

  // fields:
  VarPtr u = varFactory.fieldVar("u", L2);

  // test functions:
  VarPtr v = varFactory.testVar("v", HGRAD);

  FunctionPtr x = Function::xn(1);
  FunctionPtr y = Function::yn(1);

  // Convection vector c: either the constant (0.5, 0.5, 1) or the
  // position-dependent (y-0.5, 0.5-x, 1).  The third component is 1.0 in both;
  // presumably it is the temporal direction, since the mesh's third axis is
  // sized by numTimeCells and starts at t0 -- TODO confirm.
  FunctionPtr c;
  if (useConstantConvection)
    c = Function::vectorize(Function::constant(0.5), Function::constant(0.5), Function::constant(1.0));
    c = Function::vectorize(y-0.5, 0.5-x, Function::constant(1.0));
  // NOTE(review): n is not referenced again in the visible part of this function.
  FunctionPtr n = Function::normal();

  BFPtr bf = Teuchos::rcp( new BF(varFactory) );

  // Bilinear form terms: (u, c * grad v) and (qHat, v).
  bf->addTerm( u, c * v->grad());
  bf->addTerm(qHat, v);

  double width = 2.0, height = 2.0;
  int horizontalCells = numCells, verticalCells = numCells;
  int depthCells = numTimeCells;
  double x0 = -0.5;
  double y0 = -0.5;
  double t0 = 0;

  double totalTime = 2.0 * PI;

  // Frame times are evenly spaced over [0, totalTime], both ends included.
  // NOTE(review): numFrames == 1 makes the divisor (numFrames-1) zero.
  vector<double> frameTimes;
  for (int i=0; i<numFrames; i++)
    frameTimes.push_back((totalTime*i) / (numFrames-1));

  if (numTimeSlabs==-1)
    // want the number of grid points in temporal direction to be about 2000.  The temporal length is 2 * PI
    // The cast binds to the literal 2000, so this is an integer division by k.
    // NOTE(review): k == 0 would divide by zero here.
    numTimeSlabs = (int) 2000 / k;
  double timeLengthPerSlab = totalTime / numTimeSlabs;

  if (rank==0)
    cout << "solving on " << numCells << " x " << numCells << " x " << numTimeCells << " mesh " << "of order " << k << ".\n";
    cout << "numTimeSlabs: " << numTimeSlabs << endl;

  SpatialFilterPtr inflowFilter  = Teuchos::rcp( new InflowFilterForClockwisePlanarRotation (x0,x0+width,y0,y0+height,0.5,0.5));

  // NOTE(review): no statement visible here fills `dimensions` before it is
  // passed to MeshFactory::rectilinearMesh below -- confirm the extents
  // (width, height, time length per slab) are supplied.
  vector<double> dimensions;

  vector<int> elementCounts(3);
  elementCounts[0] = horizontalCells;
  elementCounts[1] = verticalCells;
  elementCounts[2] = depthCells;

  vector<double> origin(3);
  origin[0] = x0;
  origin[1] = y0;
  origin[2] = t0;

  // KLU is the default solver; it is replaced by MUMPS when the flag is set.
  Teuchos::RCP<Solver> solver = Teuchos::rcp( new KluSolver );

  // NOTE(review): the option text says "if available", but no availability
  // guard is visible around this line -- confirm.
  if (useMumpsIfAvailable) solver = Teuchos::rcp( new MumpsSolver );

//  double errorPercentage = 0.5; // for mesh refinements: ask to refine elements that account for 80% of the error in each step
//  Teuchos::RCP<RefinementStrategy> refinementStrategy;
//  refinementStrategy = Teuchos::rcp( new ErrorPercentageRefinementStrategy( soln, errorPercentage ));

  // This warning is not guarded by a rank check, unlike the other output here.
  if (maxRefinements != 0)
    cout << "Warning: maxRefinements is not 0, but the slice exporter implicitly assumes there won't be any refinements.\n";

  MeshPtr mesh;

  MeshPtr prevMesh;
  SolutionPtr prevSoln;

  mesh = MeshFactory::rectilinearMesh(bf, dimensions, elementCounts, H1Order, delta_k, origin);

  if (rank==0) cout << "Initial mesh has " << mesh->getTopology()->activeCellCount() << " active (leaf) cells " << "and " << mesh->globalDofCount() << " degrees of freedom.\n";

  // NOTE(review): sideParity is not referenced again in the visible part of this function.
  FunctionPtr sideParity = Function::sideParity();

  // Index of the last frame already exported; -1 means none yet.
  int lastFrameOutputted = -1;

  SolutionPtr soln;

  IPPtr ip;
  ip = bf->graphNorm();

  FunctionPtr u0 = Teuchos::rcp( new Cone_U0(0.0, 0.25, 0.1, 1.0, false) );

  // Boundary data for the first slab: zero on the inflow boundary, and u0
  // imposed through qHat on the plane matched by matchingZ(t0).
  BCPtr bc = BC::bc();
  bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary.
  bc->addDirichlet(qHat, SpatialFilter::matchingZ(t0), u0);

  // Kept so the slice exporter below is built from the mesh at t0 even after
  // `mesh` has been replaced.
  MeshPtr initialMesh = mesh;

  // Restart handling: when a previous slab number was given, resume at the slab after it.
  int startingSlabNumber;
  if (previousSolutionTimeSlabNumber != -1)
    startingSlabNumber = previousSolutionTimeSlabNumber + 1;

    if (rank==0) cout << "Loading mesh from " << previousMeshFile << endl;

    prevMesh = MeshFactory::loadFromHDF5(bf, previousMeshFile);
    // NOTE(review): prevSoln is built on `mesh` (the t0 mesh), not on the
    // just-loaded prevMesh, and no visible statement reads previousSolutionFile
    // into it -- confirm this is intended.
    prevSoln = Solution::solution(mesh, bc, RHS::rhs(), ip); // include BC and IP objects for sake of condensed dof interpreter setup...

    if (rank==0) cout << "Loading solution from " << previousSolutionFile << endl;

    // Rebuild the mesh with its temporal origin at the start of the resumed slab.
    double tn = (previousSolutionTimeSlabNumber+1) * timeLengthPerSlab;
    origin[2] = tn;
    mesh = MeshFactory::rectilinearMesh(bf, dimensions, elementCounts, H1Order, delta_k, origin);

    FunctionPtr q_prev = Function::solution(qHat, prevSoln);
    FunctionPtr q_transfer = Teuchos::rcp( new MeshTransferFunction(-q_prev, prevMesh, mesh, tn) ); // negate because the normals go in opposite directions

    bc = BC::bc();
    bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary.
    bc->addDirichlet(qHat, SpatialFilter::matchingZ(tn), q_transfer);

    // Same expression as tn above.  Frames earlier than this time are marked as
    // already exported so the slab loop does not export them again.
    double t_slab_final = (previousSolutionTimeSlabNumber+1) * timeLengthPerSlab;
    int frameOrdinal = 0;

    // NOTE(review): frameOrdinal has no upper bound here; if every frame time is
    // below t_slab_final the index runs past the end of frameTimes.
    while (frameTimes[frameOrdinal] < t_slab_final)
      lastFrameOutputted = frameOrdinal++;
    // NOTE(review): presumably the no-restart case (start from slab 0); as
    // written it follows the restart setup directly -- confirm the branch structure.
    startingSlabNumber = 0;

  // Exporter for time slices of u, built on a time slice of the initial mesh at time 0.
  ostringstream dir_name;
  dir_name << "spacetime_slice_convectingCone_k" << k << "_startSlab" << startingSlabNumber;
  map<GlobalIndexType,GlobalIndexType> cellMap;
  MeshPtr meshSlice = MeshTools::timeSliceMesh(initialMesh, 0, cellMap, H1Order);
  HDF5Exporter sliceExporter(meshSlice,dir_name.str());

  soln = Solution::solution(mesh, bc, RHS::rhs(), ip);

  // Main loop: one iteration per time slab.
  for(int timeSlab = startingSlabNumber; timeSlab<numTimeSlabs; timeSlab++)
    double energyThreshold = 0.2; // for mesh refinements: ask to refine elements that account for 80% of the error in each step
    Teuchos::RCP<RefinementStrategy> refinementStrategy;
    refinementStrategy = Teuchos::rcp( new RefinementStrategy( soln, energyThreshold ));

    FunctionPtr u_spacetime = Function::solution(u, soln);

    double relativeEnergyError;
    int refNumber = 0;

//    {
//      // DEBUGGING: just to try running the time slicing:
//      double t_slab_final = (timeStep+1) * timeLengthPerSlab;
//      int frameOrdinal = lastFrameOutputted + 1;
//      while (frameTimes[frameOrdinal] < t_slab_final) {
//        FunctionPtr u_spacetime = Function::solution(u, soln);
//        ostringstream dir_name;
//        dir_name << "spacetime_slice_convectingCone_k" << k;
//        MeshTools::timeSliceExport(dir_name.str(), mesh, u_spacetime, frameTimes[frameOrdinal], "u_slice");
//        cout << "Exported frame " << frameOrdinal << ", t=" << frameTimes[frameOrdinal] << endl;
//        frameOrdinal++;
//      }
//    }


      // Export this slab's solution to its own directory.  This dir_name is a
      // separate object from the dir_name declared before the loop.
      // NOTE(review): no solve call is visible before this export -- confirm.
      ostringstream dir_name;
      dir_name << "spacetime_convectingCone_k" << k << "_t" << timeSlab;
      HDF5Exporter exporter(soln->mesh(),dir_name.str());
      exporter.exportSolution(soln, varFactory);

      if (rank==0) cout << "Exported HDF solution for time slab to directory " << dir_name.str() << endl;
//      string u_name = "u_spacetime";
//      exporter.exportFunction(u_spacetime, u_name);

      ostringstream file_name;
      file_name << dir_name.str();

      // Checkpoint slabs are those whose 1-based number is a multiple of checkPointFrequency.
      // NOTE(review): only the ".soln"/".mesh" names are built and a message
      // printed; no statement visible here writes either file -- confirm.
      bool saveSolutionAndMeshForThisSlab = ((timeSlab + 1) % checkPointFrequency == 0); // +1 so that first output is nth, not first
      if (saveSolutionAndMeshForThisSlab)
        dir_name << ".soln";
        if (rank==0) cout << endl << "wrote " << dir_name.str() << endl;

        file_name << ".mesh";
      FunctionPtr u_soln = Function::solution(u, soln);

      // Relative energy error = total energy error / L2 norm of u on the current mesh.
      // NOTE(review): a zero solnNorm would make this a division by zero.
      double solnNorm = u_soln->l2norm(mesh);

      double energyError = soln->energyErrorTotal();
      relativeEnergyError = energyError / solnNorm;

      // NOTE(review): refNumber++ sits inside the rank-0-only output, so other
      // ranks may not advance refNumber; confirm the refinement condition stays
      // consistent across ranks when maxRefinements > 0.
      if (rank==0)
        cout << "Relative energy error for refinement " << refNumber++ << ": " << relativeEnergyError << endl;

      // NOTE(review): no refine call is visible before the "After refinement" report -- confirm.
      if ((relativeEnergyError > refinementTolerance) && (refNumber < maxRefinements))
        if (rank==0)
          cout << "After refinement, mesh has " << mesh->getTopology()->activeCellCount() << " active (leaf) cells " << "and " << mesh->globalDofCount() << " degrees of freedom.\n";

    // Same condition as the refinement check above: continue while the error is
    // above tolerance and the refinement budget is not exhausted.
    while ((relativeEnergyError > refinementTolerance) && (refNumber < maxRefinements));

    // Export every not-yet-exported frame whose time lies before the end of this slab.
    double t_slab_final = (timeSlab+1) * timeLengthPerSlab;
    int frameOrdinal = lastFrameOutputted + 1;
    // NOTE(review): timesForSlab is not referenced again in the visible part of this function.
    vector<double> timesForSlab;
    // NOTE(review): frameOrdinal has no upper bound; the scan stops only when a
    // frame time is >= t_slab_final, which relies on the last frame time not
    // falling below the final slab's end after floating-point rounding.
    while (frameTimes[frameOrdinal] < t_slab_final)
      double t = frameTimes[frameOrdinal];
      if (rank==0) cout << "exporting t=" << t << " on slab " << timeSlab << endl;
      FunctionPtr sliceFunction = MeshTools::timeSliceFunction(mesh, cellMap, u_spacetime, t);
      sliceExporter.exportFunction(sliceFunction, "u_slice", t);
      lastFrameOutputted = frameOrdinal++;

    // set up next mesh/solution:
    FunctionPtr q_prev = Function::solution(qHat, soln);

//    cout << "Error in setup of q_prev: simple solution doesn't know about the map from the previous time slab to the current one. (TODO: fix this.)\n";

    // The next slab's mesh starts where this slab ends.
    double tn = (timeSlab+1) * timeLengthPerSlab;
    origin[2] = tn;
    mesh = MeshFactory::rectilinearMesh(bf, dimensions, elementCounts, H1Order, delta_k, origin);

    FunctionPtr q_transfer = Teuchos::rcp( new MeshTransferFunction(-q_prev, soln->mesh(), mesh, tn) ); // negate because the normals go in opposite directions

    // Fresh BCs for the next slab: zero inflow, plus the transferred flux on
    // the plane matched by matchingZ(tn).
    bc = BC::bc();
    bc->addDirichlet(qHat, inflowFilter, Function::zero()); // zero BCs enforced at the inflow boundary.
    bc->addDirichlet(qHat, SpatialFilter::matchingZ(tn), q_transfer);

    // IMPORTANT: now that we are ready to step to next soln, nullify BC.  If we do not do this, then we have an RCP chain
    //            that extends back to the first time slab, effectively a memory leak.
    // NOTE(review): no statement visible here actually nullifies the old
    // solution's BC before soln is replaced -- confirm.

    soln = Solution::solution(mesh, bc, RHS::rhs(), ip);

  return 0;