Esempio n. 1
0
int main(int argc, char *argv[])
{

  using std::endl;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::ParameterList;
  using Teuchos::CommandLineProcessor;
  
  bool result, success = true;

  Teuchos::GlobalMPISession mpiSession(&argc,&argv);

  RCP<Epetra_Comm> epetra_comm;
#ifdef HAVE_MPI
  epetra_comm = rcp( new Epetra_MpiComm(MPI_COMM_WORLD) );
#else
  epetra_comm = rcp( new Epetra_SerialComm );
#endif // HAVE_MPI

  RCP<Teuchos::FancyOStream>
    out = Teuchos::VerboseObjectBase::getDefaultOStream();

  try {

    //
    // Read commandline options
    //

    CommandLineProcessor clp;
    clp.throwExceptions(false);
    clp.addOutputSetupOptions(true);

    Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_DEFAULT;
    setVerbosityLevelOption( "verb-level", &verbLevel,
      "Top-level verbosity level.  By default, this gets deincremented as you go deeper into numerical objects.",
      &clp );

    bool dumpFinalSolutions = false;
    clp.setOption(
      "dump-final-solutions", "no-dump-final-solutions", &dumpFinalSolutions,
      "Determine if the final solutions are dumpped or not." );
    
    CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv);
    if( parse_return != CommandLineProcessor::PARSE_SUCCESSFUL ) return parse_return;
    
    if ( Teuchos::VERB_DEFAULT == verbLevel )
      verbLevel = Teuchos::VERB_LOW;

    const Teuchos::EVerbosityLevel
      solnVerbLevel = ( dumpFinalSolutions ? Teuchos::VERB_EXTREME : verbLevel );

    //
    // Get the base parameter list that all other parameter lists will be read
    // from.
    //
    
    RCP<ParameterList>
      paramList = Teuchos::parameterList();

    //
    // Create the underlying EpetraExt::ModelEvaluator
    //

    RCP<LorenzModel>
      lorenzModel = rcp(new LorenzModel( epetra_comm, *paramList ));

    //
    // Create the Thyra-wrapped ModelEvaluator
    //
    
    RCP<Thyra::ModelEvaluator<double> >
      thyraLorenzModel = Thyra::epetraModelEvaluator(lorenzModel,Teuchos::null);

    //
    // Create the Rythmos GAASP ErrorEstimator
    //

    RCP<Rythmos::GAASPErrorEstimator> gaaspEE = rcp(new Rythmos::GAASPErrorEstimator);
    gaaspEE->setModel(thyraLorenzModel);
    //gaaspEE->setQuantityOfInterest( AVERAGE_ERROR_QTY );  // Not passed through yet.
    Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
    pl->sublist("GAASP Interface Parameters").set<double>("eTime",1.0);
    pl->sublist("GAASP Interface Parameters").set<double>("timeStep",0.1);
    //pl->sublist("GAASP Interface Parameters").set<double>("timeStep",0.5);
    gaaspEE->setParameterList(pl);

    //RCP<const Rythmos::ErrorEstimateBase<double> > error = gaaspEE->getErrorEstimate();

    //double uTOL = 1.0e-8;
    double uTOL = 1.0e-2;
    RCP<const Rythmos::ErrorEstimateBase<double> > error = gaaspEE->controlGlobalError(uTOL);


    double err = error->getTotalError();
    out->precision(15);
    *out << "err = " << err << std::endl;

  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true,*out,success);

//  if(success)
//    *out << "\nEnd Result: TEST PASSED" << endl;
//  else
//    *out << "\nEnd Result: TEST FAILED" << endl;
  
  return ( success ? 0 : 1 );

} // end main() [Doxygen looks for this!]
Esempio n. 2
0
/// Unit test: builds a Galeri "Laplace2D" matrix, sets up an Ifpack_Hypre
/// preconditioner for it, and solves A*X = B with Belos pseudo-block GMRES
/// using that preconditioner as a left preconditioner.  B is computed from a
/// random known solution, and the computed X is compared against it with a
/// tolerance of tol*10*10^NumProc.
TEUCHOS_UNIT_TEST(Belos_Hypre, Laplace2D){
  const double tol = 1E-7;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::ParameterList;
  typedef Belos::LinearProblem<double,Epetra_MultiVector,Epetra_Operator>  LinearProblem;

  //
  // Create Laplace2D
  //
#ifdef HAVE_MPI
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  // No parentheses: `Epetra_SerialComm Comm();` would declare a function
  // named Comm instead of default-constructing a communicator.
  Epetra_SerialComm Comm;
#endif
  Teuchos::ParameterList GaleriList;
  // The grid grows with the number of processes (10*NumProc points per side).
  int nx = 10 * Comm.NumProc();
  int ny = 10 * Comm.NumProc();
  GaleriList.set("nx", nx);
  GaleriList.set("ny", ny);
  Epetra_Map Map(nx*ny,0,Comm);
  RCP<Epetra_CrsMatrix> Crs_Matrix   = rcp(Galeri::CreateCrsMatrix("Laplace2D", &Map, GaleriList));
  int NumProc = Crs_Matrix->Comm().NumProc();

  //
  // Create the hypre preconditioner
  //
  RCP<Ifpack_Hypre> preconditioner = rcp(new Ifpack_Hypre(Crs_Matrix.get()));
  TEST_EQUALITY(preconditioner->Initialize(),0);
  TEST_EQUALITY(preconditioner->SetParameter(Preconditioner, ParaSails),0); // Select ParaSails as the Hypre preconditioner
  TEST_EQUALITY(preconditioner->SetParameter(Preconditioner),0); // NOTE(review): presumably tells Ifpack_Hypre to apply only the preconditioner -- confirm
  TEST_EQUALITY(preconditioner->Compute(),0);

  //
  // Create the solution vector and rhs
  //
  int numVec = 1;
  RCP<Epetra_MultiVector> X = rcp(new Epetra_MultiVector(Crs_Matrix->OperatorDomainMap(), numVec));
  RCP<Epetra_MultiVector> KnownX = rcp(new Epetra_MultiVector(Crs_Matrix->OperatorDomainMap(), numVec));
  KnownX->Random();
  RCP<Epetra_MultiVector> B = rcp(new Epetra_MultiVector(Crs_Matrix->OperatorRangeMap(), numVec));
  // B = A * KnownX, so KnownX is the reference solution for the solve below.
  Crs_Matrix->Apply(*KnownX, *B);

  //
  // Test the EpetraExt wrapper
  // amk November 24, 2015: Should we deprecate this?
  //
//  RCP<ParameterList> pl = rcp(new ParameterList());
//  TEST_EQUALITY(X->PutScalar(0.0),0);
//  HYPRE_IJMatrix hypre_mat = preconditioner->HypreMatrix();
//  RCP<EpetraExt_HypreIJMatrix> Hyp_Matrix = rcp(new EpetraExt_HypreIJMatrix(hypre_mat));
//  TEST_EQUALITY(Hyp_Matrix->SetParameter(Preconditioner, ParaSails),0);
//  TEST_EQUALITY(Hyp_Matrix->SetParameter(Preconditioner),0);
//  TEST_EQUALITY(EquivalentMatrices(*Hyp_Matrix, *Crs_Matrix, tol), true);
//  RCP<LinearProblem> problem1 = rcp(new LinearProblem(Crs_Matrix,X,B));
//  problem1->setLeftPrec(Hyp_Matrix);
//  TEST_EQUALITY(problem1->setProblem(),true);
//  Belos::PseudoBlockGmresSolMgr<double,Epetra_MultiVector,Epetra_Operator> solMgr1(problem1,pl);
//  Belos::ReturnType rv1 = solMgr1.solve(); // TEST_EQUALITY(solMgr2.solve(),Belos::Converged);
//  TEST_EQUALITY(rv1,Belos::Converged);
//  TEST_EQUALITY(EquivalentVectors(*X, *KnownX, tol*10*pow(10.0,NumProc)), true);

  //
  // Test the Ifpack hypre interface
  //
  RCP<ParameterList> pl2 = rcp(new ParameterList());
  // Wrap the preconditioner in Epetra_InvOperator before handing it to Belos.
  RCP<Epetra_Operator> invOp = rcp(new Epetra_InvOperator(preconditioner.get()));
  TEST_EQUALITY(X->PutScalar(0.0),0);
  RCP<LinearProblem> problem2 = rcp(new LinearProblem(Crs_Matrix,X,B));
  problem2->setLeftPrec(invOp);
  TEST_EQUALITY(problem2->setProblem(),true);
  Belos::PseudoBlockGmresSolMgr<double,Epetra_MultiVector,Epetra_Operator> solMgr2(problem2,pl2);
  Belos::ReturnType rv2 = solMgr2.solve(); // TEST_EQUALITY(solMgr2.solve(),Belos::Converged);
  TEST_EQUALITY(rv2,Belos::Converged);
  // The comparison tolerance is loosened by a factor of 10 per process.
  TEST_EQUALITY(EquivalentVectors(*X, *KnownX, tol*10*pow(10.0,NumProc)), true);
}
/// Unit test: exercises Teko::IterativePreconditionerFactory initialization
/// from three parameter lists:
///   1. a list built by buildLibPL(4,"Amesos")  -- any exception fails the test;
///   2. a list with only "Preconditioner Type"   -- outcome is only reported;
///   3. a list with a misspelled type key        -- initialization must throw.
TEUCHOS_UNIT_TEST(tIterativePreconditionerFactory, parameter_list_init)
{
   // Global (MPI) or serial communicator, depending on the build.
#ifdef HAVE_MPI
   Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
   Epetra_SerialComm Comm;
#endif

   Teko::LinearOp  A = build2x2(Comm,1,2,3,4);

   Thyra::LinearOpTester<double> tester;
   tester.show_all_tests(true);

   // Case 1: complete parameter list.
   {
      RCP<Teuchos::ParameterList> fullList = buildLibPL(4,"Amesos");
      RCP<Teko::IterativePreconditionerFactory> factory =
         rcp(new Teko::IterativePreconditionerFactory());
      RCP<Teko::InverseFactory> inverseFactory =
         rcp(new Teko::PreconditionerInverseFactory(factory,Teuchos::null));

      try {
         factory->initializeFromParameterList(*fullList);
         out << "Passed correct parameter list" << std::endl;

         Teko::LinearOp inverseOp = Teko::buildInverse(*inverseFactory,A);
      }
      catch(...) {
         success = false;
         out << "Failed correct parameter list" << std::endl;
      }
   }

   // Case 2: "Iteration Count" is not supplied.  Either outcome is reported
   // without touching `success`.
   {
      Teuchos::ParameterList typeOnlyList;
      typeOnlyList.set("Preconditioner Type","Amesos");

      RCP<Teko::IterativePreconditionerFactory> factory =
         rcp(new Teko::IterativePreconditionerFactory());
      RCP<Teko::InverseFactory> inverseFactory =
         rcp(new Teko::PreconditionerInverseFactory(factory,Teuchos::null));

      try {
         factory->initializeFromParameterList(typeOnlyList);
         out << "Passed iteration count" << std::endl;

         Teko::LinearOp inverseOp = Teko::buildInverse(*inverseFactory,A);
      }
      catch(...) {
         out << "Failed iteration count" << std::endl;
      }
   }

   // Case 3: the preconditioner-type key is deliberately misspelled, so
   // reaching the statement after initializeFromParameterList is a failure.
   {
      Teuchos::ParameterList misspelledList;
      misspelledList.set("Iteration Count",4);
      misspelledList.set("Precondiioner Type","Amesos");

      RCP<Teko::IterativePreconditionerFactory> factory =
         rcp(new Teko::IterativePreconditionerFactory());

      try {
         factory->initializeFromParameterList(misspelledList);
         success = false;
         out << "Failed preconditioner type" << std::endl;

         // Not expected to run: the call above should already have thrown.
         RCP<Teko::InverseFactory> inverseFactory =
            rcp(new Teko::PreconditionerInverseFactory(factory,Teuchos::null));
         Teko::LinearOp inverseOp = Teko::buildInverse(*inverseFactory,A);
      }
      catch(const std::exception &) {
         out << "Passed preconditioner type" << std::endl;
      }
   }
}
Esempio n. 4
0
/// Example driver: reads a Matrix Market file into a Tpetra::CrsMatrix A,
/// builds B = A*X from a random X, and solves A*Xhat = B with the Amesos2
/// solver selected on the command line.
///
/// Optional flags print the matrix, the solution, timing data, the number of
/// nonzeros in L+U, and the norm of (Xhat - X).
///
/// Returns -1 if command-line parsing is not successful, EXIT_SUCCESS if the
/// requested solver is not enabled or after a completed run, and 0 if the
/// solver factory throws std::invalid_argument.
int main(int argc, char *argv[]) {
  Teuchos::GlobalMPISession mpiSession(&argc,&argv);

  typedef double Scalar;
  typedef Teuchos::ScalarTraits<Scalar>::magnitudeType Magnitude;
  typedef Tpetra::Map<>::local_ordinal_type LO;
  typedef Tpetra::Map<>::global_ordinal_type GO;
  typedef Tpetra::DefaultPlatform::DefaultPlatformType Platform;

  typedef Tpetra::CrsMatrix<Scalar,LO,GO> MAT;
  typedef Tpetra::MultiVector<Scalar,LO,GO> MV;

  using Tpetra::global_size_t;
  using Teuchos::tuple;
  using Teuchos::RCP;
  using Teuchos::rcp;

  Platform &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
  Teuchos::RCP<const Teuchos::Comm<int> > comm = platform.getComm();
  size_t myRank = comm->getRank();

  RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));

  *fos << Amesos2::version() << std::endl << std::endl;

  // Command-line options and their defaults.
  bool printMatrix   = false;
  bool printSolution = false;
  bool printTiming   = false;
  bool printResidual = false;
  bool printLUStats  = false;
  bool verbose       = false;
  std::string solver_name = "SuperLU";
  std::string filedir = "../test/matrices/";
  std::string filename = "arc130.mtx";
  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
  cmdp.setOption("filedir",&filedir,"Directory where matrix-market files are located");
  cmdp.setOption("filename",&filename,"Filename for Matrix-Market test matrix.");
  cmdp.setOption("print-matrix","no-print-matrix",&printMatrix,"Print the full matrix after reading it.");
  cmdp.setOption("print-solution","no-print-solution",&printSolution,"Print solution vector after solve.");
  cmdp.setOption("print-timing","no-print-timing",&printTiming,"Print solver timing statistics");
  cmdp.setOption("print-residual","no-print-residual",&printResidual,"Print solution residual");
  cmdp.setOption("print-lu-stats","no-print-lu-stats",&printLUStats,"Print nnz in L and U factors");
  cmdp.setOption("solver", &solver_name, "Which TPL solver library to use.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1;
  }

  // Before we do anything, check that the solver is enabled
  if( !Amesos2::query(solver_name) ){
    std::cerr << solver_name << " not enabled.  Exiting..." << std::endl;
    return EXIT_SUCCESS;        // Otherwise CTest will pick it up as
                                // failure, which it isn't really
  }

  const size_t numVectors = 1;

  // create a Map
  // NOTE(review): this map is not referenced again in this function; the
  // vectors below use the matrix's own domain/range maps.
  global_size_t nrows = 6;
  RCP<Tpetra::Map<LO,GO> > map
    = rcp( new Tpetra::Map<LO,GO>(nrows,0,comm) );

  std::string mat_pathname = filedir + filename;
  RCP<MAT> A = Tpetra::MatrixMarket::Reader<MAT>::readSparseFile(mat_pathname,comm);

  if( printMatrix ){
    A->describe(*fos, Teuchos::VERB_EXTREME);
  }
  else if( verbose && myRank==0 ){
    *fos << std::endl << A->description() << std::endl << std::endl;
  }

  // get the maps
  RCP<const Tpetra::Map<LO,GO> > dmnmap = A->getDomainMap();
  RCP<const Tpetra::Map<LO,GO> > rngmap = A->getRangeMap();

  // Create random X (reference solution) and Xhat (to hold the computed one)
  RCP<MV> Xhat = rcp( new MV(dmnmap,numVectors) );
  RCP<MV> X = rcp( new MV(dmnmap,numVectors) );
  X->setObjectLabel("X");
  Xhat->setObjectLabel("Xhat");
  X->randomize();

  // Right-hand side consistent with the reference solution: B = A*X
  RCP<MV> B = rcp(new MV(rngmap,numVectors));
  A->apply(*X, *B);

  // Constructor from Factory
  RCP<Amesos2::Solver<MAT,MV> > solver;
  try{
    solver = Amesos2::create<MAT,MV>(solver_name, A, Xhat, B);
  } catch (const std::invalid_argument &e){
    // Caught by const reference: no copy is made and a derived exception
    // type is not sliced.
    *fos << e.what() << std::endl;
    return 0;
  }

  #ifdef SHYLU_NODEBASKER
  if( Amesos2::query("Basker") ) {
    Teuchos::ParameterList amesos2_params("Amesos2");
      amesos2_params.sublist(solver_name).set("num_threads", 1, "Number of threads");
    solver->setParameters( Teuchos::rcpFromRef(amesos2_params) );
  }
  #endif

  solver->numericFactorization();

  if( printLUStats && myRank == 0 ){
    Amesos2::Status solver_status = solver->getStatus();
    *fos << "L+U nnz = " << solver_status.getNnzLU() << std::endl;
  }

  solver->solve();

  if( printSolution ){
    // Print the solution
    Xhat->describe(*fos,Teuchos::VERB_EXTREME);
    X->describe(*fos,Teuchos::VERB_EXTREME);
  }

  if( printTiming ){
    // Print some timing statistics
    solver->printTiming(*fos);
  }

  if( printResidual ){
    Teuchos::Array<Magnitude> xhatnorms(numVectors);
    // NOTE(review): the value printed below is the 2-norm of (Xhat - X),
    // i.e. the solution error, although the label says "Ax - b".
    Xhat->update(-1.0, *X, 1.0);
    Xhat->norm2(xhatnorms());
    if( myRank == 0 ){
      *fos << "Norm2 of Ax - b = " << xhatnorms << std::endl;
    }
  }

  return EXIT_SUCCESS;
}
/// Test driver: reads a linear system from file, builds an Ifpack ILU
/// preconditioner, and solves the system with Belos flexible block GMRES
/// using that preconditioner on the right.
///
/// The test passes when the solver reports convergence AND every normalized
/// true residual ||B - A*X|| / ||B|| is at most the requested tolerance.
/// The residual check runs on every process regardless of verbosity; only the
/// printing is limited to process 0 in verbose mode.
///
/// Returns EXIT_SUCCESS when the test passes, EXIT_FAILURE otherwise.
/// NOTE(review): the early returns inside the try block (parse failure,
/// IFPACK_CHK_ERR, setProblem failure) leave before the MPI_Finalize call at
/// the end of the function.
int main(int argc, char *argv[]) {
  //
  int MyPID = 0;
#ifdef EPETRA_MPI
  // Initialize MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
  MyPID = Comm.MyPID();
#else
  Epetra_SerialComm Comm;
#endif
  //
  typedef double                            ST;
  typedef Teuchos::ScalarTraits<ST>        SCT;
  typedef SCT::magnitudeType                MT;
  typedef Epetra_MultiVector                MV;
  typedef Epetra_Operator                   OP;
  typedef Belos::MultiVecTraits<ST,MV>     MVT;
  typedef Belos::OperatorTraits<ST,MV,OP>  OPT;

  using Teuchos::ParameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;

  bool success = false;
  bool verbose = false;
  try {
    bool proc_verbose = false;
    int frequency = -1;        // frequency of status test output.
    int blocksize = 1;         // blocksize
    int numrhs = 1;            // number of right-hand sides to solve for
    int maxrestarts = 15;      // maximum number of restarts allowed
    int maxiters = -1;         // maximum number of iterations allowed per linear system
    int maxsubspace = 25;      // maximum number of blocks the solver can use for the subspace
    std::string filename("orsirr1.hb");
    MT tol = 1.0e-5;           // relative residual tolerance

    // Specify whether to use RHS as initial guess. If false, use zero.
    bool useRHSAsInitialGuess = false;

    Teuchos::CommandLineProcessor cmdp(false,true);
    cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
    cmdp.setOption("use-rhs","use-zero",&useRHSAsInitialGuess,"Use RHS as initial guess.");
    cmdp.setOption("frequency",&frequency,"Solvers frequency for printing residuals (#iters).");
    cmdp.setOption("filename",&filename,"Filename for test matrix.  Acceptable file extensions: *.hb,*.mtx,*.triU,*.triS");
    cmdp.setOption("tol",&tol,"Relative residual tolerance used by GMRES solver.");
    cmdp.setOption("num-rhs",&numrhs,"Number of right-hand sides to be solved for.");
    cmdp.setOption("block-size",&blocksize,"Block size used by GMRES.");
    cmdp.setOption("max-iters",&maxiters,"Maximum number of iterations per linear system (-1 = adapted to problem/block size).");
    cmdp.setOption("max-subspace",&maxsubspace,"Maximum number of blocks the solver can use for the subspace.");
    cmdp.setOption("max-restarts",&maxrestarts,"Maximum number of restarts allowed for GMRES solver.");
    if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
      return EXIT_FAILURE;
    }

    if (!verbose)
      frequency = -1;  // reset frequency if test is not verbose

    //
    // *************Get the problem*********************
    //
    RCP<Epetra_Map> Map;
    RCP<Epetra_CrsMatrix> A;
    RCP<Epetra_MultiVector> B, X;
    RCP<Epetra_Vector> vecB, vecX;
    EpetraExt::readEpetraLinearSystem(filename, Comm, &A, &Map, &vecX, &vecB);
    A->OptimizeStorage();
    proc_verbose = verbose && (MyPID==0);  /* Only print on the zero processor */

    // With more than one right-hand side, build a fresh problem: random X,
    // B = A*X, then reset X to zero.  Otherwise use the vectors read from file.
    if (numrhs>1) {
      X = rcp( new Epetra_MultiVector( *Map, numrhs ) );
      B = rcp( new Epetra_MultiVector( *Map, numrhs ) );
      X->Random();
      OPT::Apply( *A, *X, *B );
      X->PutScalar( 0.0 );
    }
    else {
      X = Teuchos::rcp_implicit_cast<Epetra_MultiVector>(vecX);
      B = Teuchos::rcp_implicit_cast<Epetra_MultiVector>(vecB);
    }

    // If requested, use a copy of B as initial guess
    if (useRHSAsInitialGuess)
    {
      X->Update(1.0, *B, 0.0);
    }

    //
    // ************Construct preconditioner*************
    //
    ParameterList ifpackList;

    // allocates an IFPACK factory. No data is associated
    // to this object (only method Create()).
    Ifpack Factory;

    // create the preconditioner. For valid PrecType values,
    // please check the documentation
    std::string PrecType = "ILU"; // incomplete LU
    int OverlapLevel = 1; // must be >= 0. If Comm.NumProc() == 1,
    // it is ignored.

    RCP<Ifpack_Preconditioner> Prec = Teuchos::rcp( Factory.Create(PrecType, &*A, OverlapLevel) );
    assert(Prec != Teuchos::null);

    // specify parameters for ILU
    ifpackList.set("fact: level-of-fill", 1);
    // the combine mode is on the following:
    // "Add", "Zero", "Insert", "InsertAdd", "Average", "AbsMax"
    // Their meaning is as defined in file Epetra_CombineMode.h
    ifpackList.set("schwarz: combine mode", "Add");
    // sets the parameters
    IFPACK_CHK_ERR(Prec->SetParameters(ifpackList));

    // initialize the preconditioner. At this point the matrix must
    // have been FillComplete()'d, but actual values are ignored.
    IFPACK_CHK_ERR(Prec->Initialize());

    // Builds the preconditioners, by looking for the values of
    // the matrix.
    IFPACK_CHK_ERR(Prec->Compute());

    // Create the Belos preconditioned operator from the Ifpack preconditioner.
    // NOTE:  This is necessary because Belos expects an operator to apply the
    //        preconditioner with Apply() NOT ApplyInverse().
    RCP<Belos::EpetraPrecOp> belosPrec = rcp( new Belos::EpetraPrecOp( Prec ) );

    //
    // *****Create parameter list for the block GMRES solver manager*****
    //
    const int NumGlobalElements = B->GlobalLength();
    if (maxiters == -1)
      maxiters = NumGlobalElements/blocksize - 1; // maximum number of iterations to run
    //
    ParameterList belosList;
    belosList.set( "Flexible Gmres", true );               // Flexible Gmres will be used to solve this problem
    belosList.set( "Num Blocks", maxsubspace );            // Maximum number of blocks in Krylov factorization
    belosList.set( "Block Size", blocksize );              // Blocksize to be used by iterative solver
    belosList.set( "Maximum Iterations", maxiters );       // Maximum number of iterations allowed
    belosList.set( "Maximum Restarts", maxrestarts );      // Maximum number of restarts allowed
    belosList.set( "Convergence Tolerance", tol );         // Relative convergence tolerance requested
    if (numrhs > 1) {
      belosList.set( "Show Maximum Residual Norm Only", true );  // Show only the maximum residual norm
    }
    if (verbose) {
      belosList.set( "Verbosity", Belos::Errors + Belos::Warnings +
          Belos::TimingDetails + Belos::StatusTestDetails );
      if (frequency > 0)
        belosList.set( "Output Frequency", frequency );
    }
    else
      belosList.set( "Verbosity", Belos::Errors + Belos::Warnings );
    //
    // *******Construct a preconditioned linear problem********
    //
    RCP<Belos::LinearProblem<double,MV,OP> > problem
      = rcp( new Belos::LinearProblem<double,MV,OP>( A, X, B ) );
    problem->setRightPrec( belosPrec );

    bool set = problem->setProblem();
    if (set == false) {
      if (proc_verbose)
        std::cout << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
      return EXIT_FAILURE;
    }

    // Create an iterative solver manager.
    // belosList is passed as a non-owning RCP, so it must outlive the solver.
    RCP< Belos::SolverManager<double,MV,OP> > solver
      = rcp( new Belos::BlockGmresSolMgr<double,MV,OP>(problem, rcp(&belosList,false)));

    //
    // *******************************************************************
    // *************Start the block Gmres iteration*************************
    // *******************************************************************
    //
    if (proc_verbose) {
      std::cout << std::endl << std::endl;
      std::cout << "Dimension of matrix: " << NumGlobalElements << std::endl;
      std::cout << "Number of right-hand sides: " << numrhs << std::endl;
      std::cout << "Block size used by solver: " << blocksize << std::endl;
      std::cout << "Number of restarts allowed: " << maxrestarts << std::endl;
      std::cout << "Max number of Gmres iterations per restart cycle: " << maxiters << std::endl;
      std::cout << "Relative residual tolerance: " << tol << std::endl;
      std::cout << std::endl;
    }
    //
    // Perform solve
    //
    Belos::ReturnType ret = solver->solve();
    //
    // Compute actual residuals.
    //
    bool badRes = false;
    std::vector<double> actual_resids( numrhs );
    std::vector<double> rhs_norm( numrhs );
    Epetra_MultiVector resid(*Map, numrhs);
    OPT::Apply( *A, *X, resid );
    MVT::MvAddMv( -1.0, resid, 1.0, *B, resid );   // resid = B - A*X
    MVT::MvNorm( resid, actual_resids );
    MVT::MvNorm( *B, rhs_norm );
    if (proc_verbose)
      std::cout<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl;
    // The tolerance check is done unconditionally so that the pass/fail
    // result does not depend on verbosity or on the process rank; only the
    // printing is restricted to the verbose root process.
    for ( int i=0; i<numrhs; i++) {
      const double actRes = actual_resids[i]/rhs_norm[i];
      if (proc_verbose)
        std::cout<<"Problem "<<i<<" : \t"<< actRes <<std::endl;
      if (actRes > tol) badRes = true;
    }

    success = ret==Belos::Converged && !badRes;
    if (success) {
      if (proc_verbose)
        std::cout << "End Result: TEST PASSED" << std::endl;
    } else {
      if (proc_verbose)
        std::cout << "End Result: TEST FAILED" << std::endl;
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef EPETRA_MPI
  MPI_Finalize();
#endif

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
Esempio n. 6
0
/// Scaling-test driver: builds a Galeri Helmholtz problem, sets up a MueLu
/// multigrid hierarchy for it, and solves A*X = B with Belos block GMRES using
/// the hierarchy as a right preconditioner.  Each phase is wrapped in its own
/// Teuchos::TimeMonitor and a timing summary is printed at the end.
///
/// Returns EXIT_FAILURE if the Belos linear problem cannot be set up;
/// otherwise control reaches the end of main without an explicit return.
int main(int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>

  // Tpetra / Xpetra types
  typedef Tpetra::Vector<SC,LO,GO,NO>                  TVEC;
  typedef Tpetra::MultiVector<SC,LO,GO,NO>             TMV;
  typedef Tpetra::CrsMatrix<SC,LO,GO,NO,LMO>           TCRS;
  typedef Xpetra::CrsMatrix<SC,LO,GO,NO,LMO>           XCRS;
  typedef Xpetra::TpetraCrsMatrix<SC,LO,GO,NO,LMO>     XTCRS;
  typedef Xpetra::Matrix<SC,LO,GO,NO,LMO>              XMAT;
  typedef Xpetra::CrsMatrixWrap<SC,LO,GO,NO,LMO>       XWRAP;

  // Belos types specialised for Tpetra multivectors
  typedef Belos::OperatorT<TMV>                        TOP;
  typedef Belos::OperatorTraits<SC,TMV,TOP>            TOPT;
  typedef Belos::MultiVecTraits<SC,TMV>                TMVT;
  typedef Belos::LinearProblem<SC,TMV,TOP>             TProblem;
  typedef Belos::SolverManager<SC,TMV,TOP>             TBelosSolver;
  typedef Belos::BlockGmresSolMgr<SC,TMV,TOP>          TBelosGMRES;

  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::TimeMonitor;

  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  Teuchos::CommandLineProcessor clp(false);

  // Default problem description handed to the Galeri parameter object.
  GO nx = 100, ny = 100, nz = 100;
  double stretchx = 1.0, stretchy = 1.0, stretchz = 1.0;
  double h = 0.01, delta = 2.0;
  int PMLXL = 10, PMLXR = 10, PMLYL = 10, PMLYR = 10, PMLZL = 10, PMLZR = 10;
  double omega = 20.0*M_PI;
  double shift = 0.5;

  Galeri::Xpetra::Parameters<GO> helmholtzParams(clp, nx, ny, nz, "Helmholtz1D", 0,
                                                 stretchx, stretchy, stretchz,
                                                 h, delta,
                                                 PMLXL, PMLXR, PMLYL, PMLYR, PMLZL, PMLZR,
                                                 omega, shift);
  Xpetra::Parameters             xpetraParameters(clp);

  // ---- Phase 1: matrix build ------------------------------------------------
  RCP<TimeMonitor> globalTimeMonitor =
      rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time")));
  RCP<TimeMonitor> phaseTimer =
      rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build")));

  Teuchos::ParameterList sizeSource = helmholtzParams.GetParameterList();
  RCP<MultiVector> coordinates;
  Teuchos::ParameterList galeriList;
  galeriList.set("nx", sizeSource.get("nx", nx));
  galeriList.set("ny", sizeSource.get("ny", ny));
  galeriList.set("nz", sizeSource.get("nz", nz));
  RCP<const Map> map;

  // Choose the map and coordinate layout that match the requested matrix type.
  if (helmholtzParams.GetMatrixType() == "Helmholtz1D") {
    map = MapFactory::Build(xpetraParameters.GetLib(), helmholtzParams.GetNumGlobalElements(), 0, comm);
    coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC, LO, GO, Map, MultiVector>(
        "1D", map, helmholtzParams.GetParameterList());
  }
  else if (helmholtzParams.GetMatrixType() == "Helmholtz2D") {
    map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList);
    coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC, LO, GO, Map, MultiVector>(
        "2D", map, helmholtzParams.GetParameterList());
  }
  else if (helmholtzParams.GetMatrixType() == "Helmholtz3D") {
    map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList);
    coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC, LO, GO, Map, MultiVector>(
        "3D", map, helmholtzParams.GetParameterList());
  }

  RCP<const Tpetra::Map<LO, GO, NO> > tmap = Xpetra::toTpetra(map);

  Teuchos::ParameterList problemParams = helmholtzParams.GetParameterList();

  // Assemble the Helmholtz operator.
  RCP<Galeri::Xpetra::Problem_Helmholtz<Map,CrsMatrixWrap,MultiVector> > helmholtzProblem =
      Galeri::Xpetra::BuildProblem_Helmholtz<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(
          helmholtzParams.GetMatrixType(), map, problemParams);
  RCP<Matrix> A = helmholtzProblem->BuildMatrix();

  // Constant vector handed to the hierarchy as its null space.
  RCP<MultiVector> nullspace = MultiVectorFactory::Build(map,1);
  nullspace->putScalar( (SC) 1.0);

  comm->barrier();

  phaseTimer = Teuchos::null;

  // ---- Phase 2: multigrid setup ---------------------------------------------
  phaseTimer = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 2 - MueLu Setup")));

  RCP<Hierarchy> hierarchy = rcp(new Hierarchy(A));
  hierarchy->GetLevel(0)->Set("Nullspace",nullspace);
  FactoryManager factoryManager;
  hierarchy->Setup(factoryManager, 0, 5);
  // hierarchy->Write(-1,-1) can be used here to dump the levels.

  phaseTimer = Teuchos::null;

  // ---- Phase 3: left- and right-hand sides ----------------------------------
  phaseTimer = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3 - LHS and RHS initialization")));
  RCP<TVEC> X = Tpetra::createVector<SC,LO,GO,NO>(tmap);
  RCP<TVEC> B = Tpetra::createVector<SC,LO,GO,NO>(tmap);
  X->putScalar((SC) 0.0);
  B->putScalar((SC) 0.0);
  // Rank 0 sets entry 0 of the right-hand side to one; all others stay zero.
  if (comm->getRank() == 0) {
    B->replaceGlobalValue(0, (SC) 1.0);
  }

  phaseTimer = Teuchos::null;

  // ---- Phase 4: Belos solve -------------------------------------------------
  phaseTimer = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 4 - Belos Solve")));

  // Adapt the Xpetra matrix and the MueLu hierarchy to the Belos operator interface.
  RCP<TOP> belosOp   = rcp(new Belos::XpetraOp<SC,LO,GO,NO,LMO> (A) );
  RCP<TOP> belosPrec = rcp(new Belos::MueLuOp<SC,LO,GO,NO,LMO>  (hierarchy) );

  // Linear problem with the hierarchy as right preconditioner.
  RCP<TProblem> belosProblem = rcp(new TProblem(belosOp,X,B));
  belosProblem->setRightPrec(belosPrec);
  const bool problemIsSet = belosProblem->setProblem();
  if (!problemIsSet) {
    if (comm->getRank() == 0) {
      std::cout << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
    }
    return EXIT_FAILURE;
  }

  // Solver settings.
  int maxIts = 100;
  double tol = 1e-6;
  Teuchos::ParameterList belosList;
  belosList.set("Maximum Iterations",    maxIts); // iteration cap
  belosList.set("Convergence Tolerance", tol);    // relative convergence tolerance
  belosList.set("Flexible Gmres", false);         // flexible GMRES switched off
  belosList.set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails);
  belosList.set("Output Frequency",1);
  belosList.set("Output Style",Belos::Brief);

  // belosList is passed as a non-owning RCP.
  RCP<TBelosSolver> solver = rcp( new TBelosGMRES(belosProblem, rcp(&belosList, false)) );

  // Run the solve; any exception is reported and leaves the status unconverged.
  Belos::ReturnType solveStatus = Belos::Unconverged;
  try {
    solveStatus = solver->solve();
    if (comm->getRank() == 0) {
      std::cout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl;
    }
  }
  catch(...) {
    if (comm->getRank() == 0) {
      std::cout << std::endl << "ERROR:  Belos threw an error! " << std::endl;
    }
  }

  // Report the convergence status on rank 0.
  if (comm->getRank() == 0) {
    if (solveStatus != Belos::Converged) {
      std::cout << std::endl << "ERROR:  Belos did not converge! " << std::endl;
    } else {
      std::cout << std::endl << "SUCCESS:  Belos converged!" << std::endl;
    }
  }

  // Report the iteration count for this solve.
  if (comm->getRank() == 0) {
    std::cout << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl;
  }

  // Stop the phase timer, then the global timer, before summarizing.
  phaseTimer = Teuchos::null;

  globalTimeMonitor = Teuchos::null;

  TimeMonitor::summarize();

} //main
Esempio n. 7
0
// MueLu scaling-test driver.
//
// Builds a test matrix with Galeri (or reads matrix/map/coordinates from
// files), sets up a MueLu hierarchy from an XML parameter list and optionally
// solves A x = b, either with the hierarchy as a standalone solver or with
// Belos (CG / GMRES) using the hierarchy as right preconditioner.
//
// If the XML file contains a "Run1" sublist the program runs in "driver"
// mode: sublists "Run1", "Run2", ... are processed one after another, each
// with its own "MueLu" sublist and, optionally, its own output file, solver
// type and tolerance.
//
// Returns EXIT_SUCCESS if only the help message was printed, EXIT_FAILURE on
// a command-line parsing error or if the Belos linear problem cannot be set
// up, and 0 otherwise.
int main(int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>

  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::ArrayRCP;
  using Teuchos::TimeMonitor;
  using Teuchos::ParameterList;

  // =========================================================================
  // MPI initialization using Teuchos
  // =========================================================================
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

  // =========================================================================
  // Convenient definitions
  // =========================================================================
  typedef Teuchos::ScalarTraits<SC> STS;
  SC zero = STS::zero(), one = STS::one();

  // =========================================================================
  // Parameters initialization
  // =========================================================================
  Teuchos::CommandLineProcessor clp(false);

  GO nx = 100, ny = 100, nz = 100;
  Galeri::Xpetra::Parameters<GO> galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case
  Xpetra::Parameters             xpetraParameters(clp);                          // manage parameters of Xpetra

  std::string xmlFileName       = "scalingTest.xml"; clp.setOption("xml",                   &xmlFileName,      "read parameters from a file [default = 'scalingTest.xml']");
  bool        printTimings      = true;              clp.setOption("timings", "notimings",  &printTimings,     "print timings to screen");
  int         writeMatricesOPT  = -2;                clp.setOption("write",                 &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)");
  std::string dsolveType        = "cg", solveType;   clp.setOption("solver",                &dsolveType,       "solve type: (none | cg | gmres | standalone)");
  double      dtol              = 1e-12, tol;        clp.setOption("tol",                   &dtol,             "solver convergence tolerance");

  std::string mapFile;                               clp.setOption("map",                   &mapFile,          "map data file");
  std::string matrixFile;                            clp.setOption("matrix",                &matrixFile,       "matrix data file");
  std::string coordFile;                             clp.setOption("coords",                &coordFile,        "coordinates data file");
  int         numRebuilds       = 0;                 clp.setOption("rebuild",               &numRebuilds,      "#times to rebuild hierarchy");
  int         maxIts            = 200;               clp.setOption("its",                   &maxIts,           "maximum number of solver iterations");
  bool        scaleResidualHistory = true;              clp.setOption("scale", "noscale",  &scaleResidualHistory, "scaled Krylov residual history");

  switch (clp.parse(argc, argv)) {
    case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS;
    case Teuchos::CommandLineProcessor::PARSE_ERROR:
    case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE;
    case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:          break;
  }

  Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

  ParameterList paramList;
  Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr<ParameterList>(&paramList), *comm);
  // The presence of a "Run1" sublist switches the program into driver mode
  bool isDriver = paramList.isSublist("Run1");
  if (isDriver) {
    // update galeriParameters with the values from the XML file
    ParameterList& realParams = galeriParameters.GetParameterList();

    for (ParameterList::ConstIterator it = realParams.begin(); it != realParams.end(); it++) {
      const std::string& name = realParams.name(it);
      if (paramList.isParameter(name))
        realParams.setEntry(name, paramList.getEntry(name));
    }
  }

  // Retrieve matrix parameters (they may have been changed on the command line)
  // [for instance, if we changed matrix type from 2D to 3D we need to update nz]
  ParameterList galeriList = galeriParameters.GetParameterList();

  // =========================================================================
  // Problem construction
  // =========================================================================
  // Problem description is collected here and printed at the start of every run
  std::ostringstream galeriStream;
  comm->barrier();
  RCP<TimeMonitor> globalTimeMonitor = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: S - Global Time")));
  RCP<TimeMonitor> tm                = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1 - Matrix Build")));

  RCP<Matrix>      A;
  RCP<const Map>   map;
  RCP<MultiVector> coordinates;
  RCP<MultiVector> nullspace;
  if (matrixFile.empty()) {
    galeriStream << "========================================================\n" << xpetraParameters << galeriParameters;

    // Galeri will attempt to create a square-as-possible distribution of subdomains di, e.g.,
    //                                 d1  d2  d3
    //                                 d4  d5  d6
    //                                 d7  d8  d9
    //                                 d10 d11 d12
    // A perfect distribution is only possible when the #processors is a perfect square.
    // This *will* result in "strip" distribution if the #processors is a prime number or if the factors are very different in
    // size. For example, np=14 will give a 7-by-2 distribution.
    // If you don't want Galeri to do this, specify mx or my on the galeriList.
    std::string matrixType = galeriParameters.GetMatrixType();

    // Create map and coordinates
    // In the future, we hope to be able to first create a Galeri problem, and then request map and coordinates from it
    // At the moment, however, things are fragile as we hope that the Problem uses same map and coordinates inside
    if (matrixType == "Laplace1D") {
      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian1D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("1D", map, galeriList);

    } else if (matrixType == "Laplace2D" || matrixType == "Star2D" ||
               matrixType == "BigStar2D" || matrixType == "Elasticity2D") {
      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian2D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("2D", map, galeriList);

    } else if (matrixType == "Laplace3D" || matrixType == "Brick3D" || matrixType == "Elasticity3D") {
      map = Galeri::Xpetra::CreateMap<LO, GO, Node>(xpetraParameters.GetLib(), "Cartesian3D", comm, galeriList);
      coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("3D", map, galeriList);
    }

    // Expand map to do multiple DOF per node for block problems
    if (matrixType == "Elasticity2D")
      map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 2);
    if (matrixType == "Elasticity3D")
      map = Xpetra::MapFactory<LO,GO,Node>::Build(map, 3);

    galeriStream << "Processor subdomains in x direction: " << galeriList.get<int>("mx") << std::endl
                 << "Processor subdomains in y direction: " << galeriList.get<int>("my") << std::endl
                 << "Processor subdomains in z direction: " << galeriList.get<int>("mz") << std::endl
                 << "========================================================" << std::endl;

    if (matrixType == "Elasticity2D" || matrixType == "Elasticity3D") {
      // Our default test case for elasticity: all boundaries of a square/cube have Neumann b.c. except left which has Dirichlet
      galeriList.set("right boundary" , "Neumann");
      galeriList.set("bottom boundary", "Neumann");
      galeriList.set("top boundary"   , "Neumann");
      galeriList.set("front boundary" , "Neumann");
      galeriList.set("back boundary"  , "Neumann");
    }

    RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
        Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(galeriParameters.GetMatrixType(), map, galeriList);
    A = Pr->BuildMatrix();

    if (matrixType == "Elasticity2D" ||
        matrixType == "Elasticity3D") {
      nullspace = Pr->BuildNullspace();
      A->SetFixedBlockSize((galeriParameters.GetMatrixType() == "Elasticity2D") ? 2 : 3);
    }

  } else {
    if (!mapFile.empty())
      map = Utils2::ReadMap(mapFile, xpetraParameters.GetLib(), comm);
    comm->barrier();

    if (lib == Xpetra::UseEpetra) {
      A = Utils::Read(matrixFile, map);

    } else {
      // Tpetra matrix reader is still broken, so instead we read in
      // a matrix in a binary format and then redistribute it
      const bool binaryFormat = true;
      A = Utils::Read(matrixFile, lib, comm, binaryFormat);

      RCP<Matrix> newMatrix = MatrixFactory::Build(map, 1);
      RCP<Import> importer  = ImportFactory::Build(A->getRowMap(), map);
      newMatrix->doImport(*A, *importer, Xpetra::INSERT);
      newMatrix->fillComplete();

      A.swap(newMatrix);
    }

    comm->barrier();

    if (!coordFile.empty())
      coordinates = Utils2::ReadMultiVector(coordFile, map);
  }

  comm->barrier();
  tm = Teuchos::null;

  galeriStream << "Galeri complete.\n========================================================" << std::endl;

  int numReruns = 1;
  if (paramList.isParameter("number of reruns"))
    numReruns = paramList.get<int>("number of reruns");

  const bool mustAlreadyExist = true;
  for (int rerunCount = 1; rerunCount <= numReruns; rerunCount++) {
    ParameterList mueluList, runList;

    // In non-driver mode the whole XML file is the MueLu list and there is exactly one run
    bool stop = false;
    if (isDriver) {
      runList   = paramList.sublist("Run1",  mustAlreadyExist);
      mueluList = runList  .sublist("MueLu", mustAlreadyExist);
    } else {
      mueluList = paramList;
      stop = true;
    }

    if (nullspace.is_null()) {
      // No nullspace was provided by the problem: build the default one
      // (one constant vector per equation). The block size has to be looked
      // up in the MueLu list itself; in driver mode that list lives in
      // Run1/MueLu, not at the top level of the XML file.
      int blkSize = 1;
      if (mueluList.isSublist("Matrix")) {
        // Factory style parameter list
        const Teuchos::ParameterList& operatorList = mueluList.sublist("Matrix");
        if (operatorList.isParameter("PDE equations"))
          blkSize = operatorList.get<int>("PDE equations");

      } else if (mueluList.isParameter("number of equations")) {
        // Easy style parameter list
        blkSize = mueluList.get<int>("number of equations");
      }

      nullspace = MultiVectorFactory::Build(map, blkSize);
      for (int i = 0; i < blkSize; i++) {
        RCP<const Map> domainMap = A->getDomainMap();
        GO             indexBase = domainMap->getIndexBase();

        ArrayRCP<SC> nsData = nullspace->getDataNonConst(i);
        for (int j = 0; j < nsData.size(); j++) {
          GO GID = domainMap->getGlobalElement(j) - indexBase;

          // Vector i is one on the i-th degree of freedom of every block
          if ((GID-i) % blkSize == 0)
            nsData[j] = Teuchos::ScalarTraits<SC>::one();
        }
      }
    }

    int runCount = 1;
    do {
      A->SetMaxEigenvalueEstimate(-one);

      solveType = dsolveType;
      tol       = dtol;

      int   savedOut  = -1;
      FILE* openedOut = NULL;
      if (isDriver) {
        if (runList.isParameter("filename")) {
          // Redirect all output into a filename We have to redirect all output,
          // including printf's, therefore we cannot simply replace C++ cout
          // buffers, and have to use heavy machinary (dup2)
          std::string filename = runList.get<std::string>("filename");
          if (numReruns > 1)
            filename += "_run" + MueLu::toString(rerunCount);
          filename += (lib == Xpetra::UseEpetra ? ".epetra" : ".tpetra");

          // Open the target first, so that a failure leaves stdout untouched
          // instead of handing a NULL stream to fileno()
          openedOut = fopen(filename.c_str(), "w");
          if (openedOut == NULL)
            throw MueLu::Exceptions::RuntimeError("Cannot open output file \"" + filename + "\"");

          // Whatever is still buffered belongs to the old stdout
          std::cout.flush();
          fflush(stdout);

          savedOut = dup(STDOUT_FILENO);
          dup2(fileno(openedOut), STDOUT_FILENO);
        }
        if (runList.isParameter("solver")) solveType = runList.get<std::string>("solver");
        if (runList.isParameter("tol"))    tol       = runList.get<double>     ("tol");
      }

      // Instead of checking each time for rank, create a rank 0 stream
      RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
      Teuchos::FancyOStream& out = *fancy;
      out.setOutputToRootOnly(0);

      out << galeriStream.str();

      // =========================================================================
      // Preconditioner construction
      // =========================================================================
      comm->barrier();
      tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 1.5 - MueLu read XML")));

      RCP<HierarchyManager> mueLuFactory = rcp(new ParameterListInterpreter(mueluList));

      comm->barrier();
      tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 2 - MueLu Setup")));

      // The hierarchy is set up numRebuilds+1 times; only the last one is kept
      RCP<Hierarchy> H;
      for (int i = 0; i <= numRebuilds; i++) {
        A->SetMaxEigenvalueEstimate(-one);

        H = mueLuFactory->CreateHierarchy();
        H->GetLevel(0)->Set("A",           A);
        H->GetLevel(0)->Set("Nullspace",   nullspace);
        if (!coordinates.is_null())
          H->GetLevel(0)->Set("Coordinates", coordinates);
        mueLuFactory->SetupHierarchy(*H);
      }

      comm->barrier();
      tm = Teuchos::null;

      // =========================================================================
      // System solution (Ax = b)
      // =========================================================================
      comm->barrier();
      tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3 - LHS and RHS initialization")));

      RCP<Vector> X = VectorFactory::Build(map);
      RCP<Vector> B = VectorFactory::Build(map);

      {
        // we set seed for reproducibility
        Utils::SetRandomSeed(*comm);
        X->randomize();
        A->apply(*X, *B, Teuchos::NO_TRANS, one, zero);

        // Normalize the right-hand side and start from a zero initial guess
        Teuchos::Array<STS::magnitudeType> norms(1);
        B->norm2(norms);
        B->scale(one/norms[0]);
        X->putScalar(zero);
      }
      tm = Teuchos::null;

      if (writeMatricesOPT > -2) {
        tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 3.5 - Matrix output")));
        H->Write(writeMatricesOPT, writeMatricesOPT);
        tm = Teuchos::null;
      }

      comm->barrier();
      if (solveType == "none") {
        // Do not perform a solve

      } else if (solveType == "standalone") {
        tm = rcp (new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 4 - Fixed Point Solve")));

        H->IsPreconditioner(false);
        H->Iterate(*B, *X, maxIts);

      } else if (solveType == "cg" || solveType == "gmres") {
#ifdef HAVE_MUELU_BELOS
        tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("ScalingTest: 5 - Belos Solve")));

        // Operator and Multivector type that will be used with Belos
        typedef MultiVector          MV;
        typedef Belos::OperatorT<MV> OP;

        H->IsPreconditioner(true);

        // Define Operator and Preconditioner
        Teuchos::RCP<OP> belosOp   = Teuchos::rcp(new Belos::XpetraOp<SC, LO, GO, NO, LMO>(A)); // Turns a Xpetra::Matrix object into a Belos operator
        Teuchos::RCP<OP> belosPrec = Teuchos::rcp(new Belos::MueLuOp <SC, LO, GO, NO, LMO>(H)); // Turns a MueLu::Hierarchy object into a Belos operator

        // Construct a Belos LinearProblem object
        RCP< Belos::LinearProblem<SC, MV, OP> > belosProblem = rcp(new Belos::LinearProblem<SC, MV, OP>(belosOp, X, B));
        belosProblem->setRightPrec(belosPrec);

        bool set = belosProblem->setProblem();
        if (set == false) {
          out << "\nERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
          return EXIT_FAILURE;
        }

        // Belos parameter list
        Teuchos::ParameterList belosList;
        belosList.set("Maximum Iterations",    maxIts); // Maximum number of iterations allowed
        belosList.set("Convergence Tolerance", tol);    // Relative convergence tolerance requested
        belosList.set("Verbosity",             Belos::Errors + Belos::Warnings + Belos::StatusTestDetails);
        belosList.set("Output Frequency",      1);
        belosList.set("Output Style",          Belos::Brief);
        if (!scaleResidualHistory) 
          belosList.set("Implicit Residual Scaling", "None");

        // Create an iterative solver manager
        // (belosList is passed as a non-owning RCP; it outlives the solver)
        RCP< Belos::SolverManager<SC, MV, OP> > solver;
        if (solveType == "cg") {
          solver = rcp(new Belos::PseudoBlockCGSolMgr   <SC, MV, OP>(belosProblem, rcp(&belosList, false)));
        } else if (solveType == "gmres") {
          solver = rcp(new Belos::BlockGmresSolMgr<SC, MV, OP>(belosProblem, rcp(&belosList, false)));
        }

        // Perform solve
        Belos::ReturnType ret = Belos::Unconverged;
        try {
          ret = solver->solve();

          // Get the number of iterations for this solve.
          out << "Number of iterations performed for this solve: " << solver->getNumIters() << std::endl;

        } catch(...) {
          out << std::endl << "ERROR:  Belos threw an error! " << std::endl;
        }

        // Check convergence
        if (ret != Belos::Converged)
          out << std::endl << "ERROR:  Belos did not converge! " << std::endl;
        else
          out << std::endl << "SUCCESS:  Belos converged!" << std::endl;
#endif //ifdef HAVE_MUELU_BELOS
      } else {
        throw MueLu::Exceptions::RuntimeError("Unknown solver type: \"" + solveType + "\"");
      }
      comm->barrier();
      tm = Teuchos::null;
      globalTimeMonitor = Teuchos::null;

      if (printTimings)
        TimeMonitor::summarize(A->getRowMap()->getComm().ptr(), std::cout, false, true, false, Teuchos::Union);

      TimeMonitor::clearCounters();

      if (isDriver) {
        if (openedOut != NULL) {
          // Make sure everything written during this run lands in the file
          // before the original stdout is put back
          std::cout.flush();
          fflush(stdout);

          dup2(savedOut, STDOUT_FILENO);
          // The duplicate is no longer needed; without this one descriptor
          // would leak per run
          close(savedOut);
          savedOut = -1;

          fclose(openedOut);
          openedOut = NULL;
        }
        // Advance to the next "RunN" sublist; a missing sublist ends the loop
        try {
          runList   = paramList.sublist("Run" + MueLu::toString(++runCount), mustAlreadyExist);
          mueluList = runList  .sublist("MueLu", mustAlreadyExist);
        } catch (const std::exception&) {
          stop = true;
        }
      }

    } while (stop == false);
  }


  return 0;
} //main
void
SupportGraph<MatrixType>::findSupport ()
{
  // Builds the support-graph matrix Support_ from the local matrix A_local_.
  //
  // The negative strictly-upper-triangular entries of A_local_ are turned
  // into a weighted undirected graph, NumForests_ spanning forests are
  // extracted from it with Kruskal's algorithm (each forest is removed from
  // the graph before the next one is computed), and the forest edges are
  // stored as the upper-triangular off-diagonal entries of Support_.  The
  // diagonal is assembled from the edge weights and then blended with the
  // original diagonal (KeepDiag_) and adjusted by Rthresh_ / Athresh_.
  //
  // Only entries above the diagonal are looked at, i.e. the matrix is
  // assumed to be symmetric.
  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
                            global_ordinal_type, node_type> crs_matrix_type;
  typedef Tpetra::Vector<scalar_type, local_ordinal_type,
                         global_ordinal_type, node_type> vec_type;

  typedef std::pair<int, int> E;
  typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::undirectedS,
                                boost::no_property,
                                boost::property<boost::edge_weight_t,
                                                magnitude_type> > graph_type;
  typedef typename boost::graph_traits<graph_type>::edge_descriptor edge_type;

  const scalar_type zero = STS::zero();
  const scalar_type one = STS::one();

  size_t num_verts = A_local_->getNodeNumRows();
  // Estimate of the number of undirected edges; only used to pre-size the
  // edge storage, the graph itself is built from the edges actually found.
  size_t num_edges
    = (A_local_->getNodeNumEntries() - A_local_->getNodeNumDiags())/2;

  // Edge list and weights handed to the BGL graph.  Vectors (rather than
  // raw new[] arrays) release their memory on every exit path and cannot be
  // overrun if the estimate above is too small.
  std::vector<E> edge_array;
  std::vector<magnitude_type> weights;
  edge_array.reserve(num_edges);
  weights.reserve(num_edges);

  // Temp data structures for row extraction
  size_t num_entries;
  size_t max_num_entries = A_local_->getNodeMaxNumRowEntries();

  std::vector<scalar_type> valuestemp (max_num_entries);
  std::vector<local_ordinal_type> indicestemp (max_num_entries);

  std::vector<magnitude_type> diagonal (num_verts);

  Tpetra::ArrayView<scalar_type> values (valuestemp);
  Tpetra::ArrayView<local_ordinal_type> indices (indicestemp);

  // Extract from the tpetra matrix keeping only one edge per pair
  // (assume symmetric)
  for (size_t row = 0; row < num_verts; ++row) {
    A_local_->getLocalRowCopy (row, indices, values, num_entries);
    for (size_t colIndex = 0; colIndex < num_entries; ++colIndex) {
      if(row == Teuchos::as<size_t>(indices[colIndex])) {
        diagonal[row] = values[colIndex];
      }

      if((row < Teuchos::as<size_t>(indices[colIndex]))
         && (values[colIndex] < zero)) {
        magnitude_type edge_weight = values[colIndex];
        if (Randomize_) {
          // Add small random pertubation.
          edge_weight *= one +
            STS::magnitude(STS::rmin() * STS::random());
        }

        edge_array.push_back (E(row, indices[colIndex]));
        weights.push_back (edge_weight);
      }
    }
  }

  // Create BGL graph from exactly the edges collected above
  graph_type g(edge_array.begin(), edge_array.end(), weights.begin(),
               num_verts);
  typedef typename boost::property_map
    <graph_type, boost::edge_weight_t>::type type;
  type weight = get (boost::edge_weight, g);
  std::vector<edge_type> spanning_tree;

  // Run Kruskal, actually maximal weight ST since edges are negative
  boost::kruskal_minimum_spanning_tree(g, std::back_inserter (spanning_tree));

  // Create array to store the exact number of non-zeros per row
  // (every row starts with one entry, the diagonal)
  Teuchos::ArrayRCP<size_t> NumNz (num_verts, 1);

  typedef typename std::vector<edge_type>::iterator edge_iterator_type;

  // Find the degree of all the vertices
  for (edge_iterator_type ei = spanning_tree.begin(); ei != spanning_tree.end();
       ++ei) {
    local_ordinal_type localsource = source(*ei,g);
    local_ordinal_type localtarget = target(*ei,g);

    // We only want upper triangular entries, might need to swap
    if (localsource > localtarget) {
      localsource = target(*ei, g);
      localtarget = source(*ei, g);
    }

    NumNz[localsource] += 1;
  }


  // Create an stl vector of stl vectors to hold indices and values
  std::vector<std::vector<local_ordinal_type> > Indices (num_verts);
  std::vector<std::vector<magnitude_type> > Values (num_verts);

  for (size_t i = 0; i < num_verts; ++i) {
    Indices[i].resize(NumNz[i]);
    Values[i].resize(NumNz[i]);
  }

  // The local ordering might be different from the global
  // ordering and we need the local number of non-zeros per row
  // to correctly allocate the preconditioner matrix memory
  Teuchos::ArrayRCP<size_t> localnumnz (num_verts, 1);

  // Slot 0 of every row is the diagonal entry
  for (size_t i = 0; i < num_verts; ++i) {
     Indices[i][0] = i;
  }


  // Add each spanning forest (tree) to the support graph and
  // remove it from original graph
  for (int forest = 0; forest < NumForests_; ++forest) {
    // If a tree has already been added then we need to rerun Kruskall and
    // update the arrays containing size information
    if (forest > 0) {
      spanning_tree.clear();
      boost::kruskal_minimum_spanning_tree
        (g, std::back_inserter(spanning_tree));

      for (edge_iterator_type ei = spanning_tree.begin();
          ei != spanning_tree.end(); ++ei) {
        // Count the edge on the row it is stored in below, i.e. the
        // smaller of its two end points (same rule as in the first pass)
        local_ordinal_type localsource = source(*ei, g);
        local_ordinal_type localtarget = target(*ei, g);
        if (localsource > localtarget) {
          localsource = localtarget;
        }

        NumNz[localsource] += 1;
      }

      // FIXME (mfh 14 Nov 2013) Are you sure that all this resizing
      // is a good idea?
      for (size_t row = 0; row < num_verts; ++row) {
        Indices[row].resize(NumNz[row]);
        Values[row].resize(NumNz[row]);
      }
    }

    for (edge_iterator_type ei = spanning_tree.begin();
        ei != spanning_tree.end(); ++ei) {
      local_ordinal_type localsource = source(*ei, g);
      local_ordinal_type localtarget = target(*ei, g);

      if (localsource > localtarget) {
        localsource = target(*ei,g);
        localtarget = source(*ei,g);
      }

      // Assume standard Laplacian with constant row-sum.
      // Edge weights are negative, so subtract to make diagonal positive
      Values[localtarget][0] -= weight[*ei];
      Values[localsource][0] -= weight[*ei];

      Indices[localsource][localnumnz[localsource]] = localtarget;
      Values[localsource][localnumnz[localsource]] = weight[*ei];
      localnumnz[localsource] += 1;

      remove_edge(*ei,g);
    }
  }

  // Set diagonal to weighted average of Laplacian preconditioner
  // and the original matrix

  // First compute the "diagonal surplus" (in the original input matrix)
  // If input is a (pure, Dirichlet) graph Laplacian , this will be 0
  vec_type ones (A_local_->getDomainMap());
  vec_type surplus (A_local_->getRangeMap());

  ones.putScalar(one);
  A_local_->apply(ones, surplus);

  Teuchos::ArrayRCP<const scalar_type> surplusaccess = surplus.getData(0);

  for (size_t i = 0; i < num_verts; ++i) {
    if (surplusaccess[i] > zero) {
      Values[i][0] += surplusaccess[i];
    }

    // If the original diagonal is less than the row sum then we aren't going to
    // use it regardless of the diagonal option, shouldn't happen for proper
    // Laplacian
    if (diagonal[i] < Values[i][0]) {
      diagonal[i] = Values[i][0];
    }

    Values[i][0] = KeepDiag_*diagonal[i] + (one-KeepDiag_) * Values[i][0];

    // Modify the diagonal with user specified scaling
    if (Rthresh_) {
      Values[i][0] *= Rthresh_;
    }
    if (Athresh_) {
      Values[i][0] += Athresh_;
    }
  }

  // Create the CrsMatrix for the support graph
  Support_ = rcp (new crs_matrix_type (A_local_->getRowMap(),
                                       A_local_->getColMap(),
                                       localnumnz, Tpetra::StaticProfile));

  // Fill in the matrix with the stl vectors for each row
  for (size_t row = 0; row < num_verts; ++row) {
    Teuchos::ArrayView<local_ordinal_type>
      IndicesInsert (Indices[Teuchos::as<local_ordinal_type> (row)]);
    Teuchos::ArrayView<scalar_type>
      ValuesInsert (Values[Teuchos::as<local_ordinal_type> (row)]);
    Support_->insertLocalValues (row, IndicesInsert, ValuesInsert);
  }

  Support_->fillComplete();

  // edge_array and weights are released automatically
}
void
SupportGraph<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,
                                 local_ordinal_type,
                                 global_ordinal_type,
                                 node_type>& X,
       Tpetra::MultiVector<scalar_type,
                           local_ordinal_type,
                           global_ordinal_type,
                           node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  // Applies the preconditioner: solver_ is run with X as right-hand side and
  // Y as solution vector, and the result is scaled by alpha.
  //
  // X     - input multivector (right-hand side of the solve)
  // Y     - output multivector; must have as many columns as X
  // mode  - not used by this implementation
  // alpha - scaling applied to the result
  // beta  - must be zero
  //
  // Throws std::runtime_error if compute() has not been called or if X and Y
  // have different numbers of columns, and std::logic_error if beta != 0.
  // Also updates the NumApply_ / ApplyTime_ statistics.
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::Time;
  using Teuchos::TimeMonitor;
  typedef scalar_type DomainScalar;
  typedef Tpetra::MultiVector<DomainScalar, local_ordinal_type,
    global_ordinal_type, node_type> MV;

  // Create a timer for this method, if it doesn't exist already.
  // TimeMonitor::getNewCounter registers the timer, so that
  // TimeMonitor's class methods like summarize() will report the
  // total time spent in successful calls to this method.
  const std::string timerName ("Ifpack2::SupportGraph::apply");
  RCP<Time> timer = TimeMonitor::lookupCounter(timerName);
  if (timer.is_null()) {
    timer = TimeMonitor::getNewCounter(timerName);
  }

  { // Start timing here.
    Teuchos::TimeMonitor timeMon (*timer);

    TEUCHOS_TEST_FOR_EXCEPTION(
      ! isComputed(), std::runtime_error,
      "Ifpack2::SupportGraph::apply: You must call compute() to compute the "
      "incomplete factorization, before calling apply().");

    TEUCHOS_TEST_FOR_EXCEPTION(
      X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
      "Ifpack2::SupportGraph::apply: X and Y must have the same number of "
      "columns.  X has " << X.getNumVectors() << " columns, but Y has "
      << Y.getNumVectors() << " columns.");

    TEUCHOS_TEST_FOR_EXCEPTION(
      beta != STS::zero(), std::logic_error,
      "Ifpack2::SupportGraph::apply: This method does not currently work when "
      "beta != 0.");

    // If X and Y are pointing to the same memory location,
    // we need to create an auxiliary vector, Xcopy
    RCP<const MV> Xcopy;
    if (X.getLocalMV().getValues() == Y.getLocalMV().getValues()) {
      Xcopy = rcp (new MV(X));
    }
    else {
      Xcopy = rcpFromRef(X);
    }

    RCP<MV> Ycopy = rcpFromRef(Y);

    solver_->setB(Xcopy);
    solver_->setX(Ycopy);

    solver_->solve ();

    // Apply alpha to the computed result.  Scaling Y before the solve would
    // only scale values that the solve then replaces.
    // NOTE(review): assumes solver_->solve() overwrites Y (consistent with
    // beta != 0 being rejected above) -- confirm against the solver in use.
    if (alpha != STS::one()) {
      Y.scale(alpha);
    }
  } // Stop timing here.

  ++NumApply_;

  // timer->totalElapsedTime() returns the total time over all timer
  // calls.  Thus, we use = instead of +=.
  ApplyTime_ = timer->totalElapsedTime();
}
Esempio n. 10
0
RCP<Basic> cos(const RCP<Basic> &arg)
{
    // Any argument other than zero is kept as a symbolic Cos node.
    if (!eq(arg, zero)) {
        return rcp(new Cos(arg));
    }

    // A zero argument is simplified to the constant one.
    return one;
}
Esempio n. 11
0
RCP<Basic> sin(const RCP<Basic> &arg)
{
    // Any argument other than zero is kept as a symbolic Sin node.
    if (!eq(arg, zero)) {
        return rcp(new Sin(arg));
    }

    // A zero argument is simplified to the constant zero.
    return zero;
}
Esempio n. 12
0
// Test driver: reads a matrix via Belos::createEpetraProblem, builds
// right-hand sides from a known solution, solves with Belos::RCGSolMgr and
// checks the true relative residuals against the requested tolerance.
//
// Returns EXIT_SUCCESS when the solver reports convergence and every
// normalized residual is <= tol, EXIT_FAILURE otherwise.  Returns -1 (or the
// error code of createEpetraProblem) on setup failures.
//
// NOTE(review): the early `return` statements inside the try block leave
// main() without reaching the MPI_Finalize() call at the bottom when
// EPETRA_MPI is defined.
int main(int argc, char *argv[]) {
  //
#ifdef EPETRA_MPI
  // Initialize MPI
  MPI_Init(&argc,&argv);
#endif
  //
  using Teuchos::ParameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;

  bool success = false;
  bool verbose = false;
  try {
    //
    // Get test parameters from command-line processor
    //
    bool proc_verbose = false;
    int frequency = -1;                  // frequency of status test output.
    std::string filename("gr_30_30.hb"); // default input filename
    double tol = 1.0e-10;                // relative residual tolerance
    int numBlocks = 30;                  // maximum number of blocks the solver can use for the Krylov subspace
    int recycleBlocks = 3;               // maximum number of blocks the solver can use for the recycle space
    int numrhs = 1;                      // number of right-hand sides to solve for
    int maxiters = -1;                   // maximum number of iterations allowed per linear system

    Teuchos::CommandLineProcessor cmdp(false,true);
    cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
    cmdp.setOption("frequency",&frequency,"Solvers frequency for printing residuals (#iters).");
    cmdp.setOption("filename",&filename,"Filename for test matrix.");
    cmdp.setOption("tol",&tol,"Relative residual tolerance used by the RCG solver.");
    cmdp.setOption("max-subspace",&numBlocks,"Maximum number of vectors in search space (not including recycle space).");
    cmdp.setOption("recycle",&recycleBlocks,"Number of vectors in recycle space.");
    cmdp.setOption("num-rhs",&numrhs,"Number of right-hand sides to be solved for.");
    cmdp.setOption("max-iters",&maxiters,"Maximum number of iterations per linear system (-1 = adapted to problem/block size).");
    if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
      return -1;
    }
    if (!verbose)
      frequency = -1;  // reset frequency if test is not verbose
    //
    // Get the problem
    //
    int MyPID;
    RCP<Epetra_CrsMatrix> A;
    RCP<Epetra_MultiVector> B, X;
    int return_val =Belos::createEpetraProblem(filename,NULL,&A,&B,&X,&MyPID);
    if(return_val != 0) return return_val;
    // Only the process with MyPID == 0 prints.
    proc_verbose = ( verbose && (MyPID==0) );
    //
    // Solve using Belos
    //
    typedef double                          ST;
    typedef Epetra_Operator                 OP;
    typedef Epetra_MultiVector              MV;
    typedef Belos::OperatorTraits<ST,MV,OP> OPT;
    typedef Belos::MultiVecTraits<ST,MV>    MVT;
    //
    // *****Construct initial guess and right-hand sides *****
    //
    if (numrhs != 1) {
      // Random exact solution; B = A*X, then X is reset to a zero initial guess.
      X = rcp( new Epetra_MultiVector( A->Map(), numrhs ) );
      X->Random();
      B = rcp( new Epetra_MultiVector( A->Map(), numrhs ) );
      OPT::Apply( *A, *X, *B );
      MVT::MvInit( *X, 0.0 );
    }
    else { // initialize exact solution to be vector of ones
      MVT::MvInit( *X, 1.0 );
      OPT::Apply( *A, *X, *B );
      MVT::MvInit( *X, 0.0 );
    }
    //
    // ********Other information used by block solver***********
    // *****************(can be user specified)******************
    //
    const int NumGlobalElements = B->GlobalLength();
    if (maxiters == -1)
      maxiters = NumGlobalElements - 1; // maximum number of iterations to run
    //
    ParameterList belosList;
    belosList.set( "Maximum Iterations", maxiters );       // Maximum number of iterations allowed
    belosList.set( "Num Blocks", numBlocks);               // Maximum number of blocks in Krylov space
    belosList.set( "Num Recycled Blocks", recycleBlocks ); // Number of vectors in recycle space
    belosList.set( "Convergence Tolerance", tol );         // Relative convergence tolerance requested
    if (verbose) {
      belosList.set( "Verbosity", Belos::Errors + Belos::Warnings +
          Belos::TimingDetails + Belos::FinalSummary + Belos::StatusTestDetails );
      if (frequency > 0)
        belosList.set( "Output Frequency", frequency );
    }
    else
      belosList.set( "Verbosity", Belos::Errors + Belos::Warnings );
    //
    // Construct an unpreconditioned linear problem instance.
    //
    Belos::LinearProblem<double,MV,OP> problem( A, X, B );
    bool set = problem.setProblem();
    if (set == false) {
      if (proc_verbose)
        std::cout << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
      return -1;
    }
    //
    // Create an iterative solver manager.
    // The problem and parameter list live on the stack, so they are wrapped
    // in RCPs created with the ownership flag set to false.
    //
    RCP< Belos::SolverManager<double,MV,OP> > newSolver
      = rcp( new Belos::RCGSolMgr<double,MV,OP>(rcp(&problem,false), rcp(&belosList,false)) );
    //
    // **********Print out information about problem*******************
    //
    if (proc_verbose) {
      std::cout << std::endl << std::endl;
      std::cout << "Dimension of matrix: " << NumGlobalElements << std::endl;
      std::cout << "Number of right-hand sides: " << numrhs << std::endl;
      std::cout << "Max number of RCG iterations: " << maxiters << std::endl;
      std::cout << "Max number of vectors in Krylov space: " << numBlocks << std::endl;
      std::cout << "Number of vectors in recycle space: " << recycleBlocks << std::endl;
      std::cout << "Relative residual tolerance: " << tol << std::endl;
      std::cout << std::endl;
    }
    //
    // Perform solve
    //
    Belos::ReturnType ret = newSolver->solve();
    //
    // Compute actual residuals: resid = B - A*X, column by column.
    //
    bool badRes = false;
    std::vector<double> actual_resids( numrhs );
    std::vector<double> rhs_norm( numrhs );
    Epetra_MultiVector resid(A->Map(), numrhs);
    OPT::Apply( *A, *X, resid );
    MVT::MvAddMv( -1.0, resid, 1.0, *B, resid );
    MVT::MvNorm( resid, actual_resids );
    MVT::MvNorm( *B, rhs_norm );
    //
    // The tolerance check must run on every process and regardless of the
    // verbosity setting; otherwise the pass/fail result would depend on
    // whether output was requested.  Only the printing is conditional.
    //
    if (proc_verbose)
      std::cout<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl;
    for ( int i=0; i<numrhs; i++) {
      const double actRes = actual_resids[i]/rhs_norm[i];
      if (proc_verbose)
        std::cout<<"Problem "<<i<<" : \t"<< actRes <<std::endl;
      if (actRes > tol) badRes = true;
    }

    success = ret==Belos::Converged && !badRes;
    if (success) {
      if (proc_verbose)
        std::cout << std::endl << "End Result: TEST PASSED" << std::endl;
    } else {
      if (proc_verbose)
        std::cout << std::endl << "End Result: TEST FAILED" << std::endl;
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

#ifdef EPETRA_MPI
  MPI_Finalize();
#endif

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
} // end test_rcg_hb.cpp
Esempio n. 13
0
// Example driver: applies the Poisson stiffness operator matrix-free on a
// structured hexahedral mesh of the unit cube and reports timings for each
// phase (scatter, gradient evaluation, pointwise transforms, moments, gather).
//
// Command line:  deg NX NY NZ [verbose]
//   deg        - polynomial degree (>= 1)
//   NX, NY, NZ - number of intervals in the x, y and z directions (>= 1)
//
// Exits with status 1 on invalid arguments; returns 0 otherwise.
int main(int argc, char *argv[]) {

  // Check number of arguments.  deg, NX, NY and NZ are all required and are
  // read from argv[1]..argv[4] below, so at least five entries must exist.
  if (argc < 5) {
    std::cout <<"\n>>> ERROR: Invalid number of arguments.\n\n";
    std::cout <<"Usage:\n\n";
    std::cout <<"  ./Intrepid_example_Drivers_Example_14.exe deg NX NY NZ verbose\n\n";
    std::cout <<" where \n";
    std::cout <<"   int deg             - polynomial degree to be used (assumed >= 1) \n";
    std::cout <<"   int NX              - num intervals in x direction (assumed box domain, 0,1) \n";
    std::cout <<"   int NY              - num intervals in y direction (assumed box domain, 0,1) \n";
    std::cout <<"   int NZ              - num intervals in z direction (assumed box domain, 0,1) \n";
    std::cout <<"   verbose (optional)  - any character, indicates verbose output \n\n";
    exit(1);
  }

  // This little trick lets us print to std::cout only if
  // a (dummy) command-line argument is provided.
  // NOTE(review): with four required arguments iprint is always >= 4, so the
  // condition below is always true and output is never suppressed.  The
  // threshold is left unchanged here because anything parsing this program's
  // output would be affected -- confirm before tightening it to `iprint > 4`.
  int iprint     = argc - 1;
  Teuchos::RCP<std::ostream> outStream;
  Teuchos::oblackholestream bhs; // outputs nothing
  if (iprint > 2)
    outStream = Teuchos::rcp(&std::cout, false);
  else
    outStream = Teuchos::rcp(&bhs, false);

  // Save the format state of the original std::cout.
  Teuchos::oblackholestream oldFormatState;
  oldFormatState.copyfmt(std::cout);

  *outStream
    << "===============================================================================\n"
    << "|                                                                             |\n"
    << "|  Example: Apply Stiffness Matrix for                                        |\n"
    << "|                   Poisson Equation on Hexahedral Mesh                       |\n"
    << "|                                                                             |\n"
    << "|  Questions? Contact  Pavel Bochev  ([email protected]),                    |\n"
    << "|                      Denis Ridzal  ([email protected]),                    |\n"
    << "|                      Kara Peterson ([email protected]).                    |\n"
    << "|                                                                             |\n"
    << "|  Intrepid's website: http://trilinos.sandia.gov/packages/intrepid           |\n"
    << "|  Trilinos website:   http://trilinos.sandia.gov                             |\n"
    << "|                                                                             |\n"
    << "===============================================================================\n";


  // ************************************ GET INPUTS **************************************

  int deg          = atoi(argv[1]);  // polynomial degree to use
  int NX           = atoi(argv[2]);  // num intervals in x direction (assumed box domain, 0,1)
  int NY           = atoi(argv[3]);  // num intervals in y direction (assumed box domain, 0,1)
  int NZ           = atoi(argv[4]);  // num intervals in z direction (assumed box domain, 0,1)

  // Reject values that would make the mesh spacing below divide by zero or
  // that fall outside the documented range (deg >= 1).
  if (deg < 1 || NX < 1 || NY < 1 || NZ < 1) {
    std::cout <<"\n>>> ERROR: deg, NX, NY and NZ must all be positive integers.\n\n";
    exit(1);
  }


  // *********************************** CELL TOPOLOGY **********************************

  // Get cell topology for base hexahedron
  typedef shards::CellTopology    CellTopology;
  CellTopology hex_8(shards::getCellTopologyData<shards::Hexahedron<8> >() );

  // Get dimensions
  int numNodesPerElem = hex_8.getNodeCount();
  int spaceDim = hex_8.getDimension();

  // *********************************** GENERATE MESH ************************************

  *outStream << "Generating mesh ... \n\n";

  *outStream << "   NX" << "   NY" << "   NZ\n";
  *outStream << std::setw(5) << NX <<
    std::setw(5) << NY << std::setw(5) << NZ << "\n\n";

  // Print mesh information
  int numElems = NX*NY*NZ;
  int numNodes = (NX+1)*(NY+1)*(NZ+1);
  *outStream << " Number of Elements: " << numElems << " \n";
  *outStream << "    Number of Nodes: " << numNodes << " \n\n";

  // Cube
  double leftX = 0.0, rightX = 1.0;
  double leftY = 0.0, rightY = 1.0;
  double leftZ = 0.0, rightZ = 1.0;

  // Mesh spacing
  double hx = (rightX-leftX)/((double)NX);
  double hy = (rightY-leftY)/((double)NY);
  double hz = (rightZ-leftZ)/((double)NZ);

  // Get nodal coordinates.  Nodes are numbered with i (x) fastest, then j (y),
  // then k (z); nodeOnBoundary flags nodes lying on any face of the box.
  FieldContainer<double> nodeCoord(numNodes, spaceDim);
  FieldContainer<int> nodeOnBoundary(numNodes);
  int inode = 0;
  for (int k=0; k<NZ+1; k++)
    {
      for (int j=0; j<NY+1; j++)
        {
          for (int i=0; i<NX+1; i++)
            {
              nodeCoord(inode,0) = leftX + (double)i*hx;
              nodeCoord(inode,1) = leftY + (double)j*hy;
              nodeCoord(inode,2) = leftZ + (double)k*hz;
              if (k==0 || k==NZ || j==0 || i==0 || j==NY || i==NX)
                {
                  nodeOnBoundary(inode)=1;
                }
              else
                {
                  nodeOnBoundary(inode)=0;
                }
              inode++;
            }
        }
    }
//#define DUMP_DATA
#ifdef DUMP_DATA
  // Print nodal coords
  ofstream fcoordout("coords.dat");
  for (int i=0; i<numNodes; i++) {
    fcoordout << nodeCoord(i,0) <<" ";
    fcoordout << nodeCoord(i,1) <<" ";
    fcoordout << nodeCoord(i,2) <<"\n";
  }
  fcoordout.close();
#endif



  // ********************************* CUBATURE AND BASIS ***********************************
  *outStream << "Getting cubature and basis ... \n\n";

  // Get numerical integration points and weights
  // I only need this on the line since I'm doing tensor products
  Teuchos::RCP<Cubature<double,FieldContainer<double>,FieldContainer<double> > > glcub
    = Teuchos::rcp(new CubaturePolylib<double,FieldContainer<double>,FieldContainer<double> >(2*deg-1,PL_GAUSS_LOBATTO) );

  const int numCubPoints1D = glcub->getNumPoints();

  FieldContainer<double> cubPoints1D(numCubPoints1D, 1);
  FieldContainer<double> cubWeights1D(numCubPoints1D);

  glcub->getCubature(cubPoints1D,cubWeights1D);

  // The same 1D rule is used in each of the three directions.
  std::vector<Teuchos::RCP<Cubature<double,FieldContainer<double>,FieldContainer<double> > > >
    cub_to_tensor;
  cub_to_tensor.push_back( glcub );
  cub_to_tensor.push_back( glcub );
  cub_to_tensor.push_back( glcub );

  // RCPs created with the ownership flag set to false: they refer to the
  // stack-allocated containers declared in this function.
  Array<RCP<FieldContainer<double> > > wts_by_dim(3);
  wts_by_dim[0] = rcp( &cubWeights1D , false ); wts_by_dim[1] = wts_by_dim[0]; wts_by_dim[2] = wts_by_dim[1];

  CubatureTensor<double,FieldContainer<double>,FieldContainer<double> > cubhex( cub_to_tensor );

  Basis_HGRAD_HEX_Cn_FEM<double, FieldContainer<double> > hexBasis( deg , POINTTYPE_SPECTRAL );

  Array< Array< RCP< Basis< double , FieldContainer<double> > > > > &bases = hexBasis.getBases();

  // get the bases tabulated at the quadrature points, dimension-by-dimension

  Array< RCP< FieldContainer<double> > > basisVals( 3 );
  FieldContainer<double> bvals1D( bases[0][0]->getCardinality() , numCubPoints1D );
  bases[0][0]->getValues( bvals1D , cubPoints1D , OPERATOR_VALUE );
  basisVals[0] = rcp( &bvals1D , false ); basisVals[1] = basisVals[0]; basisVals[2] = basisVals[0];

  Array< RCP< FieldContainer<double> > > basisDVals( 3 );
  FieldContainer<double> bdvals1D( bases[0][0]->getCardinality() , numCubPoints1D , 1);
  bases[0][0]->getValues( bdvals1D , cubPoints1D , OPERATOR_D1 );
  basisDVals[0] = rcp( &bdvals1D , false ); basisDVals[1] = basisDVals[0]; basisDVals[2] = basisDVals[0];


  const int numCubPoints = cubhex.getNumPoints();
  FieldContainer<double> cubPoints3D( numCubPoints , 3 );
  FieldContainer<double> cubWeights3D( numCubPoints );
  cubhex.getCubature( cubPoints3D , cubWeights3D );


  // Element-to-node connectivity; elements are numbered with i fastest, then
  // j, then k, matching the node numbering above.
  FieldContainer<int> elemToNode(numElems, numNodesPerElem);
  int ielem = 0;
  for (int k=0; k<NZ; k++)
    {
      for (int j=0; j<NY; j++)
        {
          for (int i=0; i<NX; i++)
            {
              elemToNode(ielem,0) = k * ( NX + 1 ) * ( NY + 1 ) + j * ( NX + 1 ) + i;
              elemToNode(ielem,1) = k * ( NX + 1 ) * ( NY + 1 ) + j * ( NX + 1 ) + i + 1;
              elemToNode(ielem,2) = k * ( NX + 1 ) * ( NY + 1 ) + ( j + 1 ) * ( NX + 1 ) + i + 1;
              elemToNode(ielem,3) = k * ( NX + 1 ) * ( NY + 1 ) + ( j + 1 ) * ( NX + 1 ) + i;
              elemToNode(ielem,4) = ( k + 1 ) * ( NX + 1 ) * ( NY + 1 ) + j * ( NX + 1 ) + i;
              elemToNode(ielem,5) = ( k + 1 ) * ( NX + 1 ) * ( NY + 1 ) + j * ( NX + 1 ) + i + 1;
              elemToNode(ielem,6) = ( k + 1 ) * ( NX + 1 ) * ( NY + 1 ) + ( j + 1 ) * ( NX + 1 ) + i + 1;
              elemToNode(ielem,7) = ( k + 1 ) * ( NX + 1 ) * ( NY + 1 ) + ( j + 1 ) * ( NX + 1 ) + i;
              ielem++;
            }
        }
    }
#ifdef DUMP_DATA
  // Output connectivity
  ofstream fe2nout("elem2node.dat");
  for (int k=0;k<NZ;k++)
    {
      for (int j=0; j<NY; j++)
        {
          for (int i=0; i<NX; i++)
            {
              // Elements are numbered i fastest, then j, then k, so the
              // k stride is NX * NY.
              int elemIdx = i + j * NX + k * NX * NY;
              for (int m=0; m<numNodesPerElem; m++)
                {
                  fe2nout << elemToNode(elemIdx,m) <<"  ";
                }
              fe2nout <<"\n";
            }
        }
    }
  fe2nout.close();
#endif


  // ********************************* 3-D LOCAL-TO-GLOBAL MAPPING *******************************
  FieldContainer<int> ltgMapping(numElems,hexBasis.getCardinality());
  const int numDOF = (NX*deg+1)*(NY*deg+1)*(NZ*deg+1);
  ielem=0;
  for (int k=0;k<NZ;k++)
    {
      for (int j=0;j<NY;j++)
        {
          for (int i=0;i<NX;i++)
            {
              // Global index of this cell's first (lowest-corner) dof.
              const int start = k * ( NY * deg + 1 ) * ( NX * deg + 1 ) + j * ( NX * deg + 1 ) + i * deg;
              // loop over local dof on this cell
              int local_dof_cur=0;
              for (int kloc=0;kloc<=deg;kloc++)
                {
                  for (int jloc=0;jloc<=deg;jloc++)
                    {
                      for (int iloc=0;iloc<=deg;iloc++)
                        {
                          ltgMapping(ielem,local_dof_cur) = start
                            + kloc * ( NX * deg + 1 ) * ( NY * deg + 1 )
                            + jloc * ( NX * deg + 1 )
                            + iloc;
                          local_dof_cur++;
                        }
                    }
                }
              ielem++;
            }
        }
    }
#ifdef DUMP_DATA
  // Output ltg mapping
  ofstream ltgout("ltg.dat");
  for (int k=0;k<NZ;k++)
    {
      for (int j=0; j<NY; j++)
        {
          for (int i=0; i<NX; i++)
            {
              int elemIdx = i + j * NX + k * NX * NY;
              for (int m=0; m<hexBasis.getCardinality(); m++)
                {
                  ltgout << ltgMapping(elemIdx,m) <<"  ";
                }
              ltgout <<"\n";
            }
        }
    }
  ltgout.close();
#endif

  // ********** DECLARE GLOBAL OBJECTS *************
  Epetra_SerialComm Comm;
  Epetra_Map globalMapG(numDOF, 0, Comm);
  Epetra_FEVector u(globalMapG);  u.Random();
  Epetra_FEVector Ku(globalMapG);

  // ************* For Jacobians **********************
  FieldContainer<double> cellVertices(numElems,numNodesPerElem,spaceDim);
  FieldContainer<double> cellJacobian(numElems,numCubPoints,spaceDim,spaceDim);
  FieldContainer<double> cellJacobInv(numElems,numCubPoints,spaceDim,spaceDim);
  FieldContainer<double> cellJacobDet(numElems,numCubPoints);


  // get vertices of cells (for computing Jacobians)
  for (int i=0;i<numElems;i++)
    {
      for (int j=0;j<numNodesPerElem;j++)
        {
          const int nodeCur = elemToNode(i,j);
          for (int k=0;k<spaceDim;k++)
            {
              cellVertices(i,j,k) = nodeCoord(nodeCur,k);
            }
        }
    }

  // jacobian evaluation
  CellTools<double>::setJacobian(cellJacobian,cubPoints3D,cellVertices,hex_8);
  CellTools<double>::setJacobianInv(cellJacobInv, cellJacobian );
  CellTools<double>::setJacobianDet(cellJacobDet, cellJacobian );


  // ************* MATRIX-FREE APPLICATION
  FieldContainer<double> uScattered(numElems,1,hexBasis.getCardinality());
  FieldContainer<double> KuScattered(numElems,1,hexBasis.getCardinality());
  FieldContainer<double> gradU(numElems,1,hexBasis.getCardinality(),3);

  u.GlobalAssemble();



  Ku.PutScalar(0.0);
  Ku.GlobalAssemble();

  // Raw pointers to the first (only) column of each vector.
  double *uVals = u[0];
  double *KuVals = Ku[0];

  Teuchos::Time full_timer( "Time to apply operator matrix-free:" );
  Teuchos::Time scatter_timer( "Time to scatter dof:" );
  Teuchos::Time elementwise_timer( "Time to do elementwise computation:" );
  Teuchos::Time grad_timer( "Time to compute gradients:" );
  Teuchos::Time pointwise_timer( "Time to do pointwise transformations:" );
  Teuchos::Time moment_timer( "Time to compute moments:" );
  Teuchos::Time gather_timer( "Time to gather dof:" );

  full_timer.start();

  // Scatter: copy global dof values into per-element storage.
  scatter_timer.start();
  for (int k=0; k<numElems; k++)
    {
      for (int i=0;i<hexBasis.getCardinality();i++)
        {
          uScattered(k,0,i) = uVals[ltgMapping(k,i)];
        }
    }
  scatter_timer.stop();

  elementwise_timer.start();

  grad_timer.start();
  Intrepid::TensorProductSpaceTools::evaluateGradient<double>( gradU , uScattered ,basisVals , basisDVals );
  grad_timer.stop();
  pointwise_timer.start();
  Intrepid::FunctionSpaceToolsInPlace::HGRADtransformGRAD<double>( gradU , cellJacobian );
  Intrepid::FunctionSpaceToolsInPlace::HGRADtransformGRADDual<double>( gradU , cellJacobian );
  Intrepid::FunctionSpaceToolsInPlace::multiplyMeasure<double>( gradU , cellJacobDet );
  pointwise_timer.stop();
  moment_timer.start();
  Intrepid::TensorProductSpaceTools::momentsGrad<double>( KuScattered , gradU , basisVals , basisDVals , wts_by_dim );
  moment_timer.stop();
  elementwise_timer.stop();
  // Gather: accumulate per-element results back into the global vector.
  gather_timer.start();
  for (int k=0;k<numElems;k++)
    {
      for (int i=0;i<hexBasis.getCardinality();i++)
        {
          KuVals[ltgMapping(k,i)] += KuScattered(k,0,i);
        }
    }
  gather_timer.stop();
  full_timer.stop();

  *outStream << full_timer.name() << " " << full_timer.totalElapsedTime() << " sec\n";
  *outStream << "\t" << scatter_timer.name() << " " << scatter_timer.totalElapsedTime() << " sec\n";
  *outStream << "\t" << elementwise_timer.name() << " " << elementwise_timer.totalElapsedTime() << " sec\n";
  *outStream << "\t\t" << grad_timer.name() << " " << grad_timer.totalElapsedTime() << " sec\n";
  *outStream << "\t\t" << pointwise_timer.name() << " " << pointwise_timer.totalElapsedTime() << " sec\n";
  *outStream << "\t\t" << moment_timer.name() << " " << moment_timer.totalElapsedTime() << " sec\n";
  *outStream << "\t" << gather_timer.name() << " " << gather_timer.totalElapsedTime() << " sec\n";


  *outStream << "End Result: TEST PASSED\n";

  // reset format state of std::cout
  std::cout.copyfmt(oldFormatState);

  return 0;
}
Esempio n. 14
0
// Test driver: reads a sparse matrix with the Tpetra MatrixMarket reader,
// builds an Ifpack2 "RELAXATION" (Jacobi) preconditioner, solves A X = B with
// Belos::PETScSolMgr and checks the true relative residuals against `tol`.
//
// Returns EXIT_SUCCESS when the solver reports convergence and every
// normalized residual is <= tol, EXIT_FAILURE otherwise, and -1 when
// command-line parsing or problem setup fails.
int main(int argc, char *argv[]) {
  typedef double                            ST;
  typedef Teuchos::ScalarTraits<ST>        SCT;
  typedef SCT::magnitudeType                MT;
  typedef Tpetra::MultiVector<>             MV;
  typedef Tpetra::Operator<>                OP;
  typedef Belos::MultiVecTraits<ST,MV>     MVT;
  typedef Belos::OperatorTraits<ST,MV,OP>  OPT;
  typedef Tpetra::CrsMatrix<>              CrsMatrix;
  typedef Ifpack2::Preconditioner<>        Prec;

  using Teuchos::ParameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;

  // ************************* Initialize MPI **************************
  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackhole);

  // ************** Get the default communicator and node **************
  RCP<const Teuchos::Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
  const int myRank = comm->getRank();

  bool verbose = true;
  bool success = true;
  bool proc_verbose = false;
  bool leftprec = true;      // left preconditioning or right.
  int frequency = -1;        // frequency of status test output.
  int numrhs = 1;            // number of right-hand sides to solve for
  int maxiters = -1;         // maximum number of iterations allowed per linear system
  std::string filename("cage4.mtx");
  MT tol = 1.0e-5;           // relative residual tolerance

  // ***************** Read the command line arguments *****************
  Teuchos::CommandLineProcessor cmdp(false,false);
  cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
  cmdp.setOption("left-prec","right-prec",&leftprec,"Left preconditioning or right.");
  cmdp.setOption("frequency",&frequency,"Solvers frequency for printing residuals (#iters).");
  cmdp.setOption("filename",&filename,"Filename for test matrix.  Acceptable file extensions: *.hb,*.mtx,*.triU,*.triS");
  cmdp.setOption("tol",&tol,"Relative residual tolerance used by GMRES solver.");
  cmdp.setOption("num-rhs",&numrhs,"Number of right-hand sides to be solved for.");
  cmdp.setOption("max-iters",&maxiters,"Maximum number of iterations per linear system (-1 = adapted to problem/block size).");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1;
  }
  if (!verbose)
    frequency = -1;  // reset frequency if test is not verbose
  proc_verbose = verbose && (myRank==0); /* Only print on the zero processor */

  // ************************* Get the problem *************************
  RCP<CrsMatrix> A = Tpetra::MatrixMarket::Reader<CrsMatrix>::readSparseFile(filename,comm);
  RCP<MV> B = rcp(new MV(A->getRowMap(),numrhs,false));
  RCP<MV> X = rcp(new MV(A->getRowMap(),numrhs,false));
  // NOTE(review): the result of this Apply is overwritten by the MvInit
  // calls right below (X is reset and B is filled with 1), so the product
  // itself is not used -- confirm whether the call is still needed.
  OPT::Apply(*A, *X, *B);
  MVT::MvInit(*X);
  MVT::MvInit(*B,1);

  // ******************** Construct preconditioner *********************
  Ifpack2::Factory factory;
  RCP<Prec> M = factory.create("RELAXATION", A.getConst());
  ParameterList ifpackParams;
  ifpackParams.set("relaxation: type","Jacobi");
  M->setParameters(ifpackParams);
  M->initialize();
  M->compute();

  // ******* Create parameter list for the Belos solver manager ********
  const int NumGlobalElements = MVT::GetGlobalLength(*B);
  if (maxiters == -1)
    maxiters = NumGlobalElements - 1; // maximum number of iterations to run
  //
  ParameterList belosList;
  belosList.set( "Maximum Iterations", maxiters );       // Maximum number of iterations allowed
  belosList.set( "Convergence Tolerance", tol );         // Relative convergence tolerance requested
  if (numrhs > 1) {
    belosList.set( "Show Maximum Residual Norm Only", true );  // Show only the maximum residual norm
  }
  if (verbose) {
    belosList.set( "Verbosity", Belos::Errors + Belos::Warnings +
                   Belos::TimingDetails + Belos::StatusTestDetails );
    if (frequency > 0)
      belosList.set( "Output Frequency", frequency );
  }
  else
    belosList.set( "Verbosity", Belos::Errors + Belos::Warnings + Belos::FinalSummary );

  // ************ Construct a preconditioned linear problem ************
  RCP<Belos::LinearProblem<double,MV,OP> > problem
    = rcp( new Belos::LinearProblem<double,MV,OP>( A, X, B ) );
  if (leftprec) {
    problem->setLeftPrec( M );
  }
  else {
    problem->setRightPrec( M );
  }
  bool set = problem->setProblem();
  if (set == false) {
    if (proc_verbose)
      std::cout << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
    return -1;
  }

  // **************** Create an iterative solver manager ***************
  // belosList lives on the stack, so it is wrapped in an RCP created with the
  // ownership flag set to false.
  RCP< Belos::PETScSolMgr<double,MV,OP> > solver
    = rcp( new Belos::PETScSolMgr<double,MV,OP>(problem, rcp(&belosList,false)) );
  solver->setCLA(argc,argv);

  // ************** Print out information about the problem ************
  if (proc_verbose) {
    std::cout << std::endl << std::endl;
    std::cout << "Dimension of matrix: " << NumGlobalElements << std::endl;
    std::cout << "Number of right-hand sides: " << numrhs << std::endl;
    std::cout << "Max number of Krylov iterations: " << maxiters << std::endl;
    std::cout << "Relative residual tolerance: " << tol << std::endl;
    std::cout << std::endl;
  }

  // ************************** Perform solve **************************
  Belos::ReturnType ret = solver->solve();

  // ********************* Compute actual residuals ********************
  // resid = B - A*X, column by column.
  bool badRes = false;
  std::vector<double> actual_resids( numrhs );
  std::vector<double> rhs_norm( numrhs );
  RCP<MV> resid = MVT::Clone(*X, numrhs);
  OPT::Apply( *A, *X, *resid );
  MVT::MvAddMv( -1.0, *resid, 1.0, *B, *resid );
  MVT::MvNorm( *resid, actual_resids );
  MVT::MvNorm( *B, rhs_norm );
  // The tolerance check must run on every process and regardless of the
  // verbosity setting; otherwise the exit status would depend on whether
  // output was requested.  Only the printing is conditional.
  if (proc_verbose)
    std::cout<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl;
  for ( int i=0; i<numrhs; i++) {
    const double actRes = actual_resids[i]/rhs_norm[i];
    if (proc_verbose)
      std::cout<<"Problem "<<i<<" : \t"<< actRes <<std::endl;
    if (actRes > tol) badRes = true;
  }

  if (ret!=Belos::Converged || badRes) {
    success = false;
    if (proc_verbose)
      std::cout << std::endl << "ERROR:  Belos did not converge!" << std::endl;
  } else {
    success = true;
    if (proc_verbose)
      std::cout << std::endl << "SUCCESS:  Belos converged!" << std::endl;
  }

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
void Piro::RythmosSolver<Scalar>::initialize(
#endif
    const Teuchos::RCP<Teuchos::ParameterList> &appParams,
    const Teuchos::RCP< Thyra::ModelEvaluator<Scalar> > &in_model,
    const Teuchos::RCP<Rythmos::IntegrationObserverBase<Scalar> > &observer)
{

    using Teuchos::ParameterList;
    using Teuchos::parameterList;
    using Teuchos::RCP;
    using Teuchos::rcp;

    // set some internals
    model = in_model;
    num_p = in_model->Np();
    num_g = in_model->Ng();

    //
    *out << "\nA) Get the base parameter list ...\n";
    //


    if (appParams->isSublist("Rythmos")) {
        RCP<Teuchos::ParameterList> rythmosPL = sublist(appParams, "Rythmos", true);
        rythmosPL->validateParameters(*getValidRythmosParameters(),0);

        {
            const std::string verbosity = rythmosPL->get("Verbosity Level", "VERB_DEFAULT");
            if      (verbosity == "VERB_NONE")    solnVerbLevel = Teuchos::VERB_NONE;
            else if (verbosity == "VERB_DEFAULT") solnVerbLevel = Teuchos::VERB_DEFAULT;
            else if (verbosity == "VERB_LOW")     solnVerbLevel = Teuchos::VERB_LOW;
            else if (verbosity == "VERB_MEDIUM")  solnVerbLevel = Teuchos::VERB_MEDIUM;
            else if (verbosity == "VERB_HIGH")    solnVerbLevel = Teuchos::VERB_HIGH;
            else if (verbosity == "VERB_EXTREME") solnVerbLevel = Teuchos::VERB_EXTREME;
            else TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,"Unknown verbosity option specified in Piro_RythmosSolver.");
        }

        t_initial = rythmosPL->get("Initial Time", 0.0);
        t_final = rythmosPL->get("Final Time", 0.1);

        const std::string stepperType = rythmosPL->get("Stepper Type", "Backward Euler");

        //
        *out << "\nC) Create and initalize the forward model ...\n";
        //

        *out << "\nD) Create the stepper and integrator for the forward problem ...\n";
        //

        if (rythmosPL->get<std::string>("Nonlinear Solver Type") == "Rythmos") {
            Teuchos::RCP<Rythmos::TimeStepNonlinearSolver<Scalar> > rythmosTimeStepSolver =
                Rythmos::timeStepNonlinearSolver<Scalar>();
            if (rythmosPL->getEntryPtr("NonLinear Solver")) {
                RCP<Teuchos::ParameterList> nonlinePL =
                    sublist(rythmosPL, "NonLinear Solver", true);
                rythmosTimeStepSolver->setParameterList(nonlinePL);
            }
            fwdTimeStepSolver = rythmosTimeStepSolver;
        }
        else if (rythmosPL->get<std::string>("Nonlinear Solver Type") == "NOX") {
#ifdef HAVE_PIRO_NOX
            Teuchos::RCP<Thyra::NOXNonlinearSolver> nox_solver =  Teuchos::rcp(new Thyra::NOXNonlinearSolver);
            Teuchos::RCP<Teuchos::ParameterList> nox_params = Teuchos::rcp(new Teuchos::ParameterList);
            *nox_params = appParams->sublist("NOX");
            nox_solver->setParameterList(nox_params);
            fwdTimeStepSolver = nox_solver;
#else
            TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,"Requested NOX solver for a Rythmos Transient solve, Trilinos was not built with NOX enabled.  Please rebuild Trilinos or use the native Rythmos nonlinear solver.");
#endif

        }

        if (stepperType == "Backward Euler") {
            fwdStateStepper = Rythmos::backwardEulerStepper<Scalar> (model, fwdTimeStepSolver);
            fwdStateStepper->setParameterList(sublist(rythmosPL, "Rythmos Stepper", true));
        }
        else if (stepperType == "Forward Euler") {
            fwdStateStepper = Rythmos::forwardEulerStepper<Scalar> (model);
            fwdStateStepper->setParameterList(sublist(rythmosPL, "Rythmos Stepper", true));
        }
        else if (stepperType == "Explicit RK") {
            fwdStateStepper = Rythmos::explicitRKStepper<Scalar>(model);
            fwdStateStepper->setParameterList(sublist(rythmosPL, "Rythmos Stepper", true));
        }
        else if (stepperType == "BDF") {
            Teuchos::RCP<Teuchos::ParameterList> BDFparams =
                Teuchos::sublist(rythmosPL, "Rythmos Stepper", true);
            Teuchos::RCP<Teuchos::ParameterList> BDFStepControlPL =
                Teuchos::sublist(BDFparams,"Step Control Settings");

            fwdStateStepper = Teuchos::rcp( new Rythmos::ImplicitBDFStepper<Scalar>(model,fwdTimeStepSolver,BDFparams) );
            fwdStateStepper->setInitialCondition(model->getNominalValues());

        }
        else {
            // first (before failing) check to see if the user has added stepper factory
            typename std::map<std::string,Teuchos::RCP<Piro::RythmosStepperFactory<Scalar> > >::const_iterator
            stepFactItr = stepperFactories.find(stepperType);
            if(stepFactItr!=stepperFactories.end()) {
                // the user has added it, hot dog lets build a new stepper!
                Teuchos::RCP<Teuchos::ParameterList> stepperParams = Teuchos::sublist(rythmosPL, "Rythmos Stepper", true);

                // build the stepper using the factory
                fwdStateStepper = stepFactItr->second->buildStepper(model,fwdTimeStepSolver,stepperParams);

                // the user decided to override the model being used (let them)
                if(fwdStateStepper->getModel()!=model && fwdStateStepper->getModel()!=Teuchos::null) {
                    model = Teuchos::rcp_const_cast<Thyra::ModelEvaluator<Scalar> >(fwdStateStepper->getModel());

                    num_p = in_model->Np();
                    num_g = in_model->Ng();
                }
            }
            else {
                TEUCHOS_TEST_FOR_EXCEPTION(
                    true, Teuchos::Exceptions::InvalidParameter,
                    std::endl << "Error! Piro::RythmosSolver: Invalid Steper Type: "
                    << stepperType << std::endl);
            }
        }

        // Step control strategy
        {
            // If the stepper can accept a step control strategy, then attempt to build one.
            RCP<Rythmos::StepControlStrategyAcceptingStepperBase<Scalar> > scsa_stepper =
                Teuchos::rcp_dynamic_cast<Rythmos::StepControlStrategyAcceptingStepperBase<Scalar> >(fwdStateStepper);

            if (Teuchos::nonnull(scsa_stepper)) {
                const std::string step_control_strategy = rythmosPL->get("Step Control Strategy Type", "None");

                if (step_control_strategy == "None") {
                    // don't do anything, stepper will build default
                } else if (step_control_strategy == "ImplicitBDFRamping") {

                    const RCP<Rythmos::ImplicitBDFStepperRampingStepControl<Scalar> > rscs =
                        rcp(new Rythmos::ImplicitBDFStepperRampingStepControl<Scalar>);

                    const RCP<ParameterList> p = parameterList(rythmosPL->sublist("Rythmos Step Control Strategy"));
                    rscs->setParameterList(p);

                    scsa_stepper->setStepControlStrategy(rscs);
                }
                else {
                    // first (before failing) check to see if the user has added step control factory
                    typename std::map<std::string,Teuchos::RCP<Piro::RythmosStepControlFactory<Scalar> > >::const_iterator
                    stepControlFactItr = stepControlFactories.find(step_control_strategy);
                    if (stepControlFactItr != stepControlFactories.end())
                    {

                        const RCP<Rythmos::StepControlStrategyBase<Scalar> > rscs = stepControlFactItr->second->buildStepControl();

                        const RCP<ParameterList> p = parameterList(rythmosPL -> sublist("Rythmos Step Control Strategy"));

                        rscs->setParameterList(p);

                        scsa_stepper->setStepControlStrategy(rscs);
                    }
                    else {
                        TEUCHOS_TEST_FOR_EXCEPTION(
                            true, std::logic_error,
                            "Error! Piro::RythmosSolver: Invalid step control strategy type: "
                            << step_control_strategy << std::endl);
                    }
                }
            }
        }
        {
            const RCP<Teuchos::ParameterList> integrationControlPL =
                Teuchos::sublist(rythmosPL, "Rythmos Integration Control", true);

            RCP<Rythmos::DefaultIntegrator<Scalar> > defaultIntegrator;
            if (rythmosPL->get("Rythmos Integration Control Strategy", "Simple") == "Simple") {
                defaultIntegrator = Rythmos::controlledDefaultIntegrator<Scalar>(Rythmos::simpleIntegrationControlStrategy<Scalar>(integrationControlPL));
            }
            else if(rythmosPL->get<std::string>("Rythmos Integration Control Strategy") == "Ramping") {
                defaultIntegrator = Rythmos::controlledDefaultIntegrator<Scalar>(Rythmos::rampingIntegrationControlStrategy<Scalar>(integrationControlPL));
            }
            fwdStateIntegrator = defaultIntegrator;
        }

        fwdStateIntegrator->setParameterList(sublist(rythmosPL, "Rythmos Integrator", true));

        if (Teuchos::nonnull(observer)) {
            fwdStateIntegrator->setIntegrationObserver(observer);
        }
    }

    else if (appParams->isSublist("Rythmos Solver")) {
        /** New parameter list format **/
        RCP<Teuchos::ParameterList> rythmosSolverPL = sublist(appParams, "Rythmos Solver", true);
        RCP<Teuchos::ParameterList> rythmosPL = sublist(rythmosSolverPL, "Rythmos", true);

        {
            const std::string verbosity = rythmosSolverPL->get("Verbosity Level", "VERB_DEFAULT");
            if      (verbosity == "VERB_NONE")    solnVerbLevel = Teuchos::VERB_NONE;
            else if (verbosity == "VERB_DEFAULT") solnVerbLevel = Teuchos::VERB_DEFAULT;
            else if (verbosity == "VERB_LOW")     solnVerbLevel = Teuchos::VERB_LOW;
            else if (verbosity == "VERB_MEDIUM")  solnVerbLevel = Teuchos::VERB_MEDIUM;
            else if (verbosity == "VERB_HIGH")    solnVerbLevel = Teuchos::VERB_HIGH;
            else if (verbosity == "VERB_EXTREME") solnVerbLevel = Teuchos::VERB_EXTREME;
            else TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
                                                "Unknown verbosity option specified in Piro_RythmosSolver.");
        }

        t_initial = rythmosPL->sublist("Integrator Settings").get("Initial Time", 0.0);
        t_final = rythmosPL->sublist("Integrator Settings").get("Final Time", 0.1);

        const std::string stepperType = rythmosPL->sublist("Stepper Settings")
                                        .sublist("Stepper Selection").get("Stepper Type", "Backward Euler");
        //
        //    *out << "\nB) Create the Stratimikos linear solver factory ...\n";
        //
        // This is the linear solve strategy that will be used to solve for the
        // linear system with the W.
        //
        Stratimikos::DefaultLinearSolverBuilder linearSolverBuilder;

#ifdef HAVE_PIRO_IFPACK2
        typedef Thyra::PreconditionerFactoryBase<double> Base;
#ifdef ALBANY_BUILD
        typedef Thyra::Ifpack2PreconditionerFactory<Tpetra::CrsMatrix<double, LocalOrdinal, GlobalOrdinal, Node> > Impl;
#else
        typedef Thyra::Ifpack2PreconditionerFactory<Tpetra::CrsMatrix<double> > Impl;
#endif
        linearSolverBuilder.setPreconditioningStrategyFactory(Teuchos::abstractFactoryStd<Base, Impl>(), "Ifpack2");
#endif
#ifdef HAVE_PIRO_MUELU
#ifdef ALBANY_BUILD
        Stratimikos::enableMueLu<LocalOrdinal, GlobalOrdinal, Node>(linearSolverBuilder);
#else
        Stratimikos::enableMueLu(linearSolverBuilder);
#endif
#endif

        linearSolverBuilder.setParameterList(sublist(rythmosSolverPL, "Stratimikos", true));
        rythmosSolverPL->validateParameters(*getValidRythmosSolverParameters(),0);
        RCP<Thyra::LinearOpWithSolveFactoryBase<double> > lowsFactory =
            createLinearSolveStrategy(linearSolverBuilder);
        //
        *out << "\nC) Create and initalize the forward model ...\n";
        //
        // C.1) Create the underlying EpetraExt::ModelEvaluator
        // already constructed as "model". Decorate if needed.
        // TODO: Generelize to any explicit method, option to invert mass matrix
        if (stepperType == "Explicit RK") {
            if (rythmosSolverPL->get("Invert Mass Matrix", false)) {
                Teuchos::RCP<Thyra::ModelEvaluator<Scalar> > origModel = model;
                rythmosSolverPL->get("Lump Mass Matrix", false);  //JF line does not do anything
                model = Teuchos::rcp(new Piro::InvertMassMatrixDecorator<Scalar>(
                                         sublist(rythmosSolverPL,"Stratimikos", true), origModel,
                                         true,rythmosSolverPL->get("Lump Mass Matrix", false),false));
            }
        }
        // C.2) Create the Thyra-wrapped ModelEvaluator

        thyraModel = rcp(new Thyra::DefaultModelEvaluatorWithSolveFactory<Scalar>(model, lowsFactory));

        const RCP<const Thyra::VectorSpaceBase<double> > x_space =
            thyraModel->get_x_space();

        //
        *out << "\nD) Create the stepper and integrator for the forward problem ...\n";
        //
        fwdTimeStepSolver = Rythmos::timeStepNonlinearSolver<double>();

        if (rythmosSolverPL->getEntryPtr("NonLinear Solver")) {
            const RCP<Teuchos::ParameterList> nonlinePL =
                sublist(rythmosSolverPL, "NonLinear Solver", true);
            fwdTimeStepSolver->setParameterList(nonlinePL);
        }
        // Force Default Integrator since this is needed for Observers
        rythmosPL->sublist("Integrator Settings").sublist("Integrator Selection").
        set("Integrator Type","Default Integrator");

        RCP<Rythmos::IntegratorBuilder<double> > ib = Rythmos::integratorBuilder<double>();
        ib->setParameterList(rythmosPL);
        Thyra::ModelEvaluatorBase::InArgs<double> ic = thyraModel->getNominalValues();
        RCP<Rythmos::IntegratorBase<double> > integrator = ib->create(thyraModel,ic,fwdTimeStepSolver);
        fwdStateIntegrator = Teuchos::rcp_dynamic_cast<Rythmos::DefaultIntegrator<double> >(integrator,true);

        fwdStateStepper = fwdStateIntegrator->getNonconstStepper();

        if (Teuchos::nonnull(observer))
            fwdStateIntegrator->setIntegrationObserver(observer);

    }
    else {
        TEUCHOS_TEST_FOR_EXCEPTION(
            appParams->isSublist("Rythmos") || appParams->isSublist("Rythmos Solver"),
            Teuchos::Exceptions::InvalidParameter, std::endl <<
            "Error! Piro::RythmosSolver: must have either Rythmos or Rythmos Solver sublist ");

    }

    isInitialized = true;
}
 // Default constructor: the factory starts out owning a freshly allocated,
 // empty parameter list.
 MueLuPreconditionerFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::MueLuPreconditionerFactory()
   : paramList_(rcp(new ParameterList))
 {
 }
// Build the closure-model evaluators described by the sublist of `models`
// that is named `model_id`.
//
// Each entry of that sublist is read as a nested parameter list whose name is
// the model key. Depending on its contents an entry produces:
//   - (HAVE_STOKHOS only, SG evaluation types) constant models carrying
//     "Value", "UQ" and "Expansion";
//   - panzer::Parameter evaluators when "Type" is the string "Parameter";
//   - constant models when "Value" is a double;
//   - a panzer::GlobalStatistics evaluator for the key "Global Statistics"
//     with a string "Value" (Residual evaluation type only);
//   - a unit-value constant plus a scalar integrator for "Volume Integral";
//   - one coordinates evaluator per basis dimension for "Coordinates".
// Constant/parameter models are registered once on the integration rule's
// scalar layout and once per unique basis reported by `fl`.
//
// Throws std::logic_error if `model_id` is not a sublist of `models` or if
// an entry matches none of the cases above. `default_params` is not read.
Teuchos::RCP< std::vector< Teuchos::RCP<PHX::Evaluator<panzer::Traits> > > > 
user_app::MyModelFactory<EvalT>::
buildClosureModels(const std::string& model_id,
		   const Teuchos::ParameterList& models, 
		   const panzer::FieldLayoutLibrary& fl,
		   const Teuchos::RCP<panzer::IntegrationRule>& ir,
		   const Teuchos::ParameterList& default_params,
		   const Teuchos::ParameterList& user_data,
		   const Teuchos::RCP<panzer::GlobalData>& global_data,
		   PHX::FieldManager<panzer::Traits>& fm) const
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::ParameterList;

  typedef RCP<PHX::Evaluator<panzer::Traits> >           EvaluatorPtr;
  typedef std::vector<EvaluatorPtr>                      EvaluatorList;
  typedef std::vector<RCP<const panzer::PureBasis> >     BasisList;
  typedef user_app::ConstantModel<EvalT,panzer::Traits>  ConstantModelType;
  typedef panzer::Parameter<EvalT,panzer::Traits>        ParameterModelType;

  RCP<EvaluatorList> evaluators = rcp(new EvaluatorList);

  // The requested model must exist; print what is available before failing.
  if (!models.isSublist(model_id)) {
    models.print(std::cout);
    std::stringstream msg;
    msg << "Falied to find requested model, \"" << model_id 
        << "\", for equation set:\n" << std::endl;
    TEUCHOS_TEST_FOR_EXCEPTION(!models.isSublist(model_id), std::logic_error, msg.str());
  }

  BasisList bases;
  fl.uniqueBases(bases);

  const ParameterList& modelEntries = models.sublist(model_id);

  for (ParameterList::ConstIterator entryIt = modelEntries.begin();
       entryIt != modelEntries.end(); ++entryIt) {

    // Set as soon as any of the cases below recognises this entry.
    bool found = false;

    const std::string key = entryIt->first;
    const ParameterList& plist =
      Teuchos::getValue<Teuchos::ParameterList>(entryIt->second);

    // Scratch list handed to the evaluator constructors; it is reused (and
    // its "Data Layout" overwritten) for every layout of the same entry.
    ParameterList input;

    #ifdef HAVE_STOKHOS
    if (plist.isType<double>("Value") && plist.isType<double>("UQ")
        && plist.isParameter("Expansion")
        && (typeid(EvalT)==typeid(panzer::Traits::SGResidual) ||
            typeid(EvalT)==typeid(panzer::Traits::SGJacobian)) ) {
      typedef Teuchos::RCP<Stokhos::OrthogPolyExpansion<int,double> > ExpansionPtr;

      // at IP
      input.set("Name", key);
      input.set("Value", plist.get<double>("Value"));
      input.set("UQ", plist.get<double>("UQ"));
      input.set("Expansion", plist.get<ExpansionPtr>("Expansion"));
      input.set("Data Layout", ir->dl_scalar);
      evaluators->push_back(rcp(new ConstantModelType(input)));

      // at BASIS
      for (BasisList::const_iterator basisIt = bases.begin();
           basisIt != bases.end(); ++basisIt) {
        input.set("Name", key);
        input.set("Value", plist.get<double>("Value"));
        input.set("UQ", plist.get<double>("UQ"));
        input.set("Expansion", plist.get<ExpansionPtr>("Expansion"));
        const Teuchos::RCP<const panzer::BasisIRLayout> layout = basisIRLayout(*basisIt,*ir);
        input.set("Data Layout", layout->functional);
        evaluators->push_back(rcp(new ConstantModelType(input)));
      }
      found = true;
    }
    else 
    #endif
    if (plist.isType<std::string>("Type")) {

      // Only the "Parameter" type is handled; any other type string leaves
      // `found` untouched.
      if (plist.get<std::string>("Type") == "Parameter") {
        // at IP
        {
          EvaluatorPtr atIP =
            rcp(new ParameterModelType(key,ir->dl_scalar,plist.get<double>("Value"),*global_data->pl));
          evaluators->push_back(atIP);
        }

        // at BASIS
        for (BasisList::const_iterator basisIt = bases.begin();
             basisIt != bases.end(); ++basisIt) {
          const Teuchos::RCP<const panzer::BasisIRLayout> layout = basisIRLayout(*basisIt,*ir);
          EvaluatorPtr atBasis =
            rcp(new ParameterModelType(key,layout->functional,plist.get<double>("Value"),*global_data->pl));
          evaluators->push_back(atBasis);
        }

        found = true;
      }

    }
    else if (plist.isType<double>("Value")) {
      // at IP
      input.set("Name", key);
      input.set("Value", plist.get<double>("Value"));
      input.set("Data Layout", ir->dl_scalar);
      evaluators->push_back(rcp(new ConstantModelType(input)));

      // at BASIS
      for (BasisList::const_iterator basisIt = bases.begin();
           basisIt != bases.end(); ++basisIt) {
        input.set("Name", key);
        input.set("Value", plist.get<double>("Value"));
        const Teuchos::RCP<const panzer::BasisIRLayout> layout = basisIRLayout(*basisIt,*ir);
        input.set("Data Layout", layout->functional);
        evaluators->push_back(rcp(new ConstantModelType(input)));
      }
      found = true;
    }

    // String-valued entries: currently only "Global Statistics" is known.
    if (plist.isType<std::string>("Value")) {

      const std::string value = plist.get<std::string>("Value");

      if (key == "Global Statistics") {
        // The evaluator is only created for the Residual evaluation type,
        // but the key counts as handled for every evaluation type.
        if (typeid(EvalT) == typeid(panzer::Traits::Residual)) {
          input.set("Comm", user_data.get<Teuchos::RCP<const Teuchos::Comm<int> > >("Comm"));
          input.set("Names", value);
          input.set("IR", ir);
          input.set("Global Data", global_data);
          RCP< panzer::GlobalStatistics<EvalT,panzer::Traits> > stats =
            rcp(new panzer::GlobalStatistics<EvalT,panzer::Traits>(input));
          evaluators->push_back(stats);

          // Require certain fields be evaluated
          fm.template requireField<EvalT>(stats->getRequiredFieldTag());
        }
        found = true;
      }

    }

    if (key == "Volume Integral") {

      // Integrand: a constant field named "Unit Value" equal to 1.0.
      {
        ParameterList unitValue;
        unitValue.set("Name", "Unit Value");
        unitValue.set("Value", 1.0);
        unitValue.set("Data Layout", ir->dl_scalar);
        evaluators->push_back(rcp(new ConstantModelType(unitValue)));
      }

      // Scalar integrator over that integrand.
      {
        ParameterList integral;
        integral.set("Integral Name", "Volume_Integral");
        integral.set("Integrand Name", "Unit Value");
        integral.set("IR", ir);
        EvaluatorPtr integrator =
          rcp(new panzer::Integrator_Scalar<EvalT,panzer::Traits>(integral));
        evaluators->push_back(integrator);
      }

      found = true;
    }

    if (key == "Coordinates") {
      const std::string axisNames[3] = {"X","Y","Z"};
      panzer::CellData cellData(ir->workset_size,ir->topology);
      panzer::PureBasis coordBasis("HGrad",1,cellData);

      // One evaluator per dimension of the basis, named COORDX/COORDY/COORDZ.
      for (int dim = 0; dim < coordBasis.dimension(); ++dim) {
        ParameterList coordInput;
        coordInput.set("Field Name", "COORD"+axisNames[dim]);
        coordInput.set("Data Layout", coordBasis.functional);
        coordInput.set("Dimension", dim);

        EvaluatorPtr coord =
          rcp(new panzer::CoordinatesEvaluator<EvalT,panzer::Traits>(coordInput));
        evaluators->push_back(coord);
      }

      found = true;
    }

    // Nothing above recognised this entry: report it and abort.
    if (!found) {
      std::stringstream msg;
      msg << "ClosureModelFactory failed to build evaluator for key \"" << key 
          << "\"\nin model \"" << model_id 
          << "\".  Please correct the type or add support to the \nfactory." <<std::endl;
      TEUCHOS_TEST_FOR_EXCEPTION(!found, std::logic_error, msg.str());
    }

  }

  return evaluators;
}
  // Build (or reuse) a MueLu multigrid hierarchy for the forward operator in
  // `fwdOpSrc` and store it, wrapped as a Thyra linear operator, in `prec`.
  //
  // \param fwdOpSrc  Source of the forward operator; must be non-null and
  //                  compatible with this factory (checked with isCompatible).
  // \param prec      Preconditioner object to initialize; must be a
  //                  DefaultPreconditioner<Scalar>.
  // \param supportSolveUse  Not referenced by this implementation.
  //
  // A working copy of the factory's parameter list is used, so entries removed
  // here (e.g. "Nullspace") do not alter the stored list. A new hierarchy is
  // built unless `prec` already holds an operator and the parameter
  // "reuse: type" is present and different from "none"; in that case the
  // hierarchy stored in the existing operator is updated with the new matrix.
  //
  // Throws if any of the checked casts or sanity tests fail. Reusing a
  // hierarchy is only implemented for Tpetra operators; requesting reuse in any
  // other configuration throws MueLu::Exceptions::RuntimeError.
  void MueLuPreconditionerFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
  initializePrec(const RCP<const LinearOpSourceBase<Scalar> >& fwdOpSrc, PreconditionerBase<Scalar>* prec, const ESupportSolveUse supportSolveUse) const {
    using Teuchos::rcp_dynamic_cast;

    // we are using typedefs here, since we are using objects from different packages (Xpetra, Thyra,...)
    typedef Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node>                     XpMap;
    typedef Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>      XpOp;
    typedef Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>       XpThyUtils;
    typedef Xpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>        XpCrsMat;
    typedef Xpetra::BlockedCrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> XpBlockedCrsMat;
    typedef Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>           XpMat;
    typedef Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>      XpMultVec;
    typedef Xpetra::MultiVector<double,LocalOrdinal,GlobalOrdinal,Node>      XpMultVecDouble;
    typedef Thyra::LinearOpBase<Scalar>                                      ThyLinOpBase;
#ifdef HAVE_MUELU_TPETRA
    typedef MueLu::TpetraOperator<Scalar,LocalOrdinal,GlobalOrdinal,Node> MueTpOp;
    typedef Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node>      TpOp;
    typedef Thyra::TpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,Node> ThyTpLinOp;
#endif

    // Check precondition
    TEUCHOS_ASSERT(Teuchos::nonnull(fwdOpSrc));
    TEUCHOS_ASSERT(this->isCompatible(*fwdOpSrc));
    TEUCHOS_ASSERT(prec);

    // Create a copy, as we may remove some things from the list
    ParameterList paramList = *paramList_;

    // Retrieve wrapped concrete Xpetra matrix from FwdOp
    const RCP<const ThyLinOpBase> fwdOp = fwdOpSrc->getOp();
    TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(fwdOp));

    // Check whether it is Epetra/Tpetra
    // Accepted combinations: exactly one of Epetra/Tpetra for a non-blocked
    // operator, or neither of them for a blocked operator.
    bool bIsEpetra  = XpThyUtils::isEpetra(fwdOp);
    bool bIsTpetra  = XpThyUtils::isTpetra(fwdOp);
    bool bIsBlocked = XpThyUtils::isBlockedOperator(fwdOp);
    TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra == true  && bIsTpetra == true));
    TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra == bIsTpetra) && bIsBlocked == false);
    TEUCHOS_TEST_FOR_EXCEPT((bIsEpetra != bIsTpetra) && bIsBlocked == true);

    RCP<XpMat> A = Teuchos::null;
    if(bIsBlocked) {
      Teuchos::RCP<const Thyra::BlockedLinearOpBase<Scalar> > ThyBlockedOp =
          Teuchos::rcp_dynamic_cast<const Thyra::BlockedLinearOpBase<Scalar> >(fwdOp);
      TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(ThyBlockedOp));

      TEUCHOS_TEST_FOR_EXCEPT(ThyBlockedOp->blockExists(0,0)==false);

      // Block (0,0) is only inspected to obtain a communicator for the
      // blocked matrix constructed below.
      Teuchos::RCP<const LinearOpBase<Scalar> > b00 = ThyBlockedOp->getBlock(0,0);
      TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(b00));

      RCP<const XpCrsMat > xpetraFwdCrsMat00 = XpThyUtils::toXpetra(b00);
      TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(xpetraFwdCrsMat00));

      // MueLu needs a non-const object as input
      RCP<XpCrsMat> xpetraFwdCrsMatNonConst00 = Teuchos::rcp_const_cast<XpCrsMat>(xpetraFwdCrsMat00);
      TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(xpetraFwdCrsMatNonConst00));

      // wrap the forward operator as an Xpetra::Matrix that MueLu can work with
      RCP<XpMat> A00 = rcp(new Xpetra::CrsMatrixWrap<Scalar,LocalOrdinal,GlobalOrdinal,Node>(xpetraFwdCrsMatNonConst00));
      TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A00));

      RCP<const XpMap> rowmap00 = A00->getRowMap();
      RCP< const Teuchos::Comm< int > > comm = rowmap00->getComm();

      // create a Xpetra::BlockedCrsMatrix which derives from Xpetra::Matrix that MueLu can work with
      RCP<XpBlockedCrsMat> bMat = Teuchos::rcp(new XpBlockedCrsMat(ThyBlockedOp, comm));
      TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(bMat));

      // save blocked matrix
      A = bMat;
    } else {
      RCP<const XpCrsMat > xpetraFwdCrsMat = XpThyUtils::toXpetra(fwdOp);
      TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(xpetraFwdCrsMat));

      // MueLu needs a non-const object as input
      RCP<XpCrsMat> xpetraFwdCrsMatNonConst = Teuchos::rcp_const_cast<XpCrsMat>(xpetraFwdCrsMat);
      TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(xpetraFwdCrsMatNonConst));

      // wrap the forward operator as an Xpetra::Matrix that MueLu can work with
      A = rcp(new Xpetra::CrsMatrixWrap<Scalar,LocalOrdinal,GlobalOrdinal,Node>(xpetraFwdCrsMatNonConst));
    }
    TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A));

    // Retrieve concrete preconditioner object
    const Teuchos::Ptr<DefaultPreconditioner<Scalar> > defaultPrec = Teuchos::ptr(dynamic_cast<DefaultPreconditioner<Scalar> *>(prec));
    TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(defaultPrec));

    // extract preconditioner operator
    RCP<ThyLinOpBase> thyra_precOp = Teuchos::null;
    thyra_precOp = rcp_dynamic_cast<Thyra::LinearOpBase<Scalar> >(defaultPrec->getNonconstUnspecifiedPrecOp(), true);

    // Variable for multigrid hierarchy: either build a new one or reuse the existing hierarchy
    RCP<MueLu::Hierarchy<Scalar,LocalOrdinal,GlobalOrdinal,Node> > H = Teuchos::null;

    // make a decision whether to (re)build the multigrid preconditioner or reuse the old one
    // rebuild preconditioner if startingOver == true
    // reuse preconditioner if startingOver == false
    const bool startingOver = (thyra_precOp.is_null() || !paramList.isParameter("reuse: type") || paramList.get<std::string>("reuse: type") == "none");

    if (startingOver == true) {
      // extract coordinates from parameter list
      Teuchos::RCP<XpMultVecDouble> coordinates = Teuchos::null;
      coordinates = MueLu::Utilities<Scalar,LocalOrdinal,GlobalOrdinal,Node>::ExtractCoordinatesFromParameterList(paramList);

      // TODO check for Xpetra or Thyra vectors?
      RCP<XpMultVec> nullspace = Teuchos::null;
#ifdef HAVE_MUELU_TPETRA
      if (bIsTpetra) {
        typedef Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tMV;
        RCP<tMV> tpetra_nullspace = Teuchos::null;
        if (paramList.isType<Teuchos::RCP<tMV> >("Nullspace")) {
          tpetra_nullspace = paramList.get<RCP<tMV> >("Nullspace");
          // The entry is removed from the working copy before the list is
          // handed to MueLu; the nullspace is passed separately instead.
          paramList.remove("Nullspace");
          nullspace = MueLu::TpetraMultiVector_To_XpetraMultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(tpetra_nullspace);
          TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(nullspace));
        }
      }
#endif
      // build a new MueLu hierarchy
      H = MueLu::CreateXpetraPreconditioner(A, paramList, coordinates, nullspace);

    } else {
      // reuse old MueLu hierarchy stored in MueLu Tpetra/Epetra operator and put in new matrix

      // get old MueLu hierarchy
#if defined(HAVE_MUELU_TPETRA)
      if (bIsTpetra) {

        // Both casts throw on failure, so a preconditioner operator of an
        // unexpected type is reported instead of dereferencing a null RCP.
        RCP<ThyTpLinOp> tpetr_precOp = rcp_dynamic_cast<ThyTpLinOp>(thyra_precOp,true);
        RCP<MueTpOp>    muelu_precOp = rcp_dynamic_cast<MueTpOp>(tpetr_precOp->getTpetraOperator(),true);

        H = muelu_precOp->GetHierarchy();
      }
#endif
      // TODO add the blocked matrix case here...

      // H is still null here for every configuration other than Tpetra (for
      // instance blocked operators, or builds without HAVE_MUELU_TPETRA).
      // Fail with a clear message rather than dereferencing a null hierarchy.
      TEUCHOS_TEST_FOR_EXCEPTION(H.is_null(), MueLu::Exceptions::RuntimeError,
                                 "Thyra::MueLuPreconditionerFactory: Cannot reuse the preconditioner: "
                                 "no existing MueLu hierarchy is available (reuse is only supported for Tpetra operators)");

      TEUCHOS_TEST_FOR_EXCEPTION(!H->GetNumLevels(), MueLu::Exceptions::RuntimeError,
                                 "Thyra::MueLuPreconditionerFactory: Hierarchy has no levels in it");
      TEUCHOS_TEST_FOR_EXCEPTION(!H->GetLevel(0)->IsAvailable("A"), MueLu::Exceptions::RuntimeError,
                                 "Thyra::MueLuPreconditionerFactory: Hierarchy has no fine level operator");
      RCP<MueLu::Level> level0 = H->GetLevel(0);
      RCP<XpOp>    O0 = level0->Get<RCP<XpOp> >("A");
      RCP<XpMat>   A0 = rcp_dynamic_cast<XpMat>(O0);

      if (!A0.is_null()) {
        // If a user provided a "number of equations" argument in a parameter list
        // during the initial setup, we must honor that settings and reuse it for
        // all consequent setups.
        A->SetFixedBlockSize(A0->GetFixedBlockSize());
      }

      // set new matrix
      level0->Set("A", A);

      H->SetupRe();
    }

    // wrap hierarchy H in thyraPrecOp
    RCP<ThyLinOpBase > thyraPrecOp = Teuchos::null;
#if defined(HAVE_MUELU_TPETRA)
    if (bIsTpetra) {
      RCP<MueTpOp> muelu_tpetraOp = rcp(new MueTpOp(H));
      TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(muelu_tpetraOp));
      RCP<TpOp> tpOp = Teuchos::rcp_dynamic_cast<TpOp>(muelu_tpetraOp);
      thyraPrecOp = Thyra::createLinearOp<Scalar, LocalOrdinal, GlobalOrdinal, Node>(tpOp);
    }
#endif

    if(bIsBlocked) {
      // A blocked operator is neither Epetra nor Tpetra (checked above), so
      // no wrapper may have been created yet.
      TEUCHOS_TEST_FOR_EXCEPT(Teuchos::nonnull(thyraPrecOp));

      typedef MueLu::XpetraOperator<Scalar,LocalOrdinal,GlobalOrdinal,Node>    MueXpOp;
      //typedef Thyra::XpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,Node>    ThyXpLinOp; // unused
      const RCP<MueXpOp> muelu_xpetraOp = rcp(new MueXpOp(H));

      RCP<const VectorSpaceBase<Scalar> > thyraRangeSpace  = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyra(muelu_xpetraOp->getRangeMap());
      RCP<const VectorSpaceBase<Scalar> > thyraDomainSpace = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyra(muelu_xpetraOp->getDomainMap());

      RCP <Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node> > xpOp = Teuchos::rcp_dynamic_cast<Xpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> >(muelu_xpetraOp);
      thyraPrecOp = Thyra::xpetraLinearOp(thyraRangeSpace, thyraDomainSpace,xpOp);
    }

    // No wrapper is produced for the remaining configurations (e.g. a plain
    // Epetra operator); that is reported here.
    TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(thyraPrecOp));

    defaultPrec->initializeUnspecified(thyraPrecOp);

  }
// Construct the overlapping version of the distributed matrix A.
//
// \param A             Input matrix; must be a Tpetra::CrsMatrix with matching
//                      template parameters and live on more than one process.
// \param overlapLevel  Number of overlap rounds; must be > 0.
//
// In each round, the global indices that appear in the current column map but
// are not rows of A on this process (and were not collected before) are added
// to the list of external rows. Afterwards the constructor builds the extended
// row/column map (local rows of A followed by the external rows), imports the
// external rows of A into ExtMatrix_, and computes the global row/nonzero
// counts, the maximum number of entries per row and the overlapping graph.
//
// Throws std::runtime_error if overlapLevel <= 0, if A's communicator has a
// single process, or if A is not a CrsMatrix of the expected type.
OverlappingRowMatrix<MatrixType>::
OverlappingRowMatrix (const Teuchos::RCP<const row_matrix_type>& A,
                      const int overlapLevel) :
  A_ (A),
  OverlapLevel_ (overlapLevel),
  UseSubComm_ (false)
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::Array;
  using Teuchos::outArg;
  using Teuchos::rcp_const_cast;
  using Teuchos::rcp_dynamic_cast;
  using Teuchos::rcp_implicit_cast;
  using Teuchos::REDUCE_SUM;
  using Teuchos::reduceAll;
  typedef Tpetra::global_size_t GST;
  typedef Tpetra::CrsGraph<local_ordinal_type,
                           global_ordinal_type, node_type> crs_graph_type;
  TEUCHOS_TEST_FOR_EXCEPTION(
    OverlapLevel_ <= 0, std::runtime_error,
    "Ifpack2::OverlappingRowMatrix: OverlapLevel must be > 0.");
  TEUCHOS_TEST_FOR_EXCEPTION(
    A_->getComm()->getSize() == 1, std::runtime_error,
    "Ifpack2::OverlappingRowMatrix: Matrix must be "
    "distributed over more than one MPI process.");

  RCP<const crs_matrix_type> ACRS =
    rcp_dynamic_cast<const crs_matrix_type, const row_matrix_type> (A_);
  TEUCHOS_TEST_FOR_EXCEPTION(
    ACRS.is_null (), std::runtime_error,
    "Ifpack2::OverlappingRowMatrix: The input matrix must be a Tpetra::"
    "CrsMatrix with matching template parameters.  This class currently "
    "requires that CrsMatrix's fifth template parameter be the default.");
  RCP<const crs_graph_type> A_crsGraph = ACRS->getCrsGraph ();

  const size_t numMyRowsA = A_->getNodeNumRows ();

  // Map::getLocalElement() reports "not on this process" with the invalid
  // value of the LOCAL ordinal type, so that is the sentinel to test against.
  const local_ordinal_type local_invalid =
    Teuchos::OrdinalTraits<local_ordinal_type>::invalid ();
  // Passing the invalid global_size_t value as the global element count asks
  // the Map constructor to compute that count itself.
  const GST global_size_invalid = Teuchos::OrdinalTraits<GST>::invalid ();

  // Temp arrays
  Array<global_ordinal_type> ExtElements;
  RCP<map_type>        TmpMap;
  RCP<crs_graph_type>  TmpGraph;
  RCP<import_type>     TmpImporter;
  RCP<const map_type>  ColMap;

  // The big import loop
  for (int overlap = 0 ; overlap < OverlapLevel_ ; ++overlap) {
    // Get the current column map: A's own in the first round, afterwards the
    // one of the graph imported in the previous round.
    if (overlap == 0) {
      ColMap = A_->getColMap ();
    }
    else {
      ColMap = TmpGraph->getColMap ();
    }

    // GIDs discovered in this round. The list grows on demand: the number of
    // new external indices cannot be derived reliably from the map sizes
    // (an owned row index need not appear in the column map).
    Array<global_ordinal_type> mylist;

    // define the set of rows that are in ColMap but not in RowMap
    const size_t numColElements = ColMap->getNodeNumElements ();
    for (local_ordinal_type i = 0 ; static_cast<size_t> (i) < numColElements ; ++i) {
      const global_ordinal_type GID = ColMap->getGlobalElement (i);
      if (A_->getRowMap ()->getLocalElement (GID) == local_invalid) {
        // Skip GIDs that were already collected (in this or an earlier round).
        typedef typename Array<global_ordinal_type>::iterator iter_type;
        const iter_type end = ExtElements.end ();
        const iter_type pos = std::find (ExtElements.begin (), end, GID);
        if (pos == end) {
          ExtElements.push_back (GID);
          mylist.push_back (GID);
        }
      }
    }

    // mfh 24 Nov 2013: We don't need TmpMap, TmpGraph, or
    // TmpImporter after this loop, so we don't have to construct them
    // on the last round.
    if (overlap + 1 < OverlapLevel_) {
      // Allocate & import new matrices, maps, etc.
      //
      // FIXME (mfh 24 Nov 2013) Do we always want to use index base
      // zero?  It doesn't really matter, since the actual index base
      // (in the current implementation of Map) will always be the
      // globally least GID.
      TmpMap = rcp (new map_type (global_size_invalid, mylist (),
                                  Teuchos::OrdinalTraits<global_ordinal_type>::zero (),
                                  A_->getComm (), A_->getNode ()));
      TmpGraph = rcp (new crs_graph_type (TmpMap, 0));
      TmpImporter = rcp (new import_type (A_->getRowMap (), TmpMap));

      TmpGraph->doImport (*A_crsGraph, *TmpImporter, Tpetra::INSERT);
      TmpGraph->fillComplete (A_->getDomainMap (), TmpMap);
    }
  }

  // build the map containing all the nodes (original
  // matrix + extended matrix)
  Array<global_ordinal_type> mylist (numMyRowsA + ExtElements.size ());
  for (local_ordinal_type i = 0; (size_t)i < numMyRowsA; ++i) {
    mylist[i] = A_->getRowMap ()->getGlobalElement (i);
  }
  for (local_ordinal_type i = 0; i < ExtElements.size (); ++i) {
    mylist[i + numMyRowsA] = ExtElements[i];
  }

  RowMap_ = rcp (new map_type (global_size_invalid, mylist (),
                               Teuchos::OrdinalTraits<global_ordinal_type>::zero (),
                               A_->getComm (), A_->getNode ()));
  // The overlapping matrix uses the same map for rows and columns.
  ColMap_ = RowMap_;

  // now build the map corresponding to all the external nodes
  // (with respect to A().RowMatrixRowMap().
  ExtMap_ = rcp (new map_type (global_size_invalid, ExtElements (),
                               Teuchos::OrdinalTraits<global_ordinal_type>::zero (),
                               A_->getComm (), A_->getNode ()));
  ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0));
  ExtImporter_ = rcp (new import_type (A_->getRowMap (), ExtMap_));

  // Fill the external rows by importing them from A.
  RCP<crs_matrix_type> ExtMatrixCRS =
    rcp_dynamic_cast<crs_matrix_type, row_matrix_type> (ExtMatrix_);
  ExtMatrixCRS->doImport (*ACRS, *ExtImporter_, Tpetra::INSERT);
  ExtMatrixCRS->fillComplete (A_->getDomainMap (), RowMap_);

  Importer_ = rcp (new import_type (A_->getRowMap (), RowMap_));

  // fix indices for overlapping matrix
  const size_t numMyRowsB = ExtMatrix_->getNodeNumRows ();

  GST NumMyNonzeros_tmp = A_->getNodeNumEntries () + ExtMatrix_->getNodeNumEntries ();
  GST NumMyRows_tmp = numMyRowsA + numMyRowsB;
  {
    // Sum both local counts over all processes with a single reduction.
    GST inArray[2], outArray[2];
    inArray[0] = NumMyNonzeros_tmp;
    inArray[1] = NumMyRows_tmp;
    outArray[0] = 0;
    outArray[1] = 0;
    reduceAll<int, GST> (* (A_->getComm ()), REDUCE_SUM, 2, inArray, outArray);
    NumGlobalNonzeros_ = outArray[0];
    NumGlobalRows_ = outArray[1];
  }
  // reduceAll<int, GST> (* (A_->getComm ()), REDUCE_SUM, NumMyNonzeros_tmp,
  //                      outArg (NumGlobalNonzeros_));
  // reduceAll<int, GST> (* (A_->getComm ()), REDUCE_SUM, NumMyRows_tmp,
  //                      outArg (NumGlobalRows_));

  // Largest row length over both the original and the external rows.
  MaxNumEntries_ = A_->getNodeMaxNumRowEntries ();
  if (MaxNumEntries_ < ExtMatrix_->getNodeMaxNumRowEntries ()) {
    MaxNumEntries_ = ExtMatrix_->getNodeMaxNumRowEntries ();
  }

  // Create the graph (returned by getGraph()).
  typedef Details::OverlappingRowGraph<row_graph_type> row_graph_impl_type;
  RCP<row_graph_impl_type> graph =
    rcp (new row_graph_impl_type (A_->getGraph (),
                                  ExtMatrix_->getGraph (),
                                  RowMap_,
                                  ColMap_,
                                  NumGlobalRows_,
                                  NumGlobalRows_, // # global cols == # global rows
                                  NumGlobalNonzeros_,
                                  MaxNumEntries_,
                                  rcp_const_cast<const import_type> (Importer_),
                                  rcp_const_cast<const import_type> (ExtImporter_)));
  graph_ = rcp_const_cast<const row_graph_type> (rcp_implicit_cast<row_graph_type> (graph));
  // Resize temp arrays
  Indices_.resize (MaxNumEntries_);
  Values_.resize (MaxNumEntries_);
}
// Driver that uses a MueLu Braess-Sarazin block smoother as a stand-alone
// solver for a 2x2 blocked system.
//
// Steps:
//   1. parse command line options (sweeps / damping for the Braess-Sarazin
//      smoother and for its internal Schur complement solver),
//   2. build strided Xpetra maps (2 + 1 dofs per node) and their Epetra views,
//   3. read the system matrix and right-hand side from Matrix Market files,
//   4. split the matrix into four blocks and wrap them in a BlockedCrsMatrix,
//   5. set up the Braess-Sarazin smoother on a single MueLu level,
//   6. apply it to a zero initial guess and print the residual norm.
//
// Returns EXIT_SUCCESS if the whole sequence ran without an exception,
// EXIT_FAILURE on a command line error, an input file that cannot be read,
// a failed matrix split, or any caught exception.
int main(int argc, char *argv[]) {
#include "MueLu_UseShortNames.hpp"

  using Teuchos::RCP;
  using Teuchos::rcp;
  using namespace MueLuTests;
  using namespace Teuchos;

  typedef Xpetra::StridedMap<int,int>        StridedMap;
  typedef Xpetra::StridedMapFactory<int,int> StridedMapFactory;

  oblackholestream blackhole;
  GlobalMPISession mpiSession(&argc,&argv,&blackhole);

  bool success = false;
  bool verbose = true;  // consumed by TEUCHOS_STANDARD_CATCH_STATEMENTS below
  try {
    RCP<const Comm<int> > comm = DefaultComm<int>::getComm();
    RCP<FancyOStream> out = fancyOStream(rcpFromRef(std::cout));
    out->setOutputToRootOnly(0);
    *out << MueLu::MemUtils::PrintMemoryUsage() << std::endl;

    // Timing
    Time myTime("global");
    TimeMonitor MM(myTime);

#ifndef HAVE_XPETRA_INT_LONG_LONG
    *out << "Warning: scaling test was not compiled with long long int support" << std::endl;
#endif

    // read in input parameters

    // default parameters
    LO BS_nSweeps = 100;
    Scalar BS_omega = 1.7;
    LO SC_nSweeps = 1;
    Scalar SC_omega = 1.0;
    int SC_bUseDirectSolver = 0;

    // Note: use --help to list available options.
    CommandLineProcessor clp(false);
    clp.setOption("BraessSarazin_sweeps",&BS_nSweeps,"number of sweeps with BraessSarazin smoother");
    clp.setOption("BraessSarazin_omega", &BS_omega,  "scaling factor for BraessSarazin smoother");
    clp.setOption("SchurComp_sweeps",    &SC_nSweeps,"number of sweeps for BraessSarazin internal SchurComp solver/smoother (GaussSeidel)");
    clp.setOption("SchurComp_omega",     &SC_omega,  "damping parameter for BraessSarazin internal SchurComp solver/smoother (GaussSeidel)");
    clp.setOption("SchurComp_solver",    &SC_bUseDirectSolver,  "if 1: use direct solver for SchurComp equation, otherwise use GaussSeidel smoother (=default)");

    switch (clp.parse(argc,argv)) {
      case CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
      case CommandLineProcessor::PARSE_ERROR:
      case CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
      case CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
    }

    int globalNumDofs = 1500;  // used for the maps
    //int nDofsPerNode = 3;      // used for generating the fine level null-space

    // build strided maps
    // striding information: 2 velocity dofs and 1 pressure dof = 3 dofs per node
    std::vector<size_t> stridingInfo;
    stridingInfo.push_back(2);
    stridingInfo.push_back(1);

    /////////////////////////////////////// build strided maps
    // build strided maps:
    // xstridedfullmap: full map (velocity and pressure dof gids), contiguous
    // xstridedvelmap: only velocity dof gid maps (i.e. 0,1,3,4,6,7...)
    // xstridedpremap: only pressure dof gid maps (i.e. 2,5,8,...)
    Xpetra::UnderlyingLib lib = Xpetra::UseEpetra;
    RCP<StridedMap> xstridedfullmap = StridedMapFactory::Build(lib,globalNumDofs,0,stridingInfo,comm,-1);
    RCP<StridedMap> xstridedvelmap  = StridedMapFactory::Build(xstridedfullmap,0);
    RCP<StridedMap> xstridedpremap  = StridedMapFactory::Build(xstridedfullmap,1);

    /////////////////////////////////////// transform Xpetra::Map objects to Epetra
    // this is needed for our splitting routine
    // (rcpFromRef creates non-owning handles; the Xpetra maps above keep the
    // underlying Epetra maps alive)
    const RCP<const Epetra_Map> fullmap = rcpFromRef(Xpetra::toEpetra(*xstridedfullmap));
    RCP<const Epetra_Map>       velmap  = rcpFromRef(Xpetra::toEpetra(*xstridedvelmap));
    RCP<const Epetra_Map>       premap  = rcpFromRef(Xpetra::toEpetra(*xstridedpremap));

    /////////////////////////////////////// import problem matrix and RHS from files (-> Epetra)

    // read in problem
    Epetra_CrsMatrix * ptrA = 0;
    Epetra_Vector * ptrf = 0;

    *out << "Reading matrix market file" << std::endl;

    const int errA = EpetraExt::MatrixMarketFileToCrsMatrix("A_re1000_5932.txt",*fullmap,*fullmap,*fullmap,ptrA);
    const int errf = EpetraExt::MatrixMarketFileToVector("b_re1000_5932.txt",*fullmap,ptrf);

    // Take ownership first so nothing leaks on the early return below.
    RCP<Epetra_CrsMatrix> epA = rcp(ptrA);
    RCP<Epetra_Vector> epv = rcp(ptrf);

    // The readers report failure through a nonzero return code; without this
    // check a missing input file would lead to dereferencing a null pointer.
    if (errA != 0 || errf != 0 || epA.is_null() || epv.is_null()) {
      *out << "Problem with reading matrix market files" << std::endl;
      return EXIT_FAILURE;
    }

    /////////////////////////////////////// split system into 2x2 block system

    *out << "Split matrix into 2x2 block matrix" << std::endl;

    // split fullA into A11,..., A22
    RCP<Epetra_CrsMatrix> A11;
    RCP<Epetra_CrsMatrix> A12;
    RCP<Epetra_CrsMatrix> A21;
    RCP<Epetra_CrsMatrix> A22;

    // The blocks are unusable if the split fails, so stop here instead of
    // continuing with null matrices.
    if(SplitMatrix2x2(epA,*velmap,*premap,A11,A12,A21,A22)==false) {
      *out << "Problem with splitting matrix"<< std::endl;
      return EXIT_FAILURE;
    }

    /////////////////////////////////////// transform Epetra objects to Xpetra (needed for MueLu)

    // build Xpetra objects from Epetra_CrsMatrix objects
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA11 = rcp(new Xpetra::EpetraCrsMatrix(A11));
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA12 = rcp(new Xpetra::EpetraCrsMatrix(A12));
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA21 = rcp(new Xpetra::EpetraCrsMatrix(A21));
    RCP<Xpetra::CrsMatrix<Scalar,LO,GO,Node> > xA22 = rcp(new Xpetra::EpetraCrsMatrix(A22));

    /////////////////////////////////////// generate MapExtractor object

    std::vector<RCP<const Xpetra::Map<LO,GO,Node> > > xmaps;

    xmaps.push_back(xstridedvelmap);
    xmaps.push_back(xstridedpremap);

    RCP<const Xpetra::MapExtractor<Scalar,LO,GO,Node> > map_extractor = Xpetra::MapExtractorFactory<Scalar,LO,GO>::Build(xstridedfullmap,xmaps);

    /////////////////////////////////////// build blocked transfer operator
    // using the map extractor
    RCP<Xpetra::BlockedCrsMatrix<Scalar,LO,GO,Node> > bOp = rcp(new Xpetra::BlockedCrsMatrix<Scalar,LO,GO>(map_extractor,map_extractor,10));
    bOp->setMatrix(0,0,xA11);
    bOp->setMatrix(0,1,xA12);
    bOp->setMatrix(1,0,xA21);
    bOp->setMatrix(1,1,xA22);

    bOp->fillComplete();
    //////////////////////////////////////////////////////// finest Level
    RCP<MueLu::Level> Finest = rcp(new Level());
    Finest->setDefaultVerbLevel(VERB_NONE);
    Finest->Set("A",rcp_dynamic_cast<Matrix>(bOp));

    ///////////////////////////////////
    // Test Braess Sarazin Smoother as a solver

    *out << "Test: Creating Braess Sarazin Smoother" << std::endl;
    *out << "Test: Omega for BraessSarazin = " << BS_omega << std::endl;
    *out << "Test: Number of sweeps for BraessSarazin = " << BS_nSweeps << std::endl;
    *out << "Test: Omega for Schur Complement solver= " << SC_omega << std::endl;
    *out << "Test: Number of Schur Complement solver= " << SC_nSweeps << std::endl;
    *out << "Test: Setting up Braess Sarazin Smoother" << std::endl;

    // define BraessSarazin Smoother with BS_nSweeps and BS_omega as scaling factor
    // AFact_ = null (= default) for the 2x2 blocked operator
    RCP<BraessSarazinSmoother> BraessSarazinSm = rcp( new BraessSarazinSmoother() );
    BraessSarazinSm->SetParameter("Sweeps", Teuchos::ParameterEntry(BS_nSweeps));
    BraessSarazinSm->SetParameter("Damping factor", Teuchos::ParameterEntry(BS_omega));

    RCP<SmootherFactory>   smootherFact          = rcp( new SmootherFactory(BraessSarazinSm) );

    /*note that omega must be the same in the SchurComplementFactory and in the BraessSarazinSmoother*/
    // define SchurComplement Factory
    // SchurComp gets a RCP to AFact_ which has to be the 2x2 blocked operator
    // and the scaling/damping factor omega that is used for BraessSarazin
    // It stores the resulting SchurComplement operator as "A" generated by the SchurComplementFactory
    // Instead of F^{-1} it uses the approximation \hat{F}^{-1} with \hat{F} = diag(F)
    RCP<SchurComplementFactory> SFact = rcp(new SchurComplementFactory());
    SFact->SetParameter("omega", ParameterEntry(BS_omega));
    SFact->SetFactory("A",MueLu::NoFactory::getRCP());

    // define smoother/solver for BraessSarazin
    RCP<SmootherPrototype> smoProtoSC = null;
    if(SC_bUseDirectSolver != 1) {
      //Smoother Factory, using SFact as a factory for A
      std::string ifpackSCType;
      ParameterList ifpackSCList;
      ifpackSCList.set("relaxation: sweeps", SC_nSweeps );
      ifpackSCList.set("relaxation: damping factor", SC_omega );
      ifpackSCType = "RELAXATION";
      ifpackSCList.set("relaxation: type", "Gauss-Seidel");
      smoProtoSC     = rcp( new TrilinosSmoother(ifpackSCType, ifpackSCList, 0) );
      smoProtoSC->SetFactory("A", SFact);
    }
    else {
      // direct solver with empty type string and empty parameter list
      ParameterList ifpackDSList;
      std::string ifpackDSType;
      smoProtoSC     = rcp( new DirectSolver(ifpackDSType,ifpackDSList) );
      smoProtoSC->SetFactory("A", SFact);
    }

    RCP<SmootherFactory> SmooSCFact = rcp( new SmootherFactory(smoProtoSC) );

    // define temporary FactoryManager that is used as input for BraessSarazin smoother
    RCP<FactoryManager> MB = rcp(new FactoryManager());
    MB->SetFactory("A",                 SFact);         // SchurComplement operator for correction step (defined as "A")
    MB->SetFactory("Smoother",          SmooSCFact);    // solver/smoother for correction step
    MB->SetFactory("PreSmoother",               SmooSCFact);
    MB->SetFactory("PostSmoother",              SmooSCFact);
    MB->SetIgnoreUserData(true);               // always use data from factories defined in factory manager
    BraessSarazinSm->AddFactoryManager(MB,0);  // set temporary factory manager in BraessSarazin smoother

    // setup main factory manager
    RCP<FactoryManager> M = rcp(new FactoryManager());
    M->SetFactory("A",               MueLu::NoFactory::getRCP()); // this is the 2x2 blocked operator
    M->SetFactory("Smoother",        smootherFact);               // BraessSarazin block smoother
    M->SetFactory("PreSmoother",     smootherFact);
    M->SetFactory("PostSmoother",    smootherFact);

    MueLu::SetFactoryManager SFMCoarse(Finest, M);
    Finest->Request(MueLu::TopSmootherFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>(M, "Smoother"));

    // call setup (= extract blocks and extract diagonal of F)
    BraessSarazinSm->Setup(*Finest);

    // zero initial guess
    RCP<MultiVector> xtest = MultiVectorFactory::Build(xstridedfullmap,1);
    xtest->putScalar( (SC) 0.0);

    // right-hand side read from file, wrapped for Xpetra
    RCP<Vector> xR = rcp(new Xpetra::EpetraVector(epv));
    // calculate initial (absolute) residual
    // (with a zero initial guess this is the norm of the right-hand side)
    Array<ScalarTraits<SC>::magnitudeType> norms(1);

    xR->norm2(norms);
    *out << "Test: ||x_0|| = " << norms[0] << std::endl;
    *out << "Test: Applying Braess-Sarazin Smoother" << std::endl;
    *out << "Test: START DATA" << std::endl;
    *out << "iterations\tVelocity_residual\tPressure_residual" << std::endl;
    BraessSarazinSm->Apply(*xtest,*xR);
    xtest->norm2(norms);
    *out << "Test: ||x_1|| = " << norms[0] << std::endl;

    Array<ScalarTraits<double>::magnitudeType> test = MueLu::Utils<double, int, int>::ResidualNorm(*bOp, *xtest, *xR);
    *out << "residual norm: " << test[0] << std::endl;

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
Esempio n. 21
0
// Test driver for the MueLu RefMaxwell preconditioner.
//
// Reads edge/nodal matrices and node coordinates from text files, assembles
// SM = S + omega * M1 with omega = 2*pi*i, builds a RefMaxwell preconditioner
// and solves SM * X = B with Belos "Flexible GMRES", where B = SM * ones.
//
// The test passes (EXIT_SUCCESS) only if Belos reports convergence in fewer
// than 20 iterations. The whole body is compiled only when both
// HAVE_MUELU_TPETRA and HAVE_MUELU_IFPACK2 are defined.
int main(int argc, char *argv[]) {

#if defined(HAVE_MUELU_TPETRA) && defined(HAVE_MUELU_IFPACK2)

#include <MueLu_UseShortNames.hpp>

  typedef Tpetra::Map<LO,GO,NO>               TMap;
  typedef Tpetra::MultiVector<SC,LO,GO,NO>    TMV;
  typedef Tpetra::CrsMatrix<SC,LO,GO,NO>      TCRS;
  typedef Tpetra::Operator<SC,LO,GO,NO>       OP;
  typedef Belos::LinearProblem<SC,TMV,OP>     BelosProblem;
  typedef Belos::SolverManager<SC,TMV,OP>     BelosManager;
  typedef Belos::SolverFactory<SC,TMV,OP>     BelosFactory;

  using Teuchos::RCP;
  using Teuchos::rcp;

  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);

  bool success = false;
  bool verbose = true;  // consumed by TEUCHOS_STANDARD_CATCH_STATEMENTS below
  try {
    RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
    int commrank = comm->getRank();

    // Read matrices in from files
    // Each matrix file is read as "row col value" triplets with 1-based
    // indices (converted to 0-based below). The entry counts and problem
    // sizes are hard-coded to match the shipped data files.
    std::ifstream inputfile;
    int nnz_grad=1080, nnz_nodes=4096, nnz_edges=13440;
    int nedges=540, nnodes=216, row, col;
    double entry, x, y, z;
    // maps for nodal and edge matrices
    RCP<TMap> edge_map = rcp( new TMap(nedges,0,comm) );
    RCP<TMap> node_map = rcp( new TMap(nnodes,0,comm) );
    // edge stiffness matrix
    RCP<TCRS> S_Matrix = rcp( new TCRS(edge_map,100) );
    inputfile.open("S.txt");
    for(int i=0; i<nnz_edges; i++) {
      inputfile >> row >> col >> entry ;
      row=row-1;
      col=col-1;
      std::complex<double> centry(entry,0.0);
      // every process reads the whole file but inserts only rows it owns
      if(edge_map->isNodeGlobalElement(row)) {
        S_Matrix->insertGlobalValues(row,
            Teuchos::ArrayView<LO>(&col,1),
            Teuchos::ArrayView<SC>(&centry,1));
      }
    }
    S_Matrix->fillComplete();
    inputfile.close();
    // edge mass matrix
    RCP<TCRS> M1_Matrix = rcp( new TCRS(edge_map,100) );
    inputfile.open("M1.txt");
    for(int i=0; i<nnz_edges; i++) {
      inputfile >> row >> col >> entry ;
      row=row-1;
      col=col-1;
      std::complex<double> centry(entry,0.0);
      if(edge_map->isNodeGlobalElement(row)) {
        M1_Matrix->insertGlobalValues(row,
            Teuchos::ArrayView<LO>(&col,1),
            Teuchos::ArrayView<SC>(&centry,1));
      }
    }
    M1_Matrix->fillComplete();
    inputfile.close();
    // nodal mass matrix
    RCP<TCRS> M0_Matrix = rcp( new TCRS(node_map,100) );
    inputfile.open("M0.txt");
    for(int i=0; i<nnz_nodes; i++) {
      inputfile >> row >> col >> entry ;
      row=row-1;
      col=col-1;
      std::complex<double> centry(entry,0.0);
      if(node_map->isNodeGlobalElement(row)) {
        M0_Matrix->insertGlobalValues(row,
            Teuchos::ArrayView<LO>(&col,1),
            Teuchos::ArrayView<SC>(&centry,1));
      }
    }
    M0_Matrix->fillComplete();
    inputfile.close();
    // gradient matrix
    RCP<TCRS> D0_Matrix = rcp( new TCRS(edge_map,2) );
    inputfile.open("D0.txt");
    for(int i=0; i<nnz_grad; i++) {
      inputfile >> row >> col >> entry ;
      row=row-1;
      col=col-1;
      std::complex<double> centry(entry,0.0);
      if(edge_map->isNodeGlobalElement(row)) {
        D0_Matrix->insertGlobalValues(row,
            Teuchos::ArrayView<LO>(&col,1),
            Teuchos::ArrayView<SC>(&centry,1));
      }
    }
    // rectangular operator: domain map = nodes, range map = edges
    D0_Matrix->fillComplete(node_map,edge_map);
    inputfile.close();
    // coordinates
    RCP<TMV> coords = rcp( new TMV(node_map,3) );
    inputfile.open("coords.txt");
    for(int i=0; i<nnodes; i++) {
      inputfile >> x >> y >> z ;
      std::complex<double> cx(x,0.0), cy(y,0.0), cz(z,0.0);
      if(node_map->isNodeGlobalElement(i)) {
        coords->replaceGlobalValue(i,0,cx);
        coords->replaceGlobalValue(i,1,cy);
        coords->replaceGlobalValue(i,2,cz);
      }
    }
    inputfile.close();
    // build lumped mass matrix inverse (M0inv_Matrix)
    // lumping: diag = M0 * ones (row sums), then take the entrywise reciprocal
    RCP<TMV> ones = rcp( new TMV(node_map,1) );
    RCP<TMV> diag = rcp( new TMV(node_map,1) );
    RCP<TMV> invdiag = rcp( new TMV(node_map,1) );
    ones->putScalar((SC)1.0);
    M0_Matrix->apply(*ones,*diag);
    invdiag->reciprocal(*diag);
    Teuchos::ArrayRCP<const SC> invdiags = invdiag->getData(0);
    RCP<TCRS> M0inv_Matrix = rcp( new TCRS(node_map,1) );
    for(int i=0; i<nnodes; i++) {
      row = i;
      col = i;
      if(node_map->isNodeGlobalElement(i)) {
        LocalOrdinal lclidx = node_map->getLocalElement(i);
        std::complex<double> centry = invdiags[lclidx];
        M0inv_Matrix -> insertGlobalValues(row,
            Teuchos::ArrayView<LO>(&col,1),
            Teuchos::ArrayView<SC>(&centry,1));
      }
    }
    M0inv_Matrix->fillComplete();
    // build stiffness plus mass matrix (SM_Matrix)
    // SM = 1.0 * S + omega * M1 with purely imaginary omega = 2*pi*i
    RCP<TCRS> SM_Matrix = rcp( new TCRS(edge_map,100) );
    std::complex<double> omega(0.0,2.0*M_PI);
    Tpetra::MatrixMatrix::Add(*S_Matrix,false,(SC)1.0,*M1_Matrix,false,omega,SM_Matrix);
    SM_Matrix->fillComplete();

    // set parameters
    Teuchos::ParameterList params, params11, params22;
    params.set("refmaxwell: disable add-on",false);
    params.set("refmaxwell: max coarse size",25);
    params.set("max levels",4);
    // Configure the smoother of BOTH sub-hierarchies. The (2,2) list used to
    // be left empty because the (1,1) line was duplicated by mistake.
    params11.set("smoother: type","KRYLOV");
    params22.set("smoother: type","KRYLOV");
    //    params11.set("krylov: number of iterations",3);
    //    params22.set("krylov: number of iterations",3);
    params.set("refmaxwell: 11list",params11);
    params.set("refmaxwell: 22list",params22);
    // construct preconditioner
    RCP<MueLu::RefMaxwell<SC,LO,GO,NO> > preconditioner
      = rcp( new MueLu::RefMaxwell<SC,LO,GO,NO>(SM_Matrix,D0_Matrix,M0inv_Matrix,
            M1_Matrix,Teuchos::null,coords,params) );

    // setup LHS, RHS
    // B = SM * ones, X starts at zero
    RCP<TMV> vec = rcp( new TMV(edge_map,1) );
    vec -> putScalar((SC)1.0);
    RCP<TMV> B = rcp( new TMV(edge_map,1) );
    SM_Matrix->apply(*vec,*B);
    RCP<TMV> X = rcp( new TMV(edge_map,1) );
    X -> putScalar((SC)0.0);
    // Belos linear problem
    RCP<BelosProblem> problem = rcp( new BelosProblem() );
    problem -> setOperator( SM_Matrix );
    problem -> setRightPrec( preconditioner );
    problem -> setProblem( X, B );
    // Belos solver
    RCP<BelosManager> solver;
    RCP<BelosFactory> factory = rcp( new BelosFactory() );
    RCP<Teuchos::ParameterList> belosParams
      = rcp( new Teuchos::ParameterList() );
    belosParams->set("Maximum Iterations", 100);
    belosParams->set("Convergence Tolerance",1e-9);
    belosParams->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails);
    belosParams->set("Output Frequency",1);
    belosParams->set("Output Style",Belos::Brief);
    solver = factory->create("Flexible GMRES",belosParams);
    // set problem and solve
    solver -> setProblem( problem );
    Belos::ReturnType status = solver -> solve();
    int iters = solver -> getNumIters();
    // pass criterion: converged AND fewer than 20 iterations
    success = (iters<20 && status == Belos::Converged);
    if (commrank == 0) {
      if (success)
        std::cout << "SUCCESS! Belos converged in " << iters << " iterations." << std::endl;
      else
        std::cout << "FAILURE! Belos did not converge fast enough." << std::endl;
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
#endif
} // main
Esempio n. 22
0
// Unit test: constructs panzer::GatherSolution_Epetra evaluators for the
// Residual and Jacobian evaluation types from the same parameter list and
// checks the names and data-layout dimensions of the fields they report as
// evaluated.
TEUCHOS_UNIT_TEST(tEpetraGather, constructor)
{

   using Teuchos::RCP;
   using Teuchos::rcp;

   typedef panzer::Traits::Residual Residual;
   typedef panzer::Traits::Jacobian Jacobian;

   // 4-node quadrilateral cell topology
   Teuchos::RCP<shards::CellTopology> topo 
    = Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData< shards::Quadrilateral<4> >()));

   // auxiliary information needed to construct basis object
   std::size_t numCells = 10;
   std::string basisType = "Q1";
   panzer::CellData cellData(numCells,topo);

   // build DOF names
   RCP<std::vector<std::string> > dofNames = rcp(new std::vector<std::string>); 
   dofNames->push_back("ux"); // in practice these probably would not be gathered together!
   dofNames->push_back("p");

   // build basis
   RCP<panzer::PureBasis> basis = rcp(new panzer::PureBasis(basisType,1,cellData));

   // build gather parameter list
   // (the same name vector is used for both "DOF Names" and "Indexer Names")
   Teuchos::ParameterList gatherParams;
   gatherParams.set<RCP<std::vector<std::string> > >("DOF Names",dofNames);
   gatherParams.set<RCP<std::vector<std::string> > >("Indexer Names",dofNames);
   gatherParams.set<RCP<panzer::PureBasis> >("Basis",basis);

   // test residual gather evaluator
   {
      // constructed with a null first argument; only construction-time
      // field metadata is inspected here
      panzer::GatherSolution_Epetra<Residual,panzer::Traits,int,int> gatherResidual(Teuchos::null,gatherParams);

      // one evaluated field is expected per DOF name, in the same order
      const std::vector<RCP<PHX::FieldTag> > & fields = gatherResidual.evaluatedFields();
      TEST_EQUALITY(fields.size(),2);
 
      TEST_EQUALITY(fields[0]->name(),"ux");
      TEST_EQUALITY(fields[1]->name(),"p");

      // layout dimension 0 must equal the cell count, dimension 1 must be 4
      TEST_EQUALITY(fields[0]->dataLayout().dimension(0),Teuchos::as<int>(numCells));
      TEST_EQUALITY(fields[0]->dataLayout().dimension(1),Teuchos::as<int>(4)); // for Q1

      TEST_EQUALITY(fields[1]->dataLayout().dimension(0),Teuchos::as<int>(numCells));
      TEST_EQUALITY(fields[1]->dataLayout().dimension(1),Teuchos::as<int>(4)); // for Q1
   }

   // test jacobian gather evaluator
   {
      // same checks as above, for the Jacobian evaluation type
      panzer::GatherSolution_Epetra<Jacobian,panzer::Traits,int,int> gatherJacobian(Teuchos::null,gatherParams);

      const std::vector<RCP<PHX::FieldTag> > & fields = gatherJacobian.evaluatedFields();
      TEST_EQUALITY(fields.size(),2);
 
      TEST_EQUALITY(fields[0]->name(),"ux");
      TEST_EQUALITY(fields[1]->name(),"p");

      TEST_EQUALITY(fields[0]->dataLayout().dimension(0),Teuchos::as<int>(numCells));
      TEST_EQUALITY(fields[0]->dataLayout().dimension(1),Teuchos::as<int>(4)); // for Q1

      TEST_EQUALITY(fields[1]->dataLayout().dimension(0),Teuchos::as<int>(numCells));
      TEST_EQUALITY(fields[1]->dataLayout().dimension(1),Teuchos::as<int>(4)); // for Q1
   }
}
Teuchos::RCP< std::vector< Teuchos::RCP<PHX::Evaluator<panzer::Traits> > > > 
user_app::MyModelFactory<EvalT>::
buildClosureModels(const std::string& model_id,
		   const Teuchos::ParameterList& models,
		   const panzer::FieldLayoutLibrary& fl,
		   const Teuchos::RCP<panzer::IntegrationRule>& ir, 
		   const Teuchos::ParameterList& default_params,
		   const Teuchos::ParameterList& user_data,
                   const Teuchos::RCP<panzer::GlobalData>& global_data,
		   PHX::FieldManager<panzer::Traits>& fm) const
{
  // Builds the evaluators for the closure model named model_id.
  //
  // Every entry of the sublist models[model_id] must itself be a parameter
  // list. Two kinds of entries are understood:
  //   - a double "Value": one panzer::Constant evaluator on the integration
  //     rule's scalar layout plus one per unique basis of fl;
  //   - a string "Value" under the key "Global Statistics": a
  //     panzer::GlobalStatistics evaluator (Residual evaluation type only),
  //     whose required field is also registered with fm.
  // Throws std::logic_error if the model is missing or an entry matches
  // neither kind. Returns the list of evaluators that were created.

  typedef Teuchos::RCP<PHX::Evaluator<panzer::Traits> > EvaluatorPtr;
  typedef std::vector<EvaluatorPtr> EvaluatorList;
  typedef std::vector<Teuchos::RCP<const panzer::PureBasis> > BasisList;

  Teuchos::RCP<EvaluatorList> evaluators = Teuchos::rcp(new EvaluatorList);

  // The requested model must exist as a sublist.
  if (!models.isSublist(model_id)) {
    std::stringstream msg;
    msg << "Falied to find requested model, \"" << model_id 
	<< "\", for equation set:\n" << std::endl;
    TEUCHOS_TEST_FOR_EXCEPTION(!models.isSublist(model_id), std::logic_error, msg.str());
  }

  BasisList bases;
  fl.uniqueBases(bases);

  const Teuchos::ParameterList& modelEntries = models.sublist(model_id);

  for (Teuchos::ParameterList::ConstIterator entryIt = modelEntries.begin();
       entryIt != modelEntries.end(); ++entryIt) {

    const std::string key = entryIt->first;
    const Teuchos::ParameterList& spec =
      Teuchos::getValue<Teuchos::ParameterList>(entryIt->second);

    // Scratch list handed to the evaluator constructors of this entry.
    Teuchos::ParameterList input;
    bool found = false;

    // --- constant scalar value ---------------------------------------------
    if (spec.isType<double>("Value")) {
      const double constantValue = spec.get<double>("Value");

      // evaluator living at the integration points
      input.set("Name", key);
      input.set("Value", constantValue);
      input.set("Data Layout", ir->dl_scalar);
      const EvaluatorPtr atIntegrationPoints =
	Teuchos::rcp(new panzer::Constant<EvalT,panzer::Traits>(input));
      evaluators->push_back(atIntegrationPoints);

      // one more evaluator per unique basis
      for (BasisList::size_type b = 0; b < bases.size(); ++b) {
	const Teuchos::RCP<const panzer::BasisIRLayout> layout =
	  basisIRLayout(bases[b], *ir);
	input.set("Name", key);
	input.set("Value", constantValue);
	input.set("Data Layout", layout->functional);
	const EvaluatorPtr atBasis =
	  Teuchos::rcp(new panzer::Constant<EvalT,panzer::Traits>(input));
	evaluators->push_back(atBasis);
      }

      found = true;
    }

    // --- string-valued entries ---------------------------------------------
    if (spec.isType<std::string>("Value")) {

      const std::string value = spec.get<std::string>("Value");

      if (key == "Global Statistics") {
	// The key is accepted for every evaluation type, but an evaluator is
	// only created for the Residual type.
	const bool isResidual = (typeid(EvalT) == typeid(panzer::Traits::Residual));
	if (isResidual) {
	  input.set("Comm", user_data.get<Teuchos::RCP<const Teuchos::Comm<int> > >("Comm"));
	  input.set("Names", value);
	  input.set("IR", ir);
	  input.set("Global Data", global_data);

	  Teuchos::RCP< panzer::GlobalStatistics<EvalT,panzer::Traits> > statistics =
	    Teuchos::rcp(new panzer::GlobalStatistics<EvalT,panzer::Traits>(input));
	  evaluators->push_back(statistics);

	  // Make sure the field manager actually evaluates the statistics.
	  fm.template requireField<EvalT>(statistics->getRequiredFieldTag());
	}
	found = true;
      }
    }

    // Nothing above recognized this entry: report it.
    if (!found) {
      std::stringstream msg;
      msg << "ClosureModelFactory failed to build evaluator for key \"" << key 
	  << "\"\nin model \"" << model_id 
	  << "\".  Please correct the type or add support to the \nfactory." <<std::endl;
      TEUCHOS_TEST_FOR_EXCEPTION(!found, std::logic_error, msg.str());
    }
  }

  return evaluators;
}
Esempio n. 24
0
int
main (int argc, char *argv[])
{
  // Tutorial: hand an MPI communicator that the application owns to
  // Trilinos by wrapping it in a Teuchos communicator object.

  // The application, not Trilinos, initializes MPI here.  Checking the
  // error codes of MPI calls is good practice; they are ignored in this
  // example only to keep it short.
  (void) MPI_Init (&argc, &argv);

  // Stand-in for however the application obtains its MPI_Comm.
  MPI_Comm applicationComm = MPI_COMM_WORLD;

  // An application that also uses MPI directly should strongly consider
  // giving Trilinos a duplicate of its communicator (MPI_Comm_dup).
  // Trilinos may duplicate the communicator automatically in the future,
  // but it does not do so currently.

  // Wrapping the raw MPI_Comm like this leaves the application responsible
  // for calling MPI_Comm_free on it afterwards, if that is necessary (it is
  // not for MPI_COMM_WORLD).  Trilinos can be told to free the communicator
  // itself by passing the result of Teuchos::opaqueWrapper to the MpiComm
  // constructor; that variant is not shown here.
  //
  // With old versions of Trilinos the line below might not compile, and the
  // following may be needed instead:
  //
  // using Teuchos::opaqueWrapper;
  // RCP<const Comm<int> > comm = rcp (new MpiComm<int> (opaqueWrapper (yourComm)));
  Teuchos::RCP<const Teuchos::Comm<int> > comm =
    Teuchos::rcp (new Teuchos::MpiComm<int> (applicationComm));

  // Rank of this process and total process count; the counterparts of
  // MPI_Comm_rank and MPI_Comm_size.
  const int myRank = comm->getRank ();
  const int numProcs = comm->getSize ();
  const bool isRoot = (myRank == 0);

  if (isRoot) {
    std::cout << "Total number of processes: " << numProcs << std::endl;
  }

  // Put the wrapped communicator to use.
  exampleRoutine (comm);

  // The Trilinos test framework looks for this line to mark the test as
  // passed.
  if (isRoot) {
    std::cout << "End Result: TEST PASSED" << std::endl;
  }

  // A communicator that needs MPI_Comm_free should be freed here, before
  // MPI_Finalize.  This can also be automated; ask the tutorial presenter
  // for more information.

  // MPI was initialized by this program, so this program finalizes it.
  (void) MPI_Finalize ();
  return 0;
}
Esempio n. 25
0
int main(int argc, char *argv[]) {
  //
  Teuchos::GlobalMPISession session(&argc, &argv, NULL);
  //
  typedef double                            ST;
  typedef Teuchos::ScalarTraits<ST>        SCT;
  typedef SCT::magnitudeType                MT;
  typedef Epetra_MultiVector                MV;
  typedef Epetra_Operator                   OP;
  typedef Belos::MultiVecTraits<ST,MV>     MVT;
  typedef Belos::OperatorTraits<ST,MV,OP>  OPT;

  using Teuchos::ParameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;

  bool verbose = false;
  bool success = true;
  try {
    bool proc_verbose = false;
    bool leftprec = true; // use left preconditioning to solve these linear systems
    int frequency = -1;  // how often residuals are printed by solver
    int blocksize = 4;
    int numrhs = 15;
    int maxrestarts = 15; // number of restarts allowed
    int length = 25;
    int maxiters = -1;    // maximum iterations allowed
    std::string filename("orsirr1.hb");
    MT tol = 1.0e-5;  // relative residual tolerance

    Teuchos::CommandLineProcessor cmdp(false,true);
    cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
    cmdp.setOption("left-prec","right-prec",&leftprec,"Left preconditioning or right.");
    cmdp.setOption("frequency",&frequency,"Solvers frequency for printing residuals (#iters).");
    cmdp.setOption("filename",&filename,"Filename for Harwell-Boeing test matrix.");
    cmdp.setOption("tol",&tol,"Relative residual tolerance used by GMRES solver.");
    cmdp.setOption("num-rhs",&numrhs,"Number of right-hand sides to be solved for.");
    cmdp.setOption("max-restarts",&maxrestarts,"Maximum number of restarts allowed for GMRES solver.");
    cmdp.setOption("blocksize",&blocksize,"Block size used by GMRES.");
    cmdp.setOption("maxiters",&maxiters,"Maximum number of iterations per linear system (-1 = adapted to problem/block size).");
    cmdp.setOption("subspace-size",&length,"Dimension of Krylov subspace used by GMRES.");
    if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
      return -1;
    }
    if (!verbose)
      frequency = -1;  // reset frequency if test is not verbose
    //
    // Get the problem
    //
    int MyPID;
    RCP<Epetra_CrsMatrix> A;
    int return_val =Belos::createEpetraProblem(filename,NULL,&A,NULL,NULL,&MyPID);
    const Epetra_Map &Map = A->RowMap();
    if(return_val != 0) return return_val;
    proc_verbose = verbose && (MyPID==0); /* Only print on zero processor */
    //
    // *****Construct the Preconditioner*****
    //
    if (proc_verbose) std::cout << std::endl << std::endl;
    if (proc_verbose) std::cout << "Constructing ILU preconditioner" << std::endl;
    int Lfill = 2;
    // if (argc > 2) Lfill = atoi(argv[2]);
    if (proc_verbose) std::cout << "Using Lfill = " << Lfill << std::endl;
    int Overlap = 2;
    // if (argc > 3) Overlap = atoi(argv[3]);
    if (proc_verbose) std::cout << "Using Level Overlap = " << Overlap << std::endl;
    double Athresh = 0.0;
    // if (argc > 4) Athresh = atof(argv[4]);
    if (proc_verbose) std::cout << "Using Absolute Threshold Value of " << Athresh << std::endl;
    double Rthresh = 1.0;
    // if (argc >5) Rthresh = atof(argv[5]);
    if (proc_verbose) std::cout << "Using Relative Threshold Value of " << Rthresh << std::endl;
    //
    Teuchos::RCP<Ifpack_IlukGraph> ilukGraph;
    Teuchos::RCP<Ifpack_CrsRiluk> ilukFactors;
    //
    if (Lfill > -1) {
      ilukGraph = Teuchos::rcp(new Ifpack_IlukGraph(A->Graph(), Lfill, Overlap));
      int info = ilukGraph->ConstructFilledGraph();
      assert( info == 0 );
      ilukFactors = Teuchos::rcp(new Ifpack_CrsRiluk(*ilukGraph));
      int initerr = ilukFactors->InitValues(*A);
      if (initerr != 0) std::cout << "InitValues error = " << initerr;
      info = ilukFactors->Factor();
      assert( info == 0 );

    }
    //
    bool transA = false;
    double Cond_Est;
    ilukFactors->Condest(transA, Cond_Est);
    if (proc_verbose) {
      std::cout << "Condition number estimate for this preconditoner = " << Cond_Est << std::endl;
      std::cout << std::endl;
    }

    //
    // Create the Belos preconditioned operator from the Ifpack preconditioner.
    // NOTE:  This is necessary because Belos expects an operator to apply the
    //        preconditioner with Apply() NOT ApplyInverse().
    RCP<Belos::EpetraPrecOp> Prec = rcp( new Belos::EpetraPrecOp( ilukFactors ) );

    //
    // ********Other information used by block solver***********
    // **************(can be user specified)********************
    //
    const int NumGlobalElements = Map.NumGlobalElements();
    if (maxiters == -1)
      maxiters = NumGlobalElements/blocksize - 1; // maximum number of iterations to run
    //
    ParameterList innerBelosList;
    innerBelosList.set( "Solver", "BlockGmres" );               // Set the inner solver to use block Gmres
    innerBelosList.set( "Num Blocks", length );                 // Maximum number of blocks in Krylov factorization
    innerBelosList.set( "Block Size", blocksize );              // Blocksize to be used by iterative solver
    innerBelosList.set( "Maximum Iterations", maxiters );       // Maximum number of iterations allowed
    innerBelosList.set( "Maximum Restarts", maxrestarts );      // Maximum number of restarts allowed
    innerBelosList.set( "Convergence Tolerance", 1.0e-2 );       // Relative convergence tolerance requested
    innerBelosList.set( "Verbosity", Belos::Errors + Belos::Warnings );
    innerBelosList.set( "Timer Label", "Belos Preconditioner Solve" );// Choose a different label for the inner solve
    //
    // *****Construct linear problem using A and Prec*****
    // ***The solution and RHS vectors will be set later**
    Belos::LinearProblem<double,MV,OP> innerProblem;
    innerProblem.setOperator( A );
    if (leftprec)
      innerProblem.setLeftPrec( Prec );
    else
      innerProblem.setRightPrec( Prec );
    innerProblem.setLabel( "Belos Preconditioner Solve" );
    //
    // *****Create the inner block Gmres iteration********
    //
    RCP<Belos::EpetraOperator> innerSolver;
    innerSolver = rcp( new Belos::EpetraOperator( rcp(&innerProblem,false) , rcp(&innerBelosList,false), true ) );
    //
    // *****Construct solution std::vector and random right-hand-sides *****
    //
    RCP<Epetra_MultiVector> X = rcp( new Epetra_MultiVector(Map, numrhs) );
    X->PutScalar( 0.0 );
    RCP<Epetra_MultiVector> B = rcp( new Epetra_MultiVector(Map, numrhs) );
    B->Random();
    Belos::LinearProblem<double,MV,OP> problem( A, X, B );
    problem.setRightPrec( innerSolver );
    problem.setLabel( "Belos Flexible Gmres Solve" );
    bool set = problem.setProblem();
    if (set == false) {
      if (proc_verbose)
        std::cout << std::endl << "ERROR:  Belos::LinearProblem failed to set up correctly!" << std::endl;
      return -1;
    }
    //
    // Copy the list for the inner solver
    //
    Teuchos::ParameterList belosList( innerBelosList );
    belosList.set( "Flexible Gmres" , true );              // Use flexible Gmres to solve this problem
    belosList.set( "Timer Label", "Belos Flexible Gmres Solve" );// Choose a different label for the outer solve
    belosList.set( "Convergence Tolerance", tol );         // Relative convergence tolerance requested
    if (verbose) {
      belosList.set( "Verbosity", Belos::Errors + Belos::Warnings +
          Belos::TimingDetails + Belos::StatusTestDetails );
      if (frequency > 0)
        belosList.set( "Output Frequency", frequency );
    }
    //
    // *******************************************************************
    // **********Create the flexible, block Gmres iteration***************
    // *******************************************************************
    //
    RCP< Belos::SolverManager<double,MV,OP> > solver;
    solver = rcp( new Belos::BlockGmresSolMgr<double,MV,OP>( rcp(&problem,false), rcp(&belosList,false) ) );
    //
    //
    // **********Print out information about problem*******************
    //
    if (proc_verbose) {
      std::cout << std::endl << std::endl;
      std::cout << "Dimension of matrix: " << NumGlobalElements << std::endl;
      std::cout << "Number of right-hand sides: " << numrhs << std::endl;
      std::cout << "Block size used by solver: " << blocksize << std::endl;
      std::cout << "Number of restarts allowed: " << maxrestarts << std::endl;
      std::cout << "Length of block Arnoldi factorization: " << length*blocksize << " ( "<< length << " blocks ) " <<std::endl;
      std::cout << "Max number of Gmres iterations: " << maxiters << std::endl;
      std::cout << "Relative residual tolerance: " << tol << std::endl;
      std::cout << std::endl;
    }
    //
    // Perform solve
    //
    Belos::ReturnType ret = solver->solve();
    //
    // Compute actual residuals.
    //
    bool badRes = false;
    std::vector<double> actual_resids( numrhs );
    std::vector<double> rhs_norm( numrhs );
    Epetra_MultiVector R(Map, numrhs);
    OPT::Apply( *A, *X, R );
    MVT::MvAddMv( -1.0, R, 1.0, *B, R );
    MVT::MvNorm( R, actual_resids );
    MVT::MvNorm( *B, rhs_norm );
    if (proc_verbose) {
      std::cout<< "---------- Actual Residuals (normalized) ----------"<<std::endl<<std::endl;
      for ( int i=0; i<numrhs; i++) {
        double actRes = actual_resids[i]/rhs_norm[i];
        std::cout<<"Problem "<<i<<" : \t"<< actRes <<std::endl;
        if (actRes > tol ) badRes = true;
      }
    }

    success = ret==Belos::Converged && !badRes;

    if (success) {
      if (proc_verbose)
        std::cout << "End Result: TEST PASSED" << std::endl;
    } else {
      if (proc_verbose)
        std::cout << "End Result: TEST FAILED" << std::endl;
    }
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose,std::cerr,success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
} // end test_bl_fgmres_hb.cpp
/** \brief Solve the state equation for <tt>x</tt> and, on convergence,
 * evaluate the underlying model's outputs at that state.
 *
 * The steps performed are:
 * <ol>
 * <li> Lazily create the state guess <tt>x_guess_solu_</tt> from the
 *      underlying model's nominal <tt>x</tt> (or zeros if there is none).
 * <li> Push nominal values overlaid with <tt>inArgs</tt> and the state guess
 *      into <tt>wrappedThyraModel_</tt> and run <tt>stateSolver_</tt>.
 * <li> If the solve converged, call <tt>evalModel()</tt> on the underlying
 *      model with <tt>x</tt> set to the solved state; otherwise call
 *      <tt>outArgs.setFailed()</tt>.
 * </ol>
 *
 * Sensitivity output (<tt>DgDp</tt>, and anything that needs <tt>DfDp</tt>) is
 * not implemented: each such path ends in <tt>TEST_FOR_EXCEPT(true)</tt>.
 *
 * \param inArgs   Input arguments forwarded (apart from <tt>x</tt>) to the
 *                 underlying model.
 * \param outArgs  Output arguments to be filled in.
 */
void DefaultStateEliminationModelEvaluator<Scalar>::evalModelImpl(
  const ModelEvaluatorBase::InArgs<Scalar> &inArgs,
  const ModelEvaluatorBase::OutArgs<Scalar> &outArgs
  ) const
{
  typedef ModelEvaluatorBase MEB;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::rcp_dynamic_cast;
  using Teuchos::OSTab;

  // Only totalTimer is ever started and reported; timer is constructed but
  // not otherwise used in this function.
  Teuchos::Time totalTimer(""), timer("");
  totalTimer.start(true);

  const RCP<Teuchos::FancyOStream> out = this->getOStream();
  const Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab(out);

  // Both out and verbLevel are const locals, so the verbosity predicates
  // used throughout this function can be evaluated once up front.
  const int verbRank = static_cast<int>(verbLevel);
  const bool atLeastLow = verbRank >= static_cast<int>(Teuchos::VERB_LOW);
  const bool atLeastExtreme = verbRank >= static_cast<int>(Teuchos::VERB_EXTREME);
  const bool haveOut = (out.get() != NULL);
  const bool traceLow = haveOut && atLeastLow;
  const bool traceExtreme = haveOut && atLeastExtreme;

  if (traceLow)
    *out << "\nEntering Thyra::DefaultStateEliminationModelEvaluator<Scalar>::evalModel(...) ...\n";

  const RCP<const ModelEvaluator<Scalar> >
    underlyingModel = this->getUnderlyingModel();

  const int Np = outArgs.Np();
  const int Ng = outArgs.Ng();

  // Create the initial state guess the first time through: zeros when the
  // underlying model has no nominal x, otherwise a copy of that nominal x.
  if (is_null(x_guess_solu_)) {
    const MEB::InArgs<Scalar>
      modelNominals = underlyingModel->getNominalValues();
    if (!modelNominals.get_x().get()) {
      x_guess_solu_ = createMember(underlyingModel->get_x_space());
      assign(&*x_guess_solu_,Scalar(0.0));
    }
    else {
      x_guess_solu_ = modelNominals.get_x()->clone_v();
    }
  }

  // Nominal values for the wrapped model: the underlying model's nominals,
  // overlaid with the caller's inArgs, with x replaced by the state guess.
  MEB::InArgs<Scalar> solveNominals = underlyingModel->getNominalValues();
  solveNominals.setArgs(inArgs,true);
  solveNominals.set_x(x_guess_solu_);

  typedef Teuchos::VerboseObjectTempState<ModelEvaluatorBase> VOTSME;
  //VOTSME thyraModel_outputTempState(rcp(&wrappedThyraModel,false),out,verbLevel);

  // Hand the state solver this object's stream for the duration of the call,
  // at VERB_LOW when this object is at least that verbose and VERB_NONE
  // otherwise.
  typedef Teuchos::VerboseObjectTempState<NonlinearSolverBase<Scalar> > VOTSNSB;
  VOTSNSB stateSolverVerbosityGuard(
    stateSolver_, out,
    atLeastLow ? Teuchos::VERB_LOW : Teuchos::VERB_NONE
    );

  if (traceExtreme) {
    *out
      << "\ninArgs =\n" << Teuchos::describe(inArgs,verbLevel)
      << "\noutArgs on input =\n" << Teuchos::describe(outArgs,Teuchos::VERB_LOW);
  }

  if (traceLow)
    *out << "\nSolving f(x,...) for x ...\n";

  wrappedThyraModel_->setNominalValues(
    rcp(new MEB::InArgs<Scalar>(solveNominals))
    );

  const SolveStatus<Scalar>
    stateSolveStatus = stateSolver_->solve(&*x_guess_solu_,NULL);

  if (stateSolveStatus.solveStatus != SOLVE_STATUS_CONVERGED) {

    if (traceLow)
      *out << "\nFailed to converge, returning NaNs ...\n";
    outArgs.setFailed();

  }
  else {

    if (traceLow)
      *out << "\nComputing the output functions at the solved state solution ...\n";

    MEB::InArgs<Scalar>   modelInArgs  = underlyingModel->createInArgs();
    MEB::OutArgs<Scalar>  modelOutArgs = underlyingModel->createOutArgs();
    modelInArgs.setArgs(inArgs,true);
    modelInArgs.set_x(x_guess_solu_);
    modelOutArgs.setArgs(outArgs,true);

    // A requested DgDp(j,l) would need DfDp(l) and DgDx(j) to be set up on
    // modelOutArgs; that is not implemented, so any such request throws.
    for (int l = 0; l < Np; ++l) {
      for (int j = 0; j < Ng; ++j) {
        if (outArgs.supports(MEB::OUT_ARG_DgDp,j,l).none())
          continue;
        if (outArgs.get_DgDp(j,l).isEmpty())
          continue;
        // Set DfDp(l) and DgDx(j) to be computed!
        //modelOutArgs.set_DfDp(l,...);
        //modelOutArgs.set_DgDx(j,...);
        TEST_FOR_EXCEPT(true);
      }
    }

    underlyingModel->evalModel(modelInArgs,modelOutArgs);

    //
    // Compute DgDp(j,l) using direct sensitivities
    //
    for (int l = 0; l < Np; ++l) {
      if (modelOutArgs.supports(MEB::OUT_ARG_DfDp,l).none())
        continue;
      if (modelOutArgs.get_DfDp(l).isEmpty())
        continue;
      //
      // Compute:  D(l) = -inv(DfDx)*DfDp(l)
      //
      TEST_FOR_EXCEPT(true);
      for (int j = 0; j < Ng; ++j) {
        if (outArgs.supports(MEB::OUT_ARG_DgDp,j,l).none())
          continue;
        if (outArgs.get_DgDp(j,l).isEmpty())
          continue;
        //
        // Compute:  DgDp(j,l) = DgDp(j,l) + DgDx(j)*D
        //
        TEST_FOR_EXCEPT(true);
      }
    }
    // ToDo: Add a mode to compute DgDp(l) using adjoint sensitivities?

  }

  if (traceExtreme) {
    *out
      << "\noutArgs on output =\n" << Teuchos::describe(outArgs,verbLevel);
  }

  totalTimer.stop();
  if (traceLow) {
    *out
      << "\nTotal evaluation time = "<<totalTimer.totalElapsedTime()<<" sec\n"
      << "\nLeaving Thyra::DefaultStateEliminationModelEvaluator<Scalar>::evalModel(...) ...\n";
  }

}
Esempio n. 27
0
// Main function.
//
// Outline:
//   1. Load and refine the mesh, build the H1 space, assemble the matrices for
//      wf_S and wf_M, and compute DIMENSION_SUBSPACE eigenpairs with
//      EigenSolver on the coarse mesh.
//   2. Adaptivity loop: build a refined reference space, project the current
//      eigenfunctions onto it, improve them there (Newton, Picard, or a fresh
//      EigenSolver call, selected by ITERATIVE_METHOD), optionally reconstruct
//      the target eigenfunction, estimate the error against the coarse
//      solution, and adapt the coarse mesh until err_est_rel < ERR_STOP or
//      ndof >= NDOF_STOP.
//
// Returns 0 on normal completion.
int main(int argc, char* argv[])
{
  info("Desired eigenfunction to calculate: %d.", TARGET_EIGENFUNCTION);

  // Load the mesh.
  Mesh mesh;
  H2DReader mloader;
  mloader.load(mesh_file, &mesh);

  // Perform initial mesh refinements (optional).
  for (int i = 0; i < INIT_REF_NUM; i++)
    mesh.refine_all_elements();

  // Initialize boundary conditions.
  DefaultEssentialBCConst bc_essential("Bdy", 0.0);
  EssentialBCs bcs(&bc_essential);

  // Create an H1 space with default shapeset.
  H1Space space(&mesh, &bcs, P_INIT);
  int ndof = Space::get_num_dofs(&space);

  // Initialize the weak formulation for the left hand side.
  WeakFormS wf_S;
  WeakFormM wf_M;

  // Initialize refinement selector.
  H1ProjBasedSelector selector(CAND_LIST, CONV_EXP, H2DRS_DEFAULT_ORDER);

  // Initialize views.
  ScalarView sview("", new WinGeom(0, 0, 440, 350));
  sview.fix_scale_width(50);
  OrderView oview("", new WinGeom(450, 0, 410, 350));

  // DOF convergence graph.
  SimpleGraph graph_dof;

  // Initialize matrices and matrix solver.
  SparseMatrix* matrix_S = create_matrix(matrix_solver);
  SparseMatrix* matrix_M = create_matrix(matrix_solver);

  // Assemble the matrices.
  DiscreteProblem dp_S(&wf_S, &space);
  dp_S.assemble(matrix_S);
  DiscreteProblem dp_M(&wf_M, &space);
  dp_M.assemble(matrix_M);

  // Hand the coarse-mesh matrices to owning RCPs. They are released when the
  // RCPs go out of scope at the end of main, so they must NOT be deleted
  // by hand.
  RCP<SparseMatrix> matrix_rcp_S = rcp(matrix_S);
  RCP<SparseMatrix> matrix_rcp_M = rcp(matrix_M);

  EigenSolver es(matrix_rcp_S, matrix_rcp_M);
  info("Calling Pysparse...");
  es.solve(DIMENSION_SUBSPACE, PYSPARSE_TARGET_VALUE, PYSPARSE_TOL, PYSPARSE_MAX_ITER);
  info("Pysparse finished.");
  es.print_eigenvalues();

  // Initialize subspace - coefficients for all computed eigenfunctions.
  double* coeff_vec = new double[ndof];
  double** coeff_space = new double*[DIMENSION_SUBSPACE];
  for (int i = 0; i < DIMENSION_SUBSPACE; i++) {
    coeff_space[i] = new double[ndof];
  }

  // Fetch eigenvalues and eigenvectors from the eigensolver.
  double* eigenval = new double[DIMENSION_SUBSPACE];

  // NOTE(review): nothing checks neig against DIMENSION_SUBSPACE or
  // TARGET_EIGENFUNCTION (the original check was commented out). If fewer
  // eigenpairs are returned, the remaining entries of eigenval and
  // coeff_space are read below without having been written.
  int neig = es.get_n_eigs();
  for (int ieig = 0; ieig < neig; ieig++) {
    // Get next eigenvalue.
    eigenval[ieig] = es.get_eigenvalue(ieig);
    int n;
    // NOTE(review): get_eigenvector receives the ADDRESS of coeff_vec. If it
    // rebinds the pointer instead of copying into the buffer, the new[]
    // allocation above leaks and the delete[] below frees memory this
    // function does not own - confirm against EigenSolver.
    es.get_eigenvector(ieig, &coeff_vec, &n);
    for (int i = 0; i < ndof; i++) {
      coeff_space[ieig][i] = coeff_vec[i];
    }
    // Normalize the eigenvector.
    normalize((UMFPackMatrix*)matrix_M, coeff_space[ieig], ndof);
  }

  // Retrieve desired eigenvalue.
  double lambda = eigenval[TARGET_EIGENFUNCTION-1];
  info("Eigenvalue on coarse mesh: %g", lambda);
  delete [] eigenval;

  // Convert eigenvector into eigenfunction. After this, the
  // eigenvector on the coarse mesh will not be needed anymore.
  Solution sln;
  Solution::vector_to_solution(coeff_space[TARGET_EIGENFUNCTION-1], &space, &sln);
  Solution* sln_space = new Solution[DIMENSION_SUBSPACE];
  for (int i = 0; i < DIMENSION_SUBSPACE; i++) {
    Solution::vector_to_solution(coeff_space[i], &space, &sln_space[i]);
  }
  for (int i = 0; i < DIMENSION_SUBSPACE; i++) {
    delete [] coeff_space[i];
  }
  delete [] coeff_space;   // the array of row pointers itself
  delete [] coeff_vec;

  // Visualize the eigenfunction. The function shown is sln, i.e. eigenfunction
  // number TARGET_EIGENFUNCTION, so that is the index put into the title.
  info("Plotting initial eigenfunction on coarse mesh.");
  char title[100];
  sprintf(title, "Eigenfunction %d on initial mesh", TARGET_EIGENFUNCTION);
  sview.set_title(title);
  sview.show_mesh(false);
  sview.show(&sln);
  sprintf(title, "Initial mesh");
  oview.set_title(title);
  oview.show(&space);
  View::wait(HERMES_WAIT_KEYPRESS);

  /*** Begin adaptivity ***/

  // Adaptivity loop:
  Solution ref_sln;
  Solution* ref_sln_space = new Solution[DIMENSION_SUBSPACE];
  Space* ref_space = NULL;
  int as = 1;
  bool done = false;
  do
  {
    info("---- Adaptivity step %d:", as);

    // Construct globally refined reference mesh and setup reference space.
    ref_space = Space::construct_refined_space(&space);
    int ndof_ref = Space::get_num_dofs(ref_space);
    info("ndof: %d, ndof_ref: %d", ndof, ndof_ref);

    // Obtain initial approximation on new reference mesh.
    double* coeff_vec_ref = new double[ndof_ref];
    double** coeff_space_ref = new double*[DIMENSION_SUBSPACE];
    for (int i = 0; i < DIMENSION_SUBSPACE; i++) {
      coeff_space_ref[i] = new double[ndof_ref];
    }
    if (as == 1) {
      // Project the coarse mesh eigenfunction to the reference mesh.
      info("Projecting coarse mesh solution to reference mesh.");
      OGProjection::project_global(ref_space, &sln, coeff_vec_ref, matrix_solver);
      for (int i = 0; i < DIMENSION_SUBSPACE; i++) {
        OGProjection::project_global(ref_space, &sln_space[i], coeff_space_ref[i], matrix_solver);
      }
    }
    else {
      // Project the last reference mesh solution to the reference mesh.
      info("Projecting last reference mesh solution to new reference mesh.");
      OGProjection::project_global(ref_space, &ref_sln, coeff_vec_ref, matrix_solver);
      for (int i = 0; i < DIMENSION_SUBSPACE; i++) {
        OGProjection::project_global(ref_space, &ref_sln_space[i], coeff_space_ref[i], matrix_solver);
      }
    }
    Solution::vector_to_solution(coeff_vec_ref, ref_space, &ref_sln);
    for (int i = 0; i < DIMENSION_SUBSPACE; i++) {
      Solution::vector_to_solution(coeff_space_ref[i], ref_space, &ref_sln_space[i]);
    }

    // Initialize matrices and matrix solver on reference mesh.
    SparseMatrix* matrix_S_ref = create_matrix(matrix_solver);
    SparseMatrix* matrix_M_ref = create_matrix(matrix_solver);

    // Assemble matrices S and M on reference mesh.
    info("Assembling matrices S and M on reference mesh.");
    DiscreteProblem dp_S_ref(&wf_S, ref_space);
    dp_S_ref.assemble(matrix_S_ref);
    DiscreteProblem dp_M_ref(&wf_M, ref_space);
    dp_M_ref.assemble(matrix_M_ref);

    // Calculate eigenvalue corresponding to the new reference solution.
    lambda = calc_mass_product((UMFPackMatrix*)matrix_S_ref, coeff_space_ref[TARGET_EIGENFUNCTION-1], ndof_ref)
             / calc_mass_product((UMFPackMatrix*)matrix_M_ref, coeff_space_ref[TARGET_EIGENFUNCTION-1], ndof_ref);
    info("Initial guess for eigenvalue on reference mesh: %.12f", lambda);

    if (ITERATIVE_METHOD == 1) {
      // Newton's method on the reference mesh.
      lambda = calc_mass_product((UMFPackMatrix*)matrix_S_ref, coeff_space_ref[0], ndof_ref)
             / calc_mass_product((UMFPackMatrix*)matrix_M_ref, coeff_space_ref[0], ndof_ref);
      // Newton's method on the reference mesh for the first eigenfunction in the eigenspace.
      if(!solve_newton_eigen(ref_space, (UMFPackMatrix*)matrix_S_ref, (UMFPackMatrix*)matrix_M_ref,
                             coeff_space_ref[0], lambda, matrix_solver, NEWTON_TOL, NEWTON_MAX_ITER))
        error("Newton's method failed.");
      for (int i = 1; i < DIMENSION_SUBSPACE; i++) {
        lambda = calc_mass_product((UMFPackMatrix*)matrix_S_ref, coeff_space_ref[i], ndof_ref)
             / calc_mass_product((UMFPackMatrix*)matrix_M_ref, coeff_space_ref[i], ndof_ref);
        // NOTE(review): this Newton solve is driven by PICARD_TOL and
        // PICARD_MAX_ITER while the call above uses NEWTON_TOL and
        // NEWTON_MAX_ITER - looks like a copy-paste slip; confirm before
        // changing, since it alters the stopping criteria.
        if(!solve_newton_eigen_ortho(ref_space, (UMFPackMatrix*)matrix_S_ref, (UMFPackMatrix*)matrix_M_ref,
                             coeff_space_ref[i], lambda, matrix_solver, PICARD_TOL, PICARD_MAX_ITER,USE_ORTHO,
                             coeff_space_ref,i,DIMENSION_SUBSPACE))
          error("Newton's method failed.");
      }
    }
    else if (ITERATIVE_METHOD == 2) {
      lambda = calc_mass_product((UMFPackMatrix*)matrix_S_ref, coeff_space_ref[0], ndof_ref)
             / calc_mass_product((UMFPackMatrix*)matrix_M_ref, coeff_space_ref[0], ndof_ref);
      // Picard's method on the reference mesh for the first eigenfunction in the eigenspace.
      if(!solve_picard_eigen(ref_space, (UMFPackMatrix*)matrix_S_ref, (UMFPackMatrix*)matrix_M_ref,
                             coeff_space_ref[0], lambda, matrix_solver, PICARD_TOL, PICARD_MAX_ITER, USE_SHIFT))
        error("Picard's method failed.");
      for (int i = 1; i < DIMENSION_SUBSPACE; i++) {
        lambda = calc_mass_product((UMFPackMatrix*)matrix_S_ref, coeff_space_ref[i], ndof_ref)
             / calc_mass_product((UMFPackMatrix*)matrix_M_ref, coeff_space_ref[i], ndof_ref);
        if(!solve_picard_eigen_ortho(ref_space, (UMFPackMatrix*)matrix_S_ref, (UMFPackMatrix*)matrix_M_ref,
                             coeff_space_ref[i], lambda, matrix_solver, PICARD_TOL, PICARD_MAX_ITER,USE_ORTHO, USE_SHIFT,
                             coeff_space_ref,i,DIMENSION_SUBSPACE))
          error("Picard's method failed.");
      }
    }
    else {
      // Solve the eigenproblem from scratch on the reference mesh.
      //
      // The RCPs must be NON-owning (second argument false): the matrices are
      // still needed after this scope ends (eigenfunction reconstruction) and
      // are released by the explicit deletes in the clean-up section. Owning
      // RCPs would free them at the closing brace of this branch, leaving
      // dangling pointers and a double delete.
      RCP<SparseMatrix> matrix_ref_rcp_S = rcp(matrix_S_ref, false);
      RCP<SparseMatrix> matrix_ref_rcp_M = rcp(matrix_M_ref, false);

      EigenSolver es_ref(matrix_ref_rcp_S, matrix_ref_rcp_M);
      info("Calling Pysparse...");
      es_ref.solve(DIMENSION_SUBSPACE, PYSPARSE_TARGET_VALUE, PYSPARSE_TOL, PYSPARSE_MAX_ITER);
      info("Pysparse finished.");
      es_ref.print_eigenvalues();

      // Fetch eigenvalues and eigenvectors from the eigensolver.
      double* eigenval_ref = new double[DIMENSION_SUBSPACE];

      int neig_ref = es_ref.get_n_eigs();
      for (int ieig = 0; ieig < neig_ref; ieig++) {
        // Get next eigenvalue.
        eigenval_ref[ieig] = es_ref.get_eigenvalue(ieig);
        int n_ref;

        // NOTE(review): same pointer-ownership question as for the coarse
        // mesh - coeff_vec_ref is passed by address, is used again in the
        // reconstruction step and is delete[]d in the clean-up below.
        es_ref.get_eigenvector(ieig, &coeff_vec_ref, &n_ref);
        for (int i = 0; i < ndof_ref; i++) {
          coeff_space_ref[ieig][i] = coeff_vec_ref[i];
        }
        // Normalize the eigenvector.
        normalize((UMFPackMatrix*)matrix_M_ref, coeff_space_ref[ieig], ndof_ref);
      }

      // Retrieve desired eigenvalue (stored in the function-level lambda
      // rather than in a shadowing local).
      lambda = eigenval_ref[TARGET_EIGENFUNCTION-1];
      info("Eigenvalue on fine mesh: %g", lambda);
      delete [] eigenval_ref;
    }

    for (int i = 0; i < DIMENSION_SUBSPACE; i++)
      Solution::vector_to_solution(coeff_space_ref[i], ref_space, &ref_sln_space[i]);

    // Perform eigenfunction reconstruction.
    if (RECONSTRUCTION_ON == false)
      Solution::vector_to_solution(coeff_space_ref[TARGET_EIGENFUNCTION-1], ref_space, &ref_sln);

    else {
      // coeff_vec_rec = sum_i inners[i] * coeff_space_ref[FIRST_INDEX_EIGENSPACE-1+i],
      // where inners[i] is calc_inner_product of that vector with coeff_vec_ref.
      double* inners = new double[DIMENSION_TARGET_EIGENSPACE];
      double* coeff_vec_rec = new double[ndof_ref];
      for (int i = 0; i < DIMENSION_TARGET_EIGENSPACE; i++)
         inners[i] = calc_inner_product((UMFPackMatrix*)matrix_M_ref, coeff_space_ref[FIRST_INDEX_EIGENSPACE-1+i], coeff_vec_ref, ndof_ref);

      for (int j = 0; j < ndof_ref; j++) {
        coeff_vec_rec[j] = 0.0;
        for (int i = 0; i < DIMENSION_TARGET_EIGENSPACE; i++)
          coeff_vec_rec[j] += inners[i] * coeff_space_ref[FIRST_INDEX_EIGENSPACE-1+i][j];
      }

      Solution::vector_to_solution(coeff_vec_rec, ref_space, &ref_sln);

      delete [] coeff_vec_rec;
      delete [] inners;
    }

    // Clean up.
    delete matrix_S_ref;
    delete matrix_M_ref;
    delete [] coeff_vec_ref;
    for (int i = 0; i < DIMENSION_SUBSPACE; i++) {
      delete [] coeff_space_ref[i];
    }
    delete [] coeff_space_ref;

    // Project reference solution to coarse mesh for error estimation.
    if (as > 1) {
      // Project reference solution to coarse mesh.
      info("Projecting reference solution to coarse mesh for error calculation.");
      OGProjection::project_global(&space, &ref_sln, &sln, matrix_solver);
    }

    // Calculate element errors and total error estimate.
    info("Calculating error estimate.");
    Adapt* adaptivity = new Adapt(&space);
    double err_est_rel = adaptivity->calc_err_est(&sln, &ref_sln) * 100;

    // Report results.
    info("ndof_coarse: %d, ndof_fine: %d, err_est_rel: %g%%",
      Space::get_num_dofs(&space), Space::get_num_dofs(ref_space), err_est_rel);

    // Add entry to DOF and CPU convergence graphs.
    graph_dof.add_values(Space::get_num_dofs(&space), err_est_rel);
    graph_dof.save("conv_dof_est.dat");

    // If err_est too large, adapt the mesh.
    if (err_est_rel < ERR_STOP) done = true;
    else
    {
      info("Adapting coarse mesh.");
      done = adaptivity->adapt(&selector, THRESHOLD, STRATEGY, MESH_REGULARITY);
    }
    ndof = Space::get_num_dofs(&space);
    if (ndof >= NDOF_STOP) done = true;

    // Clean up.
    delete adaptivity;

    //delete ref_space->get_mesh();
    delete ref_space;

    // Visualize the projection.
    info("Plotting projection of reference solution to new coarse mesh.");
    sprintf(title, "Coarse mesh projection");
    sview.set_title(title);
    sview.show_mesh(false);
    sview.show(&sln);
    sprintf(title, "Coarse mesh, step %d", as);
    oview.set_title(title);
    oview.show(&space);

    // Increase the counter of performed adaptivity steps.
    if (done == false) as++;

    // Wait for keypress.
    View::wait(HERMES_WAIT_KEYPRESS);
  }
  while (done == false);

  // Wait for all views to be closed.
  info("Computation finished.");
  View::wait();

  // Release the eigenfunction arrays allocated with new[].
  delete [] ref_sln_space;
  delete [] sln_space;

  return 0;
}
// Timing driver for basic Epetra vector and CRS-matrix operations.
//
// Builds a map with a fixed global size (strong-scaling setup), times a set
// of Epetra_Vector reductions/updates and Epetra_CrsMatrix norms, scalings and
// applies using Teuchos::TimeMonitor, and prints the timer summary.
//
// Returns EXIT_SUCCESS if no exception was caught, EXIT_FAILURE otherwise.
int main (int argc, char *argv[])
{
  // Initialize MPI.
  // The session has to be a NAMED object: an unnamed temporary would be
  // destroyed at the end of this very statement, i.e. before any of the MPI
  // work below runs.
  Teuchos::GlobalMPISession mpiSession (&argc, &argv, NULL);

  // Create output stream. (Handy for multicore output.)
  const RCP<Teuchos::FancyOStream> out =
    Teuchos::VerboseObjectBase::getDefaultOStream();

  // Create a communicator for Epetra objects
#ifdef HAVE_MPI
  RCP<Epetra_MpiComm> eComm =
    rcp<Epetra_MpiComm> (new Epetra_MpiComm (MPI_COMM_WORLD));
#else
  RCP<Epetra_SerialComm> eComm =
    rcp<Epetra_SerialComm> (new Epetra_SerialComm());
#endif

  bool success = true;
  try {
    // Create map.
    // Do strong scaling tests, so keep numGlobalElements independent of
    // the number of processes.
    const int numGlobalElements = 50000000;
    const int indexBase = 0;
    RCP<Epetra_Map> map =
      rcp(new Epetra_Map (numGlobalElements, indexBase, *eComm));

    // tests on one vector
    RCP<Epetra_Vector> u = rcp(new Epetra_Vector(*map));
    u->Random();

    RCP<Teuchos::Time> meanValueTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::MeanValue");
    {
      Teuchos::TimeMonitor tm(*meanValueTime);
      double meanVal;
      TEUCHOS_ASSERT_EQUALITY(0, u->MeanValue(&meanVal));
    }

    RCP<Teuchos::Time> maxValueTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::MaxValue");
    {
      Teuchos::TimeMonitor tm(*maxValueTime);
      double maxValue;
      TEUCHOS_ASSERT_EQUALITY(0, u->MaxValue(&maxValue));
    }

    RCP<Teuchos::Time> minValueTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::MinValue");
    {
      Teuchos::TimeMonitor tm(*minValueTime);
      double minValue;
      TEUCHOS_ASSERT_EQUALITY(0, u->MinValue(&minValue));
    }

    RCP<Teuchos::Time> norm1Time =
      Teuchos::TimeMonitor::getNewTimer("Vector::Norm1");
    {
      Teuchos::TimeMonitor tm(*norm1Time);
      double norm1;
      TEUCHOS_ASSERT_EQUALITY(0, u->Norm1(&norm1));
    }

    RCP<Teuchos::Time> norm2Time =
      Teuchos::TimeMonitor::getNewTimer("Vector::Norm2");
    {
      Teuchos::TimeMonitor tm(*norm2Time);
      double norm2;
      TEUCHOS_ASSERT_EQUALITY(0, u->Norm2(&norm2));
    }

    RCP<Teuchos::Time> normInfTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::NormInf");
    {
      Teuchos::TimeMonitor tm(*normInfTime);
      double normInf;
      TEUCHOS_ASSERT_EQUALITY(0, u->NormInf(&normInf));
    }

    RCP<Teuchos::Time> scaleTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Scale");
    {
      Teuchos::TimeMonitor tm(*scaleTime);
      const double alpha = 0.5;
      TEUCHOS_ASSERT_EQUALITY(0, u->Scale(alpha));
    }

    // tests involving two vectors
    RCP<Epetra_Vector> v = rcp(new Epetra_Vector(*map));
    v->Random();

    RCP<Teuchos::Time> dotTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Dot");
    {
      Teuchos::TimeMonitor tm(*dotTime);
      double dot;
      TEUCHOS_ASSERT_EQUALITY(0, u->Dot(*v, &dot));
    }

    RCP<Teuchos::Time> multiplyTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Multiply");
    {
      Teuchos::TimeMonitor tm(*multiplyTime);
      TEUCHOS_ASSERT_EQUALITY(0, u->Multiply(1.0, *u, *v, 1.0));
    }

    RCP<Teuchos::Time> updateTime =
      Teuchos::TimeMonitor::getNewTimer("Vector::Update");
    {
      Teuchos::TimeMonitor tm(*updateTime);
      TEUCHOS_ASSERT_EQUALITY(0, u->Update(1.0, *v, 1.0));
    }

    // matrix-vector tests
    // diagonal test matrix with entries 1/(gid+1)
    RCP<Epetra_CrsMatrix> D =
      rcp(new Epetra_CrsMatrix(Copy, *map, 1));
    for (int k = 0; k < map->NumMyElements(); k++) {
      int col = map->GID(k);
      double val = 1.0 / (col+1);
      TEUCHOS_ASSERT_EQUALITY(0, D->InsertGlobalValues(col, 1, &val, &col));
    }
    TEUCHOS_ASSERT_EQUALITY(0, D->FillComplete());

    // tridiagonal test matrix with stencil (-1, 2, -1)
    RCP<Epetra_CrsMatrix> T =
      rcp(new Epetra_CrsMatrix(Copy, *map, 3));
    for (int k = 0; k < map->NumMyElements(); k++) {
      const int row = map->GID(k);
      if (row > 0) {
        // sub-diagonal entry; absent in the first global row
        int col = row-1;
        double val = -1.0;
        TEUCHOS_ASSERT_EQUALITY(0, T->InsertGlobalValues(row, 1, &val, &col));
      }
      {
        int col = row;
        double val = 2.0;
        TEUCHOS_ASSERT_EQUALITY(0, T->InsertGlobalValues(row, 1, &val, &col));
      }
      if (row < numGlobalElements-1) {
        // super-diagonal entry; absent in the last global row
        int col = row+1;
        double val = -1.0;
        TEUCHOS_ASSERT_EQUALITY(0, T->InsertGlobalValues(row, 1, &val, &col));
      }
    }
    TEUCHOS_ASSERT_EQUALITY(0, T->FillComplete());

    // start timings
    // The norm values themselves are not needed, only the time taken to
    // compute them, so the results are explicitly discarded.
    RCP<Teuchos::Time> mNorm1Time =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::Norm1");
    {
      Teuchos::TimeMonitor tm(*mNorm1Time);
      static_cast<void>(D->NormOne());
      static_cast<void>(T->NormOne());
    }

    RCP<Teuchos::Time> mNormInfTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::NormInf");
    {
      Teuchos::TimeMonitor tm(*mNormInfTime);
      static_cast<void>(D->NormInf());
      static_cast<void>(T->NormInf());
    }

    RCP<Teuchos::Time> mNormFrobTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::NormFrobenius");
    {
      Teuchos::TimeMonitor tm(*mNormFrobTime);
      static_cast<void>(D->NormFrobenius());
      static_cast<void>(T->NormFrobenius());
    }

    RCP<Teuchos::Time> mScaleTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::Scale");
    {
      Teuchos::TimeMonitor tm(*mScaleTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->Scale(2.0));
      TEUCHOS_ASSERT_EQUALITY(0, T->Scale(2.0));
    }

    RCP<Teuchos::Time> leftScaleTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::LeftScale");
    {
      Teuchos::TimeMonitor tm(*leftScaleTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->LeftScale(*v));
      TEUCHOS_ASSERT_EQUALITY(0, T->LeftScale(*v));
    }

    RCP<Teuchos::Time> rightScaleTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::RightScale");
    {
      Teuchos::TimeMonitor tm(*rightScaleTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->RightScale(*v));
      TEUCHOS_ASSERT_EQUALITY(0, T->RightScale(*v));
    }

    RCP<Teuchos::Time> applyTime =
      Teuchos::TimeMonitor::getNewTimer("CrsMatrix::Apply");
    {
      Teuchos::TimeMonitor tm(*applyTime);
      TEUCHOS_ASSERT_EQUALITY(0, D->Apply(*u, *v));
      TEUCHOS_ASSERT_EQUALITY(0, T->Apply(*u, *v));
    }

    // print timing data
    Teuchos::TimeMonitor::summarize();
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, *out, success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
Esempio n. 29
0
  //
  // Basic TimeMonitor test: create and exercise a timer on all (MPI)
  // processes, and make sure that TimeMonitor::summarize() reports it.
  //
  TEUCHOS_UNIT_TEST( TimeMonitor, FUNC_TIME_MONITOR  )
  {
    using Teuchos::ParameterList;
    using Teuchos::parameterList;
    using Teuchos::RCP;

    // Invoke the function under measurement once before asking
    // TimeMonitor for any report.
    func_time_monitor1 ();

    // Case 1: default output format, produced by summarize().
    {
      std::ostringstream summaryStream;
      TimeMonitor::summarize (summaryStream);
      const std::string summaryText = summaryStream.str ();

      // Forward the report to the unit test's FancyOStream "out" so
      // that it is available alongside the other test details.
      out << summaryText << std::endl;

      // The report must mention the timer by name.
      const size_t substr_i = summaryText.find ("FUNC_TIME_MONITOR1");
      TEST_INEQUALITY(substr_i, std::string::npos);
    }

    // Cases 2 and 3: YAML output produced by report(), first in the
    // "compact" style and then in the "spacious" style.
    const char* const yamlStyles[] = { "compact", "spacious" };
    for (int styleIndex = 0; styleIndex < 2; ++styleIndex) {
      // Start from a fresh copy of the valid report parameters for
      // each style, then select YAML and the style to exercise.
      RCP<ParameterList> yamlParams =
        parameterList (* (TimeMonitor::getValidReportParameters ()));
      yamlParams->set ("Report format", "YAML");
      yamlParams->set ("YAML style", yamlStyles[styleIndex]);

      std::ostringstream yamlStream;
      TimeMonitor::report (yamlStream, yamlParams);
      const std::string yamlText = yamlStream.str ();

      // Forward the report to the unit test's FancyOStream "out".
      out << yamlText << std::endl;

      // The report must mention the timer by name.
      const size_t substr_i = yamlText.find ("FUNC_TIME_MONITOR1");
      TEST_INEQUALITY(substr_i, std::string::npos);
    }

    // Reset the counters so that the next unit test starts clean.
    TimeMonitor::clearCounters ();
  }
Esempio n. 30
0
      /// \brief Run the test for the Scalar type.
      ///
      /// Builds a pseudorandom distributed test matrix with singular
      /// values 1, 1/2, 1/4, ..., factors a copy of it with TSQR
      /// (either via factorExplicit() or via factor() followed by
      /// explicit_Q()), optionally exercises revealRank(), and then
      /// checks the result with global_verify().  Results are
      /// optionally printed on Process 0 as a comma-delimited line.
      ///
      /// \param comm [in] Communicator over which to run the test.
      /// \param node [in] Kokkos Node instance.
      /// \param testParams [in/out] Parameters for the test.  May
      ///   be modified by each test in turn.  This method reads
      ///   "numRowsLocal", "numCols", "numCores",
      ///   "contiguousCacheBlocks", "testFactorExplicit",
      ///   "testRankRevealing", "debug", "printFieldNames",
      ///   "printResults", and "failIfInaccurate"; on Process 0 it
      ///   sets "printFieldNames" to false after printing the field
      ///   names.
      /// \param randomSeed [in/out] On input: the random seed for
      ///   LAPACK's pseudorandom number generator.  On output: the
      ///   updated random seed.
      ///
      /// \throw std::logic_error if the rank-revealing test is
      ///   performed and the reported rank differs from numCols.
      /// \throw TsqrInaccurate if "failIfInaccurate" is true and
      ///   either the relative residual or the orthogonality measure
      ///   exceeds its bound.
      static void
      run (const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
           const Teuchos::RCP<node_type>& node,
           const Teuchos::RCP<Teuchos::ParameterList>& testParams,
           std::vector<int>& randomSeed)
      {
        using std::cerr;
        using std::cout;
        using std::endl;
        using Teuchos::arcp;
        using Teuchos::ParameterList;
        using Teuchos::parameterList;
        using Teuchos::RCP;
        using Teuchos::rcp;
        using Teuchos::rcp_const_cast;
        using Teuchos::rcp_implicit_cast;
        typedef Matrix<ordinal_type, scalar_type> matrix_type;
        typedef MatView<ordinal_type, scalar_type> mat_view_type;
        typedef typename tsqr_type::FactorOutput factor_output_type;

        const int myRank = Teuchos::rank (*comm);
        const int numProcs = Teuchos::size (*comm);

        // Construct TSQR implementation instance.
        RCP<tsqr_type> tsqr = getTsqr (testParams, node, comm);

        // Fetch test parameters from the input parameter list.
        const ordinal_type numRowsLocal = testParams->get<ordinal_type> ("numRowsLocal");
        const ordinal_type numCols = testParams->get<ordinal_type> ("numCols");
        const int numCores = testParams->get<int> ("numCores");
        const bool contiguousCacheBlocks = testParams->get<bool> ("contiguousCacheBlocks");
        const bool testFactorExplicit = testParams->get<bool> ("testFactorExplicit");
        const bool testRankRevealing = testParams->get<bool> ("testRankRevealing");
        const bool debug = testParams->get<bool> ("debug");

        // Space for each node's local part of the test problem.
        // A_local, A_copy, and Q_local are distributed matrices, and
        // R is replicated on all processes sharing the communicator.
        matrix_type A_local (numRowsLocal, numCols);
        matrix_type A_copy (numRowsLocal, numCols);
        matrix_type Q_local (numRowsLocal, numCols);
        matrix_type R (numCols, numCols);

        // Start out by filling the test problem with zeros.
        typedef Teuchos::ScalarTraits<scalar_type> STS;
        A_local.fill (STS::zero());
        A_copy.fill (STS::zero());
        Q_local.fill (STS::zero());
        R.fill (STS::zero());

        // Create some reasonable singular values for the test problem:
        // 1, 1/2, 1/4, 1/8, ...
        typedef typename STS::magnitudeType magnitude_type;
        std::vector<magnitude_type> singularValues (numCols);
        typedef Teuchos::ScalarTraits<magnitude_type> STM;
        {
          const magnitude_type scalingFactor = STM::one() + STM::one();
          magnitude_type curVal = STM::one();
          typedef typename std::vector<magnitude_type>::iterator iter_type;
          for (iter_type it = singularValues.begin();
               it != singularValues.end(); ++it)
            {
              *it = curVal;
              curVal = curVal / scalingFactor;
            }
        }

        // Construct a normal(0,1) pseudorandom number generator with
        // the given random seed.
        using TSQR::Random::NormalGenerator;
        typedef NormalGenerator<ordinal_type, scalar_type> generator_type;
        generator_type gen (randomSeed);

        // We need a Messenger for Ordinal-type data, so that we can
        // build a global random test matrix.
        RCP<MessengerBase<ordinal_type> > ordinalMessenger =
          rcp_implicit_cast<MessengerBase<ordinal_type> > (rcp (new TeuchosMessenger<ordinal_type> (comm)));

        // We also need a Messenger for Scalar-type data.  The TSQR
        // implementation already constructed one, but it's OK to
        // construct another one; TeuchosMessenger is just a thin
        // wrapper over the Teuchos::Comm object.
        RCP<MessengerBase<scalar_type> > scalarMessenger =
          rcp_implicit_cast<MessengerBase<scalar_type> > (rcp (new TeuchosMessenger<scalar_type> (comm)));

        {
          // Generate a global distributed matrix (whose part local to
          // this node is in A_local) with the given singular values.
          // This part has O(P) communication for P MPI processes.
          using TSQR::Random::randomGlobalMatrix;
          // Help the C++ compiler with type inference.
          mat_view_type A_local_view (A_local.nrows(), A_local.ncols(), A_local.get(), A_local.lda());
          // Taking &singularValues[0] of an empty vector is not
          // allowed, so pass NULL when there are no columns.
          const magnitude_type* const singVals = (numCols == 0) ? NULL : &singularValues[0];
          randomGlobalMatrix<mat_view_type, generator_type> (&gen, A_local_view, singVals,
                                                             ordinalMessenger.getRawPtr(),
                                                             scalarMessenger.getRawPtr());
        }
        // Save the pseudorandom number generator's seed for any later
        // tests.  The generator keeps its own copy of the seed and
        // updates it internally, so we have to ask for its copy.
        gen.getSeed (randomSeed);

        // If specified in the test parameters, rearrange cache blocks
        // in the copy.  Otherwise, just copy the test problem into
        // A_copy.  The factorization overwrites the input matrix, so
        // we have to make a copy in order to validate the final
        // result.
        if (contiguousCacheBlocks) {
          tsqr->cache_block (numRowsLocal, numCols, A_copy.get(),
                             A_local.get(), A_local.lda());
          if (debug) {
            Teuchos::barrier (*comm);
            if (myRank == 0)
              cerr << "-- Finished Tsqr::cache_block" << endl;
          }
        }
        else {
          deep_copy (A_copy, A_local);
        }

        // "factorExplicit" is an alternate, hopefully faster way of
        // factoring the matrix, when only the explicit Q factor is
        // wanted.
        typedef KokkosClassic::MultiVector<scalar_type, node_type> KMV;
        if (testFactorExplicit) {
          KMV A_copy_view (node);
          A_copy_view.initializeValues (static_cast<size_t> (A_copy.nrows()),
                                        static_cast<size_t> (A_copy.ncols()),
                                        arcp (A_copy.get(), 0, A_copy.nrows()*A_copy.ncols(), false), // non-owning ArrayRCP
                                        static_cast<size_t> (A_copy.lda()));
          KMV Q_view (node);
          Q_view.initializeValues (static_cast<size_t> (Q_local.nrows()),
                                   static_cast<size_t> (Q_local.ncols()),
                                   arcp (Q_local.get(), 0, Q_local.nrows()*Q_local.ncols(), false), // non-owning ArrayRCP
                                   static_cast<size_t> (Q_local.lda()));
          Teuchos::SerialDenseMatrix<ordinal_type, scalar_type>
            R_view (Teuchos::View, R.get(), R.lda(), R.nrows(), R.ncols());

          tsqr->factorExplicit (A_copy_view, Q_view, R_view,
                                contiguousCacheBlocks);
          if (debug) {
            Teuchos::barrier (*comm);
            if (myRank == 0)
              cerr << "-- Finished Tsqr::factorExplicit" << endl;
          }
        }
        else {
          // Factor the (copy of the) matrix.
          factor_output_type factorOutput =
            tsqr->factor (numRowsLocal, numCols, A_copy.get(), A_copy.lda(),
                          R.get(), R.lda(), contiguousCacheBlocks);
          if (debug) {
            Teuchos::barrier (*comm);
            if (myRank == 0)
              cerr << "-- Finished Tsqr::factor" << endl;
          }
          // Compute the explicit Q factor in Q_local.
          tsqr->explicit_Q (numRowsLocal, numCols, A_copy.get(), A_copy.lda(),
                            factorOutput, numCols, Q_local.get(), Q_local.lda(),
                            contiguousCacheBlocks);
          if (debug) {
            Teuchos::barrier (*comm);
            if (myRank == 0)
              cerr << "-- Finished Tsqr::explicit_Q" << endl;
          }
        }

        // Optionally, test rank-revealing capability.  We do this
        // before un-cache-blocking the explicit Q factor, since
        // revealRank can work with contiguous cache blocks, and
        // modifies the Q factor if the matrix doesn't have full
        // column rank.
        if (testRankRevealing) {
          KMV Q_view (node);
          Q_view.initializeValues (static_cast<size_t> (Q_local.nrows()),
                                   static_cast<size_t> (Q_local.ncols()),
                                   arcp (Q_local.get(), 0, Q_local.nrows()*Q_local.ncols(), false), // non-owning ArrayRCP
                                   static_cast<size_t> (Q_local.lda()));
          Teuchos::SerialDenseMatrix<ordinal_type, scalar_type>
            R_view (Teuchos::View, R.get(), R.lda(), R.nrows(), R.ncols());
          // The singular values are 1, 1/2, 1/4, ..., so once
          // 2^{-numCols} drops to around machine precision, the
          // smallest singular value is too tiny for the SVD to
          // promise it can distinguish it from zero.  If the number
          // of columns is less than that, we can use a tolerance of
          // zero to test the purported rank against the actual
          // numerical rank.
          const magnitude_type tol = STM::zero();
          const ordinal_type rank =
            tsqr->revealRank (Q_view, R_view, tol, contiguousCacheBlocks);

          magnitude_type two_to_the_numCols = STM::one();
          for (ordinal_type k = 0; k < numCols; ++k) {
            const magnitude_type two = STM::one() + STM::one();
            two_to_the_numCols *= two;
          }
          // Compare 2^{-numCols} (not 2^{numCols}, which is always at
          // least one and so would make this check vacuous) against
          // machine precision.  If two_to_the_numCols overflowed to
          // infinity, the quotient is zero and we correctly skip the
          // check.
          const magnitude_type smallestSingValBound =
            STM::one() / two_to_the_numCols;
          // Throw in a factor of 10, just for more tolerance of
          // rounding error (so the test only fails if something is
          // really broken).
          if (smallestSingValBound > magnitude_type(10) * STM::eps ()) {
            TEUCHOS_TEST_FOR_EXCEPTION(
              rank != numCols, std::logic_error, "The matrix of " << numCols
              << " columns should have full numerical rank, but Tsqr reports "
              "that it has rank " << rank << ".  Please report this bug to "
              "the Kokkos developers.");
            if (debug) {
              Teuchos::barrier (*comm);
              if (myRank == 0)
                cerr << "-- Tested rank-revealing capability" << endl;
            }
          }
          else {
            if (debug) {
              Teuchos::barrier (*comm);
              if (myRank == 0)
                cerr << "-- Not testing rank-revealing capability; too many columns" << endl;
            }
          }
        }
        // "Un"-cache-block the output, if contiguous cache blocks
        // were used.  This is only necessary because global_verify()
        // doesn't currently support contiguous cache blocks.
        if (contiguousCacheBlocks) {
          // We can use A_copy as scratch space for
          // un-cache-blocking Q_local, since we're done using
          // A_copy for other things.
          tsqr->un_cache_block (numRowsLocal, numCols, A_copy.get(),
                                A_copy.lda(), Q_local.get());
          // Overwrite Q_local with the un-cache-blocked Q factor.
          deep_copy (Q_local, A_copy);
          if (debug) {
            Teuchos::barrier (*comm);
            if (myRank == 0)
              cerr << "-- Finished Tsqr::un_cache_block" << endl;
          }
        }

        // Test accuracy of the factorization.  As used below,
        // results[0] is the absolute residual ||A - Q*R||_F,
        // results[1] is the orthogonality measure ||I - Q^* Q||_F,
        // and results[2] is ||A||_F.
        const std::vector<magnitude_type> results =
          global_verify (numRowsLocal, numCols, A_local.get(), A_local.lda(),
                         Q_local.get(), Q_local.lda(), R.get(), R.lda(),
                         scalarMessenger.getRawPtr());
        if (debug) {
          Teuchos::barrier (*comm);
          if (myRank == 0)
            cerr << "-- Finished global_verify" << endl;
        }

        // Print the results on Proc 0.
        if (myRank == 0) {
          if (testParams->get<bool> ("printFieldNames")) {
            cout << "%"
                 << "method"
                 << ",scalarType"
                 << ",numRowsLocal"
                 << ",numCols"
                 << ",numProcs"
                 << ",numCores"
                 << ",cacheSizeHint"
                 << ",contiguousCacheBlocks"
                 << ",absFrobResid"
                 << ",absFrobOrthog"
                 << ",frobA" << endl;
            // We don't need to print field names again for the other
            // tests, so set the test parameters accordingly.
            testParams->set ("printFieldNames", false);
          }
          if (testParams->get<bool> ("printResults")) {
            cout << "Tsqr"
                 << "," << Teuchos::TypeNameTraits<scalar_type>::name()
                 << "," << numRowsLocal
                 << "," << numCols
                 << "," << numProcs
                 << "," << numCores
                 << "," << tsqr->cache_size_hint()
                 << "," << contiguousCacheBlocks
                 << "," << results[0]
                 << "," << results[1]
                 << "," << results[2]
                 << endl;
          }
        } // if (myRank == 0)

        // If requested, check accuracy and fail if results are not
        // sufficiently accurate.
        if (testParams->get<bool> ("failIfInaccurate")) {
          // Avoid overflow of the local Ordinal type, by casting
          // each factor to a floating-point type before multiplying.
          const magnitude_type numColsMag = magnitude_type(numCols);
          const magnitude_type dimsProd = magnitude_type(numRowsLocal) *
            magnitude_type(numProcs) * numColsMag * numColsMag;

          // Relative residual error is ||A-Q*R|| / ||A||, or just
          // ||A-Q*R|| if ||A|| == 0.  (The result had better be zero
          // in the latter case.)  A reasonable error bound should
          // incorporate the dimensions of the matrix, since this
          // indicates the amount of rounding error.  Square root of
          // the matrix dimensions is an old heuristic from Wilkinson
          // or perhaps even an earlier source.  We include a factor
          // of 10 so that the test won't fail unless there is a
          // really good reason.
          const magnitude_type relResidBound =
            magnitude_type(10) * STM::squareroot(dimsProd) * STM::eps();

          // Orthogonality of the matrix should not depend on the
          // matrix dimensions, if we measure in the 2-norm.
          // However, we are measuring in the Frobenius norm, so
          // it's appropriate to multiply eps by the number of
          // entries in the matrix for which we compute the
          // Frobenius norm.  We include a factor of 10 for the same
          // reason as mentioned above.  As above, cast before
          // multiplying to avoid overflow of the Ordinal type.
          const magnitude_type orthoBound =
            magnitude_type(10) * numColsMag * numColsMag * STM::eps();

          // Avoid division by zero.
          const bool frobAIsZero = (results[2] == STM::zero());
          const magnitude_type relResidError =
            results[0] / (frobAIsZero ? STM::one() : results[2]);
          // Only label the quantity as relative (" / ||A||_F") when
          // we actually divided by ||A||_F.
          TEUCHOS_TEST_FOR_EXCEPTION(
            relResidError > relResidBound, TsqrInaccurate, "Full Tsqr "
            "has an inaccurate relative residual ||A - QR||_F"
            << (frobAIsZero ? "" : " / ||A||_F")
            << " = " << relResidError << ", which is greater than the bound "
            << relResidBound << " by a factor of "
            << relResidError / relResidBound << ".");
          const magnitude_type orthoError = results[1];
          TEUCHOS_TEST_FOR_EXCEPTION(
            orthoError > orthoBound, TsqrInaccurate,
            "Full Tsqr has an inaccurate orthogonality measure ||I - Q^* Q||_F"
            << " = " << orthoError << ", which is greater than "
            "the bound " << orthoBound << " by a factor of "
            << orthoError / orthoBound << ".");
        } // if (the tests should fail on inaccuracy)
      }