int main(int argc, char *argv[])
{

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  Teuchos::CommandLineProcessor clp(false);

  clp.setDocString("This is the canonical ML scaling example");

  //Problem
  std::string optMatrixType = "Laplace2D"; clp.setOption("matrixType",       &optMatrixType,           "matrix type ('Laplace2D', 'Laplace3D')");
  int optNx = 100;                         clp.setOption("nx",               &optNx,                   "mesh size in x direction");
  int optNy = -1;                          clp.setOption("ny",               &optNy,                   "mesh size in y direction");
  int optNz = -1;                          clp.setOption("nz",               &optNz,                   "mesh size in z direction");

  //Smoothers
  //std::string optSmooType = "Chebyshev";  clp.setOption("smooType",       &optSmooType,           "smoother type ('l1-sgs', 'sgs', or 'cheby')");
  int optSweeps = 3;                     clp.setOption("sweeps",         &optSweeps,             "Chebyshev degree (or SGS sweeps)");
  double optAlpha = 7;                   clp.setOption("alpha",          &optAlpha,              "Chebyshev eigenvalue ratio (recommend 7 in 2D, 20 in 3D)");

  //Coarsening
  int optMaxCoarseSize = 500;                     clp.setOption("maxcoarse",         &optMaxCoarseSize,  "Size of coarsest grid when coarsening should stop");
  int optMaxLevels = 10;                     clp.setOption("maxlevels",         &optMaxLevels,  "Maximum number of levels");

  //Krylov solver
  double optTol      = 1e-12;              clp.setOption("tol",            &optTol,                "stopping tolerance for Krylov method");
  int optMaxIts      = 500;              clp.setOption("maxits",            &optMaxIts,                "maximum iterations for Krylov method");

  //XML file with additional options
  std::string xmlFile = ""; clp.setOption("xml", &xmlFile, "XML file containing ML options. [OPTIONAL]");
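  // (If given, the file is assumed to hold a Teuchos::ParameterList in XML
  // form whose entries override the MLList settings below, e.g.:
  //   <ParameterList>
  //     <Parameter name="smoother: sweeps" type="int" value="4"/>
  //     <Parameter name="coarse: max size" type="int" value="1000"/>
  //   </ParameterList>
  // See the ML user's guide for the exact schema expected by ML_Epetra::ReadXML.)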


  //Debugging
  int  optWriteMatrices = -2;                  clp.setOption("write",                  &optWriteMatrices, "write matrices to file (-1 means all; i>=0 means level i)");

  switch (clp.parse(argc, argv)) {
  case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
  case Teuchos::CommandLineProcessor::PARSE_ERROR:
  case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
  case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
  }

#ifdef ML_SCALING
   const int ntimers=4;
   enum {total, probBuild, precBuild, solve};
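   // ml_DblLoc is assumed to pair a double value with its MPI rank (layout
   // compatible with MPI_DOUBLE_INT), so the MPI_MINLOC/MPI_MAXLOC reductions
   // below can report which rank attained each extreme time.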
   ml_DblLoc timeVec[ntimers], maxTime[ntimers], minTime[ntimers];

  for (int i=0; i<ntimers; i++) timeVec[i].rank = Comm.MyPID();
  timeVec[total].value = MPI_Wtime();
#endif

  // Creates the linear problem using the Galeri package.
  // Several matrix examples are supported; please refer to the
  // Galeri documentation for more details.
  // Most of the examples using the ML_Epetra::MultiLevelPreconditioner
  // class are based on Epetra_CrsMatrix. Example
  // `ml_EpetraVbr.cpp' shows how to define an Epetra_VbrMatrix.
  // `Laplace2D' is a symmetric matrix; an example of non-symmetric
  // matrices is `Recirc2D' (advection-diffusion in a box, with
  // recirculating flow). The grid has optNx x optNy nodes, divided into
  // mx x my subdomains, each assigned to a different processor.
  if (optNy == -1) optNy = optNx;
  if (optNz == -1) optNz = optNx;

  ParameterList GaleriList;
  GaleriList.set("nx", optNx);
  GaleriList.set("ny", optNy);
  GaleriList.set("nz", optNz);
  //GaleriList.set("mx", 1);
  //GaleriList.set("my", Comm.NumProc());

#ifdef ML_SCALING
  timeVec[probBuild].value = MPI_Wtime();
#endif
  Epetra_Map* Map;
  Epetra_CrsMatrix* A;
  Epetra_MultiVector* Coord;

  if (optMatrixType == "Laplace2D") {
    Map = CreateMap("Cartesian2D", Comm, GaleriList);
    A = CreateCrsMatrix("Laplace2D", Map, GaleriList);
    Coord = CreateCartesianCoordinates("2D", &(A->Map()), GaleriList);
  } else if (optMatrixType == "Laplace3D") {
    Map = CreateMap("Cartesian3D", Comm, GaleriList);
    A = CreateCrsMatrix("Laplace3D", Map, GaleriList);
    Coord = CreateCartesianCoordinates("3D", &(A->Map()), GaleriList);
  } else {
    throw(std::runtime_error("Bad matrix type"));
  }

  //EpetraExt::RowMatrixToMatlabFile("A.m",*A);

  double *x_coord = (*Coord)[0];
  double *y_coord = (*Coord)[1];
  double* z_coord=NULL;
  if (optMatrixType == "Laplace3D") z_coord = (*Coord)[2];

  //EpetraExt::MultiVectorToMatrixMarketFile("mlcoords.m",*Coord);

  if( Comm.MyPID()==0 ) {
    std::cout << "========================================================" << std::endl;
    std::cout << " Matrix type: " << optMatrixType << std::endl;
    if (optMatrixType == "Laplace2D")
      std::cout << " Problem size: " << optNx*optNy << " (" << optNx << "x" << optNy << ")" << std::endl;
    else if (optMatrixType == "Laplace3D")
      std::cout << " Problem size: " << optNx*optNy*optNz << " (" << optNx << "x" << optNy << "x" << optNz << ")" << std::endl;

    int mx = GaleriList.get("mx", -1);
    int my = GaleriList.get("my", -1);
    int mz = GaleriList.get("mz", -1);
    std::cout << " Processor subdomains in x direction: " << mx << std::endl
              << " Processor subdomains in y direction: " << my << std::endl;
    if (optMatrixType == "Laplace3D")
      std::cout << " Processor subdomains in z direction: " << mz << std::endl;
    std::cout << "========================================================" << std::endl;
  }

  // Build a linear system with trivial solution, using a random vector
  // as starting solution.
  Epetra_Vector LHS(*Map); LHS.Random();
  Epetra_Vector RHS(*Map); RHS.PutScalar(0.0);

  Epetra_LinearProblem Problem(A, &LHS, &RHS);

  // As we wish to use AztecOO, we need to construct a solver object 
  // for this problem
  AztecOO solver(Problem);
#ifdef ML_SCALING
  timeVec[probBuild].value = MPI_Wtime() - timeVec[probBuild].value;
#endif

  // =========================== begin of ML part ===========================
  
#ifdef ML_SCALING
  timeVec[precBuild].value = MPI_Wtime();
#endif
  // create a parameter list for ML options
  ParameterList MLList;

  // Sets default parameters for classic smoothed aggregation. After this
  // call, MLList contains the default values for the ML parameters,
  // as required by typical smoothed aggregation for symmetric systems.
  // Other sets of parameters are available for non-symmetric systems
  // ("DD" and "DD-ML"), and for the Maxwell equations ("maxwell").
  ML_Epetra::SetDefaults("SA",MLList);
  
  // overwrite some parameters. Please refer to the user's guide
  // for more information. Some of these parameters do not differ
  // from their default values; they are repeated here for clarity.
  
  // output level, 0 being silent and 10 verbose
  MLList.set("ML output", 10);
  // maximum number of levels
  MLList.set("max levels",optMaxLevels);
  // set finest level to 0
  MLList.set("increasing or decreasing","increasing");
  MLList.set("coarse: max size",optMaxCoarseSize);

  // use Uncoupled scheme to create the aggregate
  MLList.set("aggregation: type", "Uncoupled");

  // smoother is Chebyshev. Example file 
  // `ml/examples/TwoLevelDD/ml_2level_DD.cpp' shows how to use
  // AZTEC's preconditioners as smoothers

  MLList.set("smoother: type","Chebyshev");
  MLList.set("smoother: Chebyshev alpha",optAlpha);
  MLList.set("smoother: sweeps",optSweeps);

  // use both pre and post smoothing
  MLList.set("smoother: pre or post", "both");

#ifdef HAVE_ML_AMESOS
  // solve with serial direct solver KLU
  MLList.set("coarse: type","Amesos-KLU");
#else
  // this is for testing purposes only; you should use a direct
  // solver for the coarse problem (either Amesos, or the SuperLU/
  // SuperLU_DIST interface of ML)
  MLList.set("coarse: type","Jacobi");
#endif
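
  // Repartitioning: when coarse-level matrices become small, ML can rebalance
  // them onto fewer processes with Zoltan, using the node coordinates supplied
  // below, so that coarse-grid work is not dominated by communication.
  // Roughly, "max min ratio" is the allowed load imbalance and "min per proc"
  // is the minimum number of rows a process should own before repartitioning
  // is triggered.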

  MLList.set("repartition: enable",1);
  MLList.set("repartition: start level",1);
  MLList.set("repartition: max min ratio",1.1);
  MLList.set("repartition: min per proc",800);
  MLList.set("repartition: partitioner","Zoltan");
  MLList.set("repartition: put on single proc",1);
  MLList.set("x-coordinates", x_coord);
  MLList.set("y-coordinates", y_coord);
  if (optMatrixType == "Laplace2D") {
    MLList.set("repartition: Zoltan dimensions",2);
  } else if (optMatrixType == "Laplace3D") {
    MLList.set("repartition: Zoltan dimensions",3);
    MLList.set("z-coordinates", z_coord);
  }

  MLList.set("print hierarchy",optWriteMatrices);
  //MLList.set("aggregation: damping factor",0.);

  // Read in XML options
  if (xmlFile != "")
    ML_Epetra::ReadXML(xmlFile,MLList,Comm);

  // Creates the preconditioning object. We suggest using `new' and
  // `delete' because the destructor contains some calls to MPI (as
  // required by ML and possibly Amesos). This is an issue only if the
  // destructor is called **after** MPI_Finalize().
  ML_Epetra::MultiLevelPreconditioner* MLPrec = 
    new ML_Epetra::MultiLevelPreconditioner(*A, MLList);

  // verify unused parameters on process 0 (put -1 to print on all
  // processes)
  MLPrec->PrintUnused(0);
#ifdef ML_SCALING
  timeVec[precBuild].value = MPI_Wtime() - timeVec[precBuild].value;
#endif

  // ML allows the user to cheaply recompute the preconditioner. You can
  // simply uncomment the following line:
  //
  // MLPrec->ReComputePreconditioner();
  //
  // It is assumed that the linear system matrix has different values, but
  // **exactly** the same structure and layout. The code rebuilds the
  // hierarchy and re-sets up the smoothers and the coarse solver using
  // the information already available on the hierarchy. Particular care
  // is required when using ReComputePreconditioner() with a nonzero
  // threshold.

  // =========================== end of ML part =============================
  
  // tell AztecOO to use the ML preconditioner, select CG as the solver,
  // print the residual every 32 iterations, and solve with at most
  // optMaxIts iterations and a tolerance of optTol (see the AztecOO
  // user's guide for more details)
  
#ifdef ML_SCALING
  timeVec[solve].value = MPI_Wtime();
#endif
  solver.SetPrecOperator(MLPrec);
  solver.SetAztecOption(AZ_solver, AZ_cg);
  solver.SetAztecOption(AZ_output, 32);
  solver.Iterate(optMaxIts, optTol);
#ifdef ML_SCALING
  timeVec[solve].value = MPI_Wtime() - timeVec[solve].value;
#endif

  // destroy the preconditioner
  delete MLPrec;
  
  // compute the norm of the computed solution; since the exact solution
  // of this system is zero, this is also the error norm

  double residual;
  LHS.Norm2(&residual);
  
  if( Comm.MyPID()==0 ) {
    cout << "||x||_2 = " << residual << endl;
  }

  // for testing purposes: fail if the error norm is too large
  if (residual > 1e-5) {
#ifdef HAVE_MPI
    MPI_Finalize();
#endif
    exit(EXIT_FAILURE);
  }

  delete Coord;
  delete A;
  delete Map;

#ifdef ML_SCALING
  timeVec[total].value = MPI_Wtime() - timeVec[total].value;

  //avg
  double dupTime[ntimers],avgTime[ntimers];
  for (int i=0; i<ntimers; i++) dupTime[i] = timeVec[i].value;
  MPI_Reduce(dupTime,avgTime,ntimers,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
  for (int i=0; i<ntimers; i++) avgTime[i] = avgTime[i]/Comm.NumProc();
  //min
  MPI_Reduce(timeVec,minTime,ntimers,MPI_DOUBLE_INT,MPI_MINLOC,0,MPI_COMM_WORLD);
  //max
  MPI_Reduce(timeVec,maxTime,ntimers,MPI_DOUBLE_INT,MPI_MAXLOC,0,MPI_COMM_WORLD);

  if (Comm.MyPID() == 0) {
    printf("timing :  max (pid)  min (pid)  avg\n");
    printf("Problem build         :   %2.3e (%d)  %2.3e (%d)  %2.3e \n",
             maxTime[probBuild].value,maxTime[probBuild].rank,
             minTime[probBuild].value,minTime[probBuild].rank,
             avgTime[probBuild]);
    printf("Preconditioner build  :   %2.3e (%d)  %2.3e (%d)  %2.3e \n",
             maxTime[precBuild].value,maxTime[precBuild].rank,
             minTime[precBuild].value,minTime[precBuild].rank,
             avgTime[precBuild]);
    printf("Solve                 :   %2.3e (%d)  %2.3e (%d)  %2.3e \n",
             maxTime[solve].value,maxTime[solve].rank,
             minTime[solve].value,minTime[solve].rank,
             avgTime[solve]);
    printf("Total                 :   %2.3e (%d)  %2.3e (%d)  %2.3e \n",
             maxTime[total].value,maxTime[total].rank,
             minTime[total].value,minTime[total].rank,
             avgTime[total]);
  }
#endif

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  return(EXIT_SUCCESS);
}
Example #2
int main(int argc, char *argv[])
{

#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  int nx;
  if (argc > 1)
    nx = (int) strtol(argv[1],NULL,10);
  else
    nx = 256;
  int ny = nx * Comm.NumProc(); // each subdomain is a square

  ParameterList GaleriList;
  GaleriList.set("nx", nx);
  GaleriList.set("ny", ny);
  GaleriList.set("mx", 1);
  GaleriList.set("my", Comm.NumProc());
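  // With mx = 1 and my = NumProc, Galeri partitions the Cartesian grid into
  // one strip per process along the y direction (mx*my must equal the number
  // of processes).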

  int NumNodes = nx*ny;
  int NumPDEEqns = 2;

  Epetra_Map* Map = CreateMap("Cartesian2D", Comm, GaleriList);
  Epetra_CrsMatrix* CrsA = CreateCrsMatrix("Laplace2D", Map, GaleriList);
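  // (CreateVbrMatrix is assumed to expand the scalar Laplace stencil into a
  // block (VBR) matrix with NumPDEEqns equations per grid node, replicating
  // the Crs structure for each equation.)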
  Epetra_VbrMatrix* A = CreateVbrMatrix(CrsA, NumPDEEqns);

  Epetra_Vector LHS(A->DomainMap()); LHS.PutScalar(0);
  Epetra_Vector RHS(A->DomainMap()); RHS.Random();
  Epetra_LinearProblem Problem(A, &LHS, &RHS);
  AztecOO solver(Problem);
  double *x_coord = 0, *y_coord = 0, *z_coord = 0;

  Epetra_MultiVector *coords = CreateCartesianCoordinates("2D", &(CrsA->Map()),
                                                          GaleriList);

  double **ttt;
  if (!coords->ExtractView(&ttt)) {
    x_coord = ttt[0];
    y_coord = ttt[1];
  } else {
    printf("Error extracting coordinate vectors\n");
#   ifdef HAVE_MPI
    MPI_Finalize() ;
#   endif
    exit(EXIT_FAILURE);
  }

  ParameterList MLList;
  SetDefaults("SA",MLList);
  MLList.set("ML output",10);
  MLList.set("max levels",10);
  MLList.set("increasing or decreasing","increasing");
  MLList.set("smoother: type", "Chebyshev");
  MLList.set("smoother: sweeps", 3);

  // the option `aggregation: next-level aggregates per process' defines
  // how many aggregates each process will create for the next level:
  // *) if a low number, it will use all the available processes
  // *) if a big number, it will use only processor 0 on the next level
  MLList.set("aggregation: next-level aggregates per process", 1);

  MLList.set("aggregation: type (level 0)", "Zoltan");
  MLList.set("aggregation: type (level 1)", "Uncoupled");
  MLList.set("aggregation: type (level 2)", "Zoltan");
  MLList.set("aggregation: smoothing sweeps", 2);

  MLList.set("x-coordinates", x_coord);
  MLList.set("y-coordinates", y_coord);
  MLList.set("z-coordinates", z_coord);

  // specify the reduction with respect to the previous level
  // (very small values can break the code)
  int ratio = 16;
  MLList.set("aggregation: global aggregates (level 0)",
             NumNodes / ratio);
  MLList.set("aggregation: global aggregates (level 1)",
             NumNodes / (ratio * ratio));
  MLList.set("aggregation: global aggregates (level 2)",
             NumNodes / (ratio * ratio * ratio));

  MultiLevelPreconditioner* MLPrec =
    new MultiLevelPreconditioner(*A, MLList, true);

  solver.SetPrecOperator(MLPrec);
  solver.SetAztecOption(AZ_solver, AZ_cg_condnum);
  solver.SetAztecOption(AZ_output, 1);
  solver.Iterate(100, 1e-12);

  // compute the real residual
  Epetra_Vector Residual(A->DomainMap());
  // Residual = 1.0*RHS + 0.0*RHS - 1.0*(A*LHS), i.e. b - A*x
  A->Apply(LHS,Residual);
  Residual.Update(1.0, RHS, 0.0, RHS, -1.0);
  double rn;
  Residual.Norm2(&rn);

  if (Comm.MyPID() == 0 )
    std::cout << "||b-Ax||_2 = " << rn << endl;

  if (Comm.MyPID() == 0 && rn > 1e-5) {
    std::cout << "TEST FAILED!!!!" << endl;
#   ifdef HAVE_MPI
    MPI_Finalize() ;
#   endif
    exit(EXIT_FAILURE);
  }

  delete MLPrec;
  delete coords;
  delete Map;
  delete CrsA;
  delete A;

  if (Comm.MyPID() == 0)
    std::cout << "TEST PASSED" << endl;

#ifdef HAVE_MPI
  MPI_Finalize() ;
#endif

  exit(EXIT_SUCCESS);

}
Example #3
int main(int narg, char *arg[]) 
{
  using std::cout;

#ifdef EPETRA_MPI  
  // Initialize MPI  
  MPI_Init(&narg,&arg);   
  Epetra_MpiComm Comm( MPI_COMM_WORLD );  
#else  
  Epetra_SerialComm Comm;  
#endif
  
  int MyPID = Comm.MyPID();

  bool verbose = true;
  int verbosity = 1;
  
  bool testEpetra64 = true;

  // Matrix properties
  bool isHermitian = true;

  // Multivector properties

  std::string initvec = "random";

  // Eigenvalue properties

  std::string which = "SR";
  std::string method = "LOBPCG";
  std::string precond = "none";
  std::string ortho = "SVQB";
  bool lock = true;
  bool relconvtol = false;
  bool rellocktol = false;
  int nev = 5;

  // Block-Arnoldi properties

  int blockSize = -1;
  int numblocks = -1;
  int maxrestarts = -1;
  int maxiterations = -1;
  int extrablocks = 0;
  int gensize = 25;  // Needs to be long long to test with > INT_MAX rows
  double tol = 1.0e-5;
  
  // Echo the command line
  if (MyPID == 0)  {
    for (int i = 0; i < narg; i++)
      cout << arg[i] << " ";
    cout << endl;
  }

  // Command-line processing

  Teuchos::CommandLineProcessor cmdp(false,true);
  cmdp.setOption("Epetra64", "no-Epetra64", &testEpetra64,
                 "Force code to use Epetra64, even if the problem size does "
                 "not require it. (Epetra64 will be used automatically for "
                 "sufficiently large problems, or not used if Epetra does not have built in support.)");
  cmdp.setOption("gen",&gensize,
                 "Generate a simple Laplacian matrix of size n.");
  cmdp.setOption("verbosity", &verbosity, "0=quiet, 1=low, 2=medium, 3=high.");
  cmdp.setOption("method",&method,
                 "Solver method to use:  LOBPCG, BD, BKS or IRTR.");
  cmdp.setOption("nev",&nev,"Number of eigenvalues to find.");
  cmdp.setOption("which",&which,"Targeted eigenvalues (SM,LM,SR,or LR).");
  cmdp.setOption("tol",&tol,"Solver convergence tolerance.");
  cmdp.setOption("blocksize",&blockSize,"Block size to use in solver.");
  cmdp.setOption("numblocks",&numblocks,"Number of blocks to allocate.");
  cmdp.setOption("extrablocks",&extrablocks,
                 "Number of extra NEV blocks to allocate in BKS.");
  cmdp.setOption("maxrestarts",&maxrestarts,
                 "Maximum number of restarts in BKS or BD.");
  cmdp.setOption("maxiterations",&maxiterations,
                 "Maximum number of iterations in LOBPCG.");
  cmdp.setOption("lock","no-lock",&lock,
                 "Use Locking parameter (deflate for converged eigenvalues)");
  cmdp.setOption("initvec", &initvec,
                 "Initial vectors (random, unit, zero, random2)");
  cmdp.setOption("ortho", &ortho,
                 "Orthogonalization method (DGKS, SVQB, TSQR).");
  cmdp.setOption("relative-convergence-tol","no-relative-convergence-tol",
                 &relconvtol,
                 "Use Relative convergence tolerance "
                 "(normalized by eigenvalue)");
  cmdp.setOption("relative-lock-tol","no-relative-lock-tol",&rellocktol,
                 "Use Relative locking tolerance (normalized by eigenvalue)");
  if (cmdp.parse(narg,arg)!=Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    FINALIZE;
    return -1;
  }
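  // (FINALIZE, used above and at the end of main, is assumed to be a macro
  // defined elsewhere in this example that calls MPI_Finalize() under MPI
  // builds and is a no-op in serial builds.)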

  // Print the most essential options (not in the MyPL parameters later)
  verbose = (verbosity>0);
  if (verbose && MyPID==0){
    cout << "verbosity = " << verbosity << endl;
    cout << "method = " << method << endl;
    cout << "initvec = " << initvec << endl;
    cout << "nev = " << nev << endl;
  }

  // We need blockSize to be set so we can allocate memory with it.
  // If it wasn't set on the command line, set it to the Anasazi defaults
  // here (the defaults given in the documentation).
  if (blockSize < 0) {
    if (method == "BKS")
      blockSize = 1;
    else // other methods: LOBPCG, BD, IRTR
      blockSize = nev;
  }

  // If Epetra was built without 64-bit global index support, disable
  // the Epetra64 path.
#ifdef EPETRA_NO_64BIT_GLOBAL_INDICES
  if (testEpetra64)
    testEpetra64 = false;
#endif

  Epetra_CrsMatrix *K = NULL;
  
  // Generate a simple Laplacian matrix
  if (gensize > 0 && testEpetra64) {
    // Generate the matrix using long long for global indices
    build_simple_matrix<long long>(Comm, K, (long long)gensize, true, verbose);
  }
  else if (gensize) {
    // Generate the matrix using int for global indices
    build_simple_matrix<int>(Comm, K, gensize, false, verbose);
  }
  else {
    printf("Invalid problem size; gensize must be positive.\n");
    FINALIZE;
    exit(-1);
  }

  if (verbose && (K->NumGlobalRows64() < TINYMATRIX)) {
    if (MyPID == 0) cout << "Input matrix:  " << endl;
    K->Print(cout);
  }
  Teuchos::RCP<Epetra_CrsMatrix> rcpK = Teuchos::rcp( K );

  // Set Anasazi verbosity level
  if (MyPID == 0) cout << "Setting up the problem..." << endl;

  int anasazi_verbosity = Anasazi::Errors + Anasazi::Warnings;
  if (verbosity >= 1)  // low
    anasazi_verbosity += Anasazi::FinalSummary + Anasazi::TimingDetails;
  if (verbosity >= 2)  // medium
    anasazi_verbosity += Anasazi::IterationDetails;
  if (verbosity >= 3)  // high
    anasazi_verbosity += Anasazi::StatusTestDetails
                       + Anasazi::OrthoDetails
                       + Anasazi::Debug;
  
  // Create parameter list to pass into solver
  Teuchos::ParameterList MyPL;
  MyPL.set("Verbosity", anasazi_verbosity);
  MyPL.set("Which", which);
  MyPL.set("Convergence Tolerance", tol);
  MyPL.set("Relative Convergence Tolerance", relconvtol);
  MyPL.set("Orthogonalization", ortho);

  // For the following, use Anasazi's defaults unless explicitly specified.
  if (numblocks > 0) MyPL.set( "Num Blocks", numblocks);
  if (maxrestarts > 0) MyPL.set( "Maximum Restarts", maxrestarts);
  if (maxiterations > 0) MyPL.set( "Maximum Iterations", maxiterations);
  if (blockSize > 0) MyPL.set( "Block Size", blockSize );

  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef Anasazi::MultiVecTraits<double, MV> MVT;
  typedef Anasazi::OperatorTraits<double, MV, OP> OPT;
    

  // Create the eigenproblem to be solved.
    
  // Dummy initial vectors - will be set later.
  Teuchos::RCP<Epetra_MultiVector> ivec = 
    Teuchos::rcp(new Epetra_MultiVector(K->Map(), blockSize));

  Teuchos::RCP<Anasazi::BasicEigenproblem<double, MV, OP> > MyProblem;
  MyProblem = 
    Teuchos::rcp(new Anasazi::BasicEigenproblem<double, MV, OP>(rcpK, ivec) );

  // Inform the eigenproblem whether K is Hermitian

  MyProblem->setHermitian(isHermitian);

  // Set the number of eigenvalues requested 

  MyProblem->setNEV(nev);

  // Track failures, the iteration count, and the solve time
  int numfailed = 0;
  int iter = 0;
  double solvetime = 0;

  // Set the random seed so initial vectors are consistent between
  // experiments (a different odd seed on each rank).
  ivec->SetSeed(2*(MyPID) +1); // Odd seed

  // Set up initial vectors
  // Using random values as the initial guess.
  if (initvec == "random"){
    MVT::MvRandom(*ivec);
  } 
  else if (initvec == "zero"){
    // All zero initial vector should be essentially the same,
    // but appears slightly worse in practice.
    ivec->PutScalar(0.);
  }
  else if (initvec == "unit"){
    // Orthogonal unit initial vectors.
    ivec->PutScalar(0.);
    for (int i = 0; i < blockSize; i++)
      ivec->ReplaceGlobalValue(i,i,1.);
  }
  else if (initvec == "random2"){
    // Partially random but orthogonal (0,1) initial vectors.
    // ivec(i,*) is zero in all but one column (for each i)
    // Inefficient implementation but this is only done once...
    double rowmax;
    int col;
    ivec->Random();
    for (int i = 0; i < ivec->MyLength(); i++){
      rowmax = -1;
      col = -1;
      for (int j = 0; j < blockSize; j++){
        // Make ivec(i,j) = 1 for largest random value in row i
        if ((*ivec)[j][i] > rowmax){
          rowmax = (*ivec)[j][i];
          col = j;
        }
        ivec->ReplaceMyValue(i,j,0.);
      }
      ivec->ReplaceMyValue(i,col,1.);
    }
  }
  else
    cout << "ERROR: Unknown value for initial vectors." << endl;

  if (verbose && (ivec->GlobalLength64() < TINYMATRIX)) 
    ivec->Print(std::cout);
  
  // Inform the eigenproblem that you are finished passing it information
  
  bool boolret = MyProblem->setProblem();
  if (boolret != true) {
    if (verbose && MyPID == 0) {
      cout << "Anasazi::BasicEigenproblem::setProblem() returned with error." 
           << endl;
    }
    FINALIZE;
    return -1;
  }
 
  Teuchos::RCP<Anasazi::SolverManager<double, MV, OP> > MySolverMgr;
 
  if (method == "BKS") {
    // Initialize the Block Arnoldi solver
    MyPL.set("Extra NEV Blocks", extrablocks);
    MySolverMgr = Teuchos::rcp( new Anasazi::BlockKrylovSchurSolMgr<double, MV, OP>(MyProblem,MyPL) );
  }
  else if (method == "BD") {
    // Initialize the Block Davidson solver
    MyPL.set("Use Locking", lock);
    MyPL.set("Relative Locking Tolerance", rellocktol);
    MySolverMgr = Teuchos::rcp( new Anasazi::BlockDavidsonSolMgr<double, MV, OP>(MyProblem, MyPL) );
  }
  else if (method == "LOBPCG") {
    // Initialize the LOBPCG solver
    MyPL.set("Use Locking", lock);
    MyPL.set("Relative Locking Tolerance", rellocktol);
    MySolverMgr = Teuchos::rcp( new Anasazi::LOBPCGSolMgr<double, MV, OP>(MyProblem, MyPL) );
  }
  else if (method == "IRTR") {
    // Initialize the IRTR solver
    MySolverMgr = Teuchos::rcp( new Anasazi::RTRSolMgr<double, MV, OP>(MyProblem, MyPL) );
  }
  else
    cout << "Unknown solver method!" << endl;

  if (verbose && MyPID==0) MyPL.print(cout);
      
  // Solve the problem to the specified tolerances or length
  if (MyPID == 0) cout << "Beginning the " << method << " solve..." << endl;
    
  Anasazi::ReturnType returnCode = MySolverMgr->solve();
  if (returnCode != Anasazi::Converged) {
    ++numfailed;
    if (MyPID==0)
      cout << "Anasazi::SolverManager::solve() returned unconverged." << endl;
  }
  iter = MySolverMgr->getNumIters();
  solvetime = (MySolverMgr->getTimers()[0])->totalElapsedTime();
  
  if (MyPID == 0) {
    cout << "Iterations in this solve: " << iter << endl; 
    cout << "Solve complete; beginning post-processing..."<< endl;
  }
  
  // Get the eigenvalues and eigenvectors from the eigenproblem
  
  Anasazi::Eigensolution<double,MV> sol = MyProblem->getSolution();
  std::vector<Anasazi::Value<double> > evals = sol.Evals;
  Teuchos::RCP<MV> evecs = sol.Evecs;
  std::vector<int> index = sol.index;
  int numev = sol.numVecs;
  
  // Compute residuals.
  
  if (numev > 0) {
    Teuchos::LAPACK<int,double> lapack;
    std::vector<double> normR(numev);
     
    if (MyProblem->isHermitian()) {
      // Get storage
      Epetra_MultiVector Kevecs(K->Map(),numev);
      Teuchos::RCP<Epetra_MultiVector> Mevecs;
      Teuchos::SerialDenseMatrix<int,double> B(numev,numev);
      B.putScalar(0.0); 
      for (int i=0; i<numev; i++) {B(i,i) = evals[i].realpart;}
      
      // Compute A*evecs
      OPT::Apply( *rcpK, *evecs, Kevecs );
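      // No mass matrix is set on this eigenproblem, so M*evecs is just evecs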
      Mevecs = evecs;
      
      // Compute A*evecs - lambda*evecs and its norm
      MVT::MvTimesMatAddMv( -1.0, *Mevecs, B, 1.0, Kevecs );
      MVT::MvNorm( Kevecs, normR );
        
      // Scale the norms by the eigenvalue if relative convergence tol was used
      if (relconvtol) {
        for (int i=0; i<numev; i++) 
          normR[i] /= Teuchos::ScalarTraits<double>::magnitude(evals[i].realpart);
      }
        
    } else {
      printf("Residual check is not implemented for non-Hermitian problems; sorry.\n");
      exit(-1);
    }
  
  
    if (verbose && MyPID==0) {
      cout.setf(std::ios_base::right, std::ios_base::adjustfield);	
      cout<<endl<< "Actual Results"<<endl;
      if (MyProblem->isHermitian()) {
        cout<< std::setw(16) << "Eigenvalue "
            << std::setw(20) << "Direct Residual"
            << (relconvtol?" (normalized by eigenvalue)":" (no normalization)")
            << endl;
        cout<<"--------------------------------------------------------"<<endl;
        for (int i=0; i<numev; i++) {
          cout<< "EV" << i << std::setw(16) << evals[i].realpart 
              << std::setw(20) << normR[i] << endl;
        }  
        cout<<"--------------------------------------------------------"<<endl;
      } 
      else {
        cout<< std::setw(16) << "Real Part"
            << std::setw(16) << "Imag Part"
            << std::setw(20) << "Direct Residual"<< endl;
        cout<<"--------------------------------------------------------"<<endl;
        for (int i=0; i<numev; i++) {
          cout<< std::setw(16) << evals[i].realpart 
              << std::setw(16) << evals[i].imagpart 
              << std::setw(20) << normR[i] << endl;
        }  
        cout<<"--------------------------------------------------------"<<endl;
      }  
    }
  }

  // Summarize iteration counts and solve time
  if (MyPID == 0) {
    cout << endl;
    cout << "DRIVER SUMMARY" << endl;
    cout << "Failed to converge: " << numfailed << endl;
    cout << "Solve time:           " << solvetime << endl; 
  }

  FINALIZE;

  if (numfailed) {
    if (MyPID == 0) {
      cout << "End Result: TEST FAILED" << endl;
    }
    return -1;
  }
  //
  // Default return value
  //
  if (MyPID == 0) {
    cout << "End Result: TEST PASSED" << endl;
  } 
  return 0;
} 
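Example #4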
int main(int argc, char *argv[])
{

#ifdef EPETRA_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  // `Laplace2D' is a symmetric matrix; an example of non-symmetric
  // matrices is `Recirc2D' (advection-diffusion in a box, with
  // recirculating flow). The grid has nx x ny nodes, divided into
  // mx x my subdomains, each assigned to a different processor.
  int nx = 8;
  int ny = 8 * Comm.NumProc();

  ParameterList GaleriList;
  GaleriList.set("nx", nx);
  GaleriList.set("ny", ny);
  GaleriList.set("mx", 1);
  GaleriList.set("my", Comm.NumProc());

  Epetra_Map* Map = CreateMap("Cartesian2D", Comm, GaleriList);
  Epetra_CrsMatrix* A = CreateCrsMatrix("Laplace2D", Map, GaleriList);

  // use the following Galeri function to get the
  // coordinates for a Cartesian grid.

  Epetra_MultiVector* Coord = CreateCartesianCoordinates("2D", &(A->Map()),
                                                         GaleriList);
  double* x_coord = (*Coord)[0];
  double* y_coord = (*Coord)[1];

  // Create a linear problem whose exact solution is zero, using a
  // random vector as the starting guess
  Epetra_Vector LHS(*Map); LHS.Random();
  Epetra_Vector RHS(*Map); RHS.PutScalar(0.0);

  Epetra_LinearProblem Problem(A, &LHS, &RHS);

  // As we wish to use AztecOO, we need to construct a solver object for this problem
  AztecOO solver(Problem);

  // =========================== begin of ML part ===========================

  // create a parameter list for ML options
  ParameterList MLList;

  // set defaults for classic smoothed aggregation.
  ML_Epetra::SetDefaults("SA",MLList);

  // use a user-defined aggregation scheme to create the aggregates
  // 1.- set "user" as the aggregation scheme (for all levels, or for
  //     a specific level only)
  MLList.set("aggregation: type", "user");
  // 2.- set the label (for output)
  ML_SetUserLabel(UserLabel);
  // 3.- set the aggregation scheme (see function above)
  ML_SetUserPartitions(UserPartitions);
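  //     (UserLabel and UserPartitions are the user-supplied functions referred
  //     to above, defined outside this excerpt; UserPartitions is expected to
  //     assign each local row to an aggregate, and UserLabel supplies a short
  //     label used in ML's output.)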
  // 4.- set the coordinates.
  MLList.set("x-coordinates", x_coord);
  MLList.set("y-coordinates", y_coord);
  MLList.set("aggregation: dimensions", 2);

  // also setup some variables to visualize the aggregates
  // (more details are reported in example `ml_viz.cpp').
  MLList.set("viz: enable", true);

  // now we create the preconditioner
  ML_Epetra::MultiLevelPreconditioner * MLPrec =
    new ML_Epetra::MultiLevelPreconditioner(*A, MLList);

  MLPrec->VisualizeAggregates();

  // tell AztecOO to use this preconditioner, then solve
  solver.SetPrecOperator(MLPrec);

  // =========================== end of ML part =============================

  solver.SetAztecOption(AZ_solver, AZ_cg_condnum);
  solver.SetAztecOption(AZ_output, 32);

  // solve with at most 500 iterations and a 1e-12 tolerance
  solver.Iterate(500, 1e-12);

  delete MLPrec;

  // compute the norm of the computed solution; since the exact solution
  // is zero, this is also the error norm

  double residual;
  LHS.Norm2(&residual);

  if (Comm.MyPID() == 0)
  {
    cout << "||x||_2 = " << residual << endl;
  }

  delete Coord;
  delete A;
  delete Map;

  if (residual > 1e-3) {
#ifdef EPETRA_MPI
    MPI_Finalize();
#endif
    exit(EXIT_FAILURE);
  }

#ifdef EPETRA_MPI
  MPI_Finalize();
#endif

  exit(EXIT_SUCCESS);

}