Esempio n. 1
0
// Epetra performance-test driver.
//
// Parses the grid / processor / stencil description from the command line and
// then, for a selection of right-hand-side counts (and for dynamic and, unless
// a short test is configured, static matrix profiles), builds 2D
// finite-difference test problems with GenerateCrsProblem and runs:
//   - runMatrixTests (and runJadMatrixTests when EPETRA_HAVE_JADMATRIX is set),
//   - runLUMatrixTests on the lower/upper triangular stencils,
//   - 10 Norm2, 10 Dot and 10 Update multivector kernels, reported in MFLOPs.
//
// Command line: NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]
//
// Returns 1 when the arguments are missing or inconsistent, otherwise ierr
// (which is never changed from 0 in this function).
int main(int argc, char *argv[])
{
  int ierr = 0;
  double elapsed_time;
  double total_flops;
  double MFLOPs;


#ifdef EPETRA_MPI

  // Initialize MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm comm;
#endif

  bool verbose = false;
  bool summary = false;

  // Check if we should print verbose results to standard out
  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;

  // Check if we should print summary results to standard out
  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;

  if(argc < 6) {
    cerr << "Usage: " << argv[0]
         << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
         << "where:" << endl
         << "NumNodesX         - Number of mesh nodes in X direction per processor" << endl
         << "NumNodesY         - Number of mesh nodes in Y direction per processor" << endl
         << "NumProcX          - Number of processors to use in X direction" << endl
         << "NumProcY          - Number of processors to use in Y direction" << endl
         << "NumPoints         - Number of points to use in stencil (5, 9 or 25 only)" << endl
         << "-v|-s             - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
         << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
         << " Serial example:" << endl
         << argv[0] << " 16 12 1 1 25 -v" << endl
         << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
         << " MPI example:" << endl
         << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
         << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
         << " in the X direction and 8 in the Y direction.  Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
         << endl;
    return(1);

  }
    //char tmp;
    //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
    //if (comm.MyPID()==0) cin >> tmp;
    //comm.Barrier();

  comm.SetTracebackMode(0); // This should shut down any error traceback reporting
  if (verbose && comm.MyPID()==0)
    cout << Epetra_Version() << endl << endl;
  if (summary && comm.MyPID()==0) {
    if (comm.NumProc()==1)
      cout << Epetra_Version() << endl << endl;
    else
      cout << endl << endl; // Print two blank line to keep output columns lined up
  }

  if (verbose) cout << comm <<endl;


  // Redefine verbose to only print on PE 0

  if (verbose && comm.MyPID()!=0) verbose = false;
  if (summary && comm.MyPID()!=0) summary = false;

  int numNodesX = atoi(argv[1]);
  int numNodesY = atoi(argv[2]);
  int numProcsX = atoi(argv[3]);
  int numProcsY = atoi(argv[4]);
  int numPoints = atoi(argv[5]);

  if (verbose || (summary && comm.NumProc()==1)) {
    cout << " Number of local nodes in X direction  = " << numNodesX << endl
         << " Number of local nodes in Y direction  = " << numNodesY << endl
         << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
         << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
         << " Number of local nonzero entries       = " << numNodesX*numNodesY*numPoints << endl
         << " Number of global nonzero entries      = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
         << " Number of Processors in X direction   = " << numProcsX << endl
         << " Number of Processors in Y direction   = " << numProcsY << endl
         << " Number of Points in stencil           = " << numPoints << endl << endl;
  }
  // Print blank line to keep output columns lined up
  if (summary && comm.NumProc()>1)
    cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;

  if (numProcsX*numProcsY!=comm.NumProc()) {
    cerr << "Number of processors = " << comm.NumProc() << endl
         << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
    return(1);
  }

  if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
    cerr << "Number of points specified = " << numPoints << endl
         << " is not 5, 9, 25" << endl << endl;
    return(1);
  }

  if (numNodesX*numNodesY<=0) {
    cerr << "Product of number of nodes is <= zero" << endl << endl;
    return(1);
  }

  // Stencil offsets for the full matrix (Xoff/Yoff) and for the lower (XLoff/YLoff)
  // and upper (XUoff/YUoff) triangular matrices.
  Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
  Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
  if (numPoints==5) {

     // Generate a 5-point 2D Finite Difference matrix
    Xoff.Size(5);
    Yoff.Size(5);
    Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0;  Xoff[4] = 0;
    Yoff[0] = 0;  Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1;

     // Generate a 2-point 2D Lower triangular Finite Difference matrix
    XLoff.Size(2);
    YLoff.Size(2);
    XLoff[0] = -1; XLoff[1] =  0;
    YLoff[0] =  0; YLoff[1] = -1;

     // Generate a 3-point 2D upper triangular Finite Difference matrix
    XUoff.Size(3);
    YUoff.Size(3);
    XUoff[0] =  0; XUoff[1] =  1; XUoff[2] = 0;
    YUoff[0] =  0; YUoff[1] =  0; YUoff[2] = 1;
  }
  else if (numPoints==9) {
    // Generate a 9-point 2D Finite Difference matrix
    Xoff.Size(9);
    Yoff.Size(9);
    Xoff[0] = -1;  Xoff[1] =  0; Xoff[2] =  1;
    Yoff[0] = -1;  Yoff[1] = -1; Yoff[2] = -1;
    Xoff[3] = -1;  Xoff[4] =  0; Xoff[5] =  1;
    Yoff[3] =  0;  Yoff[4] =  0; Yoff[5] =  0;
    Xoff[6] = -1;  Xoff[7] =  0; Xoff[8] =  1;
    Yoff[6] =  1;  Yoff[7] =  1; Yoff[8] =  1;

    // Generate a 5-point lower triangular 2D Finite Difference matrix.
    // Entry 2 must be written to the lower-triangular vectors (XLoff/YLoff),
    // not to the full-stencil vectors Xoff/Yoff.
    XLoff.Size(5);
    YLoff.Size(5);
    XLoff[0] = -1;  XLoff[1] =  0; XLoff[2] =  1;
    YLoff[0] = -1;  YLoff[1] = -1; YLoff[2] = -1;
    XLoff[3] = -1;  XLoff[4] =  0;
    YLoff[3] =  0;  YLoff[4] =  0;

    // Generate a 4-point upper triangular 2D Finite Difference matrix
    XUoff.Size(4);
    YUoff.Size(4);
    XUoff[0] =  1;
    YUoff[0] =  0;
    XUoff[1] = -1;  XUoff[2] =  0; XUoff[3] =  1;
    YUoff[1] =  1;  YUoff[2] =  1; YUoff[3] =  1;

  }
  else {
    // Generate a 25-point 2D Finite Difference matrix: the full 5x5 patch,
    // stored row by row (y from -2 to 2, x changing fastest from -2 to 2).
    Xoff.Size(25);
    Yoff.Size(25);
    int idx = 0;
    for (int yo = -2; yo <= 2; ++yo) {
      for (int xo = -2; xo <= 2; ++xo) {
        Xoff[idx] = xo;
        Yoff[idx] = yo;
        ++idx;
      }
    }

    // Generate a 13-point lower triangular 2D Finite Difference matrix:
    // rows y=-2 and y=-1 in full, row y=0 from x=-2 up to and including x=0.
    XLoff.Size(13);
    YLoff.Size(13);
    idx = 0;
    for (int yo = -2; yo <= 0; ++yo) {
      const int xLast = (yo < 0) ? 2 : 0;
      for (int xo = -2; xo <= xLast; ++xo) {
        XLoff[idx] = xo;
        YLoff[idx] = yo;
        ++idx;
      }
    }

    // Generate a 13-point upper triangular 2D Finite Difference matrix:
    // row y=0 from x=0 to x=2, then rows y=1 and y=2 in full.
    XUoff.Size(13);
    YUoff.Size(13);
    idx = 0;
    for (int yo = 0; yo <= 2; ++yo) {
      const int xFirst = (yo == 0) ? 0 : -2;
      for (int xo = xFirst; xo <= 2; ++xo) {
        XUoff[idx] = xo;
        YUoff[idx] = yo;
        ++idx;
      }
    }

  }

  // Objects produced by GenerateCrsProblem; this function deletes them after use.
  Epetra_Map * map;
  Epetra_Map * mapL;
  Epetra_Map * mapU;
  Epetra_CrsMatrix * A;
  Epetra_CrsMatrix * L;
  Epetra_CrsMatrix * U;
  Epetra_MultiVector * b;
  Epetra_MultiVector * bt;
  Epetra_MultiVector * xexact;
  Epetra_MultiVector * bL;
  Epetra_MultiVector * btL;
  Epetra_MultiVector * xexactL;
  Epetra_MultiVector * bU;
  Epetra_MultiVector * btU;
  Epetra_MultiVector * xexactU;
  Epetra_SerialDenseVector resvec(0);

  //Timings
  Epetra_Flops flopcounter;
  Epetra_Time timer(comm);

  // jstop == 1 runs only the dynamic-profile pass (j == 0); jstop == 2 adds the static-profile pass.
#ifdef EPETRA_VERY_SHORT_PERFTEST
  int jstop = 1;
#elif EPETRA_SHORT_PERFTEST
  int jstop = 1;
#else
  int jstop = 2;
#endif
  for (int j=0; j<jstop; j++) {
    for (int k=1; k<17; k++) {
      // Select which RHS counts are exercised, depending on the configured test length.
#ifdef EPETRA_VERY_SHORT_PERFTEST
      if (k<3 || (k%4==0 && k<9)) {
#elif EPETRA_SHORT_PERFTEST
      if (k<6 || k%4==0) {
#else
      if (k<7 || k%2==0) {
#endif
      int nrhs=k;
      if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";

      bool StaticProfile = (j!=0);
      if (verbose) {
        if (StaticProfile) cout << " static profile\n";
        else cout << " dynamic profile\n";
      }
      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
                         Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
                         map, A, b, bt, xexact, StaticProfile, false);


#ifdef EPETRA_HAVE_JADMATRIX

      timer.ResetStartTime();
      Epetra_JadMatrix JA(*A);
      elapsed_time = timer.ElapsedTime();
      if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;

      //cout << "A = " << *A << endl;
      //cout << "JA = " << JA << endl;

      runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);

#endif
      runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);

      delete A;
      delete b;
      delete bt;
      delete xexact;

      // Lower and upper triangular problems; the stencil size is the length of the offset vector.
      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
                         XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
                         mapL, L, bL, btL, xexactL, StaticProfile, true);


      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
                         XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
                         mapU, U, bU, btU, xexactU, StaticProfile, true);


      runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);

      delete L;
      delete bL;
      delete btL;
      delete xexactL;
      delete mapL;

      delete U;
      delete bU;
      delete btU;
      delete xexactU;
      delete mapU;

      // Multivectors for the kernel timings, built on the map of the full problem.
      Epetra_MultiVector q(*map, nrhs);
      Epetra_MultiVector z(q);
      Epetra_MultiVector r(q);

      delete map;
      q.SetFlopCounter(flopcounter);
      z.SetFlopCounter(q);
      r.SetFlopCounter(q);

      resvec.Resize(nrhs);


      flopcounter.ResetFlops();
      timer.ResetStartTime();

      //10 norms
      for( int i = 0; i < 10; ++i )
        q.Norm2( resvec.Values() );

      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;

      if (summary) {
        if (comm.NumProc()==1) cout << "Norm2" << '\t';
        cout << MFLOPs << endl;
      }

      flopcounter.ResetFlops();
      timer.ResetStartTime();

      //10 dot's
      for( int i = 0; i < 10; ++i )
        q.Dot(z, resvec.Values());

      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "Total MFLOPs for 10 Dot's  = " << MFLOPs << endl;

      if (summary) {
        if (comm.NumProc()==1) cout << "DotProd" << '\t';
        cout << MFLOPs << endl;
      }

      flopcounter.ResetFlops();
      timer.ResetStartTime();

      //10 updates
      for( int i = 0; i < 10; ++i )
        q.Update(1.0, z, 1.0, r, 0.0);

      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;

      if (summary) {
        if (comm.NumProc()==1) cout << "Update" << '\t';
        cout << MFLOPs << endl;
      }
    }
    }
  }
#ifdef EPETRA_MPI
  MPI_Finalize() ;
#endif

return ierr ;
}

// Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
//
// nx      (In) - number of grid points in x direction
// ny      (In) - number of grid points in y direction
//   The total number of equations will be nx*ny ordered such that the x direction changes
//   most rapidly:
//      First equation is at point (0,0)
//      Second at                  (1,0)
//       ...
//      nx equation at             (nx-1,0)
//      nx+1st equation at         (0,1)

// numPoints (In) - number of points in finite difference stencil
// xoff    (In) - stencil offsets in x direction (of length numPoints)
// yoff    (In) - stencil offsets in y direction (of length numPoints)
//   A standard 5-point finite difference stencil would be described as:
//     numPoints = 5
//     xoff = [-1, 1, 0,  0, 0]
//     yoff = [ 0, 0, 0, -1, 1]

// nrhs    (In) - Number of right-hand sides to generate. (The first interface produces
//                single vectors, so nrhs is not needed there.)

// comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
// map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
// A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
//                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
//                diagonal will be slightly diag dominant.
// b      (Out) - Generated RHS.  Values satisfy b = A*xexact
// bt     (Out) - Generated RHS.  Values satisfy bt = A'*xexact
// xexact (Out) - Generated exact solution to A*x = b and A'*x = bt

// Note: Caller of this function is responsible for deleting all output objects.

void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
			int * xoff, int * yoff,
			const Epetra_Comm  &comm, bool verbose, bool summary,
			Epetra_Map *& map,
			Epetra_CrsMatrix *& A,
			Epetra_Vector *& b,
			Epetra_Vector *& bt,
			Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {

  Epetra_MultiVector * b1, * bt1, * xexact1;
	
  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
		     xoff, yoff, 1, comm, verbose, summary,
		     map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);

  b = dynamic_cast<Epetra_Vector *>(b1);
  bt = dynamic_cast<Epetra_Vector *>(bt1);
  xexact = dynamic_cast<Epetra_Vector *>(xexact1);

  return;
}

void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
			int * xoff, int * yoff, int nrhs,
			const Epetra_Comm  &comm, bool verbose, bool summary,
			Epetra_Map *& map,
			Epetra_CrsMatrix *& A,
			Epetra_MultiVector *& b,
			Epetra_MultiVector *& bt,
			Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {

  Epetra_Time timer(comm);
  // Determine my global IDs
  long long * myGlobalElements;
  GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);

  int numMyEquations = numNodesX*numNodesY;

  map = new Epetra_Map((long long)-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
  delete [] myGlobalElements;

  long long numGlobalEquations = map->NumGlobalElements64();

  int profile = 0; if (StaticProfile) profile = numPoints;

#ifdef EPETRA_HAVE_STATICPROFILE

  if (MakeLocalOnly)
    A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
  else
    A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix

#else

  if (MakeLocalOnly)
    A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
  else
    A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix

#endif

  long long * indices = new long long[numPoints];
  double * values = new double[numPoints];

  double dnumPoints = (double) numPoints;
  int nx = numNodesX*numProcsX;

  for (int i=0; i<numMyEquations; i++) {

    long long rowID = map->GID64(i);
    int numIndices = 0;

    for (int j=0; j<numPoints; j++) {
      long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
      if (colID>-1 && colID<numGlobalEquations) {
	indices[numIndices] = colID;
	double value = - ((double) rand())/ ((double) RAND_MAX);
	if (colID==rowID)
	  values[numIndices++] = dnumPoints - value; // Make diagonal dominant
	else
	  values[numIndices++] = value;
      }
    }
    //cout << "Building row " << rowID << endl;
    A->InsertGlobalValues(rowID, numIndices, values, indices);
  }

  delete [] indices;
  delete [] values;
  double insertTime = timer.ElapsedTime();
  timer.ResetStartTime();
  A->FillComplete(false);
  double fillCompleteTime = timer.ElapsedTime();

  if (verbose)
    cout << "Time to insert matrix values = " << insertTime << endl
	 << "Time to complete fill        = " << fillCompleteTime << endl;
  if (summary) {
    if (comm.NumProc()==1) cout << "InsertTime" << '\t';
    cout << insertTime << endl;
    if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
    cout << fillCompleteTime << endl;
  }

  if (nrhs<=1) {
    b = new Epetra_Vector(*map);
    bt = new Epetra_Vector(*map);
    xexact = new Epetra_Vector(*map);
  }
  else {
    b = new Epetra_MultiVector(*map, nrhs);
    bt = new Epetra_MultiVector(*map, nrhs);
    xexact = new Epetra_MultiVector(*map, nrhs);
  }

  xexact->Random(); // Fill xexact with random values

  A->Multiply(false, *xexact, *b);
  A->Multiply(true, *xexact, *bt);

  return;
}
// Stores the mean in v: v is set to 1.0 times the first coefficient (coeff_[0]).
void
Stokhos::EpetraMultiVectorOrthogPoly::
computeMean(Epetra_MultiVector& v) const
{
  const Epetra_MultiVector& leading_coeff = *(coeff_[0]);
  v.Scale(1.0, leading_coeff);
}
// Solves the same linear system three ways and compares the answers:
//   1. on the matrix redistributed with Zoltan option NUM_GLOBAL_PARTITIONS=2
//      and restricted to the active processes,
//   2. on the matrix redistributed with NUM_LOCAL_PARTITIONS chosen per rank
//      (1 for PID > 1, 0 otherwise) and restricted likewise,
//   3. on the original, full problem.
// Each solve uses GMRES (AztecOO) with an ML multilevel preconditioner.
//
// ProblemType        - not used by this function.
// MLList             - ML parameter list; "ML output" is set to 0 here.
// Problem            - linear problem; its LHS and RHS are overwritten
//                      (RHS becomes A * ones, LHS ends as the full-problem solution).
// TotalErrorResidual - not modified by this function.
// TotalErrorExactSol - incremented by the 2-norms of the differences between
//                      each restricted solution and the full solution.
//
// Side effects: writes mat_f.dat, lhs_f.dat and rhs_f.dat.
//
// Returns 0 on completion, or -1 if the problem's matrix is not an
// Epetra_CrsMatrix.
int TestMultiLevelPreconditioner(char ProblemType[],
				 Teuchos::ParameterList & MLList,
				 Epetra_LinearProblem & Problem, double & TotalErrorResidual,
				 double & TotalErrorExactSol)
{
  
  Epetra_MultiVector* lhs = Problem.GetLHS();
  Epetra_MultiVector* rhs = Problem.GetRHS();
  Epetra_CrsMatrix* A = dynamic_cast<Epetra_CrsMatrix*>(Problem.GetMatrix());
  // Everything below needs a CRS matrix; bail out instead of dereferencing a null pointer.
  if (A == 0) return -1;
  int PID = A->Comm().MyPID();
  // Non-owning RCP: A stays owned by Problem.
  RCP<const Epetra_RowMatrix> Arcp = Teuchos::rcp(A, false);
  double n1, n2;
  
  // ======================================== //
  // create a rhs corresponding to lhs or 1's //
  // ======================================== //
  
  lhs->PutScalar(1.0);
  A->Multiply(false,*lhs,*rhs);

  lhs->PutScalar(0.0);
  MLList.set("ML output", 0);

  RowMatrixToMatlabFile("mat_f.dat",*A);  
  MultiVectorToMatrixMarketFile("lhs_f.dat",*lhs,0,0,false);
  MultiVectorToMatrixMarketFile("rhs_f.dat",*rhs,0,0,false);

  
  Epetra_Time Time(A->Comm());
  /* Build the Zoltan list - Group #1 */
  ParameterList Zlist1,Sublist1;
  Sublist1.set("DEBUG_LEVEL","0");
  Sublist1.set("NUM_GLOBAL_PARTITIONS","2");
  Zlist1.set("Zoltan",Sublist1);
  
  /* Start Isorropia's Ninja Magic - Group #1 */
  RefCountPtr<Isorropia::Epetra::Partitioner> partitioner1 =
    Isorropia::Epetra::create_partitioner(Arcp, Zlist1);
  Isorropia::Epetra::Redistributor rd1(partitioner1);

  Teuchos::RCP<Epetra_CrsMatrix> ResA1=rd1.redistribute(*A);
  Teuchos::RCP<Epetra_MultiVector> ResX1=rd1.redistribute(*lhs);
  Teuchos::RCP<Epetra_MultiVector> ResB1=rd1.redistribute(*rhs);

  RestrictedCrsMatrixWrapper RW1;
  RW1.restrict_comm(ResA1);
  RestrictedMultiVectorWrapper RX1,RB1;
  RX1.restrict_comm(ResX1);
  RB1.restrict_comm(ResB1);

  /* Build the Zoltan list - Group #2 */
  ParameterList Zlist2,Sublist2;
  Sublist2.set("DEBUG_LEVEL","0");
  if(PID > 1) Sublist2.set("NUM_LOCAL_PARTITIONS","1");
  else Sublist2.set("NUM_LOCAL_PARTITIONS","0");
  Zlist2.set("Zoltan",Sublist2);
    
  /* Start Isorropia's Ninja Magic - Group #2 */
  RefCountPtr<Isorropia::Epetra::Partitioner> partitioner2 =
    Isorropia::Epetra::create_partitioner(Arcp, Zlist2);
  Isorropia::Epetra::Redistributor rd2(partitioner2);

  Teuchos::RCP<Epetra_CrsMatrix> ResA2=rd2.redistribute(*A);
  Teuchos::RCP<Epetra_MultiVector> ResX2=rd2.redistribute(*lhs);
  Teuchos::RCP<Epetra_MultiVector> ResB2=rd2.redistribute(*rhs);

  RestrictedCrsMatrixWrapper RW2;
  RW2.restrict_comm(ResA2);
  RestrictedMultiVectorWrapper RX2,RB2;
  RX2.restrict_comm(ResX2);
  RB2.restrict_comm(ResB2);

  // A process solves on group #1 if it is active there, otherwise on group #2.
  if(RW1.RestrictedProcIsActive()){
    Teuchos::RCP<Epetra_CrsMatrix> SubA1 = RW1.RestrictedMatrix();
    Teuchos::RCP<Epetra_MultiVector> SubX1 = RX1.RestrictedMultiVector();
    Teuchos::RCP<Epetra_MultiVector> SubB1 = RB1.RestrictedMultiVector();    
    ML_Epetra::MultiLevelPreconditioner * SubPrec1 = new ML_Epetra::MultiLevelPreconditioner(*SubA1, MLList, true);        

    Epetra_LinearProblem Problem1(&*SubA1,&*SubX1,&*SubB1);
    AztecOO solver1(Problem1);
    solver1.SetPrecOperator(SubPrec1);  
    solver1.SetAztecOption(AZ_solver, AZ_gmres);
    solver1.SetAztecOption(AZ_output, 32);
    solver1.SetAztecOption(AZ_kspace, 160);  
    solver1.Iterate(1550, 1e-12);
    delete SubPrec1;

  }
  else{
    Teuchos::RCP<Epetra_CrsMatrix> SubA2 = RW2.RestrictedMatrix();
    Teuchos::RCP<Epetra_MultiVector> SubX2 = RX2.RestrictedMultiVector();
    Teuchos::RCP<Epetra_MultiVector> SubB2 = RB2.RestrictedMultiVector();        
    ML_Epetra::MultiLevelPreconditioner * SubPrec2 = new ML_Epetra::MultiLevelPreconditioner(*SubA2, MLList, true);        
    
    Epetra_LinearProblem Problem2(&*SubA2,&*SubX2,&*SubB2);
    AztecOO solver2(Problem2);
    solver2.SetPrecOperator(SubPrec2);  
    solver2.SetAztecOption(AZ_solver, AZ_gmres);
    solver2.SetAztecOption(AZ_output, 32);
    solver2.SetAztecOption(AZ_kspace, 160);  
    solver2.Iterate(1550, 1e-12);
    delete SubPrec2;

  }

  /* Post-processing exports */
  Epetra_MultiVector ans1(*lhs), ans2(*lhs); 
  rd1.redistribute_reverse(*ResX1,ans1);
  rd2.redistribute_reverse(*ResX2,ans2);
  
  /* Run on Full Problem */
  A->Comm().Barrier();    
  ML_Epetra::MultiLevelPreconditioner * FullPrec = new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true);          
  AztecOO solverF(Problem);
  solverF.SetPrecOperator(FullPrec);  
  solverF.SetAztecOption(AZ_solver, AZ_gmres);
  solverF.SetAztecOption(AZ_output, 32);
  solverF.SetAztecOption(AZ_kspace, 160);  
  solverF.Iterate(1550, 1e-12);
  delete FullPrec;


  /* Solution Comparison */
  // ans_i <- lhs - ans_i, i.e. the difference between the full and the restricted solution.
  ans1.Update(1.0,*lhs,-1.0);
  ans2.Update(1.0,*lhs,-1.0);
  ans1.Norm2(&n1);
  ans2.Norm2(&n2);
  if(!PID) {
    printf("Norm Diff 1 = %6.4e\n",n1);
    printf("Norm Diff 2 = %6.4e\n",n2);
  }

  TotalErrorExactSol += n1 + n2;

  // The function is declared to return int; falling off the end would be undefined behaviour.
  return 0;
}
Esempio n. 4
0
int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y, int blkSize) const {

  int xrow = X.MyLength();
  int xcol = X.NumVectors();
  int ycol = Y.NumVectors();

  int info = 0;
  int localVerbose = verbose*(MyComm.MyPID() == 0);
  double *valX = X.Values();
  int NB = 3 + callLAPACK.ILAENV(1, "hetrd", "u", blkSize);
  int lworkD = (blkSize > NB) ? blkSize*blkSize : NB*blkSize;
  int wSize = 4*blkSize*xrow + 3*blkSize + 2*blkSize*blkSize + lworkD;

  bool useY = true;
  if (ycol % blkSize != 0) {
    // Allocate an extra block to store the solutions
    wSize += blkSize*xrow;
    useY = false;
  }

  if (lWorkSpace < wSize) {
    delete[] workSpace;
    workSpace = new (std::nothrow) double[wSize];
    if (workSpace == 0) {
      info = -1;
      return info;
    }
    lWorkSpace = wSize;
  } // if (lWorkSpace < wSize)

  double *pointer = workSpace;

  // Array to store the matrix PtKP
  double *PtKP = pointer;
  pointer = pointer + blkSize*blkSize;

  // Array to store coefficient matrices
  double *coeff = pointer;
  pointer = pointer + blkSize*blkSize;

  // Workspace array
  double *workD = pointer;
  pointer = pointer + lworkD;

  // Array to store the eigenvalues of P^t K P
  double *da = pointer;
  pointer = pointer + blkSize;

  // Array to store the norms of right hand sides
  double *initNorm = pointer;
  pointer = pointer + blkSize;

  // Array to store the norms of residuals
  double *resNorm = pointer;
  pointer = pointer + blkSize;

  // Array to store the residuals
  double *valR = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector R(View, X.Map(), valR, xrow, blkSize);

  // Array to store the preconditioned residuals
  double *valZ = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector Z(View, X.Map(), valZ, xrow, blkSize);

  // Array to store the search directions
  double *valP = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector P(View, X.Map(), valP, xrow, blkSize);

  // Array to store the image of the search directions
  double *valKP = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector KP(View, X.Map(), valKP, xrow, blkSize);

  // Pointer to store the solutions
  double *valSOL = (useY == true) ? Y.Values() : pointer;

  int iRHS;
  for (iRHS = 0; iRHS < xcol; iRHS += blkSize) {

    int numVec = (iRHS + blkSize < xcol) ? blkSize : xcol - iRHS;

    // Set the initial residuals to the right hand sides
    if (numVec < blkSize) {
      R.Random();
    }
    memcpy(valR, valX + iRHS*xrow, numVec*xrow*sizeof(double));

    // Set the initial guess to zero
    valSOL = (useY == true) ? Y.Values() + iRHS*xrow : valSOL;
    Epetra_MultiVector SOL(View, X.Map(), valSOL, xrow, blkSize);
    SOL.PutScalar(0.0);

    int ii = 0;
    int iter = 0;
    int nFound = 0;

    R.Norm2(initNorm);

    if (localVerbose > 1) {
      std::cout << std::endl;
      std::cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1 << std::endl;
      if (localVerbose > 2) {
        std::fprintf(stderr,"\n");
        for (ii = 0; ii < numVec; ++ii) {
          std::cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << std::endl;
        }
        std::cout << std::endl;
      }
    }

    // Iteration loop
    for (iter = 1; iter <= iterMax; ++iter) {

      // Apply the preconditioner
      if (Prec)
        Prec->ApplyInverse(R, Z);
      else
        Z = R;

      // Define the new search directions
      if (iter == 1) {
        P = Z;
      }
      else {
        // Compute P^t K Z
        callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow,
                      0.0, workD, blkSize);
        MyComm.SumAll(workD, coeff, blkSize*blkSize);

        // Compute the coefficient (P^t K P)^{-1} P^t K Z
        callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
                      0.0, workD, blkSize);
        for (ii = 0; ii < blkSize; ++ii)
          callBLAS.SCAL(blkSize, da[ii], workD + ii, blkSize);
        callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
                      0.0, coeff, blkSize);

        // Update the search directions
        // Note: Use KP as a workspace
        memcpy(KP.Values(), P.Values(), xrow*blkSize*sizeof(double));
        callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, 1.0, KP.Values(), xrow, coeff, blkSize,
                      0.0, P.Values(), xrow);

        P.Update(1.0, Z, -1.0);

      } // if (iter == 1)

      K->Apply(P, KP);

      // Compute P^t K P
      callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow,
                    0.0, workD, blkSize);
      MyComm.SumAll(workD, PtKP, blkSize*blkSize);

      // Eigenvalue decomposition of P^t K P
      callLAPACK.SYEV('V', 'U', blkSize, PtKP, blkSize, da, workD, lworkD, &info);
      if (info) {
        // Break the loop as spectral decomposition failed
        break;
      } // if (info)

      // Compute the pseudo-inverse of the eigenvalues
      for (ii = 0; ii < blkSize; ++ii) {
        TEUCHOS_TEST_FOR_EXCEPTION(da[ii] < 0.0, std::runtime_error, "Negative "
                           "eigenvalue for P^T K P: da[" << ii << "] = "
                           << da[ii] << ".");
        da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii];
      } // for (ii = 0; ii < blkSize; ++ii)

      // Compute P^t R
      callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, P.Values(), xrow, R.Values(), xrow,
                    0.0, workD, blkSize);
      MyComm.SumAll(workD, coeff, blkSize*blkSize);

      // Compute the coefficient (P^t K P)^{-1} P^t R
      callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
                    0.0, workD, blkSize);
      for (ii = 0; ii < blkSize; ++ii)
        callBLAS.SCAL(blkSize, da[ii], workD + ii, blkSize);
      callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
                    0.0, coeff, blkSize);

      // Update the solutions
      callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, 1.0, P.Values(), xrow, coeff, blkSize,
                    1.0, valSOL, xrow);

      // Update the residuals
      callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, -1.0, KP.Values(), xrow, coeff, blkSize,
                    1.0, R.Values(), xrow);

      // Check convergence
      R.Norm2(resNorm);
      nFound = 0;
      for (ii = 0; ii < numVec; ++ii) {
        if (resNorm[ii] <= tolCG*initNorm[ii])
          nFound += 1;
      }

      if (localVerbose > 1) {
        std::cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1;
        std::cout << " -- Iteration " << iter << " -- " << nFound << " converged vectors\n";
        if (localVerbose > 2) {
          std::cout << std::endl;
          for (ii = 0; ii < numVec; ++ii) {
            std::cout << " ... ";
            std::cout.width(5);
            std::cout << ii << " ... Residual = ";
            std::cout.precision(2);
            std::cout.setf(std::ios::scientific, std::ios::floatfield);
            std::cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << std::endl;
          }
          std::cout << std::endl;
        }
      }

      if (nFound == numVec) {
        break;
      }

    }  // for (iter = 1; iter <= maxIter; ++iter)

    if (useY == false) {
      // Copy the solutions back into Y
      memcpy(Y.Values() + xrow*iRHS, valSOL, numVec*xrow*sizeof(double));
    }

    numSolve += nFound;

    if (nFound == numVec) {
      minIter = (iter < minIter) ? iter : minIter;
      maxIter = (iter > maxIter) ? iter : maxIter;
      sumIter += iter;
    }

  } // for (iRHS = 0; iRHS < xcol; iRHS += blkSize)

  return info;
}
Esempio n. 5
0
int BlockDACG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {

  // Computes the smallest eigenvalues and the corresponding eigenvectors
  // of the generalized eigenvalue problem
  // 
  //      K X = M X Lambda
  // 
  // using a Block Deflation Accelerated Conjugate Gradient algorithm.
  //
  // Note that if M is not specified, then  K X = X Lambda is solved.
  //
  // Ref: P. Arbenz & R. Lehoucq, "A comparison of algorithms for modal analysis in the
  // absence of a sparse direct method", SNL, Technical Report SAND2003-1028J
  // With the notations of this report, the coefficient beta is defined as 
  //                 diag( H^T_{k} G_{k} ) / diag( H^T_{k-1} G_{k-1} )
  // 
  // Input variables:
  // 
  // numEigen  (integer) = Number of eigenmodes requested
  // 
  // Q (Epetra_MultiVector) = Converged eigenvectors
  //                   The number of columns of Q must be equal to numEigen + blockSize.
  //                   The rows of Q are distributed across processors.
  //                   At exit, the first numEigen columns contain the eigenvectors requested.
  // 
  // lambda (array of doubles) = Converged eigenvalues
  //                   At input, it must be of size numEigen + blockSize.
  //                   At exit, the first numEigen locations contain the eigenvalues requested.
  //
  // startingEV (integer) = Number of existing converged eigenmodes
  //
  // Return information on status of computation
  // 
  // info >=   0 >> Number of converged eigenpairs at the end of computation
  // 
  // // Failure due to input arguments
  // 
  // info = -  1 >> The stiffness matrix K has not been specified.
  // info = -  2 >> The maps for the matrix K and the matrix M differ.
  // info = -  3 >> The maps for the matrix K and the preconditioner P differ.
  // info = -  4 >> The maps for the vectors and the matrix K differ.
  // info = -  5 >> Q is too small for the number of eigenvalues requested.
  // info = -  6 >> Q is too small for the computation parameters.
  //
  // info = - 10 >> Failure during the mass orthonormalization
  // 
  // info = - 20 >> Error in LAPACK during the local eigensolve
  //
  // info = - 30 >> MEMORY
  //

  // Check the input parameters
  
  if (numEigen <= startingEV) {
    return startingEV;
  }

  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen + blockSize);
  if (info < 0)
    return info;

  int myPid = MyComm.MyPID();

  // Get the weight for approximating the M-inverse norm
  Epetra_Vector *vectWeight = 0;
  if (normWeight) {
    vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
  }

  int knownEV = startingEV;
  int localVerbose = verbose*(myPid==0);

  // Define local block vectors
  //
  // MX = Working vectors (storing M*X if M is specified, else pointing to X)
  // KX = Working vectors (storing K*X)
  //
  // R = Residuals
  //
  // H = Preconditioned residuals
  //
  // P = Search directions
  // MP = Working vectors (storing M*P if M is specified, else pointing to P)
  // KP = Working vectors (storing K*P)

  int xr = Q.MyLength();
  Epetra_MultiVector X(View, Q, numEigen, blockSize);
  X.Random();

  int tmp;
  tmp = (M == 0) ? 5*blockSize*xr : 7*blockSize*xr;

  double *work1 = new (nothrow) double[tmp]; 
  if (work1 == 0) {
    if (vectWeight)
      delete vectWeight;
    info = -30;
    return info;
  }
  memRequested += sizeof(double)*tmp/(1024.0*1024.0);

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  double *tmpD = work1;

  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
  tmpD = (M) ? tmpD + xr*blockSize : tmpD;

  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);

  // Define arrays
  //
  // theta = Store the local eigenvalues (size: 2*blockSize)
  // normR = Store the norm of residuals (size: blockSize)
  //
  // oldHtR = Store the previous H_i^T*R_i    (size: blockSize)
  // currentHtR = Store the current H_i^T*R_i (size: blockSize)
  //
  // MM = Local mass matrix              (size: 2*blockSize x 2*blockSize)
  // KK = Local stiffness matrix         (size: 2*blockSize x 2*blockSize)
  //
  // S = Local eigenvectors              (size: 2*blockSize x 2*blockSize)

  int lwork2;
  lwork2 = 5*blockSize + 12*blockSize*blockSize;
  double *work2 = new (nothrow) double[lwork2];
  if (work2 == 0) {
    if (vectWeight)
      delete vectWeight;
    delete[] work1;
    info = -30;
    return info;
  }

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  tmpD = work2;

  double *theta = tmpD;
  tmpD = tmpD + 2*blockSize;

  double *normR = tmpD;
  tmpD = tmpD + blockSize;

  double *oldHtR = tmpD;
  tmpD = tmpD + blockSize;
  
  double *currentHtR = tmpD;
  tmpD = tmpD + blockSize;
  memset(currentHtR, 0, blockSize*sizeof(double));
  
  double *MM = tmpD;
  tmpD = tmpD + 4*blockSize*blockSize;

  double *KK = tmpD;
  tmpD = tmpD + 4*blockSize*blockSize;

  double *S = tmpD;

  memRequested += sizeof(double)*lwork2/(1024.0*1024.0);

  // Define an array to store the residuals history
  if (localVerbose > 2) {
    resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
    if (resHistory == 0) {
      if (vectWeight)
        delete vectWeight;
      delete[] work1;
      delete[] work2;
      info = -30;
      return info;
    }
    historyCount = 0;
  }

  // Miscellaneous definitions

  bool reStart = false;
  numRestart = 0;

  int localSize;
  int twoBlocks = 2*blockSize;
  int nFound = blockSize;
  int i, j;

  if (localVerbose > 0) {
    cout << endl;
    cout << " *|* Problem: ";
    if (M) 
      cout << "K*Q = M*Q D ";
    else
      cout << "K*Q = Q D ";
    if (Prec)
      cout << " with preconditioner";
    cout << endl;
    cout << " *|* Algorithm = DACG (block version)" << endl;
    cout << " *|* Size of blocks = " << blockSize << endl;
    cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
    cout.precision(2);
    cout.setf(ios::scientific, ios::floatfield);
    cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
    cout << " *|* Norm used for convergence: ";
    if (normWeight)
      cout << "weighted L2-norm with user-provided weights" << endl;
    else
      cout << "L^2-norm" << endl;
    if (startingEV > 0)
      cout << " *|* Input converged eigenvectors = " << startingEV << endl;
    cout << "\n -- Start iterations -- \n";
  }

  timeOuterLoop -= MyWatch.WallTime();
  for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {

    highMem = (highMem > currentSize()) ? highMem : currentSize();

    if ((outerIter == 1) || (reStart == true)) {

      reStart = false;
      localSize = blockSize;

      if (nFound > 0) {

        Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
        Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
        Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);

        // Apply the mass matrix to X
        timeMassOp -= MyWatch.WallTime();
        if (M)
          M->Apply(X2, MX2);
        timeMassOp += MyWatch.WallTime();
        massOp += nFound;

        if (knownEV > 0) {
          // Orthonormalize X against the known eigenvectors with Gram-Schmidt
          // Note: Use R as a temporary work space
          Epetra_MultiVector copyQ(View, Q, 0, knownEV);
          timeOrtho -= MyWatch.WallTime();
          info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values());
          timeOrtho += MyWatch.WallTime();
          // Exit the code if the orthogonalization did not succeed
          if (info < 0) {
            info = -10;
            delete[] work1;
            delete[] work2;
            if (vectWeight)
              delete vectWeight;
            return info;
          }
        }

        // Apply the stiffness matrix to X
        timeStifOp -= MyWatch.WallTime();
        K->Apply(X2, KX2);
        timeStifOp += MyWatch.WallTime();
        stifOp += nFound;

      } // if (nFound > 0)

    } // if ((outerIter == 1) || (reStart == true))
    else {

      // Apply the preconditioner on the residuals
      if (Prec != 0) {
        timePrecOp -= MyWatch.WallTime();
        Prec->ApplyInverse(R, H);
        timePrecOp += MyWatch.WallTime();
        precOp += blockSize;
      }
      else {
        memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
      }

      // Compute the product H^T*R
      timeSearchP -= MyWatch.WallTime();      
      memcpy(oldHtR, currentHtR, blockSize*sizeof(double));
      H.Dot(R, currentHtR);
      // Define the new search directions
      if (localSize == blockSize) {
        P.Scale(-1.0, H);
        localSize = twoBlocks;
      } // if (localSize == blockSize)
      else {
        bool hasZeroDot = false;
        for (j = 0; j < blockSize; ++j) {
          if (oldHtR[j] == 0.0) {
            hasZeroDot = true;
            break; 
          }
          callBLAS.SCAL(xr, currentHtR[j]/oldHtR[j], P.Values() + j*xr);
        }
        if (hasZeroDot == true) {
          // Restart the computation when there is a null dot product
          if (localVerbose > 0) {
            cout << endl;
            cout << " !! Null dot product -- Restart the search space !!\n";
            cout << endl;
          }
          if (blockSize == 1) {
            X.Random();
            nFound = blockSize;
          }
          else {
            Epetra_MultiVector Xinit(View, X, j, blockSize-j);
            Xinit.Random();
            nFound = blockSize - j;
          } // if (blockSize == 1)
          reStart = true;
          numRestart += 1;
          info = 0;
          continue;
        }
        callBLAS.AXPY(xr*blockSize, -1.0, H.Values(), P.Values());
      } // if (localSize == blockSize)
      timeSearchP += MyWatch.WallTime();

      // Apply the mass matrix on P
      timeMassOp -= MyWatch.WallTime();
      if (M)
        M->Apply(P, MP);
      timeMassOp += MyWatch.WallTime();
      massOp += blockSize;

      if (knownEV > 0) {
        // Orthogonalize P against the known eigenvectors
        // Note: Use R as a temporary work space
        Epetra_MultiVector copyQ(View, Q, 0, knownEV);
        timeOrtho -= MyWatch.WallTime();
        modalTool.massOrthonormalize(P, MP, M, copyQ, blockSize, 1, R.Values());
        timeOrtho += MyWatch.WallTime();
      }

      // Apply the stiffness matrix to P
      timeStifOp -= MyWatch.WallTime();
      K->Apply(P, KP);
      timeStifOp += MyWatch.WallTime();
      stifOp += blockSize;

    } // if ((outerIter == 1) || (reStart == true))

    // Form "local" mass and stiffness matrices
    // Note: Use S as a temporary workspace
    timeLocalProj -= MyWatch.WallTime();
    modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
                    KK, localSize, S);
    modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
                    MM, localSize, S);
    if (localSize > blockSize) {
      modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
                      KK + blockSize*localSize, localSize, S);
      modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
                      KK + blockSize*localSize + blockSize, localSize, S);
      modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
                      MM + blockSize*localSize, localSize, S);
      modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
                      MM + blockSize*localSize + blockSize, localSize, S);
    } // if (localSize > blockSize)
    timeLocalProj += MyWatch.WallTime();

    // Perform a spectral decomposition
    timeLocalSolve -= MyWatch.WallTime();
    int nevLocal = localSize;
    info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
                                  S, localSize, theta, localVerbose,
                                  (blockSize == 1) ? 1: 0);
    timeLocalSolve += MyWatch.WallTime();

    if (info < 0) {
      // Stop when spectral decomposition has a critical failure
      break;
    }

    // Check for restarting
    if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
      if (localVerbose > 0) {
        cout << " Iteration " << outerIter;
        cout << "- Failure for spectral decomposition - RESTART with new random search\n";
      }
      if (blockSize == 1) {
        X.Random();
        nFound = blockSize;
      }
      else {
        Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
        Xinit.Random();
        nFound = blockSize - 1;
      } // if (blockSize == 1)
      reStart = true;
      numRestart += 1;
      info = 0;
      continue;
    } // if ((theta[0] < 0.0) || (nevLocal < blockSize))

    if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
      for (j = 0; j < nevLocal; ++j)
        memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
      localSize = blockSize;
    }

    // Check the direction of eigenvectors
    // Note: This sign check is important for convergence
    for (j = 0; j < nevLocal; ++j) {
      double coeff = S[j + j*localSize];
      if (coeff < 0.0)
        callBLAS.SCAL(localSize, -1.0, S + j*localSize);
    }

    // Compute the residuals
    timeResidual -= MyWatch.WallTime();
    callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
                  S, localSize, 0.0, R.Values(), xr);
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
                    S + blockSize, localSize, 1.0, R.Values(), xr);
    }
    for (j = 0; j < blockSize; ++j)
      callBLAS.SCAL(localSize, theta[j], S + j*localSize);
    callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
                  S, localSize, 1.0, R.Values(), xr);
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
                  S + blockSize, localSize, 1.0, R.Values(), xr);
    }
    for (j = 0; j < blockSize; ++j)
      callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
    timeResidual += MyWatch.WallTime();
    
    // Compute the norms of the residuals
    timeNorm -= MyWatch.WallTime();
    if (vectWeight)
      R.NormWeighted(*vectWeight, normR);
    else
      R.Norm2(normR);
    // Scale the norms of residuals with the eigenvalues
    // Count the converged eigenvectors
    nFound = 0;
    for (j = 0; j < blockSize; ++j) {
      normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
      if (normR[j] < tolEigenSolve) 
        nFound += 1;
    }
    timeNorm += MyWatch.WallTime();

    // Store the residual history
    if (localVerbose > 2) {
      memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
      historyCount += 1;
    }

    // Print information on current iteration
    if (localVerbose > 0) {
      cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
      cout << knownEV + nFound << endl;
    }

    if (localVerbose > 1) {
      cout << endl;
      cout.precision(2);
      cout.setf(ios::scientific, ios::floatfield);
      for (i=0; i<blockSize; ++i) {
        cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
        cout << " = " << normR[i] << endl;
      }
      cout << endl;
      cout.precision(2);
      for (i=0; i<blockSize; ++i) {
        cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
        cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
        cout << " = " << theta[i] << endl;
      }
      cout << endl;
    }

    if (nFound == 0) {
      // Update the spaces
      // Note: Use H as a temporary work space
      timeLocalUpdate -= MyWatch.WallTime();
      memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double));
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
                    0.0, X.Values(), xr);
      memcpy(H.Values(), KX.Values(), xr*blockSize*sizeof(double));
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
                    0.0, KX.Values(), xr);
      if (M) {
        memcpy(H.Values(), MX.Values(), xr*blockSize*sizeof(double));
        callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
                      0.0, MX.Values(), xr);
      }
      if (localSize == twoBlocks) {
        callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, P.Values(), xr,
                      S + blockSize, localSize, 1.0, X.Values(), xr);
        callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
                      S + blockSize, localSize, 1.0, KX.Values(), xr);
        if (M) {
          callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MP.Values(), xr,
                      S + blockSize, localSize, 1.0, MX.Values(), xr);
        }
      } // if (localSize == twoBlocks)
      timeLocalUpdate += MyWatch.WallTime();
      // When required, monitor some orthogonalities
      if (verbose > 2) {
        if (knownEV == 0) {
          accuracyCheck(&X, &MX, &R, 0, (localSize>blockSize) ? &P : 0);
        }
        else {
          Epetra_MultiVector copyQ(View, Q, 0, knownEV);
          accuracyCheck(&X, &MX, &R, &copyQ, (localSize>blockSize) ? &P : 0);
        }
      } // if (verbose > 2)
      continue;
    } // if (nFound == 0)

    // Order the Ritz eigenvectors by putting the converged vectors at the beginning
    int firstIndex = blockSize;
    for (j = 0; j < blockSize; ++j) {
      if (normR[j] >= tolEigenSolve) {
        firstIndex = j;
        break;
      }
    } // for (j = 0; j < blockSize; ++j)
    while (firstIndex < nFound) {
      for (j = firstIndex; j < blockSize; ++j) {
        if (normR[j] < tolEigenSolve) {
          // Swap the j-th and firstIndex-th position
          callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
          callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
          callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
          break;
        }
      } // for (j = firstIndex; j < blockSize; ++j)
      for (j = 0; j < blockSize; ++j) {
        if (normR[j] >= tolEigenSolve) {
          firstIndex = j;
          break;
        }
      } // for (j = 0; j < blockSize; ++j)
    } // while (firstIndex < nFound)

    // Copy the converged eigenvalues
    memcpy(lambda + knownEV, theta, nFound*sizeof(double));

    // Convergence test
    if (knownEV + nFound >= numEigen) {
      callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
                    S, localSize, 0.0, R.Values(), xr);
      if (localSize > blockSize) {
        callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
                      S + blockSize, localSize, 1.0, R.Values(), xr);
      }
      memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
      knownEV += nFound;
      if (localVerbose == 1) {
        cout << endl;
        cout.precision(2);
        cout.setf(ios::scientific, ios::floatfield);
        for (i=0; i<blockSize; ++i) {
          cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
          cout << " = " << normR[i] << endl;
        }
        cout << endl;
      }
      break;
    }

    // Store the converged eigenvalues and eigenvectors
    callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
                  S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
                    S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
    }
    knownEV += nFound;

    // Define the restarting vectors
    timeRestart -= MyWatch.WallTime();
    int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
    double *Snew = S + nFound*localSize;
    memcpy(H.Values(), X.Values(), blockSize*xr*sizeof(double));
    callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
                  Snew, localSize, 0.0, X.Values(), xr);
    memcpy(H.Values(), KX.Values(), blockSize*xr*sizeof(double));
    callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
                  Snew, localSize, 0.0, KX.Values(), xr);
    if (M) {
      memcpy(H.Values(), MX.Values(), blockSize*xr*sizeof(double));
      callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
                    Snew, localSize, 0.0, MX.Values(), xr);
    }
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
                    Snew+blockSize, localSize, 1.0, X.Values(), xr);
      callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
                    Snew+blockSize, localSize, 1.0, KX.Values(), xr);
      if (M) {
        callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
                      Snew+blockSize, localSize, 1.0, MX.Values(), xr);
      }
    } // if (localSize == twoBlocks)
    if (nevLocal < blockSize + nFound) {
      // Put new random vectors at the end of the block
      Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
      Xtmp.Random();
    }
    else {
      nFound = 0;
    } // if (nevLocal < blockSize + nFound)
    reStart = true;
    timeRestart += MyWatch.WallTime();

  } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
  timeOuterLoop += MyWatch.WallTime();
  highMem = (highMem > currentSize()) ? highMem : currentSize();

  // Clean memory
  delete[] work1;
  delete[] work2;
  if (vectWeight)
    delete vectWeight;

  // Sort the eigenpairs
  timePostProce -= MyWatch.WallTime();
  if ((info == 0) && (knownEV > 0)) {
    mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
  }
  timePostProce += MyWatch.WallTime();

  return (info == 0) ? knownEV : info;

}
//==============================================================================
int Ifpack_PointRelaxation::
ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  // Applies NumSweeps_ symmetric sweeps (a forward pass followed by a backward pass
  // over the local smoothing indices) to Y, using X as right-hand side. Rows are
  // fetched one at a time through ExtractMyRowCopy.
  // Returns 0 on success; IFPACK_CHK_ERR may return early with an error code.
  const int NumVectors = X.NumVectors();
  const int Length = Matrix().MaxNumEntries();

  // Scratch storage for one matrix row, sized for the longest row
  std::vector<int> rowCols(Length);
  std::vector<double> rowVals(Length);

  // In parallel, work on a separate vector laid out on the importer's target map;
  // in serial, Y2 is a non-owning handle on Y itself.
  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
  if (!IsParallel_) {
    Y2 = Teuchos::rcp( &Y, false );
  }
  else {
    Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
  }

  // Raw column views
  double** x_ptr;
  double** y_ptr;
  double** y2_ptr;
  double* d_ptr;
  X.ExtractView(&x_ptr);
  Y.ExtractView(&y_ptr);
  Y2->ExtractView(&y2_ptr);
  Diagonal_->ExtractView(&d_ptr);

  for (int sweep = 0 ; sweep < NumSweeps_ ; ++sweep) {

    // only one data exchange per sweep
    if (IsParallel_) {
      IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));
    }

    // pass 0 walks the smoothing indices forward, pass 1 walks them backward
    for (int pass = 0 ; pass < 2 ; ++pass) {

      for (int n = 0 ; n < NumLocalSmoothingIndices_ ; ++n) {

        const int ii = (pass == 0) ? n : (NumLocalSmoothingIndices_ - 1 - n);

        // Without an explicit index list, the position is the local row itself
        int row;
        if (!LocalSmoothingIndices_)
          row = ii;
        else
          row = LocalSmoothingIndices_[ii];

        // Per-row factor multiplying the update, read from Diagonal_
        const double diagScale = d_ptr[row];

        int NumEntries;
        IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(row, Length, NumEntries,
                                                 &rowVals[0], &rowCols[0]));

        for (int m = 0 ; m < NumVectors ; ++m) {

          // Row of the matrix times the current iterate (in-place updates are visible)
          double rowDot = 0.0;
          int k = 0;
          while (k < NumEntries) {
            const int col = rowCols[k];
            rowDot += rowVals[k] * y2_ptr[m][col];
            ++k;
          }

          y2_ptr[m][row] += DampingFactor_ * (x_ptr[m][row] - rowDot) * diagScale;
        }
      }
    }

    // In parallel, copy the smoothed entries back into Y
    if (IsParallel_) {
      for (int m = 0 ; m < NumVectors ; ++m) {
        for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
          int row;
          if (!LocalSmoothingIndices_)
            row = ii;
          else
            row = LocalSmoothingIndices_[ii];
          y_ptr[m][row] = y2_ptr[m][row];
        }
      }
    }
  }

#ifdef IFPACK_FLOPCOUNTERS
  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
#endif
  return(0);
}
//==============================================================================
int LinearProblem_CrsSingletonFilter::UpdateReducedProblem(Epetra_LinearProblem * Problem) {

  // Refreshes the values of an already constructed reduced problem from the
  // matrix, RHS and LHS of Problem.
  // Error codes raised here: -1 null problem or zero pivot in a singleton row,
  // -2 matrix is not an Epetra_RowMatrix or zero pivot in a singleton column,
  // -3 missing RHS, -4 missing LHS, -5 reduced problem not yet constructed.
  // Errors from the called Epetra routines are propagated by EPETRA_CHK_ERR.

  if (Problem==0) { EPETRA_CHK_ERR(-1); } // Null problem pointer

  FullProblem_ = Problem;
  FullMatrix_ = dynamic_cast<Epetra_RowMatrix *>(Problem->GetMatrix());
  if (FullMatrix_==0) { EPETRA_CHK_ERR(-2); } // Need a RowMatrix
  if (Problem->GetRHS()==0) { EPETRA_CHK_ERR(-3); } // Need a RHS
  if (Problem->GetLHS()==0) { EPETRA_CHK_ERR(-4); } // Need a LHS
  if (!HaveReducedProblem_) { EPETRA_CHK_ERR(-5); } // Must have set up reduced problem

  // Handles on the full right-hand side and solution
  Epetra_MultiVector * FullRHS = FullProblem()->GetRHS();
  Epetra_MultiVector * FullLHS = FullProblem()->GetLHS();
  int NumVectors = FullLHS->NumVectors();

  // Row data handed back by GetRowGCIDs / GetRow
  int NumEntries;
  int * rowCols;
  double * rowVals;

  int NumMyRows = FullMatrix()->NumMyRows();
  int ColSingletonCounter = 0;

  for (int row = 0; row < NumMyRows; ++row) {

    int curGRID = FullMatrixRowMap().GID(row);

    if (!ReducedMatrixRowMap()->MyGID(curGRID)) {
      // Row is not part of the reduced matrix: it was eliminated as a singleton
      EPETRA_CHK_ERR(GetRow(row, NumEntries, rowVals, rowCols)); // Get current row

      if (NumEntries != 1) {
        // Singleton column: record the pivot element needed for the post-solve equations
        int pivotLID = ColSingletonPivotLIDs_[ColSingletonCounter];
        double pivot = rowVals[pivotLID];
        if (pivot==0.0) { EPETRA_CHK_ERR(-2); } // Encountered zero column, unable to continue
        ColSingletonPivots_[ColSingletonCounter] = pivot;
        ColSingletonCounter++;
      }
      else {
        // Singleton row: solve directly for the corresponding X value
        double pivot = rowVals[0];
        if (pivot==0.0) { EPETRA_CHK_ERR(-1); } // Encountered zero row, unable to continue
        int indX = rowCols[0];
        for (int vec = 0; vec < NumVectors; ++vec)
          (*tempExportX_)[vec][indX] = (*FullRHS)[vec][row]/pivot;
      }
      continue;
    }

    // Row belongs to the reduced matrix: overwrite its values (indices are global)
    EPETRA_CHK_ERR(GetRowGCIDs(row, NumEntries, rowVals, rowCols));
    int ierr = ReducedMatrix()->ReplaceGlobalValues(curGRID, NumEntries,
                                                    rowVals, rowCols);
    // A positive code is expected here: some submitted column entries are not part of
    // the reduced system and are ignored thanks to the column map given to the
    // ReducedMatrix constructor. Only negative codes are treated as failures.
    if (ierr<0) { EPETRA_CHK_ERR(ierr); }
  }

  assert(ColSingletonCounter==NumMyColSingletons_); // Sanity test

  // Update Reduced LHS (puts any initial guess values into the reduced system)

  ReducedLHS_->PutScalar(0.0); // zero out Reduced LHS
  EPETRA_CHK_ERR(ReducedLHS_->Import(*FullLHS, *Full2ReducedLHSImporter_, Insert));
  FullLHS->PutScalar(0.0); // zero out Full LHS since we will inject values as we get them

  // Construct Reduced RHS

  // Clear the temporary vectors
  tempX_->PutScalar(0.0);
  tempB_->PutScalar(0.0);

  // Inject the known X values into tempX so that tempB = FullMatrix*tempX can be formed,
  // and into the full X as well since that part of the solution is already known

  if (FullMatrix()->RowMatrixImporter()==0) {
    tempX_->Update(1.0, *tempExportX_, 0.0);
    FullLHS->Update(1.0, *tempExportX_, 0.0);
  }
  else {
    EPETRA_CHK_ERR(tempX_->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
    EPETRA_CHK_ERR(FullLHS->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
  }

  EPETRA_CHK_ERR(FullMatrix()->Multiply(false, *tempX_, *tempB_));

  // tempB = FullRHS - tempB: the right-hand side with the known X values accounted for
  EPETRA_CHK_ERR(tempB_->Update(1.0, *FullRHS, -1.0));

  ReducedRHS_->PutScalar(0.0);
  EPETRA_CHK_ERR(ReducedRHS_->Import(*tempB_, *Full2ReducedRHSImporter_, Insert));
  return(0);
}
Esempio n. 8
0
// Verify a set of computed eigenpairs against reference values built here.
//
// Steps performed (all output goes to std::cout, mostly from process 0):
//   1. Report the orthonormality error of Q with respect to M.
//   2. Report the eigen-residuals through myVerify.errorEigenResiduals.
//   3. Tabulate "continuous" and "discrete" eigenvalues for every mode
//      (i, j, k) below a cut-off derived from lambda[qc-1], sort both lists and
//      compare them with lambda through myVerify.errorLambda.
//   4. Build the first nMax reference eigenvectors, normalise them against M,
//      print per-mode relative errors and compare subspaces with Q.
//
// Parameters:
//   Q          - computed eigenvectors (one per column).
//   lambda     - computed eigenvalues; lambda[qc-1] is read to size the mode
//                table, so Q must have at least one column.
//   normWeight - forwarded unchanged to myVerify.errorEigenResiduals.
//   (bool)     - unused.
//
// Returns 0 on success and -1 when one of the work arrays cannot be allocated.
int ModeLaplace3DQ2::eigenCheck(const Epetra_MultiVector &Q, double *lambda, 
                                double *normWeight, bool /*smallest*/) const {

  using std::cout;
  using std::ios;

  int info = 0;
  int qc = Q.NumVectors();
  int myPid = MyComm.MyPID();

  cout.precision(2);
  cout.setf(ios::scientific, ios::floatfield);

  // Check orthonormality of eigenvectors
  double tmp = myVerify.errorOrthonormality(&Q, M);
  if (myPid == 0)
    cout << " Maximum coefficient in matrix Q^T M Q - I = " << tmp << endl;

  // Print out norm of residuals
  myVerify.errorEigenResiduals(Q, lambda, K, M, normWeight);

  // Check the eigenvalues
  // The number of modes kept per direction is derived from the largest
  // requested eigenvalue and capped at twice the number of elements.
  int numX = (int) ceil(sqrt(Lx*Lx*lambda[qc-1]/M_PI/M_PI));
  numX = (numX > 2*nX) ? 2*nX : numX;
  int numY = (int) ceil(sqrt(Ly*Ly*lambda[qc-1]/M_PI/M_PI));
  numY = (numY > 2*nY) ? 2*nY : numY;
  int numZ = (int) ceil(sqrt(Lz*Lz*lambda[qc-1]/M_PI/M_PI));
  numZ = (numZ > 2*nZ) ? 2*nZ : numZ;
  int newSize = (numX-1)*(numY-1)*(numZ-1);
  // One allocation holds both tables: [discrete | continuous].
  double *discrete = new (std::nothrow) double[2*newSize];
  if (discrete == 0) {
    return -1;
  }
  double *continuous = discrete + newSize;

  double hx = Lx/nX;
  double hy = Ly/nY;
  double hz = Lz/nZ;

  int i, j, k;
  for (k = 1; k < numZ; ++k) {
    // Compute the coefficient alphaz
    double cosk = cos(k*M_PI*hz/2/Lz);
    double a = cosk*(92.0 - 12.0*cos(k*M_PI*hz/Lz));
    double b = 48.0 + 32.0*cos(k*M_PI*hz/Lz);
    double c = -160.0*cosk;
    double delta = sqrt(b*b - 4*a*c);
    double alphaz = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
    for (j = 1; j < numY; ++j) {
      // Compute the coefficient alphay
      double cosj = cos(j*M_PI*hy/2/Ly);
      a = cosj*(92.0 - 12.0*cos(j*M_PI*hy/Ly));
      b = 48.0 + 32.0*cos(j*M_PI*hy/Ly);
      c = -160.0*cosj;
      delta = sqrt(b*b - 4*a*c);
      double alphay = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
      for (i = 1; i < numX; ++i) {
        // Compute the coefficient alphax
        double cosi = cos(i*M_PI*hx/2/Lx);
        a = cosi*(92.0 - 12.0*cos(i*M_PI*hx/Lx));
        b = 48.0 + 32.0*cos(i*M_PI*hx/Lx);
        c = -160.0*cosi;
        delta = sqrt(b*b - 4*a*c);
        double alphax = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
        // Compute the continuous eigenvalue
        int pos = i-1 + (j-1)*(numX-1) + (k-1)*(numX-1)*(numY-1);
        continuous[pos] = M_PI*M_PI*(i*i/(Lx*Lx) + j*j/(Ly*Ly) + k*k/(Lz*Lz));
        // Compute the discrete eigenvalue
        discrete[pos] = 240.0*(1.0-alphax*cosi)/((8.0+2*alphax*cosi)*(3.0*hx*hx));
        discrete[pos] += 240.0*(1.0-alphay*cosj)/((8.0+2*alphay*cosj)*(3.0*hy*hy));
        discrete[pos] += 240.0*(1.0-alphaz*cosk)/((8.0+2*alphaz*cosk)*(3.0*hz*hz));
      }
    }
  }

  // Sort the eigenvalues in ascending order
  mySort.sortScalars(newSize, continuous);

  int *used = new (std::nothrow) int[newSize];
  if (used == 0) {
    delete[] discrete;
    return -1;
  }

  mySort.sortScalars(newSize, discrete, used);

  int *index = new (std::nothrow) int[newSize];
  if (index == 0) {
    delete[] discrete;
    delete[] used;
    return -1;
  }

  // Invert the permutation returned by the sort: index[pos] is the rank of
  // mode 'pos' in the sorted list of discrete eigenvalues.
  for (i=0; i<newSize; ++i) {
    index[used[i]] = i;
  }
  delete[] used;

  int nMax = myVerify.errorLambda(continuous, discrete, newSize, lambda, qc);

  // Define the exact discrete eigenvectors
  // Layout of vQ: nMax eigenvector columns, one scratch column (shared by
  // MQex and shapeInt below), then nMax doubles used as normL2.
  int localSize = Map->NumMyElements();
  double *vQ = new (std::nothrow) double[(nMax+1)*localSize + nMax];
  if (vQ == 0) {
    delete[] discrete;
    delete[] index;
    info = -1;
    return info;
  }

  double *normL2 = vQ + (nMax+1)*localSize;
  Epetra_MultiVector Qex(View, *Map, vQ, localSize, nMax);

  if ((myPid == 0) && (nMax > 0)) {
    cout << endl;
    cout << " --- Relative discretization errors for exact eigenvectors ---" << endl;
    cout << endl;
    cout << "       Cont. Values   Disc. Values     Error      H^1 norm   L^2 norm\n";
  }

  for (k=1; k < numZ; ++k) {
    for (j=1; j < numY; ++j) {
      for (i=1; i < numX; ++i) {
        int pos = i-1 + (j-1)*(numX-1) + (k-1)*(numX-1)*(numY-1);
        if (index[pos] < nMax) {
          // The alpha coefficients depend only on the mode numbers (i, j, k),
          // so they are evaluated once per mode rather than once per node.
          // Compute the coefficient alphaz
          double cosk = cos(k*M_PI*hz/2/Lz);
          double a = cosk*(92.0 - 12.0*cos(k*M_PI*hz/Lz));
          double b = 48.0 + 32.0*cos(k*M_PI*hz/Lz);
          double c = -160.0*cosk;
          double delta = sqrt(b*b - 4*a*c);
          double alphaz = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
          // Compute the coefficient alphay
          double cosj = cos(j*M_PI*hy/2/Ly);
          a = cosj*(92.0 - 12.0*cos(j*M_PI*hy/Ly));
          b = 48.0 + 32.0*cos(j*M_PI*hy/Ly);
          c = -160.0*cosj;
          delta = sqrt(b*b - 4*a*c);
          double alphay = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
          // Compute the coefficient alphax
          double cosi = cos(i*M_PI*hx/2/Lx);
          a = cosi*(92.0 - 12.0*cos(i*M_PI*hx/Lx));
          b = 48.0 + 32.0*cos(i*M_PI*hx/Lx);
          c = -160.0*cosi;
          delta = sqrt(b*b - 4*a*c);
          double alphax = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
          int ii;
          for (ii=0; ii<localSize; ++ii) {
            // Get the value for this eigenvector
            double coeff = sin(i*(M_PI/Lx)*x[ii])*sin(j*(M_PI/Ly)*y[ii])*sin(k*(M_PI/Lz)*z[ii]);
            // A node lying within a quarter cell of a multiple of h in a
            // direction gets the alpha factor for that direction.
            if (fabs(x[ii] - floor(x[ii]/hx+0.5)*hx) < 0.25*hx)
              coeff *= alphax;
            if (fabs(y[ii] - floor(y[ii]/hy+0.5)*hy) < 0.25*hy)
              coeff *= alphay;
            if (fabs(z[ii] - floor(z[ii]/hz+0.5)*hz) < 0.25*hz)
              coeff *= alphaz;
            Qex.ReplaceMyValue(ii, index[pos], coeff);
          }
          // Normalize Qex against the mass matrix
          Epetra_MultiVector MQex(View, *Map, vQ + nMax*localSize, localSize, 1);
          Epetra_MultiVector Qi(View, Qex, index[pos], 1);
          M->Apply(Qi, MQex);
          double mnorm = 0.0;
          Qi.Dot(MQex, &mnorm); 
          Qi.Scale(1.0/sqrt(mnorm));
          // Compute the L2 norm
          // shapeInt reuses the scratch column that MQex occupied above.
          Epetra_MultiVector shapeInt(View, *Map, vQ + nMax*localSize, localSize, 1);
          for (ii=0; ii<localSize; ++ii) {
            double iX, iY, iZ;
            if (fabs(x[ii] - floor(x[ii]/hx+0.5)*hx) < 0.25*hx)
              iX = 2.0*sin(i*(M_PI/Lx)*x[ii])/(hx*hx*i*(M_PI/Lx)*i*(M_PI/Lx)*i*(M_PI/Lx))*
                   sqrt(2.0/Lx)*( 3*hx*i*(M_PI/Lx) - 4*sin(i*(M_PI/Lx)*hx) +
                                  cos(i*(M_PI/Lx)*hx)*hx*i*(M_PI/Lx) );
            else
              iX = 8.0*sin(i*(M_PI/Lx)*x[ii])/(hx*hx*i*(M_PI/Lx)*i*(M_PI/Lx)*i*(M_PI/Lx))*
                   sqrt(2.0/Lx)*( 2*sin(i*(M_PI/Lx)*0.5*hx) - 
                                  cos(i*(M_PI/Lx)*0.5*hx)*hx*i*(M_PI/Lx));
            if (fabs(y[ii] - floor(y[ii]/hy+0.5)*hy) < 0.25*hy)
              iY = 2.0*sin(j*(M_PI/Ly)*y[ii])/(hy*hy*j*(M_PI/Ly)*j*(M_PI/Ly)*j*(M_PI/Ly))*
                   sqrt(2.0/Ly)*( 3*hy*j*(M_PI/Ly) - 4*sin(j*(M_PI/Ly)*hy) +
                                  cos(j*(M_PI/Ly)*hy)*hy*j*(M_PI/Ly) );
            else
              iY = 8.0*sin(j*(M_PI/Ly)*y[ii])/(hy*hy*j*(M_PI/Ly)*j*(M_PI/Ly)*j*(M_PI/Ly))*
                   sqrt(2.0/Ly)*( 2*sin(j*(M_PI/Ly)*0.5*hy) - 
                                  cos(j*(M_PI/Ly)*0.5*hy)*hy*j*(M_PI/Ly));
            if (fabs(z[ii] - floor(z[ii]/hz+0.5)*hz) < 0.25*hz)
              iZ = 2.0*sin(k*(M_PI/Lz)*z[ii])/(hz*hz*k*(M_PI/Lz)*k*(M_PI/Lz)*k*(M_PI/Lz))*
                   sqrt(2.0/Lz)*( 3*hz*k*(M_PI/Lz) - 4*sin(k*(M_PI/Lz)*hz) +
                                  cos(k*(M_PI/Lz)*hz)*hz*k*(M_PI/Lz) );
            else
              iZ = 8.0*sin(k*(M_PI/Lz)*z[ii])/(hz*hz*k*(M_PI/Lz)*k*(M_PI/Lz)*k*(M_PI/Lz))*
                   sqrt(2.0/Lz)*( 2*sin(k*(M_PI/Lz)*0.5*hz) - 
                                  cos(k*(M_PI/Lz)*0.5*hz)*hz*k*(M_PI/Lz));
            shapeInt.ReplaceMyValue(ii, 0, iX*iY*iZ);
          }
          Qi.Dot(shapeInt, normL2 + index[pos]);
        } // if index[pos] < nMax)
      } // for (i=1; i < numX; ++i)
    } // for (j=1; j < numY; ++j)
  } // for (k=1; k < numZ; ++k)

  // Only process 0 post-processes and prints the per-mode error table.
  if (myPid == 0) {
    for (i = 0; i < nMax; ++i) {
      double normH1 = continuous[i]*(1.0 - 2.0*normL2[i]) + discrete[i];
      normL2[i] = 2.0 - 2.0*normL2[i];
      normH1+= normL2[i];
      // Print out the result
      cout << " ";
      cout.width(4);
      cout << i+1 << ". ";
      cout.setf(ios::scientific, ios::floatfield);
      cout.precision(8);
      cout << continuous[i] << " " << discrete[i] << "  ";
      cout.precision(3);
      cout << fabs(discrete[i] - continuous[i])/continuous[i] << "  ";
      cout << sqrt(fabs(normH1)/(continuous[i]+1.0)) << "  ";
      cout << sqrt(fabs(normL2[i])) << endl;
    } // for (i = 0; i < nMax; ++i)
  } // if (myPid == 0)

  delete[] discrete;
  delete[] index;

  // Check the angles between exact discrete eigenvectors and computed

  myVerify.errorSubspaces(Q, Qex, M);

  // Qex is only a view of vQ, so vQ is released after its last use.
  delete[] vQ;

  return info;
}
// Constructor: assembles the block-diagonal system for the steps owned by
// this domain.
//
// From the single-step Jacobian (splitJac_) and the per-step vectors
// (splitMultiVec_, one column per step on this domain) it builds
//   - a row stencil with the single offset 0 for every step (diagonal blocks),
//   - the global index of each step on this domain,
//   - the block Jacobian, the block solution vector, its column-map
//     counterpart and the importer between the two,
// and finally loads each column of splitMultiVec_ into the block solution.
LOCA::Epetra::Interface::MultiPoint::
MultiPoint(
       const Teuchos::RCP<LOCA::Epetra::Interface::Required> &iReq_,
       const Teuchos::RCP< NOX::Epetra::Interface::Jacobian> &iJac_,
       const Epetra_MultiVector &splitMultiVec_,
       const Teuchos::RCP<Epetra_RowMatrix> &splitJac_,
       const Teuchos::RCP<EpetraExt::MultiComm> &globalComm_) :
  iReq(iReq_),
  iJac(iJac_),
  splitJac(splitJac_),
  globalComm(globalComm_),
  splitVec(*(splitMultiVec_(0))),
  splitRes(*(splitMultiVec_(0))),
  jacobian(0),
  solution(0),
  solutionOverlap(0),
  overlapImporter(0),
  timeStepsOnTimeDomain(splitMultiVec_.NumVectors()),
  numTimeDomains(globalComm_->NumSubDomains()),
  timeDomain(globalComm_->SubDomainRank()),
  conStep(0),
  rowStencil(0),
  rowIndex(0)
{
  // Process 0 of the global communicator reports the partitioning.
  // TODO: pass in globalData and use output stream
  const bool printInfo = (globalComm->MyPID() == 0);
  if (printInfo) {
    cout << "----------MultiPoint Partition Info------------";
    cout << "\n\tNumProcs              = " << globalComm->NumProc();
    cout << "\n\tSpatial Decomposition = " << splitMultiVec_.Comm().NumProc();
    cout << "\n\tNumber of Domains     = " << numTimeDomains;
    cout << "\n\tSteps on Domain 0     = " << timeStepsOnTimeDomain;
    cout << "\n\tTotal Number of Steps = " << globalComm->NumTimeSteps();
    cout << "\n-----------------------------------------------" << endl;
  }

  // Block graph description: every step couples only with itself, so each
  // stencil row holds the single offset 0 and the block matrix is diagonal.
  rowStencil = new std::vector< std::vector<int> >(timeStepsOnTimeDomain);
  rowIndex = new std::vector<int>;
  for (int step = 0; step < timeStepsOnTimeDomain; ++step) {
    std::vector<int> &stencilRow = (*rowStencil)[step];
    stencilRow.push_back(0);
    rowIndex->push_back(step + globalComm->FirstTimeStepOnDomain());
  }

  jacobian = new EpetraExt::BlockCrsMatrix(*splitJac, *rowStencil,
                                           *rowIndex, *globalComm);

  // Block solution on the row map, its overlapped twin on the column map,
  // and the importer that moves data from the former to the latter.
  solution = new EpetraExt::BlockVector(splitJac->RowMatrixRowMap(),
                                        jacobian->RowMap());
  solutionOverlap = new EpetraExt::BlockVector(splitJac->RowMatrixRowMap(),
                                               jacobian->ColMap());
  overlapImporter = new Epetra_Import(solutionOverlap->Map(), solution->Map());

  // Seed the block solution with the initial guess, one step at a time.
  for (int step = 0; step < timeStepsOnTimeDomain; ++step) {
    solution->LoadBlockValues(*(splitMultiVec_(step)), (*rowIndex)[step]);
  }
}
int 
Stokhos::ApproxSchurComplementPreconditioner::
ApplyInverse(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const
{
#ifdef STOKHOS_TEUCHOS_TIME_MONITOR
  TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total Approximate Schur Complement Time");
#endif

  // We have to be careful if Input and Result are the same vector.
  // If this is the case, the only possible solution is to make a copy
  const Epetra_MultiVector *input = &Input;
  bool made_copy = false;
  if (Input.Values() == Result.Values()) {
    input = new Epetra_MultiVector(Input);
    made_copy = true;
  } 

  // Allocate temporary storage
  int m = input->NumVectors();
  if (rhs_block == Teuchos::null || rhs_block->NumVectors() != m)
    rhs_block = 
      Teuchos::rcp(new EpetraExt::BlockMultiVector(*base_map, *sg_map, m));
  if (tmp == Teuchos::null || tmp->NumVectors() != m*max_num_mat_vec)
    tmp = Teuchos::rcp(new Epetra_MultiVector(*base_map, 
					      m*max_num_mat_vec));
  j_ptr.resize(m*max_num_mat_vec);
  mj_indices.resize(m*max_num_mat_vec);
  
  // Extract blocks
  EpetraExt::BlockMultiVector input_block(View, *base_map, *input);
  EpetraExt::BlockMultiVector result_block(View, *base_map, Result);

  result_block.PutScalar(0.0);

  // Set right-hand-side to input_block
  rhs_block->Update(1.0, input_block, 0.0);

  // At level l, linear system has the structure
  // [ A_{l-1} B_l ][ u_l^{l-1} ] = [ r_l^{l-1} ]
  // [ C_l     D_l ][ u_l^l     ]   [ r_l^l     ]

  for (int l=P; l>=1; l--) {
    // Compute D_l^{-1} r_l^l
    divide_diagonal_block(block_indices[l], block_indices[l+1], 
			  *rhs_block, result_block);

    // Compute r_l^{l-1} = r_l^{l-1} - B_l D_l^{-1} r_l^l
    multiply_block(upper_block_Cijk[l], -1.0, result_block, *rhs_block);
  }

  // Solve A_0 u_0 = r_0
  divide_diagonal_block(0, 1, *rhs_block, result_block);

  for (int l=1; l<=P; l++) {
    // Compute r_l^l - C_l*u_l^{l-1}
    multiply_block(lower_block_Cijk[l], -1.0, result_block, *rhs_block);

    // Compute D_l^{-1} (r_l^l - C_l*u_l^{l-1})
    divide_diagonal_block(block_indices[l], block_indices[l+1], 
			  *rhs_block, result_block);
  }

  if (made_copy)
    delete input;

  return 0; 
}
// Copy the entries of x selected by this object's importer into x_culled,
// using Insert combine mode.
// NOTE(review): the status returned by Import() is discarded, so a failed
// import goes unreported to the caller.
void
Albany::SolutionResponseFunction::
cullSolution(const Epetra_MultiVector& x, Epetra_MultiVector& x_culled) const
{
  x_culled.Import(x, *importer, Insert);
}
Esempio n. 12
0
//==============================================================================
// Apply the Chebyshev polynomial preconditioner of degree PolyDegree_ to X
// and store the result in Y.
//
// Outline:
//   - error -3 (through IFPACK_CHK_ERR) if the preconditioner has not been
//     computed, error -2 if X and Y have different numbers of vectors;
//   - PolyDegree_ == 0 returns 0 immediately without touching Y;
//   - if LambdaMin_ == LambdaMax_ == 1.0, Y is just the (block) diagonal
//     inverse applied to X;
//   - otherwise the first correction W is formed (using Y as initial guess
//     unless ZeroStartingSolution_ is set) and PolyDegree_-1 further
//     Chebyshev steps update W and Y.
//
// X and Y may share storage; a private copy of X (Xcopy) is then used for the
// point-wise (non-block) code paths.
//
// Returns 0 on success.
int Ifpack_Chebyshev::
ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  
  if (!IsComputed())
    IFPACK_CHK_ERR(-3);

  // Degree 0: nothing to apply, Y is left untouched.
  if (PolyDegree_ == 0)
    return 0;

  int nVec = X.NumVectors();
  int len = X.MyLength();
  if (nVec != Y.NumVectors())
    IFPACK_CHK_ERR(-2);

  Time_->ResetStartTime();

  // AztecOO gives X and Y pointing to the same memory location,
  // need to create an auxiliary vector, Xcopy
  // (owning copy when aliased, non-owning handle to X otherwise)
  Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy;
  if (X.Pointers()[0] == Y.Pointers()[0])
    Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
  else
    Xcopy = Teuchos::rcp( &X, false );

  // Per-column raw views: xPtr reads from Xcopy, yPtr writes into Y.
  double **xPtr = 0, **yPtr = 0;
  Xcopy->ExtractView(&xPtr);
  Y.ExtractView(&yPtr);

#ifdef HAVE_IFPACK_EPETRAEXT
  EpetraExt_PointToBlockDiagPermute* IBD=0;
  if (UseBlockMode_) IBD=&*InvBlockDiagonal_;
#endif
  

  //--- Do a quick solve when the matrix is identity
  // invDiag stays null in block mode; the point-wise branches below are only
  // reached when UseBlockMode_ is false.
  double *invDiag=0;
  if(!UseBlockMode_) invDiag=InvDiagonal_->Values();
  if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_)) {
#ifdef HAVE_IFPACK_EPETRAEXT
    if(UseBlockMode_) IBD->ApplyInverse(*Xcopy,Y);
    else
#endif
    if (nVec == 1) {
      double *yPointer = yPtr[0], *xPointer = xPtr[0];
      for (int i = 0; i < len; ++i)
        yPointer[i] = xPointer[i]*invDiag[i];
    }
    else {
      int i, k;
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i];
        for (k = 0; k < nVec; ++k)
          yPtr[k][i] = xPtr[k][i] * coeff;
      }
    } // if (nVec == 1)
    // NOTE(review): this early return skips the NumApplyInverse_ and
    // ApplyInverseTime_ updates performed at the end of the function.
    return 0;
  } // if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_))

  //--- Initialize coefficients
  // Note that delta stores the inverse of ML_Cheby::delta
  double alpha = LambdaMax_ / EigRatio_;
  double beta = 1.1 * LambdaMax_;
  double delta = 2.0 / (beta - alpha);
  double theta = 0.5 * (beta + alpha);
  double s1 = theta * delta;

  //--- Define vectors
  // In ML_Cheby, V corresponds to pAux and W to dk
  // V, W (and Temp) are copy-constructed from X; their contents are
  // overwritten before being read.
  Epetra_MultiVector V(X);
  Epetra_MultiVector W(X);
#ifdef HAVE_IFPACK_EPETRAEXT
  Epetra_MultiVector Temp(X);
#endif
  
  // The multi-vector loops below step through these with a stride of len,
  // i.e. they assume V and W store their columns contiguously with leading
  // dimension len -- TODO confirm for the maps used here.
  double *vPointer = V.Values(), *wPointer = W.Values();

  double oneOverTheta = 1.0/theta;
  int i, j, k;


  //--- If solving normal equations, multiply RHS by A^T
  if(SolveNormalEquations_){
    Apply_Transpose(Operator_,Y,V);
    Y=V;
  }

  // Do the smoothing when block scaling is turned OFF
  // --- Treat the initial guess
  if (ZeroStartingSolution_ == false) {
    // V = A * Y, the operator applied to the current guess.
    Operator_->Apply(Y, V);
    // Compute W = invDiag * ( X - V )/ Theta
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      // NOTE(review): the block-mode paths read X directly rather than Xcopy;
      // confirm this is safe when X and Y alias.
      Temp.Update(oneOverTheta,X,-oneOverTheta,V,0.0);
      IBD->ApplyInverse(Temp,W);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
	IBD->ApplyInverse(W,Temp);
	Apply_Transpose(Operator_,Temp,W);
      }
    }
    else
#endif
    if (nVec == 1) {
      double *xPointer = xPtr[0];
      for (i = 0; i < len; ++i)
        wPointer[i] = invDiag[i] * (xPointer[i] - vPointer[i]) * oneOverTheta;
    }
    else {
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*oneOverTheta;
        double *wi = wPointer + i, *vi = vPointer + i;
        for (k = 0; k < nVec; ++k) {
          *wi = (xPtr[k][i] - (*vi)) * coeff;
          wi = wi + len; vi = vi + len;
        }
      }
    } // if (nVec == 1)
    // Update the vector Y
    Y.Update(1.0, W, 1.0);
  }
  else {
    // Zero initial guess: the first iterate is W itself, so Y is overwritten.
    // Compute W = invDiag * X / Theta
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      IBD->ApplyInverse(X,W);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
	IBD->ApplyInverse(W,Temp);
	Apply_Transpose(Operator_,Temp,W);
      }

      W.Scale(oneOverTheta);
      Y.Update(1.0, W, 0.0);      
    }
    else
#endif
    if (nVec == 1) {
      double *xPointer = xPtr[0];
      for (i = 0; i < len; ++i){
        wPointer[i] = invDiag[i] * xPointer[i] * oneOverTheta;
      }
      memcpy(yPtr[0], wPointer, len*sizeof(double));
    }
    else {
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*oneOverTheta;
        double *wi = wPointer + i;
        for (k = 0; k < nVec; ++k) {
          *wi = xPtr[k][i] * coeff;
          wi = wi + len;
        }
      }
      for (k = 0; k < nVec; ++k)
        memcpy(yPtr[k], wPointer + k*len, len*sizeof(double));
    } // if (nVec == 1)
  } // if (ZeroStartingSolution_ == false)
  
  //--- Apply the polynomial
  // Each step: V = A*Y, rescale the previous correction W by dtemp1, add
  // dtemp2 * D^{-1} (X - V), then Y += W. rhok carries the recurrence scalar.
  double rhok = 1.0/s1, rhokp1;
  double dtemp1, dtemp2;
  int degreeMinusOne = PolyDegree_ - 1;
  if (nVec == 1) {
    double *xPointer = xPtr[0];
    for (k = 0; k < degreeMinusOne; ++k) {
      Operator_->Apply(Y, V);
      rhokp1 = 1.0 / (2.0*s1 - rhok);
      dtemp1 = rhokp1 * rhok;
      dtemp2 = 2.0 * rhokp1 * delta;
      rhok = rhokp1;
      // Compute W = dtemp1 * W
      W.Scale(dtemp1);
      // Compute W = W + dtemp2 * invDiag * ( X - V )
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      //NTS: We can clobber V since it will be reset in the Apply
      V.Update(dtemp2,X,-dtemp2);
      IBD->ApplyInverse(V,Temp);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      // NOTE(review): this branch recomputes Temp from V and writes the
      // transpose product into V, yet the update below adds Temp, so the
      // transpose product appears unused -- confirm intent.
      if(SolveNormalEquations_){
	IBD->ApplyInverse(V,Temp);
	Apply_Transpose(Operator_,Temp,V);
      }

      W.Update(1.0,Temp,1.0);
    }
    else{
#endif
      for (i = 0; i < len; ++i)
        wPointer[i] += dtemp2* invDiag[i] * (xPointer[i] - vPointer[i]);
#ifdef HAVE_IFPACK_EPETRAEXT
    }
#endif

      // Update the vector Y
      Y.Update(1.0, W, 1.0);
    } // for (k = 0; k < degreeMinusOne; ++k)
  }
  else {
    // Same recurrence for several right-hand sides.
    for (k = 0; k < degreeMinusOne; ++k) {
      Operator_->Apply(Y, V);
      rhokp1 = 1.0 / (2.0*s1 - rhok);
      dtemp1 = rhokp1 * rhok;
      dtemp2 = 2.0 * rhokp1 * delta;
      rhok = rhokp1;
      // Compute W = dtemp1 * W
      W.Scale(dtemp1);
      // Compute W = W + dtemp2 * invDiag * ( X - V )
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      //We can clobber V since it will be reset in the Apply
      V.Update(dtemp2,X,-dtemp2);
      IBD->ApplyInverse(V,Temp);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      // NOTE(review): same pattern as the single-vector branch -- the result
      // written to V is not used by the update below; confirm intent.
      if(SolveNormalEquations_){
	IBD->ApplyInverse(V,Temp);
	Apply_Transpose(Operator_,Temp,V);
      }


      W.Update(1.0,Temp,1.0);
    }
    else{
#endif
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*dtemp2;
        double *wi = wPointer + i, *vi = vPointer + i;
        for (j = 0; j < nVec; ++j) {
          *wi += (xPtr[j][i] - (*vi)) * coeff;
          wi = wi + len; vi = vi + len;
        }
      }
#ifdef HAVE_IFPACK_EPETRAEXT
    }
#endif      
      // Update the vector Y
      Y.Update(1.0, W, 1.0);
    } // for (k = 0; k < degreeMinusOne; ++k)
  } // if (nVec == 1)

  
  // Flops are updated in each of the following. 
  ++NumApplyInverse_;
  ApplyInverseTime_ += Time_->ElapsedTime();
  return(0);
}
Esempio n. 13
0
int Amesos_Scalapack::Solve() { 
  
  if( debug_ == 1 ) std::cout << "Entering `Solve()'" << std::endl;
  
  NumSolve_++;
  
  Epetra_MultiVector   *vecX = Problem_->GetLHS() ; 
  Epetra_MultiVector   *vecB = Problem_->GetRHS() ; 
  
  //
  //  Compute the number of right hands sides 
  //  (and check that X and B have the same shape) 
  //
  int nrhs; 
  if ( vecX == 0 ) { 
    nrhs = 0 ;
    EPETRA_CHK_ERR( vecB != 0 ) ; 
  } else { 
    nrhs = vecX->NumVectors() ; 
    EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ; 
  }
  
  Epetra_MultiVector *ScalapackB =0;
  Epetra_MultiVector *ScalapackX =0;
  //
  //  Extract Scalapack versions of X and B 
  //
  double *ScalapackXvalues ;
  
  Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
  Time_->ResetStartTime(); // track time to broadcast vectors
  //
  //  Copy B to the scalapack version of B
  //
  const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap();
  Epetra_MultiVector *ScalapackXextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; 
  Epetra_MultiVector *ScalapackBextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; 
  
  Epetra_Import ImportToScalapack( *VectorMap_, OriginalMap );
  ScalapackBextract->Import( *vecB, ImportToScalapack, Insert ) ;
  ScalapackB = ScalapackBextract ; 
  ScalapackX = ScalapackXextract ; 
  
  VecTime_ += Time_->ElapsedTime();
  
  //
  //  Call SCALAPACKs PDGETRS to perform the solve
  //
  
  int DescX[10];  
  
  ScalapackX->Scale(1.0, *ScalapackB) ;  
  
  int ScalapackXlda ; 
  
  Time_->ResetStartTime(); // tract time to solve
  
  //
  //  Setup DescX 
  //
  
  if( nrhs > nb_ ) {
    EPETRA_CHK_ERR( -2 );  
  }
  
  int Ierr[1] ; 
  Ierr[0] = 0 ; 
  const int zero = 0 ; 
  const int one = 1 ; 
  if ( iam_ < nprow_ * npcol_ ) {
    assert( ScalapackX->ExtractView( &ScalapackXvalues, &ScalapackXlda ) == 0 ) ; 
    
    if ( false ) std::cout << "Amesos_Scalapack.cpp: " << __LINE__ << " ScalapackXlda = "  <<  ScalapackXlda 
		      << " lda_ = "  << lda_ 
		      << " nprow_ = "  << nprow_ 
		      << " npcol_ = "  << npcol_ 
		      << " myprow_ = "  << myprow_ 
		      << " mypcol_ = "  << mypcol_ 
		      << " iam_ = "  << iam_ << std::endl ;
    if (  TwoD_distribution_ )    assert( mypcol_ >0 || EPETRA_MAX(ScalapackXlda,1) == lda_ ) ; 
    
    DESCINIT_F77(DescX, 
		 &NumGlobalElements_, 
		 &nrhs, 
		 &nb_,
		 &nb_,
		 &zero,
		 &zero,
		 &ictxt_,
		 &lda_,
		 Ierr ) ;
    assert( Ierr[0] == 0 ) ; 
    
    //
    //  For the 1D data distribution, we factor the transposed 
    //  matrix, hence we must invert the sense of the transposition
    //
    char trans = 'N';
    if ( TwoD_distribution_ ) {
      if ( UseTranspose() ) trans = 'T' ;
    } else {
      
      if ( ! UseTranspose() ) trans = 'T' ;
    }
    
    if ( nprow_ * npcol_ == 1 ) { 
      DGETRS_F77(&trans,
		 &NumGlobalElements_,  
		 &nrhs, 
		 &DenseA_[0],
		 &lda_,
		 &Ipiv_[0],
		 ScalapackXvalues,
		 &lda_,
		 Ierr ) ;
    } else { 
      PDGETRS_F77(&trans,
		  &NumGlobalElements_,  
		  &nrhs, 
		  &DenseA_[0],
		  &one,
		  &one, 
		  DescA_,
		  &Ipiv_[0],
		  ScalapackXvalues,
		  &one,
		  &one, 
		  DescX,
		  Ierr ) ;
    }
  }
  
  SolTime_ += Time_->ElapsedTime();
  
  Time_->ResetStartTime();  // track time to broadcast vectors
  //
  //  Copy X back to the original vector
  // 
  Epetra_Import ImportFromScalapack( OriginalMap, *VectorMap_ );
  vecX->Import( *ScalapackX, ImportFromScalapack, Insert ) ;
  delete ScalapackBextract ;
  delete ScalapackXextract ;
  
  VecTime_ += Time_->ElapsedTime();
  
  //  All processes should return the same error code
  if ( nprow_ * npcol_ < Comm().NumProc() ) 
    Comm().Broadcast( Ierr, 1, 0 ) ; 
  
  // MS // compute vector norms
  if( ComputeVectorNorms_ == true || verbose_ == 2 ) {
    double NormLHS, NormRHS;
    for( int i=0 ; i<nrhs ; ++i ) {
      assert((*vecX)(i)->Norm2(&NormLHS)==0);
      assert((*vecB)(i)->Norm2(&NormRHS)==0);
      if( verbose_ && Comm().MyPID() == 0 ) {
	std::cout << "Amesos_Scalapack : vector " << i << ", ||x|| = " << NormLHS
	     << ", ||b|| = " << NormRHS << std::endl;
      }
    }
  }
  
  // MS // compute true residual
  if( ComputeTrueResidual_ == true || verbose_ == 2  ) {
    double Norm;
    Epetra_MultiVector Ax(vecB->Map(),nrhs);
    for( int i=0 ; i<nrhs ; ++i ) {
      (Problem_->GetMatrix()->Multiply(UseTranspose(), *((*vecX)(i)), Ax));
      (Ax.Update(1.0, *((*vecB)(i)), -1.0));
      (Ax.Norm2(&Norm));
      
      if( verbose_ && Comm().MyPID() == 0 ) {
	std::cout << "Amesos_Scalapack : vector " << i << ", ||Ax - b|| = " << Norm << std::endl;
      }
    }
  }
  
  return Ierr[0];
  
}
Esempio n. 14
0
//=============================================================================
// Solve the linear system held by Problem_ using the KLU factorization stored
// in PrivateKluData_.
//
// Two paths:
//   - !TrustMe_ : run the numeric factorization if it is not up to date,
//     optionally re-index X and B, bring B onto the serial map (or use the
//     data in place), solve on process 0, export X back, and update the
//     timing counters.
//   - TrustMe_  : use the problem's LHS/RHS directly with no re-indexing,
//     redistribution or timing.
//
// In both paths only the process with MyPID_ == 0 calls KLU; it first copies
// B into X and then solves in place on X's storage.
//
// Returns 0 on success; error codes are reported through AMESOS_CHK_ERR.
int Amesos_Klu::Solve() 
{
  Epetra_MultiVector* vecX = 0 ;
  Epetra_MultiVector* vecB = 0 ;

#ifdef HAVE_AMESOS_EPETRAEXT
  // Hold the re-indexed vectors (if any) for the duration of the solve.
  Teuchos::RCP<Epetra_MultiVector> vecX_rcp;
  Teuchos::RCP<Epetra_MultiVector> vecB_rcp;
#endif
  
#ifdef Bug_8212
  //  This demonstrates Bug #2812 - Valgrind does not catch this
  //  memory leak
  lose_this_ = (int *) malloc( 300 ) ;
  
#ifdef Bug_8212_B
  //  This demonstrates Bug #2812 - Valgrind does catch this
  //  use of uninitialized data - but only in TestOptions/TestOptions.exe 
  //  not in Test_Basic/amesos_test.exe 	
  //  		
    if ( lose_this_[0] == 12834 ) { 
	     std::cout << __FILE__ << "::"  << __LINE__ << " very unlikely to happen " << std::endl ; 
    }
#endif
#endif

  if ( !TrustMe_  ) { 

    // Non-owning handles to the problem's vectors; they may be replaced below.
    SerialB_ = Teuchos::rcp(Problem_->GetRHS(),false);
    SerialX_ = Teuchos::rcp(Problem_->GetLHS(),false);
    
    Epetra_MultiVector* OrigVecX ;
    Epetra_MultiVector* OrigVecB ;

    if (IsNumericFactorizationOK_ == false)
      AMESOS_CHK_ERR(NumericFactorization());
    
    ResetTimer(1);
    
    //
    //  Reindex the LHS and RHS 
    //
    OrigVecX = Problem_->GetLHS() ;
    OrigVecB = Problem_->GetRHS() ;
    
    if ( Reindex_ ) { 
#ifdef HAVE_AMESOS_EPETRAEXT
      vecX_rcp = StdIndexDomain_->StandardizeIndex( *OrigVecX ) ;
      vecB_rcp = StdIndexRange_->StandardizeIndex( *OrigVecB ) ;

      vecX = &*vecX_rcp;
      vecB = &*vecB_rcp;
#else
      AMESOS_CHK_ERR( -13 ) ; // Amesos_Klu can't handle non-standard indexing without EpetraExt 
#endif
    } else {
      vecX = OrigVecX ;
      vecB = OrigVecB ;
    } 
    
    if ((vecX == 0) || (vecB == 0))
      AMESOS_CHK_ERR(-1); // something wrong in input
    
    //  Extract Serial versions of X and B
    
    ResetTimer(0);

    //  Copy B to the serial version of B
    //
    if (UseDataInPlace_ == 1) {
      // No redistribution: the serial handles point at the (possibly
      // re-indexed) vectors themselves.
#ifdef HAVE_AMESOS_EPETRAEXT
      if(vecX_rcp==Teuchos::null)
         SerialX_ = Teuchos::rcp(vecX,false);
      else
         SerialX_ = vecX_rcp;

      if(vecB_rcp==Teuchos::null)
         SerialB_ = Teuchos::rcp(vecB,false);
      else 
         SerialB_ = vecB_rcp;
#else
      SerialB_ = Teuchos::rcp(vecB,false);
      SerialX_ = Teuchos::rcp(vecX,false);
#endif
      NumVectors_ = Problem_->GetRHS()->NumVectors() ; 
    } else {
      assert (UseDataInPlace_ == 0);
      
      // (Re)allocate the serial work vectors only when the number of
      // right-hand sides has changed since the previous call.
      if( NumVectors_ != Problem_->GetRHS()->NumVectors() ) {
	NumVectors_ = Problem_->GetRHS()->NumVectors() ; 
	SerialXextract_ = rcp( new Epetra_MultiVector(*SerialMap_,NumVectors_));
	SerialBextract_ = rcp (new Epetra_MultiVector(*SerialMap_,NumVectors_));
      }
      if (NumVectors_ != vecB->NumVectors())
	AMESOS_CHK_ERR(-1); // internal error 
      
      //ImportRangeToSerial_ = rcp(new Epetra_Import ( *SerialMap_, vecB->Map() ) );
      //if ( SerialBextract_->Import(*vecB,*ImportRangeToSerial_,Insert) )
      // B is imported with the range importer, or the domain importer when
      // UseTranspose_ is set.
      Epetra_Import *UseImport;
      if(!UseTranspose_) UseImport=&*ImportRangeToSerial_;
      else UseImport=&*ImportDomainToSerial_;      
      if ( SerialBextract_->Import(*vecB,*UseImport,Insert) )
	AMESOS_CHK_ERR( -1 ) ; // internal error
      
      SerialB_ = Teuchos::rcp(&*SerialBextract_,false) ;
      SerialX_ = Teuchos::rcp(&*SerialXextract_,false) ;
    }
    
    VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);

    //  Call KLU to perform the solve
    
    ResetTimer(0);
    if (MyPID_ == 0) {
      // Both calls store the leading dimension in SerialXlda_; the value kept
      // is the one from SerialX_.
      AMESOS_CHK_ERR(SerialB_->ExtractView(&SerialBvalues_,&SerialXlda_ ));
      AMESOS_CHK_ERR(SerialX_->ExtractView(&SerialXBvalues_,&SerialXlda_ ));
      if (SerialXlda_ != NumGlobalElements_)
	AMESOS_CHK_ERR(-1);
    }

    OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
  }
  else
  {
     // TrustMe_ path: use the problem's vectors as they are.
     SerialB_ = Teuchos::rcp(Problem_->GetRHS(),false) ;
     SerialX_ = Teuchos::rcp(Problem_->GetLHS(),false) ;
     NumVectors_ = SerialX_->NumVectors();
    if (MyPID_ == 0) {
      AMESOS_CHK_ERR(SerialB_->ExtractView(&SerialBvalues_,&SerialXlda_ ));
      AMESOS_CHK_ERR(SerialX_->ExtractView(&SerialXBvalues_,&SerialXlda_ ));
    }
  }

  if ( MyPID_ == 0) {
    // Copy B into X, then let KLU overwrite X's storage with the solution.
    if ( NumVectors_ == 1 ) {
      for ( int i = 0 ; i < NumGlobalElements_ ; i++ ) 
	SerialXBvalues_[i] = SerialBvalues_[i] ;
    } else {
      SerialX_->Scale(1.0, *SerialB_ ) ;    // X = B (Klu overwrites B with X)
    }
    // NOTE(review): klu_solve is used for the transposed solve and klu_tsolve
    // for the regular one; presumably the factorization holds the transpose
    // of the matrix -- confirm against NumericFactorization().
    if (UseTranspose()) {
      amesos_klu_solve( &*PrivateKluData_->Symbolic_, &*PrivateKluData_->Numeric_,
			SerialXlda_, NumVectors_, &SerialXBvalues_[0], &*PrivateKluData_->common_ );
    } else {
      amesos_klu_tsolve( &*PrivateKluData_->Symbolic_, &*PrivateKluData_->Numeric_,
			 SerialXlda_, NumVectors_, &SerialXBvalues_[0], &*PrivateKluData_->common_ );
    }
  }

  if ( !TrustMe_ ) {
    SolveTime_ = AddTime("Total solve time", SolveTime_, 0);
    
    //  Copy X back to the original vector
    
    ResetTimer(0);
    ResetTimer(1);
    
    if (UseDataInPlace_ == 0) {
      // Export back with the importer opposite to the one used for B above.
      Epetra_Import *UseImport;
      if(!UseTranspose_) UseImport=&*ImportDomainToSerial_;
      else UseImport=&*ImportRangeToSerial_;      
      //        ImportDomainToSerial_ = rcp(new Epetra_Import ( *SerialMap_, vecX->Map() ) );
      vecX->Export( *SerialX_, *UseImport, Insert ) ;
      
    } // otherwise we are already in place.
    
    VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);
    
#if 0
    //
    //  ComputeTrueResidual causes TestOptions to fail on my linux box 
    //  Bug #1417
    if (ComputeTrueResidual_)
      ComputeTrueResidual(*SerialMatrix_, *vecX, *vecB, UseTranspose(), "Amesos_Klu");
#endif
    
    if (ComputeVectorNorms_)
      ComputeVectorNorms(*vecX, *vecB, "Amesos_Klu");
    
    OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
    
  }
  ++NumSolve_;
  
  return(0) ;
}
//==============================================================================
// Gauss-Seidel relaxation that only relies on the row-extraction interface
// of the matrix (ExtractMyRowCopy).
//
// For NumSweeps_ sweeps, every visited local row i is updated as
//   y(i) += DampingFactor_ * Diagonal_(i) * ( x(i) - sum_k A(i,k) * y(col_k) )
// Rows are visited in increasing order, or in decreasing order when
// DoBackwardGS_ is set.
//
// X  (in)     right-hand side.
// Y  (in/out) current iterate; overwritten with the relaxed iterate.
// Returns 0 on success; a non-zero code from ExtractMyRowCopy()/Import() is
// propagated through IFPACK_CHK_ERR.
int Ifpack_PointRelaxation::
ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  int NumVectors = X.NumVectors();

  // Scratch storage for one extracted row.
  int Length = Matrix().MaxNumEntries();
  std::vector<int> Indices(Length);
  std::vector<double> Values(Length);

  // In parallel the sweep works on a vector laid out on the importer's target
  // map; in serial Y2 is just a non-owning alias of Y.
  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
  if (IsParallel_)
    Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
  else
    Y2 = Teuchos::rcp( &Y, false );

  // extract views (for nicer and faster code)
  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
  X.ExtractView(&x_ptr);
  Y.ExtractView(&y_ptr);
  Y2->ExtractView(&y2_ptr);
  Diagonal_->ExtractView(&d_ptr);

  for (int j = 0; j < NumSweeps_ ; j++) {

    // data exchange is here, once per sweep
    if (IsParallel_)
      IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));

    // FIXME: do I really need this code below?
    if (NumVectors == 1) {

      double* y0_ptr = y_ptr[0];
      double* y20_ptr = y2_ptr[0];
      double* x0_ptr = x_ptr[0];

      if(!DoBackwardGS_){
        /* Forward Mode */
        for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
          // Without an explicit index list the smoothing set is 0..N-1.
          int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];

          int NumEntries;
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));

          double dtemp = 0.0;
          for (int k = 0 ; k < NumEntries ; ++k) {
            const int col = Indices[k];
            dtemp += Values[k] * y20_ptr[col];
          }

          y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
        }
      }
      else {
        /* Backward Mode */
        for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
          int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];

          int NumEntries;
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));
          double dtemp = 0.0;
          for (int k = 0 ; k < NumEntries ; ++k) {
            // The row sum must use the iterate at the entry's column, exactly
            // as the forward sweep does (it previously used row index i).
            const int col = Indices[k];
            dtemp += Values[k] * y20_ptr[col];
          }

          y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
        }
      }

      // using Export() sounded quite expensive
      if (IsParallel_)
        for (int i = 0 ; i < NumMyRows_ ; ++i)
          y0_ptr[i] = y20_ptr[i];

    }
    else {
      // NOTE(review): the multi-vector sweeps below visit every local row,
      // while the copy-back only touches the smoothing indices -- confirm
      // this asymmetry with the single-vector path is intended.
      if(!DoBackwardGS_){
        /* Forward Mode */
        for (int i = 0 ; i < NumMyRows_ ; ++i) {

          int NumEntries;
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));

          for (int m = 0 ; m < NumVectors ; ++m) {

            double dtemp = 0.0;
            for (int k = 0 ; k < NumEntries ; ++k) {
              const int col = Indices[k];
              dtemp += Values[k] * y2_ptr[m][col];
            }

            y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
          }
        }
      }
      else {
        /* Backward Mode */
        for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
          int NumEntries;
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));

          for (int m = 0 ; m < NumVectors ; ++m) {

            double dtemp = 0.0;
            for (int k = 0 ; k < NumEntries ; ++k) {
              const int col = Indices[k];
              dtemp += Values[k] * y2_ptr[m][col];
            }

            y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);

          }
        }
      }

      // using Export() sounded quite expensive
      if (IsParallel_)
        for (int m = 0 ; m < NumVectors ; ++m)
          for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
            int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
            y_ptr[m][i] = y2_ptr[m][i];
          }
    }
  }

#ifdef IFPACK_FLOPCOUNTERS
  ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
#endif

  return(0);
} //ApplyInverseGS_RowMatrix()
Esempio n. 16
0
//=============================================================================
// Solves the linear problem stored in Problem_ with the UMFPACK factorization.
//
// Steps: (1) make sure the numeric factorization exists, (2) gather the
// right-hand side into a serial multivector unless the problem is local,
// (3) call umfpack_di_solve() once per vector on process 0, (4) scatter the
// solution back into the user's LHS, (5) optionally report residual/norms.
//
// Returns 0 on success.  A missing LHS/RHS, a vector-count mismatch or an
// operator that is not an Epetra_RowMatrix yields -1; a non-zero UMFPACK
// status is returned as is.
int Amesos_Umfpack::Solve() 
{ 
  // if necessary, perform numeric factorization. 
  // This may call SymbolicFactorization() as well.
  if (!IsNumericFactorizationOK_)
    AMESOS_CHK_ERR(NumericFactorization()); 

  ResetTimer(1);

  Epetra_MultiVector* vecX = Problem_->GetLHS(); 
  Epetra_MultiVector* vecB = Problem_->GetRHS(); 

  if ((vecX == 0) || (vecB == 0))
    AMESOS_CHK_ERR(-1);

  int NumVectors = vecX->NumVectors(); 
  if (NumVectors != vecB->NumVectors())
    AMESOS_CHK_ERR(-1);

  Epetra_MultiVector *SerialB, *SerialX; 

  //  Extract Serial versions of X and B 
  //
  double *SerialXvalues ;
  double *SerialBvalues ;

  // Owned temporaries; they stay null when the problem is local.
  Epetra_MultiVector* SerialXextract = 0;
  Epetra_MultiVector* SerialBextract = 0;
    
  //  Copy B to the serial version of B
  //
  ResetTimer(0);
  
  if (IsLocal_ == 1) { 
    SerialB = vecB ; 
    SerialX = vecX ; 
  } else { 
    assert (IsLocal_ == 0);
    SerialXextract = new Epetra_MultiVector(SerialMap(),NumVectors); 
    SerialBextract = new Epetra_MultiVector(SerialMap(),NumVectors); 

    SerialBextract->Import(*vecB,Importer(),Insert);
    SerialB = SerialBextract; 
    SerialX = SerialXextract; 
  } 

  VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);
  
  //  Call UMFPACK to perform the solve
  //  Note:  UMFPACK uses a Compressed Column Storage instead of compressed row storage, 
  //  Hence to compute A X = B, we ask UMFPACK to perform A^T X = B and vice versa

  OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);

  ResetTimer(0);

  int SerialBlda, SerialXlda ; 
  int UmfpackRequest = UseTranspose()?UMFPACK_A:UMFPACK_At ;
  int status = 0;

  if ( MyPID_ == 0 ) {
    int ierr;
    ierr = SerialB->ExtractView(&SerialBvalues, &SerialBlda);
    assert (ierr == 0);
    ierr = SerialX->ExtractView(&SerialXvalues, &SerialXlda);
    assert (ierr == 0);
    assert( SerialBlda == NumGlobalElements_ ) ; 
    assert( SerialXlda == NumGlobalElements_ ) ; 
    
    for ( int j =0 ; j < NumVectors; j++ ) { 
      double *Control = (double *) NULL, *Info = (double *) NULL ;

      const int vecStatus = umfpack_di_solve (UmfpackRequest, &Ap[0], 
                                              &Ai[0], &Aval[0], 
                                              &SerialXvalues[j*SerialXlda], 
                                              &SerialBvalues[j*SerialBlda], 
                                              Numeric, Control, Info) ;
      // Remember the first failure instead of letting a later, successful
      // vector overwrite it; the remaining vectors are still solved.
      if (status == 0) status = vecStatus;
    }
  }
    
  // NOTE(review): status can only be non-zero on process 0, so an error
  // return here is not collective -- confirm callers cope with that.
  if (status) {
    // Release the temporaries before bailing out (delete on null is a no-op).
    delete SerialBextract ;
    delete SerialXextract ;
    AMESOS_CHK_ERR(status);
  }

  SolveTime_ = AddTime("Total solve time", SolveTime_, 0);
  
  //  Copy X back to the original vector
  
  ResetTimer(0);
  ResetTimer(1);

  if ( IsLocal_ == 0 ) {
    vecX->Export(*SerialX, Importer(), Insert ) ;
    if (SerialBextract) delete SerialBextract ;
    if (SerialXextract) delete SerialXextract ;
  }

  VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);

  if (ComputeTrueResidual_)
  {
    Epetra_RowMatrix* Matrix = 
      dynamic_cast<Epetra_RowMatrix*>(Problem_->GetOperator());
    // The residual needs a row matrix; refuse rather than dereference null.
    if (Matrix == 0)
      AMESOS_CHK_ERR(-1);
    ComputeTrueResidual(*Matrix, *vecX, *vecB, UseTranspose(), "Amesos_Umfpack");
  }

  if (ComputeVectorNorms_) {
    ComputeVectorNorms(*vecX, *vecB, "Amesos_Umfpack");
  }

  NumSolve_++;

  OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); // Amesos overhead

  return(0);
}
int Ifpack_PointRelaxation::
ApplyInverseGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  int* IndexOffset;
  int* Indices;
  double* Values;
  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));

  int NumVectors = X.NumVectors();

  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
  if (IsParallel_) {
    Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
  }
  else
    Y2 = Teuchos::rcp( &Y, false );

  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
  X.ExtractView(&x_ptr);
  Y.ExtractView(&y_ptr);
  Y2->ExtractView(&y2_ptr);
  Diagonal_->ExtractView(&d_ptr);

  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {

    // only one data exchange per sweep
    if (IsParallel_)
      IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));

    if(!DoBackwardGS_){
      /* Forward Mode */
      for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
        int i=LocalSmoothingIndices_[ii];

        int col;
        double diag = d_ptr[i];

        for (int m = 0 ; m < NumVectors ; ++m) {

          double dtemp = 0.0;

          for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {

            col = Indices[k];
            dtemp += Values[k] * y2_ptr[m][col];
          }

          y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
        }
      }
    }
    else {
      /* Backward Mode */
      for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
        int i=LocalSmoothingIndices_[ii];

        int col;
        double diag = d_ptr[i];

        for (int m = 0 ; m < NumVectors ; ++m) {

          double dtemp = 0.0;
          for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {

            col = Indices[k];
            dtemp += Values[k] * y2_ptr[m][col];
          }

          y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;

        }
      }
    }


    if (IsParallel_)
      for (int m = 0 ; m < NumVectors ; ++m)
        for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
          int i=LocalSmoothingIndices_[ii];
          y_ptr[m][i] = y2_ptr[m][i];
        }
  }

#ifdef IFPACK_FLOPCOUNTERS
  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
#endif
  return(0);
} //ApplyInverseGS_LocalFastCrsMatrix()
//=============================================================================
int Epetra_FastCrsMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
  //
  // This function forms the product Y = A * Y or Y = A' * X
  //
  if (X.NumVectors()==1 && Y.NumVectors()==1) {
    double * xp = (double *) X[0];
    double * yp = (double *) Y[0];
    Epetra_Vector x(View, X.Map(), xp);
    Epetra_Vector y(View, Y.Map(), yp);
    return(Multiply(TransA, x, y));
  }
  if (!Filled()) EPETRA_CHK_ERR(-1); // Matrix must be filled.

  int i, j, k;
  int * NumEntriesPerRow = NumEntriesPerRow_;
  int ** Indices = Indices_;
  double ** Values = Values_;

  double **Xp = (double**)X.Pointers();
  double **Yp = (double**)Y.Pointers();

  int NumVectors = X.NumVectors();
  int NumMyCols_ = NumMyCols();


  // Need to better manage the Import and Export vectors:
  // - Need accessor functions
  // - Need to make the NumVector match (use a View to do this)
  // - Need to look at RightScale and ColSum routines too.

  if (!TransA) {

    // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
    if (Importer()!=0) {
      if (ImportVector_!=0) {
	if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;}
      }
      if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),NumVectors); // Create import vector if needed
      ImportVector_->Import(X, *Importer(), Insert);
      Xp = (double**)ImportVector_->Pointers();
    }

    // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
    if (Exporter()!=0) {
      if (ExportVector_!=0) {
	if (ExportVector_->NumVectors()!=NumVectors) { delete ExportVector_; ExportVector_= 0;}
      }
      if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),NumVectors); // Create Export vector if needed
      Yp = (double**)ExportVector_->Pointers();
    }

    // Do actual computation

    for (i=0; i < NumMyRows_; i++) {
      int      NumEntries = *NumEntriesPerRow++;
      int *    RowIndices = *Indices++;
      double * RowValues  = *Values++;
      for (k=0; k<NumVectors; k++) {
	double sum = 0.0;
	for (j=0; j < NumEntries; j++) sum += RowValues[j] * Xp[k][RowIndices[j]];
	Yp[k][i] = sum;
      }
    }
    if (Exporter()!=0) Y.Export(*ExportVector_, *Exporter(), Add); // Fill Y with Values from export vector
  }
  else { // Transpose operation


    // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors

    if (Exporter()!=0) {
      if (ExportVector_!=0) {
	if (ExportVector_->NumVectors()!=NumVectors) { delete ExportVector_; ExportVector_= 0;}
      }
      if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),NumVectors); // Create Export vector if needed
      ExportVector_->Import(X, *Exporter(), Insert);
      Xp = (double**)ExportVector_->Pointers();
    }

    // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
    if (Importer()!=0) {
      if (ImportVector_!=0) {
	if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;}
      }
      if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),NumVectors); // Create import vector if needed
      Yp = (double**)ImportVector_->Pointers();
    }

    // Do actual computation



    for (k=0; k<NumVectors; k++) 
      for (i=0; i < NumMyCols_; i++) Yp[k][i] = 0.0; // Initialize y for transpose multiply
    
    for (i=0; i < NumMyRows_; i++) {
      int      NumEntries = *NumEntriesPerRow++;
      int *    RowIndices = *Indices++;
      double * RowValues  = *Values++;
      for (k=0; k<NumVectors; k++) {
	for (j=0; j < NumEntries; j++) Yp[k][RowIndices[j]] += RowValues[j] * Xp[k][i];
      }
    }
    if (Importer()!=0) Y.Export(*ImportVector_, *Importer(), Add); // Fill Y with Values from export vector
  }

  UpdateFlops(2*NumVectors*NumGlobalNonzeros64());
  return(0);
}
//==============================================================================
// Builds the reduced linear problem from the full one.
//
// Uses the row/column colorings (RowMapColors_, ColMapColors_) to generate
// the reduced maps, copies the surviving rows into ReducedMatrix_, records
// the values already determined by eliminated rows, and assembles the reduced
// LHS/RHS and ReducedProblem_.
//
// Returns 0 on success.  Error codes raised here: -1 reduced problem already
// built (or zero pivot in a singleton row), -2 null Problem (or zero pivot in
// a singleton column), -3 matrix is not an Epetra_RowMatrix, -4 no RHS,
// -5 no LHS.  Other non-zero codes come from the called Epetra routines.
int LinearProblem_CrsSingletonFilter::ConstructReducedProblem(Epetra_LinearProblem * Problem) {

  if (HaveReducedProblem_) EPETRA_CHK_ERR(-1); // Setup may only run once
  if (Problem==0) EPETRA_CHK_ERR(-2);          // Nothing to reduce

  FullProblem_ = Problem;
  FullMatrix_ = dynamic_cast<Epetra_RowMatrix *>(Problem->GetMatrix());
  if (FullMatrix_==0) EPETRA_CHK_ERR(-3);        // A RowMatrix is required
  if (Problem->GetRHS()==0) EPETRA_CHK_ERR(-4);  // A RHS is required
  if (Problem->GetLHS()==0) EPETRA_CHK_ERR(-5);  // A LHS is required

  // Reduced row and column maps are the color-0 parts of the colorings.
  ReducedMatrixRowMap_ = RowMapColors_->GenerateMap(0);
  ReducedMatrixColMap_ = ColMapColors_->GenerateMap(0);

  // Domain map coloring: exported from the column coloring when the matrix
  // has an importer, otherwise the reduced column map is used directly.
  if (FullMatrix()->RowMatrixImporter()==0) {
    OrigReducedMatrixDomainMap_ = ReducedMatrixColMap_;
  }
  else {
    Epetra_MapColoring DomainMapColors(FullMatrixDomainMap());
    EPETRA_CHK_ERR(DomainMapColors.Export(*ColMapColors_, *FullMatrix()->RowMatrixImporter(), AbsMax));
    OrigReducedMatrixDomainMap_ = DomainMapColors.GenerateMap(0);
  }

  // Range map coloring: only a CrsMatrix with a non-trivial exporter needs
  // an export of the row coloring; every other case reuses the row map.
  if (FullMatrixIsCrsMatrix_ && FullCrsMatrix()->Exporter()!=0) {
    Epetra_MapColoring RangeMapColors(FullMatrixRangeMap());
    EPETRA_CHK_ERR(RangeMapColors.Export(*RowMapColors_, *FullCrsMatrix()->Exporter(),
                                         AbsMax));
    ReducedMatrixRangeMap_ = RangeMapColors.GenerateMap(0);
  }
  else {
    ReducedMatrixRangeMap_ = ReducedMatrixRowMap_;
  }

  // When the reduced domain and range maps differ, LHS entries have to be
  // redistributed so they conform to the reduced rows and the RHS.
  SymmetricElimination_ = ReducedMatrixRangeMap_->SameAs(*OrigReducedMatrixDomainMap_);
  if (SymmetricElimination_) {
    ReducedMatrixDomainMap_ = OrigReducedMatrixDomainMap_;
    OrigReducedMatrixDomainMap_ = 0;
    RedistributeDomainExporter_ = 0;
  }
  else {
    ConstructRedistributeExporter(OrigReducedMatrixDomainMap_, ReducedMatrixRangeMap_,
                                  RedistributeDomainExporter_, ReducedMatrixDomainMap_);
  }

  // Full-problem vectors
  Epetra_MultiVector * FullRHS = FullProblem()->GetRHS();
  Epetra_MultiVector * FullLHS = FullProblem()->GetLHS();
  const int NumVectors = FullLHS->NumVectors();

  // Importers from the full to the reduced distribution
  Full2ReducedLHSImporter_ = new Epetra_Import(*ReducedMatrixDomainMap(), FullMatrixDomainMap());
  Full2ReducedRHSImporter_ = new Epetra_Import(*ReducedMatrixRowMap(), FullRHS->Map());

  // Reduced matrix, restricted to the reduced column map
  ReducedMatrix_ = new Epetra_CrsMatrix(Copy, *ReducedMatrixRowMap(), *ReducedMatrixColMap(), 0);

  // Holds X values that follow directly from eliminated rows
  tempExportX_ = new Epetra_MultiVector(FullMatrixColMap(), NumVectors);

  int rowLength;
  int * rowCols;
  double * rowVals;
  const int NumMyRows = FullMatrix()->NumMyRows();
  int ColSingletonCounter = 0;
  for (int localRow=0; localRow<NumMyRows; localRow++) {
    const int curGRID = FullMatrixRowMap().GID(localRow);

    if (ReducedMatrixRowMap()->MyGID(curGRID)) {
      // Row survives: copy it (with global column ids) into the reduced matrix.
      EPETRA_CHK_ERR(GetRowGCIDs(localRow, rowLength, rowVals, rowCols));

      // Columns outside the reduced column map are dropped by the matrix and
      // reported with a positive code, so only negative codes are errors.
      const int insertCode = ReducedMatrix()->InsertGlobalValues(curGRID, rowLength,
                                                                 rowVals, rowCols);
      if (insertCode<0) EPETRA_CHK_ERR(insertCode);
      continue;
    }

    // Row was eliminated.
    EPETRA_CHK_ERR(GetRow(localRow, rowLength, rowVals, rowCols));
    if (rowLength==1) {
      // Singleton row: the unknown follows from a single division.
      const double pivot = rowVals[0];
      if (pivot==0.0) EPETRA_CHK_ERR(-1); // Zero row, cannot continue
      const int indX = rowCols[0];
      for (int vec=0; vec<NumVectors; vec++)
        (*tempExportX_)[vec][indX] = (*FullRHS)[vec][localRow]/pivot;
    }
    else {
      // Singleton column: locate and store the pivot for the post-solve step.
      const int targetCol = ColSingletonColLIDs_[ColSingletonCounter];
      int hit = 0;
      while (hit<rowLength && rowCols[hit]!=targetCol) ++hit;
      if (hit<rowLength) {
        const double pivot = rowVals[hit];
        if (pivot==0.0) EPETRA_CHK_ERR(-2); // Zero column, cannot continue
        ColSingletonPivotLIDs_[ColSingletonCounter] = hit; // Position within the row
        ColSingletonPivots_[ColSingletonCounter] = pivot;
        ColSingletonCounter++;
      }
    }
  }

  // Switch the reduced matrix to local indexing.
  EPETRA_CHK_ERR(ReducedMatrix()->FillComplete(*ReducedMatrixDomainMap(), *ReducedMatrixRangeMap()));

  // Reduced LHS: carries over any initial guess; the full LHS is cleared
  // because known values are injected into it as they become available.
  ReducedLHS_ = new Epetra_MultiVector(*ReducedMatrixDomainMap(), NumVectors);
  EPETRA_CHK_ERR(ReducedLHS_->Import(*FullLHS, *Full2ReducedLHSImporter_, Insert));
  FullLHS->PutScalar(0.0);

  // Reduced RHS: first account for the X values that are already known.
  tempX_ = new Epetra_MultiVector(FullMatrixDomainMap(), NumVectors);
  tempB_ = new Epetra_MultiVector(FullRHS->Map(), NumVectors);

  // Known X values go into tempX (to form tempB = FullMatrix*tempX) and into
  // the full LHS.
  if (FullMatrix()->RowMatrixImporter()==0) {
    tempX_->Update(1.0, *tempExportX_, 0.0);
    FullLHS->Update(1.0, *tempExportX_, 0.0);
  }
  else {
    EPETRA_CHK_ERR(tempX_->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
    EPETRA_CHK_ERR(FullLHS->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
  }

  EPETRA_CHK_ERR(FullMatrix()->Multiply(false, *tempX_, *tempB_));

  // tempB = FullRHS - tempB
  EPETRA_CHK_ERR(tempB_->Update(1.0, *FullRHS, -1.0));

  ReducedRHS_ = new Epetra_MultiVector(*ReducedMatrixRowMap(), FullRHS->NumVectors());
  EPETRA_CHK_ERR(ReducedRHS_->Import(*tempB_, *Full2ReducedRHSImporter_, Insert));

  // Assemble the reduced linear problem.
  ReducedProblem_ = new Epetra_LinearProblem(ReducedMatrix_, ReducedLHS_, ReducedRHS_);

  // Reduction statistics
  const double fullRows     = FullMatrix()->NumGlobalRows();
  const double fullNonzeros = FullMatrix()->NumGlobalNonzeros();
  const double redRows      = ReducedMatrix()->NumGlobalRows();
  const double redNonzeros  = ReducedMatrix()->NumGlobalNonzeros();

  RatioOfDimensions_ = redRows/fullRows;
  RatioOfNonzeros_ = redNonzeros/fullNonzeros;
  HaveReducedProblem_ = true;

  return(0);
}
//=============================================================================
int Epetra_FastCrsMatrix::Solve(bool Upper, bool Trans, bool UnitDiagonal, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
  //
  // This function find Y such that LY = X or UY = X or the transpose cases.
  //
  if (X.NumVectors()==1 && Y.NumVectors()==1) {
    double * xp = (double *) X[0];
    double * yp = (double *) Y[0];
    Epetra_Vector x(View, X.Map(), xp);
    Epetra_Vector y(View, Y.Map(), yp);
    return(Solve(Upper, Trans, UnitDiagonal, x, y));
  }
  if (!Filled()) EPETRA_CHK_ERR(-1); // Matrix must be filled.

  if ((Upper) && (!UpperTriangular())) EPETRA_CHK_ERR(-2);
  if ((!Upper) && (!LowerTriangular())) EPETRA_CHK_ERR(-3);
  if ((!UnitDiagonal) && (NoDiagonal())) EPETRA_CHK_ERR(-4); // If matrix has no diagonal, we must use UnitDiagonal
  if ((!UnitDiagonal) && (NumMyDiagonals()<NumMyRows_)) EPETRA_CHK_ERR(-5); // Need each row to have a diagonal

  int i, j, j0, k;
  int * NumEntriesPerRow = NumEntriesPerRow_;
  int ** Indices = Indices_;
  double ** Values = Values_;
  double diag;

  // If upper, point to last row
  if ((Upper && !Trans) || (!Upper && Trans)) {
    NumEntriesPerRow += NumMyRows_-1;
    Indices += NumMyRows_-1;
    Values += NumMyRows_-1;
  }

  double **Xp = (double**)X.Pointers();
  double **Yp = (double**)Y.Pointers();

  int NumVectors = X.NumVectors();

  if (!Trans) {
    

    if (Upper) {
      
      j0 = 1;
      if (NoDiagonal()) j0--; // Include first term if no diagonal
      for (i=NumMyRows_-1; i >=0; i--) {
	int      NumEntries = *NumEntriesPerRow--;
	int *    RowIndices = *Indices--;
	double * RowValues  = *Values--;
	if (!UnitDiagonal) diag = 1.0/RowValues[0]; // Take inverse of diagonal once for later use
	for (k=0; k<NumVectors; k++) {
	  double sum = 0.0;
	  for (j=j0; j < NumEntries; j++) sum += RowValues[j] * Yp[k][RowIndices[j]];
	  
	  if (UnitDiagonal) Yp[k][i] = Xp[k][i] - sum;
	  else Yp[k][i] = (Xp[k][i] - sum)*diag;
	}
      }
    }
    else {
      j0 = 1;
      if (NoDiagonal()) j0--; // Include first term if no diagonal
      for (i=0; i < NumMyRows_; i++) {
	int      NumEntries = *NumEntriesPerRow++ - j0;
	int *    RowIndices = *Indices++;
	double * RowValues  = *Values++;
	if (!UnitDiagonal) diag = 1.0/RowValues[NumEntries]; // Take inverse of diagonal once for later use
	for (k=0; k<NumVectors; k++) {
	  double sum = 0.0;
	  for (j=0; j < NumEntries; j++) sum += RowValues[j] * Yp[k][RowIndices[j]];
	  
	  if (UnitDiagonal) Yp[k][i] = Xp[k][i] - sum;
	  else Yp[k][i] = (Xp[k][i] - sum)*diag;
	}
      }
    }
  }
  // ***********  Transpose case *******************************

  else {

    for (k=0; k<NumVectors; k++) 
      if (Yp[k]!=Xp[k]) for (i=0; i < NumMyRows_; i++) Yp[k][i] = Xp[k][i]; // Initialize y for transpose multiply
    
    if (Upper) {
      
      j0 = 1;
      if (NoDiagonal()) j0--; // Include first term if no diagonal
      
      for (i=0; i < NumMyRows_; i++) {
	int      NumEntries = *NumEntriesPerRow++;
	int *    RowIndices = *Indices++;
	double * RowValues  = *Values++;
	if (!UnitDiagonal) diag = 1.0/RowValues[j0]; // Take inverse of diagonal once for later use
	for (k=0; k<NumVectors; k++) {
	  if (!UnitDiagonal) Yp[k][i] = Yp[k][i]*diag;
	  for (j=j0; j < NumEntries; j++) Yp[k][RowIndices[j]] -= RowValues[j] * Yp[k][i];
	}
      }
    }
    else {
      
      j0 = 1;
      if (NoDiagonal()) j0--; // Include first term if no diagonal
      
      for (i=NumMyRows_-1; i>=0; i--) {
	int      NumEntries = *NumEntriesPerRow-- - j0;
	int *    RowIndices = *Indices--;
	double * RowValues  = *Values--;
	for (k=0; k<NumVectors; k++) {
	  if (!UnitDiagonal)  Yp[k][i] = Yp[k][i]/Xp[k][i];
	  for (j=0; j < NumEntries; j++) Yp[k][RowIndices[j]] -= RowValues[j] * Yp[k][i];
        }
      }
    }
  }
  
  UpdateFlops(2*NumVectors*NumGlobalNonzeros64());
  return(0);
}
Esempio n. 21
0
// Preconditioned conjugate gradient solve of K * Y = X, starting from Y = 0.
//
// X  (in)  right-hand side; only its Values() array of length MyLength() is
//          read, i.e. the code treats it as a single vector.
// Y  (out) approximate solution.
// Returns 0 on success, -1 if the work space cannot be allocated.
// Throws std::runtime_error (through TEUCHOS_TEST_FOR_EXCEPTION) when the
// step length is not strictly positive.
//
// Iteration stops after iterMax iterations or once the squared residual norm
// has dropped below tolCG^2 times its initial value.  The iteration
// statistics (numSolve, minIter, maxIter, sumIter) are updated on exit.
int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const {

  int info = 0;
  int localVerbose = verbose*(MyComm.MyPID() == 0);

  int xr = X.MyLength();

  // Three work vectors of local length xr: r, p and the shared Kp/z.
  int wSize = 3*xr;

  if (lWorkSpace < wSize) {
    if (workSpace)
      delete[] workSpace;
    workSpace = new (std::nothrow) double[wSize];
    if (workSpace == 0) {
      info = -1;
      return info;
    }
    lWorkSpace = wSize;
  } // if (lWorkSpace < wSize)

  double *pointer = workSpace;

  Epetra_Vector r(View, X.Map(), pointer);
  pointer = pointer + xr;

  Epetra_Vector p(View, X.Map(), pointer);
  pointer = pointer + xr;

  // Note: Kp and z uses the same memory space
  // (z is consumed before Kp is computed in every iteration)
  Epetra_Vector Kp(View, X.Map(), pointer);
  Epetra_Vector z(View, X.Map(), pointer);

  double tmp;
  double initNorm = 0.0, rNorm = 0.0, newRZ = 0.0, oldRZ = 0.0, alpha = 0.0;
  double tolSquare = tolCG*tolCG;

  // r = X (residual of the zero initial guess); initNorm = ||r||^2
  memcpy(r.Values(), X.Values(), xr*sizeof(double));
  tmp = callBLAS.DOT(xr, r.Values(), 1, r.Values(), 1);
  MyComm.SumAll(&tmp, &initNorm, 1);

  Y.PutScalar(0.0);

  if (localVerbose > 1) {
    std::cout << std::endl;
    std::cout  << " --- PCG Iterations --- " << std::endl;
  }

  // A zero right-hand side is solved exactly by Y = 0.  Iterating would
  // compute 0/0 for the step length and fill Y with NaN.
  const bool zeroRhs = (initNorm == 0.0);

  int iter = 0;
  if (!zeroRhs) {
    for (iter = 1; iter <= iterMax; ++iter) {

      // z = M^{-1} r (or z = r without a preconditioner)
      if (Prec) {
        Prec->ApplyInverse(r, z);
      }
      else {
        memcpy(z.Values(), r.Values(), xr*sizeof(double));
      }

      if (iter == 1) {
        tmp = callBLAS.DOT(xr, r.Values(), 1, z.Values(), 1);
        MyComm.SumAll(&tmp, &newRZ, 1);
        memcpy(p.Values(), z.Values(), xr*sizeof(double));
      }
      else {
        oldRZ = newRZ;
        tmp = callBLAS.DOT(xr, r.Values(), 1, z.Values(), 1);
        MyComm.SumAll(&tmp, &newRZ, 1);
        // p = z + (newRZ/oldRZ) * p
        p.Update(1.0, z, newRZ/oldRZ);
      }

      K->Apply(p, Kp);

      double pKp = 0.0;
      tmp = callBLAS.DOT(xr, p.Values(), 1, Kp.Values(), 1);
      MyComm.SumAll(&tmp, &pKp, 1);
      // A vanishing p^T K p would give an infinite step; map it to 0 so the
      // test below rejects it.
      alpha = (pKp != 0.0) ? newRZ/pKp : 0.0;

      // Written as !(alpha > 0) so that a NaN step is rejected as well.
      TEUCHOS_TEST_FOR_EXCEPTION(!(alpha > 0.0), std::runtime_error,
                           " !!! Non-positive value for p^TKp (" << alpha << ") !!!");

      // Y += alpha * p
      callBLAS.AXPY(xr, alpha, p.Values(), 1, Y.Values(), 1);

      // r -= alpha * K p
      alpha *= -1.0;
      callBLAS.AXPY(xr, alpha, Kp.Values(), 1, r.Values(), 1);

      // Check convergence
      tmp = callBLAS.DOT(xr, r.Values(), 1, r.Values(), 1);
      MyComm.SumAll(&tmp, &rNorm, 1);

      if (localVerbose > 1) {
        std::cout  << "   Iter. " << iter;
        std::cout.precision(4);
        std::cout.setf(std::ios::scientific, std::ios::floatfield);
        std::cout << " Residual reduction " << std::sqrt(rNorm/initNorm) << std::endl;
      }

      if (rNorm <= tolSquare*initNorm)
        break;

    } // for (iter = 1; iter <= iterMax; ++iter)
  }

  if (localVerbose == 1) {
    // With a zero right-hand side there is nothing to reduce; report 0.
    const double reduction = zeroRhs ? 0.0 : std::sqrt(rNorm/initNorm);
    std::cout << std::endl;
    std::cout << " --- End of PCG solve ---" << std::endl;
    std::cout << "   Iter. " << iter;
    std::cout.precision(4);
    std::cout.setf(std::ios::scientific, std::ios::floatfield);
    std::cout << " Residual reduction " << reduction << std::endl;
    std::cout << std::endl;
  }

  if (localVerbose > 1) {
    std::cout << std::endl;
  }

  numSolve += 1;

  minIter = (iter < minIter) ? iter : minIter;
  maxIter = (iter > maxIter) ? iter : maxIter;
  sumIter += iter;

  return info;
}
// ================================================ ====== ==== ==== == =
//! Apply the preconditioner to an Epetra_MultiVector X, puts the result in Y
int ML_Epetra::FaceMatrixFreePreconditioner::ApplyInverse(const Epetra_MultiVector& B_, Epetra_MultiVector& X) const{
  const Epetra_MultiVector *B;
  Epetra_MultiVector *Bcopy=0;

  /* Sanity Checks */
  int NumVectors=B_.NumVectors();
  if (!B_.Map().SameAs(*FaceDomainMap_)) ML_CHK_ERR(-1);
  if (NumVectors != X.NumVectors()) ML_CHK_ERR(-1);

  Epetra_MultiVector r_edge(*FaceDomainMap_,NumVectors,false);
  Epetra_MultiVector e_edge(*FaceDomainMap_,NumVectors,false);
  Epetra_MultiVector e_node(*CoarseMap_,NumVectors,false);
  Epetra_MultiVector r_node(*CoarseMap_,NumVectors,false);

  /* Deal with the B==X case */
  if (B_.Pointers()[0] == X.Pointers()[0]){
    Bcopy=new Epetra_MultiVector(B_);
    B=Bcopy;
    X.PutScalar(0.0);
  }
  else B=&B_;


  for(int i=0;i<num_cycles;i++){
    /* Pre-smoothing */
#ifdef HAVE_ML_IFPACK
    if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X));
#endif

    if(MaxLevels > 0){
      if(i != 0
#ifdef HAVE_ML_IFPACK
         || Smoother_
#endif
         ){
        /* Calculate Residual (r_e = b - (S+M+Addon) * x) */
        ML_CHK_ERR(Operator_->Apply(X,r_edge));
        ML_CHK_ERR(r_edge.Update(1.0,*B,-1.0));

        /* Xfer to coarse grid (r_n = P' * r_e) */
        ML_CHK_ERR(Prolongator_->Multiply(true,r_edge,r_node));
      }
      else{
        /* Xfer to coarse grid (r_n = P' * r_e) */
        ML_CHK_ERR(Prolongator_->Multiply(true,*B,r_node));
      }

      /* AMG on coarse grid  (e_n = (CoarseMatrix)^{-1} r_n) */
      ML_CHK_ERR(CoarsePC->ApplyInverse(r_node,e_node));

      /* Xfer back to fine grid (e_e = P * e_n) */
      ML_CHK_ERR(Prolongator_->Multiply(false,e_node,e_edge));

      /* Add in correction (x = x + e_e)        */
      ML_CHK_ERR(X.Update(1.0,e_edge,1.0));
    }/*end if*/

    /* Post-Smoothing */
#ifdef HAVE_ML_IFPACK
    if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X));
#endif

  }/*end for*/

  /* Cleanup */
  if(Bcopy) delete Bcopy;

  return 0;
}/*end ApplyInverse*/
Esempio n. 23
0
// ================================================ ====== ==== ==== == =
//! Implicitly applies the inverse in an additive format
/*!
  Sequence: pre-smoothing, residual computation, independent application of
  EdgePC to the edge residual and of NodePC to D0^T times the residual,
  summation of both corrections into X, post-smoothing.

  \param B (In)     right-hand side.
  \param X (In/Out) iterate, updated in place.
  \return 0 on success, otherwise the non-zero code of the failing operation
          (via ML_CHK_ERR).
*/
int  ML_Epetra::RefMaxwellPreconditioner::ApplyInverse_Implicit_Additive(const Epetra_MultiVector& B, Epetra_MultiVector& X) const
{
#ifdef ML_TIMING
  double t_time,t_diff;
  StartTimer(&t_time);
#endif

  int NumVectors=B.NumVectors();
  Epetra_MultiVector TempE1(X.Map(),NumVectors,false);
  Epetra_MultiVector TempE2(X.Map(),NumVectors,true);
  Epetra_MultiVector TempN1(*NodeMap_,NumVectors,false);
  Epetra_MultiVector TempN2(*NodeMap_,NumVectors,true);
  Epetra_MultiVector Resid(B.Map(),NumVectors);

  /* Pre-Smoothing */
#ifdef HAVE_ML_IFPACK
  if(IfSmoother) {ML_CHK_ERR(IfSmoother->ApplyInverse(B,X));}
  else
#endif
  if(PreEdgeSmoother) ML_CHK_ERR(PreEdgeSmoother->ApplyInverse(B,X));

  /* Build Residual (Resid = B - SM * X) */
  ML_CHK_ERR(SM_Matrix_->Multiply(false,X,TempE1));
  ML_CHK_ERR(Resid.Update(-1.0,TempE1,1.0,B,0.0));

  /* Nodal residual (TempN1 = D0' * Resid) */
  if(!HasOnlyDirichletNodes){
    ML_CHK_ERR(D0_Matrix_->Multiply(true,Resid,TempN1));
  }

  /* Precondition (1,1) block (additive)*/
  ML_CHK_ERR(EdgePC->ApplyInverse(Resid,TempE2));

  /* Precondition (2,2) block (additive)*/
  if(!HasOnlyDirichletNodes){
    ML_CHK_ERR(NodePC->ApplyInverse(TempN1,TempN2));

    /* EXPERIMENTAL: Local Nodal Stuff, if active */
    /* (return codes of this experimental path are deliberately left unchecked) */
    if(use_local_nodal_solver){
      const Epetra_Map& LocalMap=LocalNodalMatrix->DomainMap();
      Epetra_MultiVector TempNL1(LocalMap,NumVectors,true);
      Epetra_MultiVector TempNL2(LocalMap,NumVectors,true);
      Epetra_MultiVector TempN3(*NodeMap_,NumVectors,true);

      NodesToLocalNodes->Multiply(true,TempN1,TempNL1);
      LocalNodalSolver->ApplyInverse(TempNL1,TempNL2);
      NodesToLocalNodes->Multiply(false,TempNL2,TempN3);
      TempN2.Update(1.0,TempN3,1.0);
    }/*end if*/

    /* Nodal correction mapped back to the edges (TempE1 = D0 * TempN2);
       checked like the transpose product above */
    ML_CHK_ERR(D0_Matrix_->Multiply(false,TempN2,TempE1));
  }/*end if*/

  /* Update solution (braces keep the macro legal in front of 'else') */
  if(HasOnlyDirichletNodes) {ML_CHK_ERR(X.Update(1.0,TempE2,1.0));}
  else {ML_CHK_ERR(X.Update(1.0,TempE1,1.0,TempE2,1.0));}

  /* Post-Smoothing */
#ifdef HAVE_ML_IFPACK
  if(IfSmoother) {ML_CHK_ERR(IfSmoother->ApplyInverse(B,X));}
  else
#endif
    if(PostEdgeSmoother) ML_CHK_ERR(PostEdgeSmoother->ApplyInverse(B,X));


#ifdef ML_TIMING
  StopTimer(&t_time,&t_diff);
  /* Output */
  ML_Comm *comm_;
  ML_Comm_Create(&comm_);
  this->ApplicationTime_+= t_diff;
  ML_Comm_Destroy(&comm_);
#endif

  return 0;
}
int Ifpack_SORa::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{
  if(!IsComputed_) return -1;
  Time_.ResetStartTime();
  bool initial_guess_is_zero=false;
  const int lclNumRows = W_->NumMyRows();
  const int NumVectors = X.NumVectors();
  Epetra_MultiVector Temp(A_->RowMatrixRowMap(),NumVectors);

  double omega=GetOmega();

  // need to create an auxiliary vector, Xcopy
  Teuchos::RCP<const Epetra_MultiVector> Xcopy;
  if (X.Pointers()[0] == Y.Pointers()[0]){
    Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
    // Since the user didn't give us anything better, our initial guess is zero.
    Y.Scale(0.0);
    initial_guess_is_zero=true;
  }
  else
    Xcopy = Teuchos::rcp( &X, false );

  Teuchos::RCP< Epetra_MultiVector > T2;
  // Note: Assuming that the matrix has an importer.  Ifpack_PointRelaxation doesn't do this, but given that
  // I have a CrsMatrix, I'm probably OK.
  // Note: This is the lazy man's version sacrificing a few extra flops for avoiding if statements to determine
  // if things are on or off processor.
  // Note: T2 must be zero'd out
  if (IsParallel_ && W_->Importer())  T2 = Teuchos::rcp( new Epetra_MultiVector(W_->Importer()->TargetMap(),NumVectors,true));
  else T2 = Teuchos::rcp( new Epetra_MultiVector(A_->RowMatrixRowMap(),NumVectors,true));

  // Pointer grabs
  int* rowptr,*colind;
  double *values;
  double **t_ptr,** y_ptr, ** t2_ptr, **x_ptr,*d_ptr;
  T2->ExtractView(&t2_ptr);
  Y.ExtractView(&y_ptr);
  Temp.ExtractView(&t_ptr);
  Xcopy->ExtractView(&x_ptr);
  Wdiag_->ExtractView(&d_ptr);
  IFPACK_CHK_ERR(W_->ExtractCrsDataPointers(rowptr,colind,values));


  for(int i=0; i<NumSweeps_; i++){
    // Calculate b-Ax
    if(!initial_guess_is_zero  || i > 0) {
      A_->Apply(Y,Temp);
      Temp.Update(1.0,*Xcopy,-1.0);
    }
    else
      Temp.Update(1.0,*Xcopy,0.0);

    // Note: The off-processor entries of T2 never get touched (they're always zero) and the other entries are updated
    // in this sweep before they are used, so we don't need to reset T2 to zero here.

    // Do backsolve & update
    // x = x  + W^{-1} (b - A x)
    for(int j=0; j<lclNumRows; j++){
      double diag=d_ptr[j];
      for (int m=0 ; m<NumVectors; m++) {
        double dtmp=0.0;
        // Note: Since the diagonal is in the matrix, we need to zero that entry of T2 here to make sure it doesn't contribute.
        t2_ptr[m][j]=0.0;
        for(int k=rowptr[j];k<rowptr[j+1];k++){
          dtmp+= values[k]*t2_ptr[m][colind[k]];
        }
        // Yes, we need to update both of these.
        t2_ptr[m][j] = (t_ptr[m][j]- dtmp)/diag;
        y_ptr[m][j] += omega*t2_ptr[m][j];
      }
    }
  }

  // Counter update
  NumApplyInverse_++;
  ApplyInverseTime_ += Time_.ElapsedTime();
  return 0;
}
Esempio n. 25
0
// Apply the preconditioner w/ RHS B and get result X
int ML_Epetra::LevelWrap::ApplyInverse(const Epetra_MultiVector& B, Epetra_MultiVector& X_) const{
#ifdef ML_TIMING
  double t_time,t_diff;
  StartTimer(&t_time);
#endif
   
  // Sanity Checks
  if (!B.Map().SameAs(OperatorDomainMap())) return -1;
  if (!X_.Map().SameAs(OperatorRangeMap())) return -1;
  if (!X_.Map().SameAs(B.Map())) return -1;
  if (B.NumVectors() != X_.NumVectors()) return -1;

  // Build new work vector X 
  Epetra_MultiVector X(X_.Map(),X_.NumVectors(),true);
  Epetra_MultiVector tmp0(X_.Map(),X_.NumVectors(),true);
  Epetra_MultiVector tmp1(P0_->DomainMap(),X_.NumVectors(),true);
  Epetra_MultiVector tmp2(P0_->DomainMap(),X_.NumVectors(),true);
  
  // Pre Smoother
  if(pre_or_post==ML_BOTH || pre_or_post==ML_PRESMOOTHER){
    Smoother_->ApplyInverse(B,X);
  }

  // Form coarse residual
  A0_->Apply(X,tmp0);
  tmp0.Update(1.0,B,-1.0); 
  if(use_pt_) P0_->Multiply(true,tmp0,tmp1);
  else R0_->Multiply(false,tmp0,tmp1);

  // Solve coarse problem
  A1prec_->ApplyInverse(tmp1,tmp2);

  // Update solution
  P0_->Multiply(false,tmp2,tmp0);
  X.Update(1.0,tmp0,1.0);

  // Post Smoother
  if(pre_or_post==ML_BOTH || pre_or_post==ML_PRESMOOTHER){
    Smoother_->ApplyInverse(B,X);
  }

  // Copy to output
  X_=X;

#ifdef ML_TIMING
  StopTimer(&t_time,&t_diff);
  /* Output */
  ML_Comm *comm_;
  ML_Comm_Create(&comm_);
  ApplicationTime_+= t_diff;
  if(FirstApplication_){
    FirstApplication_=false;
    FirstApplicationTime_=ApplicationTime_;
  }/*end if*/
  ML_Comm_Destroy(&comm_);
#endif  

  return 0;
}
/* Computes the approximate Schur complement for the wide separator.
 *
 * Outline of what the code below does:
 *   1. Splits every locally owned row of G: entries whose column id is a local
 *      row (G->LRID(col) != -1) go into localG, all other entries are inserted
 *      straight into the result Sbar.
 *   2. Builds a ShyLU_Local_Schur_Operator from localG and the other arguments
 *      and applies it to blocks of up to 16 probing vectors, each holding a
 *      single 1.0 in the row being probed.
 *   3. From every result column keeps the diagonal entry plus each entry whose
 *      ratio to the column's MaxValue exceeds config->relative_threshold, and
 *      inserts the kept entries into Sbar.
 *
 * config        - only relative_threshold is read here; also forwarded to the operator.
 * ssym, LP, solver, ifSolver, localDRowMap - forwarded to the operator unchanged.
 * G             - matrix whose rows are split as described above.
 * R, C          - used for MaxNumEntries() sizing and forwarded to the operator
 *                 (C's communicator is also used under DUMP_MATRICES).
 *
 * Returns Sbar (built on G's row map) after FillComplete().
 */
Teuchos::RCP<Epetra_CrsMatrix> computeApproxWideSchur(shylu_config *config,
    shylu_symbolic *ssym,   // symbolic structure
    Epetra_CrsMatrix *G, Epetra_CrsMatrix *R,
    Epetra_LinearProblem *LP, Amesos_BaseSolver *solver,
    Ifpack_Preconditioner *ifSolver, Epetra_CrsMatrix *C,
    Epetra_Map *localDRowMap)
{
    int i;
    double relative_thres = config->relative_threshold;

    // Need to create local G (block diagonal portion) , R, C

    // Get row map of G
    //Epetra_Map CrMap = C->RowMap();
    //int *c_rows = CrMap.MyGlobalElements();
    //int *c_cols = (C->ColMap()).MyGlobalElements();
    //int c_totalElems = CrMap.NumGlobalElements();
    //int c_localElems = CrMap.NumMyElements();
    //int c_localcolElems = (C->ColMap()).NumMyElements();

    Epetra_Map GrMap = G->RowMap();
    int *g_rows = GrMap.MyGlobalElements();
    //int g_totalElems = GrMap.NumGlobalElements();
    int g_localElems = GrMap.NumMyElements();

    //Epetra_Map RrMap = R->RowMap();
    //int *r_rows = RrMap.MyGlobalElements();
    //int *r_cols = (R->ColMap()).MyGlobalElements();
    //int r_totalElems = RrMap.NumGlobalElements();
    //int r_localElems = RrMap.NumMyElements();
    //int r_localcolElems = (R->ColMap()).NumMyElements();

    // Map over a serial communicator that reuses G's locally owned global ids
    Epetra_SerialComm LComm;
    Epetra_Map G_localRMap (-1, g_localElems, g_rows, 0, LComm);

    int nentries1, gid;
    // maxentries is the maximum of all three possible matrices as the arrays
    // are reused between the three
    int maxentries = max(C->MaxNumEntries(), R->MaxNumEntries());
    maxentries = max(maxentries, G->MaxNumEntries());

    // Scratch arrays: *1 holds the extracted row, *2 the part kept for localG,
    // *3 the part sent directly to Sbar. All are released at the end.
    double *values1 = new double[maxentries];
    double *values2 = new double[maxentries];
    double *values3 = new double[maxentries];
    int *indices1 = new int[maxentries];
    int *indices2 = new int[maxentries];
    int *indices3 = new int[maxentries];

    // Sbar - Approximate Schur complement
    Teuchos::RCP<Epetra_CrsMatrix> Sbar = Teuchos::rcp(new Epetra_CrsMatrix(
                                            Copy, GrMap, g_localElems));

    // Include only the block diagonal elements of G in localG
    Epetra_CrsMatrix localG(Copy, G_localRMap, G->MaxNumEntries(), false);
    int cnt, scnt;
    for (i = 0; i < g_localElems ; i++)
    {
        gid = g_rows[i];
        G->ExtractGlobalRowCopy(gid, maxentries, nentries1, values1, indices1);

        cnt = 0;
        scnt = 0;
        for (int j = 0 ; j < nentries1 ; j++)
        {
            // LRID() != -1 means the column id is also a locally owned row
            if (G->LRID(indices1[j]) != -1)
            {
                values2[cnt] = values1[j];
                indices2[cnt++] = indices1[j];
            }
            else
            {
                // Add it to Sbar immediately
                values3[scnt] = values1[j];
                indices3[scnt++] = indices1[j];
            }
        }

        localG.InsertGlobalValues(gid, cnt, values2, indices2);
        Sbar->InsertGlobalValues(gid, scnt, values3, indices3);
    }
    localG.FillComplete();
    //cout << "Created local G matrix" << endl;

    // Number of probing vectors handled per operator application
    int nvectors = 16;
    /*ShyLU_Probing_Operator probeop(&localG, &localR, LP, solver, &localC,
                                        localDRowMap, nvectors);*/
    ShyLU_Local_Schur_Operator probeop(config, ssym, &localG, R, LP, solver,
                                        ifSolver, C, localDRowMap, nvectors);

#ifdef DUMP_MATRICES
    //ostringstream fnamestr;
    //fnamestr << "localC" << C->Comm().MyPID() << ".mat";
    //string Cfname = fnamestr.str();
    //EpetraExt::RowMatrixToMatlabFile(Cfname.c_str(), localC);

    //Epetra_Map defMapg(-1, g_localElems, 0, localG.Comm());
    //EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTransg =
                        //new EpetraExt::CrsMatrix_Reindex( defMapg );
    //Epetra_CrsMatrix t2G = (*ReIdx_MatTransg)( localG );
    //ReIdx_MatTransg->fwd();
    //EpetraExt::RowMatrixToMatlabFile("localG.mat", t2G);
#endif

    //cout << " totalElems in Schur Complement" << totalElems << endl;
    //cout << myPID << " localElems" << localElems << endl;

    // **************** Two collectives here *********************
#ifdef TIMING_OUTPUT
    Teuchos::Time ftime("setup time");
    ftime.start();
#endif
#ifdef TIMING_OUTPUT
    // NOTE(review): this timer carries the same "setup time" label as ftime
    // although it measures the operator applications - confirm intent.
    Teuchos::Time app_time("setup time");
#endif

    int nentries;
    // size > maxentries as there could be fill
    // TODO: Currently the size of the two arrays can be one, Even if we switch
    // the loop below the size of the array required is nvectors. Fix it
    double *values = new double[nvectors];
    int *indices = new int[nvectors];
    double *vecvalues;
#ifdef SHYLU_DEBUG
    // mfh 25 May 2015: Don't declare this variable if it's not used.
    // It's only used if SHYLU_DEBUG is defined.
    int dropped = 0;
#endif // SHYLU_DEBUG
    double *maxvalue = new double[nvectors];
#ifdef TIMING_OUTPUT
    // NOTE(review): ftime.start() was already called above - confirm the second
    // start is intended.
    ftime.start();
#endif
    // Number of full blocks of nvectors columns; the remainder is handled after the loop
    int findex = g_localElems / nvectors ;

    int cindex;
    // int mypid = C->Comm().MyPID(); // unused
    Epetra_MultiVector probevec (G_localRMap, nvectors);
    Epetra_MultiVector Scol (G_localRMap, nvectors);
    probevec.PutScalar(0.0);
    for (i = 0 ; i < findex*nvectors ; i+=nvectors)
    {
        // Set the probevec to find block columns of S.
        for (int k = 0; k < nvectors; k++)
        {
            cindex = k+i;
            // TODO: Can do better than this, just need to go to the column map
            // of C, there might be null columns in C
            probevec.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
            //if (mypid == 0)
            //cout << "Changing row to 1.0 " << g_rows[cindex] << endl;
        }

#ifdef TIMING_OUTPUT
        app_time.start();
#endif
        probeop.Apply(probevec, Scol);
#ifdef TIMING_OUTPUT
        app_time.stop();
#endif

        // Reset the probevec to all zeros.
        for (int k = 0; k < nvectors; k++)
        {
            cindex = k+i;
            probevec.ReplaceGlobalValue(g_rows[cindex], k, 0.0);
        }

        // One value per column of Scol.
        // NOTE(review): presumably the signed maximum rather than the maximum
        // magnitude, and it may be zero, making the ratio below inf/NaN - confirm.
        Scol.MaxValue(maxvalue);
        nentries = 0;
        // For each local row j, collect the kept entries across the block's
        // columns and insert them as one (partial) row of Sbar.
        for (int j = 0 ; j < g_localElems ; j++)
        {
            for (int k = 0; k < nvectors; k++)
            {
                cindex = k+i;
                vecvalues = Scol[k];
                // NOTE(review): unqualified abs() on a double - relies on the
                // floating-point overload being visible here; confirm.
                if ((g_rows[cindex] == g_rows[j])  ||
                (abs(vecvalues[j]/maxvalue[k]) > relative_thres))
                // diagonal entry or large entry.
                {
                    values[nentries] = vecvalues[j];
                    indices[nentries++] = g_rows[cindex];
                }
#ifdef SHYLU_DEBUG
                else if (vecvalues[j] != 0.0)
                {
                    dropped++;
                }
#endif // SHYLU_DEBUG
            }
            Sbar->InsertGlobalValues(g_rows[j], nentries, values,
                        indices);
            nentries = 0;
        }
    }

    // Remainder: fewer than a full block of columns is left (i is the first
    // unprocessed column after the loop above).
    if (i < g_localElems)
    {
        nvectors = g_localElems - i;
        // Tell the operator about the smaller block size before applying it
        probeop.ResetTempVectors(nvectors);
        Epetra_MultiVector probevec1 (G_localRMap, nvectors);
        Epetra_MultiVector Scol1 (G_localRMap, nvectors);

        probevec1.PutScalar(0.0);
        for (int k = 0; k < nvectors; k++)
        {
            cindex = k+i;
            // TODO: Can do better than this, just need to go to the column map
            // of C, there might be null columns in C
            probevec1.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
        }

#ifdef TIMING_OUTPUT
        app_time.start();
#endif
        probeop.Apply(probevec1, Scol1);
#ifdef TIMING_OUTPUT
        app_time.stop();
#endif
        Scol1.MaxValue(maxvalue);
        nentries = 0;
        // Same keep/drop rule as in the full-block loop above
        for (int j = 0 ; j < g_localElems ; j++)
        {
            //cout << "MAX" << maxvalue << endl;
            for (int k = 0; k < nvectors; k++)
            {
                cindex = k+i;
                vecvalues = Scol1[k];
                //nentries = 0; // inserting one entry in each row for now
                if ((g_rows[cindex] == g_rows[j])  ||
                (abs(vecvalues[j]/maxvalue[k]) > relative_thres))
                // diagonal entry or large entry.
                {
                    values[nentries] = vecvalues[j];
                    indices[nentries++] = g_rows[cindex];
                }
#ifdef SHYLU_DEBUG
                else if (vecvalues[j] != 0.0)
                {
                    dropped++;
                }
#endif // SHYLU_DEBUG
            }
            Sbar->InsertGlobalValues(g_rows[j], nentries, values,
                        indices);
            nentries = 0;
        }
    }

#ifdef TIMING_OUTPUT
    ftime.stop();
    cout << "Time in finding and dropping entries" << ftime.totalElapsedTime()
                     << endl;
    ftime.reset();
    cout << "Time in Apply of probing" << app_time.totalElapsedTime() << endl;
    probeop.PrintTimingInfo();
#endif
    Sbar->FillComplete();

#ifdef DUMP_MATRICES
    // NOTE(review): ReIdx_MatTrans2 is allocated with new and never deleted in
    // this function - confirm whether that is intentional.
    Epetra_Map defMap2(-1, g_localElems, 0, C->Comm());
    EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTrans2 =
                        new EpetraExt::CrsMatrix_Reindex( defMap2 );
    Epetra_CrsMatrix t2S = (*ReIdx_MatTrans2)( *Sbar );
    ReIdx_MatTrans2->fwd();
    EpetraExt::RowMatrixToMatlabFile("Schur.mat", t2S);
#endif

#ifdef SHYLU_DEBUG
    cout << "#dropped entries" << dropped << endl;
#endif
    // Release all scratch arrays allocated above
    delete[] values;
    delete[] indices;
    delete[] values1;
    delete[] indices1;
    delete[] values2;
    delete[] indices2;
    delete[] values3;
    delete[] indices3;
    delete[] maxvalue;

    return Sbar;
}
//
//  Amesos_TestMultiSolver.cpp reads in a matrix in Harwell-Boeing format, 
//  calls one of the sparse direct solvers, using blocked right hand sides
//  and computes the error and residual.  
//
//  TestSolver ignores the Harwell-Boeing right hand sides, creating
//  random right hand sides instead.  
//
//  Amesos_TestMultiSolver can test either A x = b or A^T x = b.
//  This can be a bit confusing because sparse direct solvers 
//  use compressed column storage - the transpose of Trilinos'
//  sparse row storage.
//
//  Matrices:
//    readA - Serial.  As read from the file.
//    transposeA - Serial.  The transpose of readA.
//    serialA - if (transpose) then transposeA else readA 
//    distributedA - readA distributed to all processes
//    passA - if ( distributed ) then distributedA else serialA
//
//  Parameters:
//    Comm         - communicator used for barriers, the map broadcast and the maps.
//    matrix_file  - input file; the suffix selects the reader
//                   (".triU", ".triS", ".mtx", anything else: Harwell-Boeing).
//    numsolves    - number of right-hand sides (columns of the multivectors).
//    SparseSolver - selects which solver branch is executed.
//    transpose    - solve with A^T instead of A.
//    special      - the solve loop runs special+1 times (and, under TEST_SUPERLU,
//                   special==0 toggles SetUseDGSSV).
//    matrix_type  - AMESOS_Distributed selects the distributed matrix/vectors,
//                   anything else the serial ones.
//
//  Returns 0 at the end of the routine.  The EPETRA_CHK_ERR-wrapped calls
//  presumably return early with the failing status; such an early return skips
//  the cleanup at the bottom (macro not visible here - confirm).
//
int Amesos_TestMultiSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, 
                      SparseSolverType SparseSolver, bool transpose,
                      int special, AMESOS_MatrixType matrix_type ) {


  int iam = Comm.MyPID() ;

  
  //  int hatever;
  //  if ( iam == 0 )  std::cin >> hatever ; 
  Comm.Barrier();


  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
   
  // Pick the reader from the file name suffix
  std::string FileName = matrix_file ;
  int FN_Size = FileName.size() ; 
  std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size );
  std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size );
  bool NonContiguousMap = false; 

  if ( LastFiveBytes == ".triU" ) { 
    NonContiguousMap = true; 
    // Call routine to read in unsymmetric Triplet matrix
    EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, 
                                                      readb, readxexact, NonContiguousMap ) );
  } else {
    if ( LastFiveBytes == ".triS" ) { 
      NonContiguousMap = true; 
      // Call routine to read in symmetric Triplet matrix
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, 
                                                        readMap, readA, readx, 
                                                        readb, readxexact, NonContiguousMap ) );
    } else {
      if (  LastFourBytes == ".mtx" ) { 
        EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, 
                                                               readA, readx, readb, readxexact) );
      } else {
        // Call routine to read in HB problem
        Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, 
                                                     readb, readxexact) ;
      }
    }
  }

  Epetra_CrsMatrix transposeA(Copy, *readMap, 0);
  Epetra_CrsMatrix *serialA ; 

  if ( transpose ) {
    // The call must live outside assert(): with NDEBUG defined the assert
    // expression is not evaluated and the transpose would never be built.
    const int transposeErr = CrsMatrixTranspose( readA, &transposeA );
    assert( transposeErr == 0 );
    EPETRA_CHK_ERR( transposeErr );
    serialA = &transposeA ; 
  } else {
    serialA = readA ; 
  }

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);
  Epetra_Map* map_;

  if( NonContiguousMap ) {
    //
    //  map gives us NumMyElements and MyFirstElement;
    //
    int NumGlobalElements =  readMap->NumGlobalElements();
    int NumMyElements = map.NumMyElements();
    int MyFirstElement = map.MinMyGID();
    std::vector<int> MapMap_( NumGlobalElements );
    readMap->MyGlobalElements( &MapMap_[0] ) ;
    // Share the global id list (root 0) so each process can pick its own slice
    Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; 
    map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm);
  } else {
    map_ = new Epetra_Map( map ) ; 
  }


  // Create Exporter to distribute read-in matrix and vectors
  Epetra_Export exporter(*readMap, *map_);
  Epetra_CrsMatrix A(Copy, *map_, 0);

  Epetra_RowMatrix * passA = 0; 
  Epetra_MultiVector * passx = 0; 
  Epetra_MultiVector * passb = 0;
  Epetra_MultiVector * passxexact = 0;
  Epetra_MultiVector * passresid = 0;
  Epetra_MultiVector * passtmp = 0;

  Epetra_MultiVector x(*map_,numsolves);
  Epetra_MultiVector b(*map_,numsolves);
  Epetra_MultiVector xexact(*map_,numsolves);
  Epetra_MultiVector resid(*map_,numsolves);
  Epetra_MultiVector tmp(*map_,numsolves);

  Epetra_MultiVector serialx(*readMap,numsolves);
  Epetra_MultiVector serialb(*readMap,numsolves);
  Epetra_MultiVector serialxexact(*readMap,numsolves);
  Epetra_MultiVector serialresid(*readMap,numsolves);
  Epetra_MultiVector serialtmp(*readMap,numsolves);

  bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; 
  if ( distribute_matrix ) { 
    //
    //  Distribute the matrix and use the distributed vectors
    //
    
    A.Export(*serialA, exporter, Add);
    Comm.Barrier();

    // As above: FillComplete() must run even when assertions are compiled out.
    const int fillErr = A.FillComplete();
    assert( fillErr == 0 );
    EPETRA_CHK_ERR( fillErr );
    Comm.Barrier();

    passA = &A; 
    passx = &x; 
    passb = &b;
    passxexact = &xexact;
    passresid = &resid;
    passtmp = &tmp;
  } else { 
    passA = serialA; 
    passx = &serialx; 
    passb = &serialb;
    passxexact = &serialxexact;
    passresid = &serialresid;
    passtmp = &serialtmp;
  }

  // Random exact solution and initial x; b is then computed from the exact solution
  passxexact->SetSeed(131) ; 
  passxexact->Random();
  passx->SetSeed(11231) ; 
  passx->Random();

  passb->PutScalar( 0.0 );
  passA->Multiply( transpose, *passxexact, *passb ) ; 

  Epetra_MultiVector CopyB( *passb ) ;

  double Anorm = passA->NormInf() ; 
  SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ;

  Epetra_LinearProblem Problem(  (Epetra_RowMatrix *) passA, 
                                 (Epetra_MultiVector *) passx, 
                                 (Epetra_MultiVector *) passb );

  // max_resid lives outside the loop, so it accumulates over all iterations
  double max_resid = 0.0;
  for ( int j = 0 ; j < special+1 ; j++ ) { 
    
    Epetra_Time TotalTime( Comm ) ; 
    // The "if ( false )" head lets every conditionally compiled solver branch
    // below start with "} else if".
    if ( false ) { 
#ifdef TEST_UMFPACK

      unused code

    } else if ( SparseSolver == UMFPACK ) { 
      UmfpackOO umfpack( (Epetra_RowMatrix *) passA, 
                         (Epetra_MultiVector *) passx, 
                         (Epetra_MultiVector *) passb ) ; 
    
      umfpack.SetTrans( transpose ) ; 
      umfpack.Solve() ; 
#endif
#ifdef TEST_SUPERLU
    } else if ( SparseSolver == SuperLU ) { 
      SuperluserialOO superluserial( (Epetra_RowMatrix *) passA, 
                                     (Epetra_MultiVector *) passx, 
                                     (Epetra_MultiVector *) passb ) ; 

      superluserial.SetPermc( SuperLU_permc ) ; 
      superluserial.SetTrans( transpose ) ; 
      superluserial.SetUseDGSSV( special == 0 ) ; 
      superluserial.Solve() ; 
#endif
#ifdef HAVE_AMESOS_SLUD
    } else if ( SparseSolver == SuperLUdist ) { 
      SuperludistOO superludist( Problem ) ; 
      superludist.SetTrans( transpose ) ; 
      EPETRA_CHK_ERR( superludist.Solve( true ) ) ;
#endif 
#ifdef HAVE_AMESOS_SLUD2
    } else if ( SparseSolver == SuperLUdist2 ) { 
      Superludist2_OO superludist2( Problem ) ; 
      superludist2.SetTrans( transpose ) ; 
      EPETRA_CHK_ERR( superludist2.Solve( true ) ) ;
#endif 
#ifdef TEST_SPOOLES
    } else if ( SparseSolver == SPOOLES ) { 
      SpoolesOO spooles( (Epetra_RowMatrix *) passA, 
                         (Epetra_MultiVector *) passx, 
                         (Epetra_MultiVector *) passb ) ; 
    
      spooles.SetTrans( transpose ) ; 
      spooles.Solve() ; 
#endif
#ifdef HAVE_AMESOS_DSCPACK
    } else if ( SparseSolver == DSCPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Dscpack dscpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); 
    
      EPETRA_CHK_ERR( dscpack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_UMFPACK
    } else if ( SparseSolver == UMFPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Umfpack umfpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( umfpack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_KLU
    } else if ( SparseSolver == KLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Klu klu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( klu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( klu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( klu.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARAKLETE
    } else if ( SparseSolver == PARAKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Paraklete paraklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( paraklete.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( paraklete.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( paraklete.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SLUS
    } else if ( SparseSolver == SuperLU ) { 
      Epetra_SLU superluserial( &Problem ) ; 
      EPETRA_CHK_ERR( superluserial.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superluserial.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superluserial.NumericFactorization(  ) ); 

      EPETRA_CHK_ERR( superluserial.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_LAPACK
    } else if ( SparseSolver == LAPACK ) { 
      Teuchos::ParameterList ParamList ;
      ParamList.set( "MaxProcs", -3 );
      Amesos_Lapack lapack( Problem ) ; 
      EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( lapack.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_TAUCS
    } else if ( SparseSolver == TAUCS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Taucs taucs( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARDISO
    } else if ( SparseSolver == PARDISO ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Pardiso pardiso( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARKLETE
    } else if ( SparseSolver == PARKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Parklete parklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( parklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( parklete.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( parklete.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( parklete.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( parklete.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_MUMPS
    } else if ( SparseSolver == MUMPS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Mumps mumps( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SCALAPACK
    } else if ( SparseSolver == SCALAPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Scalapack scalapack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SUPERLUDIST
    } else if ( SparseSolver == SUPERLUDIST ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superludist superludist( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); 

      EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superludist.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SUPERLU
    } else if ( SparseSolver == SUPERLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superlu superlu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superlu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.Solve( ) ); 
#endif
#ifdef TEST_SPOOLESSERIAL 
    } else if ( SparseSolver == SPOOLESSERIAL ) { 
      SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, 
                                     (Epetra_MultiVector *) passx, 
                                     (Epetra_MultiVector *) passb ) ; 
    
      spoolesserial.Solve() ;
#endif
    } else { 
      SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ;
      std::cerr << "\n\n####################  Requested solver not available (Or not tested with blocked RHS) on this platform #####################\n" << std::endl ;
    }

    SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); 
    //    SparseDirectTimingVars::SS_Result.Set_First_Time( 0.0 ); 
    //    SparseDirectTimingVars::SS_Result.Set_Middle_Time( 0.0 ); 
    //    SparseDirectTimingVars::SS_Result.Set_Last_Time( 0.0 ); 

    //
    //  Compute the error = norm(xcomp - xexact )
    //
    std::vector <double> error(numsolves) ; 
    double max_error = 0.0;
  
    passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0);

    passresid->Norm2(&error[0]);
    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( error[i] > max_error ) max_error = error[i] ; 
    SparseDirectTimingVars::SS_Result.Set_Error(max_error) ;

    //  passxexact->Norm2(&error[0] ) ; 
    //  passx->Norm2(&error ) ; 

    //
    //  Compute the residual = norm(Ax - b)
    //
    std::vector <double> residual(numsolves) ; 
  
    passtmp->PutScalar(0.0);
    passA->Multiply( transpose, *passx, *passtmp);
    passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); 
    //    passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); 
    passresid->Norm2(&residual[0]);

    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( residual[i] > max_resid ) max_resid = residual[i] ; 


    SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ;
    
    // Only the norms of the first right-hand side / solution are recorded
    std::vector <double> bnorm(numsolves); 
    passb->Norm2( &bnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ;

    std::vector <double> xnorm(numsolves); 
    passx->Norm2( &xnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ;


    // Diagnostic dump, disabled by the constant false
    if ( false && iam == 0 ) { 

      std::cout << " Amesos_TestMutliSolver.cpp " << std::endl ; 
      for ( int i = 0 ; i< numsolves && i < 10 ; i++ ) {
        std::cout << "i=" << i 
             << " error = " << error[i] 
             << " xnorm = " << xnorm[i] 
             << " residual = " << residual[i] 
             << " bnorm = " << bnorm[i] 
             << std::endl ; 
      
      }
    
      std::cout << std::endl << " max_resid = " << max_resid ; 
      std::cout << " max_error = " << max_error << std::endl ; 
      std::cout << " Get_residual() again = " << SparseDirectTimingVars::SS_Result.Get_Residual() << std::endl ;

    }
  }
  // Release everything the readers allocated, plus the map created above
  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;
  delete map_;
  
  Comm.Barrier();

return 0 ;
}
Esempio n. 28
0
// Runs the block generalized Davidson iteration, starting from startingEV
// eigenpairs assumed to be already stored in the leading columns of Q and the
// leading entries of lambda. Returns the number of eigenpairs held at exit, or
// a negative error code (see the list below).
int Davidson::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {

  // Computes the smallest eigenvalues and the corresponding eigenvectors
  // of the generalized eigenvalue problem
  // 
  //      K X = M X Lambda
  // 
  // using a generalized Davidson algorithm
  //
  // Note that if M is not specified, then  K X = X Lambda is solved.
  // 
  // Input variables:
  // 
  // numEigen  (integer) = Number of eigenmodes requested
  // 
  // Q (Epetra_MultiVector) = Converged eigenvectors
  //                   The number of columns of Q must be at least numEigen + blockSize.
  //                   The rows of Q are distributed across processors.
  //                   At exit, the first numEigen columns contain the eigenvectors requested.
  // 
  // lambda (array of doubles) = Converged eigenvalues
  //                   At input, it must be of size numEigen + blockSize.
  //                   At exit, the first numEigen locations contain the eigenvalues requested.
  //
  // startingEV (integer) = Number of existing converged eigenvectors
  //                   We assume that the user has checked the eigenvectors and 
  //                   their M-orthonormality.
  //
  // Return information on status of computation
  // 
  // info >=   0 >> Number of converged eigenpairs at the end of computation
  // 
  // // Failure due to input arguments
  // 
  // info = -  1 >> The stiffness matrix K has not been specified.
  // info = -  2 >> The maps for the matrix K and the matrix M differ.
  // info = -  3 >> The maps for the matrix K and the preconditioner P differ.
  // info = -  4 >> The maps for the vectors and the matrix K differ.
  // info = -  5 >> Q is too small for the number of eigenvalues requested.
  // info = -  6 >> Q is too small for the computation parameters.
  //
  // info = -  8 >> The number of blocks is too small for the number of eigenvalues.
  // 
  // info = - 10 >> Failure during the mass orthonormalization
  // 
  // info = - 30 >> MEMORY
  //

  // Check the input parameters
  
  // Nothing to compute: the caller already holds at least numEigen eigenpairs
  if (numEigen <= startingEV) {
    return startingEV;
  }

  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
  if (info < 0)
    return info;

  int myPid = MyComm.MyPID();

  if (numBlock*blockSize < numEigen) {
    if (myPid == 0) {
      cerr << endl;
      cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than ";
      cerr << " the number of eigenvalues !!!\n";
      cerr << " Number of blocks = " << numBlock << endl;
      cerr << " Size of blocks = " << blockSize << endl;
      cerr << " Number of eigenvalues = " << numEigen << endl;
      cerr << endl;
    }
    return -8;
  }

  // Get the weight for approximating the M-inverse norm
  Epetra_Vector *vectWeight = 0;
  if (normWeight) {
    vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
  }

  int knownEV = startingEV;
  // Non-zero only on process 0, so that printing happens once
  int localVerbose = verbose*(myPid==0);

  // Define local block vectors
  //
  // MX = Working vectors (storing M*X if M is specified, else pointing to X)
  // KX = Working vectors (storing K*X)
  //
  // R = Residuals

  int xr = Q.MyLength();
  int dimSearch = blockSize*numBlock;

  // X is a view on the first dimSearch + blockSize columns of Q.
  // With converged vectors supplied, only the block following them is
  // randomized; otherwise the whole of X is.
  Epetra_MultiVector X(View, Q, 0, dimSearch + blockSize);
  if (knownEV > 0) {
    Epetra_MultiVector copyX(View, Q, knownEV, blockSize);
    copyX.Random();
  }
  else {
    X.Random();
  }

  // work1 backs KX and R, plus MX when a mass matrix is present
  // (blockSize*xr doubles each)
  int tmp;
  tmp = (M == 0) ? 2*blockSize*xr : 3*blockSize*xr;

  double *work1 = new (nothrow) double[tmp]; 
  if (work1 == 0) {
    if (vectWeight)
      delete vectWeight;
    info = -30;
    return info;
  }
  memRequested += sizeof(double)*tmp/(1024.0*1024.0);

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  double *tmpD = work1;

  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  // Without M, MX is a view on the start of X's storage instead of work1
  Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
  tmpD = (M) ? tmpD + xr*blockSize : tmpD;

  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);

  // Define arrays
  //
  // theta = Store the local eigenvalues (size: dimSearch)
  // normR = Store the norm of residuals (size: blockSize)
  //
  // KK = Local stiffness matrix         (size: dimSearch x dimSearch)
  //
  // S = Local eigenvectors              (size: dimSearch x dimSearch)
  //
  // tmpKK = Local workspace             (size: blockSize x blockSize)

  int lwork2 = blockSize + dimSearch + 2*dimSearch*dimSearch + blockSize*blockSize;
  double *work2 = new (nothrow) double[lwork2];
  if (work2 == 0) {
    if (vectWeight)
      delete vectWeight;
    delete[] work1;
    info = -30;
    return info;
  }

  memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
  highMem = (highMem > currentSize()) ? highMem : currentSize();

  // Carve work2 into theta | normR | KK | S | tmpKK, in that order
  tmpD = work2;

  double *theta = tmpD;
  tmpD = tmpD + dimSearch;

  double *normR = tmpD;
  tmpD = tmpD + blockSize;

  double *KK = tmpD;
  tmpD = tmpD + dimSearch*dimSearch;
  memset(KK, 0, dimSearch*dimSearch*sizeof(double));

  double *S = tmpD;
  tmpD = tmpD + dimSearch*dimSearch;

  double *tmpKK = tmpD;

  // Define an array to store the residuals history
  // NOTE(review): if exactly one of the two allocations below succeeds, it is
  // not released on this error path -- confirm that whoever owns resHistory /
  // spaceSizeHistory frees it later.
  if (localVerbose > 2) {
    resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
    spaceSizeHistory = new (nothrow) int[maxIterEigenSolve];
    if ((resHistory == 0) || (spaceSizeHistory == 0)) {
      if (vectWeight)
        delete vectWeight;
      delete[] work1;
      delete[] work2;
      info = -30;
      return info;
    }
    historyCount = 0;
  }

  // Miscellaneous definitions

  bool reStart = false;
  numRestart = 0;

  bool criticalExit = false;

  int bStart = 0;
  int offSet = 0;
  // NOTE(review): numBlock is not declared in this function, so this
  // assignment outlives the call -- confirm that shrinking it by
  // knownEV/blockSize is intended to persist.
  numBlock = (dimSearch/blockSize) - (knownEV/blockSize);

  int nFound = blockSize;
  int i, j;

  if (localVerbose > 0) {
    cout << endl;
    cout << " *|* Problem: ";
    if (M)
      cout << "K*Q = M*Q D ";
    else
      cout << "K*Q = Q D ";
    if (Prec)
      cout << " with preconditioner";
    cout << endl;
    cout << " *|* Algorithm = Davidson algorithm (block version)" << endl;
    cout << " *|* Size of blocks = " << blockSize << endl;
    cout << " *|* Largest size of search space = " << numBlock*blockSize << endl;
    cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
    cout.precision(2);
    cout.setf(ios::scientific, ios::floatfield);
    cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
    cout << " *|* Norm used for convergence: ";
    if (vectWeight)
      cout << "weighted L2-norm with user-provided weights" << endl;
    else
      cout << "L^2-norm" << endl;
    if (startingEV > 0)
      cout << " *|* Input converged eigenvectors = " << startingEV << endl;
    cout << "\n -- Start iterations -- \n";
  }

  // Number of blocks still available to the search space; recomputed each
  // time knownEV grows
  int maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);

  timeOuterLoop -= MyWatch.WallTime();
  outerIter = 0;
  while (outerIter <= maxIterEigenSolve) {

    highMem = (highMem > currentSize()) ? highMem : currentSize();

    // Grow the search space one block at a time; every pass counts as one
    // outer iteration
    int nb;
    for (nb = bStart; nb < maxBlock; ++nb) {

      outerIter += 1;
      if (outerIter > maxIterEigenSolve)
        break;

      int localSize = nb*blockSize;

      Epetra_MultiVector Xcurrent(View, X, localSize + knownEV, blockSize);

      timeMassOp -= MyWatch.WallTime();
      if (M)
        M->Apply(Xcurrent, MX);
      timeMassOp += MyWatch.WallTime();
      massOp += blockSize;

      // Orthonormalize X against the known eigenvectors and the previous vectors
      // Note: Use R as a temporary work space
      timeOrtho -= MyWatch.WallTime();
      if (nb == bStart) {
        if (nFound > 0) {
          if (knownEV == 0) {
            info = modalTool.massOrthonormalize(Xcurrent, MX, M, Q, nFound, 2, R.Values());
          }
          else {
            Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
            info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, nFound, 0, R.Values());
          }
        }
        nFound = 0;
      }
      else {
        Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
        info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, blockSize, 0, R.Values());
      }
      timeOrtho += MyWatch.WallTime();

      // Exit the code when the number of vectors exceeds the space dimension
      if (info < 0) {
        delete[] work1;
        delete[] work2;
        if (vectWeight)
          delete vectWeight;
        return -10;
      }

      timeStifOp -= MyWatch.WallTime();
      K->Apply(Xcurrent, KX);
      timeStifOp += MyWatch.WallTime();
      stifOp += blockSize;

      // Check the orthogonality properties of X
      // (tests verbose, not localVerbose, so accuracyCheck runs on every process)
      if (verbose > 2) {
        if (knownEV + localSize == 0)
          accuracyCheck(&Xcurrent, &MX, 0);
        else {
          Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
          accuracyCheck(&Xcurrent, &MX, &copyQ);
        }
        if (localVerbose > 0)
          cout << endl;
      } // if (verbose > 2)

      // Define the local stiffness matrix
      // Note: S is used as a workspace
      // For each block j <= nb: tmpKK = X_j^T * KX locally, summed over all
      // processes into S, then copied into the matching block of KK.
      timeLocalProj -= MyWatch.WallTime();
      for (j = 0; j <= nb; ++j) {
        callBLAS.GEMM('T', 'N', blockSize, blockSize, xr,
                      1.0, X.Values()+(knownEV+j*blockSize)*xr, xr, KX.Values(), xr,
                      0.0, tmpKK, blockSize);
        MyComm.SumAll(tmpKK, S, blockSize*blockSize);
        int iC;
        for (iC = 0; iC < blockSize; ++iC) {
          double *Kpointer = KK + localSize*dimSearch + j*blockSize + iC*dimSearch;
          memcpy(Kpointer, S + iC*blockSize, blockSize*sizeof(double));
        }
      }
      timeLocalProj += MyWatch.WallTime();

      // Perform a spectral decomposition
      timeLocalSolve -= MyWatch.WallTime();
      int nevLocal = localSize + blockSize;
      info = modalTool.directSolver(localSize+blockSize, KK, dimSearch, 0, 0,
                                    nevLocal, S, dimSearch, theta, localVerbose, 10);
      timeLocalSolve += MyWatch.WallTime();

      if (info != 0) {
        // Stop as spectral decomposition has a critical failure
        if (info < 0) {
          criticalExit = true;
          break;
        }
        // Restart as spectral decomposition failed
        if (localVerbose > 0) {
          cout << " Iteration " << outerIter;
          cout << "- Failure for spectral decomposition - RESTART with new random search\n";
        }
        reStart = true;
        numRestart += 1;
        timeRestart -= MyWatch.WallTime();
        Epetra_MultiVector Xinit(View, X, knownEV, blockSize);
        Xinit.Random();
        timeRestart += MyWatch.WallTime();
        nFound = blockSize;
        bStart = 0;
        break;
      } // if (info != 0)

      // Update the search space
      // Note: Use KX as a workspace
      timeLocalUpdate -= MyWatch.WallTime();
      callBLAS.GEMM('N', 'N', xr, blockSize, localSize+blockSize, 1.0, X.Values()+knownEV*xr, xr,
                    S, dimSearch, 0.0, KX.Values(), xr);
      timeLocalUpdate += MyWatch.WallTime();

      // Apply the mass matrix for the next block
      timeMassOp -= MyWatch.WallTime();
      if (M)
        M->Apply(KX, MX);
      timeMassOp += MyWatch.WallTime();
      massOp += blockSize;

      // Apply the stiffness matrix for the next block
      timeStifOp -= MyWatch.WallTime();
      K->Apply(KX, R);
      timeStifOp += MyWatch.WallTime();
      stifOp += blockSize;

      // Form the residuals
      timeResidual -= MyWatch.WallTime();
      if (M) {
        for (j = 0; j < blockSize; ++j) {
          callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
        }
      }
      else {
        // Note KX contains the updated block
        for (j = 0; j < blockSize; ++j) {
          callBLAS.AXPY(xr, -theta[j], KX.Values() + j*xr, R.Values() + j*xr);
        }
      }
      timeResidual += MyWatch.WallTime();
      residual += blockSize;

      // Compute the norm of residuals
      timeNorm -= MyWatch.WallTime();
      if (vectWeight) {
        R.NormWeighted(*vectWeight, normR);
      }
      else {
        R.Norm2(normR);
      }
      // Scale the norms of residuals with the eigenvalues
      // Count the number of converged eigenvectors
      // NOTE(review): the division uses theta[j] as is, so a negative Ritz
      // value gives a negative scaled norm that passes the tolerance test --
      // confirm that the Ritz values are expected to be non-negative here.
      nFound = 0;
      for (j = 0; j < blockSize; ++j) {
        normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
        if (normR[j] < tolEigenSolve)
          nFound += 1;
      } // for (j = 0; j < blockSize; ++j)
      timeNorm += MyWatch.WallTime();

      // Store the residual history
      if (localVerbose > 2) {
        memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
        spaceSizeHistory[historyCount] = localSize + blockSize;
        historyCount += 1;
      }
      maxSpaceSize = (maxSpaceSize > localSize+blockSize) ? maxSpaceSize : localSize+blockSize;
      sumSpaceSize += localSize + blockSize;

      // Print information on current iteration
      if (localVerbose > 0) {
        cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
        cout << knownEV + nFound << endl;
      } // if (localVerbose > 0)

      if (localVerbose > 1) {
        cout << endl;
        cout.precision(2);
        cout.setf(ios::scientific, ios::floatfield);
        for (i=0; i<blockSize; ++i) {
          cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
          cout << " = " << normR[i] << endl;
        }
        cout << endl;
        cout.precision(2);
        for (i=0; i<nevLocal; ++i) {
          cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
          cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);  
          cout << " = " << theta[i] << endl;
        }
        cout << endl;
      }

      // Exit the loop to treat the converged eigenvectors
      // (nb is advanced first so that nb*blockSize is the current space size)
      if (nFound > 0) {
        nb += 1;
        offSet = 0;
        break;
      }

      // Apply the preconditioner on the residuals
      // Note: Use KX as a workspace
      if (maxBlock == 1) {
        if (Prec) {
          timePrecOp -= MyWatch.WallTime();
          Prec->ApplyInverse(R, Xcurrent);
          timePrecOp += MyWatch.WallTime();
          precOp += blockSize;
        }
        else {
          memcpy(Xcurrent.Values(), R.Values(), blockSize*xr*sizeof(double));
        }
        timeRestart -= MyWatch.WallTime();
        Xcurrent.Update(1.0, KX, -1.0);
        timeRestart += MyWatch.WallTime();
        break;
      } // if (maxBlock == 1)

      // Search space is full: leave the loop and restart below
      if (nb == maxBlock - 1) {
        nb += 1;
        break;
      }

      // Otherwise the (preconditioned) residuals become the next block of X
      Epetra_MultiVector Xnext(View, X, knownEV+localSize+blockSize, blockSize);
      if (Prec) {
        timePrecOp -= MyWatch.WallTime();
        Prec->ApplyInverse(R, Xnext);
        timePrecOp += MyWatch.WallTime();
        precOp += blockSize;
      }
      else {
        memcpy(Xnext.Values(), R.Values(), blockSize*xr*sizeof(double));
      }

    } // for (nb = bStart; nb < maxBlock; ++nb)

    if (outerIter > maxIterEigenSolve)
      break;

    if (reStart == true) {
      reStart = false;
      continue;
    }

    if (criticalExit == true)
      break;

    // Store the final converged eigenvectors
    if (knownEV + nFound >= numEigen) {
      for (j = 0; j < blockSize; ++j) {
        if (normR[j] < tolEigenSolve) {
          memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
          lambda[knownEV] = theta[j];
          knownEV += 1;
        }
      }
      if (localVerbose == 1) {
        cout << endl;
        cout.precision(2);
        cout.setf(ios::scientific, ios::floatfield);
        for (i=0; i<blockSize; ++i) {
          cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
          cout << " = " << normR[i] << endl;
        }
        cout << endl;
      }  
      break;
    } // if (knownEV + nFound >= numEigen)

    // Treat the particular case of 1 block
    if (maxBlock == 1) {
      if (nFound > 0) {
        // Converged columns of KX are appended to the known eigenvectors;
        // the others are packed right after them and the remaining columns
        // of the block are refilled with random vectors.
        double *Xpointer = X.Values() + (knownEV+nFound)*xr;
        nFound = 0;
        for (j = 0; j < blockSize; ++j) {
          if (normR[j] < tolEigenSolve) {
            memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
            lambda[knownEV] = theta[j];
            knownEV += 1;
            nFound += 1;
          }
          else {
            memcpy(Xpointer + (j-nFound)*xr, KX.Values() + j*xr, xr*sizeof(double));
          }
        }
        Epetra_MultiVector Xnext(View, X, knownEV + blockSize - nFound, nFound);
        Xnext.Random();
      }
      else {
        nFound = blockSize;
      }
      continue;
    }

    // Define the restarting block when maxBlock > 1
    // Reorders S, theta and normR so that the nFound converged entries come
    // first.
    if (nFound > 0) {
      int firstIndex = blockSize;
      for (j = 0; j < blockSize; ++j) {
        if (normR[j] >= tolEigenSolve) {
          firstIndex = j;
          break;
        }
      } // for (j = 0; j < blockSize; ++j)
      while (firstIndex < nFound) {
        for (j = firstIndex; j < blockSize; ++j) {
          if (normR[j] < tolEigenSolve) {
            // Swap the j-th and firstIndex-th position
            callFortran.SWAP(nb*blockSize, S + j*dimSearch, 1, S + firstIndex*dimSearch, 1);
            callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
            callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
            break;
          }
        } // for (j = firstIndex; j < blockSize; ++j)
        for (j = 0; j < blockSize; ++j) {
          if (normR[j] >= tolEigenSolve) {
            firstIndex = j;
            break;
          }
        } // for (j = 0; j < blockSize; ++j)
      } // while (firstIndex < nFound)

      // Copy the converged eigenvalues
      memcpy(lambda + knownEV, theta, nFound*sizeof(double));

    } // if (nFound > 0)

    // Define the restarting size
    bStart = ((nb - offSet) > 2) ? (nb - offSet)/2 : 0;

    // Define the restarting space and local stiffness
    // KK is reset to a diagonal holding the retained Ritz values.
    timeRestart -= MyWatch.WallTime();
    memset(KK, 0, nb*blockSize*dimSearch*sizeof(double));
    for (j = 0; j < bStart*blockSize; ++j) {
      KK[j + j*dimSearch] = theta[j + nFound];
    }
    // Form the restarting space
    // QR-factor the first newCol columns of S (theta and R serve as scratch
    // for the factorization) and apply the resulting Q from the right to X.
    int oldCol = nb*blockSize;
    int newCol = nFound + (bStart+1)*blockSize;
    newCol = (newCol > oldCol) ? oldCol : newCol;
    callFortran.GEQRF(oldCol, newCol, S, dimSearch, theta, R.Values(), xr*blockSize, &info);
    callFortran.ORMQR('R', 'N', xr, oldCol, newCol, S, dimSearch, theta,
                      X.Values()+knownEV*xr, xr, R.Values(), blockSize*xr, &info);
    timeRestart += MyWatch.WallTime();

    if (nFound == 0)
      offSet += 1;

    knownEV += nFound;
    maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);

    // Put random vectors if the Rayleigh Ritz vectors are not enough
    newCol = nFound + (bStart+1)*blockSize;
    if (newCol > oldCol) {
      Epetra_MultiVector Xnext(View, X, knownEV+blockSize-nFound, nFound);
      Xnext.Random();
      continue;
    }

    nFound = 0;

  } // while (outerIter <= maxIterEigenSolve)
  timeOuterLoop += MyWatch.WallTime();
  highMem = (highMem > currentSize()) ? highMem : currentSize();

  // Clean memory
  delete[] work1;
  delete[] work2;
  if (vectWeight)
    delete vectWeight;

  // Sort the eigenpairs
  // info here is whatever the last orthonormalization, local solve or LAPACK
  // call left in it; a non-zero value skips the sort and is returned as is.
  timePostProce -= MyWatch.WallTime();
  if ((info == 0) && (knownEV > 0)) {
    mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
  }
  timePostProce += MyWatch.WallTime();

  return (info == 0) ? knownEV : info;

}
Esempio n. 29
0
int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
    Teuchos::GlobalMPISession mpiSession(&argc, &argv, 0);
    Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
    Epetra_SerialComm Comm;
#endif
    int nProcs, myPID ;
    Teuchos::ParameterList pLUList ;        // ParaLU parameters
    Teuchos::ParameterList isoList ;        // Isorropia parameters
    Teuchos::ParameterList shyLUList ;    // shyLU parameters
    Teuchos::ParameterList ifpackList ;    // shyLU parameters
    string ipFileName = "ShyLU.xml";       // TODO : Accept as i/p

    nProcs = mpiSession.getNProc();
    myPID = Comm.MyPID();

    if (myPID == 0)
    {
        cout <<"Parallel execution: nProcs="<< nProcs << endl;
    }

    // =================== Read input xml file =============================
    Teuchos::updateParametersFromXmlFile(ipFileName, &pLUList);
    isoList = pLUList.sublist("Isorropia Input");
    shyLUList = pLUList.sublist("ShyLU Input");
    shyLUList.set("Outer Solver Library", "AztecOO");
    // Get matrix market file name
    string MMFileName = Teuchos::getParameter<string>(pLUList, "mm_file");
    string prec_type = Teuchos::getParameter<string>(pLUList, "preconditioner");
    int maxiters = Teuchos::getParameter<int>(pLUList, "Outer Solver MaxIters");
    double tol = Teuchos::getParameter<double>(pLUList, "Outer Solver Tolerance");
    string rhsFileName = pLUList.get<string>("rhs_file", "");

    if (myPID == 0)
    {
        cout << "Input :" << endl;
        cout << "ParaLU params " << endl;
        pLUList.print(std::cout, 2, true, true);
        cout << "Matrix market file name: " << MMFileName << endl;
    }

    // ==================== Read input Matrix ==============================
    Epetra_CrsMatrix *A;
    Epetra_MultiVector *b1;

    int err = EpetraExt::MatrixMarketFileToCrsMatrix(MMFileName.c_str(), Comm,
                                                        A);
    //EpetraExt::MatlabFileToCrsMatrix(MMFileName.c_str(), Comm, A);
    //assert(err != 0);
    //cout <<"Done reading the matrix"<< endl;
    int n = A->NumGlobalRows();
    //cout <<"n="<< n << endl;

    // Create input vectors
    Epetra_Map vecMap(n, 0, Comm);
    if (rhsFileName != "")
    {
        err = EpetraExt::MatrixMarketFileToMultiVector(rhsFileName.c_str(),
                                         vecMap, b1);
    }
    else
    {
        b1 = new Epetra_MultiVector(vecMap, 1, false);
        b1->PutScalar(1.0);
    }

    Epetra_MultiVector x(vecMap, 1);
    //cout << "Created the vectors" << endl;

    // Partition the matrix with hypergraph partitioning and redisstribute
    Isorropia::Epetra::Partitioner *partitioner = new
                            Isorropia::Epetra::Partitioner(A, isoList, false);
    partitioner->partition();
    Isorropia::Epetra::Redistributor rd(partitioner);

    Epetra_CrsMatrix *newA;
    Epetra_MultiVector *newX, *newB; 
    rd.redistribute(*A, newA);
    delete A;
    A = newA;

    rd.redistribute(x, newX);
    rd.redistribute(*b1, newB);

    Epetra_LinearProblem problem(A, newX, newB);

    AztecOO solver(problem);

    ifpackList ;
    Ifpack_Preconditioner *prec;
    ML_Epetra::MultiLevelPreconditioner *MLprec;
    if (prec_type.compare("ShyLU") == 0)
    {
        prec = new Ifpack_ShyLU(A);
        prec->SetParameters(shyLUList);
        prec->Initialize();
        prec->Compute();
        //(dynamic_cast<Ifpack_ShyLU *>(prec))->JustTryIt();
        //cout << " Going to set it in solver" << endl ;
        solver.SetPrecOperator(prec);
        //cout << " Done setting the solver" << endl ;
    }
    else if (prec_type.compare("ILU") == 0)
    {
        ifpackList.set( "fact: level-of-fill", 1 );
        prec = new Ifpack_ILU(A);
        prec->SetParameters(ifpackList);
        prec->Initialize();
        prec->Compute();
        solver.SetPrecOperator(prec);
    }
    else if (prec_type.compare("ILUT") == 0)
    {
        ifpackList.set( "fact: ilut level-of-fill", 2 );
        ifpackList.set( "fact: drop tolerance", 1e-8);
        prec = new Ifpack_ILUT(A);
        prec->SetParameters(ifpackList);
        prec->Initialize();
        prec->Compute();
        solver.SetPrecOperator(prec);
    }
    else if (prec_type.compare("ML") == 0)
    {
        Teuchos::ParameterList mlList; // TODO : Take it from i/p
        MLprec = new ML_Epetra::MultiLevelPreconditioner(*A, mlList, true);
        solver.SetPrecOperator(MLprec);
    }

    solver.SetAztecOption(AZ_solver, AZ_gmres);
    solver.SetMatrixName(333);
    //solver.SetAztecOption(AZ_output, 1);
    //solver.SetAztecOption(AZ_conv, AZ_Anorm);
    //cout << "Going to iterate for the global problem" << endl;

    solver.Iterate(maxiters, tol);

    // compute ||Ax - b||
    double Norm;
    Epetra_MultiVector Ax(vecMap, 1);

    Epetra_MultiVector *newAx; 
    rd.redistribute(Ax, newAx);
    A->Multiply(false, *newX, *newAx);
    newAx->Update(1.0, *newB, -1.0);
    newAx->Norm2(&Norm);
    double ANorm = A->NormOne();

    cout << "|Ax-b |/|A| = " << Norm/ANorm << endl;

    delete newAx;
    if (prec_type.compare("ML") == 0)
    {
        delete MLprec;
    }
    else
    {
        delete prec;
    }

    delete b1;
    delete newX;
    delete newB;
    delete A;
    delete partitioner;
}
int ShyLU_Probing_Operator::Apply(const Epetra_MultiVector &X,
            Epetra_MultiVector &Y) const
{
#ifdef TIMING_OUTPUT
    apply_time_->start();
#endif

    int nvectors = X.NumVectors();
    bool local = (C_->Comm().NumProc() == 1);
    int err;
    //cout << "No of colors after probing" << nvectors << endl;

#ifdef TIMING_OUTPUT
    matvec_time_->start();
#endif

    err = G_->Multiply(false, X, *temp2);
    assert(err == 0);
    if (!local)
        err = C_->Multiply(false, X, *temp);
    else
    {
        // localize X
        double *values;
        int mylda;
        X.ExtractView(&values, &mylda);

       Epetra_SerialComm LComm;        // Use Serial Comm for the local blocks.
       Epetra_Map SerialMap(X.Map().NumMyElements(), X.Map().NumMyElements(),
                   X.Map().MyGlobalElements(), 0, LComm);
       Epetra_MultiVector Xl(View, SerialMap, values, mylda, X.NumVectors());
       err = C_->Multiply(false, Xl, *temp);
    }
    assert(err == 0);

#ifdef TIMING_OUTPUT
    matvec_time_->stop();
#endif

    int nrows = C_->RowMap().NumMyElements();

#ifdef DEBUG
    cout << "DEBUG MODE" << endl;
    assert(nrows == localDRowMap_->NumGlobalElements());

    int gids[nrows], gids1[nrows];
    C_->RowMap().MyGlobalElements(gids);
    localDRowMap_->MyGlobalElements(gids1);

    for (int i = 0; i < nrows; i++)
    {
       assert(gids[i] == gids1[i]);
    }
#endif

#ifdef TIMING_OUTPUT
    localize_time_->start();
#endif

    //int err;
    int lda;
    double *values;
    if (!local)
    {
        err = temp->ExtractView(&values, &lda);
        assert (err == 0);

        // copy to local vector //TODO: OMP parallel
        assert(lda == nrows);

    //#pragma omp parallel for shared(nvectors, nrows, values)
        for (int v = 0; v < nvectors; v++)
        {
           for (int i = 0; i < nrows; i++)
           {
               err = ltemp->ReplaceMyValue(i, v, values[i+v*lda]);
               assert (err == 0);
           }
        }
    }

#ifdef TIMING_OUTPUT
    localize_time_->stop();
    trisolve_time_->start();
#endif

    if (!local)
    {
        LP_->SetRHS(ltemp.getRawPtr());
    }
    else
    {
        //LP_->SetRHS(temp.getRawPtr());
    }
    //LP_->SetLHS(localX.getRawPtr());

    //TODO: Why not just in Reset(). Check the distr path.
    ssym_->OrigLP->SetLHS(localX.getRawPtr());
    ssym_->OrigLP->SetRHS(temp.getRawPtr());
    ssym_->ReIdx_LP->fwd();
    solver_->Solve();

#ifdef TIMING_OUTPUT
    trisolve_time_->stop();
    dist_time_->start();
#endif

    if (!local)
    {
        err = localX->ExtractView(&values, &lda);
        assert (err == 0);

        //Copy back to dist vector //TODO: OMP parallel
    //#pragma omp parallel for
        for (int v = 0; v < nvectors; v++)
        {
           for (int i = 0; i < nrows; i++)
           {
               err = temp->ReplaceMyValue(i, v, values[i+v*lda]);
               assert (err == 0);
           }
        }
    }

#ifdef TIMING_OUTPUT
    dist_time_->stop();
    matvec2_time_->start();
#endif

    if (!local)
    {
        R_->Multiply(false, *temp, Y);
    }
    else
    {
        // Should Y be localY in Multiply and then exported to Y ?? TODO:
        // Use view mode ?
        double *values;
        int mylda;
        Y.ExtractView(&values, &mylda);

       Epetra_SerialComm LComm;        // Use Serial Comm for the local blocks.
       Epetra_Map SerialMap(Y.Map().NumMyElements(), Y.Map().NumMyElements(),
                   Y.Map().MyGlobalElements(), 0, LComm);
       Epetra_MultiVector Yl(View, SerialMap, values, mylda, Y.NumVectors());
        R_->Multiply(false, *localX, Yl);
    }

#ifdef TIMING_OUTPUT
    matvec2_time_->stop();
    update_time_->start();
#endif

    err = Y.Update(1.0, *temp2, -1.0);
    //cout << Y.MyLength() << " " << temp2.MyLength() << endl;
    assert(err == 0);

#ifdef TIMING_OUTPUT
    update_time_->stop();
    apply_time_->stop();
#endif
    cntApply++;
    return 0;
}