Esempi in C++ (Cpp) per Epetra_MultiVector

Linguaggio di programmazione: C++ (Cpp)

Classe/tipologia: Epetra_MultiVector

Esempi su hotexamples.com: 30

Epetra_MultiVector in C++ (Cpp): 30 esempi trovati. Questi sono i migliori esempi reali in C++ (Cpp) per Epetra_MultiVector, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

PutScalar(30)

NumVectors(30)

Map(30)

MyLength(29)

Update(23)

ExtractView(21)

Values(16)

Pointers(15)

Export(14)

GlobalLength(13)

Multiply(9)

Scale(7)

Comm(6)

Random(4)

Norm2(4)

Import(4)

ConstantStride(3)

Dot(2)

MaxValue(1)

GlobalLength64(1)

ReplaceGlobalValue(1)

ReplaceMyValue(1)

NormInf(1)

SetSeed(1)

Stride(1)

DistributedGlobal(1)

ExtractCopy(1)

Esempio n. 1

Mostra file

File: cxx_main.cpp Progetto: 00liujj/trilinos

int main(int argc, char *argv[])
{
  int ierr = 0;
  double elapsed_time;
  double total_flops;
  double MFLOPs;


#ifdef EPETRA_MPI

  // Initialize MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm comm;
#endif

  bool verbose = false;
  bool summary = false;

  // Check if we should print verbose results to standard out
  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;

  // Check if we should print verbose results to standard out
  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;

  if(argc < 6) {
    cerr << "Usage: " << argv[0]
         << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
         << "where:" << endl
         << "NumNodesX         - Number of mesh nodes in X direction per processor" << endl
         << "NumNodesY         - Number of mesh nodes in Y direction per processor" << endl
         << "NumProcX          - Number of processors to use in X direction" << endl
         << "NumProcY          - Number of processors to use in Y direction" << endl
         << "NumPoints         - Number of points to use in stencil (5, 9 or 25 only)" << endl
         << "-v|-s             - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
         << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
	 << " Serial example:" << endl
         << argv[0] << " 16 12 1 1 25 -v" << endl
	 << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
	 << " MPI example:" << endl
         << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
	 << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
	 << " in the X direction and 8 in the Y direction.  Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
         << endl;
    return(1);

  }
    //char tmp;
    //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
    //if (comm.MyPID()==0) cin >> tmp;
    //comm.Barrier();

  comm.SetTracebackMode(0); // This should shut down any error traceback reporting
  if (verbose && comm.MyPID()==0)
    cout << Epetra_Version() << endl << endl;
  if (summary && comm.MyPID()==0) {
    if (comm.NumProc()==1)
      cout << Epetra_Version() << endl << endl;
    else
      cout << endl << endl; // Print two blank line to keep output columns lined up
  }

  if (verbose) cout << comm <<endl;


  // Redefine verbose to only print on PE 0

  if (verbose && comm.MyPID()!=0) verbose = false;
  if (summary && comm.MyPID()!=0) summary = false;

  int numNodesX = atoi(argv[1]);
  int numNodesY = atoi(argv[2]);
  int numProcsX = atoi(argv[3]);
  int numProcsY = atoi(argv[4]);
  int numPoints = atoi(argv[5]);

  if (verbose || (summary && comm.NumProc()==1)) {
    cout << " Number of local nodes in X direction  = " << numNodesX << endl
	 << " Number of local nodes in Y direction  = " << numNodesY << endl
	 << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
	 << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
	 << " Number of local nonzero entries       = " << numNodesX*numNodesY*numPoints << endl
	 << " Number of global nonzero entries      = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
	 << " Number of Processors in X direction   = " << numProcsX << endl
	 << " Number of Processors in Y direction   = " << numProcsY << endl
	 << " Number of Points in stencil           = " << numPoints << endl << endl;
  }
  // Print blank line to keep output columns lined up
  if (summary && comm.NumProc()>1)
    cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;

  if (numProcsX*numProcsY!=comm.NumProc()) {
    cerr << "Number of processors = " << comm.NumProc() << endl
	 << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
    return(1);
  }

  if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
    cerr << "Number of points specified = " << numPoints << endl
	 << " is not 5, 9, 25" << endl << endl;
    return(1);
  }

  if (numNodesX*numNodesY<=0) {
    cerr << "Product of number of nodes is <= zero" << endl << endl;
    return(1);
  }

  Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
  Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
  if (numPoints==5) {

     // Generate a 5-point 2D Finite Difference matrix
    Xoff.Size(5);
    Yoff.Size(5);
    Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0;  Xoff[4] = 0;
    Yoff[0] = 0;  Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1;

     // Generate a 2-point 2D Lower triangular Finite Difference matrix
    XLoff.Size(2);
    YLoff.Size(2);
    XLoff[0] = -1; XLoff[1] =  0;
    YLoff[0] =  0; YLoff[1] = -1;

     // Generate a 3-point 2D upper triangular Finite Difference matrix
    XUoff.Size(3);
    YUoff.Size(3);
    XUoff[0] =  0; XUoff[1] =  1; XUoff[2] = 0;
    YUoff[0] =  0; YUoff[1] =  0; YUoff[2] = 1;
  }
  else if (numPoints==9) {
    // Generate a 9-point 2D Finite Difference matrix
    Xoff.Size(9);
    Yoff.Size(9);
    Xoff[0] = -1;  Xoff[1] =  0; Xoff[2] =  1;
    Yoff[0] = -1;  Yoff[1] = -1; Yoff[2] = -1;
    Xoff[3] = -1;  Xoff[4] =  0; Xoff[5] =  1;
    Yoff[3] =  0;  Yoff[4] =  0; Yoff[5] =  0;
    Xoff[6] = -1;  Xoff[7] =  0; Xoff[8] =  1;
    Yoff[6] =  1;  Yoff[7] =  1; Yoff[8] =  1;

    // Generate a 5-point lower triangular 2D Finite Difference matrix
    XLoff.Size(5);
    YLoff.Size(5);
    XLoff[0] = -1;  XLoff[1] =  0; Xoff[2] =  1;
    YLoff[0] = -1;  YLoff[1] = -1; Yoff[2] = -1;
    XLoff[3] = -1;  XLoff[4] =  0;
    YLoff[3] =  0;  YLoff[4] =  0;

    // Generate a 4-point upper triangular 2D Finite Difference matrix
    XUoff.Size(4);
    YUoff.Size(4);
    XUoff[0] =  1;
    YUoff[0] =  0;
    XUoff[1] = -1;  XUoff[2] =  0; XUoff[3] =  1;
    YUoff[1] =  1;  YUoff[2] =  1; YUoff[3] =  1;

  }
  else {
    // Generate a 25-point 2D Finite Difference matrix
    Xoff.Size(25);
    Yoff.Size(25);
    int xi = 0, yi = 0;
    int xo = -2, yo = -2;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;

    // Generate a 13-point lower triangular 2D Finite Difference matrix
    XLoff.Size(13);
    YLoff.Size(13);
    xi = 0, yi = 0;
    xo = -2, yo = -2;
    XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
    YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
    xo = -2, yo++;
    XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
    YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
    xo = -2, yo++;
    XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++;
    YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;

    // Generate a 13-point upper triangular 2D Finite Difference matrix
    XUoff.Size(13);
    YUoff.Size(13);
    xi = 0, yi = 0;
    xo = 0, yo = 0;
    XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++;
    YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;
    xo = -2, yo++;
    XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
    YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;
    xo = -2, yo++;
    XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
    YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;

  }

  Epetra_Map * map;
  Epetra_Map * mapL;
  Epetra_Map * mapU;
  Epetra_CrsMatrix * A;
  Epetra_CrsMatrix * L;
  Epetra_CrsMatrix * U;
  Epetra_MultiVector * b;
  Epetra_MultiVector * bt;
  Epetra_MultiVector * xexact;
  Epetra_MultiVector * bL;
  Epetra_MultiVector * btL;
  Epetra_MultiVector * xexactL;
  Epetra_MultiVector * bU;
  Epetra_MultiVector * btU;
  Epetra_MultiVector * xexactU;
  Epetra_SerialDenseVector resvec(0);

  //Timings
  Epetra_Flops flopcounter;
  Epetra_Time timer(comm);

#ifdef EPETRA_VERY_SHORT_PERFTEST
  int jstop = 1;
#elif EPETRA_SHORT_PERFTEST
  int jstop = 1;
#else
  int jstop = 2;
#endif
  for (int j=0; j<jstop; j++) {
    for (int k=1; k<17; k++) {
#ifdef EPETRA_VERY_SHORT_PERFTEST
      if (k<3 || (k%4==0 && k<9)) {
#elif EPETRA_SHORT_PERFTEST
      if (k<6 || k%4==0) {
#else
      if (k<7 || k%2==0) {
#endif
      int nrhs=k;
      if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";

      bool StaticProfile = (j!=0);
      if (verbose) {
        if (StaticProfile) cout << " static profile\n";
        else cout << " dynamic profile\n";
      }
      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
			 Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
			 map, A, b, bt, xexact, StaticProfile, false);


#ifdef EPETRA_HAVE_JADMATRIX

      timer.ResetStartTime();
      Epetra_JadMatrix JA(*A);
      elapsed_time = timer.ElapsedTime();
      if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;

      //cout << "A = " << *A << endl;
      //cout << "JA = " << JA << endl;

      runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);

#endif
      runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);

      delete A;
      delete b;
      delete bt;
      delete xexact;

      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
			 XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
			 mapL, L, bL, btL, xexactL, StaticProfile, true);


      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
			 XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
			 mapU, U, bU, btU, xexactU, StaticProfile, true);


      runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);

      delete L;
      delete bL;
      delete btL;
      delete xexactL;
      delete mapL;

      delete U;
      delete bU;
      delete btU;
      delete xexactU;
      delete mapU;

      Epetra_MultiVector q(*map, nrhs);
      Epetra_MultiVector z(q);
      Epetra_MultiVector r(q);

      delete map;
      q.SetFlopCounter(flopcounter);
      z.SetFlopCounter(q);
      r.SetFlopCounter(q);

      resvec.Resize(nrhs);


      flopcounter.ResetFlops();
      timer.ResetStartTime();

      //10 norms
      for( int i = 0; i < 10; ++i )
	q.Norm2( resvec.Values() );

      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;

      if (summary) {
	if (comm.NumProc()==1) cout << "Norm2" << '\t';
	cout << MFLOPs << endl;
      }

      flopcounter.ResetFlops();
      timer.ResetStartTime();

      //10 dot's
      for( int i = 0; i < 10; ++i )
	q.Dot(z, resvec.Values());

      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "Total MFLOPs for 10 Dot's  = " << MFLOPs << endl;

      if (summary) {
	if (comm.NumProc()==1) cout << "DotProd" << '\t';
	cout << MFLOPs << endl;
      }

      flopcounter.ResetFlops();
      timer.ResetStartTime();

      //10 dot's
      for( int i = 0; i < 10; ++i )
	q.Update(1.0, z, 1.0, r, 0.0);

      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;

      if (summary) {
	if (comm.NumProc()==1) cout << "Update" << '\t';
	cout << MFLOPs << endl;
      }
    }
    }
  }
#ifdef EPETRA_MPI
  MPI_Finalize() ;
#endif

return ierr ;
}

// Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
//
// nx      (In) - number of grid points in x direction
// ny      (In) - number of grid points in y direction
//   The total number of equations will be nx*ny ordered such that the x direction changes
//   most rapidly:
//      First equation is at point (0,0)
//      Second at                  (1,0)
//       ...
//      nx equation at             (nx-1,0)
//      nx+1st equation at         (0,1)

// numPoints (In) - number of points in finite difference stencil
// xoff    (In) - stencil offsets in x direction (of length numPoints)
// yoff    (In) - stencil offsets in y direction (of length numPoints)
//   A standard 5-point finite difference stencil would be described as:
//     numPoints = 5
//     xoff = [-1, 1, 0,  0, 0]
//     yoff = [ 0, 0, 0, -1, 1]

// nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed

// comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
// map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
// A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
//                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
//                diagonal will be slightly diag dominant.
// b      (Out) - Generated RHS.  Values satisfy b = A*xexact
// bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
// xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact

// Note: Caller of this function is responsible for deleting all output objects.

void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
			int * xoff, int * yoff,
			const Epetra_Comm  &comm, bool verbose, bool summary,
			Epetra_Map *& map,
			Epetra_CrsMatrix *& A,
			Epetra_Vector *& b,
			Epetra_Vector *& bt,
			Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {

  Epetra_MultiVector * b1, * bt1, * xexact1;
	
  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
		     xoff, yoff, 1, comm, verbose, summary,
		     map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);

  b = dynamic_cast<Epetra_Vector *>(b1);
  bt = dynamic_cast<Epetra_Vector *>(bt1);
  xexact = dynamic_cast<Epetra_Vector *>(xexact1);

  return;
}

void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
			int * xoff, int * yoff, int nrhs,
			const Epetra_Comm  &comm, bool verbose, bool summary,
			Epetra_Map *& map,
			Epetra_CrsMatrix *& A,
			Epetra_MultiVector *& b,
			Epetra_MultiVector *& bt,
			Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {

  Epetra_Time timer(comm);
  // Determine my global IDs
  long long * myGlobalElements;
  GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);

  int numMyEquations = numNodesX*numNodesY;

  map = new Epetra_Map((long long)-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
  delete [] myGlobalElements;

  long long numGlobalEquations = map->NumGlobalElements64();

  int profile = 0; if (StaticProfile) profile = numPoints;

#ifdef EPETRA_HAVE_STATICPROFILE

  if (MakeLocalOnly)
    A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
  else
    A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix

#else

  if (MakeLocalOnly)
    A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
  else
    A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix

#endif

  long long * indices = new long long[numPoints];
  double * values = new double[numPoints];

  double dnumPoints = (double) numPoints;
  int nx = numNodesX*numProcsX;

  for (int i=0; i<numMyEquations; i++) {

    long long rowID = map->GID64(i);
    int numIndices = 0;

    for (int j=0; j<numPoints; j++) {
      long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
      if (colID>-1 && colID<numGlobalEquations) {
	indices[numIndices] = colID;
	double value = - ((double) rand())/ ((double) RAND_MAX);
	if (colID==rowID)
	  values[numIndices++] = dnumPoints - value; // Make diagonal dominant
	else
	  values[numIndices++] = value;
      }
    }
    //cout << "Building row " << rowID << endl;
    A->InsertGlobalValues(rowID, numIndices, values, indices);
  }

  delete [] indices;
  delete [] values;
  double insertTime = timer.ElapsedTime();
  timer.ResetStartTime();
  A->FillComplete(false);
  double fillCompleteTime = timer.ElapsedTime();

  if (verbose)
    cout << "Time to insert matrix values = " << insertTime << endl
	 << "Time to complete fill        = " << fillCompleteTime << endl;
  if (summary) {
    if (comm.NumProc()==1) cout << "InsertTime" << '\t';
    cout << insertTime << endl;
    if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
    cout << fillCompleteTime << endl;
  }

  if (nrhs<=1) {
    b = new Epetra_Vector(*map);
    bt = new Epetra_Vector(*map);
    xexact = new Epetra_Vector(*map);
  }
  else {
    b = new Epetra_MultiVector(*map, nrhs);
    bt = new Epetra_MultiVector(*map, nrhs);
    xexact = new Epetra_MultiVector(*map, nrhs);
  }

  xexact->Random(); // Fill xexact with random values

  A->Multiply(false, *xexact, *b);
  A->Multiply(true, *xexact, *bt);

  return;
}

Esempio n. 2

Mostra file

File: Stokhos_EpetraMultiVectorOrthogPoly.cpp Progetto: haripandey/trilinos

void
Stokhos::EpetraMultiVectorOrthogPoly::
computeMean(Epetra_MultiVector& v) const
{
  v.Scale(1.0, *(coeff_[0]));
}

Esempio n. 3

Mostra file

File: MultiLevelPreconditioner_Reduce.cpp Progetto: haripandey/trilinos

int TestMultiLevelPreconditioner(char ProblemType[],
				 Teuchos::ParameterList & MLList,
				 Epetra_LinearProblem & Problem, double & TotalErrorResidual,
				 double & TotalErrorExactSol)
{
  
  Epetra_MultiVector* lhs = Problem.GetLHS();
  Epetra_MultiVector* rhs = Problem.GetRHS();
  Epetra_CrsMatrix* A = dynamic_cast<Epetra_CrsMatrix*>(Problem.GetMatrix());
  int PID = A->Comm().MyPID();
  int numProcs = A->Comm().NumProc();
  RCP<const Epetra_RowMatrix> Arcp = Teuchos::rcp(A, false);
  double n1, n2,nf;
  
  // ======================================== //
  // create a rhs corresponding to lhs or 1's //
  // ======================================== //
  
  lhs->PutScalar(1.0);
  A->Multiply(false,*lhs,*rhs);

  lhs->PutScalar(0.0);
  MLList.set("ML output", 0);

  RowMatrixToMatlabFile("mat_f.dat",*A);  
  MultiVectorToMatrixMarketFile("lhs_f.dat",*lhs,0,0,false);
  MultiVectorToMatrixMarketFile("rhs_f.dat",*rhs,0,0,false);

  
  Epetra_Time Time(A->Comm());
  /* Build the Zoltan list - Group #1 */
  ParameterList Zlist1,Sublist1;
  Sublist1.set("DEBUG_LEVEL","0");
  Sublist1.set("NUM_GLOBAL_PARTITIONS","2");
  Zlist1.set("Zoltan",Sublist1);
  
  /* Start Isorropia's Ninja Magic - Group #1 */
  RefCountPtr<Isorropia::Epetra::Partitioner> partitioner1 =
    Isorropia::Epetra::create_partitioner(Arcp, Zlist1);
  Isorropia::Epetra::Redistributor rd1(partitioner1);

  Teuchos::RCP<Epetra_CrsMatrix> ResA1=rd1.redistribute(*A);
  Teuchos::RCP<Epetra_MultiVector> ResX1=rd1.redistribute(*lhs);
  Teuchos::RCP<Epetra_MultiVector> ResB1=rd1.redistribute(*rhs);

  RestrictedCrsMatrixWrapper RW1;
  RW1.restrict_comm(ResA1);
  RestrictedMultiVectorWrapper RX1,RB1;
  RX1.restrict_comm(ResX1);
  RB1.restrict_comm(ResB1);

  /* Build the Zoltan list - Group #2 */
  ParameterList Zlist2,Sublist2;
  Sublist2.set("DEBUG_LEVEL","0");
  if(PID > 1) Sublist2.set("NUM_LOCAL_PARTITIONS","1");
  else Sublist2.set("NUM_LOCAL_PARTITIONS","0");
  Zlist2.set("Zoltan",Sublist2);
    
  /* Start Isorropia's Ninja Magic - Group #2 */
  RefCountPtr<Isorropia::Epetra::Partitioner> partitioner2 =
    Isorropia::Epetra::create_partitioner(Arcp, Zlist2);
  Isorropia::Epetra::Redistributor rd2(partitioner2);

  Teuchos::RCP<Epetra_CrsMatrix> ResA2=rd2.redistribute(*A);
  Teuchos::RCP<Epetra_MultiVector> ResX2=rd2.redistribute(*lhs);
  Teuchos::RCP<Epetra_MultiVector> ResB2=rd2.redistribute(*rhs);

  RestrictedCrsMatrixWrapper RW2;
  RW2.restrict_comm(ResA2);
  RestrictedMultiVectorWrapper RX2,RB2;
  RX2.restrict_comm(ResX2);
  RB2.restrict_comm(ResB2);

  if(RW1.RestrictedProcIsActive()){
    Teuchos::RCP<Epetra_CrsMatrix> SubA1 = RW1.RestrictedMatrix();
    Teuchos::RCP<Epetra_MultiVector> SubX1 = RX1.RestrictedMultiVector();
    Teuchos::RCP<Epetra_MultiVector> SubB1 = RB1.RestrictedMultiVector();    
    ML_Epetra::MultiLevelPreconditioner * SubPrec1 = new ML_Epetra::MultiLevelPreconditioner(*SubA1, MLList, true);        

    Epetra_LinearProblem Problem1(&*SubA1,&*SubX1,&*SubB1);
    AztecOO solver1(Problem1);
    solver1.SetPrecOperator(SubPrec1);  
    solver1.SetAztecOption(AZ_solver, AZ_gmres);
    solver1.SetAztecOption(AZ_output, 32);
    solver1.SetAztecOption(AZ_kspace, 160);  
    solver1.Iterate(1550, 1e-12);
    delete SubPrec1;

  }
  else{
    Teuchos::RCP<Epetra_CrsMatrix> SubA2 = RW2.RestrictedMatrix();
    Teuchos::RCP<Epetra_MultiVector> SubX2 = RX2.RestrictedMultiVector();
    Teuchos::RCP<Epetra_MultiVector> SubB2 = RB2.RestrictedMultiVector();        
    ML_Epetra::MultiLevelPreconditioner * SubPrec2 = new ML_Epetra::MultiLevelPreconditioner(*SubA2, MLList, true);        
    
    Epetra_LinearProblem Problem2(&*SubA2,&*SubX2,&*SubB2);
    AztecOO solver2(Problem2);
    solver2.SetPrecOperator(SubPrec2);  
    solver2.SetAztecOption(AZ_solver, AZ_gmres);
    solver2.SetAztecOption(AZ_output, 32);
    solver2.SetAztecOption(AZ_kspace, 160);  
    solver2.Iterate(1550, 1e-12);
    delete SubPrec2;

  }

  /* Post-processing exports */
  Epetra_MultiVector ans1(*lhs), ans2(*lhs); 
  rd1.redistribute_reverse(*ResX1,ans1);
  rd2.redistribute_reverse(*ResX2,ans2);
  
  /* Run on Full Problem */
  A->Comm().Barrier();    
  ML_Epetra::MultiLevelPreconditioner * FullPrec = new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true);          
  AztecOO solverF(Problem);
  solverF.SetPrecOperator(FullPrec);  
  solverF.SetAztecOption(AZ_solver, AZ_gmres);
  solverF.SetAztecOption(AZ_output, 32);
  solverF.SetAztecOption(AZ_kspace, 160);  
  solverF.Iterate(1550, 1e-12);
  delete FullPrec;


  /* Solution Comparison */
  ans1.Update(1.0,*lhs,-1.0);
  ans2.Update(1.0,*lhs,-1.0);
  ans1.Norm2(&n1);
  ans2.Norm2(&n2);
  if(!PID) {
    printf("Norm Diff 1 = %6.4e\n",n1);
    printf("Norm Diff 2 = %6.4e\n",n2);
  }

  TotalErrorExactSol += n1 + n2;
    
  
}

Esempio n. 4

Mostra file

File: BlockPCGSolver.cpp Progetto: ChiahungTai/Trilinos

int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y, int blkSize) const {

  int xrow = X.MyLength();
  int xcol = X.NumVectors();
  int ycol = Y.NumVectors();

  int info = 0;
  int localVerbose = verbose*(MyComm.MyPID() == 0);
  double *valX = X.Values();
  int NB = 3 + callLAPACK.ILAENV(1, "hetrd", "u", blkSize);
  int lworkD = (blkSize > NB) ? blkSize*blkSize : NB*blkSize;
  int wSize = 4*blkSize*xrow + 3*blkSize + 2*blkSize*blkSize + lworkD;

  bool useY = true;
  if (ycol % blkSize != 0) {
    // Allocate an extra block to store the solutions
    wSize += blkSize*xrow;
    useY = false;
  }

  if (lWorkSpace < wSize) {
    delete[] workSpace;
    workSpace = new (std::nothrow) double[wSize];
    if (workSpace == 0) {
      info = -1;
      return info;
    }
    lWorkSpace = wSize;
  } // if (lWorkSpace < wSize)

  double *pointer = workSpace;

  // Array to store the matrix PtKP
  double *PtKP = pointer;
  pointer = pointer + blkSize*blkSize;

  // Array to store coefficient matrices
  double *coeff = pointer;
  pointer = pointer + blkSize*blkSize;

  // Workspace array
  double *workD = pointer;
  pointer = pointer + lworkD;

  // Array to store the eigenvalues of P^t K P
  double *da = pointer;
  pointer = pointer + blkSize;

  // Array to store the norms of right hand sides
  double *initNorm = pointer;
  pointer = pointer + blkSize;

  // Array to store the norms of residuals
  double *resNorm = pointer;
  pointer = pointer + blkSize;

  // Array to store the residuals
  double *valR = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector R(View, X.Map(), valR, xrow, blkSize);

  // Array to store the preconditioned residuals
  double *valZ = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector Z(View, X.Map(), valZ, xrow, blkSize);

  // Array to store the search directions
  double *valP = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector P(View, X.Map(), valP, xrow, blkSize);

  // Array to store the image of the search directions
  double *valKP = pointer;
  pointer = pointer + xrow*blkSize;
  Epetra_MultiVector KP(View, X.Map(), valKP, xrow, blkSize);

  // Pointer to store the solutions
  double *valSOL = (useY == true) ? Y.Values() : pointer;

  int iRHS;
  for (iRHS = 0; iRHS < xcol; iRHS += blkSize) {

    int numVec = (iRHS + blkSize < xcol) ? blkSize : xcol - iRHS;

    // Set the initial residuals to the right hand sides
    if (numVec < blkSize) {
      R.Random();
    }
    memcpy(valR, valX + iRHS*xrow, numVec*xrow*sizeof(double));

    // Set the initial guess to zero
    valSOL = (useY == true) ? Y.Values() + iRHS*xrow : valSOL;
    Epetra_MultiVector SOL(View, X.Map(), valSOL, xrow, blkSize);
    SOL.PutScalar(0.0);

    int ii = 0;
    int iter = 0;
    int nFound = 0;

    R.Norm2(initNorm);

    if (localVerbose > 1) {
      std::cout << std::endl;
      std::cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1 << std::endl;
      if (localVerbose > 2) {
        std::fprintf(stderr,"\n");
        for (ii = 0; ii < numVec; ++ii) {
          std::cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << std::endl;
        }
        std::cout << std::endl;
      }
    }

    // Iteration loop
    for (iter = 1; iter <= iterMax; ++iter) {

      // Apply the preconditioner
      if (Prec)
        Prec->ApplyInverse(R, Z);
      else
        Z = R;

      // Define the new search directions
      if (iter == 1) {
        P = Z;
      }
      else {
        // Compute P^t K Z
        callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow,
                      0.0, workD, blkSize);
        MyComm.SumAll(workD, coeff, blkSize*blkSize);

        // Compute the coefficient (P^t K P)^{-1} P^t K Z
        callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
                      0.0, workD, blkSize);
        for (ii = 0; ii < blkSize; ++ii)
          callBLAS.SCAL(blkSize, da[ii], workD + ii, blkSize);
        callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
                      0.0, coeff, blkSize);

        // Update the search directions
        // Note: Use KP as a workspace
        memcpy(KP.Values(), P.Values(), xrow*blkSize*sizeof(double));
        callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, 1.0, KP.Values(), xrow, coeff, blkSize,
                      0.0, P.Values(), xrow);

        P.Update(1.0, Z, -1.0);

      } // if (iter == 1)

      K->Apply(P, KP);

      // Compute P^t K P
      callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow,
                    0.0, workD, blkSize);
      MyComm.SumAll(workD, PtKP, blkSize*blkSize);

      // Eigenvalue decomposition of P^t K P
      callLAPACK.SYEV('V', 'U', blkSize, PtKP, blkSize, da, workD, lworkD, &info);
      if (info) {
        // Break the loop as spectral decomposition failed
        break;
      } // if (info)

      // Compute the pseudo-inverse of the eigenvalues
      for (ii = 0; ii < blkSize; ++ii) {
        TEUCHOS_TEST_FOR_EXCEPTION(da[ii] < 0.0, std::runtime_error, "Negative "
                           "eigenvalue for P^T K P: da[" << ii << "] = "
                           << da[ii] << ".");
        da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii];
      } // for (ii = 0; ii < blkSize; ++ii)

      // Compute P^t R
      callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, P.Values(), xrow, R.Values(), xrow,
                    0.0, workD, blkSize);
      MyComm.SumAll(workD, coeff, blkSize*blkSize);

      // Compute the coefficient (P^t K P)^{-1} P^t R
      callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize,
                    0.0, workD, blkSize);
      for (ii = 0; ii < blkSize; ++ii)
        callBLAS.SCAL(blkSize, da[ii], workD + ii, blkSize);
      callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize,
                    0.0, coeff, blkSize);

      // Update the solutions
      callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, 1.0, P.Values(), xrow, coeff, blkSize,
                    1.0, valSOL, xrow);

      // Update the residuals
      callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, -1.0, KP.Values(), xrow, coeff, blkSize,
                    1.0, R.Values(), xrow);

      // Check convergence
      R.Norm2(resNorm);
      nFound = 0;
      for (ii = 0; ii < numVec; ++ii) {
        if (resNorm[ii] <= tolCG*initNorm[ii])
          nFound += 1;
      }

      if (localVerbose > 1) {
        std::cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1;
        std::cout << " -- Iteration " << iter << " -- " << nFound << " converged vectors\n";
        if (localVerbose > 2) {
          std::cout << std::endl;
          for (ii = 0; ii < numVec; ++ii) {
            std::cout << " ... ";
            std::cout.width(5);
            std::cout << ii << " ... Residual = ";
            std::cout.precision(2);
            std::cout.setf(std::ios::scientific, std::ios::floatfield);
            std::cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << std::endl;
          }
          std::cout << std::endl;
        }
      }

      if (nFound == numVec) {
        break;
      }

    }  // for (iter = 1; iter <= maxIter; ++iter)

    if (useY == false) {
      // Copy the solutions back into Y
      memcpy(Y.Values() + xrow*iRHS, valSOL, numVec*xrow*sizeof(double));
    }

    numSolve += nFound;

    if (nFound == numVec) {
      minIter = (iter < minIter) ? iter : minIter;
      maxIter = (iter > maxIter) ? iter : maxIter;
      sumIter += iter;
    }

  } // for (iRHS = 0; iRHS < xcol; iRHS += blkSize)

  return info;
}

Esempio n. 5

Mostra file

File: BlockDACG.cpp Progetto: Tech-XCorp/Trilinos

int BlockDACG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {

  // Computes the smallest eigenvalues and the corresponding eigenvectors
  // of the generalized eigenvalue problem
  // 
  //      K X = M X Lambda
  // 
  // using a Block Deflation Accelerated Conjugate Gradient algorithm.
  //
  // Note that if M is not specified, then  K X = X Lambda is solved.
  //
  // Ref: P. Arbenz & R. Lehoucq, "A comparison of algorithms for modal analysis in the
  // absence of a sparse direct method", SNL, Technical Report SAND2003-1028J
  // With the notations of this report, the coefficient beta is defined as 
  //                 diag( H^T_{k} G_{k} ) / diag( H^T_{k-1} G_{k-1} )
  // 
  // Input variables:
  // 
  // numEigen  (integer) = Number of eigenmodes requested
  // 
  // Q (Epetra_MultiVector) = Converged eigenvectors
  //                   The number of columns of Q must be equal to numEigen + blockSize.
  //                   The rows of Q are distributed across processors.
  //                   At exit, the first numEigen columns contain the eigenvectors requested.
  // 
  // lambda (array of doubles) = Converged eigenvalues
  //                   At input, it must be of size numEigen + blockSize.
  //                   At exit, the first numEigen locations contain the eigenvalues requested.
  //
  // startingEV (integer) = Number of existing converged eigenmodes
  //
  // Return information on status of computation
  // 
  // info >=   0 >> Number of converged eigenpairs at the end of computation
  // 
  // // Failure due to input arguments
  // 
  // info = -  1 >> The stiffness matrix K has not been specified.
  // info = -  2 >> The maps for the matrix K and the matrix M differ.
  // info = -  3 >> The maps for the matrix K and the preconditioner P differ.
  // info = -  4 >> The maps for the vectors and the matrix K differ.
  // info = -  5 >> Q is too small for the number of eigenvalues requested.
  // info = -  6 >> Q is too small for the computation parameters.
  //
  // info = - 10 >> Failure during the mass orthonormalization
  // 
  // info = - 20 >> Error in LAPACK during the local eigensolve
  //
  // info = - 30 >> MEMORY
  //

  // Check the input parameters
  
  if (numEigen <= startingEV) {
    return startingEV;
  }

  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen + blockSize);
  if (info < 0)
    return info;

  int myPid = MyComm.MyPID();

  // Get the weight for approximating the M-inverse norm
  Epetra_Vector *vectWeight = 0;
  if (normWeight) {
    vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
  }

  int knownEV = startingEV;
  int localVerbose = verbose*(myPid==0);

  // Define local block vectors
  //
  // MX = Working vectors (storing M*X if M is specified, else pointing to X)
  // KX = Working vectors (storing K*X)
  //
  // R = Residuals
  //
  // H = Preconditioned residuals
  //
  // P = Search directions
  // MP = Working vectors (storing M*P if M is specified, else pointing to P)
  // KP = Working vectors (storing K*P)

  int xr = Q.MyLength();
  Epetra_MultiVector X(View, Q, numEigen, blockSize);
  X.Random();

  int tmp;
  tmp = (M == 0) ? 5*blockSize*xr : 7*blockSize*xr;

  double *work1 = new (nothrow) double[tmp]; 
  if (work1 == 0) {
    if (vectWeight)
      delete vectWeight;
    info = -30;
    return info;
  }
  memRequested += sizeof(double)*tmp/(1024.0*1024.0);

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  double *tmpD = work1;

  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
  tmpD = (M) ? tmpD + xr*blockSize : tmpD;

  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector MP(View, Q.Map(), (M) ? tmpD : P.Values(), xr, blockSize);

  // Define arrays
  //
  // theta = Store the local eigenvalues (size: 2*blockSize)
  // normR = Store the norm of residuals (size: blockSize)
  //
  // oldHtR = Store the previous H_i^T*R_i    (size: blockSize)
  // currentHtR = Store the current H_i^T*R_i (size: blockSize)
  //
  // MM = Local mass matrix              (size: 2*blockSize x 2*blockSize)
  // KK = Local stiffness matrix         (size: 2*blockSize x 2*blockSize)
  //
  // S = Local eigenvectors              (size: 2*blockSize x 2*blockSize)

  int lwork2;
  lwork2 = 5*blockSize + 12*blockSize*blockSize;
  double *work2 = new (nothrow) double[lwork2];
  if (work2 == 0) {
    if (vectWeight)
      delete vectWeight;
    delete[] work1;
    info = -30;
    return info;
  }

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  tmpD = work2;

  double *theta = tmpD;
  tmpD = tmpD + 2*blockSize;

  double *normR = tmpD;
  tmpD = tmpD + blockSize;

  double *oldHtR = tmpD;
  tmpD = tmpD + blockSize;
  
  double *currentHtR = tmpD;
  tmpD = tmpD + blockSize;
  memset(currentHtR, 0, blockSize*sizeof(double));
  
  double *MM = tmpD;
  tmpD = tmpD + 4*blockSize*blockSize;

  double *KK = tmpD;
  tmpD = tmpD + 4*blockSize*blockSize;

  double *S = tmpD;

  memRequested += sizeof(double)*lwork2/(1024.0*1024.0);

  // Define an array to store the residuals history
  if (localVerbose > 2) {
    resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
    if (resHistory == 0) {
      if (vectWeight)
        delete vectWeight;
      delete[] work1;
      delete[] work2;
      info = -30;
      return info;
    }
    historyCount = 0;
  }

  // Miscellaneous definitions

  bool reStart = false;
  numRestart = 0;

  int localSize;
  int twoBlocks = 2*blockSize;
  int nFound = blockSize;
  int i, j;

  if (localVerbose > 0) {
    cout << endl;
    cout << " *|* Problem: ";
    if (M) 
      cout << "K*Q = M*Q D ";
    else
      cout << "K*Q = Q D ";
    if (Prec)
      cout << " with preconditioner";
    cout << endl;
    cout << " *|* Algorithm = DACG (block version)" << endl;
    cout << " *|* Size of blocks = " << blockSize << endl;
    cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
    cout.precision(2);
    cout.setf(ios::scientific, ios::floatfield);
    cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
    cout << " *|* Norm used for convergence: ";
    if (normWeight)
      cout << "weighted L2-norm with user-provided weights" << endl;
    else
      cout << "L^2-norm" << endl;
    if (startingEV > 0)
      cout << " *|* Input converged eigenvectors = " << startingEV << endl;
    cout << "\n -- Start iterations -- \n";
  }

  timeOuterLoop -= MyWatch.WallTime();
  for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) {

    highMem = (highMem > currentSize()) ? highMem : currentSize();

    if ((outerIter == 1) || (reStart == true)) {

      reStart = false;
      localSize = blockSize;

      if (nFound > 0) {

        Epetra_MultiVector X2(View, X, blockSize-nFound, nFound);
        Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound);
        Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound);

        // Apply the mass matrix to X
        timeMassOp -= MyWatch.WallTime();
        if (M)
          M->Apply(X2, MX2);
        timeMassOp += MyWatch.WallTime();
        massOp += nFound;

        if (knownEV > 0) {
          // Orthonormalize X against the known eigenvectors with Gram-Schmidt
          // Note: Use R as a temporary work space
          Epetra_MultiVector copyQ(View, Q, 0, knownEV);
          timeOrtho -= MyWatch.WallTime();
          info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values());
          timeOrtho += MyWatch.WallTime();
          // Exit the code if the orthogonalization did not succeed
          if (info < 0) {
            info = -10;
            delete[] work1;
            delete[] work2;
            if (vectWeight)
              delete vectWeight;
            return info;
          }
        }

        // Apply the stiffness matrix to X
        timeStifOp -= MyWatch.WallTime();
        K->Apply(X2, KX2);
        timeStifOp += MyWatch.WallTime();
        stifOp += nFound;

      } // if (nFound > 0)

    } // if ((outerIter == 1) || (reStart == true))
    else {

      // Apply the preconditioner on the residuals
      if (Prec != 0) {
        timePrecOp -= MyWatch.WallTime();
        Prec->ApplyInverse(R, H);
        timePrecOp += MyWatch.WallTime();
        precOp += blockSize;
      }
      else {
        memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double));
      }

      // Compute the product H^T*R
      timeSearchP -= MyWatch.WallTime();      
      memcpy(oldHtR, currentHtR, blockSize*sizeof(double));
      H.Dot(R, currentHtR);
      // Define the new search directions
      if (localSize == blockSize) {
        P.Scale(-1.0, H);
        localSize = twoBlocks;
      } // if (localSize == blockSize)
      else {
        bool hasZeroDot = false;
        for (j = 0; j < blockSize; ++j) {
          if (oldHtR[j] == 0.0) {
            hasZeroDot = true;
            break; 
          }
          callBLAS.SCAL(xr, currentHtR[j]/oldHtR[j], P.Values() + j*xr);
        }
        if (hasZeroDot == true) {
          // Restart the computation when there is a null dot product
          if (localVerbose > 0) {
            cout << endl;
            cout << " !! Null dot product -- Restart the search space !!\n";
            cout << endl;
          }
          if (blockSize == 1) {
            X.Random();
            nFound = blockSize;
          }
          else {
            Epetra_MultiVector Xinit(View, X, j, blockSize-j);
            Xinit.Random();
            nFound = blockSize - j;
          } // if (blockSize == 1)
          reStart = true;
          numRestart += 1;
          info = 0;
          continue;
        }
        callBLAS.AXPY(xr*blockSize, -1.0, H.Values(), P.Values());
      } // if (localSize == blockSize)
      timeSearchP += MyWatch.WallTime();

      // Apply the mass matrix on P
      timeMassOp -= MyWatch.WallTime();
      if (M)
        M->Apply(P, MP);
      timeMassOp += MyWatch.WallTime();
      massOp += blockSize;

      if (knownEV > 0) {
        // Orthogonalize P against the known eigenvectors
        // Note: Use R as a temporary work space
        Epetra_MultiVector copyQ(View, Q, 0, knownEV);
        timeOrtho -= MyWatch.WallTime();
        modalTool.massOrthonormalize(P, MP, M, copyQ, blockSize, 1, R.Values());
        timeOrtho += MyWatch.WallTime();
      }

      // Apply the stiffness matrix to P
      timeStifOp -= MyWatch.WallTime();
      K->Apply(P, KP);
      timeStifOp += MyWatch.WallTime();
      stifOp += blockSize;

    } // if ((outerIter == 1) || (reStart == true))

    // Form "local" mass and stiffness matrices
    // Note: Use S as a temporary workspace
    timeLocalProj -= MyWatch.WallTime();
    modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr,
                    KK, localSize, S);
    modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr,
                    MM, localSize, S);
    if (localSize > blockSize) {
      modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr,
                      KK + blockSize*localSize, localSize, S);
      modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr,
                      KK + blockSize*localSize + blockSize, localSize, S);
      modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr,
                      MM + blockSize*localSize, localSize, S);
      modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr,
                      MM + blockSize*localSize + blockSize, localSize, S);
    } // if (localSize > blockSize)
    timeLocalProj += MyWatch.WallTime();

    // Perform a spectral decomposition
    timeLocalSolve -= MyWatch.WallTime();
    int nevLocal = localSize;
    info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal,
                                  S, localSize, theta, localVerbose,
                                  (blockSize == 1) ? 1: 0);
    timeLocalSolve += MyWatch.WallTime();

    if (info < 0) {
      // Stop when spectral decomposition has a critical failure
      break;
    }

    // Check for restarting
    if ((theta[0] < 0.0) || (nevLocal < blockSize)) {
      if (localVerbose > 0) {
        cout << " Iteration " << outerIter;
        cout << "- Failure for spectral decomposition - RESTART with new random search\n";
      }
      if (blockSize == 1) {
        X.Random();
        nFound = blockSize;
      }
      else {
        Epetra_MultiVector Xinit(View, X, 1, blockSize-1);
        Xinit.Random();
        nFound = blockSize - 1;
      } // if (blockSize == 1)
      reStart = true;
      numRestart += 1;
      info = 0;
      continue;
    } // if ((theta[0] < 0.0) || (nevLocal < blockSize))

    if ((localSize == twoBlocks) && (nevLocal == blockSize)) {
      for (j = 0; j < nevLocal; ++j)
        memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double));
      localSize = blockSize;
    }

    // Check the direction of eigenvectors
    // Note: This sign check is important for convergence
    for (j = 0; j < nevLocal; ++j) {
      double coeff = S[j + j*localSize];
      if (coeff < 0.0)
        callBLAS.SCAL(localSize, -1.0, S + j*localSize);
    }

    // Compute the residuals
    timeResidual -= MyWatch.WallTime();
    callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KX.Values(), xr,
                  S, localSize, 0.0, R.Values(), xr);
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
                    S + blockSize, localSize, 1.0, R.Values(), xr);
    }
    for (j = 0; j < blockSize; ++j)
      callBLAS.SCAL(localSize, theta[j], S + j*localSize);
    callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr,
                  S, localSize, 1.0, R.Values(), xr);
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr,
                  S + blockSize, localSize, 1.0, R.Values(), xr);
    }
    for (j = 0; j < blockSize; ++j)
      callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize);
    timeResidual += MyWatch.WallTime();
    
    // Compute the norms of the residuals
    timeNorm -= MyWatch.WallTime();
    if (vectWeight)
      R.NormWeighted(*vectWeight, normR);
    else
      R.Norm2(normR);
    // Scale the norms of residuals with the eigenvalues
    // Count the converged eigenvectors
    nFound = 0;
    for (j = 0; j < blockSize; ++j) {
      normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
      if (normR[j] < tolEigenSolve) 
        nFound += 1;
    }
    timeNorm += MyWatch.WallTime();

    // Store the residual history
    if (localVerbose > 2) {
      memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
      historyCount += 1;
    }

    // Print information on current iteration
    if (localVerbose > 0) {
      cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
      cout << knownEV + nFound << endl;
    }

    if (localVerbose > 1) {
      cout << endl;
      cout.precision(2);
      cout.setf(ios::scientific, ios::floatfield);
      for (i=0; i<blockSize; ++i) {
        cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
        cout << " = " << normR[i] << endl;
      }
      cout << endl;
      cout.precision(2);
      for (i=0; i<blockSize; ++i) {
        cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
        cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);
        cout << " = " << theta[i] << endl;
      }
      cout << endl;
    }

    if (nFound == 0) {
      // Update the spaces
      // Note: Use H as a temporary work space
      timeLocalUpdate -= MyWatch.WallTime();
      memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double));
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
                    0.0, X.Values(), xr);
      memcpy(H.Values(), KX.Values(), xr*blockSize*sizeof(double));
      callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
                    0.0, KX.Values(), xr);
      if (M) {
        memcpy(H.Values(), MX.Values(), xr*blockSize*sizeof(double));
        callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize,
                      0.0, MX.Values(), xr);
      }
      if (localSize == twoBlocks) {
        callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, P.Values(), xr,
                      S + blockSize, localSize, 1.0, X.Values(), xr);
        callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr,
                      S + blockSize, localSize, 1.0, KX.Values(), xr);
        if (M) {
          callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MP.Values(), xr,
                      S + blockSize, localSize, 1.0, MX.Values(), xr);
        }
      } // if (localSize == twoBlocks)
      timeLocalUpdate += MyWatch.WallTime();
      // When required, monitor some orthogonalities
      if (verbose > 2) {
        if (knownEV == 0) {
          accuracyCheck(&X, &MX, &R, 0, (localSize>blockSize) ? &P : 0);
        }
        else {
          Epetra_MultiVector copyQ(View, Q, 0, knownEV);
          accuracyCheck(&X, &MX, &R, &copyQ, (localSize>blockSize) ? &P : 0);
        }
      } // if (verbose > 2)
      continue;
    } // if (nFound == 0)

    // Order the Ritz eigenvectors by putting the converged vectors at the beginning
    int firstIndex = blockSize;
    for (j = 0; j < blockSize; ++j) {
      if (normR[j] >= tolEigenSolve) {
        firstIndex = j;
        break;
      }
    } // for (j = 0; j < blockSize; ++j)
    while (firstIndex < nFound) {
      for (j = firstIndex; j < blockSize; ++j) {
        if (normR[j] < tolEigenSolve) {
          // Swap the j-th and firstIndex-th position
          callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1);
          callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
          callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
          break;
        }
      } // for (j = firstIndex; j < blockSize; ++j)
      for (j = 0; j < blockSize; ++j) {
        if (normR[j] >= tolEigenSolve) {
          firstIndex = j;
          break;
        }
      } // for (j = 0; j < blockSize; ++j)
    } // while (firstIndex < nFound)

    // Copy the converged eigenvalues
    memcpy(lambda + knownEV, theta, nFound*sizeof(double));

    // Convergence test
    if (knownEV + nFound >= numEigen) {
      callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
                    S, localSize, 0.0, R.Values(), xr);
      if (localSize > blockSize) {
        callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
                      S + blockSize, localSize, 1.0, R.Values(), xr);
      }
      memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double));
      knownEV += nFound;
      if (localVerbose == 1) {
        cout << endl;
        cout.precision(2);
        cout.setf(ios::scientific, ios::floatfield);
        for (i=0; i<blockSize; ++i) {
          cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
          cout << " = " << normR[i] << endl;
        }
        cout << endl;
      }
      break;
    }

    // Store the converged eigenvalues and eigenvectors
    callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr,
                  S, localSize, 0.0, Q.Values() + knownEV*xr, xr);
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr,
                    S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr);
    }
    knownEV += nFound;

    // Define the restarting vectors
    timeRestart -= MyWatch.WallTime();
    int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize;
    double *Snew = S + nFound*localSize;
    memcpy(H.Values(), X.Values(), blockSize*xr*sizeof(double));
    callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
                  Snew, localSize, 0.0, X.Values(), xr);
    memcpy(H.Values(), KX.Values(), blockSize*xr*sizeof(double));
    callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
                  Snew, localSize, 0.0, KX.Values(), xr);
    if (M) {
      memcpy(H.Values(), MX.Values(), blockSize*xr*sizeof(double));
      callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr,
                    Snew, localSize, 0.0, MX.Values(), xr);
    }
    if (localSize == twoBlocks) {
      callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr,
                    Snew+blockSize, localSize, 1.0, X.Values(), xr);
      callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr,
                    Snew+blockSize, localSize, 1.0, KX.Values(), xr);
      if (M) {
        callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr,
                      Snew+blockSize, localSize, 1.0, MX.Values(), xr);
      }
    } // if (localSize == twoBlocks)
    if (nevLocal < blockSize + nFound) {
      // Put new random vectors at the end of the block
      Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver);
      Xtmp.Random();
    }
    else {
      nFound = 0;
    } // if (nevLocal < blockSize + nFound)
    reStart = true;
    timeRestart += MyWatch.WallTime();

  } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter)
  timeOuterLoop += MyWatch.WallTime();
  highMem = (highMem > currentSize()) ? highMem : currentSize();

  // Clean memory
  delete[] work1;
  delete[] work2;
  if (vectWeight)
    delete vectWeight;

  // Sort the eigenpairs
  timePostProce -= MyWatch.WallTime();
  if ((info == 0) && (knownEV > 0)) {
    mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
  }
  timePostProce += MyWatch.WallTime();

  return (info == 0) ? knownEV : info;

}

Esempio n. 6

Mostra file

File: Ifpack_PointRelaxation.cpp Progetto: KineticTheory/Trilinos

//==============================================================================
int Ifpack_PointRelaxation::
ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  int NumVectors = X.NumVectors();
  int Length = Matrix().MaxNumEntries();
  std::vector<int> Indices(Length);
  std::vector<double> Values(Length);

  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
  if (IsParallel_) {
    Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
  }
  else
    Y2 = Teuchos::rcp( &Y, false );

  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
  X.ExtractView(&x_ptr);
  Y.ExtractView(&y_ptr);
  Y2->ExtractView(&y2_ptr);
  Diagonal_->ExtractView(&d_ptr);

  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {

    // only one data exchange per sweep
    if (IsParallel_)
      IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));

    for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
      int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];

      int NumEntries;
      int col;
      double diag = d_ptr[i];

      IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                               &Values[0], &Indices[0]));

      for (int m = 0 ; m < NumVectors ; ++m) {

        double dtemp = 0.0;

        for (int k = 0 ; k < NumEntries ; ++k) {

          col = Indices[k];
          dtemp += Values[k] * y2_ptr[m][col];
        }

        y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
      }
    }
    for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
      int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];

      int NumEntries;
      int col;
      double diag = d_ptr[i];

      IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                               &Values[0], &Indices[0]));

      for (int m = 0 ; m < NumVectors ; ++m) {

        double dtemp = 0.0;
        for (int k = 0 ; k < NumEntries ; ++k) {

          col = Indices[k];
          dtemp += Values[k] * y2_ptr[m][col];
        }

        y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;

      }
    }

    if (IsParallel_)
      for (int m = 0 ; m < NumVectors ; ++m)
        for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
          int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
          y_ptr[m][i] = y2_ptr[m][i];
        }
  }

#ifdef IFPACK_FLOPCOUNTERS
  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
#endif
  return(0);
}

Esempio n. 7

Mostra file

File: EpetraExt_CrsSingletonFilter_LinearProblem.cpp Progetto: cakeisalie/oomphlib_003

//==============================================================================
int LinearProblem_CrsSingletonFilter::UpdateReducedProblem(Epetra_LinearProblem * Problem) {

  int i, j;

  if (Problem==0) EPETRA_CHK_ERR(-1); // Null problem pointer

  FullProblem_ = Problem;
  FullMatrix_ = dynamic_cast<Epetra_RowMatrix *>(Problem->GetMatrix());
  if (FullMatrix_==0) EPETRA_CHK_ERR(-2); // Need a RowMatrix
  if (Problem->GetRHS()==0) EPETRA_CHK_ERR(-3); // Need a RHS
  if (Problem->GetLHS()==0) EPETRA_CHK_ERR(-4); // Need a LHS
  if (!HaveReducedProblem_) EPETRA_CHK_ERR(-5); // Must have set up reduced problem

  // Create pointer to Full RHS, LHS
  Epetra_MultiVector * FullRHS = FullProblem()->GetRHS();
  Epetra_MultiVector * FullLHS = FullProblem()->GetLHS();
  int NumVectors = FullLHS->NumVectors();

  int NumEntries;
  int * Indices;
  double * Values;
  int NumMyRows = FullMatrix()->NumMyRows();
  int ColSingletonCounter = 0;
  for (i=0; i<NumMyRows; i++) {
    int curGRID = FullMatrixRowMap().GID(i);
    if (ReducedMatrixRowMap()->MyGID(curGRID)) { // Check if this row should go into reduced matrix
      EPETRA_CHK_ERR(GetRowGCIDs(i, NumEntries, Values, Indices)); // Get current row (indices global)
      int ierr = ReducedMatrix()->ReplaceGlobalValues(curGRID, NumEntries, 
						      Values, Indices);
      // Positive errors will occur because we are submitting col entries that are not part of
      // reduced system.  However, because we specified a column map to the ReducedMatrix constructor
      // these extra column entries will be ignored and we will be politely reminded by a positive
      // error code
      if (ierr<0) EPETRA_CHK_ERR(ierr); 
    }
    // Otherwise if singleton row we explicitly eliminate this row and solve for corresponding X value
    else {
      EPETRA_CHK_ERR(GetRow(i, NumEntries, Values, Indices)); // Get current row
      if (NumEntries==1) {
	double pivot = Values[0];
	if (pivot==0.0) EPETRA_CHK_ERR(-1); // Encountered zero row, unable to continue
	int indX = Indices[0];
	for (j=0; j<NumVectors; j++)
	  (*tempExportX_)[j][indX] = (*FullRHS)[j][i]/pivot;
      }
      // Otherwise, this is a singleton column and we will scan for the pivot element needed 
      // for post-solve equations
      else {
	j = ColSingletonPivotLIDs_[ColSingletonCounter];
	double pivot = Values[j];
	if (pivot==0.0) EPETRA_CHK_ERR(-2); // Encountered zero column, unable to continue
	ColSingletonPivots_[ColSingletonCounter] = pivot;
	ColSingletonCounter++;
      }
    }
  }

  assert(ColSingletonCounter==NumMyColSingletons_); // Sanity test

  // Update Reduced LHS (Puts any initial guess values into reduced system)

  ReducedLHS_->PutScalar(0.0); // zero out Reduced LHS
  EPETRA_CHK_ERR(ReducedLHS_->Import(*FullLHS, *Full2ReducedLHSImporter_, Insert));
  FullLHS->PutScalar(0.0); // zero out Full LHS since we will inject values as we get them

  // Construct Reduced RHS

  // Zero out temp space
  tempX_->PutScalar(0.0);
  tempB_->PutScalar(0.0);
  
  //Inject known X values into tempX for purpose of computing tempB = FullMatrix*tempX
  // Also inject into full X since we already know the solution

  if (FullMatrix()->RowMatrixImporter()!=0) {
    EPETRA_CHK_ERR(tempX_->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
    EPETRA_CHK_ERR(FullLHS->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
  }
  else {
    tempX_->Update(1.0, *tempExportX_, 0.0);
    FullLHS->Update(1.0, *tempExportX_, 0.0);
  }


  EPETRA_CHK_ERR(FullMatrix()->Multiply(false, *tempX_, *tempB_));

  EPETRA_CHK_ERR(tempB_->Update(1.0, *FullRHS, -1.0)); // tempB now has influence of already-known X values

  ReducedRHS_->PutScalar(0.0);
  EPETRA_CHK_ERR(ReducedRHS_->Import(*tempB_, *Full2ReducedRHSImporter_, Insert));
    return(0);
}

Esempio n. 8

Mostra file

File: ModeLaplace3DQ2.cpp Progetto: haripandey/trilinos

int ModeLaplace3DQ2::eigenCheck(const Epetra_MultiVector &Q, double *lambda, 
                                double *normWeight, bool /*smallest*/) const {

  using std::cout;
  using std::ios;

  int info = 0;
  int qc = Q.NumVectors();
  int myPid = MyComm.MyPID();

  cout.precision(2);
  cout.setf(ios::scientific, ios::floatfield);

  // Check orthonormality of eigenvectors
  double tmp = myVerify.errorOrthonormality(&Q, M);
  if (myPid == 0)
    cout << " Maximum coefficient in matrix Q^T M Q - I = " << tmp << endl;

  // Print out norm of residuals
  myVerify.errorEigenResiduals(Q, lambda, K, M, normWeight);

  // Check the eigenvalues
  int numX = (int) ceil(sqrt(Lx*Lx*lambda[qc-1]/M_PI/M_PI));
  numX = (numX > 2*nX) ? 2*nX : numX;
  int numY = (int) ceil(sqrt(Ly*Ly*lambda[qc-1]/M_PI/M_PI));
  numY = (numY > 2*nY) ? 2*nY : numY;
  int numZ = (int) ceil(sqrt(Lz*Lz*lambda[qc-1]/M_PI/M_PI));
  numZ = (numZ > 2*nZ) ? 2*nZ : numZ;
  int newSize = (numX-1)*(numY-1)*(numZ-1);
  double *discrete = new (std::nothrow) double[2*newSize];
  if (discrete == 0) {
    return -1;
  }
  double *continuous = discrete + newSize;

  double hx = Lx/nX;
  double hy = Ly/nY;
  double hz = Lz/nZ;

  int i, j, k;
  for (k = 1; k < numZ; ++k) {
    // Compute the coefficient alphaz
    double cosk = cos(k*M_PI*hz/2/Lz);
    double a = cosk*(92.0 - 12.0*cos(k*M_PI*hz/Lz));
    double b = 48.0 + 32.0*cos(k*M_PI*hz/Lz);
    double c = -160.0*cosk;
    double delta = sqrt(b*b - 4*a*c);
    double alphaz = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
    for (j = 1; j < numY; ++j) {
      // Compute the coefficient alphay
      double cosj = cos(j*M_PI*hy/2/Ly);
      a = cosj*(92.0 - 12.0*cos(j*M_PI*hy/Ly));
      b = 48.0 + 32.0*cos(j*M_PI*hy/Ly);
      c = -160.0*cosj;
      delta = sqrt(b*b - 4*a*c);
      double alphay = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
      for (i = 1; i < numX; ++i) {
        // Compute the coefficient alphax
        double cosi = cos(i*M_PI*hx/2/Lx);
        a = cosi*(92.0 - 12.0*cos(i*M_PI*hx/Lx));
        b = 48.0 + 32.0*cos(i*M_PI*hx/Lx);
        c = -160.0*cosi;
        delta = sqrt(b*b - 4*a*c);
        double alphax = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
        // Compute the continuous eigenvalue
        int pos = i-1 + (j-1)*(numX-1) + (k-1)*(numX-1)*(numY-1);
        continuous[pos] = M_PI*M_PI*(i*i/(Lx*Lx) + j*j/(Ly*Ly) + k*k/(Lz*Lz));
        // Compute the discrete eigenvalue
        discrete[pos] = 240.0*(1.0-alphax*cosi)/((8.0+2*alphax*cosi)*(3.0*hx*hx));
        discrete[pos] += 240.0*(1.0-alphay*cosj)/((8.0+2*alphay*cosj)*(3.0*hy*hy));
        discrete[pos] += 240.0*(1.0-alphaz*cosk)/((8.0+2*alphaz*cosk)*(3.0*hz*hz));
      }
    }
  }

  // Sort the eigenvalues in ascending order
  mySort.sortScalars(newSize, continuous);

  int *used = new (std::nothrow) int[newSize];
  if (used == 0) {
    delete[] discrete;
    return -1;
  }

  mySort.sortScalars(newSize, discrete, used);

  int *index = new (std::nothrow) int[newSize];
  if (index == 0) {
    delete[] discrete;
    delete[] used;
    return -1;
  }

  for (i=0; i<newSize; ++i) {
    index[used[i]] = i;
  }
  delete[] used;

  int nMax = myVerify.errorLambda(continuous, discrete, newSize, lambda, qc);

  // Define the exact discrete eigenvectors
  int localSize = Map->NumMyElements();
  double *vQ = new (std::nothrow) double[(nMax+1)*localSize + nMax];
  if (vQ == 0) {
    delete[] discrete;
    delete[] index;
    info = -1;
    return info;
  }

  double *normL2 = vQ + (nMax+1)*localSize;
  Epetra_MultiVector Qex(View, *Map, vQ, localSize, nMax);

  if ((myPid == 0) && (nMax > 0)) {
    cout << endl;
    cout << " --- Relative discretization errors for exact eigenvectors ---" << endl;
    cout << endl;
    cout << "       Cont. Values   Disc. Values     Error      H^1 norm   L^2 norm\n";
  }

  for (k=1; k < numZ; ++k) {
    for (j=1; j < numY; ++j) {
      for (i=1; i < numX; ++i) {
        int pos = i-1 + (j-1)*(numX-1) + (k-1)*(numX-1)*(numY-1);
        if (index[pos] < nMax) {
          int ii;
          for (ii=0; ii<localSize; ++ii) {
             // Compute the coefficient alphaz
            double cosk = cos(k*M_PI*hz/2/Lz);
            double a = cosk*(92.0 - 12.0*cos(k*M_PI*hz/Lz));
            double b = 48.0 + 32.0*cos(k*M_PI*hz/Lz);
            double c = -160.0*cosk;
            double delta = sqrt(b*b - 4*a*c);
            double alphaz = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
            // Compute the coefficient alphay
            double cosj = cos(j*M_PI*hy/2/Ly);
            a = cosj*(92.0 - 12.0*cos(j*M_PI*hy/Ly));
            b = 48.0 + 32.0*cos(j*M_PI*hy/Ly);
            c = -160.0*cosj;
            delta = sqrt(b*b - 4*a*c);
            double alphay = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
            // Compute the coefficient alphax
            double cosi = cos(i*M_PI*hx/2/Lx);
            a = cosi*(92.0 - 12.0*cos(i*M_PI*hx/Lx));
            b = 48.0 + 32.0*cos(i*M_PI*hx/Lx);
            c = -160.0*cosi;
            delta = sqrt(b*b - 4*a*c);
            double alphax = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a;
            // Get the value for this eigenvector
            double coeff = sin(i*(M_PI/Lx)*x[ii])*sin(j*(M_PI/Ly)*y[ii])*sin(k*(M_PI/Lz)*z[ii]);
            if (fabs(x[ii] - floor(x[ii]/hx+0.5)*hx) < 0.25*hx)
              coeff *= alphax;
            if (fabs(y[ii] - floor(y[ii]/hy+0.5)*hy) < 0.25*hy)
              coeff *= alphay;
            if (fabs(z[ii] - floor(z[ii]/hz+0.5)*hz) < 0.25*hz)
              coeff *= alphaz;
            Qex.ReplaceMyValue(ii, index[pos], coeff);
          }
          // Normalize Qex against the mass matrix
          Epetra_MultiVector MQex(View, *Map, vQ + nMax*localSize, localSize, 1);
          Epetra_MultiVector Qi(View, Qex, index[pos], 1);
          M->Apply(Qi, MQex);
          double mnorm = 0.0;
          Qi.Dot(MQex, &mnorm); 
          Qi.Scale(1.0/sqrt(mnorm));
          // Compute the L2 norm
          Epetra_MultiVector shapeInt(View, *Map, vQ + nMax*localSize, localSize, 1);
          for (ii=0; ii<localSize; ++ii) {
            double iX, iY, iZ;
            if (fabs(x[ii] - floor(x[ii]/hx+0.5)*hx) < 0.25*hx)
              iX = 2.0*sin(i*(M_PI/Lx)*x[ii])/(hx*hx*i*(M_PI/Lx)*i*(M_PI/Lx)*i*(M_PI/Lx))*
                   sqrt(2.0/Lx)*( 3*hx*i*(M_PI/Lx) - 4*sin(i*(M_PI/Lx)*hx) +
                                  cos(i*(M_PI/Lx)*hx)*hx*i*(M_PI/Lx) );
            else
              iX = 8.0*sin(i*(M_PI/Lx)*x[ii])/(hx*hx*i*(M_PI/Lx)*i*(M_PI/Lx)*i*(M_PI/Lx))*
                   sqrt(2.0/Lx)*( 2*sin(i*(M_PI/Lx)*0.5*hx) - 
                                  cos(i*(M_PI/Lx)*0.5*hx)*hx*i*(M_PI/Lx));
            if (fabs(y[ii] - floor(y[ii]/hy+0.5)*hy) < 0.25*hy)
              iY = 2.0*sin(j*(M_PI/Ly)*y[ii])/(hy*hy*j*(M_PI/Ly)*j*(M_PI/Ly)*j*(M_PI/Ly))*
                   sqrt(2.0/Ly)*( 3*hy*j*(M_PI/Ly) - 4*sin(j*(M_PI/Ly)*hy) +
                                  cos(j*(M_PI/Ly)*hy)*hy*j*(M_PI/Ly) );
            else
              iY = 8.0*sin(j*(M_PI/Ly)*y[ii])/(hy*hy*j*(M_PI/Ly)*j*(M_PI/Ly)*j*(M_PI/Ly))*
                   sqrt(2.0/Ly)*( 2*sin(j*(M_PI/Ly)*0.5*hy) - 
                                  cos(j*(M_PI/Ly)*0.5*hy)*hy*j*(M_PI/Ly));
            if (fabs(z[ii] - floor(z[ii]/hz+0.5)*hz) < 0.25*hz)
              iZ = 2.0*sin(k*(M_PI/Lz)*z[ii])/(hz*hz*k*(M_PI/Lz)*k*(M_PI/Lz)*k*(M_PI/Lz))*
                   sqrt(2.0/Lz)*( 3*hz*k*(M_PI/Lz) - 4*sin(k*(M_PI/Lz)*hz) +
                                  cos(k*(M_PI/Lz)*hz)*hz*k*(M_PI/Lz) );
            else
              iZ = 8.0*sin(k*(M_PI/Lz)*z[ii])/(hz*hz*k*(M_PI/Lz)*k*(M_PI/Lz)*k*(M_PI/Lz))*
                   sqrt(2.0/Lz)*( 2*sin(k*(M_PI/Lz)*0.5*hz) - 
                                  cos(k*(M_PI/Lz)*0.5*hz)*hz*k*(M_PI/Lz));
            shapeInt.ReplaceMyValue(ii, 0, iX*iY*iZ);
          }
          Qi.Dot(shapeInt, normL2 + index[pos]);
        } // if index[pos] < nMax)
      } // for (i=1; i < numX; ++i)
    } // for (j=1; j < numY; ++j)
  } // for (k=1; k < numZ; ++k)

  if (myPid == 0) {
    for (i = 0; i < nMax; ++i) {
      double normH1 = continuous[i]*(1.0 - 2.0*normL2[i]) + discrete[i];
      normL2[i] = 2.0 - 2.0*normL2[i];
      normH1+= normL2[i];
      // Print out the result
      if (myPid == 0) {
        cout << " ";
        cout.width(4);
        cout << i+1 << ". ";
        cout.setf(ios::scientific, ios::floatfield);
        cout.precision(8);
        cout << continuous[i] << " " << discrete[i] << "  ";
        cout.precision(3);
        cout << fabs(discrete[i] - continuous[i])/continuous[i] << "  ";
        cout << sqrt(fabs(normH1)/(continuous[i]+1.0)) << "  ";
        cout << sqrt(fabs(normL2[i])) << endl;
      }
    } // for (i = 0; i < nMax; ++i)
  } // if (myPid == 0)

  delete[] discrete;
  delete[] index;

  // Check the angles between exact discrete eigenvectors and computed

  myVerify.errorSubspaces(Q, Qex, M);

  delete[] vQ;

  return info;
}

Esempio n. 9

Mostra file

File: LOCA_Epetra_Interface_MultiPoint.C Progetto: haripandey/trilinos

LOCA::Epetra::Interface::MultiPoint::
MultiPoint(
       const Teuchos::RCP<LOCA::Epetra::Interface::Required> &iReq_,
       const Teuchos::RCP< NOX::Epetra::Interface::Jacobian> &iJac_,
       const Epetra_MultiVector &splitMultiVec_, 
       const Teuchos::RCP<Epetra_RowMatrix> &splitJac_,
       const Teuchos::RCP<EpetraExt::MultiComm> &globalComm_) :
  iReq(iReq_),
  iJac(iJac_),
  splitJac(splitJac_), 
  globalComm(globalComm_),
  splitVec(*(splitMultiVec_(0))),
  splitRes(*(splitMultiVec_(0))), 
  jacobian(0), 
  solution(0),
  solutionOverlap(0), 
  overlapImporter(0), 
  timeStepsOnTimeDomain(splitMultiVec_.NumVectors()), 
  numTimeDomains(globalComm_->NumSubDomains()),
  timeDomain(globalComm_->SubDomainRank()), 
  conStep(0),
  rowStencil(0),
  rowIndex(0)
{

   if (globalComm->MyPID()==0) {
     // TODO: pass in globalData and use output stream
     cout  << "----------MultiPoint Partition Info------------"
           << "\n\tNumProcs              = " << globalComm->NumProc()
           << "\n\tSpatial Decomposition = " << splitMultiVec_.Comm().NumProc()
           << "\n\tNumber of Domains     = " << numTimeDomains
           << "\n\tSteps on Domain 0     = " << timeStepsOnTimeDomain
           << "\n\tTotal Number of Steps = " << globalComm->NumTimeSteps();
    cout   << "\n-----------------------------------------------" << endl;
    }

   // Construct global block matrix graph from split jacobian and stencil,
   // which is just diagonal in this case

   rowStencil = new std::vector< std::vector<int> >(timeStepsOnTimeDomain);
   rowIndex = new std::vector<int>;
   for (int i=0; i < timeStepsOnTimeDomain; i++) {
     (*rowStencil)[i].push_back(0);
     (*rowIndex).push_back(i + globalComm->FirstTimeStepOnDomain());
   }

   jacobian = new EpetraExt::BlockCrsMatrix(*splitJac, *rowStencil, 
					    *rowIndex, *globalComm);

   // Construct global solution vector, the overlap vector, 
   //and importer between them
   solution = new EpetraExt::BlockVector(splitJac->RowMatrixRowMap(), 
					 jacobian->RowMap());
   solutionOverlap = new EpetraExt::BlockVector(splitJac->RowMatrixRowMap(), 
						jacobian->ColMap());
  
   overlapImporter = new Epetra_Import(solutionOverlap->Map(), solution->Map());


   // Load initial guess into block solution vector
   for (int i=0; i < timeStepsOnTimeDomain; i++) 
           solution->LoadBlockValues(*(splitMultiVec_(i)), (*rowIndex)[i]);
}

Esempio n. 10

Mostra file

File: Stokhos_ApproxSchurComplementPreconditioner.cpp Progetto: 00liujj/trilinos

int 
Stokhos::ApproxSchurComplementPreconditioner::
ApplyInverse(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const
{
#ifdef STOKHOS_TEUCHOS_TIME_MONITOR
  TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total Approximate Schur Complement Time");
#endif

  // We have to be careful if Input and Result are the same vector.
  // If this is the case, the only possible solution is to make a copy
  const Epetra_MultiVector *input = &Input;
  bool made_copy = false;
  if (Input.Values() == Result.Values()) {
    input = new Epetra_MultiVector(Input);
    made_copy = true;
  } 

  // Allocate temporary storage
  int m = input->NumVectors();
  if (rhs_block == Teuchos::null || rhs_block->NumVectors() != m)
    rhs_block = 
      Teuchos::rcp(new EpetraExt::BlockMultiVector(*base_map, *sg_map, m));
  if (tmp == Teuchos::null || tmp->NumVectors() != m*max_num_mat_vec)
    tmp = Teuchos::rcp(new Epetra_MultiVector(*base_map, 
					      m*max_num_mat_vec));
  j_ptr.resize(m*max_num_mat_vec);
  mj_indices.resize(m*max_num_mat_vec);
  
  // Extract blocks
  EpetraExt::BlockMultiVector input_block(View, *base_map, *input);
  EpetraExt::BlockMultiVector result_block(View, *base_map, Result);

  result_block.PutScalar(0.0);

  // Set right-hand-side to input_block
  rhs_block->Update(1.0, input_block, 0.0);

  // At level l, linear system has the structure
  // [ A_{l-1} B_l ][ u_l^{l-1} ] = [ r_l^{l-1} ]
  // [ C_l     D_l ][ u_l^l     ]   [ r_l^l     ]

  for (int l=P; l>=1; l--) {
    // Compute D_l^{-1} r_l^l
    divide_diagonal_block(block_indices[l], block_indices[l+1], 
			  *rhs_block, result_block);

    // Compute r_l^{l-1} = r_l^{l-1} - B_l D_l^{-1} r_l^l
    multiply_block(upper_block_Cijk[l], -1.0, result_block, *rhs_block);
  }

  // Solve A_0 u_0 = r_0
  divide_diagonal_block(0, 1, *rhs_block, result_block);

  for (int l=1; l<=P; l++) {
    // Compute r_l^l - C_l*u_l^{l-1}
    multiply_block(lower_block_Cijk[l], -1.0, result_block, *rhs_block);

    // Compute D_l^{-1} (r_l^l - C_l*u_l^{l-1})
    divide_diagonal_block(block_indices[l], block_indices[l+1], 
			  *rhs_block, result_block);
  }

  if (made_copy)
    delete input;

  return 0; 
}

Esempio n. 11

Mostra file

File: Albany_SolutionResponseFunction.cpp Progetto: ImmutableLtd/Albany

void
Albany::SolutionResponseFunction::
cullSolution(const Epetra_MultiVector& x, Epetra_MultiVector& x_culled) const
{
  x_culled.Import(x, *importer, Insert);
}

Esempio n. 12

Mostra file

File: Ifpack_Chebyshev.cpp Progetto: gitter-badger/quinoa

//==============================================================================
int Ifpack_Chebyshev::
ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  
  if (!IsComputed())
    IFPACK_CHK_ERR(-3);

  if (PolyDegree_ == 0)
    return 0;

  int nVec = X.NumVectors();
  int len = X.MyLength();
  if (nVec != Y.NumVectors())
    IFPACK_CHK_ERR(-2);

  Time_->ResetStartTime();

  // AztecOO gives X and Y pointing to the same memory location,
  // need to create an auxiliary vector, Xcopy
  Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy;
  if (X.Pointers()[0] == Y.Pointers()[0])
    Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
  else
    Xcopy = Teuchos::rcp( &X, false );

  double **xPtr = 0, **yPtr = 0;
  Xcopy->ExtractView(&xPtr);
  Y.ExtractView(&yPtr);

#ifdef HAVE_IFPACK_EPETRAEXT
  EpetraExt_PointToBlockDiagPermute* IBD=0;
  if (UseBlockMode_) IBD=&*InvBlockDiagonal_;
#endif
  

  //--- Do a quick solve when the matrix is identity
  double *invDiag=0;
  if(!UseBlockMode_) invDiag=InvDiagonal_->Values();
  if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_)) {
#ifdef HAVE_IFPACK_EPETRAEXT
    if(UseBlockMode_) IBD->ApplyInverse(*Xcopy,Y);
    else
#endif
    if (nVec == 1) {
      double *yPointer = yPtr[0], *xPointer = xPtr[0];
      for (int i = 0; i < len; ++i)
        yPointer[i] = xPointer[i]*invDiag[i];
    }
    else {
      int i, k;
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i];
        for (k = 0; k < nVec; ++k)
          yPtr[k][i] = xPtr[k][i] * coeff;
      }
    } // if (nVec == 1)
    return 0;
  } // if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_))

  //--- Initialize coefficients
  // Note that delta stores the inverse of ML_Cheby::delta
  double alpha = LambdaMax_ / EigRatio_;
  double beta = 1.1 * LambdaMax_;
  double delta = 2.0 / (beta - alpha);
  double theta = 0.5 * (beta + alpha);
  double s1 = theta * delta;

  //--- Define vectors
  // In ML_Cheby, V corresponds to pAux and W to dk
  Epetra_MultiVector V(X);
  Epetra_MultiVector W(X);
#ifdef HAVE_IFPACK_EPETRAEXT
  Epetra_MultiVector Temp(X);
#endif
  
  double *vPointer = V.Values(), *wPointer = W.Values();

  double oneOverTheta = 1.0/theta;
  int i, j, k;


  //--- If solving normal equations, multiply RHS by A^T
  if(SolveNormalEquations_){
    Apply_Transpose(Operator_,Y,V);
    Y=V;
  }

  // Do the smoothing when block scaling is turned OFF
  // --- Treat the initial guess
  if (ZeroStartingSolution_ == false) {
    Operator_->Apply(Y, V);
    // Compute W = invDiag * ( X - V )/ Theta
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      Temp.Update(oneOverTheta,X,-oneOverTheta,V,0.0);
      IBD->ApplyInverse(Temp,W);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
	IBD->ApplyInverse(W,Temp);
	Apply_Transpose(Operator_,Temp,W);
      }
    }
    else
#endif
    if (nVec == 1) {
      double *xPointer = xPtr[0];
      for (i = 0; i < len; ++i)
        wPointer[i] = invDiag[i] * (xPointer[i] - vPointer[i]) * oneOverTheta;
    }
    else {
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*oneOverTheta;
        double *wi = wPointer + i, *vi = vPointer + i;
        for (k = 0; k < nVec; ++k) {
          *wi = (xPtr[k][i] - (*vi)) * coeff;
          wi = wi + len; vi = vi + len;
        }
      }
    } // if (nVec == 1)
    // Update the vector Y
    Y.Update(1.0, W, 1.0);
  }
  else {
    // Compute W = invDiag * X / Theta
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      IBD->ApplyInverse(X,W);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
	IBD->ApplyInverse(W,Temp);
	Apply_Transpose(Operator_,Temp,W);
      }

      W.Scale(oneOverTheta);
      Y.Update(1.0, W, 0.0);      
    }
    else
#endif
    if (nVec == 1) {
      double *xPointer = xPtr[0];
      for (i = 0; i < len; ++i){
        wPointer[i] = invDiag[i] * xPointer[i] * oneOverTheta;
      }
      memcpy(yPtr[0], wPointer, len*sizeof(double));
    }
    else {
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*oneOverTheta;
        double *wi = wPointer + i;
        for (k = 0; k < nVec; ++k) {
          *wi = xPtr[k][i] * coeff;
          wi = wi + len;
        }
      }
      for (k = 0; k < nVec; ++k)
        memcpy(yPtr[k], wPointer + k*len, len*sizeof(double));
    } // if (nVec == 1)
  } // if (ZeroStartingSolution_ == false)
  
  //--- Apply the polynomial
  double rhok = 1.0/s1, rhokp1;
  double dtemp1, dtemp2;
  int degreeMinusOne = PolyDegree_ - 1;
  if (nVec == 1) {
    double *xPointer = xPtr[0];
    for (k = 0; k < degreeMinusOne; ++k) {
      Operator_->Apply(Y, V);
      rhokp1 = 1.0 / (2.0*s1 - rhok);
      dtemp1 = rhokp1 * rhok;
      dtemp2 = 2.0 * rhokp1 * delta;
      rhok = rhokp1;
      // Compute W = dtemp1 * W
      W.Scale(dtemp1);
      // Compute W = W + dtemp2 * invDiag * ( X - V )
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      //NTS: We can clobber V since it will be reset in the Apply
      V.Update(dtemp2,X,-dtemp2);
      IBD->ApplyInverse(V,Temp);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
	IBD->ApplyInverse(V,Temp);
	Apply_Transpose(Operator_,Temp,V);
      }

      W.Update(1.0,Temp,1.0);
    }
    else{
#endif
      for (i = 0; i < len; ++i)
        wPointer[i] += dtemp2* invDiag[i] * (xPointer[i] - vPointer[i]);
#ifdef HAVE_IFPACK_EPETRAEXT
    }
#endif

      // Update the vector Y
      Y.Update(1.0, W, 1.0);
    } // for (k = 0; k < degreeMinusOne; ++k)
  }
  else {
    for (k = 0; k < degreeMinusOne; ++k) {
      Operator_->Apply(Y, V);
      rhokp1 = 1.0 / (2.0*s1 - rhok);
      dtemp1 = rhokp1 * rhok;
      dtemp2 = 2.0 * rhokp1 * delta;
      rhok = rhokp1;
      // Compute W = dtemp1 * W
      W.Scale(dtemp1);
      // Compute W = W + dtemp2 * invDiag * ( X - V )
#ifdef HAVE_IFPACK_EPETRAEXT    
    if(UseBlockMode_) {
      //We can clobber V since it will be reset in the Apply
      V.Update(dtemp2,X,-dtemp2);
      IBD->ApplyInverse(V,Temp);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
	IBD->ApplyInverse(V,Temp);
	Apply_Transpose(Operator_,Temp,V);
      }


      W.Update(1.0,Temp,1.0);
    }
    else{
#endif
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*dtemp2;
        double *wi = wPointer + i, *vi = vPointer + i;
        for (j = 0; j < nVec; ++j) {
          *wi += (xPtr[j][i] - (*vi)) * coeff;
          wi = wi + len; vi = vi + len;
        }
      }
#ifdef HAVE_IFPACK_EPETRAEXT
    }
#endif      
      // Update the vector Y
      Y.Update(1.0, W, 1.0);
    } // for (k = 0; k < degreeMinusOne; ++k)
  } // if (nVec == 1)

  
  // Flops are updated in each of the following. 
  ++NumApplyInverse_;
  ApplyInverseTime_ += Time_->ElapsedTime();
  return(0);
}

Esempio n. 13

Mostra file

File: Amesos_Scalapack.cpp Progetto: 00liujj/trilinos

int Amesos_Scalapack::Solve() { 
  
  if( debug_ == 1 ) std::cout << "Entering `Solve()'" << std::endl;
  
  NumSolve_++;
  
  Epetra_MultiVector   *vecX = Problem_->GetLHS() ; 
  Epetra_MultiVector   *vecB = Problem_->GetRHS() ; 
  
  //
  //  Compute the number of right hands sides 
  //  (and check that X and B have the same shape) 
  //
  int nrhs; 
  if ( vecX == 0 ) { 
    nrhs = 0 ;
    EPETRA_CHK_ERR( vecB != 0 ) ; 
  } else { 
    nrhs = vecX->NumVectors() ; 
    EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ; 
  }
  
  Epetra_MultiVector *ScalapackB =0;
  Epetra_MultiVector *ScalapackX =0;
  //
  //  Extract Scalapack versions of X and B 
  //
  double *ScalapackXvalues ;
  
  Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
  Time_->ResetStartTime(); // track time to broadcast vectors
  //
  //  Copy B to the scalapack version of B
  //
  const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap();
  Epetra_MultiVector *ScalapackXextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; 
  Epetra_MultiVector *ScalapackBextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; 
  
  Epetra_Import ImportToScalapack( *VectorMap_, OriginalMap );
  ScalapackBextract->Import( *vecB, ImportToScalapack, Insert ) ;
  ScalapackB = ScalapackBextract ; 
  ScalapackX = ScalapackXextract ; 
  
  VecTime_ += Time_->ElapsedTime();
  
  //
  //  Call SCALAPACKs PDGETRS to perform the solve
  //
  
  int DescX[10];  
  
  ScalapackX->Scale(1.0, *ScalapackB) ;  
  
  int ScalapackXlda ; 
  
  Time_->ResetStartTime(); // tract time to solve
  
  //
  //  Setup DescX 
  //
  
  if( nrhs > nb_ ) {
    EPETRA_CHK_ERR( -2 );  
  }
  
  int Ierr[1] ; 
  Ierr[0] = 0 ; 
  const int zero = 0 ; 
  const int one = 1 ; 
  if ( iam_ < nprow_ * npcol_ ) {
    assert( ScalapackX->ExtractView( &ScalapackXvalues, &ScalapackXlda ) == 0 ) ; 
    
    if ( false ) std::cout << "Amesos_Scalapack.cpp: " << __LINE__ << " ScalapackXlda = "  <<  ScalapackXlda 
		      << " lda_ = "  << lda_ 
		      << " nprow_ = "  << nprow_ 
		      << " npcol_ = "  << npcol_ 
		      << " myprow_ = "  << myprow_ 
		      << " mypcol_ = "  << mypcol_ 
		      << " iam_ = "  << iam_ << std::endl ;
    if (  TwoD_distribution_ )    assert( mypcol_ >0 || EPETRA_MAX(ScalapackXlda,1) == lda_ ) ; 
    
    DESCINIT_F77(DescX, 
		 &NumGlobalElements_, 
		 &nrhs, 
		 &nb_,
		 &nb_,
		 &zero,
		 &zero,
		 &ictxt_,
		 &lda_,
		 Ierr ) ;
    assert( Ierr[0] == 0 ) ; 
    
    //
    //  For the 1D data distribution, we factor the transposed 
    //  matrix, hence we must invert the sense of the transposition
    //
    char trans = 'N';
    if ( TwoD_distribution_ ) {
      if ( UseTranspose() ) trans = 'T' ;
    } else {
      
      if ( ! UseTranspose() ) trans = 'T' ;
    }
    
    if ( nprow_ * npcol_ == 1 ) { 
      DGETRS_F77(&trans,
		 &NumGlobalElements_,  
		 &nrhs, 
		 &DenseA_[0],
		 &lda_,
		 &Ipiv_[0],
		 ScalapackXvalues,
		 &lda_,
		 Ierr ) ;
    } else { 
      PDGETRS_F77(&trans,
		  &NumGlobalElements_,  
		  &nrhs, 
		  &DenseA_[0],
		  &one,
		  &one, 
		  DescA_,
		  &Ipiv_[0],
		  ScalapackXvalues,
		  &one,
		  &one, 
		  DescX,
		  Ierr ) ;
    }
  }
  
  SolTime_ += Time_->ElapsedTime();
  
  Time_->ResetStartTime();  // track time to broadcast vectors
  //
  //  Copy X back to the original vector
  // 
  Epetra_Import ImportFromScalapack( OriginalMap, *VectorMap_ );
  vecX->Import( *ScalapackX, ImportFromScalapack, Insert ) ;
  delete ScalapackBextract ;
  delete ScalapackXextract ;
  
  VecTime_ += Time_->ElapsedTime();
  
  //  All processes should return the same error code
  if ( nprow_ * npcol_ < Comm().NumProc() ) 
    Comm().Broadcast( Ierr, 1, 0 ) ; 
  
  // MS // compute vector norms
  if( ComputeVectorNorms_ == true || verbose_ == 2 ) {
    double NormLHS, NormRHS;
    for( int i=0 ; i<nrhs ; ++i ) {
      assert((*vecX)(i)->Norm2(&NormLHS)==0);
      assert((*vecB)(i)->Norm2(&NormRHS)==0);
      if( verbose_ && Comm().MyPID() == 0 ) {
	std::cout << "Amesos_Scalapack : vector " << i << ", ||x|| = " << NormLHS
	     << ", ||b|| = " << NormRHS << std::endl;
      }
    }
  }
  
  // MS // compute true residual
  if( ComputeTrueResidual_ == true || verbose_ == 2  ) {
    double Norm;
    Epetra_MultiVector Ax(vecB->Map(),nrhs);
    for( int i=0 ; i<nrhs ; ++i ) {
      (Problem_->GetMatrix()->Multiply(UseTranspose(), *((*vecX)(i)), Ax));
      (Ax.Update(1.0, *((*vecB)(i)), -1.0));
      (Ax.Norm2(&Norm));
      
      if( verbose_ && Comm().MyPID() == 0 ) {
	std::cout << "Amesos_Scalapack : vector " << i << ", ||Ax - b|| = " << Norm << std::endl;
      }
    }
  }
  
  return Ierr[0];
  
}

Esempio n. 14

Mostra file

File: Amesos_Klu.cpp Progetto: colinpotter/trilinos

//=============================================================================
int Amesos_Klu::Solve() 
{
  Epetra_MultiVector* vecX = 0 ;
  Epetra_MultiVector* vecB = 0 ;

#ifdef HAVE_AMESOS_EPETRAEXT
  Teuchos::RCP<Epetra_MultiVector> vecX_rcp;
  Teuchos::RCP<Epetra_MultiVector> vecB_rcp;
#endif
  
#ifdef Bug_8212
  //  This demonstrates Bug #2812 - Valgrind does not catch this
  //  memory leak
  lose_this_ = (int *) malloc( 300 ) ;
  
#ifdef Bug_8212_B
  //  This demonstrates Bug #2812 - Valgrind does catch this
  //  use of unitialized data - but only in TestOptions/TestOptions.exe 
  //  not in Test_Basic/amesos_test.exe 	
  //  		
    if ( lose_this_[0] == 12834 ) { 
	     std::cout << __FILE__ << "::"  << __LINE__ << " very unlikely to happen " << std::endl ; 
    }
#endif
#endif

  if ( !TrustMe_  ) { 

    SerialB_ = Teuchos::rcp(Problem_->GetRHS(),false);
    SerialX_ = Teuchos::rcp(Problem_->GetLHS(),false);
    
    Epetra_MultiVector* OrigVecX ;
    Epetra_MultiVector* OrigVecB ;

    if (IsNumericFactorizationOK_ == false)
      AMESOS_CHK_ERR(NumericFactorization());
    
    ResetTimer(1);
    
    //
    //  Reindex the LHS and RHS 
    //
    OrigVecX = Problem_->GetLHS() ;
    OrigVecB = Problem_->GetRHS() ;
    
    if ( Reindex_ ) { 
#ifdef HAVE_AMESOS_EPETRAEXT
      vecX_rcp = StdIndexDomain_->StandardizeIndex( *OrigVecX ) ;
      vecB_rcp = StdIndexRange_->StandardizeIndex( *OrigVecB ) ;

      vecX = &*vecX_rcp;
      vecB = &*vecB_rcp;
#else
      AMESOS_CHK_ERR( -13 ) ; // Amesos_Klu can't handle non-standard indexing without EpetraExt 
#endif
    } else {
      vecX = OrigVecX ;
      vecB = OrigVecB ;
    } 
    
    if ((vecX == 0) || (vecB == 0))
      AMESOS_CHK_ERR(-1); // something wrong in input
    
    //  Extract Serial versions of X and B
    
    ResetTimer(0);

    //  Copy B to the serial version of B
    //
    if (UseDataInPlace_ == 1) {
#ifdef HAVE_AMESOS_EPETRAEXT
      if(vecX_rcp==Teuchos::null)
         SerialX_ = Teuchos::rcp(vecX,false);
      else
         SerialX_ = vecX_rcp;

      if(vecB_rcp==Teuchos::null)
         SerialB_ = Teuchos::rcp(vecB,false);
      else 
         SerialB_ = vecB_rcp;
#else
      SerialB_ = Teuchos::rcp(vecB,false);
      SerialX_ = Teuchos::rcp(vecX,false);
#endif
      NumVectors_ = Problem_->GetRHS()->NumVectors() ; 
    } else {
      assert (UseDataInPlace_ == 0);
      
      if( NumVectors_ != Problem_->GetRHS()->NumVectors() ) {
	NumVectors_ = Problem_->GetRHS()->NumVectors() ; 
	SerialXextract_ = rcp( new Epetra_MultiVector(*SerialMap_,NumVectors_));
	SerialBextract_ = rcp (new Epetra_MultiVector(*SerialMap_,NumVectors_));
      }
      if (NumVectors_ != vecB->NumVectors())
	AMESOS_CHK_ERR(-1); // internal error 
      
      //ImportRangeToSerial_ = rcp(new Epetra_Import ( *SerialMap_, vecB->Map() ) );
      //if ( SerialBextract_->Import(*vecB,*ImportRangeToSerial_,Insert) )
      Epetra_Import *UseImport;
      if(!UseTranspose_) UseImport=&*ImportRangeToSerial_;
      else UseImport=&*ImportDomainToSerial_;      
      if ( SerialBextract_->Import(*vecB,*UseImport,Insert) )
	AMESOS_CHK_ERR( -1 ) ; // internal error
      
      SerialB_ = Teuchos::rcp(&*SerialBextract_,false) ;
      SerialX_ = Teuchos::rcp(&*SerialXextract_,false) ;
    }
    
    VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);

    //  Call KLU to perform the solve
    
    ResetTimer(0);
    if (MyPID_ == 0) {
      AMESOS_CHK_ERR(SerialB_->ExtractView(&SerialBvalues_,&SerialXlda_ ));
      AMESOS_CHK_ERR(SerialX_->ExtractView(&SerialXBvalues_,&SerialXlda_ ));
      if (SerialXlda_ != NumGlobalElements_)
	AMESOS_CHK_ERR(-1);
    }

    OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
  }
  else
  {
     SerialB_ = Teuchos::rcp(Problem_->GetRHS(),false) ;
     SerialX_ = Teuchos::rcp(Problem_->GetLHS(),false) ;
     NumVectors_ = SerialX_->NumVectors();
    if (MyPID_ == 0) {
      AMESOS_CHK_ERR(SerialB_->ExtractView(&SerialBvalues_,&SerialXlda_ ));
      AMESOS_CHK_ERR(SerialX_->ExtractView(&SerialXBvalues_,&SerialXlda_ ));
    }
  }

  if ( MyPID_ == 0) {
    if ( NumVectors_ == 1 ) {
      for ( int i = 0 ; i < NumGlobalElements_ ; i++ ) 
	SerialXBvalues_[i] = SerialBvalues_[i] ;
    } else {
      SerialX_->Scale(1.0, *SerialB_ ) ;    // X = B (Klu overwrites B with X)
    }
    if (UseTranspose()) {
      amesos_klu_solve( &*PrivateKluData_->Symbolic_, &*PrivateKluData_->Numeric_,
			SerialXlda_, NumVectors_, &SerialXBvalues_[0], &*PrivateKluData_->common_ );
    } else {
      amesos_klu_tsolve( &*PrivateKluData_->Symbolic_, &*PrivateKluData_->Numeric_,
			 SerialXlda_, NumVectors_, &SerialXBvalues_[0], &*PrivateKluData_->common_ );
    }
  }

  if ( !TrustMe_ ) {
    SolveTime_ = AddTime("Total solve time", SolveTime_, 0);
    
    //  Copy X back to the original vector
    
    ResetTimer(0);
    ResetTimer(1);
    
    if (UseDataInPlace_ == 0) {
      Epetra_Import *UseImport;
      if(!UseTranspose_) UseImport=&*ImportDomainToSerial_;
      else UseImport=&*ImportRangeToSerial_;      
      //        ImportDomainToSerial_ = rcp(new Epetra_Import ( *SerialMap_, vecX->Map() ) );
      vecX->Export( *SerialX_, *UseImport, Insert ) ;
      
    } // otherwise we are already in place.
    
    VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);
    
#if 0
    //
    //  ComputeTrueResidual causes TestOptions to fail on my linux box 
    //  Bug #1417
    if (ComputeTrueResidual_)
      ComputeTrueResidual(*SerialMatrix_, *vecX, *vecB, UseTranspose(), "Amesos_Klu");
#endif
    
    if (ComputeVectorNorms_)
      ComputeVectorNorms(*vecX, *vecB, "Amesos_Klu");
    
    OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
    
  }
  ++NumSolve_;
  
  return(0) ;
}

Esempio n. 15

Mostra file

File: Ifpack_PointRelaxation.cpp Progetto: KineticTheory/Trilinos

//==============================================================================
int Ifpack_PointRelaxation::
ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  int NumVectors = X.NumVectors();

  int Length = Matrix().MaxNumEntries();
  std::vector<int> Indices(Length);
  std::vector<double> Values(Length);

  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
  if (IsParallel_)
    Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
  else
    Y2 = Teuchos::rcp( &Y, false );

  // extract views (for nicer and faster code)
  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
  X.ExtractView(&x_ptr);
  Y.ExtractView(&y_ptr);
  Y2->ExtractView(&y2_ptr);
  Diagonal_->ExtractView(&d_ptr);

  for (int j = 0; j < NumSweeps_ ; j++) {

    // data exchange is here, once per sweep
    if (IsParallel_)
      IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));

    // FIXME: do I really need this code below?
    if (NumVectors == 1) {

      double* y0_ptr = y_ptr[0];
      double* y20_ptr = y2_ptr[0];
      double* x0_ptr = x_ptr[0];

      if(!DoBackwardGS_){
        /* Forward Mode */
        for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
          int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];

          int NumEntries;
          int col;
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));

          double dtemp = 0.0;
          for (int k = 0 ; k < NumEntries ; ++k) {

            col = Indices[k];
            dtemp += Values[k] * y20_ptr[col];
          }

          y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
        }
      }
      else {
        /* Backward Mode */
        for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
          int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];

          int NumEntries;
          int col;
          (void) col; // Forestall compiler warning for unused variable.
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));
          double dtemp = 0.0;
          for (int k = 0 ; k < NumEntries ; ++k) {

            col = Indices[k];
            dtemp += Values[k] * y20_ptr[i];
          }

          y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp);
        }
      }

      // using Export() sounded quite expensive
      if (IsParallel_)
        for (int i = 0 ; i < NumMyRows_ ; ++i)
          y0_ptr[i] = y20_ptr[i];

    }
    else {
      if(!DoBackwardGS_){
        /* Forward Mode */
        for (int i = 0 ; i < NumMyRows_ ; ++i) {

          int NumEntries;
          int col;
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));

          for (int m = 0 ; m < NumVectors ; ++m) {

            double dtemp = 0.0;
            for (int k = 0 ; k < NumEntries ; ++k) {

              col = Indices[k];
              dtemp += Values[k] * y2_ptr[m][col];
            }

            y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);
          }
        }
      }
      else {
        /* Backward Mode */
        for (int i = NumMyRows_  - 1 ; i > -1 ; --i) {
          int NumEntries;
          int col;
          IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries,
                                                   &Values[0], &Indices[0]));

          for (int m = 0 ; m < NumVectors ; ++m) {

            double dtemp = 0.0;
            for (int k = 0 ; k < NumEntries ; ++k) {

              col = Indices[k];
              dtemp += Values[k] * y2_ptr[m][col];
            }

            y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp);

          }
        }
      }

      // using Export() sounded quite expensive
      if (IsParallel_)
        for (int m = 0 ; m < NumVectors ; ++m)
          for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
            int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii];
            y_ptr[m][i] = y2_ptr[m][i];
          }
    }
  }

#ifdef IFPACK_FLOPCOUNTERS
  ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_);
#endif

  return(0);
} //ApplyInverseGS_RowMatrix()

Esempio n. 16

Mostra file

File: Amesos_Umfpack.cpp Progetto: 00liujj/trilinos

//=============================================================================
int Amesos_Umfpack::Solve() 
{ 
  // if necessary, perform numeric factorization. 
  // This may call SymbolicFactorization() as well.
  if (!IsNumericFactorizationOK_)
    AMESOS_CHK_ERR(NumericFactorization()); 

  ResetTimer(1);

  Epetra_MultiVector* vecX = Problem_->GetLHS(); 
  Epetra_MultiVector* vecB = Problem_->GetRHS(); 

  if ((vecX == 0) || (vecB == 0))
    AMESOS_CHK_ERR(-1);

  int NumVectors = vecX->NumVectors(); 
  if (NumVectors != vecB->NumVectors())
    AMESOS_CHK_ERR(-1);

  Epetra_MultiVector *SerialB, *SerialX; 

  //  Extract Serial versions of X and B 
  //
  double *SerialXvalues ;
  double *SerialBvalues ;

  Epetra_MultiVector* SerialXextract = 0;
  Epetra_MultiVector* SerialBextract = 0;
    
  //  Copy B to the serial version of B
  //
  ResetTimer(0);
  
  if (IsLocal_ == 1) { 
    SerialB = vecB ; 
    SerialX = vecX ; 
  } else { 
    assert (IsLocal_ == 0);
    SerialXextract = new Epetra_MultiVector(SerialMap(),NumVectors); 
    SerialBextract = new Epetra_MultiVector(SerialMap(),NumVectors); 

    SerialBextract->Import(*vecB,Importer(),Insert);
    SerialB = SerialBextract; 
    SerialX = SerialXextract; 
  } 

  VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);
  
  //  Call UMFPACK to perform the solve
  //  Note:  UMFPACK uses a Compressed Column Storage instead of compressed row storage, 
  //  Hence to compute A X = B, we ask UMFPACK to perform A^T X = B and vice versa

  OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);

  ResetTimer(0);

  int SerialBlda, SerialXlda ; 
  int UmfpackRequest = UseTranspose()?UMFPACK_A:UMFPACK_At ;
  int status = 0;

  if ( MyPID_ == 0 ) {
    int ierr;
    ierr = SerialB->ExtractView(&SerialBvalues, &SerialBlda);
    assert (ierr == 0);
    ierr = SerialX->ExtractView(&SerialXvalues, &SerialXlda);
    assert (ierr == 0);
    assert( SerialBlda == NumGlobalElements_ ) ; 
    assert( SerialXlda == NumGlobalElements_ ) ; 
    
    for ( int j =0 ; j < NumVectors; j++ ) { 
      double *Control = (double *) NULL, *Info = (double *) NULL ;

      status = umfpack_di_solve (UmfpackRequest, &Ap[0], 
				     &Ai[0], &Aval[0], 
				     &SerialXvalues[j*SerialXlda], 
				     &SerialBvalues[j*SerialBlda], 
				     Numeric, Control, Info) ;
    }
  }
    
  if (status) AMESOS_CHK_ERR(status);

  SolveTime_ = AddTime("Total solve time", SolveTime_, 0);
  
  //  Copy X back to the original vector
  
  ResetTimer(0);
  ResetTimer(1);

  if ( IsLocal_ == 0 ) {
    vecX->Export(*SerialX, Importer(), Insert ) ;
    if (SerialBextract) delete SerialBextract ;
    if (SerialXextract) delete SerialXextract ;
  }

  VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);

  if (ComputeTrueResidual_)
  {
    Epetra_RowMatrix* Matrix = 
      dynamic_cast<Epetra_RowMatrix*>(Problem_->GetOperator());
    ComputeTrueResidual(*Matrix, *vecX, *vecB, UseTranspose(), "Amesos_Umfpack");
  }

  if (ComputeVectorNorms_) {
    ComputeVectorNorms(*vecX, *vecB, "Amesos_Umfpack");
  }

  NumSolve_++;

  OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); // Amesos overhead

  return(0);
}

Esempio n. 17

Mostra file

File: Ifpack_PointRelaxation.cpp Progetto: KineticTheory/Trilinos

int Ifpack_PointRelaxation::
ApplyInverseGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  int* IndexOffset;
  int* Indices;
  double* Values;
  IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values));

  int NumVectors = X.NumVectors();

  Teuchos::RefCountPtr< Epetra_MultiVector > Y2;
  if (IsParallel_) {
    Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) );
  }
  else
    Y2 = Teuchos::rcp( &Y, false );

  double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr;
  X.ExtractView(&x_ptr);
  Y.ExtractView(&y_ptr);
  Y2->ExtractView(&y2_ptr);
  Diagonal_->ExtractView(&d_ptr);

  for (int iter = 0 ; iter < NumSweeps_ ; ++iter) {

    // only one data exchange per sweep
    if (IsParallel_)
      IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert));

    if(!DoBackwardGS_){
      /* Forward Mode */
      for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
        int i=LocalSmoothingIndices_[ii];

        int col;
        double diag = d_ptr[i];

        for (int m = 0 ; m < NumVectors ; ++m) {

          double dtemp = 0.0;

          for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {

            col = Indices[k];
            dtemp += Values[k] * y2_ptr[m][col];
          }

          y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;
        }
      }
    }
    else {
      /* Backward Mode */
      for (int ii = NumLocalSmoothingIndices_  - 1 ; ii > -1 ; --ii) {
        int i=LocalSmoothingIndices_[ii];

        int col;
        double diag = d_ptr[i];

        for (int m = 0 ; m < NumVectors ; ++m) {

          double dtemp = 0.0;
          for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) {

            col = Indices[k];
            dtemp += Values[k] * y2_ptr[m][col];
          }

          y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag;

        }
      }
    }


    if (IsParallel_)
      for (int m = 0 ; m < NumVectors ; ++m)
        for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) {
          int i=LocalSmoothingIndices_[ii];
          y_ptr[m][i] = y2_ptr[m][i];
        }
  }

#ifdef IFPACK_FLOPCOUNTERS
  ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_);
#endif
  return(0);
} //ApplyInverseGS_LocalFastCrsMatrix()

Esempio n. 18

Mostra file

File: Epetra_FastCrsMatrix.cpp Progetto: gitter-badger/quinoa

//=============================================================================
int Epetra_FastCrsMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
  //
  // This function forms the product Y = A * Y or Y = A' * X
  //
  if (X.NumVectors()==1 && Y.NumVectors()==1) {
    double * xp = (double *) X[0];
    double * yp = (double *) Y[0];
    Epetra_Vector x(View, X.Map(), xp);
    Epetra_Vector y(View, Y.Map(), yp);
    return(Multiply(TransA, x, y));
  }
  if (!Filled()) EPETRA_CHK_ERR(-1); // Matrix must be filled.

  int i, j, k;
  int * NumEntriesPerRow = NumEntriesPerRow_;
  int ** Indices = Indices_;
  double ** Values = Values_;

  double **Xp = (double**)X.Pointers();
  double **Yp = (double**)Y.Pointers();

  int NumVectors = X.NumVectors();
  int NumMyCols_ = NumMyCols();


  // Need to better manage the Import and Export vectors:
  // - Need accessor functions
  // - Need to make the NumVector match (use a View to do this)
  // - Need to look at RightScale and ColSum routines too.

  if (!TransA) {

    // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
    if (Importer()!=0) {
      if (ImportVector_!=0) {
	if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;}
      }
      if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),NumVectors); // Create import vector if needed
      ImportVector_->Import(X, *Importer(), Insert);
      Xp = (double**)ImportVector_->Pointers();
    }

    // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
    if (Exporter()!=0) {
      if (ExportVector_!=0) {
	if (ExportVector_->NumVectors()!=NumVectors) { delete ExportVector_; ExportVector_= 0;}
      }
      if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),NumVectors); // Create Export vector if needed
      Yp = (double**)ExportVector_->Pointers();
    }

    // Do actual computation

    for (i=0; i < NumMyRows_; i++) {
      int      NumEntries = *NumEntriesPerRow++;
      int *    RowIndices = *Indices++;
      double * RowValues  = *Values++;
      for (k=0; k<NumVectors; k++) {
	double sum = 0.0;
	for (j=0; j < NumEntries; j++) sum += RowValues[j] * Xp[k][RowIndices[j]];
	Yp[k][i] = sum;
      }
    }
    if (Exporter()!=0) Y.Export(*ExportVector_, *Exporter(), Add); // Fill Y with Values from export vector
  }
  else { // Transpose operation


    // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors

    if (Exporter()!=0) {
      if (ExportVector_!=0) {
	if (ExportVector_->NumVectors()!=NumVectors) { delete ExportVector_; ExportVector_= 0;}
      }
      if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),NumVectors); // Create Export vector if needed
      ExportVector_->Import(X, *Exporter(), Insert);
      Xp = (double**)ExportVector_->Pointers();
    }

    // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
    if (Importer()!=0) {
      if (ImportVector_!=0) {
	if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;}
      }
      if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),NumVectors); // Create import vector if needed
      Yp = (double**)ImportVector_->Pointers();
    }

    // Do actual computation



    for (k=0; k<NumVectors; k++) 
      for (i=0; i < NumMyCols_; i++) Yp[k][i] = 0.0; // Initialize y for transpose multiply
    
    for (i=0; i < NumMyRows_; i++) {
      int      NumEntries = *NumEntriesPerRow++;
      int *    RowIndices = *Indices++;
      double * RowValues  = *Values++;
      for (k=0; k<NumVectors; k++) {
	for (j=0; j < NumEntries; j++) Yp[k][RowIndices[j]] += RowValues[j] * Xp[k][i];
      }
    }
    if (Importer()!=0) Y.Export(*ImportVector_, *Importer(), Add); // Fill Y with Values from export vector
  }

  UpdateFlops(2*NumVectors*NumGlobalNonzeros64());
  return(0);
}

Esempio n. 19

Mostra file

File: EpetraExt_CrsSingletonFilter_LinearProblem.cpp Progetto: cakeisalie/oomphlib_003

//==============================================================================
int LinearProblem_CrsSingletonFilter::ConstructReducedProblem(Epetra_LinearProblem * Problem) {

  int i, j;
  if (HaveReducedProblem_) EPETRA_CHK_ERR(-1); // Setup already done once.  Cannot do it again
  if (Problem==0) EPETRA_CHK_ERR(-2); // Null problem pointer

  FullProblem_ = Problem;
  FullMatrix_ = dynamic_cast<Epetra_RowMatrix *>(Problem->GetMatrix());
  if (FullMatrix_==0) EPETRA_CHK_ERR(-3); // Need a RowMatrix
  if (Problem->GetRHS()==0) EPETRA_CHK_ERR(-4); // Need a RHS
  if (Problem->GetLHS()==0) EPETRA_CHK_ERR(-5); // Need a LHS
  // Generate reduced row and column maps

  Epetra_MapColoring & RowMapColors = *RowMapColors_;
  Epetra_MapColoring & ColMapColors = *ColMapColors_;

  ReducedMatrixRowMap_ = RowMapColors.GenerateMap(0);
  ReducedMatrixColMap_ = ColMapColors.GenerateMap(0);

  // Create domain and range map colorings by exporting map coloring of column and row maps

  if (FullMatrix()->RowMatrixImporter()!=0) {
    Epetra_MapColoring DomainMapColors(FullMatrixDomainMap());
    EPETRA_CHK_ERR(DomainMapColors.Export(*ColMapColors_, *FullMatrix()->RowMatrixImporter(), AbsMax));
    OrigReducedMatrixDomainMap_ = DomainMapColors.GenerateMap(0);
  }
  else
    OrigReducedMatrixDomainMap_ = ReducedMatrixColMap_;

  if (FullMatrixIsCrsMatrix_) {
    if (FullCrsMatrix()->Exporter()!=0) { // Non-trivial exporter
      Epetra_MapColoring RangeMapColors(FullMatrixRangeMap());
      EPETRA_CHK_ERR(RangeMapColors.Export(*RowMapColors_, *FullCrsMatrix()->Exporter(), 
					   AbsMax));
      ReducedMatrixRangeMap_ = RangeMapColors.GenerateMap(0);
    }
    else
      ReducedMatrixRangeMap_ = ReducedMatrixRowMap_;
  }
  else
    ReducedMatrixRangeMap_ = ReducedMatrixRowMap_;

  // Check to see if the reduced system domain and range maps are the same.
  // If not, we need to remap entries of the LHS multivector so that they are distributed
  // conformally with the rows of the reduced matrix and the RHS multivector
  SymmetricElimination_ = ReducedMatrixRangeMap_->SameAs(*OrigReducedMatrixDomainMap_);
  if (!SymmetricElimination_) 
    ConstructRedistributeExporter(OrigReducedMatrixDomainMap_, ReducedMatrixRangeMap_, 
				  RedistributeDomainExporter_, ReducedMatrixDomainMap_);
  else {
    ReducedMatrixDomainMap_ = OrigReducedMatrixDomainMap_;
    OrigReducedMatrixDomainMap_ = 0;
    RedistributeDomainExporter_ = 0;
  }
  
  // Create pointer to Full RHS, LHS
  Epetra_MultiVector * FullRHS = FullProblem()->GetRHS();
  Epetra_MultiVector * FullLHS = FullProblem()->GetLHS();
  int NumVectors = FullLHS->NumVectors();

  // Create importers
//  cout << "RedDomainMap\n";
//  cout << *ReducedMatrixDomainMap();
//  cout << "FullDomainMap\n";
//  cout << FullMatrixDomainMap();
  Full2ReducedLHSImporter_ = new Epetra_Import(*ReducedMatrixDomainMap(), FullMatrixDomainMap());
//  cout << "RedRowMap\n";
//  cout << *ReducedMatrixRowMap();
//  cout << "FullRHSMap\n";
//  cout << FullRHS->Map();
  Full2ReducedRHSImporter_ = new Epetra_Import(*ReducedMatrixRowMap(), FullRHS->Map());

  // Construct Reduced Matrix
  ReducedMatrix_ = new Epetra_CrsMatrix(Copy, *ReducedMatrixRowMap(), *ReducedMatrixColMap(), 0);

  // Create storage for temporary X values due to explicit elimination of rows
  tempExportX_ = new Epetra_MultiVector(FullMatrixColMap(), NumVectors);

  int NumEntries;
  int * Indices;
  double * Values;
  int NumMyRows = FullMatrix()->NumMyRows();
  int ColSingletonCounter = 0;
  for (i=0; i<NumMyRows; i++) {
    int curGRID = FullMatrixRowMap().GID(i);
    if (ReducedMatrixRowMap()->MyGID(curGRID)) { // Check if this row should go into reduced matrix

      EPETRA_CHK_ERR(GetRowGCIDs(i, NumEntries, Values, Indices)); // Get current row (Indices are global)
      
      int ierr = ReducedMatrix()->InsertGlobalValues(curGRID, NumEntries, 
						     Values, Indices); // Insert into reduce matrix
      // Positive errors will occur because we are submitting col entries that are not part of
      // reduced system.  However, because we specified a column map to the ReducedMatrix constructor
      // these extra column entries will be ignored and we will be politely reminded by a positive
      // error code
      if (ierr<0) EPETRA_CHK_ERR(ierr); 
    }
    else {
      EPETRA_CHK_ERR(GetRow(i, NumEntries, Values, Indices)); // Get current row
      if (NumEntries==1) {
	double pivot = Values[0];
	if (pivot==0.0) EPETRA_CHK_ERR(-1); // Encountered zero row, unable to continue
	int indX = Indices[0];
	for (j=0; j<NumVectors; j++)
	  (*tempExportX_)[j][indX] = (*FullRHS)[j][i]/pivot;
      }
      // Otherwise, this is a singleton column and we will scan for the pivot element needed 
      // for post-solve equations
      else {
	int targetCol = ColSingletonColLIDs_[ColSingletonCounter];
	for (j=0; j<NumEntries; j++) {
	  if (Indices[j]==targetCol) {
	    double pivot = Values[j];
	    if (pivot==0.0) EPETRA_CHK_ERR(-2); // Encountered zero column, unable to continue
	    ColSingletonPivotLIDs_[ColSingletonCounter] = j; // Save for later use
	    ColSingletonPivots_[ColSingletonCounter] = pivot;
	    ColSingletonCounter++;
	    break;
	  }
	}
      }
    }
  }

  // Now convert to local indexing.  We have constructed things so that the domain and range of the
  // matrix will have the same map.  If the reduced matrix domain and range maps were not the same, the
  // differences were addressed in the ConstructRedistributeExporter() method
  EPETRA_CHK_ERR(ReducedMatrix()->FillComplete(*ReducedMatrixDomainMap(), *ReducedMatrixRangeMap()));

  // Construct Reduced LHS (Puts any initial guess values into reduced system)

  ReducedLHS_ = new Epetra_MultiVector(*ReducedMatrixDomainMap(), NumVectors);
  EPETRA_CHK_ERR(ReducedLHS_->Import(*FullLHS, *Full2ReducedLHSImporter_, Insert));
  FullLHS->PutScalar(0.0); // zero out Full LHS since we will inject values as we get them

  // Construct Reduced RHS

  // First compute influence of already-known values of X on RHS
  tempX_ = new Epetra_MultiVector(FullMatrixDomainMap(), NumVectors);
  tempB_ = new Epetra_MultiVector(FullRHS->Map(), NumVectors);
  
  //Inject known X values into tempX for purpose of computing tempB = FullMatrix*tempX
  // Also inject into full X since we already know the solution

  if (FullMatrix()->RowMatrixImporter()!=0) {
    EPETRA_CHK_ERR(tempX_->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
    EPETRA_CHK_ERR(FullLHS->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
  }
  else {
    tempX_->Update(1.0, *tempExportX_, 0.0);
    FullLHS->Update(1.0, *tempExportX_, 0.0);
  }


  EPETRA_CHK_ERR(FullMatrix()->Multiply(false, *tempX_, *tempB_));

  EPETRA_CHK_ERR(tempB_->Update(1.0, *FullRHS, -1.0)); // tempB now has influence of already-known X values

  ReducedRHS_ = new Epetra_MultiVector(*ReducedMatrixRowMap(), FullRHS->NumVectors());
  EPETRA_CHK_ERR(ReducedRHS_->Import(*tempB_, *Full2ReducedRHSImporter_, Insert));

  // Finally construct Reduced Linear Problem
  ReducedProblem_ = new Epetra_LinearProblem(ReducedMatrix_, ReducedLHS_, ReducedRHS_);

  double fn = FullMatrix()->NumGlobalRows();
  double fnnz = FullMatrix()->NumGlobalNonzeros();
  double rn = ReducedMatrix()->NumGlobalRows();
  double rnnz = ReducedMatrix()->NumGlobalNonzeros();

  RatioOfDimensions_ = rn/fn;
  RatioOfNonzeros_ = rnnz/fnnz;
  HaveReducedProblem_ = true;
  
  return(0);
}

Esempio n. 20

Mostra file

File: Epetra_FastCrsMatrix.cpp Progetto: gitter-badger/quinoa

//=============================================================================
int Epetra_FastCrsMatrix::Solve(bool Upper, bool Trans, bool UnitDiagonal, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
  //
  // This function find Y such that LY = X or UY = X or the transpose cases.
  //
  if (X.NumVectors()==1 && Y.NumVectors()==1) {
    double * xp = (double *) X[0];
    double * yp = (double *) Y[0];
    Epetra_Vector x(View, X.Map(), xp);
    Epetra_Vector y(View, Y.Map(), yp);
    return(Solve(Upper, Trans, UnitDiagonal, x, y));
  }
  if (!Filled()) EPETRA_CHK_ERR(-1); // Matrix must be filled.

  if ((Upper) && (!UpperTriangular())) EPETRA_CHK_ERR(-2);
  if ((!Upper) && (!LowerTriangular())) EPETRA_CHK_ERR(-3);
  if ((!UnitDiagonal) && (NoDiagonal())) EPETRA_CHK_ERR(-4); // If matrix has no diagonal, we must use UnitDiagonal
  if ((!UnitDiagonal) && (NumMyDiagonals()<NumMyRows_)) EPETRA_CHK_ERR(-5); // Need each row to have a diagonal

  int i, j, j0, k;
  int * NumEntriesPerRow = NumEntriesPerRow_;
  int ** Indices = Indices_;
  double ** Values = Values_;
  double diag;

  // If upper, point to last row
  if ((Upper && !Trans) || (!Upper && Trans)) {
    NumEntriesPerRow += NumMyRows_-1;
    Indices += NumMyRows_-1;
    Values += NumMyRows_-1;
  }

  double **Xp = (double**)X.Pointers();
  double **Yp = (double**)Y.Pointers();

  int NumVectors = X.NumVectors();

  if (!Trans) {
    

    if (Upper) {
      
      j0 = 1;
      if (NoDiagonal()) j0--; // Include first term if no diagonal
      for (i=NumMyRows_-1; i >=0; i--) {
	int      NumEntries = *NumEntriesPerRow--;
	int *    RowIndices = *Indices--;
	double * RowValues  = *Values--;
	if (!UnitDiagonal) diag = 1.0/RowValues[0]; // Take inverse of diagonal once for later use
	for (k=0; k<NumVectors; k++) {
	  double sum = 0.0;
	  for (j=j0; j < NumEntries; j++) sum += RowValues[j] * Yp[k][RowIndices[j]];
	  
	  if (UnitDiagonal) Yp[k][i] = Xp[k][i] - sum;
	  else Yp[k][i] = (Xp[k][i] - sum)*diag;
	}
      }
    }
    else {
      j0 = 1;
      if (NoDiagonal()) j0--; // Include first term if no diagonal
      for (i=0; i < NumMyRows_; i++) {
	int      NumEntries = *NumEntriesPerRow++ - j0;
	int *    RowIndices = *Indices++;
	double * RowValues  = *Values++;
	if (!UnitDiagonal) diag = 1.0/RowValues[NumEntries]; // Take inverse of diagonal once for later use
	for (k=0; k<NumVectors; k++) {
	  double sum = 0.0;
	  for (j=0; j < NumEntries; j++) sum += RowValues[j] * Yp[k][RowIndices[j]];
	  
	  if (UnitDiagonal) Yp[k][i] = Xp[k][i] - sum;
	  else Yp[k][i] = (Xp[k][i] - sum)*diag;
	}
      }
    }
  }
  // ***********  Transpose case *******************************

  else {

    for (k=0; k<NumVectors; k++) 
      if (Yp[k]!=Xp[k]) for (i=0; i < NumMyRows_; i++) Yp[k][i] = Xp[k][i]; // Initialize y for transpose multiply
    
    if (Upper) {
      
      j0 = 1;
      if (NoDiagonal()) j0--; // Include first term if no diagonal
      
      for (i=0; i < NumMyRows_; i++) {
	int      NumEntries = *NumEntriesPerRow++;
	int *    RowIndices = *Indices++;
	double * RowValues  = *Values++;
	if (!UnitDiagonal) diag = 1.0/RowValues[j0]; // Take inverse of diagonal once for later use
	for (k=0; k<NumVectors; k++) {
	  if (!UnitDiagonal) Yp[k][i] = Yp[k][i]*diag;
	  for (j=j0; j < NumEntries; j++) Yp[k][RowIndices[j]] -= RowValues[j] * Yp[k][i];
	}
      }
    }
    else {
      
      j0 = 1;
      if (NoDiagonal()) j0--; // Include first term if no diagonal
      
      for (i=NumMyRows_-1; i>=0; i--) {
	int      NumEntries = *NumEntriesPerRow-- - j0;
	int *    RowIndices = *Indices--;
	double * RowValues  = *Values--;
	for (k=0; k<NumVectors; k++) {
	  if (!UnitDiagonal)  Yp[k][i] = Yp[k][i]/Xp[k][i];
	  for (j=0; j < NumEntries; j++) Yp[k][RowIndices[j]] -= RowValues[j] * Yp[k][i];
        }
      }
    }
  }
  
  UpdateFlops(2*NumVectors*NumGlobalNonzeros64());
  return(0);
}

Esempio n. 21

Mostra file

File: BlockPCGSolver.cpp Progetto: ChiahungTai/Trilinos

int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const {

  int info = 0;
  int localVerbose = verbose*(MyComm.MyPID() == 0);

  int xr = X.MyLength();

  int wSize = 3*xr;

  if (lWorkSpace < wSize) {
    if (workSpace)
      delete[] workSpace;
    workSpace = new (std::nothrow) double[wSize];
    if (workSpace == 0) {
      info = -1;
      return info;
    }
    lWorkSpace = wSize;
  } // if (lWorkSpace < wSize)

  double *pointer = workSpace;

  Epetra_Vector r(View, X.Map(), pointer);
  pointer = pointer + xr;

  Epetra_Vector p(View, X.Map(), pointer);
  pointer = pointer + xr;

  // Note: Kp and z uses the same memory space
  Epetra_Vector Kp(View, X.Map(), pointer);
  Epetra_Vector z(View, X.Map(), pointer);

  double tmp;
  double initNorm = 0.0, rNorm = 0.0, newRZ = 0.0, oldRZ = 0.0, alpha = 0.0;
  double tolSquare = tolCG*tolCG;

  memcpy(r.Values(), X.Values(), xr*sizeof(double));
  tmp = callBLAS.DOT(xr, r.Values(), 1, r.Values(), 1);
  MyComm.SumAll(&tmp, &initNorm, 1);

  Y.PutScalar(0.0);

  if (localVerbose > 1) {
    std::cout << std::endl;
    std::cout  << " --- PCG Iterations --- " << std::endl;
  }

  int iter;
  for (iter = 1; iter <= iterMax; ++iter) {

    if (Prec) {
      Prec->ApplyInverse(r, z);
    }
    else {
      memcpy(z.Values(), r.Values(), xr*sizeof(double));
    }

    if (iter == 1) {
      tmp = callBLAS.DOT(xr, r.Values(), 1, z.Values(), 1);
      MyComm.SumAll(&tmp, &newRZ, 1);
      memcpy(p.Values(), z.Values(), xr*sizeof(double));
    }
    else {
      oldRZ = newRZ;
      tmp = callBLAS.DOT(xr, r.Values(), 1, z.Values(), 1);
      MyComm.SumAll(&tmp, &newRZ, 1);
      p.Update(1.0, z, newRZ/oldRZ);
    }

    K->Apply(p, Kp);

    tmp = callBLAS.DOT(xr, p.Values(), 1, Kp.Values(), 1);
    MyComm.SumAll(&tmp, &alpha, 1);
    alpha = newRZ/alpha;

    TEUCHOS_TEST_FOR_EXCEPTION(alpha <= 0.0, std::runtime_error,
                         " !!! Non-positive value for p^TKp (" << alpha << ") !!!");

    callBLAS.AXPY(xr, alpha, p.Values(), 1, Y.Values(), 1);

    alpha *= -1.0;
    callBLAS.AXPY(xr, alpha, Kp.Values(), 1, r.Values(), 1);

    // Check convergence
    tmp = callBLAS.DOT(xr, r.Values(), 1, r.Values(), 1);
    MyComm.SumAll(&tmp, &rNorm, 1);

    if (localVerbose > 1) {
      std::cout  << "   Iter. " << iter;
      std::cout.precision(4);
      std::cout.setf(std::ios::scientific, std::ios::floatfield);
      std::cout << " Residual reduction " << std::sqrt(rNorm/initNorm) << std::endl;
    }

    if (rNorm <= tolSquare*initNorm)
      break;

  } // for (iter = 1; iter <= iterMax; ++iter)

  if (localVerbose == 1) {
    std::cout << std::endl;
    std::cout << " --- End of PCG solve ---" << std::endl;
    std::cout << "   Iter. " << iter;
    std::cout.precision(4);
    std::cout.setf(std::ios::scientific, std::ios::floatfield);
    std::cout << " Residual reduction " << std::sqrt(rNorm/initNorm) << std::endl;
    std::cout << std::endl;
  }

  if (localVerbose > 1) {
    std::cout << std::endl;
  }

  numSolve += 1;

  minIter = (iter < minIter) ? iter : minIter;
  maxIter = (iter > maxIter) ? iter : maxIter;
  sumIter += iter;

  return info;
}

Esempio n. 22

Mostra file

File: ml_FaceMatrixFreePreconditioner.cpp Progetto: KineticTheory/Trilinos

// ================================================ ====== ==== ==== == =
//! Apply the preconditioner to an Epetra_MultiVector X, puts the result in Y
int ML_Epetra::FaceMatrixFreePreconditioner::ApplyInverse(const Epetra_MultiVector& B_, Epetra_MultiVector& X) const{
  const Epetra_MultiVector *B;
  Epetra_MultiVector *Bcopy=0;

  /* Sanity Checks */
  int NumVectors=B_.NumVectors();
  if (!B_.Map().SameAs(*FaceDomainMap_)) ML_CHK_ERR(-1);
  if (NumVectors != X.NumVectors()) ML_CHK_ERR(-1);

  Epetra_MultiVector r_edge(*FaceDomainMap_,NumVectors,false);
  Epetra_MultiVector e_edge(*FaceDomainMap_,NumVectors,false);
  Epetra_MultiVector e_node(*CoarseMap_,NumVectors,false);
  Epetra_MultiVector r_node(*CoarseMap_,NumVectors,false);

  /* Deal with the B==X case */
  if (B_.Pointers()[0] == X.Pointers()[0]){
    Bcopy=new Epetra_MultiVector(B_);
    B=Bcopy;
    X.PutScalar(0.0);
  }
  else B=&B_;


  for(int i=0;i<num_cycles;i++){
    /* Pre-smoothing */
#ifdef HAVE_ML_IFPACK
    if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X));
#endif

    if(MaxLevels > 0){
      if(i != 0
#ifdef HAVE_ML_IFPACK
         || Smoother_
#endif
         ){
        /* Calculate Residual (r_e = b - (S+M+Addon) * x) */
        ML_CHK_ERR(Operator_->Apply(X,r_edge));
        ML_CHK_ERR(r_edge.Update(1.0,*B,-1.0));

        /* Xfer to coarse grid (r_n = P' * r_e) */
        ML_CHK_ERR(Prolongator_->Multiply(true,r_edge,r_node));
      }
      else{
        /* Xfer to coarse grid (r_n = P' * r_e) */
        ML_CHK_ERR(Prolongator_->Multiply(true,*B,r_node));
      }

      /* AMG on coarse grid  (e_n = (CoarseMatrix)^{-1} r_n) */
      ML_CHK_ERR(CoarsePC->ApplyInverse(r_node,e_node));

      /* Xfer back to fine grid (e_e = P * e_n) */
      ML_CHK_ERR(Prolongator_->Multiply(false,e_node,e_edge));

      /* Add in correction (x = x + e_e)        */
      ML_CHK_ERR(X.Update(1.0,e_edge,1.0));
    }/*end if*/

    /* Post-Smoothing */
#ifdef HAVE_ML_IFPACK
    if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X));
#endif

  }/*end for*/

  /* Cleanup */
  if(Bcopy) delete Bcopy;

  return 0;
}/*end ApplyInverse*/

Esempio n. 23

Mostra file

File: ml_RefMaxwell.cpp Progetto: 00liujj/trilinos

// ================================================ ====== ==== ==== == =
//! Implicitly applies in the inverse in an additive format
int  ML_Epetra::RefMaxwellPreconditioner::ApplyInverse_Implicit_Additive(const Epetra_MultiVector& B, Epetra_MultiVector& X) const
{
#ifdef ML_TIMING
  double t_time,t_diff;
  StartTimer(&t_time);
#endif

  int NumVectors=B.NumVectors();
  Epetra_MultiVector TempE1(X.Map(),NumVectors,false);
  Epetra_MultiVector TempE2(X.Map(),NumVectors,true);
  Epetra_MultiVector TempN1(*NodeMap_,NumVectors,false);
  Epetra_MultiVector TempN2(*NodeMap_,NumVectors,true);
  Epetra_MultiVector Resid(B.Map(),NumVectors);

  /* Pre-Smoothing */
#ifdef HAVE_ML_IFPACK
  if(IfSmoother) {ML_CHK_ERR(IfSmoother->ApplyInverse(B,X));}
  else
#endif
  if(PreEdgeSmoother) ML_CHK_ERR(PreEdgeSmoother->ApplyInverse(B,X));

  /* Build Residual */
  ML_CHK_ERR(SM_Matrix_->Multiply(false,X,TempE1));
  ML_CHK_ERR(Resid.Update(-1.0,TempE1,1.0,B,0.0));

  if(!HasOnlyDirichletNodes){
    ML_CHK_ERR(D0_Matrix_->Multiply(true,Resid,TempN1));
  }

  /* Precondition (1,1) block (additive)*/
  ML_CHK_ERR(EdgePC->ApplyInverse(Resid,TempE2));

  /* Precondition (2,2) block (additive)*/
  if(!HasOnlyDirichletNodes){
    ML_CHK_ERR(NodePC->ApplyInverse(TempN1,TempN2));

    /* EXPERIMENTAL: Local Nodal Stuff, if active */
    if(use_local_nodal_solver){
      const Epetra_Map& LocalMap=LocalNodalMatrix->DomainMap();
      Epetra_MultiVector TempNL1(LocalMap,NumVectors,true);
      Epetra_MultiVector TempNL2(LocalMap,NumVectors,true);
      Epetra_MultiVector TempN3(*NodeMap_,NumVectors,true);

      NodesToLocalNodes->Multiply(true,TempN1,TempNL1);
      LocalNodalSolver->ApplyInverse(TempNL1,TempNL2);
      NodesToLocalNodes->Multiply(false,TempNL2,TempN3);
      TempN2.Update(1.0,TempN3,1.0);
    }/*end if*/

    D0_Matrix_->Multiply(false,TempN2,TempE1);
  }/*end if*/

  /* Update solution */
  if(HasOnlyDirichletNodes) X.Update(1.0,TempE2,1.0);
  else X.Update(1.0,TempE1,1.0,TempE2,1.0);

  /* Post-Smoothing */
#ifdef HAVE_ML_IFPACK
  if(IfSmoother) {ML_CHK_ERR(IfSmoother->ApplyInverse(B,X));}
  else
#endif
    if(PostEdgeSmoother) ML_CHK_ERR(PostEdgeSmoother->ApplyInverse(B,X));


#ifdef ML_TIMING
  StopTimer(&t_time,&t_diff);
  /* Output */
  ML_Comm *comm_;
  ML_Comm_Create(&comm_);
  this->ApplicationTime_+= t_diff;
  ML_Comm_Destroy(&comm_);
#endif

  return 0;
}

Esempio n. 24

Mostra file

File: Ifpack_SORa.cpp Progetto: Russell-Jones-OxPhys/Trilinos

int Ifpack_SORa::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{
  if(!IsComputed_) return -1;
  Time_.ResetStartTime();
  bool initial_guess_is_zero=false;
  const int lclNumRows = W_->NumMyRows();
  const int NumVectors = X.NumVectors();
  Epetra_MultiVector Temp(A_->RowMatrixRowMap(),NumVectors);

  double omega=GetOmega();

  // need to create an auxiliary vector, Xcopy
  Teuchos::RCP<const Epetra_MultiVector> Xcopy;
  if (X.Pointers()[0] == Y.Pointers()[0]){
    Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
    // Since the user didn't give us anything better, our initial guess is zero.
    Y.Scale(0.0);
    initial_guess_is_zero=true;
  }
  else
    Xcopy = Teuchos::rcp( &X, false );

  Teuchos::RCP< Epetra_MultiVector > T2;
  // Note: Assuming that the matrix has an importer.  Ifpack_PointRelaxation doesn't do this, but given that
  // I have a CrsMatrix, I'm probably OK.
  // Note: This is the lazy man's version sacrificing a few extra flops for avoiding if statements to determine
  // if things are on or off processor.
  // Note: T2 must be zero'd out
  if (IsParallel_ && W_->Importer())  T2 = Teuchos::rcp( new Epetra_MultiVector(W_->Importer()->TargetMap(),NumVectors,true));
  else T2 = Teuchos::rcp( new Epetra_MultiVector(A_->RowMatrixRowMap(),NumVectors,true));

  // Pointer grabs
  int* rowptr,*colind;
  double *values;
  double **t_ptr,** y_ptr, ** t2_ptr, **x_ptr,*d_ptr;
  T2->ExtractView(&t2_ptr);
  Y.ExtractView(&y_ptr);
  Temp.ExtractView(&t_ptr);
  Xcopy->ExtractView(&x_ptr);
  Wdiag_->ExtractView(&d_ptr);
  IFPACK_CHK_ERR(W_->ExtractCrsDataPointers(rowptr,colind,values));


  for(int i=0; i<NumSweeps_; i++){
    // Calculate b-Ax
    if(!initial_guess_is_zero  || i > 0) {
      A_->Apply(Y,Temp);
      Temp.Update(1.0,*Xcopy,-1.0);
    }
    else
      Temp.Update(1.0,*Xcopy,0.0);

    // Note: The off-processor entries of T2 never get touched (they're always zero) and the other entries are updated
    // in this sweep before they are used, so we don't need to reset T2 to zero here.

    // Do backsolve & update
    // x = x  + W^{-1} (b - A x)
    for(int j=0; j<lclNumRows; j++){
      double diag=d_ptr[j];
      for (int m=0 ; m<NumVectors; m++) {
        double dtmp=0.0;
        // Note: Since the diagonal is in the matrix, we need to zero that entry of T2 here to make sure it doesn't contribute.
        t2_ptr[m][j]=0.0;
        for(int k=rowptr[j];k<rowptr[j+1];k++){
          dtmp+= values[k]*t2_ptr[m][colind[k]];
        }
        // Yes, we need to update both of these.
        t2_ptr[m][j] = (t_ptr[m][j]- dtmp)/diag;
        y_ptr[m][j] += omega*t2_ptr[m][j];
      }
    }
  }

  // Counter update
  NumApplyInverse_++;
  ApplyInverseTime_ += Time_.ElapsedTime();
  return 0;
}

Esempio n. 25

Mostra file

// Apply the preconditioner w/ RHS B and get result X
int ML_Epetra::LevelWrap::ApplyInverse(const Epetra_MultiVector& B, Epetra_MultiVector& X_) const{
#ifdef ML_TIMING
  double t_time,t_diff;
  StartTimer(&t_time);
#endif
   
  // Sanity Checks
  if (!B.Map().SameAs(OperatorDomainMap())) return -1;
  if (!X_.Map().SameAs(OperatorRangeMap())) return -1;
  if (!X_.Map().SameAs(B.Map())) return -1;
  if (B.NumVectors() != X_.NumVectors()) return -1;

  // Build new work vector X 
  Epetra_MultiVector X(X_.Map(),X_.NumVectors(),true);
  Epetra_MultiVector tmp0(X_.Map(),X_.NumVectors(),true);
  Epetra_MultiVector tmp1(P0_->DomainMap(),X_.NumVectors(),true);
  Epetra_MultiVector tmp2(P0_->DomainMap(),X_.NumVectors(),true);
  
  // Pre Smoother
  if(pre_or_post==ML_BOTH || pre_or_post==ML_PRESMOOTHER){
    Smoother_->ApplyInverse(B,X);
  }

  // Form coarse residual
  A0_->Apply(X,tmp0);
  tmp0.Update(1.0,B,-1.0); 
  if(use_pt_) P0_->Multiply(true,tmp0,tmp1);
  else R0_->Multiply(false,tmp0,tmp1);

  // Solve coarse problem
  A1prec_->ApplyInverse(tmp1,tmp2);

  // Update solution
  P0_->Multiply(false,tmp2,tmp0);
  X.Update(1.0,tmp0,1.0);

  // Post Smoother
  if(pre_or_post==ML_BOTH || pre_or_post==ML_PRESMOOTHER){
    Smoother_->ApplyInverse(B,X);
  }

  // Copy to output
  X_=X;

#ifdef ML_TIMING
  StopTimer(&t_time,&t_diff);
  /* Output */
  ML_Comm *comm_;
  ML_Comm_Create(&comm_);
  ApplicationTime_+= t_diff;
  if(FirstApplication_){
    FirstApplication_=false;
    FirstApplicationTime_=ApplicationTime_;
  }/*end if*/
  ML_Comm_Destroy(&comm_);
#endif  

  return 0;
}

Esempio n. 26

Mostra file

File: shylu_schur.cpp Progetto: Russell-Jones-OxPhys/Trilinos

/* Computes the approximate Schur complement for the wide separator */
Teuchos::RCP<Epetra_CrsMatrix> computeApproxWideSchur(shylu_config *config,
    shylu_symbolic *ssym,   // symbolic structure
    Epetra_CrsMatrix *G, Epetra_CrsMatrix *R,
    Epetra_LinearProblem *LP, Amesos_BaseSolver *solver,
    Ifpack_Preconditioner *ifSolver, Epetra_CrsMatrix *C,
    Epetra_Map *localDRowMap)
{
    int i;
    double relative_thres = config->relative_threshold;

    // Need to create local G (block diagonal portion) , R, C

    // Get row map of G
    //Epetra_Map CrMap = C->RowMap();
    //int *c_rows = CrMap.MyGlobalElements();
    //int *c_cols = (C->ColMap()).MyGlobalElements();
    //int c_totalElems = CrMap.NumGlobalElements();
    //int c_localElems = CrMap.NumMyElements();
    //int c_localcolElems = (C->ColMap()).NumMyElements();

    Epetra_Map GrMap = G->RowMap();
    int *g_rows = GrMap.MyGlobalElements();
    //int g_totalElems = GrMap.NumGlobalElements();
    int g_localElems = GrMap.NumMyElements();

    //Epetra_Map RrMap = R->RowMap();
    //int *r_rows = RrMap.MyGlobalElements();
    //int *r_cols = (R->ColMap()).MyGlobalElements();
    //int r_totalElems = RrMap.NumGlobalElements();
    //int r_localElems = RrMap.NumMyElements();
    //int r_localcolElems = (R->ColMap()).NumMyElements();

    Epetra_SerialComm LComm;
    Epetra_Map G_localRMap (-1, g_localElems, g_rows, 0, LComm);

    int nentries1, gid;
    // maxentries is the maximum of all three possible matrices as the arrays
    // are reused between the three
    int maxentries = max(C->MaxNumEntries(), R->MaxNumEntries());
    maxentries = max(maxentries, G->MaxNumEntries());

    double *values1 = new double[maxentries];
    double *values2 = new double[maxentries];
    double *values3 = new double[maxentries];
    int *indices1 = new int[maxentries];
    int *indices2 = new int[maxentries];
    int *indices3 = new int[maxentries];

    // Sbar - Approximate Schur complement
    Teuchos::RCP<Epetra_CrsMatrix> Sbar = Teuchos::rcp(new Epetra_CrsMatrix(
                                            Copy, GrMap, g_localElems));

    // Include only the block diagonal elements of G in localG
    Epetra_CrsMatrix localG(Copy, G_localRMap, G->MaxNumEntries(), false);
    int cnt, scnt;
    for (i = 0; i < g_localElems ; i++)
    {
        gid = g_rows[i];
        G->ExtractGlobalRowCopy(gid, maxentries, nentries1, values1, indices1);

        cnt = 0;
        scnt = 0;
        for (int j = 0 ; j < nentries1 ; j++)
        {
            if (G->LRID(indices1[j]) != -1)
            {
                values2[cnt] = values1[j];
                indices2[cnt++] = indices1[j];
            }
            else
            {
                // Add it to Sbar immediately
                values3[scnt] = values1[j];
                indices3[scnt++] = indices1[j];
            }
        }

        localG.InsertGlobalValues(gid, cnt, values2, indices2);
        Sbar->InsertGlobalValues(gid, scnt, values3, indices3);
    }
    localG.FillComplete();
    //cout << "Created local G matrix" << endl;

    int nvectors = 16;
    /*ShyLU_Probing_Operator probeop(&localG, &localR, LP, solver, &localC,
                                        localDRowMap, nvectors);*/
    ShyLU_Local_Schur_Operator probeop(config, ssym, &localG, R, LP, solver,
                                        ifSolver, C, localDRowMap, nvectors);

#ifdef DUMP_MATRICES
    //ostringstream fnamestr;
    //fnamestr << "localC" << C->Comm().MyPID() << ".mat";
    //string Cfname = fnamestr.str();
    //EpetraExt::RowMatrixToMatlabFile(Cfname.c_str(), localC);

    //Epetra_Map defMapg(-1, g_localElems, 0, localG.Comm());
    //EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTransg =
                        //new EpetraExt::CrsMatrix_Reindex( defMapg );
    //Epetra_CrsMatrix t2G = (*ReIdx_MatTransg)( localG );
    //ReIdx_MatTransg->fwd();
    //EpetraExt::RowMatrixToMatlabFile("localG.mat", t2G);
#endif

    //cout << " totalElems in Schur Complement" << totalElems << endl;
    //cout << myPID << " localElems" << localElems << endl;

    // **************** Two collectives here *********************
#ifdef TIMING_OUTPUT
    Teuchos::Time ftime("setup time");
    ftime.start();
#endif
#ifdef TIMING_OUTPUT
    Teuchos::Time app_time("setup time");
#endif

    int nentries;
    // size > maxentries as there could be fill
    // TODO: Currently the size of the two arrays can be one, Even if we switch
    // the loop below the size of the array required is nvectors. Fix it
    double *values = new double[nvectors];
    int *indices = new int[nvectors];
    double *vecvalues;
#ifdef SHYLU_DEBUG
    // mfh 25 May 2015: Don't declare this variable if it's not used.
    // It's only used if SHYLU_DEBUG is defined.
    int dropped = 0;
#endif // SHYLU_DEBUG
    double *maxvalue = new double[nvectors];
#ifdef TIMING_OUTPUT
    ftime.start();
#endif
    int findex = g_localElems / nvectors ;

    int cindex;
    // int mypid = C->Comm().MyPID(); // unused
    Epetra_MultiVector probevec (G_localRMap, nvectors);
    Epetra_MultiVector Scol (G_localRMap, nvectors);
    probevec.PutScalar(0.0);
    for (i = 0 ; i < findex*nvectors ; i+=nvectors)
    {
        // Set the probevec to find block columns of S.
        for (int k = 0; k < nvectors; k++)
        {
            cindex = k+i;
            // TODO: Can do better than this, just need to go to the column map
            // of C, there might be null columns in C
            probevec.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
            //if (mypid == 0)
            //cout << "Changing row to 1.0 " << g_rows[cindex] << endl;
        }

#ifdef TIMING_OUTPUT
        app_time.start();
#endif
        probeop.Apply(probevec, Scol);
#ifdef TIMING_OUTPUT
        app_time.stop();
#endif

        // Reset the probevec to all zeros.
        for (int k = 0; k < nvectors; k++)
        {
            cindex = k+i;
            probevec.ReplaceGlobalValue(g_rows[cindex], k, 0.0);
        }

        Scol.MaxValue(maxvalue);
        nentries = 0;
        for (int j = 0 ; j < g_localElems ; j++)
        {
            for (int k = 0; k < nvectors; k++)
            {
                cindex = k+i;
                vecvalues = Scol[k];
                if ((g_rows[cindex] == g_rows[j])  ||
                (abs(vecvalues[j]/maxvalue[k]) > relative_thres))
                // diagonal entry or large entry.
                {
                    values[nentries] = vecvalues[j];
                    indices[nentries++] = g_rows[cindex];
                }
#ifdef SHYLU_DEBUG
                else if (vecvalues[j] != 0.0)
                {
                    dropped++;
                }
#endif // SHYLU_DEBUG
            }
            Sbar->InsertGlobalValues(g_rows[j], nentries, values,
                        indices);
            nentries = 0;
        }
    }

    if (i < g_localElems)
    {
        nvectors = g_localElems - i;
        probeop.ResetTempVectors(nvectors);
        Epetra_MultiVector probevec1 (G_localRMap, nvectors);
        Epetra_MultiVector Scol1 (G_localRMap, nvectors);

        probevec1.PutScalar(0.0);
        for (int k = 0; k < nvectors; k++)
        {
            cindex = k+i;
            // TODO: Can do better than this, just need to go to the column map
            // of C, there might be null columns in C
            probevec1.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
        }

#ifdef TIMING_OUTPUT
        app_time.start();
#endif
        probeop.Apply(probevec1, Scol1);
#ifdef TIMING_OUTPUT
        app_time.stop();
#endif
        Scol1.MaxValue(maxvalue);
        nentries = 0;
        for (int j = 0 ; j < g_localElems ; j++)
        {
            //cout << "MAX" << maxvalue << endl;
            for (int k = 0; k < nvectors; k++)
            {
                cindex = k+i;
                vecvalues = Scol1[k];
                //nentries = 0; // inserting one entry in each row for now
                if ((g_rows[cindex] == g_rows[j])  ||
                (abs(vecvalues[j]/maxvalue[k]) > relative_thres))
                // diagonal entry or large entry.
                {
                    values[nentries] = vecvalues[j];
                    indices[nentries++] = g_rows[cindex];
                }
#ifdef SHYLU_DEBUG
                else if (vecvalues[j] != 0.0)
                {
                    dropped++;
                }
#endif // SHYLU_DEBUG
            }
            Sbar->InsertGlobalValues(g_rows[j], nentries, values,
                        indices);
            nentries = 0;
        }
    }

#ifdef TIMING_OUTPUT
    ftime.stop();
    cout << "Time in finding and dropping entries" << ftime.totalElapsedTime()
                     << endl;
    ftime.reset();
    cout << "Time in Apply of probing" << app_time.totalElapsedTime() << endl;
    probeop.PrintTimingInfo();
#endif
    Sbar->FillComplete();

#ifdef DUMP_MATRICES
    Epetra_Map defMap2(-1, g_localElems, 0, C->Comm());
    EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTrans2 =
                        new EpetraExt::CrsMatrix_Reindex( defMap2 );
    Epetra_CrsMatrix t2S = (*ReIdx_MatTrans2)( *Sbar );
    ReIdx_MatTrans2->fwd();
    EpetraExt::RowMatrixToMatlabFile("Schur.mat", t2S);
#endif

#ifdef SHYLU_DEBUG
    cout << "#dropped entries" << dropped << endl;
#endif
    delete[] values;
    delete[] indices;
    delete[] values1;
    delete[] indices1;
    delete[] values2;
    delete[] indices2;
    delete[] values3;
    delete[] indices3;
    delete[] maxvalue;

    return Sbar;
}

Esempio n. 27

Mostra file

File: Amesos_TestMultiSolver.cpp Progetto: cakeisalie/oomphlib_003

//
//  Amesos_TestMultiSolver.cpp reads in a matrix in Harwell-Boeing format, 
//  calls one of the sparse direct solvers, using blocked right hand sides
//  and computes the error and residual.  
//
//  TestSolver ignores the Harwell-Boeing right hand sides, creating
//  random right hand sides instead.  
//
//  Amesos_TestMultiSolver can test either A x = b or A^T x = b.
//  This can be a bit confusing because sparse direct solvers 
//  use compressed column storage - the transpose of Trilinos'
//  sparse row storage.
//
//  Matrices:
//    readA - Serial.  As read from the file.
//    transposeA - Serial.  The transpose of readA.
//    serialA - if (transpose) then transposeA else readA 
//    distributedA - readA distributed to all processes
//    passA - if ( distributed ) then distributedA else serialA
//
//
int Amesos_TestMultiSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, 
		      SparseSolverType SparseSolver, bool transpose,
		      int special, AMESOS_MatrixType matrix_type ) {


  int iam = Comm.MyPID() ;

  
  //  int hatever;
  //  if ( iam == 0 )  std::cin >> hatever ; 
  Comm.Barrier();


  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
   
  std::string FileName = matrix_file ;
  int FN_Size = FileName.size() ; 
  std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size );
  std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size );
  bool NonContiguousMap = false; 

  if ( LastFiveBytes == ".triU" ) { 
    NonContiguousMap = true; 
    // Call routine to read in unsymmetric Triplet matrix
    EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, 
						      readb, readxexact, NonContiguousMap ) );
  } else {
    if ( LastFiveBytes == ".triS" ) { 
      NonContiguousMap = true; 
      // Call routine to read in symmetric Triplet matrix
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, 
							readMap, readA, readx, 
							readb, readxexact, NonContiguousMap ) );
    } else {
      if (  LastFourBytes == ".mtx" ) { 
	EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, 
							       readA, readx, readb, readxexact) );
      } else {
	// Call routine to read in HB problem
	Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, 
						     readb, readxexact) ;
      }
    }
  }

  Epetra_CrsMatrix transposeA(Copy, *readMap, 0);
  Epetra_CrsMatrix *serialA ; 

  if ( transpose ) {
    assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); 
    serialA = &transposeA ; 
  } else {
    serialA = readA ; 
  }

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);
  Epetra_Map* map_;

  if( NonContiguousMap ) {
    //
    //  map gives us NumMyElements and MyFirstElement;
    //
    int NumGlobalElements =  readMap->NumGlobalElements();
    int NumMyElements = map.NumMyElements();
    int MyFirstElement = map.MinMyGID();
    std::vector<int> MapMap_( NumGlobalElements );
    readMap->MyGlobalElements( &MapMap_[0] ) ;
    Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; 
    map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm);
  } else {
    map_ = new Epetra_Map( map ) ; 
  }


  // Create Exporter to distribute read-in matrix and vectors
  Epetra_Export exporter(*readMap, *map_);
  Epetra_CrsMatrix A(Copy, *map_, 0);

  Epetra_RowMatrix * passA = 0; 
  Epetra_MultiVector * passx = 0; 
  Epetra_MultiVector * passb = 0;
  Epetra_MultiVector * passxexact = 0;
  Epetra_MultiVector * passresid = 0;
  Epetra_MultiVector * passtmp = 0;

  Epetra_MultiVector x(*map_,numsolves);
  Epetra_MultiVector b(*map_,numsolves);
  Epetra_MultiVector xexact(*map_,numsolves);
  Epetra_MultiVector resid(*map_,numsolves);
  Epetra_MultiVector tmp(*map_,numsolves);

  Epetra_MultiVector serialx(*readMap,numsolves);
  Epetra_MultiVector serialb(*readMap,numsolves);
  Epetra_MultiVector serialxexact(*readMap,numsolves);
  Epetra_MultiVector serialresid(*readMap,numsolves);
  Epetra_MultiVector serialtmp(*readMap,numsolves);

  bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; 
  if ( distribute_matrix ) { 
    //
    //  Initialize x, b and xexact to the values read in from the file
    //
    
    A.Export(*serialA, exporter, Add);
    Comm.Barrier();

    assert(A.FillComplete()==0);    
    Comm.Barrier();

    passA = &A; 
    passx = &x; 
    passb = &b;
    passxexact = &xexact;
    passresid = &resid;
    passtmp = &tmp;
  } else { 
    passA = serialA; 
    passx = &serialx; 
    passb = &serialb;
    passxexact = &serialxexact;
    passresid = &serialresid;
    passtmp = &serialtmp;
  }

  passxexact->SetSeed(131) ; 
  passxexact->Random();
  passx->SetSeed(11231) ; 
  passx->Random();

  passb->PutScalar( 0.0 );
  passA->Multiply( transpose, *passxexact, *passb ) ; 

  Epetra_MultiVector CopyB( *passb ) ;

  double Anorm = passA->NormInf() ; 
  SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ;

  Epetra_LinearProblem Problem(  (Epetra_RowMatrix *) passA, 
				 (Epetra_MultiVector *) passx, 
				 (Epetra_MultiVector *) passb );

  double max_resid = 0.0;
  for ( int j = 0 ; j < special+1 ; j++ ) { 
    
    Epetra_Time TotalTime( Comm ) ; 
    if ( false ) { 
#ifdef TEST_UMFPACK

      unused code

    } else if ( SparseSolver == UMFPACK ) { 
      UmfpackOO umfpack( (Epetra_RowMatrix *) passA, 
			 (Epetra_MultiVector *) passx, 
			 (Epetra_MultiVector *) passb ) ; 
    
      umfpack.SetTrans( transpose ) ; 
      umfpack.Solve() ; 
#endif
#ifdef TEST_SUPERLU
    } else if ( SparseSolver == SuperLU ) { 
      SuperluserialOO superluserial( (Epetra_RowMatrix *) passA, 
				     (Epetra_MultiVector *) passx, 
				     (Epetra_MultiVector *) passb ) ; 

      superluserial.SetPermc( SuperLU_permc ) ; 
      superluserial.SetTrans( transpose ) ; 
      superluserial.SetUseDGSSV( special == 0 ) ; 
      superluserial.Solve() ; 
#endif
#ifdef HAVE_AMESOS_SLUD
    } else if ( SparseSolver == SuperLUdist ) { 
      SuperludistOO superludist( Problem ) ; 
      superludist.SetTrans( transpose ) ; 
      EPETRA_CHK_ERR( superludist.Solve( true ) ) ;
#endif 
#ifdef HAVE_AMESOS_SLUD2
    } else if ( SparseSolver == SuperLUdist2 ) { 
      Superludist2_OO superludist2( Problem ) ; 
      superludist2.SetTrans( transpose ) ; 
      EPETRA_CHK_ERR( superludist2.Solve( true ) ) ;
#endif 
#ifdef TEST_SPOOLES
    } else if ( SparseSolver == SPOOLES ) { 
      SpoolesOO spooles( (Epetra_RowMatrix *) passA, 
			 (Epetra_MultiVector *) passx, 
			 (Epetra_MultiVector *) passb ) ; 
    
      spooles.SetTrans( transpose ) ; 
      spooles.Solve() ; 
#endif
#ifdef HAVE_AMESOS_DSCPACK
    } else if ( SparseSolver == DSCPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Dscpack dscpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); 
    
      EPETRA_CHK_ERR( dscpack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_UMFPACK
    } else if ( SparseSolver == UMFPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Umfpack umfpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( umfpack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_KLU
    } else if ( SparseSolver == KLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Klu klu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( klu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( klu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( klu.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARAKLETE
    } else if ( SparseSolver == PARAKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Paraklete paraklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( paraklete.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( paraklete.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( paraklete.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SLUS
    } else if ( SparseSolver == SuperLU ) { 
      Epetra_SLU superluserial( &Problem ) ; 
      EPETRA_CHK_ERR( superluserial.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superluserial.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superluserial.NumericFactorization(  ) ); 

      EPETRA_CHK_ERR( superluserial.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_LAPACK
    } else if ( SparseSolver == LAPACK ) { 
      Teuchos::ParameterList ParamList ;
      ParamList.set( "MaxProcs", -3 );
      Amesos_Lapack lapack( Problem ) ; 
      EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( lapack.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_TAUCS
    } else if ( SparseSolver == TAUCS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Taucs taucs( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARDISO
    } else if ( SparseSolver == PARDISO ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Pardiso pardiso( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARKLETE
    } else if ( SparseSolver == PARKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Parklete parklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( parklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( parklete.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( parklete.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( parklete.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( parklete.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_MUMPS
    } else if ( SparseSolver == MUMPS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Mumps mumps( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SCALAPACK
    } else if ( SparseSolver == SCALAPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Scalapack scalapack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SUPERLUDIST
    } else if ( SparseSolver == SUPERLUDIST ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superludist superludist( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); 

      EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superludist.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SUPERLU
    } else if ( SparseSolver == SUPERLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superlu superlu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superlu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.Solve( ) ); 
#endif
#ifdef TEST_SPOOLESSERIAL 
    } else if ( SparseSolver == SPOOLESSERIAL ) { 
      SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, 
				     (Epetra_MultiVector *) passx, 
				     (Epetra_MultiVector *) passb ) ; 
    
      spoolesserial.Solve() ;
#endif
    } else { 
      SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ;
      std::cerr << "\n\n####################  Requested solver not available (Or not tested with blocked RHS) on this platform #####################\n" << std::endl ;
    }

    SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); 
    //    SparseDirectTimingVars::SS_Result.Set_First_Time( 0.0 ); 
    //    SparseDirectTimingVars::SS_Result.Set_Middle_Time( 0.0 ); 
    //    SparseDirectTimingVars::SS_Result.Set_Last_Time( 0.0 ); 

    //
    //  Compute the error = norm(xcomp - xexact )
    //
    std::vector <double> error(numsolves) ; 
    double max_error = 0.0;
  
    passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0);

    passresid->Norm2(&error[0]);
    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( error[i] > max_error ) max_error = error[i] ; 
    SparseDirectTimingVars::SS_Result.Set_Error(max_error) ;

    //  passxexact->Norm2(&error[0] ) ; 
    //  passx->Norm2(&error ) ; 

    //
    //  Compute the residual = norm(Ax - b)
    //
    std::vector <double> residual(numsolves) ; 
  
    passtmp->PutScalar(0.0);
    passA->Multiply( transpose, *passx, *passtmp);
    passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); 
    //    passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); 
    passresid->Norm2(&residual[0]);

    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( residual[i] > max_resid ) max_resid = residual[i] ; 


    SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ;
    
    std::vector <double> bnorm(numsolves); 
    passb->Norm2( &bnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ;

    std::vector <double> xnorm(numsolves); 
    passx->Norm2( &xnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ;


    if ( false && iam == 0 ) { 

      std::cout << " Amesos_TestMutliSolver.cpp " << std::endl ; 
      for ( int i = 0 ; i< numsolves && i < 10 ; i++ ) {
	std::cout << "i=" << i 
	     << " error = " << error[i] 
	     << " xnorm = " << xnorm[i] 
	     << " residual = " << residual[i] 
	     << " bnorm = " << bnorm[i] 
	     << std::endl ; 
      
      }
    
      std::cout << std::endl << " max_resid = " << max_resid ; 
      std::cout << " max_error = " << max_error << std::endl ; 
      std::cout << " Get_residual() again = " << SparseDirectTimingVars::SS_Result.Get_Residual() << std::endl ;

    }
  }
  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;
  delete map_;
  
  Comm.Barrier();

return 0 ;
}

Esempio n. 28

Mostra file

File: Davidson.cpp Progetto: Tech-XCorp/Trilinos

int Davidson::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) {

  // Computes the smallest eigenvalues and the corresponding eigenvectors
  // of the generalized eigenvalue problem
  // 
  //      K X = M X Lambda
  // 
  // using a generalized Davidson algorithm
  //
  // Note that if M is not specified, then  K X = X Lambda is solved.
  // 
  // Input variables:
  // 
  // numEigen  (integer) = Number of eigenmodes requested
  // 
  // Q (Epetra_MultiVector) = Converged eigenvectors
  //                   The number of columns of Q must be at least numEigen + blockSize.
  //                   The rows of Q are distributed across processors.
  //                   At exit, the first numEigen columns contain the eigenvectors requested.
  // 
  // lambda (array of doubles) = Converged eigenvalues
  //                   At input, it must be of size numEigen + blockSize.
  //                   At exit, the first numEigen locations contain the eigenvalues requested.
  //
  // startingEV (integer) = Number of existing converged eigenvectors
  //                   We assume that the user has check the eigenvectors and 
  //                   their M-orthonormality.
  //
  // Return information on status of computation
  // 
  // info >=   0 >> Number of converged eigenpairs at the end of computation
  // 
  // // Failure due to input arguments
  // 
  // info = -  1 >> The stiffness matrix K has not been specified.
  // info = -  2 >> The maps for the matrix K and the matrix M differ.
  // info = -  3 >> The maps for the matrix K and the preconditioner P differ.
  // info = -  4 >> The maps for the vectors and the matrix K differ.
  // info = -  5 >> Q is too small for the number of eigenvalues requested.
  // info = -  6 >> Q is too small for the computation parameters.
  //
  // info = -  8 >> The number of blocks is too small for the number of eigenvalues.
  // 
  // info = - 10 >> Failure during the mass orthonormalization
  // 
  // info = - 30 >> MEMORY
  //

  // Check the input parameters
  
  if (numEigen <= startingEV) {
    return startingEV;
  }

  int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen));
  if (info < 0)
    return info;

  int myPid = MyComm.MyPID();

  if (numBlock*blockSize < numEigen) {
    if (myPid == 0) {
      cerr << endl;
      cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than ";
      cerr << " the number of eigenvalues !!!\n";
      cerr << " Number of blocks = " << numBlock << endl;
      cerr << " Size of blocks = " << blockSize << endl;
      cerr << " Number of eigenvalues = " << numEigen << endl;
      cerr << endl;
    }
    return -8;
  }

  // Get the weight for approximating the M-inverse norm
  Epetra_Vector *vectWeight = 0;
  if (normWeight) {
    vectWeight = new Epetra_Vector(View, Q.Map(), normWeight);
  }

  int knownEV = startingEV;
  int localVerbose = verbose*(myPid==0);

  // Define local block vectors
  //
  // MX = Working vectors (storing M*X if M is specified, else pointing to X)
  // KX = Working vectors (storing K*X)
  //
  // R = Residuals

  int xr = Q.MyLength();
  int dimSearch = blockSize*numBlock;

  Epetra_MultiVector X(View, Q, 0, dimSearch + blockSize);
  if (knownEV > 0) {
    Epetra_MultiVector copyX(View, Q, knownEV, blockSize);
    copyX.Random();
  }
  else {
    X.Random();
  }

  int tmp;
  tmp = (M == 0) ? 2*blockSize*xr : 3*blockSize*xr;

  double *work1 = new (nothrow) double[tmp]; 
  if (work1 == 0) {
    if (vectWeight)
      delete vectWeight;
    info = -30;
    return info;
  }
  memRequested += sizeof(double)*tmp/(1024.0*1024.0);

  highMem = (highMem > currentSize()) ? highMem : currentSize();

  double *tmpD = work1;

  Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize);
  tmpD = tmpD + xr*blockSize;

  Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize);
  tmpD = (M) ? tmpD + xr*blockSize : tmpD;

  Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize);

  // Define arrays
  //
  // theta = Store the local eigenvalues (size: dimSearch)
  // normR = Store the norm of residuals (size: blockSize)
  //
  // KK = Local stiffness matrix         (size: dimSearch x dimSearch)
  //
  // S = Local eigenvectors              (size: dimSearch x dimSearch)
  //
  // tmpKK = Local workspace             (size: blockSize x blockSize)

  int lwork2 = blockSize + dimSearch + 2*dimSearch*dimSearch + blockSize*blockSize;
  double *work2 = new (nothrow) double[lwork2];
  if (work2 == 0) {
    if (vectWeight)
      delete vectWeight;
    delete[] work1;
    info = -30;
    return info;
  }

  memRequested += sizeof(double)*lwork2/(1024.0*1024.0);
  highMem = (highMem > currentSize()) ? highMem : currentSize();

  tmpD = work2;

  double *theta = tmpD;
  tmpD = tmpD + dimSearch;

  double *normR = tmpD;
  tmpD = tmpD + blockSize;

  double *KK = tmpD;
  tmpD = tmpD + dimSearch*dimSearch;
  memset(KK, 0, dimSearch*dimSearch*sizeof(double));

  double *S = tmpD;
  tmpD = tmpD + dimSearch*dimSearch;

  double *tmpKK = tmpD;

  // Define an array to store the residuals history
  if (localVerbose > 2) {
    resHistory = new (nothrow) double[maxIterEigenSolve*blockSize];
    spaceSizeHistory = new (nothrow) int[maxIterEigenSolve];
    if ((resHistory == 0) || (spaceSizeHistory == 0)) {
      if (vectWeight)
        delete vectWeight;
      delete[] work1;
      delete[] work2;
      info = -30;
      return info;
    }
    historyCount = 0;
  }

  // Miscellaneous definitions

  bool reStart = false;
  numRestart = 0;

  bool criticalExit = false;

  int bStart = 0;
  int offSet = 0;
  numBlock = (dimSearch/blockSize) - (knownEV/blockSize);

  int nFound = blockSize;
  int i, j;

  if (localVerbose > 0) {
    cout << endl;
    cout << " *|* Problem: ";
    if (M)
      cout << "K*Q = M*Q D ";
    else
      cout << "K*Q = Q D ";
    if (Prec)
      cout << " with preconditioner";
    cout << endl;
    cout << " *|* Algorithm = Davidson algorithm (block version)" << endl;
    cout << " *|* Size of blocks = " << blockSize << endl;
    cout << " *|* Largest size of search space = " << numBlock*blockSize << endl;
    cout << " *|* Number of requested eigenvalues = " << numEigen << endl;
    cout.precision(2);
    cout.setf(ios::scientific, ios::floatfield);
    cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl;
    cout << " *|* Norm used for convergence: ";
    if (vectWeight)
      cout << "weighted L2-norm with user-provided weights" << endl;
    else
      cout << "L^2-norm" << endl;
    if (startingEV > 0)
      cout << " *|* Input converged eigenvectors = " << startingEV << endl;
    cout << "\n -- Start iterations -- \n";
  }

  int maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);

  timeOuterLoop -= MyWatch.WallTime();
  outerIter = 0;
  while (outerIter <= maxIterEigenSolve) {

    highMem = (highMem > currentSize()) ? highMem : currentSize();

    int nb;
    for (nb = bStart; nb < maxBlock; ++nb) {

      outerIter += 1;
      if (outerIter > maxIterEigenSolve)
        break;

      int localSize = nb*blockSize;

      Epetra_MultiVector Xcurrent(View, X, localSize + knownEV, blockSize);

      timeMassOp -= MyWatch.WallTime();
      if (M)
        M->Apply(Xcurrent, MX);
      timeMassOp += MyWatch.WallTime();
      massOp += blockSize;

      // Orthonormalize X against the known eigenvectors and the previous vectors
      // Note: Use R as a temporary work space
      timeOrtho -= MyWatch.WallTime();
      if (nb == bStart) {
        if (nFound > 0) {
          if (knownEV == 0) {
            info = modalTool.massOrthonormalize(Xcurrent, MX, M, Q, nFound, 2, R.Values());
          }
          else {
            Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
            info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, nFound, 0, R.Values());
          }
        }
        nFound = 0;
      }
      else {
        Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
        info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, blockSize, 0, R.Values());
      }
      timeOrtho += MyWatch.WallTime();

      // Exit the code when the number of vectors exceeds the space dimension
      if (info < 0) {
        delete[] work1;
        delete[] work2;
        if (vectWeight)
          delete vectWeight;
        return -10;
      }

      timeStifOp -= MyWatch.WallTime();
      K->Apply(Xcurrent, KX);
      timeStifOp += MyWatch.WallTime();
      stifOp += blockSize;

      // Check the orthogonality properties of X
      if (verbose > 2) {
        if (knownEV + localSize == 0)
          accuracyCheck(&Xcurrent, &MX, 0);
        else {
          Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize);
          accuracyCheck(&Xcurrent, &MX, &copyQ);
        }
        if (localVerbose > 0)
          cout << endl;
      } // if (verbose > 2)

      // Define the local stiffness matrix
      // Note: S is used as a workspace
      timeLocalProj -= MyWatch.WallTime();
      for (j = 0; j <= nb; ++j) {
        callBLAS.GEMM('T', 'N', blockSize, blockSize, xr,
                      1.0, X.Values()+(knownEV+j*blockSize)*xr, xr, KX.Values(), xr,
                      0.0, tmpKK, blockSize);
        MyComm.SumAll(tmpKK, S, blockSize*blockSize);
        int iC;
        for (iC = 0; iC < blockSize; ++iC) {
          double *Kpointer = KK + localSize*dimSearch + j*blockSize + iC*dimSearch;
          memcpy(Kpointer, S + iC*blockSize, blockSize*sizeof(double));
        }
      }
      timeLocalProj += MyWatch.WallTime();

      // Perform a spectral decomposition
      timeLocalSolve -= MyWatch.WallTime();
      int nevLocal = localSize + blockSize;
      info = modalTool.directSolver(localSize+blockSize, KK, dimSearch, 0, 0,
                                    nevLocal, S, dimSearch, theta, localVerbose, 10);
      timeLocalSolve += MyWatch.WallTime();

      if (info != 0) {
        // Stop as spectral decomposition has a critical failure
        if (info < 0) {
          criticalExit = true;
          break;
        }
        // Restart as spectral decomposition failed
        if (localVerbose > 0) {
          cout << " Iteration " << outerIter;
          cout << "- Failure for spectral decomposition - RESTART with new random search\n";
        }
        reStart = true;
        numRestart += 1;
        timeRestart -= MyWatch.WallTime();
        Epetra_MultiVector Xinit(View, X, knownEV, blockSize);
        Xinit.Random();
        timeRestart += MyWatch.WallTime();
        nFound = blockSize;
        bStart = 0;
        break;
      } // if (info != 0)

      // Update the search space
      // Note: Use KX as a workspace
      timeLocalUpdate -= MyWatch.WallTime();
      callBLAS.GEMM('N', 'N', xr, blockSize, localSize+blockSize, 1.0, X.Values()+knownEV*xr, xr,
                    S, dimSearch, 0.0, KX.Values(), xr);
      timeLocalUpdate += MyWatch.WallTime();

      // Apply the mass matrix for the next block
      timeMassOp -= MyWatch.WallTime();
      if (M)
        M->Apply(KX, MX);
      timeMassOp += MyWatch.WallTime();
      massOp += blockSize;

      // Apply the stiffness matrix for the next block
      timeStifOp -= MyWatch.WallTime();
      K->Apply(KX, R);
      timeStifOp += MyWatch.WallTime();
      stifOp += blockSize;

      // Form the residuals
      timeResidual -= MyWatch.WallTime();
      if (M) {
        for (j = 0; j < blockSize; ++j) {
          callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr);
        }
      }
      else {
        // Note KX contains the updated block
        for (j = 0; j < blockSize; ++j) {
          callBLAS.AXPY(xr, -theta[j], KX.Values() + j*xr, R.Values() + j*xr);
        }
      }
      timeResidual += MyWatch.WallTime();
      residual += blockSize;

      // Compute the norm of residuals
      timeNorm -= MyWatch.WallTime();
      if (vectWeight) {
        R.NormWeighted(*vectWeight, normR);
      }
      else {
        R.Norm2(normR);
      }
      // Scale the norms of residuals with the eigenvalues
      // Count the number of converged eigenvectors
      nFound = 0;
      for (j = 0; j < blockSize; ++j) {
        normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j];
        if (normR[j] < tolEigenSolve)
          nFound += 1;
      } // for (j = 0; j < blockSize; ++j)
      timeNorm += MyWatch.WallTime();

      // Store the residual history
      if (localVerbose > 2) {
        memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double));
        spaceSizeHistory[historyCount] = localSize + blockSize;
        historyCount += 1;
      }
      maxSpaceSize = (maxSpaceSize > localSize+blockSize) ? maxSpaceSize : localSize+blockSize;
      sumSpaceSize += localSize + blockSize;

      // Print information on current iteration
      if (localVerbose > 0) {
        cout << " Iteration " << outerIter << " - Number of converged eigenvectors ";
        cout << knownEV + nFound << endl;
      } // if (localVerbose > 0)

      if (localVerbose > 1) {
        cout << endl;
        cout.precision(2);
        cout.setf(ios::scientific, ios::floatfield);
        for (i=0; i<blockSize; ++i) {
          cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
          cout << " = " << normR[i] << endl;
        }
        cout << endl;
        cout.precision(2);
        for (i=0; i<nevLocal; ++i) {
          cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i;
          cout.setf((fabs(theta[i]) < 0.01) ? ios::scientific : ios::fixed, ios::floatfield);  
          cout << " = " << theta[i] << endl;
        }
        cout << endl;
      }

      // Exit the loop to treat the converged eigenvectors
      if (nFound > 0) {
        nb += 1;
        offSet = 0;
        break;
      }

      // Apply the preconditioner on the residuals
      // Note: Use KX as a workspace
      if (maxBlock == 1) {
        if (Prec) {
          timePrecOp -= MyWatch.WallTime();
          Prec->ApplyInverse(R, Xcurrent);
          timePrecOp += MyWatch.WallTime();
          precOp += blockSize;
        }
        else {
          memcpy(Xcurrent.Values(), R.Values(), blockSize*xr*sizeof(double));
        }
        timeRestart -= MyWatch.WallTime();
        Xcurrent.Update(1.0, KX, -1.0);
        timeRestart += MyWatch.WallTime();
        break;
      } // if (maxBlock == 1)

      if (nb == maxBlock - 1) {
        nb += 1;
        break;
      }

      Epetra_MultiVector Xnext(View, X, knownEV+localSize+blockSize, blockSize);
      if (Prec) {
        timePrecOp -= MyWatch.WallTime();
        Prec->ApplyInverse(R, Xnext);
        timePrecOp += MyWatch.WallTime();
        precOp += blockSize;
      }
      else {
        memcpy(Xnext.Values(), R.Values(), blockSize*xr*sizeof(double));
      }

    } // for (nb = bStart; nb < maxBlock; ++nb)

    if (outerIter > maxIterEigenSolve)
      break;

    if (reStart == true) {
      reStart = false;
      continue;
    }

    if (criticalExit == true)
      break;

    // Store the final converged eigenvectors
    if (knownEV + nFound >= numEigen) {
      for (j = 0; j < blockSize; ++j) {
        if (normR[j] < tolEigenSolve) {
          memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
          lambda[knownEV] = theta[j];
          knownEV += 1;
        }
      }
      if (localVerbose == 1) {
        cout << endl;
        cout.precision(2);
        cout.setf(ios::scientific, ios::floatfield);
        for (i=0; i<blockSize; ++i) {
          cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i;
          cout << " = " << normR[i] << endl;
        }
        cout << endl;
      }  
      break;
    } // if (knownEV + nFound >= numEigen)

    // Treat the particular case of 1 block
    if (maxBlock == 1) {
      if (nFound > 0) {
        double *Xpointer = X.Values() + (knownEV+nFound)*xr;
        nFound = 0;
        for (j = 0; j < blockSize; ++j) {
          if (normR[j] < tolEigenSolve) {
            memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double));
            lambda[knownEV] = theta[j];
            knownEV += 1;
            nFound += 1;
          }
          else {
            memcpy(Xpointer + (j-nFound)*xr, KX.Values() + j*xr, xr*sizeof(double));
          }
        }
        Epetra_MultiVector Xnext(View, X, knownEV + blockSize - nFound, nFound);
        Xnext.Random();
      }
      else {
        nFound = blockSize;
      }
      continue;
    }

    // Define the restarting block when maxBlock > 1
    if (nFound > 0) {
      int firstIndex = blockSize;
      for (j = 0; j < blockSize; ++j) {
        if (normR[j] >= tolEigenSolve) {
          firstIndex = j;
          break;
        }
      } // for (j = 0; j < blockSize; ++j)
      while (firstIndex < nFound) {
        for (j = firstIndex; j < blockSize; ++j) {
          if (normR[j] < tolEigenSolve) {
            // Swap the j-th and firstIndex-th position
            callFortran.SWAP(nb*blockSize, S + j*dimSearch, 1, S + firstIndex*dimSearch, 1);
            callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1);
            callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1);
            break;
          }
        } // for (j = firstIndex; j < blockSize; ++j)
        for (j = 0; j < blockSize; ++j) {
          if (normR[j] >= tolEigenSolve) {
            firstIndex = j;
            break;
          }
        } // for (j = 0; j < blockSize; ++j)
      } // while (firstIndex < nFound)

      // Copy the converged eigenvalues
      memcpy(lambda + knownEV, theta, nFound*sizeof(double));

    } // if (nFound > 0)

    // Define the restarting size
    bStart = ((nb - offSet) > 2) ? (nb - offSet)/2 : 0;

    // Define the restarting space and local stiffness
    timeRestart -= MyWatch.WallTime();
    memset(KK, 0, nb*blockSize*dimSearch*sizeof(double));
    for (j = 0; j < bStart*blockSize; ++j) {
      KK[j + j*dimSearch] = theta[j + nFound];
    }
    // Form the restarting space
    int oldCol = nb*blockSize;
    int newCol = nFound + (bStart+1)*blockSize;
    newCol = (newCol > oldCol) ? oldCol : newCol;
    callFortran.GEQRF(oldCol, newCol, S, dimSearch, theta, R.Values(), xr*blockSize, &info);
    callFortran.ORMQR('R', 'N', xr, oldCol, newCol, S, dimSearch, theta,
                      X.Values()+knownEV*xr, xr, R.Values(), blockSize*xr, &info);
    timeRestart += MyWatch.WallTime();

    if (nFound == 0)
      offSet += 1;

    knownEV += nFound;
    maxBlock = (dimSearch/blockSize) - (knownEV/blockSize);

    // Put random vectors if the Rayleigh Ritz vectors are not enough
    newCol = nFound + (bStart+1)*blockSize;
    if (newCol > oldCol) {
      Epetra_MultiVector Xnext(View, X, knownEV+blockSize-nFound, nFound);
      Xnext.Random();
      continue;
    }

    nFound = 0;

  } // while (outerIter <= maxIterEigenSolve)
  timeOuterLoop += MyWatch.WallTime();
  highMem = (highMem > currentSize()) ? highMem : currentSize();

  // Clean memory
  delete[] work1;
  delete[] work2;
  if (vectWeight)
    delete vectWeight;

  // Sort the eigenpairs
  timePostProce -= MyWatch.WallTime();
  if ((info == 0) && (knownEV > 0)) {
    mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength());
  }
  timePostProce += MyWatch.WallTime();

  return (info == 0) ? knownEV : info;

}

Esempio n. 29

Mostra file

File: shylu_driver.cpp Progetto: 00liujj/trilinos

int main(int argc, char *argv[])
{
#ifdef HAVE_MPI
    Teuchos::GlobalMPISession mpiSession(&argc, &argv, 0);
    Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
    Epetra_SerialComm Comm;
#endif
    int nProcs, myPID ;
    Teuchos::ParameterList pLUList ;        // ParaLU parameters
    Teuchos::ParameterList isoList ;        // Isorropia parameters
    Teuchos::ParameterList shyLUList ;    // shyLU parameters
    Teuchos::ParameterList ifpackList ;    // shyLU parameters
    string ipFileName = "ShyLU.xml";       // TODO : Accept as i/p

    nProcs = mpiSession.getNProc();
    myPID = Comm.MyPID();

    if (myPID == 0)
    {
        cout <<"Parallel execution: nProcs="<< nProcs << endl;
    }

    // =================== Read input xml file =============================
    Teuchos::updateParametersFromXmlFile(ipFileName, &pLUList);
    isoList = pLUList.sublist("Isorropia Input");
    shyLUList = pLUList.sublist("ShyLU Input");
    shyLUList.set("Outer Solver Library", "AztecOO");
    // Get matrix market file name
    string MMFileName = Teuchos::getParameter<string>(pLUList, "mm_file");
    string prec_type = Teuchos::getParameter<string>(pLUList, "preconditioner");
    int maxiters = Teuchos::getParameter<int>(pLUList, "Outer Solver MaxIters");
    double tol = Teuchos::getParameter<double>(pLUList, "Outer Solver Tolerance");
    string rhsFileName = pLUList.get<string>("rhs_file", "");

    if (myPID == 0)
    {
        cout << "Input :" << endl;
        cout << "ParaLU params " << endl;
        pLUList.print(std::cout, 2, true, true);
        cout << "Matrix market file name: " << MMFileName << endl;
    }

    // ==================== Read input Matrix ==============================
    Epetra_CrsMatrix *A;
    Epetra_MultiVector *b1;

    int err = EpetraExt::MatrixMarketFileToCrsMatrix(MMFileName.c_str(), Comm,
                                                        A);
    //EpetraExt::MatlabFileToCrsMatrix(MMFileName.c_str(), Comm, A);
    //assert(err != 0);
    //cout <<"Done reading the matrix"<< endl;
    int n = A->NumGlobalRows();
    //cout <<"n="<< n << endl;

    // Create input vectors
    Epetra_Map vecMap(n, 0, Comm);
    if (rhsFileName != "")
    {
        err = EpetraExt::MatrixMarketFileToMultiVector(rhsFileName.c_str(),
                                         vecMap, b1);
    }
    else
    {
        b1 = new Epetra_MultiVector(vecMap, 1, false);
        b1->PutScalar(1.0);
    }

    Epetra_MultiVector x(vecMap, 1);
    //cout << "Created the vectors" << endl;

    // Partition the matrix with hypergraph partitioning and redisstribute
    Isorropia::Epetra::Partitioner *partitioner = new
                            Isorropia::Epetra::Partitioner(A, isoList, false);
    partitioner->partition();
    Isorropia::Epetra::Redistributor rd(partitioner);

    Epetra_CrsMatrix *newA;
    Epetra_MultiVector *newX, *newB; 
    rd.redistribute(*A, newA);
    delete A;
    A = newA;

    rd.redistribute(x, newX);
    rd.redistribute(*b1, newB);

    Epetra_LinearProblem problem(A, newX, newB);

    AztecOO solver(problem);

    ifpackList ;
    Ifpack_Preconditioner *prec;
    ML_Epetra::MultiLevelPreconditioner *MLprec;
    if (prec_type.compare("ShyLU") == 0)
    {
        prec = new Ifpack_ShyLU(A);
        prec->SetParameters(shyLUList);
        prec->Initialize();
        prec->Compute();
        //(dynamic_cast<Ifpack_ShyLU *>(prec))->JustTryIt();
        //cout << " Going to set it in solver" << endl ;
        solver.SetPrecOperator(prec);
        //cout << " Done setting the solver" << endl ;
    }
    else if (prec_type.compare("ILU") == 0)
    {
        ifpackList.set( "fact: level-of-fill", 1 );
        prec = new Ifpack_ILU(A);
        prec->SetParameters(ifpackList);
        prec->Initialize();
        prec->Compute();
        solver.SetPrecOperator(prec);
    }
    else if (prec_type.compare("ILUT") == 0)
    {
        ifpackList.set( "fact: ilut level-of-fill", 2 );
        ifpackList.set( "fact: drop tolerance", 1e-8);
        prec = new Ifpack_ILUT(A);
        prec->SetParameters(ifpackList);
        prec->Initialize();
        prec->Compute();
        solver.SetPrecOperator(prec);
    }
    else if (prec_type.compare("ML") == 0)
    {
        Teuchos::ParameterList mlList; // TODO : Take it from i/p
        MLprec = new ML_Epetra::MultiLevelPreconditioner(*A, mlList, true);
        solver.SetPrecOperator(MLprec);
    }

    solver.SetAztecOption(AZ_solver, AZ_gmres);
    solver.SetMatrixName(333);
    //solver.SetAztecOption(AZ_output, 1);
    //solver.SetAztecOption(AZ_conv, AZ_Anorm);
    //cout << "Going to iterate for the global problem" << endl;

    solver.Iterate(maxiters, tol);

    // compute ||Ax - b||
    double Norm;
    Epetra_MultiVector Ax(vecMap, 1);

    Epetra_MultiVector *newAx; 
    rd.redistribute(Ax, newAx);
    A->Multiply(false, *newX, *newAx);
    newAx->Update(1.0, *newB, -1.0);
    newAx->Norm2(&Norm);
    double ANorm = A->NormOne();

    cout << "|Ax-b |/|A| = " << Norm/ANorm << endl;

    delete newAx;
    if (prec_type.compare("ML") == 0)
    {
        delete MLprec;
    }
    else
    {
        delete prec;
    }

    delete b1;
    delete newX;
    delete newB;
    delete A;
    delete partitioner;
}

Esempio n. 30

Mostra file

File: shylu_probing_operator.cpp Progetto: gitter-badger/quinoa

int ShyLU_Probing_Operator::Apply(const Epetra_MultiVector &X,
            Epetra_MultiVector &Y) const
{
#ifdef TIMING_OUTPUT
    apply_time_->start();
#endif

    int nvectors = X.NumVectors();
    bool local = (C_->Comm().NumProc() == 1);
    int err;
    //cout << "No of colors after probing" << nvectors << endl;

#ifdef TIMING_OUTPUT
    matvec_time_->start();
#endif

    err = G_->Multiply(false, X, *temp2);
    assert(err == 0);
    if (!local)
        err = C_->Multiply(false, X, *temp);
    else
    {
        // localize X
        double *values;
        int mylda;
        X.ExtractView(&values, &mylda);

       Epetra_SerialComm LComm;        // Use Serial Comm for the local blocks.
       Epetra_Map SerialMap(X.Map().NumMyElements(), X.Map().NumMyElements(),
                   X.Map().MyGlobalElements(), 0, LComm);
       Epetra_MultiVector Xl(View, SerialMap, values, mylda, X.NumVectors());
       err = C_->Multiply(false, Xl, *temp);
    }
    assert(err == 0);

#ifdef TIMING_OUTPUT
    matvec_time_->stop();
#endif

    int nrows = C_->RowMap().NumMyElements();

#ifdef DEBUG
    cout << "DEBUG MODE" << endl;
    assert(nrows == localDRowMap_->NumGlobalElements());

    int gids[nrows], gids1[nrows];
    C_->RowMap().MyGlobalElements(gids);
    localDRowMap_->MyGlobalElements(gids1);

    for (int i = 0; i < nrows; i++)
    {
       assert(gids[i] == gids1[i]);
    }
#endif

#ifdef TIMING_OUTPUT
    localize_time_->start();
#endif

    //int err;
    int lda;
    double *values;
    if (!local)
    {
        err = temp->ExtractView(&values, &lda);
        assert (err == 0);

        // copy to local vector //TODO: OMP parallel
        assert(lda == nrows);

    //#pragma omp parallel for shared(nvectors, nrows, values)
        for (int v = 0; v < nvectors; v++)
        {
           for (int i = 0; i < nrows; i++)
           {
               err = ltemp->ReplaceMyValue(i, v, values[i+v*lda]);
               assert (err == 0);
           }
        }
    }

#ifdef TIMING_OUTPUT
    localize_time_->stop();
    trisolve_time_->start();
#endif

    if (!local)
    {
        LP_->SetRHS(ltemp.getRawPtr());
    }
    else
    {
        //LP_->SetRHS(temp.getRawPtr());
    }
    //LP_->SetLHS(localX.getRawPtr());

    //TODO: Why not just in Reset(). Check the distr path.
    ssym_->OrigLP->SetLHS(localX.getRawPtr());
    ssym_->OrigLP->SetRHS(temp.getRawPtr());
    ssym_->ReIdx_LP->fwd();
    solver_->Solve();

#ifdef TIMING_OUTPUT
    trisolve_time_->stop();
    dist_time_->start();
#endif

    if (!local)
    {
        err = localX->ExtractView(&values, &lda);
        assert (err == 0);

        //Copy back to dist vector //TODO: OMP parallel
    //#pragma omp parallel for
        for (int v = 0; v < nvectors; v++)
        {
           for (int i = 0; i < nrows; i++)
           {
               err = temp->ReplaceMyValue(i, v, values[i+v*lda]);
               assert (err == 0);
           }
        }
    }

#ifdef TIMING_OUTPUT
    dist_time_->stop();
    matvec2_time_->start();
#endif

    if (!local)
    {
        R_->Multiply(false, *temp, Y);
    }
    else
    {
        // Should Y be localY in Multiply and then exported to Y ?? TODO:
        // Use view mode ?
        double *values;
        int mylda;
        Y.ExtractView(&values, &mylda);

       Epetra_SerialComm LComm;        // Use Serial Comm for the local blocks.
       Epetra_Map SerialMap(Y.Map().NumMyElements(), Y.Map().NumMyElements(),
                   Y.Map().MyGlobalElements(), 0, LComm);
       Epetra_MultiVector Yl(View, SerialMap, values, mylda, Y.NumVectors());
        R_->Multiply(false, *localX, Yl);
    }

#ifdef TIMING_OUTPUT
    matvec2_time_->stop();
    update_time_->start();
#endif

    err = Y.Update(1.0, *temp2, -1.0);
    //cout << Y.MyLength() << " " << temp2.MyLength() << endl;
    assert(err == 0);

#ifdef TIMING_OUTPUT
    update_time_->stop();
    apply_time_->stop();
#endif
    cntApply++;
    return 0;
}