コード例 #1
ファイル: cxx_main.cpp プロジェクト: 00liujj/trilinos
// Benchmarks Epetra_JadMatrix::Apply.  For each pass j (0 = no transpose,
// 1 = transpose, per the loop comment) it performs ten Apply calls on
// *xexact, derives an MFLOP rate from A->Flops() and the timer, and prints
// it when `verbose` or `summary` is set.
//
// A             matrix whose Apply() is timed
// b, bt         right-hand sides used to form the residual r
// xexact        input vector passed to Apply()
// StaticProfile not referenced in the visible body
// verbose       print residual norm and MFLOP rate
// summary       print a tab-separated label (single process only) and rate
//
// NOTE(review): this listing looks truncated by extraction -- there are no
// closing braces and some statements appear to be missing.  Confirm against
// the upstream file before relying on it.
void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
		    Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {

  // z receives the Apply() result, r the residual; both start as copies of *b.
  Epetra_MultiVector z(*b);
  Epetra_MultiVector r(*b);
  Epetra_SerialDenseVector resvec(b->NumVectors());

  // NOTE(review): nothing visible attaches flopcounter to A or resets the
  // counter/timer between passes -- presumably lost lines; verify.
  Epetra_Flops flopcounter;
  Epetra_Time timer(A->Comm());

  for (int j=0; j<2; j++) { // j = 0 is notrans, j = 1 is trans

    // NOTE(review): TransA only selects the residual/label below; no visible
    // call switches A itself into transpose mode.
    bool TransA = (j==1);

    //10 matvecs
    for( int i = 0; i < 10; ++i )
      A->Apply(*xexact, z); // Compute z = A*xexact or z = A'*xexact

    double elapsed_time = timer.ElapsedTime();
    double total_flops = A->Flops();

    // Compute residual
    // NOTE(review): as listed, the second Update runs unconditionally and
    // overwrites the first; an `else` appears to be missing between them.
    if (TransA)
      r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
      r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z


    // NOTE(review): resvec is never written in the visible code, so this
    // does not reflect r -- a norm computation seems to be missing.
    if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
    double MFLOPs = total_flops/elapsed_time/1000000.0;
    if (verbose) cout << "Total MFLOPs for 10 " << " Jagged Diagonal MatVec's with (Trans = " << TransA
		      << ") " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
    // Summary mode: the label is printed only on single-process runs.
    if (summary) {
      if (A->Comm().NumProc()==1) {
	if (TransA) cout << "TransMv" << '\t';
	else cout << "NoTransMv" << '\t';
      cout << MFLOPs << endl;
コード例 #2
ファイル: cxx_main.cpp プロジェクト: 00liujj/trilinos
// Test driver for Epetra_SerialDenseSolver / Epetra_SerialDenseMatrix /
// Epetra_SerialDenseVector.  In order it:
//   1. solves Hilbert systems of size 1..N in View and Copy mode via check(),
//   2. compares NormInf/NormOne of a test matrix with closed-form values,
//      before and after Scale(),
//   3. compares the two Multiply() overloads and exercises operator==/!=,
//   4. factors and solves a 2000 x 2000 system and reports MFLOP rates,
//   5. checks the default/shaped constructors, index operators, assignment,
//      copy construction and a few vector norms / dot products.
// Passing "-v" as the first argument enables verbose output.
//
// NOTE(review): this listing has lost lines during extraction (the opening
// brace of main, most closing braces, `else` keywords and apparently some
// statements and preprocessor conditionals).  Notes below mark the places
// where the visible code cannot be what was intended; confirm against the
// upstream file.
int main(int argc, char *argv[])
  int ierr = 0, i, j, k;
  bool debug = false;

  // NOTE(review): two declarations of Comm -- presumably the MPI and serial
  // alternatives of a preprocessor conditional that is no longer visible.
  Epetra_MpiComm Comm( MPI_COMM_WORLD );
  Epetra_SerialComm Comm;

  bool verbose = false;

  // Check if we should print results to standard out
  if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true;

  if (verbose && Comm.MyPID()==0)
    cout << Epetra_Version() << endl << endl;

  int rank = Comm.MyPID();
  //  char tmp;
  //  if (rank==0) cout << "Press any key to continue..."<< endl;
  //  if (rank==0) cin >> tmp;
  //  Comm.Barrier();

  Comm.SetTracebackMode(0); // This should shut down any error traceback reporting
  if (verbose) cout << Comm <<endl;

  //  bool verbose1 = verbose;

  // Redefine verbose to only print on PE 0
  if (verbose && rank!=0) verbose = false;
  // Work arrays for the Hilbert tests.  X and X1 use leading dimension N+1,
  // the A and B arrays use leading dimension N.
  int N = 20;
  int NRHS = 4;
  double * A = new double[N*N];
  double * A1 = new double[N*N];
  double * X = new double[(N+1)*NRHS];
  double * X1 = new double[(N+1)*NRHS];
  int LDX = N+1;
  int LDX1 = N+1;
  double * B = new double[N*NRHS];
  double * B1 = new double[N*NRHS];
  int LDB = N;
  int LDB1 = N;

  int LDA = N;
  int LDA1 = LDA;
  double OneNorm1;
  bool Transpose = false;

  Epetra_SerialDenseSolver solver;
  Epetra_SerialDenseMatrix * Matrix;
  // kk = 0 wraps A in View mode, kk = 1 in Copy mode (and sets Transpose).
  // For each kk, i runs over the system sizes 1..N.
  for (int kk=0; kk<2; kk++) {
    for (i=1; i<=N; i++) {
      GenerateHilbert(A, LDA, i);
      OneNorm1 = 0.0;
      for (j=1; j<=i; j++) OneNorm1 += 1.0/((double) j); // 1-Norm = 1 + 1/2 + ...+1/n

      // NOTE(review): the closing brace before `else` and the one after the
      // else-branch are not visible.
      if (kk==0) {
	Matrix = new Epetra_SerialDenseMatrix(View, A, LDA, i, i);
	LDA1 = LDA;
      else {
	Matrix = new Epetra_SerialDenseMatrix(Copy, A, LDA, i, i);
	LDA1 = i;

      // Second copy of the Hilbert matrix, handed to check() below.
      GenerateHilbert(A1, LDA1, i);
      if (kk==1) {
	Transpose = true;

      // Fill the right-hand sides.  B is read here with stride LDB1; LDB and
      // LDB1 are both N at this point, so the index matches the one written.
      for (k=0; k<NRHS; k++)
	for (j=0; j<i; j++) {
	  B[j+k*LDB] = 1.0/((double) (k+3)*(j+3));
	  B1[j+k*LDB1] = B[j+k*LDB1];
      Epetra_SerialDenseMatrix Epetra_B(View, B, LDB, i, NRHS);
      Epetra_SerialDenseMatrix Epetra_X(View, X, LDX, i, NRHS);

      // NOTE(review): no visible statement hands *Matrix to solver; a call
      // doing so was presumably lost in extraction.
      solver.SetVectors(Epetra_X, Epetra_B);

      ierr = check(solver, A1, LDA1,  i, NRHS, OneNorm1, B1, LDB1,  X1, LDX1, Transpose, verbose);
      assert (ierr>-1);
      delete Matrix;
      if (ierr!=0) {
	if (verbose) cout << "Factorization failed due to bad conditioning.  This is normal if RCOND is small."
			  << endl;

  delete [] A;
  delete [] A1;
  delete [] X;
  delete [] X1;
  delete [] B;
  delete [] B1;

  // Now test norms and scaling functions

  Epetra_SerialDenseMatrix D;
  double ScalarA = 2.0;

  // D is DM x DN; entry (row i, column j) is 1 + i + j*DM, written through
  // the column-first D[j][i] form.
  int DM = 10;
  int DN = 8;
  D.Shape(DM, DN);
  for (j=0; j<DN; j++)
    for (i=0; i<DM; i++) D[j][i] = (double) (1+i+j*DM) ;

  //cout << D << endl;

  // Closed-form reference values: NormInfD_ref equals the sum of the last
  // row of D, NormOneD_ref the sum of its last column.
  double NormInfD_ref = (double)(DM*(DN*(DN+1))/2);
  double NormOneD_ref = (double)((DM*DN*(DM*DN+1))/2 - (DM*(DN-1)*(DM*(DN-1)+1))/2 );

  double NormInfD = D.NormInf();
  double NormOneD = D.NormOne();

  // NOTE(review): computed and expected norms are only printed; the visible
  // code does not compare them.
  if (verbose) {
    cout << " *** Before scaling *** " << endl
	 << " Computed one-norm of test matrix = " << NormOneD << endl
	 << " Expected one-norm                = " << NormOneD_ref << endl
	 << " Computed inf-norm of test matrix = " << NormInfD << endl
	 << " Expected inf-norm                = " << NormInfD_ref << endl;
  D.Scale(ScalarA); // Scale entire D matrix by this value
  NormInfD = D.NormInf();
  NormOneD = D.NormOne();
  if (verbose) {
    cout << " *** After scaling *** " << endl
	 << " Computed one-norm of test matrix = " << NormOneD << endl
	 << " Expected one-norm                = " << NormOneD_ref*ScalarA << endl
	 << " Computed inf-norm of test matrix = " << NormInfD << endl
	 << " Expected inf-norm                = " << NormInfD_ref*ScalarA << endl;

  // Now test that A.Multiply(false, x, y) produces the same result
  // as y.Multiply('N','N', 1.0, A, x, 0.0).

  N = 10;
  int M = 10;
  LDA = N;
  Epetra_SerialDenseMatrix smallA(N, M, false);
  Epetra_SerialDenseMatrix x(N, 1, false);
  Epetra_SerialDenseMatrix y1(N, 1, false);
  Epetra_SerialDenseMatrix y2(N, 1, false);

  // Fill smallA(i,j) = i + 2j + 1, x = all ones, y1 = y2 = 0.
  for(i=0; i<N; ++i) {
    for(j=0; j<M; ++j) {
      smallA(i,j) = 1.0*i+2.0*j+1.0;
    x(i,0) = 1.0;
    y1(i,0) = 0.0;
    y2(i,0) = 0.0;

  //quick check of operator==
  if (x == y1) {
    if (verbose) cout << "err in Epetra_SerialDenseMatrix::operator==, "
        << "erroneously returned true." << std::endl;

  //quick check of operator!=
  // NOTE(review): the message below names operator== although this branch
  // tests operator!=.
  if (x != x) {
    if (verbose) cout << "err in Epetra_SerialDenseMatrix::operator==, "
        << "erroneously returned true." << std::endl;

  int err1 = smallA.Multiply(false, x, y1);
  int err2 = y2.Multiply('N','N', 1.0, smallA, x, 0.0);
  if (err1 != 0 || err2 != 0) {
    if (verbose) cout << "err in Epetra_SerialDenseMatrix::Multiply"<<endl;

  // The two products are compared entry by entry with exact equality.
  for(i=0; i<N; ++i) {
    if (y1(i,0) != y2(i,0)) {
      if (verbose) cout << "different versions of Multiply don't match."<<endl;

  // Now test for larger system, both correctness and performance.

  N = 2000;
  NRHS = 5;
  LDA = N;
  LDB = N;
  LDX = N;

  if (verbose) cout << "\n\nComputing factor of an " << N << " x " << N << " general matrix...Please wait.\n\n" << endl;

  // Define A and X

  A = new double[LDA*N];
  X = new double[LDB*NRHS];

  // Column j of A holds 100 + i at row i == (j+2) % N and
  // -11/((i+5)(j+2)) elsewhere; X(j,k) = 1/(j+5+k).
  for (j=0; j<N; j++) {
    for (k=0; k<NRHS; k++) X[j+k*LDX] = 1.0/((double) (j+5+k));
    for (i=0; i<N; i++) {
      if (i==((j+2)%N)) A[i+j*LDA] = 100.0 + i;
      else A[i+j*LDA] = -11.0/((double) (i+5)*(j+2));

  // Define Epetra_SerialDenseMatrix object

  // BigMatrix is constructed in Copy mode; OrigBigMatrix is a View on A.
  Epetra_SerialDenseMatrix BigMatrix(Copy, A, LDA, N, N);
  Epetra_SerialDenseMatrix OrigBigMatrix(View, A, LDA, N, N);

  Epetra_SerialDenseSolver BigSolver;

  // Time factorization

  // NOTE(review): no visible statement attaches BigMatrix or `counter` to
  // BigSolver before Factor(); those lines were presumably lost.
  Epetra_Flops counter;
  Epetra_Time Timer(Comm);
  double tstart = Timer.ElapsedTime();
  ierr = BigSolver.Factor();
  if (ierr!=0 && verbose) cout << "Error in factorization = "<<ierr<< endl;
  double time = Timer.ElapsedTime() - tstart;

  double FLOPS = counter.Flops();
  double MFLOPS = FLOPS/time/1000000.0;
  if (verbose) cout << "MFLOPS for Factorization = " << MFLOPS << endl;

  // Define Left hand side and right hand side
  Epetra_SerialDenseMatrix LHS(View, X, LDX, N, NRHS);
  Epetra_SerialDenseMatrix RHS;
  RHS.Shape(N,NRHS); // Allocate RHS

  // Compute RHS from A and X

  // NOTE(review): RHS_counter is not visibly attached to RHS.
  Epetra_Flops RHS_counter;
  tstart = Timer.ElapsedTime();
  RHS.Multiply('N', 'N', 1.0, OrigBigMatrix, LHS, 0.0);
  time = Timer.ElapsedTime() - tstart;

  // Keep a copy of the right-hand side for the residual check below.
  Epetra_SerialDenseMatrix OrigRHS = RHS;

  FLOPS = RHS_counter.Flops();
  MFLOPS = FLOPS/time/1000000.0;
  if (verbose) cout << "MFLOPS to build RHS (NRHS = " << NRHS <<") = " << MFLOPS << endl;

  // Set LHS and RHS and solve
  BigSolver.SetVectors(LHS, RHS);

  tstart = Timer.ElapsedTime();
  ierr = BigSolver.Solve();
  if (ierr==1 && verbose) cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." << endl;
  else if (ierr!=0 && verbose) cout << "Error in solve = "<<ierr<< endl;
  time = Timer.ElapsedTime() - tstart;

  FLOPS = BigSolver.Flops();
  MFLOPS = FLOPS/time/1000000.0;
  if (verbose) cout << "MFLOPS for Solve (NRHS = " << NRHS <<") = " << MFLOPS << endl;

  // resid receives one residual value per right-hand side.
  double * resid = new double[NRHS];
  bool OK = Residual(N, NRHS, A, LDA, BigSolver.Transpose(), BigSolver.X(), BigSolver.LDX(),
		     OrigRHS.A(), OrigRHS.LDA(), resid);

  if (verbose) {
    if (!OK) cout << "************* Residual do not meet tolerance *************" << endl;
    for (i=0; i<NRHS; i++)
      cout << "Residual[" << i <<"] = "<< resid[i] << endl;
    cout  << endl;

  // Solve again using the Epetra_SerialDenseVector class for LHS and RHS

  // NOTE(review): X2 and B2 are default-constructed and no visible call
  // sizes them before X2[kk] is written -- presumably lost lines.
  Epetra_SerialDenseVector X2;
  Epetra_SerialDenseVector B2;
  int length = BigMatrix.N();
  {for (int kk=0; kk<length; kk++) X2[kk] = ((double ) kk)/ ((double) length);} // Define entries of X2

  tstart = Timer.ElapsedTime();
  B2.Multiply('N', 'N', 1.0, OrigBigMatrix, X2, 0.0); // Define B2 = A*X2
  time = Timer.ElapsedTime() - tstart;

  Epetra_SerialDenseVector OrigB2 = B2;

  FLOPS = RHS_counter.Flops();
  MFLOPS = FLOPS/time/1000000.0;
  if (verbose) cout << "MFLOPS to build single RHS = " << MFLOPS << endl;

  // Set LHS and RHS and solve
  BigSolver.SetVectors(X2, B2);

  tstart = Timer.ElapsedTime();
  ierr = BigSolver.Solve();
  time = Timer.ElapsedTime() - tstart;
  if (ierr==1 && verbose) cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." << endl;
  else if (ierr!=0 && verbose) cout << "Error in solve = "<<ierr<< endl;

  FLOPS = counter.Flops();
  MFLOPS = FLOPS/time/1000000.0;
  if (verbose) cout << "MFLOPS to solve single RHS = " << MFLOPS << endl;

  // Single right-hand side: only resid[0] is reported.
  OK = Residual(N, 1, A, LDA, BigSolver.Transpose(), BigSolver.X(), BigSolver.LDX(), OrigB2.A(),
		OrigB2.LDA(), resid);

  if (verbose) {
    if (!OK) cout << "************* Residual do not meet tolerance *************" << endl;
      cout << "Residual = "<< resid[0] << endl;
  delete [] resid;
  delete [] A;
  delete [] X;

  // Now test default constructor and index operators

  N = 5;
  Epetra_SerialDenseMatrix C; // Implicit call to default constructor, should not need to call destructor
  C.Shape(5,5); // Make it 5 by 5
  double * C1 = new double[N*N];
  GenerateHilbert(C1, N, N); // Generate Hilbert matrix

  C1[1+2*N] = 1000.0;  // Make matrix nonsymmetric

  // Fill values of C with Hilbert values
  for (i=0; i<N; i++)
    for (j=0; j<N; j++)
      C(i,j) = C1[i+j*N];

  // Test if values are correctly written and read
  // (C(i,j) and C[j][i] are expected to address the same entry.)
  for (i=0; i<N; i++)
    for (j=0; j<N; j++) {
      assert(C(i,j) == C1[i+j*N]);
      assert(C(i,j) == C[j][i]);

  if (verbose)
    cout << "Default constructor and index operator check OK.  Values of Hilbert matrix = "
	 << endl << C << endl
	 << "Values should be 1/(i+j+1), except value (1,2) should be 1000" << endl;

  delete [] C1;

  // now test sized/shaped constructor
  // Both constructors are expected to zero-initialize their storage.
  Epetra_SerialDenseMatrix shapedMatrix(10, 12);
  assert(shapedMatrix.M() == 10);
  assert(shapedMatrix.N() == 12);
  for(i = 0; i < 10; i++)
    for(j = 0; j < 12; j++)
      assert(shapedMatrix(i, j) == 0.0);
  Epetra_SerialDenseVector sizedVector(20);
  assert(sizedVector.Length() == 20);
  for(i = 0; i < 20; i++)
    assert(sizedVector(i) == 0.0);
  if (verbose)
    cout << "Shaped/sized constructors check OK." << endl;

  // test Copy/View mode in op= and cpy ctr
  int temperr = 0;
  temperr = matrixAssignment(verbose, debug);
  if(verbose && temperr == 0)
    cout << "Operator = checked OK." << endl;
  EPETRA_TEST_ERR(temperr, ierr);
  temperr = matrixCpyCtr(verbose, debug);
  if(verbose && temperr == 0)
    cout << "Copy ctr checked OK." << endl;
  EPETRA_TEST_ERR(temperr, ierr);

  // Test some vector methods

  Epetra_SerialDenseVector v1(3);
  v1[0] = 1.0;
  v1[1] = 3.0;
  v1[2] = 2.0;

  Epetra_SerialDenseVector v2(3);
  v2[0] = 2.0;
  v2[1] = 1.0;
  v2[2] = -2.0;

  // Expected: ||v1||_1 = 6, ||v1||_2 = sqrt(14), ||v1||_inf = 3, v1.v2 = 1.
  // NOTE(review): temperr from these checks is reset or left unused in the
  // visible code; it is not folded into ierr.
  temperr = 0;
  if (v1.Norm1()!=6.0) temperr++;
  if (fabs(sqrt(14.0)-v1.Norm2())>1.0e-6) temperr++;
  if (v1.NormInf()!=3.0) temperr++;
  if(verbose && temperr == 0)
    cout << "Vector Norms checked OK." << endl;
  temperr = 0;
  if (v1.Dot(v2)!=1.0) temperr++;
  if(verbose && temperr == 0)
    cout << "Vector Dot product checked OK." << endl;

  // NOTE(review): no MPI_Init call is visible in this listing, and the
  // block comment opened below is never closed, so as listed it swallows
  // the return statement.
  MPI_Finalize() ;

/* end main
return ierr ;
コード例 #3
// Example driver: builds a tridiagonal (-1 2 -1) Epetra_CrsMatrix with
// `long long` global indices, runs power_method() on it, multiplies the
// (0,0) entry by 10, and runs power_method() again, printing an MFLOP rate
// after each run.  The global problem size is read from argv[1].
//
// NOTE(review): this listing has lost lines during extraction (the opening
// brace of main, closing braces, `else` keywords and apparently some
// statements and preprocessor conditionals).  Confirm against the upstream
// file before relying on it.
int main(int argc, char *argv[])
  int ierr = 0, i;


  // Initialize MPI
  // NOTE(review): the MPI_Init call this comment refers to is not visible.


  // NOTE(review): two declarations of Comm -- presumably the MPI and serial
  // alternatives of a preprocessor conditional that is no longer visible.
  Epetra_MpiComm Comm( MPI_COMM_WORLD );


  Epetra_SerialComm Comm;


  int MyPID = Comm.MyPID();
  int NumProc = Comm.NumProc();
  bool verbose = (MyPID==0);

  if (verbose)
    cout << Epetra_Version() << endl << endl;

  cout << Comm << endl;

  // Get the number of local equations from the command line
  // NOTE(review): as listed, execution continues to atoi(argv[1]) even when
  // argc != 2; an early exit was presumably lost.
  if (argc!=2)
     if (verbose) 
       cout << "Usage: " << argv[0] << " number_of_equations" << endl;
  long long NumGlobalElements = std::atoi(argv[1]);

  if (NumGlobalElements < NumProc)
     if (verbose)
       cout << "numGlobalBlocks = " << NumGlobalElements 
	    << " cannot be < number of processors = " << NumProc << endl;

  // Construct a Map that puts approximately the same number of 
  // equations on each processor.

  Epetra_Map Map(NumGlobalElements, 0LL, Comm);
  // Get update list and number of local equations from newly created Map.

  int NumMyElements = Map.NumMyElements();

  // NOTE(review): MyGlobalElements is sized here but never filled in the
  // visible code; the call that copies the map's global IDs into it seems
  // to be missing.
  std::vector<long long> MyGlobalElements(NumMyElements);

  // Create an integer vector NumNz that is used to build the Petra Matrix.
  // NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation 
  // on this processor

    std::vector<int> NumNz(NumMyElements);

  // We are building a tridiagonal matrix where each row has (-1 2 -1)
  // So we need 2 off-diagonal terms (except for the first and last equation)

  // NOTE(review): as listed, NumNz[i] = 3 is not part of the loop body and
  // no `else` separates it from NumNz[i] = 2 -- presumably lost.
  for (i=0; i<NumMyElements; i++)
    if (MyGlobalElements[i]==0 || MyGlobalElements[i] == NumGlobalElements-1)
      NumNz[i] = 2;
      NumNz[i] = 3;

  // Create a Epetra_Matrix

  Epetra_CrsMatrix A(Copy, Map, &NumNz[0]);
  // Add  rows one-at-a-time
  // Need some vectors to help
  // Off diagonal Values will always be -1

  std::vector<double> Values(2);
  Values[0] = -1.0; Values[1] = -1.0;
  std::vector<long long> Indices(2);
  double two = 2.0;
  int NumEntries;
  // First row: one off-diagonal (column 1).  Last row: one off-diagonal
  // (column NumGlobalElements-2).  Other rows: columns gid-1 and gid+1.
  // NOTE(review): the braces and the final `else` of this chain are not
  // visible.
  for (i=0; i<NumMyElements; i++)
    if (MyGlobalElements[i]==0)
	Indices[0] = 1;
	NumEntries = 1;
    else if (MyGlobalElements[i] == NumGlobalElements-1)
	Indices[0] = NumGlobalElements-2;
	NumEntries = 1;
	Indices[0] = MyGlobalElements[i]-1;
	Indices[1] = MyGlobalElements[i]+1;
	NumEntries = 2;
     // The return code of the first insert is overwritten by the second.
     ierr = A.InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
     // Put in the diagonal entry
     ierr = A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i]);
  // Finish up
  ierr = A.FillComplete();

  // Create vectors for Power method

  // variable needed for iteration
  double lambda = 0.0;
  int niters = (int) NumGlobalElements*10;
  double tolerance = 1.0e-2;

  // Iterate
  // NOTE(review): `counter` is not visibly attached to A, and nothing
  // visible resets it or the timer before the second run below.
  Epetra_Flops counter;
  Epetra_Time timer(Comm);
  ierr += power_method(A, lambda, niters, tolerance, verbose);
  double elapsed_time = timer.ElapsedTime();
  double total_flops =counter.Flops();
  double MFLOPs = total_flops/elapsed_time/1000000.0;

  if (verbose) 
    cout << "\n\nTotal MFLOPs for first solve = " << MFLOPs << endl<< endl;

  // Increase diagonal dominance
  if (verbose) 
    cout << "\nIncreasing magnitude of first diagonal term, solving again\n\n"
		    << endl;

  // Only the process that owns global row 0 modifies it: the row is copied
  // out, the entry with column index 0 is scaled by 10, and the row is
  // written back.
  if (A.MyGlobalRow(0)) {
    int numvals = A.NumGlobalEntries(0);
    std::vector<double> Rowvals(numvals);
    std::vector<long long> Rowinds(numvals);
    A.ExtractGlobalRowCopy(0, numvals, numvals, &Rowvals[0], &Rowinds[0]); // Get A[0,0]
    for (i=0; i<numvals; i++) if (Rowinds[i] == 0) Rowvals[i] *= 10.0;

    A.ReplaceGlobalValues(0, numvals, &Rowvals[0], &Rowinds[0]);
  // Iterate (again)
  lambda = 0.0;
  ierr += power_method(A, lambda, niters, tolerance, verbose);
  elapsed_time = timer.ElapsedTime();
  total_flops = counter.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;

  if (verbose) 
    cout << "\n\nTotal MFLOPs for second solve = " << MFLOPs << endl<< endl;

  // Release all objects
  // NOTE(review): the block comment opened below is never closed, so as
  // listed it swallows the return statement.
  MPI_Finalize() ;

/* end main
return ierr ;
コード例 #4
ファイル: cxx_main.cpp プロジェクト: 00liujj/trilinos
// Example driver: reads a Harwell-Boeing problem from argv[1], redistributes
// it onto a uniform map, optionally builds an ILU(k) preconditioner
// (Ifpack_IlukGraph + Ifpack_CrsRiluk) and solves with BiCGSTAB, printing
// timings and MFLOP rates on process 0.
//
// Optional arguments (per the usage text): argv[2] level of fill, argv[3]
// overlap, argv[4] absolute threshold, argv[5] relative threshold.
//
// NOTE(review): this listing has lost lines during extraction (closing
// braces and apparently several statements and preprocessor conditionals).
// Confirm against the upstream file before relying on it.
int main(int argc, char *argv[]) {

  // NOTE(review): two declarations of Comm -- presumably the MPI and serial
  // alternatives of a preprocessor conditional that is no longer visible.
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
  Epetra_SerialComm Comm;

  cout << Comm << endl;

  int MyPID = Comm.MyPID();

  bool verbose = false; 
  if (MyPID==0) verbose = true;

  // NOTE(review): the usage text is printed only on process 0, and no exit
  // is visible afterwards, so argv[1] is still used below when argc < 2.
  if(argc < 2 && verbose) {
    cerr << "Usage: " << argv[0] 
	 << " HB_filename [level_fill [level_overlap [absolute_threshold [ relative_threshold]]]]" << endl
	 << "where:" << endl
	 << "HB_filename        - filename and path of a Harwell-Boeing data set" << endl
	 << "level_fill         - The amount of fill to use for ILU(k) preconditioner (default 0)" << endl
	 << "level_overlap      - The amount of overlap used for overlapping Schwarz subdomains (default 0)" << endl
	 << "absolute_threshold - The minimum value to place on the diagonal prior to factorization (default 0.0)" << endl
	 << "relative_threshold - The relative amount to perturb the diagonal prior to factorization (default 1.0)" << endl << endl
	 << "To specify a non-default value for one of these parameters, you must specify all" << endl
	 << " preceding values but not any subsequent parameters. Example:" << endl
	 << "ifpackHbSerialMsr.exe mymatrix.hb 1  - loads mymatrix.hb, uses level fill of one, all other values are defaults" << endl
	 << endl;


  // Uncomment the next three lines to debug in mpi mode
  //int tmp;
  //if (MyPID==0) cin >> tmp;

  // Output pointers filled in by the reader; they are deleted further down.
  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
  // Call routine to read in HB problem
  Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact);

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);

  // Create Exporter to distribute read-in matrix and vectors

  Epetra_Export exporter(*readMap, map);
  Epetra_CrsMatrix A(Copy, map, 0);
  Epetra_Vector x(map);
  Epetra_Vector b(map);
  Epetra_Vector xexact(map);

  // Redistribute the vectors, then the matrix, timing each phase.
  Epetra_Time FillTimer(Comm);
  x.Export(*readx, exporter, Add);
  b.Export(*readb, exporter, Add);
  xexact.Export(*readxexact, exporter, Add);
  double vectorRedistributeTime = FillTimer.ElapsedTime();
  A.Export(*readA, exporter, Add);
  double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime;
  // NOTE(review): no FillComplete call on A is visible before this timing
  // is taken -- presumably a lost line.
  double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime;
  if (Comm.MyPID()==0)	{
    cout << "\n\n****************************************************" << endl;
    cout << "\n Vector redistribute  time (sec) = " << vectorRedistributeTime<< endl;
    cout << "    Matrix redistribute time (sec) = " << matrixRedistributeTime << endl;
    cout << "    Transform to Local  time (sec) = " << fillCompleteTime << endl<< endl;
  // Sanity check: apply the matrix to xexact before and after redistribution.
  Epetra_Vector tmp1(*readMap);
  Epetra_Vector tmp2(map);
  readA->Multiply(false, *readxexact, tmp1);

  A.Multiply(false, xexact, tmp2);
  // NOTE(review): `residual` is printed twice below without any visible
  // assignment; the norm computations on tmp1/tmp2 seem to be missing.
  double residual;
  if (verbose) cout << "Norm of Ax from file            = " << residual << endl;
  if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl;

  //cout << "A from file = " << *readA << endl << endl << endl;

  //cout << "A after dist = " << A << endl << endl << endl;

  // The read-in objects are no longer needed once redistributed.
  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;


  // Construct ILU preconditioner

  double elapsed_time, total_flops, MFLOPs;
  Epetra_Time timer(Comm);

  // Optional command-line overrides; each default applies when the argument
  // is absent.
  int LevelFill = 0;
  if (argc > 2)  LevelFill = atoi(argv[2]);
  if (verbose) cout << "Using Level Fill = " << LevelFill << endl;
  int Overlap = 0;
  if (argc > 3) Overlap = atoi(argv[3]);
  if (verbose) cout << "Using Level Overlap = " << Overlap << endl;
  double Athresh = 0.0;
  if (argc > 4) Athresh = atof(argv[4]);
  if (verbose) cout << "Using Absolute Threshold Value of = " << Athresh << endl;

  double Rthresh = 1.0;
  if (argc > 5) Rthresh = atof(argv[5]);
  if (verbose) cout << "Using Relative Threshold Value of = " << Rthresh << endl;

  Ifpack_IlukGraph * IlukGraph = 0;
  Ifpack_CrsRiluk * ILUK = 0;

  // A negative LevelFill skips preconditioner construction, leaving both
  // pointers null.
  // NOTE(review): Athresh and Rthresh are read above but not used in the
  // visible code.
  if (LevelFill>-1) {
    elapsed_time = timer.ElapsedTime();
    IlukGraph = new Ifpack_IlukGraph(A.Graph(), LevelFill, Overlap);
    elapsed_time = timer.ElapsedTime() - elapsed_time;
    if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl;

    // NOTE(review): fact_counter is not visibly attached to ILUK, and no
    // factorization call is visible after InitValues.
    Epetra_Flops fact_counter;
    elapsed_time = timer.ElapsedTime();
    ILUK = new Ifpack_CrsRiluk(*IlukGraph);
    int initerr = ILUK->InitValues(A);
    if (initerr!=0) cout << Comm << "InitValues error = " << initerr;
    elapsed_time = timer.ElapsedTime() - elapsed_time;
    total_flops = ILUK->Flops();
    MFLOPs = total_flops/elapsed_time/1000000.0;
    if (verbose) cout << "Time to compute preconditioner values = " 
		    << elapsed_time << endl
		    << "MFLOPS for Factorization = " << MFLOPs << endl;
    //cout << *ILUK << endl;
  // NOTE(review): because the closing brace of the `if` above is not
  // visible, it is unclear whether Condest can be reached with ILUK == 0.
  double Condest;
  ILUK->Condest(false, Condest);

  if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl;
  int Maxiter = 500;
  double Tolerance = 1.0E-14;

  Epetra_Vector xcomp(map);
  Epetra_Vector resid(map);

  // NOTE(review): `counter` is not visibly attached to any object.
  Epetra_Flops counter;

  elapsed_time = timer.ElapsedTime();

  BiCGSTAB(A, xcomp, b, ILUK, Maxiter, Tolerance, &residual, verbose);

  elapsed_time = timer.ElapsedTime() - elapsed_time;
  total_flops = counter.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;
  if (verbose) cout << "Time to compute solution = " 
		    << elapsed_time << endl
		    << "Number of operations in solve = " << total_flops << endl
		    << "MFLOPS for Solve = " << MFLOPs<< endl << endl;

  resid.Update(1.0, xcomp, -1.0, xexact, 0.0); // resid = xcomp - xexact


  // NOTE(review): no visible statement stores the norm of resid into
  // `residual` before it is printed; it still holds what BiCGSTAB wrote.
  if (verbose) cout << "Norm of the difference between exact and computed solutions = " << residual << endl;


  if (ILUK!=0) delete ILUK;
  if (IlukGraph!=0) delete IlukGraph;
  // NOTE(review): no MPI_Init call is visible in this listing.
  MPI_Finalize() ;

return 0 ;
コード例 #5
// Example driver: builds a 100-row tridiagonal (-1 2 -1) Epetra_CrsMatrix
// with `int` global indices, runs powerMethod() on it, multiplies the (0,0)
// entry by 10, and runs powerMethod() again, printing a Mflop/s figure after
// each run.  Returns the accumulated error code.
//
// NOTE(review): this listing has lost lines during extraction (the return
// type and opening brace of main, closing braces, `else` keywords and
// apparently some statements and preprocessor conditionals).  Confirm
// against the upstream file before relying on it.
main (int argc, char *argv[])
  // These "using" statements make the code a bit more concise.
  using std::cout;
  using std::endl;

  // NOTE(review): this `i` is shadowed by the loop-local `int i` in every
  // loop below and is otherwise unused in the visible code.
  int ierr = 0, i;

  // If Trilinos was built with MPI, initialize MPI, otherwise
  // initialize the serial "communicator" that stands in for MPI.
  // NOTE(review): both alternatives are listed back to back; the
  // preprocessor conditional selecting one of them is not visible.
  MPI_Init (&argc,&argv);
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
  Epetra_SerialComm Comm;

  const int MyPID = Comm.MyPID();
  const int NumProc = Comm.NumProc();
  // We only allow (MPI) Process 0 to write to stdout.
  const bool verbose = (MyPID == 0);
  const int NumGlobalElements = 100;

  if (verbose)
    cout << Epetra_Version() << endl << endl;

  // Asking the Epetra_Comm to print itself is a good test for whether
  // you are running in an MPI environment.  However, it will print
  // something on all MPI processes, so you should remove it for a
  // large-scale parallel run.
  cout << Comm << endl;

  // NOTE(review): without the braces that presumably enclosed this branch,
  // std::exit is unconditional as listed.
  if (NumGlobalElements < NumProc)
      if (verbose)
        cout << "numGlobalBlocks = " << NumGlobalElements 
             << " cannot be < number of processors = " << NumProc << endl;
      std::exit (EXIT_FAILURE);

  // Construct a Map that puts approximately the same number of rows
  // of the matrix A on each processor.
  Epetra_Map Map (NumGlobalElements, 0, Comm);

  // Get update list and number of local equations from newly created Map.
  int NumMyElements = Map.NumMyElements();

  // NOTE(review): MyGlobalElements is sized here but never filled in the
  // visible code; the call that copies the map's global IDs into it seems
  // to be missing.
  std::vector<int> MyGlobalElements(NumMyElements);

  // NumNz[i] is the number of nonzero elements in row i of the sparse
  // matrix on this MPI process.  Epetra_CrsMatrix uses this to figure
  // out how much space to allocate.
  std::vector<int> NumNz (NumMyElements);

  // We are building a tridiagonal matrix where each row contains the
  // nonzero elements (-1 2 -1).  Thus, we need 2 off-diagonal terms,
  // except for the first and last row of the matrix.
  // NOTE(review): no `else` is visible between the two assignments below.
  for (int i = 0; i < NumMyElements; ++i)
    if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1)
      NumNz[i] = 2; // First or last row
      NumNz[i] = 3; // Not the (first or last row)

  // Create the Epetra_CrsMatrix.
  Epetra_CrsMatrix A (Copy, Map, &NumNz[0]);

  // Add rows to the sparse matrix one at a time.
  // Values holds the two off-diagonal entries (-1, -1); `two` is the
  // diagonal entry.
  std::vector<double> Values(2);
  Values[0] = -1.0; Values[1] = -1.0;
  std::vector<int> Indices(2);
  const double two = 2.0;
  int NumEntries;

  // NOTE(review): the closing braces and the final `else` of this chain are
  // not visible.
  for (int i = 0; i < NumMyElements; ++i)
      if (MyGlobalElements[i] == 0)
        { // The first row of the matrix.
          Indices[0] = 1;
          NumEntries = 1;
      else if (MyGlobalElements[i] == NumGlobalElements - 1)
        { // The last row of the matrix.
          Indices[0] = NumGlobalElements-2;
          NumEntries = 1;
        { // Any row of the matrix other than the first or last.
          Indices[0] = MyGlobalElements[i]-1;
          Indices[1] = MyGlobalElements[i]+1;
          NumEntries = 2;
      ierr = A.InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      assert (ierr==0);
      // Insert the diagonal entry.
      ierr = A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i]);

  // Finish up.  We can call FillComplete() with no arguments, because
  // the matrix is square.
  ierr = A.FillComplete ();
  assert (ierr==0);

  // Parameters for the power method.
  const int niters = NumGlobalElements*10;
  const double tolerance = 1.0e-2;

  // Run the power method.  Keep track of the flop count and the total
  // elapsed time.
  // NOTE(review): `counter` is not visibly attached to A, and nothing
  // visible resets it or the timer before the second run below.
  Epetra_Flops counter;
  Epetra_Time timer(Comm);
  double lambda = 0.0;
  ierr += powerMethod (lambda, A, niters, tolerance, verbose);
  double elapsedTime = timer.ElapsedTime ();
  double totalFlops =counter.Flops ();
  // Mflop/s: Million floating-point arithmetic operations per second.
  double Mflop_per_s = totalFlops / elapsedTime / 1000000.0;

  if (verbose) 
    cout << endl << endl << "Total Mflop/s for first solve = " 
         << Mflop_per_s << endl<< endl;

  // Increase the first (0,0) diagonal entry of the matrix.
  if (verbose) 
    cout << endl << "Increasing magnitude of first diagonal term, solving again"
         << endl << endl << endl;

  // Only the process that owns global row 0 modifies it: the row is copied
  // out, the entry with column index 0 is scaled by 10, and the row is
  // written back.
  if (A.MyGlobalRow (0)) {
    int numvals = A.NumGlobalEntries (0);
    std::vector<double> Rowvals (numvals);
    std::vector<int> Rowinds (numvals);
    A.ExtractGlobalRowCopy (0, numvals, numvals, &Rowvals[0], &Rowinds[0]); // Get A(0,0)
    for (int i = 0; i < numvals; ++i) 
      if (Rowinds[i] == 0) 
        Rowvals[i] *= 10.0;

    A.ReplaceGlobalValues (0, numvals, &Rowvals[0], &Rowinds[0]);

  // Run the power method again.  Keep track of the flop count and the
  // total elapsed time.
  lambda = 0.0;
  ierr += powerMethod (lambda, A, niters, tolerance, verbose);
  elapsedTime = timer.ElapsedTime();
  totalFlops = counter.Flops();
  Mflop_per_s = totalFlops / elapsedTime / 1000000.0;

  if (verbose) 
    cout << endl << endl << "Total Mflop/s for second solve = " 
         << Mflop_per_s << endl << endl;

  MPI_Finalize() ;

  return ierr;
コード例 #6
ファイル: cxx_main.cpp プロジェクト: 00liujj/trilinos
// *************************************************************
// main program - This benchmark code reads a Harwell-Boeing data
//                set and finds the minimal eigenvalue of the matrix
//                using inverse iteration.
//
// Usage: <program> HB_data_file   (exactly one argument is required)
//
// NOTE(review): this listing has lost lines during extraction (closing
// braces, the #endif matching the #ifdef below, and apparently the
// conditional around the two Comm declarations).  Confirm against the
// upstream file before relying on it.
// *************************************************************
int main(int argc, char *argv[]) {

  // NOTE(review): two declarations of Comm -- presumably the MPI and serial
  // alternatives of a preprocessor conditional that is no longer visible.
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
  Epetra_SerialComm Comm;

  cout << Comm << endl;

  int MyPID = Comm.MyPID();

  bool verbose = false;
  if (MyPID==0) verbose = true; // Print out detailed results (turn off for best performance)

  // Every process exits on a bad argument count; only process 0 prints.
  if(argc != 2) {
    if (verbose) cerr << "Usage: " << argv[0] << " HB_data_file" << endl;
    exit(1); // Error

  // Define pointers that will be set by HB read function

  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA;
  Epetra_Vector * readx;
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;

  // Call function to read in HB problem
  Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact);

  // Not interested in x, b or xexact for an eigenvalue problem
  delete readx;
  delete readb;
  delete readxexact;

#ifdef EPETRA_MPI // If running in parallel, we need to distribute matrix across all PEs.

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);

  // Create Exporter to distribute read-in matrix and vectors

  Epetra_Export exporter(*readMap, map);
  Epetra_CrsMatrix A(Copy, map, 0);

  A.Export(*readA, exporter, Add);

  // The read-in matrix and map are released once A has been populated.
  delete readA;
  delete readMap;

#else // If not running in parallel, we do not need to distribute the matrix
  // In the serial branch A is just an alias for the matrix that was read.
  // NOTE(review): the #endif closing this conditional is not visible, so it
  // is unclear how much of the code below belongs to the #else branch.
  Epetra_CrsMatrix & A = *readA;

  // Create flop counter to collect all FLOPS
  // NOTE(review): `counter` is not visibly attached to A.
  Epetra_Flops counter;

  double lambda = 0; // Minimal eigenvalue returned here
  // Call inverse iteration solver
  Epetra_Time timer(Comm);
  invIteration(A, lambda, verbose);
  double elapsedTime = timer.ElapsedTime();
  double totalFlops = counter.Flops();
  double MFLOPS = totalFlops/elapsedTime/1000000.0;

  // This report is printed on every process (it is not guarded by verbose).
  cout << endl
       << "*************************************************" << endl
       << " Approximate smallest eigenvalue = " << lambda << endl
       << "    Total Time    = " << elapsedTime << endl
       << "    Total FLOPS   = " << totalFlops << endl
       << "    Total MFLOPS  = " << MFLOPS << endl
       << "*************************************************" << endl;

  // All done
  // NOTE(review): readA and readMap were already deleted in the EPETRA_MPI
  // branch above; these deletes are presumably meant for the serial build
  // only -- verify the guarding conditional upstream.
  delete readA;
  delete readMap;

return (0);
コード例 #7
ファイル: cxx_main.cpp プロジェクト: 00liujj/trilinos
// Benchmarks triangular solves with a lower factor L and an upper factor U.
// For each of four passes (j = 0/2 no transpose, j = 1/3 transpose; the
// label switches from "without" to "with" for j > 1) it performs ten
// L->Solve and ten U->Solve calls and prints MFLOP rates when `verbose` or
// `summary` is set.
//
// L, U              triangular matrices to solve with
// bL, btL, bU, btU  right-hand sides for the plain / transposed solves
// xexactL, xexactU  reference solutions used to form the residual r
// StaticProfile     only selects the "static "/"dynamic" output label
// verbose, summary  control what is printed
//
// NOTE(review): this listing looks truncated by extraction -- closing braces
// are missing and some statements appear to be lost.  Confirm against the
// upstream file before relying on it.
void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL,
		      Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU,
		      bool StaticProfile, bool verbose, bool summary) {

  // When a factor stores no diagonal, the solves below use a unit diagonal,
  // so the right-hand sides are adjusted by adding xexact.
  if (L->NoDiagonal()) {
    bL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to bL
    btL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to btL
  if (U->NoDiagonal()) {
    bU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to bU
    btU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to btU

  // z receives the solve result, r the residual; both start as copies of *bL.
  Epetra_MultiVector z(*bL);
  Epetra_MultiVector r(*bL);
  Epetra_SerialDenseVector resvec(bL->NumVectors());

  // NOTE(review): nothing visible attaches flopcounter to L or U, or resets
  // the counter/timer between measurements -- presumably lost lines.
  Epetra_Flops flopcounter;
  Epetra_Time timer(L->Comm());
  std::string statdyn =        "dynamic";
  if (StaticProfile) statdyn = "static ";

  for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans

    bool TransA = (j==1 || j==3);
    std::string contig = "without";
    if (j>1) contig =    "with   ";

    // NOTE(review): the body of this branch is not visible.
    if (j==2) {


    //10 lower solves
    bool Upper = false;
    bool UnitDiagonal = L->NoDiagonal();  // If no diagonal, then unit must be used
    Epetra_MultiVector * b = TransA ? btL : bL;  // solve with the appropriate b vector
    for( int i = 0; i < 10; ++i )
      L->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = btL

    double elapsed_time = timer.ElapsedTime();
    double total_flops = L->Flops();

    // Compute residual
    r.Update(-1.0, z, 1.0, *xexactL, 0.0); // r = xexactL - z

    // NOTE(review): resvec is never written in the visible code, so this
    // check does not reflect r -- a norm computation seems to be missing.
    if (resvec.NormInf()>0.000001) {
      cout << "resvec = " << resvec << endl;
      cout << "z = " << z << endl;
      cout << "xexactL = " << *xexactL << endl;
      cout << "r = " << r << endl;

    if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
    double MFLOPs = total_flops/elapsed_time/1000000.0;
    if (verbose) cout << "Total MFLOPs for 10 " << " Lower solves " << statdyn << " Profile (Trans = " << TransA
		      << ")  and " << contig << " opt storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
    // Summary mode: the label is printed only on single-process runs.
    if (summary) {
      if (L->Comm().NumProc()==1) {
	if (TransA) cout << "TransLSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
	else cout << "NoTransLSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
      cout << MFLOPs << endl;

    //10 upper solves
    Upper = true;
    UnitDiagonal = U->NoDiagonal();  // If no diagonal, then unit must be used
    b = TransA ? btU : bU;  // solve with the appropriate b vector
    for( int i = 0; i < 10; ++i )
      U->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Uz = bU or U'z = btU

    elapsed_time = timer.ElapsedTime();
    total_flops = U->Flops();

    // Compute residual
    r.Update(-1.0, z, 1.0, *xexactU, 0.0); // r = xexactU - z

    // Note the looser threshold here (1e-3) than for the lower solves (1e-6).
    if (resvec.NormInf()>0.001) {
      cout << "U = " << *U << endl;
      //cout << "resvec = " << resvec << endl;
      cout << "z = " << z << endl;
      cout << "xexactU = " << *xexactU << endl;
      //cout << "r = " << r << endl;
      cout << "b = " << *b << endl;

    if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
    MFLOPs = total_flops/elapsed_time/1000000.0;
    if (verbose) cout << "Total MFLOPs for 10 " << " Upper solves " << statdyn << " Profile (Trans = " << TransA
		      << ")  and " << contig << " opt storage = " << MFLOPs <<endl;
    if (summary) {
      if (L->Comm().NumProc()==1) {
	if (TransA) cout << "TransUSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
	else cout << "NoTransUSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
      cout << MFLOPs << endl;
コード例 #8
ファイル: cxx_main.cpp プロジェクト: 00liujj/trilinos
// Times sparse matrix-vector products with an Epetra_CrsMatrix and reports
// the achieved MFLOP rate.
//
// Four passes are made (j = 0..3): without/with transpose, first before and
// then after A->OptimizeStorage() has been called.  Within each pass the old
// kernel (Multiply1) and the new kernel (Multiply) are each run 10 times.
//
// A             - matrix under test; its storage is optimized in place when j reaches 2
// b             - right-hand side compared against the non-transposed product
// bt            - right-hand side compared against the transposed product
// xexact        - multivector that is multiplied by A
// StaticProfile - only selects the "static"/"dynamic" label in the output
// verbose       - print one descriptive line per measurement
// summary       - print one terse tab-separated line per measurement
void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
                    Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {

  Epetra_MultiVector z(*b);
  Epetra_MultiVector r(*b);
  Epetra_SerialDenseVector resvec(b->NumVectors());

  // Attach the counter to A; without it A->Flops() has nothing to report.
  Epetra_Flops flopcounter;
  A->SetFlopCounter(flopcounter);
  Epetra_Time timer(A->Comm());
  std::string statdyn =        "dynamic";
  if (StaticProfile) statdyn = "static ";

  for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans

    bool TransA = (j==1 || j==3);
    std::string contig = "without";
    if (j>1) contig =    "with   ";

    // The short variant of the test skips the old multiply (k = 0).
#ifdef EPETRA_SHORT_PERFTEST
    int kstart = 1;
#else
    int kstart = 0;
#endif
    for (int k=kstart; k<2; k++) { // Loop over old multiply vs. new multiply

      std::string oldnew = "old";
      if (k>0) oldnew =    "new";

      if (j==2) A->OptimizeStorage();

      // Measure only the 10 products below, not the work of earlier passes.
      flopcounter.ResetFlops();
      timer.ResetStartTime();

      if (k==0) {
        //10 matvecs
        for( int i = 0; i < 10; ++i )
          A->Multiply1(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact using old Multiply method
      }
      else {
        //10 matvecs
        for( int i = 0; i < 10; ++i )
          A->Multiply(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact
      }

      double elapsed_time = timer.ElapsedTime();
      double total_flops = A->Flops();

      // Compute residual against the right-hand side that matches the product
      if (TransA)
        r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
      else
        r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z

      // Fill resvec; it is what the "ResNorm" output below reports.
      r.Norm2(resvec.Values());

      if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
      double MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "Total MFLOPs for 10 " << oldnew << " MatVec's with " << statdyn << " Profile (Trans = " << TransA
                        << ")  and " << contig << " optimized storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
      if (summary) {
        // The row label is only printed on single-process runs; the number always is.
        if (A->Comm().NumProc()==1) {
          if (TransA) cout << "TransMv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
          else cout << "NoTransMv" << statdyn << "Prof" << contig << "OptStor" << '\t';
        }
        cout << MFLOPs << endl;
      }
    }
  }
}
コード例 #9
ファイル: cxx_main.cpp プロジェクト: 00liujj/trilinos
int main(int argc, char *argv[])
  int ierr = 0;
  double elapsed_time;
  double total_flops;
  double MFLOPs;


  // Initialize MPI
  Epetra_MpiComm comm( MPI_COMM_WORLD );
  Epetra_SerialComm comm;

  bool verbose = false;
  bool summary = false;

  // Check if we should print verbose results to standard out
  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;

  // Check if we should print verbose results to standard out
  if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;

  if(argc < 6) {
    cerr << "Usage: " << argv[0]
         << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
         << "where:" << endl
         << "NumNodesX         - Number of mesh nodes in X direction per processor" << endl
         << "NumNodesY         - Number of mesh nodes in Y direction per processor" << endl
         << "NumProcX          - Number of processors to use in X direction" << endl
         << "NumProcY          - Number of processors to use in Y direction" << endl
         << "NumPoints         - Number of points to use in stencil (5, 9 or 25 only)" << endl
         << "-v|-s             - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
         << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
	 << " Serial example:" << endl
         << argv[0] << " 16 12 1 1 25 -v" << endl
	 << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
	 << " MPI example:" << endl
         << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
	 << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
	 << " in the X direction and 8 in the Y direction.  Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
         << endl;

    //char tmp;
    //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
    //if (comm.MyPID()==0) cin >> tmp;

  comm.SetTracebackMode(0); // This should shut down any error traceback reporting
  if (verbose && comm.MyPID()==0)
    cout << Epetra_Version() << endl << endl;
  if (summary && comm.MyPID()==0) {
    if (comm.NumProc()==1)
      cout << Epetra_Version() << endl << endl;
      cout << endl << endl; // Print two blank line to keep output columns lined up

  if (verbose) cout << comm <<endl;

  // Redefine verbose to only print on PE 0

  if (verbose && comm.MyPID()!=0) verbose = false;
  if (summary && comm.MyPID()!=0) summary = false;

  int numNodesX = atoi(argv[1]);
  int numNodesY = atoi(argv[2]);
  int numProcsX = atoi(argv[3]);
  int numProcsY = atoi(argv[4]);
  int numPoints = atoi(argv[5]);

  if (verbose || (summary && comm.NumProc()==1)) {
    cout << " Number of local nodes in X direction  = " << numNodesX << endl
	 << " Number of local nodes in Y direction  = " << numNodesY << endl
	 << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
	 << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
	 << " Number of local nonzero entries       = " << numNodesX*numNodesY*numPoints << endl
	 << " Number of global nonzero entries      = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
	 << " Number of Processors in X direction   = " << numProcsX << endl
	 << " Number of Processors in Y direction   = " << numProcsY << endl
	 << " Number of Points in stencil           = " << numPoints << endl << endl;
  // Print blank line to keep output columns lined up
  if (summary && comm.NumProc()>1)
    cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;

  if (numProcsX*numProcsY!=comm.NumProc()) {
    cerr << "Number of processors = " << comm.NumProc() << endl
	 << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;

  if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
    cerr << "Number of points specified = " << numPoints << endl
	 << " is not 5, 9, 25" << endl << endl;

  if (numNodesX*numNodesY<=0) {
    cerr << "Product of number of nodes is <= zero" << endl << endl;

  Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
  Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
  if (numPoints==5) {

     // Generate a 5-point 2D Finite Difference matrix
    Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0;  Xoff[4] = 0;
    Yoff[0] = 0;  Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1;

     // Generate a 2-point 2D Lower triangular Finite Difference matrix
    XLoff[0] = -1; XLoff[1] =  0;
    YLoff[0] =  0; YLoff[1] = -1;

     // Generate a 3-point 2D upper triangular Finite Difference matrix
    XUoff[0] =  0; XUoff[1] =  1; XUoff[2] = 0;
    YUoff[0] =  0; YUoff[1] =  0; YUoff[2] = 1;
  else if (numPoints==9) {
    // Generate a 9-point 2D Finite Difference matrix
    Xoff[0] = -1;  Xoff[1] =  0; Xoff[2] =  1;
    Yoff[0] = -1;  Yoff[1] = -1; Yoff[2] = -1;
    Xoff[3] = -1;  Xoff[4] =  0; Xoff[5] =  1;
    Yoff[3] =  0;  Yoff[4] =  0; Yoff[5] =  0;
    Xoff[6] = -1;  Xoff[7] =  0; Xoff[8] =  1;
    Yoff[6] =  1;  Yoff[7] =  1; Yoff[8] =  1;

    // Generate a 5-point lower triangular 2D Finite Difference matrix
    XLoff[0] = -1;  XLoff[1] =  0; Xoff[2] =  1;
    YLoff[0] = -1;  YLoff[1] = -1; Yoff[2] = -1;
    XLoff[3] = -1;  XLoff[4] =  0;
    YLoff[3] =  0;  YLoff[4] =  0;

    // Generate a 4-point upper triangular 2D Finite Difference matrix
    XUoff[0] =  1;
    YUoff[0] =  0;
    XUoff[1] = -1;  XUoff[2] =  0; XUoff[3] =  1;
    YUoff[1] =  1;  YUoff[2] =  1; YUoff[3] =  1;

  else {
    // Generate a 25-point 2D Finite Difference matrix
    int xi = 0, yi = 0;
    int xo = -2, yo = -2;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
    xo = -2, yo++;
    Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
    Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;

    // Generate a 13-point lower triangular 2D Finite Difference matrix
    xi = 0, yi = 0;
    xo = -2, yo = -2;
    XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
    YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
    xo = -2, yo++;
    XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
    YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
    xo = -2, yo++;
    XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++;
    YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;

    // Generate a 13-point upper triangular 2D Finite Difference matrix
    xi = 0, yi = 0;
    xo = 0, yo = 0;
    XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++;
    YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;
    xo = -2, yo++;
    XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
    YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;
    xo = -2, yo++;
    XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
    YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;


  Epetra_Map * map;
  Epetra_Map * mapL;
  Epetra_Map * mapU;
  Epetra_CrsMatrix * A;
  Epetra_CrsMatrix * L;
  Epetra_CrsMatrix * U;
  Epetra_MultiVector * b;
  Epetra_MultiVector * bt;
  Epetra_MultiVector * xexact;
  Epetra_MultiVector * bL;
  Epetra_MultiVector * btL;
  Epetra_MultiVector * xexactL;
  Epetra_MultiVector * bU;
  Epetra_MultiVector * btU;
  Epetra_MultiVector * xexactU;
  Epetra_SerialDenseVector resvec(0);

  Epetra_Flops flopcounter;
  Epetra_Time timer(comm);

  int jstop = 1;
  int jstop = 1;
  int jstop = 2;
  for (int j=0; j<jstop; j++) {
    for (int k=1; k<17; k++) {
      if (k<3 || (k%4==0 && k<9)) {
      if (k<6 || k%4==0) {
      if (k<7 || k%2==0) {
      int nrhs=k;
      if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";

      bool StaticProfile = (j!=0);
      if (verbose) {
        if (StaticProfile) cout << " static profile\n";
        else cout << " dynamic profile\n";
      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
			 Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
			 map, A, b, bt, xexact, StaticProfile, false);


      Epetra_JadMatrix JA(*A);
      elapsed_time = timer.ElapsedTime();
      if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;

      //cout << "A = " << *A << endl;
      //cout << "JA = " << JA << endl;

      runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);

      runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);

      delete A;
      delete b;
      delete bt;
      delete xexact;

      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
			 XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
			 mapL, L, bL, btL, xexactL, StaticProfile, true);

      GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
			 XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
			 mapU, U, bU, btU, xexactU, StaticProfile, true);

      runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);

      delete L;
      delete bL;
      delete btL;
      delete xexactL;
      delete mapL;

      delete U;
      delete bU;
      delete btU;
      delete xexactU;
      delete mapU;

      Epetra_MultiVector q(*map, nrhs);
      Epetra_MultiVector z(q);
      Epetra_MultiVector r(q);

      delete map;



      //10 norms
      for( int i = 0; i < 10; ++i )
	q.Norm2( resvec.Values() );

      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;

      if (summary) {
	if (comm.NumProc()==1) cout << "Norm2" << '\t';
	cout << MFLOPs << endl;


      //10 dot's
      for( int i = 0; i < 10; ++i )
	q.Dot(z, resvec.Values());

      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "Total MFLOPs for 10 Dot's  = " << MFLOPs << endl;

      if (summary) {
	if (comm.NumProc()==1) cout << "DotProd" << '\t';
	cout << MFLOPs << endl;


      //10 dot's
      for( int i = 0; i < 10; ++i )
	q.Update(1.0, z, 1.0, r, 0.0);

      elapsed_time = timer.ElapsedTime();
      total_flops = q.Flops();
      MFLOPs = total_flops/elapsed_time/1000000.0;
      if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;

      if (summary) {
	if (comm.NumProc()==1) cout << "Update" << '\t';
	cout << MFLOPs << endl;
  MPI_Finalize() ;

return ierr ;

// Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
// nx      (In) - number of grid points in x direction
// ny      (In) - number of grid points in y direction
//   The total number of equations will be nx*ny ordered such that the x direction changes
//   most rapidly:
//      First equation is at point (0,0)
//      Second at                  (1,0)
//       ...
//      nx equation at             (nx-1,0)
//      nx+1st equation at         (0,1)

// numPoints (In) - number of points in finite difference stencil
// xoff    (In) - stencil offsets in x direction (of length numPoints)
// yoff    (In) - stencil offsets in y direction (of length numPoints)
//   A standard 5-point finite difference stencil would be described as:
//     numPoints = 5
//     xoff = [-1, 1, 0,  0, 0]
//     yoff = [ 0, 0, 0, -1, 1]

// nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed.)

// comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
// map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
// A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
//                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
//                diagonal will be slightly diag dominant.
// b      (Out) - Generated RHS.  Values satisfy b = A*xexact
// bt     (Out) - Generated RHS.  Values satisfy bt = A'*xexact
// xexact (Out) - Generated exact solution to A*xexact = b and A'*xexact = bt

// Note: Caller of this function is responsible for deleting all output objects.

// Single right-hand-side convenience overload.
//
// Forwards to the Epetra_MultiVector overload with nrhs = 1 and hands the
// resulting b, bt and xexact back as Epetra_Vector pointers.  All other
// arguments are passed through unchanged.
void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
                        int * xoff, int * yoff,
                        const Epetra_Comm  &comm, bool verbose, bool summary,
                        Epetra_Map *& map,
                        Epetra_CrsMatrix *& A,
                        Epetra_Vector *& b,
                        Epetra_Vector *& bt,
                        Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {

  Epetra_MultiVector * b1, * bt1, * xexact1;
  GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
                     xoff, yoff, 1, comm, verbose, summary,
                     map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);

  // With nrhs = 1 the multivector overload allocates Epetra_Vector objects,
  // so these downcasts refer to the very objects it created.
  b = dynamic_cast<Epetra_Vector *>(b1);
  bt = dynamic_cast<Epetra_Vector *>(bt1);
  xexact = dynamic_cast<Epetra_Vector *>(xexact1);
}

void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
			int * xoff, int * yoff, int nrhs,
			const Epetra_Comm  &comm, bool verbose, bool summary,
			Epetra_Map *& map,
			Epetra_CrsMatrix *& A,
			Epetra_MultiVector *& b,
			Epetra_MultiVector *& bt,
			Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {

  Epetra_Time timer(comm);
  // Determine my global IDs
  long long * myGlobalElements;
  GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);

  int numMyEquations = numNodesX*numNodesY;

  map = new Epetra_Map((long long)-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
  delete [] myGlobalElements;

  long long numGlobalEquations = map->NumGlobalElements64();

  int profile = 0; if (StaticProfile) profile = numPoints;


  if (MakeLocalOnly)
    A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
    A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix


  if (MakeLocalOnly)
    A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
    A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix


  long long * indices = new long long[numPoints];
  double * values = new double[numPoints];

  double dnumPoints = (double) numPoints;
  int nx = numNodesX*numProcsX;

  for (int i=0; i<numMyEquations; i++) {

    long long rowID = map->GID64(i);
    int numIndices = 0;

    for (int j=0; j<numPoints; j++) {
      long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
      if (colID>-1 && colID<numGlobalEquations) {
	indices[numIndices] = colID;
	double value = - ((double) rand())/ ((double) RAND_MAX);
	if (colID==rowID)
	  values[numIndices++] = dnumPoints - value; // Make diagonal dominant
	  values[numIndices++] = value;
    //cout << "Building row " << rowID << endl;
    A->InsertGlobalValues(rowID, numIndices, values, indices);

  delete [] indices;
  delete [] values;
  double insertTime = timer.ElapsedTime();
  double fillCompleteTime = timer.ElapsedTime();

  if (verbose)
    cout << "Time to insert matrix values = " << insertTime << endl
	 << "Time to complete fill        = " << fillCompleteTime << endl;
  if (summary) {
    if (comm.NumProc()==1) cout << "InsertTime" << '\t';
    cout << insertTime << endl;
    if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
    cout << fillCompleteTime << endl;

  if (nrhs<=1) {
    b = new Epetra_Vector(*map);
    bt = new Epetra_Vector(*map);
    xexact = new Epetra_Vector(*map);
  else {
    b = new Epetra_MultiVector(*map, nrhs);
    bt = new Epetra_MultiVector(*map, nrhs);
    xexact = new Epetra_MultiVector(*map, nrhs);

  xexact->Random(); // Fill xexact with random values

  A->Multiply(false, *xexact, *b);
  A->Multiply(true, *xexact, *bt);

コード例 #10
ファイル: cxx_main.cpp プロジェクト: 00liujj/trilinos
int main(int argc, char *argv[])
  int ierr = 0, i, j, k;

  Epetra_MpiComm Comm( MPI_COMM_WORLD );
  Epetra_SerialComm Comm;

  bool verbose = false;

  // Check if we should print results to standard out
  if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true;

  if(verbose && Comm.MyPID()==0)
    std::cout << Epetra_Version() << std::endl << std::endl;

  int rank = Comm.MyPID();
  //  char tmp;
  //  if (rank==0) std::cout << "Press any key to continue..."<< std::endl;
  //  if (rank==0) cin >> tmp;
  //  Comm.Barrier();

  Comm.SetTracebackMode(0); // This should shut down any error traceback reporting
  if (verbose) std::cout << Comm << std::endl;

  //  bool verbose1 = verbose;

  // Redefine verbose to only print on PE 0
  if (verbose && rank!=0) verbose = false;

  int N = 20;
  int NRHS = 4;
  double * A = new double[N*N];
  double * A1 = new double[N*N];
  double * X = new double[(N+1)*NRHS];
  double * X1 = new double[(N+1)*NRHS];
  int LDX = N+1;
  int LDX1 = N+1;
  double * B = new double[N*NRHS];
  double * B1 = new double[N*NRHS];
  int LDB = N;
  int LDB1 = N;

  int LDA = N;
  int LDA1 = LDA;
  double OneNorm1;
  bool Upper = false;

  Epetra_SerialSpdDenseSolver solver;
  Epetra_SerialSymDenseMatrix * Matrix;
  for (int kk=0; kk<2; kk++) {
    for (i=1; i<=N; i++) {
      GenerateHilbert(A, LDA, i);
      OneNorm1 = 0.0;
      for (j=1; j<=i; j++) OneNorm1 += 1.0/((double) j); // 1-Norm = 1 + 1/2 + ...+1/n

      if (kk==0) {
	Matrix = new Epetra_SerialSymDenseMatrix(View, A, LDA, i);
	LDA1 = LDA;
      else {
	Matrix = new Epetra_SerialSymDenseMatrix(Copy, A, LDA, i);
	LDA1 = i;
      GenerateHilbert(A1, LDA1, i);
      if (kk==1) {
	Upper = true;

      for (k=0; k<NRHS; k++)
	for (j=0; j<i; j++) {
	  B[j+k*LDB] = 1.0/((double) (k+3)*(j+3));
	  B1[j+k*LDB1] = B[j+k*LDB1];
      Epetra_SerialDenseMatrix Epetra_B(View, B, LDB, i, NRHS);
      Epetra_SerialDenseMatrix Epetra_X(View, X, LDX, i, NRHS);
      solver.SetVectors(Epetra_X, Epetra_B);

      ierr = check(solver, A1, LDA1,  i, NRHS, OneNorm1, B1, LDB1,  X1, LDX1, Upper, verbose);
      assert (ierr>-1);
      delete Matrix;
      if (ierr!=0) {
	if (verbose) std::cout << "Factorization failed due to bad conditioning.  This is normal if SCOND is small."
			  << std::endl;

  delete [] A;
  delete [] A1;
  delete [] X;
  delete [] X1;
  delete [] B;
  delete [] B1;

  // Now test norms and scaling functions

  Epetra_SerialSymDenseMatrix D;
  double ScalarA = 2.0;

  int DM = 10;
  int DN = 10;
  for (j=0; j<DN; j++)
    for (i=0; i<DM; i++) D[j][i] = (double) (1+i+j*DM) ;

  //std::cout << D << std::endl;

  double NormInfD_ref = (double)(DM*(DN*(DN+1))/2);
  double NormOneD_ref = NormInfD_ref;

  double NormInfD = D.NormInf();
  double NormOneD = D.NormOne();

  if (verbose) {
    std::cout << " *** Before scaling *** " << std::endl
	 << " Computed one-norm of test matrix = " << NormOneD << std::endl
	 << " Expected one-norm                = " << NormOneD_ref << std::endl
	 << " Computed inf-norm of test matrix = " << NormInfD << std::endl
	 << " Expected inf-norm                = " << NormInfD_ref << std::endl;
  D.Scale(ScalarA); // Scale entire D matrix by this value

  //std::cout << D << std::endl;

  NormInfD = D.NormInf();
  NormOneD = D.NormOne();
  if (verbose) {
    std::cout << " *** After scaling *** " << std::endl
	 << " Computed one-norm of test matrix = " << NormOneD << std::endl
	 << " Expected one-norm                = " << NormOneD_ref*ScalarA << std::endl
	 << " Computed inf-norm of test matrix = " << NormInfD << std::endl
	 << " Expected inf-norm                = " << NormInfD_ref*ScalarA << std::endl;

  // Now test for larger system, both correctness and performance.

  N = 2000;
  NRHS = 5;
  LDA = N;
  LDB = N;
  LDX = N;

  if (verbose) std::cout << "\n\nComputing factor of an " << N << " x " << N << " SPD matrix...Please wait.\n\n" << std::endl;

  // Define A and X

  A = new double[LDA*N];
  X = new double[LDB*NRHS];

  for (j=0; j<N; j++) {
    for (k=0; k<NRHS; k++) X[j+k*LDX] = 1.0/((double) (j+5+k));
    for (i=0; i<N; i++) {
      if (i==j) A[i+j*LDA] = 100.0 + i;
      else A[i+j*LDA] = -1.0/((double) (i+10)*(j+10));

  // Define Epetra_SerialDenseMatrix object

  Epetra_SerialSymDenseMatrix BigMatrix(Copy, A, LDA, N);
  Epetra_SerialSymDenseMatrix OrigBigMatrix(View, A, LDA, N);

  Epetra_SerialSpdDenseSolver BigSolver;

  // Time factorization

  Epetra_Flops counter;
  Epetra_Time Timer(Comm);
  double tstart = Timer.ElapsedTime();
  ierr = BigSolver.Factor();
  if (ierr!=0 && verbose) std::cout << "Error in factorization = "<<ierr<< std::endl;
  double time = Timer.ElapsedTime() - tstart;

  double FLOPS = counter.Flops();
  double MFLOPS = FLOPS/time/1000000.0;
  if (verbose) std::cout << "MFLOPS for Factorization = " << MFLOPS << std::endl;

  // Define Left hand side and right hand side
  Epetra_SerialDenseMatrix LHS(View, X, LDX, N, NRHS);
  Epetra_SerialDenseMatrix RHS;
  RHS.Shape(N,NRHS); // Allocate RHS

  // Compute RHS from A and X

  Epetra_Flops RHS_counter;
  tstart = Timer.ElapsedTime();
  RHS.Multiply('L', 1.0, OrigBigMatrix, LHS, 0.0); // Symmetric Matrix-multiply
  time = Timer.ElapsedTime() - tstart;

  Epetra_SerialDenseMatrix OrigRHS = RHS;

  FLOPS = RHS_counter.Flops();
  MFLOPS = FLOPS/time/1000000.0;
  if (verbose) std::cout << "MFLOPS to build RHS (NRHS = " << NRHS <<") = " << MFLOPS << std::endl;

  // Set LHS and RHS and solve
  BigSolver.SetVectors(LHS, RHS);

  tstart = Timer.ElapsedTime();
  ierr = BigSolver.Solve();
  if (ierr==1 && verbose) std::cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." << std::endl;
  else if (ierr!=0 && verbose) std::cout << "Error in solve = "<<ierr<< std::endl;
  time = Timer.ElapsedTime() - tstart;

  FLOPS = BigSolver.Flops();
  MFLOPS = FLOPS/time/1000000.0;
  if (verbose) std::cout << "MFLOPS for Solve (NRHS = " << NRHS <<") = " << MFLOPS << std::endl;

  double * resid = new double[NRHS];
  bool OK = Residual(N, NRHS, A, LDA, BigSolver.X(), BigSolver.LDX(),
		     OrigRHS.A(), OrigRHS.LDA(), resid);

  if (verbose) {
    if (!OK) std::cout << "************* Residual do not meet tolerance *************" << std::endl;
    for (i=0; i<NRHS; i++)
      std::cout << "Residual[" << i <<"] = "<< resid[i] << std::endl;
    std::cout  << std::endl;

  // Solve again using the Epetra_SerialDenseVector class for LHS and RHS

  Epetra_SerialDenseVector X2;
  Epetra_SerialDenseVector B2;
  int length = BigMatrix.N();
  {for (int kk=0; kk<length; kk++) X2[kk] = ((double ) kk)/ ((double) length);} // Define entries of X2

  tstart = Timer.ElapsedTime();
  B2.Multiply('N', 'N', 1.0, OrigBigMatrix, X2, 0.0); // Define B2 = A*X2
  time = Timer.ElapsedTime() - tstart;

  Epetra_SerialDenseVector OrigB2 = B2;

  FLOPS = RHS_counter.Flops();
  MFLOPS = FLOPS/time/1000000.0;
  if (verbose) std::cout << "MFLOPS to build single RHS = " << MFLOPS << std::endl;

  // Set LHS and RHS and solve
  BigSolver.SetVectors(X2, B2);

  tstart = Timer.ElapsedTime();
  ierr = BigSolver.Solve();
  time = Timer.ElapsedTime() - tstart;
  if (ierr==1 && verbose) std::cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." << std::endl;
  else if (ierr!=0 && verbose) std::cout << "Error in solve = "<<ierr<< std::endl;

  FLOPS = counter.Flops();
  MFLOPS = FLOPS/time/1000000.0;
  if (verbose) std::cout << "MFLOPS to solve single RHS = " << MFLOPS << std::endl;

  OK = Residual(N, 1, A, LDA, BigSolver.X(), BigSolver.LDX(), OrigB2.A(),
		OrigB2.LDA(), resid);

  if (verbose) {
    if (!OK) std::cout << "************* Residual do not meet tolerance *************" << std::endl;
      std::cout << "Residual = "<< resid[0] << std::endl;
  delete [] resid;
  delete [] A;
  delete [] X;

  // Now test default constructor and index operators

  N = 5;
  Epetra_SerialSymDenseMatrix C; // Implicit call to default constructor, should not need to call destructor
  C.Shape(5); // Make it 5 by 5
  double * C1 = new double[N*N];
  GenerateHilbert(C1, N, N); // Generate Hilber matrix

  C1[1+2*N] = 1000.0;  // Make matrix nonsymmetric

  // Fill values of C with Hilbert values
  for (i=0; i<N; i++)
    for (j=0; j<N; j++)
      C(i,j) = C1[i+j*N];

  // Test if values are correctly written and read
  for (i=0; i<N; i++)
    for (j=0; j<N; j++) {
      assert(C(i,j) == C1[i+j*N]);
      assert(C(i,j) == C[j][i]);

  if (verbose)
    std::cout << "Default constructor and index operator check OK.  Values of Hilbert matrix = "
	 << std::endl << C << std::endl
	 << "Values should be 1/(i+j+1), except value (1,2) should be 1000" << std::endl;

  delete [] C1;

  MPI_Finalize() ;

/* end main
return ierr ;
コード例 #11
ファイル: cxx_main.cpp プロジェクト: cakeisalie/oomphlib_003
// Test driver (example #11): sets up the communicator and the verbosity flags
// used by the Epetra_CrsMatrix checks that follow.
// NOTE(review): the opening brace of main is not present in this text.
int main(int argc, char *argv[])
  int ierr = 0, forierr = 0;
  bool debug = false;


  // Initialize MPI
  // NOTE(review): `rank` and `Comm` are each declared twice below (an MPI
  // variant and a serial variant). No preprocessor conditional selecting one of
  // them is visible here, so as written these are redeclarations.

  int rank; // My process ID

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  Epetra_MpiComm Comm( MPI_COMM_WORLD );


  int rank = 0;
  Epetra_SerialComm Comm;


  bool verbose = false;

  // Check if we should print results to standard out
  if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true;

  // Round-trip the flag through an int so it can be broadcast (root argument 0).
  int verbose_int = verbose ? 1 : 0;
  Comm.Broadcast(&verbose_int, 1, 0);
  verbose = verbose_int==1 ? true : false;

  //  char tmp;
  //  if (rank==0) cout << "Press any key to continue..."<< std::endl;
  //  if (rank==0) cin >> tmp;
  //  Comm.Barrier();

  Comm.SetTracebackMode(0); // This should shut down any error traceback reporting
  int MyPID = Comm.MyPID();
  int NumProc = Comm.NumProc();

  if(verbose && MyPID==0)
    cout << Epetra_Version() << std::endl << std::endl;

  if (verbose) cout << "Processor "<<MyPID<<" of "<< NumProc
		    << " is alive."<<endl;

  // Keep the unrestricted flag: verbose1 stays true on every process when -v
  // was given, while verbose is narrowed to rank 0 just below.
  bool verbose1 = verbose;

  // Redefine verbose to only print on PE 0
  if(verbose && rank!=0) 
		verbose = false;

  int NumMyEquations = 10000;
  int NumGlobalEquations = (NumMyEquations * NumProc) + EPETRA_MIN(NumProc,3);
  if(MyPID < 3) 

  // Construct a Map that puts approximately the same Number of equations on each processor

  Epetra_Map Map(NumGlobalEquations, NumMyEquations, 0, Comm);
  // Get update list and number of local equations from newly created Map
  int* MyGlobalElements = new int[Map.NumMyElements()];

  // Create an integer vector NumNz that is used to build the Petra Matrix.
  // NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation on this processor

  int* NumNz = new int[NumMyEquations];

  // We are building a tridiagonal matrix where each row has (-1 2 -1)
  // So we need 2 off-diagonal terms (except for the first and last equation)

  for (int i = 0; i < NumMyEquations; i++)
    if((MyGlobalElements[i] == 0) || (MyGlobalElements[i] == NumGlobalEquations - 1))
      NumNz[i] = 1;
      NumNz[i] = 2;

  // Create a Epetra_Matrix

  Epetra_CrsMatrix A(Copy, Map, NumNz);
  // Add  rows one-at-a-time
  // Need some vectors to help
  // Off diagonal Values will always be -1

  double* Values = new double[2];
  Values[0] = -1.0; 
	Values[1] = -1.0;
  int* Indices = new int[2];
  double two = 2.0;
  int NumEntries;

  forierr = 0;
  for (int i = 0; i < NumMyEquations; i++) {
    if(MyGlobalElements[i] == 0) {
			Indices[0] = 1;
			NumEntries = 1;
    else if (MyGlobalElements[i] == NumGlobalEquations-1) {
			Indices[0] = NumGlobalEquations-2;
			NumEntries = 1;
    else {
			Indices[0] = MyGlobalElements[i]-1;
			Indices[1] = MyGlobalElements[i]+1;
			NumEntries = 2;
		forierr += !(A.InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices)==0);
		forierr += !(A.InsertGlobalValues(MyGlobalElements[i], 1, &two, MyGlobalElements+i)>0); // Put in the diagonal entry

  // Checks on the raw CRS storage pointers and on per-row entry counts.
  // NOTE(review): the three ExtractCrsDataPointers calls expect -1, -1, then 0,
  // but no statement that changes the matrix state between them is visible
  // here; presumably fill/storage-optimisation calls belong in between.
  int * indexOffsetTmp;
  int * indicesTmp;
  double * valuesTmp;
  // Finish up
  EPETRA_TEST_ERR(!(A.ExtractCrsDataPointers(indexOffsetTmp, indicesTmp, valuesTmp)==-1),ierr);  // Should fail
  EPETRA_TEST_ERR(!(A.ExtractCrsDataPointers(indexOffsetTmp, indicesTmp, valuesTmp)==-1),ierr);  // Should fail
  EPETRA_TEST_ERR(!(A.ExtractCrsDataPointers(indexOffsetTmp, indicesTmp, valuesTmp)==0),ierr);  // Should succeed
  const Epetra_CrsGraph & GofA = A.Graph();
  EPETRA_TEST_ERR((indicesTmp!=GofA[0] || valuesTmp!=A[0]),ierr); // Extra check to see if operator[] is consistent
  // Expected local nonzero count: three per row, minus one for each of the
  // first and last global rows when this process owns them.
  int NumMyNonzeros = 3 * NumMyEquations;
  if(A.LRID(0) >= 0) 
		NumMyNonzeros--; // If I own first global row, then there is one less nonzero
  if(A.LRID(NumGlobalEquations-1) >= 0) 
		NumMyNonzeros--; // If I own last global row, then there is one less nonzero
  EPETRA_TEST_ERR(check(A, NumMyEquations, NumGlobalEquations, NumMyNonzeros, 3*NumGlobalEquations-2, 
	       MyGlobalElements, verbose),ierr);
  // Each row should hold its off-diagonal count plus the diagonal entry.
  // NOTE(review): forierr is accumulated and then reset below without any
  // visible check of its value.
  forierr = 0;
  for (int i = 0; i < NumMyEquations; i++) 
		forierr += !(A.NumGlobalEntries(MyGlobalElements[i])==NumNz[i]+1);
  forierr = 0;
  for (int i = 0; i < NumMyEquations; i++) 
		forierr += !(A.NumMyEntries(i)==NumNz[i]+1);

  if (verbose) cout << "\n\nNumEntries function check OK" << std::endl<< std::endl;


  // Runs power_method four times (A, A transposed, then both again after the
  // first diagonal entry is scaled by 10) and prints an MFLOP rate after each.
  // Create vectors for Power method

  Epetra_Vector q(Map);
  Epetra_Vector z(Map);
  Epetra_Vector resid(Map);

  // variable needed for iteration
  double lambda = 0.0;
  // int niters = 10000;
  int niters = 200;
  double tolerance = 1.0e-1;

  // Iterate

  // NOTE(review): flopcounter is declared but no statement attaching it to A,
  // q, z or resid is visible here.
  Epetra_Flops flopcounter;

  // NOTE(review): the timer is never reset in the visible code, so each
  // elapsed_time below presumably measures from this construction point.
  Epetra_Time timer(Comm);
  EPETRA_TEST_ERR(power_method(false, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr);
  double elapsed_time = timer.ElapsedTime();
  double total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops();
  double MFLOPs = total_flops/elapsed_time/1000000.0;

  if (verbose) cout << "\n\nTotal MFLOPs for first solve = " << MFLOPs << std::endl<< std::endl;

  // Solve transpose problem

  if (verbose) cout << "\n\nUsing transpose of matrix and solving again (should give same result).\n\n"
		    << std::endl;
  // Iterate
  lambda = 0.0;
  EPETRA_TEST_ERR(power_method(true, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr);
  elapsed_time = timer.ElapsedTime();
  total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;

  if (verbose) cout << "\n\nTotal MFLOPs for transpose solve = " << MFLOPs << std::endl<< endl;


  // Increase diagonal dominance

  if (verbose) cout << "\n\nIncreasing the magnitude of first diagonal term and solving again\n\n"
		    << endl;

  // Only the process that owns global row 0 rewrites it: copy the row out,
  // multiply the entry whose column index is 0 by 10, and write the row back.
  // NOTE(review): the closing brace of this if-block is not present in this text.
  if (A.MyGlobalRow(0)) {
    int numvals = A.NumGlobalEntries(0);
    double * Rowvals = new double [numvals];
    int    * Rowinds = new int    [numvals];
    A.ExtractGlobalRowCopy(0, numvals, numvals, Rowvals, Rowinds); // Get A[0,0]

    for (int i=0; i<numvals; i++) if (Rowinds[i] == 0) Rowvals[i] *= 10.0;
    A.ReplaceGlobalValues(0, numvals, Rowvals, Rowinds);
    delete [] Rowvals;
    delete [] Rowinds;
  // Iterate (again)
  lambda = 0.0;
  EPETRA_TEST_ERR(power_method(false, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr);
  elapsed_time = timer.ElapsedTime();
  total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;

  if (verbose) cout << "\n\nTotal MFLOPs for second solve = " << MFLOPs << endl<< endl;


  // Solve transpose problem

  if (verbose) cout << "\n\nUsing transpose of matrix and solving again (should give same result).\n\n"
		    << endl;

  // Iterate (again)
  lambda = 0.0;
  EPETRA_TEST_ERR(power_method(true, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr);
  elapsed_time = timer.ElapsedTime();
  total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;

  if (verbose) cout << "\n\nTotal MFLOPs for tranpose of second solve = " << MFLOPs << endl<< endl;

  // Exercises three Epetra_CrsMatrix constructors (constant entries per row,
  // copy, and local view) on a matrix with a single 1.0 on each diagonal, then
  // checks that a later insertion into the copy returns -2.
  if (verbose) cout << "\n\n*****Testing constant entry constructor" << endl<< endl;

  Epetra_CrsMatrix AA(Copy, Map, 5);
  if (debug) Comm.Barrier();

  double dble_one = 1.0;
  for (int i=0; i< NumMyEquations; i++) AA.InsertGlobalValues(MyGlobalElements[i], 1, &dble_one, MyGlobalElements+i);

  // Note:  All processors will call the following Insert routines, but only the processor
  //        that owns it will actually do anything

  // The owner of global row 0 inserts zero entries and expects 0; every other
  // process attempts a one-entry insert into that row and expects -1.
  // NOTE(review): the closing brace before the else is not present in this text.
  int One = 1;
  if (AA.MyGlobalRow(0)) {
    EPETRA_TEST_ERR(!(AA.InsertGlobalValues(0, 0, &dble_one, &One)==0),ierr);
  else EPETRA_TEST_ERR(!(AA.InsertGlobalValues(0, 1, &dble_one, &One)==-1),ierr);
  if (debug) Comm.Barrier();
  EPETRA_TEST_ERR(check(AA, NumMyEquations, NumGlobalEquations, NumMyEquations, NumGlobalEquations, 
	       MyGlobalElements, verbose),ierr);

  if (debug) Comm.Barrier();

  // NOTE(review): forierr is accumulated here and in the loops below, but no
  // check of its value is visible before it is reset.
  forierr = 0;
  for (int i=0; i<NumMyEquations; i++) forierr += !(AA.NumGlobalEntries(MyGlobalElements[i])==1);

  if (verbose) cout << "\n\nNumEntries function check OK" << endl<< endl;

  if (debug) Comm.Barrier();

  if (verbose) cout << "\n\n*****Testing copy constructor" << endl<< endl;

  Epetra_CrsMatrix B(AA);
  EPETRA_TEST_ERR(check(B, NumMyEquations, NumGlobalEquations, NumMyEquations, NumGlobalEquations, 
	       MyGlobalElements, verbose),ierr);

  forierr = 0;
  for (int i=0; i<NumMyEquations; i++) forierr += !(B.NumGlobalEntries(MyGlobalElements[i])==1);

  if (verbose) cout << "\n\nNumEntries function check OK" << endl<< endl;

  if (debug) Comm.Barrier();

  if (verbose) cout << "\n\n*****Testing local view constructor" << endl<< endl;

  Epetra_CrsMatrix BV(View, AA.RowMap(), AA.ColMap(), 0);

  // Feed each row view of AA into BV.
  // NOTE(review): the closing brace of this loop is not present in this text.
  forierr = 0;
  int* Inds;
  double* Vals;
  for (int i = 0; i < NumMyEquations; i++) {
    forierr += !(AA.ExtractMyRowView(i, NumEntries, Vals, Inds)==0);
    forierr += !(BV.InsertMyValues(i, NumEntries, Vals, Inds)==0);
  EPETRA_TEST_ERR(check(BV, NumMyEquations, NumGlobalEquations, NumMyEquations, NumGlobalEquations, 
												MyGlobalElements, verbose),ierr);

  forierr = 0;
  for (int i=0; i<NumMyEquations; i++) forierr += !(BV.NumGlobalEntries(MyGlobalElements[i])==1);

  if (verbose) cout << "\n\nNumEntries function check OK" << endl<< endl;

  if (debug) Comm.Barrier();
  if (verbose) cout << "\n\n*****Testing post construction modifications" << endl<< endl;

  // Inserting into the copied matrix B is expected to return -2.
  EPETRA_TEST_ERR(!(B.InsertGlobalValues(0, 1, &dble_one, &One)==-2),ierr);

  // Frees the arrays of the first test, then (only when verbose1 is set) builds
  // a small tridiagonal matrix with two rows per process and prints it.
  // Release all objects
  delete [] NumNz;
  delete [] Values;
  delete [] Indices;
  delete [] MyGlobalElements;

  // NOTE(review): the closing brace of this if-block is not present in this
  // text; NumNz1 and MyGlobalElements1, deleted at the end of this section, are
  // declared inside it.
  if (verbose1) {
    // Test ostream << operator (if verbose1)
    // Construct a Map that puts 2 equations on each PE
    int NumMyElements1 = 2;
    int NumMyEquations1 = NumMyElements1;
    int NumGlobalEquations1 = NumMyEquations1*NumProc;

    Epetra_Map Map1(-1, NumMyElements1, 0, Comm);
    // Get update list and number of local equations from newly created Map
    // NOTE(review): no statement filling MyGlobalElements1 from Map1 is visible
    // before it is read below.
    int * MyGlobalElements1 = new int[Map1.NumMyElements()];
    // Create an integer vector NumNz that is used to build the Petra Matrix.
    // NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation on this processor
    int * NumNz1 = new int[NumMyEquations1];
    // We are building a tridiagonal matrix where each row has (-1 2 -1)
    // So we need 2 off-diagonal terms (except for the first and last equation)
    // NOTE(review): the two assignments below read like the branches of an
    // if/else, but no else is present in this text.
    for (int i=0; i<NumMyEquations1; i++)
      if (MyGlobalElements1[i]==0 || MyGlobalElements1[i] == NumGlobalEquations1-1)
	NumNz1[i] = 1;
	NumNz1[i] = 2;
    // Create a Epetra_Matrix
    Epetra_CrsMatrix A1(Copy, Map1, NumNz1);
    // Add  rows one-at-a-time
    // Need some vectors to help
    // Off diagonal Values will always be -1
    double *Values1 = new double[2];
    Values1[0] = -1.0; Values1[1] = -1.0;
    int *Indices1 = new int[2];
    double two1 = 2.0;
    int NumEntries1;

    // NOTE(review): the loop and its if / else-if branches have no braces in
    // this text, and the final three assignments have no visible else.
    forierr = 0;
    for (int i=0; i<NumMyEquations1; i++)
	if (MyGlobalElements1[i]==0)
	    Indices1[0] = 1;
	    NumEntries1 = 1;
	else if (MyGlobalElements1[i] == NumGlobalEquations1-1)
	    Indices1[0] = NumGlobalEquations1-2;
	    NumEntries1 = 1;
	    Indices1[0] = MyGlobalElements1[i]-1;
	    Indices1[1] = MyGlobalElements1[i]+1;
	    NumEntries1 = 2;
	forierr += !(A1.InsertGlobalValues(MyGlobalElements1[i], NumEntries1, Values1, Indices1)==0);
	forierr += !(A1.InsertGlobalValues(MyGlobalElements1[i], 1, &two1, MyGlobalElements1+i)>0); // Put in the diagonal entry
    delete [] Indices1;
    delete [] Values1;
    // Finish up
    // Test diagonal extraction function

    // NOTE(review): no call that copies A1's diagonal into checkDiag is visible
    // before the comparison below.
    Epetra_Vector checkDiag(Map1);

    forierr = 0;
    for (int i=0; i<NumMyEquations1; i++) forierr += !(checkDiag[i]==two1);

    // Test diagonal replacement method

    forierr = 0;
    for (int i=0; i<NumMyEquations1; i++) checkDiag[i]=two1*two1;


    Epetra_Vector checkDiag1(Map1);

    forierr = 0;
    for (int i=0; i<NumMyEquations1; i++) forierr += !(checkDiag[i]==checkDiag1[i]);

    if (verbose) cout << "\n\nDiagonal extraction and replacement OK.\n\n" << endl;

    // NOTE(review): orignorm is not used again in the visible code.
    double orignorm = A1.NormOne();
    if (verbose) cout << "\n\nMatrix scale OK.\n\n" << endl;

    if (verbose) cout << "\n\nPrint out tridiagonal matrix, each part on each processor.\n\n" << endl;
    cout << A1 << endl;

  // Release all objects
  delete [] NumNz1;
  delete [] MyGlobalElements1;


  // LeftScale / RightScale test: builds a matrix with one row per process and
  // seven columns, records its inf- and one-norms at several points, and sets
  // ScalingBroke when the norms do not relate as expected.
  // NOTE(review): the comments below refer to scaling operations, but no
  // LeftScale/RightScale call is visible between the norm measurements, and the
  // closing braces of the if-blocks and loops are not present in this text.
  if (verbose) cout << "\n\n*****Testing LeftScale and RightScale" << endl << endl;

  int NumMyElements2 = 7;
  int NumMyRows2 = 1;//This value should not be changed without editing the
		// code below.
  Epetra_Map RowMap(-1,NumMyRows2,0,Comm);
  Epetra_Map ColMap(NumMyElements2,NumMyElements2,0,Comm);
  // The DomainMap needs to be different from the ColMap for the test to 
  // be meaningful.
  Epetra_Map DomainMap(NumMyElements2,0,Comm);
  int NumMyRangeElements2 = 0;
  // We need to distribute the elements differently for the range map also.
  if (MyPID % 2 == 0)
    NumMyRangeElements2 = NumMyRows2*2; //put elements on even number procs 
  if (NumProc % 2 == 1 && MyPID == NumProc-1)
    NumMyRangeElements2 = NumMyRows2; //If number of procs is odd, put
			// the last NumMyElements2 elements on the last proc
  Epetra_Map RangeMap(-1,NumMyRangeElements2,0,Comm);
  Epetra_CrsMatrix A2(Copy,RowMap,ColMap,NumMyElements2);
  double * Values2 = new double[NumMyElements2];
  int * Indices2 = new int[NumMyElements2]; 

  // NOTE(review): only Values2 is filled in the visible loop body; no
  // assignment to Indices2 and no insertion into A2 is visible.
  for (int i=0; i<NumMyElements2; i++) {
    Values2[i] = i+MyPID;

  Epetra_CrsMatrix A2copy(A2);

  // Scale factors alternate between 1 and 2 according to the parity of the
  // global row index (rows) or of the local index (columns).
  double * RowLeftScaleValues = new double[NumMyRows2];
  double * ColRightScaleValues = new double[NumMyElements2];
  int RowLoopLength = RowMap.MaxMyGID()-RowMap.MinMyGID()+1;
  for (int i=0; i<RowLoopLength; i++)
    RowLeftScaleValues[i] = (i + RowMap.MinMyGID() ) % 2 + 1;
  // For the column map, all procs own all elements
  for (int  i=0; i<NumMyElements2;i++)
    ColRightScaleValues[i] = i % 2 + 1;

  // Range/domain factors are the reciprocals (1 or 1/2) of the same pattern.
  int RangeLoopLength = RangeMap.MaxMyGID()-RangeMap.MinMyGID()+1;
  double * RangeLeftScaleValues = new double[RangeLoopLength];
  int DomainLoopLength = DomainMap.MaxMyGID()-DomainMap.MinMyGID()+1;
   double * DomainRightScaleValues = new double[DomainLoopLength];
  for (int i=0; i<RangeLoopLength; i++)
    RangeLeftScaleValues[i] = 1.0/((i + RangeMap.MinMyGID() ) % 2 + 1);
  for (int  i=0; i<DomainLoopLength;i++)
    DomainRightScaleValues[i] = 1.0/((i + DomainMap.MinMyGID() ) % 2 + 1);
  // The vectors are created in View mode over the arrays allocated above.
  Epetra_Vector xRow(View,RowMap,RowLeftScaleValues);
  Epetra_Vector xCol(View,ColMap,ColRightScaleValues);
  Epetra_Vector xRange(View,RangeMap,RangeLeftScaleValues);
  Epetra_Vector xDomain(View,DomainMap,DomainRightScaleValues);

  double A2infNorm = A2.NormInf();
  double A2oneNorm = A2.NormOne();

  if (verbose1) cout << A2;
  double A2infNorm1 = A2.NormInf();
  double A2oneNorm1 = A2.NormOne();
  bool ScalingBroke = false;
  if (A2infNorm1>2*A2infNorm||A2infNorm1<A2infNorm) {
    ScalingBroke = true;
  if (A2oneNorm1>2*A2oneNorm||A2oneNorm1<A2oneNorm) {

    ScalingBroke = true;
  if (verbose1) cout << A2;
  double A2infNorm2 = A2.NormInf();
  double A2oneNorm2 = A2.NormOne();
  if (A2infNorm2>=2*A2infNorm1||A2infNorm2<=A2infNorm1) {
    ScalingBroke = true;
  if (A2oneNorm2>2*A2oneNorm1||A2oneNorm2<=A2oneNorm1) {
    ScalingBroke = true;
  if (verbose1) cout << A2;
  double A2infNorm3 = A2.NormInf();
  double A2oneNorm3 = A2.NormOne();
  // The last two scaling ops cancel each other out
  if (A2infNorm3!=A2infNorm1) {
    ScalingBroke = true;
  if (A2oneNorm3!=A2oneNorm1) {
    ScalingBroke = true;
  if (verbose1) cout << A2;
  double A2infNorm4 = A2.NormInf();
  double A2oneNorm4 = A2.NormOne();
  // The 4 scaling ops all cancel out
  if (A2infNorm4!=A2infNorm) {
    ScalingBroke = true;
  if (A2oneNorm4!=A2oneNorm) {
    ScalingBroke = true;

  //  Now try changing the values underneath and make sure that 
  //  telling one process about the change causes NormInf() and 
  //  NormOne() to recompute the norm on all processes.
  double *values; 
  int num_my_rows = A2.NumMyRows() ; 
  int num_entries;

  // Double every stored value in place through the row views.
  for ( int  i=0 ; i< num_my_rows; i++ ) {
    EPETRA_TEST_ERR( A2.ExtractMyRowView( i, num_entries, values ), ierr );
    for ( int j = 0 ; j <num_entries; j++ ) {
      values[j] *= 2.0; 

  // Only process 0 makes this zero-entry call; per the comment above it is how
  // one process is told about the change.
  if ( MyPID == 0 )
    A2.SumIntoGlobalValues( 0, 0, 0, 0 ) ; 

  double A2infNorm5 = A2.NormInf();
  double A2oneNorm5 = A2.NormOne();

  if (A2infNorm5!=2.0 * A2infNorm4) {
    ScalingBroke = true;
  if (A2oneNorm5!= 2.0 * A2oneNorm4) {
    ScalingBroke = true;

  //  Restore the values underneath
  for ( int  i=0 ; i< num_my_rows; i++ ) {
    EPETRA_TEST_ERR( A2.ExtractMyRowView( i, num_entries, values ), ierr );
    for ( int j = 0 ; j <num_entries; j++ ) {
      values[j] /= 2.0; 

  if (verbose1) cout << A2;

  if (ScalingBroke) {
    if (verbose) cout << endl << "LeftScale and RightScale tests FAILED" << endl << endl;
  else {
    if (verbose) cout << endl << "LeftScale and RightScale tests PASSED" << endl << endl;

  // InvRowMaxs/InvColMaxs and InvRowSums/InvColSums tests on A2: after each
  // step the relevant norm, stored as a float, is expected to be within 1e-5
  // of 1; otherwise InvSumsBroke is set.
  // NOTE(review): apart from InvColSums(xDomain), the Inv* calls themselves and
  // the closing braces of the if-blocks are not present in this text.
  if (verbose) cout << "\n\n*****Testing InvRowMaxs and InvColMaxs" << endl << endl;

  if (verbose1) cout << A2 << endl;
  if (verbose1) cout << xRow << endl << xRange << endl;

  if (verbose) cout << "\n\n*****Testing InvRowSums and InvColSums" << endl << endl;
  bool InvSumsBroke = false;
// Works!
  if (verbose1) cout << xRow;
  float A2infNormFloat = A2.NormInf();
  if (verbose1) cout << A2 << endl;
  if (fabs(1.0-A2infNormFloat) > 1.e-5) {
    InvSumsBroke = true;

  // Works
  // The expected return code is 1 on a single process and 0 otherwise.
  int expectedcode = 1;
  if (Comm.NumProc()>1) expectedcode = 0;
  EPETRA_TEST_ERR(!(A2.InvColSums(xDomain)==expectedcode),ierr); // This matrix has a single row, the first column has a zero, so a warning is issued.
  if (verbose1) cout << xDomain << endl;
  float A2oneNormFloat2 = A2.NormOne();
  if (verbose1) cout << A2;
  if (fabs(1.0-A2oneNormFloat2)>1.e-5) {
    InvSumsBroke = true;

// Works!

  if (verbose1) cout << xRange;
  float A2infNormFloat2 = A2.NormInf(); // We use a float so that rounding error
	// will not prevent the sum from being 1.0.
  if (verbose1) cout << A2;
  if (fabs(1.0-A2infNormFloat2)>1.e-5) {
    cout << "InfNorm should be = 1, but InfNorm = " << A2infNormFloat2 << endl;
    InvSumsBroke = true;

  // NOTE(review): the block comment opened two lines below has no terminator
  // in this text, so as written it also swallows the delete statements that
  // end this section.
  // Doesn't work - may not need this test because column ownership is not unique
  /*  EPETRA_TEST_ERR(A2.InvColSums(xCol),ierr);
cout << xCol;
  float A2oneNormFloat = A2.NormOne();
cout << A2;
  if (fabs(1.0-A2oneNormFloat)>1.e-5) {
    InvSumsBroke = true;
  delete [] ColRightScaleValues;
  delete [] DomainRightScaleValues;
  // Partial-sum test: each process owns global rows MyPID and MyPID+1, so
  // neighbouring processes share a row. A3 and its transposed-layout
  // counterpart A3cm are expected to have inf-norm / one-norm exactly 1.
  // NOTE(review): the loop bodies that insert into A3, the row/column-sum
  // scaling calls, FillComplete on A3cm, and the closing braces are not present
  // in this text.
  if (verbose) cout << "Begin partial sum testing." << endl;
  // Test with a matrix that has partial sums for a subset of the rows 
  // on multiple processors. (Except for the serial case, of course.)
  int NumMyRows3 = 2; // Changing this requires further changes below
  int * myGlobalElements = new int[NumMyRows3];
  for (int i=0; i<NumMyRows3; i++) myGlobalElements[i] = MyPID+i;
  Epetra_Map RowMap3(NumProc*2, NumMyRows3, myGlobalElements, 0, Comm);
  int NumMyElements3 = 5;
  Epetra_CrsMatrix A3(Copy, RowMap3, NumMyElements3);
  double * Values3 = new double[NumMyElements3];
  int * Indices3 = new int[NumMyElements3];
  for (int i=0; i < NumMyElements3; i++) {
    Values3[i] = (int) (MyPID + (i+1));
  for (int i=0; i<NumMyRows3; i++) {
  Epetra_Map RangeMap3(NumProc+1, 0, Comm);
  Epetra_Map DomainMap3(NumMyElements3, 0, Comm);
  EPETRA_TEST_ERR(A3.FillComplete(DomainMap3, RangeMap3,false),ierr);
  if (verbose1) cout << A3;
  Epetra_Vector xRange3(RangeMap3,false);
  Epetra_Vector xDomain3(DomainMap3,false);


  if (verbose1) cout << xRange3;
  // The norm is narrowed to float before the exact comparison with 1.0.
  float A3infNormFloat = A3.NormInf();
  if (verbose1) cout << A3;
  if (1.0!=A3infNormFloat) {
    cout << "InfNorm should be = 1, but InfNorm = " << A3infNormFloat <<endl;
    InvSumsBroke = true;
  // we want to take the transpose of our matrix and fill in different values.
  int NumMyColumns3 = NumMyRows3;
  Epetra_Map ColMap3cm(RowMap3); 
  Epetra_Map RowMap3cm(A3.ColMap());

  Epetra_CrsMatrix A3cm(Copy,RowMap3cm,ColMap3cm,NumProc+1);
  double *Values3cm = new double[NumMyColumns3];
  int * Indices3cm = new int[NumMyColumns3];
  for (int i=0; i<NumMyColumns3; i++) {
    Values3cm[i] = MyPID + i + 1;
    Indices3cm[i]= i + MyPID;
  for (int ii=0; ii<NumMyElements3; ii++) {
    A3cm.InsertGlobalValues(ii, NumMyColumns3, Values3cm, Indices3cm);

  // The DomainMap and the RangeMap from the last test will work fine for 
  // the RangeMap and DomainMap, respectively, but I will make copies to
  // avoid confusion when passing what looks like a DomainMap where we
  // need a RangeMap and vice versa.
  Epetra_Map RangeMap3cm(DomainMap3);
  Epetra_Map DomainMap3cm(RangeMap3);
  if (verbose1) cout << A3cm << endl;

  // Again, we can copy objects from the last example.
  //Epetra_Vector xRange3cm(xDomain3); //Don't use at this time
  Epetra_Vector xDomain3cm(DomainMap3cm,false);


  if (verbose1) cout << xDomain3cm << endl;

  float A3cmOneNormFloat  = A3cm.NormOne();
  if (verbose1) cout << A3cm << endl;
  if (1.0!=A3cmOneNormFloat) {
    cout << "OneNorm should be = 1, but OneNorm = " << A3cmOneNormFloat << endl;
    InvSumsBroke = true;
  if (verbose) cout << "End partial sum testing" << endl;
  // Replicated-row test: reuses RowMap3 and the Values3 array from the
  // partial-sum test to build A4 (and A4cm on the transposed layout). The
  // expected inf-norm of A4 is 2 on more than one process and 1 on a single
  // process.
  // NOTE(review): the loop bodies that insert into A4 / A4cm, the scaling
  // calls, and the closing braces are not present in this text.
  if (verbose) cout << "Begin replicated testing" << endl;

  // We will now view the shared row as a replicated row, rather than one 
  // that has partial sums of its entries on multiple processors.
  // We will reuse much of the data used for the partial sum testing.
  Epetra_Vector xRow3(RowMap3,false); 
  Epetra_CrsMatrix A4(Copy, RowMap3, NumMyElements3);
  for (int ii=0; ii < NumMyElements3; ii++) {
    Values3[ii] = (int)((ii*.6)+1.0);
  for (int ii=0; ii<NumMyRows3; ii++) {
  EPETRA_TEST_ERR(A4.FillComplete(DomainMap3, RangeMap3,false),ierr);
  if (verbose1) cout << A4 << endl;
  // The next two lines should be expanded into a verifiable test.
  if (verbose1) cout << xRow3 << xRange3;

  if (verbose1) cout << xRow3;
  float A4infNormFloat = A4.NormInf();
  if (verbose1) cout << A4;
  if (2.0!=A4infNormFloat && NumProc != 1) {
    if (verbose1) cout << "InfNorm should be = 2 (because one column is replicated on two processors and NormOne() does not handle replication), but InfNorm = " << A4infNormFloat <<endl;
    InvSumsBroke = true;
  else if (1.0!=A4infNormFloat && NumProc == 1) {
    if (verbose1) cout << "InfNorm should be = 1, but InfNorm = " << A4infNormFloat <<endl;
    InvSumsBroke = true;
  Epetra_Vector xCol3cm(ColMap3cm,false);
  Epetra_CrsMatrix A4cm(Copy, RowMap3cm, ColMap3cm, NumProc+1);
  //Use values from A3cm
  for (int ii=0; ii<NumMyElements3; ii++) {
  EPETRA_TEST_ERR(A4cm.FillComplete(DomainMap3cm, RangeMap3cm,false),ierr);
  if (verbose1) cout << A4cm << endl;
  // The next two lines should be expanded into a verifiable test.
  if (verbose1) cout << xCol3cm << xDomain3cm;
  if (verbose1) cout << xCol3cm << endl;
  float A4cmOneNormFloat = A4cm.NormOne();
  if (verbose1) cout << A4cm << endl;
  if (2.0!=A4cmOneNormFloat && NumProc != 1) {
    if (verbose1) cout << "OneNorm should be = 2 (because one column is replicated on two processors and NormOne() does not handle replication), but OneNorm = " << A4cmOneNormFloat << endl;
    InvSumsBroke = true;
  else if (1.0!=A4cmOneNormFloat && NumProc == 1) {
    if (verbose1) cout << "OneNorm should be = 1, but OneNorm = " << A4infNormFloat <<endl;
    InvSumsBroke = true;

  // Reports the InvRowSums result, checks the Frobenius norm of A3cm against
  // sqrt(4 * number of global nonzeros), then frees the remaining arrays.
  // NOTE(review): the else between the FAILED/PASSED messages, the closing
  // braces of the if-blocks and of main, and the terminator of the trailing
  // "end main" comment are not present in this text.
  if (verbose) cout << "End replicated testing" << endl;

  if (InvSumsBroke) {
    if (verbose) cout << endl << "InvRowSums tests FAILED" << endl << endl;
    if (verbose) cout << endl << "InvRowSums tests PASSED" << endl << endl;

  // The reference value corresponds to every stored entry having magnitude 2
  // (sum of squares = 4 * nnz) - presumably the state left by earlier scaling.
  int nnz_A3cm = A3cm.Graph().NumGlobalNonzeros();
  double check_frobnorm = sqrt(nnz_A3cm*4.0);
  double frobnorm = A3cm.NormFrobenius();

  bool frobnorm_test_failed = false;
  if (fabs(check_frobnorm-frobnorm) > 5.e-5) {
    frobnorm_test_failed = true;

  if (frobnorm_test_failed) {
    if (verbose) std::cout << "Frobenius-norm test FAILED."<<std::endl;
    EPETRA_TEST_ERR(-65, ierr);

  delete [] Values2;
  delete [] Indices2;
  delete [] myGlobalElements;
  delete [] Values3;
  delete [] Indices3;
  delete [] Values3cm;
  delete [] Indices3cm;
  delete [] RangeLeftScaleValues;
  delete [] RowLeftScaleValues;
  MPI_Finalize() ;

/* end main
return ierr ;
コード例 #12
ファイル: ex20.cpp プロジェクト: 00liujj/trilinos
int main(int argc, char *argv[])

#ifdef HAVE_MPI
  MPI_Init(&argc, &argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
  Epetra_SerialComm Comm;
  bool verbose = (Comm.MyPID() == 0);

  // set global dimension to 5, could be any number
  int NumGlobalElements = 5;
  // create a map
  Epetra_Map Map(NumGlobalElements,0,Comm);
  // local number of rows
  int NumMyElements = Map.NumMyElements();
  // get update list
  int * MyGlobalElements = Map.MyGlobalElements( );

  // ============= CONSTRUCTION OF THE MATRIX ===========================
  // Create a Epetra_Matrix

  Epetra_CrsMatrix A(Copy,Map,3);

  // Add  rows one-at-a-time

  double *Values = new double[2];
  Values[0] = -1.0; Values[1] = -1.0;
  int *Indices = new int[2];
  double two = 2.0;
  int NumEntries;

  for( int i=0 ; i<NumMyElements; ++i ) {
    if (MyGlobalElements[i]==0) {
	Indices[0] = 1;
	NumEntries = 1;
    } else if (MyGlobalElements[i] == NumGlobalElements-1) {
      Indices[0] = NumGlobalElements-2;
      NumEntries = 1;
    } else {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      NumEntries = 2;
    A.InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices);
    // Put in the diagonal entry
    A.InsertGlobalValues(MyGlobalElements[i], 1, &two, MyGlobalElements+i);
  // Finish up

  // ================ CONSTRUCTION OF VECTORS =======================
  // build up two distributed vectors q and z, and compute
  // q = A * z
  Epetra_Vector q(A.RowMap());
  Epetra_Vector z(A.RowMap());

  // Fill z with 1's
  z.PutScalar( 1.0 );

  // ================ USE OF TIME AND FLOPS =========================
  Epetra_Flops counter;
  Epetra_Time timer(Comm);

  A.Multiply(false, z, q); // Compute q = A*z

  double elapsed_time = timer.ElapsedTime();
  double total_flops =counter.Flops();
  if (verbose)
    cout << "Total ops: " << total_flops << endl;
  double MFLOPs = total_flops/elapsed_time/1000000.0;
  if (verbose)
    cout << "Total MFLOPs  for mat-vec = " << MFLOPs << endl<< endl;

  double dotProduct;
  z.Dot(q, &dotProduct);

  total_flops =counter.Flops();
  if (verbose)
    cout << "Total ops: " << total_flops << endl;

  elapsed_time = timer.ElapsedTime();
  if (elapsed_time != 0.0)
    MFLOPs = (total_flops / elapsed_time) / 1000000.0;
    MFLOPs = 0;

  if (verbose)
    cout << "Total MFLOPs for vec-vec = " << MFLOPs << endl<< endl;
    cout << "q dot z = " << dotProduct << endl;

#ifdef HAVE_MPI

  return( 0 );

} /* main */
コード例 #13
ファイル: cxx_main.cpp プロジェクト: cakeisalie/oomphlib_003
int main(int argc, char *argv[]) {

  Epetra_MpiComm Comm (MPI_COMM_WORLD);
  Epetra_SerialComm Comm;

  cout << Comm << endl;

  int MyPID = Comm.MyPID();

  bool verbose = false;
  bool verbose1 = true;
  if (MyPID==0) verbose = true;

  if(argc < 2 && verbose) {
    cerr << "Usage: " << argv[0] 
	 << " HB_filename [level_fill [level_overlap [absolute_threshold [ relative_threshold]]]]" << endl
	 << "where:" << endl
	 << "HB_filename        - filename and path of a Harwell-Boeing data set" << endl
	 << "level_fill         - The amount of fill to use for ILU(k) preconditioner (default 0)" << endl
	 << "level_overlap      - The amount of overlap used for overlapping Schwarz subdomains (default 0)" << endl
	 << "absolute_threshold - The minimum value to place on the diagonal prior to factorization (default 0.0)" << endl
	 << "relative_threshold - The relative amount to perturb the diagonal prior to factorization (default 1.0)" << endl << endl
	 << "To specify a non-default value for one of these parameters, you must specify all" << endl
	 << " preceding values but not any subsequent parameters. Example:" << endl
	 << "ifpackHpcSerialMsr.exe mymatrix.hpc 1  - loads mymatrix.hpc, uses level fill of one, all other values are defaults" << endl
	 << endl;


  // Uncomment the next three lines to debug in mpi mode
  //int tmp;
  //if (MyPID==0) cin >> tmp;

  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
  // Call routine to read in HB problem
  Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact);

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);

  // Create Exporter to distribute read-in matrix and vectors

  Epetra_Export exporter(*readMap, map);
  Epetra_CrsMatrix A(Copy, map, 0);
  Epetra_Vector x(map);
  Epetra_Vector b(map);
  Epetra_Vector xexact(map);

  Epetra_Time FillTimer(Comm);
  x.Export(*readx, exporter, Add);
  b.Export(*readb, exporter, Add);
  xexact.Export(*readxexact, exporter, Add);
  double vectorRedistributeTime = FillTimer.ElapsedTime();
  A.Export(*readA, exporter, Add);
  double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime;
  double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime;
  if (Comm.MyPID()==0)	{
    cout << "\n\n****************************************************" << endl;
    cout << "\n Vector redistribute  time (sec) = " << vectorRedistributeTime<< endl;
    cout << "    Matrix redistribute time (sec) = " << matrixRedistributeTime << endl;
    cout << "    Transform to Local  time (sec) = " << fillCompleteTime << endl<< endl;
  Epetra_Vector tmp1(*readMap);
  Epetra_Vector tmp2(map);
  readA->Multiply(false, *readxexact, tmp1);

  A.Multiply(false, xexact, tmp2);
  double residual;
  if (verbose) cout << "Norm of Ax from file            = " << residual << endl;
  if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl;

  //cout << "A from file = " << *readA << endl << endl << endl;

  //cout << "A after dist = " << A << endl << endl << endl;

  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;


  bool smallProblem = false;
  if (A.RowMap().NumGlobalElements()<100) smallProblem = true;

  if (smallProblem)
    cout << "Original Matrix = " << endl << A   << endl;


  Epetra_LinearProblem FullProblem(&A, &x, &b);
  double normb, norma;
  norma = A.NormInf();
  if (verbose)
    cout << "Inf norm of Original Matrix = " << norma << endl
	 << "Inf norm of Original RHS    = " << normb << endl;
  Epetra_Time ReductionTimer(Comm);
  Epetra_CrsSingletonFilter SingletonFilter;
  double reduceInitTime = ReductionTimer.ElapsedTime();
  double reduceAnalyzeTime = ReductionTimer.ElapsedTime() - reduceInitTime;

  if (SingletonFilter.SingletonsDetected())
    cout << "Singletons found" << endl;
  else {
    cout << "Singletons not found" << endl;
  double reduceConstructTime = ReductionTimer.ElapsedTime() - reduceInitTime;

  double totalReduceTime = ReductionTimer.ElapsedTime();

  if (verbose)
    cout << "\n\n****************************************************" << endl
	 << "    Reduction init  time (sec)           = " << reduceInitTime<< endl
	 << "    Reduction Analyze time (sec)         = " << reduceAnalyzeTime << endl
	 << "    Construct Reduced Problem time (sec) = " << reduceConstructTime << endl
	 << "    Reduction Total time (sec)           = " << totalReduceTime << endl<< endl;


  Epetra_LinearProblem * ReducedProblem = SingletonFilter.ReducedProblem();

  Epetra_CrsMatrix * Ap = dynamic_cast<Epetra_CrsMatrix *>(ReducedProblem->GetMatrix());
  Epetra_Vector * bp = (*ReducedProblem->GetRHS())(0);
  Epetra_Vector * xp = (*ReducedProblem->GetLHS())(0);

  if (smallProblem)
    cout << " Reduced Matrix = " << endl << *Ap << endl
	 << " LHS before sol = " << endl << *xp << endl
	 << " RHS            = " << endl << *bp << endl;

  // Construct ILU preconditioner

  double elapsed_time, total_flops, MFLOPs;
  Epetra_Time timer(Comm);

  int LevelFill = 0;
  if (argc > 2)  LevelFill = atoi(argv[2]);
  if (verbose) cout << "Using Level Fill = " << LevelFill << endl;
  int Overlap = 0;
  if (argc > 3) Overlap = atoi(argv[3]);
  if (verbose) cout << "Using Level Overlap = " << Overlap << endl;
  double Athresh = 0.0;
  if (argc > 4) Athresh = atof(argv[4]);
  if (verbose) cout << "Using Absolute Threshold Value of = " << Athresh << endl;

  double Rthresh = 1.0;
  if (argc > 5) Rthresh = atof(argv[5]);
  if (verbose) cout << "Using Relative Threshold Value of = " << Rthresh << endl;

  Ifpack_IlukGraph * IlukGraph = 0;
  Ifpack_CrsRiluk * ILUK = 0;

  if (LevelFill>-1) {
    elapsed_time = timer.ElapsedTime();
    IlukGraph = new Ifpack_IlukGraph(Ap->Graph(), LevelFill, Overlap);
    elapsed_time = timer.ElapsedTime() - elapsed_time;
    if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl;

    Epetra_Flops fact_counter;
    elapsed_time = timer.ElapsedTime();
    ILUK = new Ifpack_CrsRiluk(*IlukGraph);
    int initerr = ILUK->InitValues(*Ap);
    if (initerr!=0) {
      cout << endl << Comm << endl << "  InitValues error = " << initerr;
      if (initerr==1) cout << "  Zero diagonal found, warning error only";
      cout << endl << endl;
    elapsed_time = timer.ElapsedTime() - elapsed_time;
    total_flops = ILUK->Flops();
    MFLOPs = total_flops/elapsed_time/1000000.0;
    if (verbose) cout << "Time to compute preconditioner values = " 
		    << elapsed_time << endl
		    << "MFLOPS for Factorization = " << MFLOPs << endl;
    //cout << *ILUK << endl;
  double Condest;
  ILUK->Condest(false, Condest);

  if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl;
  int Maxiter = 100;
  double Tolerance = 1.0E-8;

  Epetra_Flops counter;
  if (ILUK!=0) ILUK->SetFlopCounter(*Ap);

  elapsed_time = timer.ElapsedTime();

  double normreducedb, normreduceda;
  normreduceda = Ap->NormInf();
  if (verbose) 
    cout << "Inf norm of Reduced Matrix = " << normreduceda << endl
	 << "Inf norm of Reduced RHS    = " << normreducedb << endl;

  BiCGSTAB(*Ap, *xp, *bp, ILUK, Maxiter, Tolerance, &residual, verbose);

  elapsed_time = timer.ElapsedTime() - elapsed_time;
  total_flops = counter.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;
  if (verbose) cout << "Time to compute solution = " 
		    << elapsed_time << endl
		    << "Number of operations in solve = " << total_flops << endl
		    << "MFLOPS for Solve = " << MFLOPs<< endl << endl;


  if (smallProblem)
  cout << " Reduced LHS after sol = " << endl << *xp << endl
       << " Full    LHS after sol = " << endl << x << endl
       << " Full  Exact LHS         = " << endl << xexact << endl;

  Epetra_Vector resid(x);

  resid.Update(1.0, x, -1.0, xexact, 0.0); // resid = xcomp - xexact

  double normx, normxexact;

  if (verbose) 
    cout << "2-norm of computed solution                               = " << normx << endl
	 << "2-norm of exact solution                                  = " << normxexact << endl
	 << "2-norm of difference between computed and exact solution  = " << residual << endl;
  if (verbose1 && residual>1.0e-5) {
    if (verbose)
      cout << "Difference between computed and exact solution appears large..." << endl
	   << "Computing norm of A times this difference.  If this norm is small, then matrix is singular"
	   << endl;
    Epetra_Vector bdiff(b);
    assert(A.Multiply(false, resid, bdiff)==0);
    if (verbose) 
      cout << "2-norm of A times difference between computed and exact solution  = " << residual << endl;
  if (verbose) 
    cout << "********************************************************" << endl
	 << "              Solving again with 2*Ax=2*b" << endl
	 << "********************************************************" << endl;

  A.Scale(1.0); // A = 2*A
  b.Scale(1.0); // b = 2*b
  norma = A.NormInf();
  if (verbose)
    cout << "Inf norm of Original Matrix = " << norma << endl
	 << "Inf norm of Original RHS    = " << normb << endl;
  double updateReducedProblemTime = ReductionTimer.ElapsedTime();
  updateReducedProblemTime = ReductionTimer.ElapsedTime() - updateReducedProblemTime;
  if (verbose)
    cout << "\n\n****************************************************" << endl
	 << "    Update Reduced Problem time (sec)           = " << updateReducedProblemTime<< endl
	 << "****************************************************" << endl;

  if (LevelFill>-1) {

    Epetra_Flops fact_counter;
    elapsed_time = timer.ElapsedTime();

    int initerr = ILUK->InitValues(*Ap);
    if (initerr!=0) {
      cout << endl << Comm << endl << "  InitValues error = " << initerr;
      if (initerr==1) cout << "  Zero diagonal found, warning error only";
      cout << endl << endl;
    elapsed_time = timer.ElapsedTime() - elapsed_time;
    total_flops = ILUK->Flops();
    MFLOPs = total_flops/elapsed_time/1000000.0;
    if (verbose) cout << "Time to compute preconditioner values = " 
		    << elapsed_time << endl
		    << "MFLOPS for Factorization = " << MFLOPs << endl;
    double Condest;
    ILUK->Condest(false, Condest);
    if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl;
  normreduceda = Ap->NormInf();
  if (verbose) 
    cout << "Inf norm of Reduced Matrix = " << normreduceda << endl
	 << "Inf norm of Reduced RHS    = " << normreducedb << endl;

  BiCGSTAB(*Ap, *xp, *bp, ILUK, Maxiter, Tolerance, &residual, verbose);

  elapsed_time = timer.ElapsedTime() - elapsed_time;
  total_flops = counter.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;
  if (verbose) cout << "Time to compute solution = " 
		    << elapsed_time << endl
		    << "Number of operations in solve = " << total_flops << endl
		    << "MFLOPS for Solve = " << MFLOPs<< endl << endl;


  if (smallProblem)
  cout << " Reduced LHS after sol = " << endl << *xp << endl
       << " Full    LHS after sol = " << endl << x << endl
       << " Full  Exact LHS         = " << endl << xexact << endl;

  resid.Update(1.0, x, -1.0, xexact, 0.0); // resid = xcomp - xexact


  if (verbose) 
    cout << "2-norm of computed solution                               = " << normx << endl
	 << "2-norm of exact solution                                  = " << normxexact << endl
	 << "2-norm of difference between computed and exact solution  = " << residual << endl;
  if (verbose1 && residual>1.0e-5) {
    if (verbose)
      cout << "Difference between computed and exact solution appears large..." << endl
	   << "Computing norm of A times this difference.  If this norm is small, then matrix is singular"
	   << endl;
    Epetra_Vector bdiff(b);
    assert(A.Multiply(false, resid, bdiff)==0);
    if (verbose) 
      cout << "2-norm of A times difference between computed and exact solution  = " << residual << endl;

  if (ILUK!=0) delete ILUK;
  if (IlukGraph!=0) delete IlukGraph;
  MPI_Finalize() ;

return 0 ;