//=============================================================================
// Writes a summary of the matrix, followed by a table of its entries, to os.
//
// Every process walks the same loop over ranks; only the process whose rank
// matches the current iteration writes, and all processes meet at barriers
// after each turn. Rank 0 additionally writes the global statistics and the
// header of the entry table.
//
// The stream's format flags and precision are used as the caller left them.
//
// \param os  stream receiving the output; flushed at the end of each of this
//            process's two sections.
void Epetra_MsrMatrix::Print(ostream& os) const {
  int MyPID = RowMatrixRowMap().Comm().MyPID();
  int NumProc = RowMatrixRowMap().Comm().NumProc();

  // Pass 1: global statistics (rank 0 only) and per-process statistics.
  for (int iproc=0; iproc < NumProc; iproc++) {
    if (MyPID==iproc) {
      if (MyPID==0) {
        os <<  "\nNumber of Global Rows        = " << NumGlobalRows() << endl;
        os <<    "Number of Global Cols        = " << NumGlobalCols() << endl;
        os <<    "Number of Global Diagonals   = " << NumGlobalDiagonals() << endl;
        os <<    "Number of Global Nonzeros    = " << NumGlobalNonzeros() << endl;
        // The braces tie each line break to its message; without them the
        // line break would be written even when the message is not.
        if (LowerTriangular()) { os <<    " ** Matrix is Lower Triangular **" << endl; }
        if (UpperTriangular()) { os <<    " ** Matrix is Upper Triangular **" << endl; }
      }

      os <<  "\nNumber of My Rows        = " << NumMyRows() << endl;
      os <<    "Number of My Cols        = " << NumMyCols() << endl;
      os <<    "Number of My Diagonals   = " << NumMyDiagonals() << endl;
      os <<    "Number of My Nonzeros    = " << NumMyNonzeros() << endl << endl;

      os << flush;
    }
    // Do a few global ops to give I/O a chance to complete
    Comm().Barrier();
    Comm().Barrier();
    Comm().Barrier();
  }

  // Pass 2: one output line per stored entry.
  // NOTE(review): the extra enclosing scope presumably keeps the second
  // `iproc` from clashing with the first on compilers with pre-standard
  // for-loop scoping -- confirm before removing it.
  {
    for (int iproc=0; iproc < NumProc; iproc++) {
      if (MyPID==iproc) {

        if (MyPID==0) {
          // Column header of the entry table.
          os.width(8);
          os <<  "   Processor ";
          os.width(10);
          os <<  "   Row Index ";
          os.width(10);
          os <<  "   Col Index ";
          os.width(20);
          os <<  "   Value     ";
          os << endl;
        }

        for (int i=0; i<NumMyRows_; i++) {
          int Row = RowMatrixRowMap().GID(i); // Get global row number
          int NumEntries = GetRow(i); // ith row is now in Values_ and Indices_

          for (int j = 0; j < NumEntries ; j++) {
            os.width(8);
            os <<  MyPID ; os << "    ";
            os.width(10);
            os <<  Row ; os << "    ";
            os.width(10);
            // Column indices are converted to global ids through the column map.
            os <<  RowMatrixColMap().GID(Indices_[j]); os << "    ";
            os.width(20);
            os <<  Values_[j]; os << "    ";
            os << endl;
          }
        }

        os << flush;
      }
      // Do a few global ops to give I/O a chance to complete
      RowMatrixRowMap().Comm().Barrier();
      RowMatrixRowMap().Comm().Barrier();
      RowMatrixRowMap().Comm().Barrier();
    }
  }
}
//==============================================================================
// NOTE:
// - matrix is supposed to be localized, and passes through the
// singleton filter. This means that I do not have to look
// for Dirichlet nodes (singletons). Also, all rows and columns are 
// local.
int Ifpack_METISPartitioner::ComputePartitions()
{

  int ierr;
#ifdef HAVE_IFPACK_METIS
  int nbytes = 0;
  int edgecut;
#endif

  Teuchos::RefCountPtr<Epetra_CrsGraph> SymGraph ;
  Teuchos::RefCountPtr<Epetra_Map> SymMap;
  Teuchos::RefCountPtr<Ifpack_Graph_Epetra_CrsGraph> SymIFPACKGraph;
  Teuchos::RefCountPtr<Ifpack_Graph> IFPACKGraph = Teuchos::rcp( (Ifpack_Graph*)Graph_, false );

  int Length = 2 * MaxNumEntries();
  int NumIndices;
  std::vector<int> Indices;
  Indices.resize(Length);

  /* construct the CSR graph information of the LOCAL matrix
     using the get_row function */

  std::vector<idxtype> wgtflag;
  wgtflag.resize(4);

  std::vector<int> options;
  options.resize(4);
  
  int numflag;

  if (UseSymmetricGraph_) {

#if !defined(EPETRA_NO_32BIT_GLOBAL_INDICES) || !defined(EPETRA_NO_64BIT_GLOBAL_INDICES)
    // need to build a symmetric graph. 
    // I do this in two stages:
    // 1.- construct an Epetra_CrsMatrix, symmetric
    // 2.- convert the Epetra_CrsMatrix into METIS format
    SymMap = Teuchos::rcp( new Epetra_Map(NumMyRows(),0,Graph_->Comm()) );
    SymGraph = Teuchos::rcp( new Epetra_CrsGraph(Copy,*SymMap,0) );
#endif

#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
      if(SymGraph->RowMap().GlobalIndicesInt()) {
        for (int i = 0; i < NumMyRows() ; ++i) {

          ierr = Graph_->ExtractMyRowCopy(i, Length, NumIndices, &Indices[0]);
          IFPACK_CHK_ERR(ierr);

          for (int j = 0 ; j < NumIndices ; ++j) {
            int jj = Indices[j];
            if (jj != i) {
              SymGraph->InsertGlobalIndices(i,1,&jj);
              SymGraph->InsertGlobalIndices(jj,1,&i);
            }
          }
        }
      }
      else
#endif
#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
      if(SymGraph->RowMap().GlobalIndicesLongLong()) {
        for (int i = 0; i < NumMyRows() ; ++i) {
          long long i_LL = i;

          ierr = Graph_->ExtractMyRowCopy(i, Length, NumIndices, &Indices[0]);
          IFPACK_CHK_ERR(ierr);

          for (int j = 0 ; j < NumIndices ; ++j) {
            long long jj = Indices[j];
            if (jj != i_LL) {
              SymGraph->InsertGlobalIndices(i_LL,1,&jj);
              SymGraph->InsertGlobalIndices(jj,1,&i_LL);
            }
          }
        }
      }
      else
#endif
        throw "Ifpack_METISPartitioner::ComputePartitions: GlobalIndices type unknown";

    IFPACK_CHK_ERR(SymGraph->FillComplete());
    SymIFPACKGraph = Teuchos::rcp( new Ifpack_Graph_Epetra_CrsGraph(SymGraph) );
    IFPACKGraph = SymIFPACKGraph;
  }

  // now work on IFPACKGraph, that can be the symmetric or
  // the non-symmetric one

  /* set parameters */
   
  wgtflag[0] = 0;    /* no weights */
  numflag    = 0;    /* C style */
  options[0] = 0;    /* default options */
   
  std::vector<idxtype> xadj;
  xadj.resize(NumMyRows() + 1);

  std::vector<idxtype> adjncy;
  adjncy.resize(NumMyNonzeros());
   
  int count = 0; 
  int count2 = 0; 
  xadj[0] = 0;
  
  for (int i = 0; i < NumMyRows() ; ++i) {

    xadj[count2+1] = xadj[count2]; /* nonzeros in row i-1 */

    ierr = IFPACKGraph->ExtractMyRowCopy(i, Length, NumIndices, &Indices[0]);
    IFPACK_CHK_ERR(ierr);

    for (int j = 0 ; j < NumIndices ; ++j) {
      int jj = Indices[j];
      if (jj != i) {
	adjncy[count++] = jj;
	xadj[count2+1]++;
      }
    }
    count2++;
  }

  std::vector<idxtype> NodesInSubgraph;
  NodesInSubgraph.resize(NumLocalParts_);

  // some cases can be handled separately
  
  int ok;

  if (NumLocalParts() == 1) {

    for (int i = 0 ; i < NumMyRows() ; ++i) 
      Partition_[i] = 0;
    
  } else if (NumLocalParts() == NumMyRows()) {

    for (int i = 0 ; i < NumMyRows() ; ++i) 
      Partition_[i] = i;
  
  } else {

    ok = 0;

    // sometimes METIS creates less partitions than specified.
    // ok will check this problem, and recall metis, asking
    // for NumLocalParts_/2 partitions
    while (ok == 0) {
      
      for (int i = 0 ; i < NumMyRows() ; ++i) 
	Partition_[i] = -1;
    
#ifdef HAVE_IFPACK_METIS
      int j = NumMyRows();
      if (NumLocalParts_ < 8) {

	int i = 1; /* optype in the METIS manual */
	numflag = 0;
	METIS_EstimateMemory(&j, &xadj[0], &adjncy[0], 
			     &numflag, &i, &nbytes );
	
	METIS_PartGraphRecursive(&j, &xadj[0], &adjncy[0],
				 NULL, NULL,
				 &wgtflag[0], &numflag, &NumLocalParts_, 
				 &options[0], &edgecut, &Partition_[0]);
      } else {

	numflag = 0;
	
	METIS_PartGraphKway (&j, &xadj[0], &adjncy[0], 
			     NULL, 
			     NULL, &wgtflag[0], &numflag, 
			     &NumLocalParts_, &options[0],
			     &edgecut, &Partition_[0]);
      }
#else
      numflag = numflag * 2; // avoid warning for unused variable
      if (Graph_->Comm().MyPID() == 0) {
	cerr << "METIS was not linked; now I put all" << endl;
	cerr << "the local nodes in the same partition." << endl;
      }
      for (int i = 0 ; i < NumMyRows() ; ++i) 
	Partition_[i] = 0;
      NumLocalParts_ = 1;
#endif
      
      ok = 1;
      
      for (int i = 0 ; i < NumLocalParts() ; ++i) 
	NodesInSubgraph[i] = 0;

      for (int i = 0 ; i < NumMyRows() ; ++i) {
	int j = Partition_[i];
	if ((j < 0) || (j>= NumLocalParts())) {
	  ok = 0;
	  break;
	} 
	else NodesInSubgraph[j]++;
      }
      
      for (int i = 0 ; i < NumLocalParts() ; ++i) {
	if( NodesInSubgraph[i] == 0 ) {
	  ok = 0;
	  break;
	}
      }
      
      if (ok == 0) {
	cerr << "Specified number of subgraphs ("
	     << NumLocalParts_ << ") generates empty subgraphs." << endl;
	cerr << "Now I recall METIS with NumLocalParts_ = "
	     << NumLocalParts_ / 2 << "..." << endl;
	NumLocalParts_ = NumLocalParts_/2;
      }
      
      if (NumLocalParts() == 0) {
	IFPACK_CHK_ERR(-10); // something went wrong
      }
      
      if (NumLocalParts() == 1) {
	for (int i = 0 ; i < NumMyRows() ; ++i) 
	  Partition_[i] = 0;
	ok = 1;
      }
      
    } /* while( ok == 0 ) */
  
  } /* if( NumLocalParts_ == 1 ) */

  return(0);
}