//==============================================================================
// Wraps Matrix and precomputes, for every local row, the position of the
// diagonal entry inside the extracted row (pos_) and the additive correction
//   a_ii * (RelativeThreshold - 1) + AbsoluteThreshold * EPETRA_SGN(a_ii)
// (val_). Rows that store no diagonal entry keep pos_ = -1 and val_ = 0.0.
//
// \param Matrix            - (In) matrix to be filtered.
// \param AbsoluteThreshold - (In) multiplies EPETRA_SGN(a_ii) in the correction.
// \param RelativeThreshold - (In) (RelativeThreshold - 1) multiplies a_ii.
Ifpack_DiagonalFilter::Ifpack_DiagonalFilter(const Teuchos::RefCountPtr<Epetra_RowMatrix>& Matrix,
					     double AbsoluteThreshold,
					     double RelativeThreshold) :
  A_(Matrix),
  AbsoluteThreshold_(AbsoluteThreshold),
  RelativeThreshold_(RelativeThreshold)
{
  Epetra_Time Time(Comm());

  const int NumRows = NumMyRows();
  const int MaxEntries = MaxNumEntries();

  pos_.resize(NumRows);
  val_.resize(NumRows);

  // Keep at least one slot so that &Indices[0] / &Values[0] stay valid even
  // when the matrix reports a maximum row length of zero.
  std::vector<int> Indices(MaxEntries > 0 ? MaxEntries : 1);
  std::vector<double> Values(MaxEntries > 0 ? MaxEntries : 1);

  for (int MyRow = 0 ; MyRow < NumRows ; ++MyRow) {

    pos_[MyRow] = -1;
    val_[MyRow] = 0.0;

    // Reset per row so a failed extraction can never leave a stale count.
    int NumEntries = 0;
    int ierr = A_->ExtractMyRowCopy(MyRow, MaxEntries, NumEntries,
				    &Values[0], &Indices[0]);
    assert (ierr == 0);
    if (ierr != 0)
      continue; // asserts compiled out: treat the row as having no diagonal

    for (int i = 0 ; i < NumEntries ; ++i) {
      if (Indices[i] == MyRow) {
	pos_[MyRow] = i;
	val_[MyRow] = Values[i] * (RelativeThreshold_ - 1) +
	  AbsoluteThreshold_ * EPETRA_SGN(Values[i]);
	// Diagonal located: stop scanning this row. The break used to sit
	// outside this if, so only the first entry of each row was examined.
	break;
      }
    }
  }
  // NOTE(review): unconditional timing output on stdout from a constructor;
  // kept because callers may rely on it, but it looks like leftover debugging.
  cout << "TIME = " << Time.ElapsedTime() << endl;
}
// ============================================================================
inline void Ifpack_LinePartitioner::local_automatic_line_search(int NumEqns, int * blockIndices, int last, int next, int LineID, double tol, int *itemp, double * dtemp) const {
  double *xvals=xcoord_, *yvals=ycoord_, *zvals=zcoord_;

  int N = NumMyRows();

  int allocated_space = MaxNumEntries();
  int * cols    = itemp;
  int * indices = &itemp[allocated_space];
  double * dist = dtemp;


  while (blockIndices[next] == -1) {
    // Get the next row
    int n=0;
    int neighbors_in_line=0;

    Graph_->ExtractMyRowCopy(next,allocated_space,n,cols);
    double x0 = (xvals) ? xvals[next/NumEqns] : 0.0;
    double y0 = (yvals) ? yvals[next/NumEqns] : 0.0;
    double z0 = (zvals) ? zvals[next/NumEqns] : 0.0;

    // Calculate neighbor distances & sort
    int neighbor_len=0;
    for(int i=0; i<n; i+=NumEqns) {
      double mydist = 0.0;
      if(cols[i] >=N) continue; // Check for off-proc entries
      int nn = cols[i] / NumEqns;
      if(blockIndices[nn]==LineID) neighbors_in_line++;
      if(xvals) mydist += (x0 - xvals[nn]) * (x0 - xvals[nn]);
      if(yvals) mydist += (y0 - yvals[nn]) * (y0 - yvals[nn]);
      if(zvals) mydist += (z0 - zvals[nn]) * (z0 - zvals[nn]);
      dist[neighbor_len] = sqrt(mydist);
      indices[neighbor_len]=cols[i];
      neighbor_len++;
    }
    // If more than one of my neighbors is already in this line.  I
    // can't be because I'd create a cycle
    if(neighbors_in_line > 1) break;

    // Otherwise add me to the line 
    for(int k=0; k<NumEqns; k++) 
      blockIndices[next + k] = LineID;
    
    // Try to find the next guy in the line (only check the closest two that aren't element 0 (diagonal))
    Epetra_Util::Sort(true,neighbor_len,dist,0,0,1,&indices,0,0);

    if(neighbor_len > 2 && indices[1] != last && blockIndices[indices[1]] == -1 && dist[1]/dist[neighbor_len-1] < tol) {
      last=next;
      next=indices[1];
    }
    else if(neighbor_len > 3 && indices[2] != last && blockIndices[indices[2]] == -1 && dist[2]/dist[neighbor_len-1] < tol) {
      last=next;
      next=indices[2];
    }
    else {
      // I have no further neighbors in this line
      break;
    }
  }
}
//==============================================================================
int Ifpack_GreedyPartitioner::ComputePartitions()
{
  std::vector<int> ElementsPerPart(NumLocalParts());
  std::vector<int> Count(NumLocalParts());
  for (int i = 0 ; i < NumLocalParts() ; ++i)
    Count[i] = 0;

  // define how many nodes have to be put on each part
  int div = NumMyRows() / NumLocalParts();
  int mod = NumMyRows() % NumLocalParts();

  for (int i = 0 ; i < NumLocalParts() ; ++i) {
    Count[i] = 0;
    ElementsPerPart[i] = div;
    if (i < mod) ElementsPerPart[i]++;
  }

  for( int i=0 ; i<NumMyRows() ; ++i ) {
    Partition_[i] = -1;
  }

  int NumEntries;
  std::vector<int> Indices(MaxNumEntries());
  
  // load root node for partition 0
  int CurrentPartition = 0;
  int TotalCount = 0;

  // filter singletons and empty rows, put all of them in partition 0
  for (int i = 0 ; i < NumMyRows() ; ++i) {
    NumEntries = 0;
    int ierr = Graph_->ExtractMyRowCopy(i, MaxNumEntries(),
                                        NumEntries, &Indices[0]);
    IFPACK_CHK_ERR(ierr);
    if (NumEntries <= 1) {
      Partition_[i] = 0;
      TotalCount++;
    }
  }

  if (TotalCount)
    CurrentPartition = 1;

  std::vector<int> ThisLevel(1);
  ThisLevel[0] = RootNode_;

  // be sure that RootNode is not a singleton or empty row
  if (Partition_[RootNode_] != -1) {
    // look for another RN
    for (int i = 0 ; i < NumMyRows() ; ++i)
      if (Partition_[i] == -1) {
        ThisLevel[0] = i;
        break;
      }
  }
  else {
    Partition_[RootNode_] = CurrentPartition;
  }

  // now aggregate the non-empty and non-singleton rows
  while (ThisLevel.size()) {

    std::vector<int> NextLevel;

    for (unsigned int i = 0 ; i < ThisLevel.size() ; ++i) {

      int CurrentNode = ThisLevel[i];
      int ierr = Graph_->ExtractMyRowCopy(CurrentNode, MaxNumEntries(),
                                          NumEntries, &Indices[0]);
      IFPACK_CHK_ERR(ierr);

      if (NumEntries <= 1)
        continue;

      for (int j = 0 ; j < NumEntries ; ++j) {

        int NextNode = Indices[j];
        if (NextNode >= NumMyRows()) continue;

        if (Partition_[NextNode] == -1) {
          // this is a free node
          NumLocalParts_ = CurrentPartition + 1;
          Partition_[NextNode] = CurrentPartition;
          ++Count[CurrentPartition];
          ++TotalCount;
          NextLevel.push_back(NextNode);
        }
      }
    } // for (i)

    // check whether change partition or not
    if (Count[CurrentPartition] >= ElementsPerPart[CurrentPartition])
      ++CurrentPartition;

    // swap next and this
    ThisLevel.resize(0);
    for (unsigned int i = 0 ; i < NextLevel.size() ; ++i)
      ThisLevel.push_back(NextLevel[i]);

    if (ThisLevel.size() == 0 && (TotalCount != NumMyRows())) {
      // need to look for new RootNode, do this in a simple way
      for (int i = 0 ; i < NumMyRows() ; i++) {
        if (Partition_[i] == -1)
          ThisLevel.push_back(i);
        break;
      }
    }

  } // while (ok)

  return(0);
}
//==============================================================================
// NOTE:
// - matrix is supposed to be localized, and passes through the
// singleton filter. This means that I do not have to look
// for Dirichlet nodes (singletons). Also, all rows and columns are 
// local.
int Ifpack_METISPartitioner::ComputePartitions()
{

  int ierr;
#ifdef HAVE_IFPACK_METIS
  int nbytes = 0;
  int edgecut;
#endif

  Teuchos::RefCountPtr<Epetra_CrsGraph> SymGraph ;
  Teuchos::RefCountPtr<Epetra_Map> SymMap;
  Teuchos::RefCountPtr<Ifpack_Graph_Epetra_CrsGraph> SymIFPACKGraph;
  Teuchos::RefCountPtr<Ifpack_Graph> IFPACKGraph = Teuchos::rcp( (Ifpack_Graph*)Graph_, false );

  int Length = 2 * MaxNumEntries();
  int NumIndices;
  std::vector<int> Indices;
  Indices.resize(Length);

  /* construct the CSR graph information of the LOCAL matrix
     using the get_row function */

  std::vector<idxtype> wgtflag;
  wgtflag.resize(4);

  std::vector<int> options;
  options.resize(4);
  
  int numflag;

  if (UseSymmetricGraph_) {

#if !defined(EPETRA_NO_32BIT_GLOBAL_INDICES) || !defined(EPETRA_NO_64BIT_GLOBAL_INDICES)
    // need to build a symmetric graph. 
    // I do this in two stages:
    // 1.- construct an Epetra_CrsMatrix, symmetric
    // 2.- convert the Epetra_CrsMatrix into METIS format
    SymMap = Teuchos::rcp( new Epetra_Map(NumMyRows(),0,Graph_->Comm()) );
    SymGraph = Teuchos::rcp( new Epetra_CrsGraph(Copy,*SymMap,0) );
#endif

#ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
      if(SymGraph->RowMap().GlobalIndicesInt()) {
        for (int i = 0; i < NumMyRows() ; ++i) {

          ierr = Graph_->ExtractMyRowCopy(i, Length, NumIndices, &Indices[0]);
          IFPACK_CHK_ERR(ierr);

          for (int j = 0 ; j < NumIndices ; ++j) {
            int jj = Indices[j];
            if (jj != i) {
              SymGraph->InsertGlobalIndices(i,1,&jj);
              SymGraph->InsertGlobalIndices(jj,1,&i);
            }
          }
        }
      }
      else
#endif
#ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
      if(SymGraph->RowMap().GlobalIndicesLongLong()) {
        for (int i = 0; i < NumMyRows() ; ++i) {
          long long i_LL = i;

          ierr = Graph_->ExtractMyRowCopy(i, Length, NumIndices, &Indices[0]);
          IFPACK_CHK_ERR(ierr);

          for (int j = 0 ; j < NumIndices ; ++j) {
            long long jj = Indices[j];
            if (jj != i_LL) {
              SymGraph->InsertGlobalIndices(i_LL,1,&jj);
              SymGraph->InsertGlobalIndices(jj,1,&i_LL);
            }
          }
        }
      }
      else
#endif
        throw "Ifpack_METISPartitioner::ComputePartitions: GlobalIndices type unknown";

    IFPACK_CHK_ERR(SymGraph->FillComplete());
    SymIFPACKGraph = Teuchos::rcp( new Ifpack_Graph_Epetra_CrsGraph(SymGraph) );
    IFPACKGraph = SymIFPACKGraph;
  }

  // now work on IFPACKGraph, that can be the symmetric or
  // the non-symmetric one

  /* set parameters */
   
  wgtflag[0] = 0;    /* no weights */
  numflag    = 0;    /* C style */
  options[0] = 0;    /* default options */
   
  std::vector<idxtype> xadj;
  xadj.resize(NumMyRows() + 1);

  std::vector<idxtype> adjncy;
  adjncy.resize(NumMyNonzeros());
   
  int count = 0; 
  int count2 = 0; 
  xadj[0] = 0;
  
  for (int i = 0; i < NumMyRows() ; ++i) {

    xadj[count2+1] = xadj[count2]; /* nonzeros in row i-1 */

    ierr = IFPACKGraph->ExtractMyRowCopy(i, Length, NumIndices, &Indices[0]);
    IFPACK_CHK_ERR(ierr);

    for (int j = 0 ; j < NumIndices ; ++j) {
      int jj = Indices[j];
      if (jj != i) {
	adjncy[count++] = jj;
	xadj[count2+1]++;
      }
    }
    count2++;
  }

  std::vector<idxtype> NodesInSubgraph;
  NodesInSubgraph.resize(NumLocalParts_);

  // some cases can be handled separately
  
  int ok;

  if (NumLocalParts() == 1) {

    for (int i = 0 ; i < NumMyRows() ; ++i) 
      Partition_[i] = 0;
    
  } else if (NumLocalParts() == NumMyRows()) {

    for (int i = 0 ; i < NumMyRows() ; ++i) 
      Partition_[i] = i;
  
  } else {

    ok = 0;

    // sometimes METIS creates less partitions than specified.
    // ok will check this problem, and recall metis, asking
    // for NumLocalParts_/2 partitions
    while (ok == 0) {
      
      for (int i = 0 ; i < NumMyRows() ; ++i) 
	Partition_[i] = -1;
    
#ifdef HAVE_IFPACK_METIS
      int j = NumMyRows();
      if (NumLocalParts_ < 8) {

	int i = 1; /* optype in the METIS manual */
	numflag = 0;
	METIS_EstimateMemory(&j, &xadj[0], &adjncy[0], 
			     &numflag, &i, &nbytes );
	
	METIS_PartGraphRecursive(&j, &xadj[0], &adjncy[0],
				 NULL, NULL,
				 &wgtflag[0], &numflag, &NumLocalParts_, 
				 &options[0], &edgecut, &Partition_[0]);
      } else {

	numflag = 0;
	
	METIS_PartGraphKway (&j, &xadj[0], &adjncy[0], 
			     NULL, 
			     NULL, &wgtflag[0], &numflag, 
			     &NumLocalParts_, &options[0],
			     &edgecut, &Partition_[0]);
      }
#else
      numflag = numflag * 2; // avoid warning for unused variable
      if (Graph_->Comm().MyPID() == 0) {
	cerr << "METIS was not linked; now I put all" << endl;
	cerr << "the local nodes in the same partition." << endl;
      }
      for (int i = 0 ; i < NumMyRows() ; ++i) 
	Partition_[i] = 0;
      NumLocalParts_ = 1;
#endif
      
      ok = 1;
      
      for (int i = 0 ; i < NumLocalParts() ; ++i) 
	NodesInSubgraph[i] = 0;

      for (int i = 0 ; i < NumMyRows() ; ++i) {
	int j = Partition_[i];
	if ((j < 0) || (j>= NumLocalParts())) {
	  ok = 0;
	  break;
	} 
	else NodesInSubgraph[j]++;
      }
      
      for (int i = 0 ; i < NumLocalParts() ; ++i) {
	if( NodesInSubgraph[i] == 0 ) {
	  ok = 0;
	  break;
	}
      }
      
      if (ok == 0) {
	cerr << "Specified number of subgraphs ("
	     << NumLocalParts_ << ") generates empty subgraphs." << endl;
	cerr << "Now I recall METIS with NumLocalParts_ = "
	     << NumLocalParts_ / 2 << "..." << endl;
	NumLocalParts_ = NumLocalParts_/2;
      }
      
      if (NumLocalParts() == 0) {
	IFPACK_CHK_ERR(-10); // something went wrong
      }
      
      if (NumLocalParts() == 1) {
	for (int i = 0 ; i < NumMyRows() ; ++i) 
	  Partition_[i] = 0;
	ok = 1;
      }
      
    } /* while( ok == 0 ) */
  
  } /* if( NumLocalParts_ == 1 ) */

  return(0);
} 
// ============================================================================
inline void Ifpack_LinePartitioner::local_automatic_line_search(int NumEqns, int * blockIndices, int last, int next, int LineID, double tol, int *itemp, double * dtemp) const {
  double *xvals=xcoord_, *yvals=ycoord_, *zvals=zcoord_;

  int N = NumMyRows();

  int allocated_space = MaxNumEntries();
  int * cols    = itemp;
  int * indices = &itemp[allocated_space];
  double * merit= dtemp;
  double * vals = &dtemp[allocated_space];

  while (blockIndices[next] == -1) {
    // Get the next row
    int n=0;
    int neighbors_in_line=0;

    if(mode_==MATRIX_ENTRIES) Matrix_->ExtractMyRowCopy(next,allocated_space,n,vals,cols);
    else Graph_->ExtractMyRowCopy(next,allocated_space,n,cols);

    // Coordinate distance info
    double x0 = (xvals) ? xvals[next/NumEqns] : 0.0;
    double y0 = (yvals) ? yvals[next/NumEqns] : 0.0;
    double z0 = (zvals) ? zvals[next/NumEqns] : 0.0;

    // Calculate neighbor distances & sort
    int neighbor_len=0;
    for(int i=0; i<n; i+=NumEqns) {
      if(cols[i] >=N) continue; // Check for off-proc entries
      int nn = cols[i] / NumEqns;
      if(blockIndices[nn]==LineID) neighbors_in_line++;      
      if(mode_==COORDINATES) merit[neighbor_len] = compute_distance_coordinates(x0,y0,z0,nn,xvals,yvals,zvals);
      else {
	merit[neighbor_len] =  - compute_distance_matrix_entries(vals,i,NumEqns); // Make this negative here, so we can use the same if tests at coordinates
	// Boost the diagonal here to ensure it goes first
	if(cols[i]==next) merit[neighbor_len] = -DBL_MAX;
      }

      indices[neighbor_len]=cols[i];
      neighbor_len++;
    }
    // If more than one of my neighbors is already in this line.  I
    // can't be because I'd create a cycle
    if(neighbors_in_line > 1) break;

    // Otherwise add me to the line 
    for(int k=0; k<NumEqns; k++) 
      blockIndices[next + k] = LineID;
    
    // Try to find the next guy in the line (only check the closest two that aren't element 0 (diagonal))
    Epetra_Util::Sort(true,neighbor_len,merit,0,0,1,&indices,0,0);

    if(neighbor_len > 2 && indices[1] != last && blockIndices[indices[1]] == -1 && merit[1]  < tol*merit[neighbor_len-1]) {
      last=next;
      next=indices[1];
    }
    else if(neighbor_len > 3 && indices[2] != last && blockIndices[indices[2]] == -1 && merit[2] < tol*merit[neighbor_len-1]) {
      last=next;
      next=indices[2];
    }
    else {
      // I have no further neighbors in this line
      break;
    }
  }
}
// ============================================================================
int Ifpack_LinePartitioner::Compute_Blocks_AutoLine(int * blockIndices) const {
  double *xvals=xcoord_, *yvals=ycoord_, *zvals=zcoord_;
  int NumEqns = NumEqns_;
  double tol = threshold_;
  int N = NumMyRows();
  int allocated_space = MaxNumEntries();
    
  int * cols    = new int[2*allocated_space];
  int * indices = &cols[allocated_space];
  double * merit = new double[2*allocated_space];
  double * vals = &merit[allocated_space];

  int * itemp   = new int[2*allocated_space];
  double *dtemp = new double[2*allocated_space];


  int num_lines = 0;

  for(int i=0; i<N; i+=NumEqns) {
    int nz=0;
    // Short circuit if I've already been blocked
    if(blockIndices[i] !=-1) continue;

    // Get neighbors and sort by distance
    if(mode_==MATRIX_ENTRIES) Matrix_->ExtractMyRowCopy(i,allocated_space,nz,vals,cols);
    else Graph_->ExtractMyRowCopy(i,allocated_space,nz,cols);

    double x0 = (xvals) ? xvals[i/NumEqns] : 0.0;
    double y0 = (yvals) ? yvals[i/NumEqns] : 0.0;
    double z0 = (zvals) ? zvals[i/NumEqns] : 0.0;

    int neighbor_len=0;
    for(int j=0; j<nz; j+=NumEqns) {
      int nn = cols[j] / NumEqns;
      if(cols[j] >=N) continue; // Check for off-proc entries
      if(mode_==COORDINATES) merit[neighbor_len] = compute_distance_coordinates(x0,y0,z0,nn,xvals,yvals,zvals);
      else {
	merit[neighbor_len] =  - compute_distance_matrix_entries(vals,j,NumEqns); // Make this negative here, so we can use the same if tests at coordinates
	// Boost the diagonal here to ensure it goes first
	if(cols[j]==i)   merit[neighbor_len] = -DBL_MAX;
      }
      indices[neighbor_len] = cols[j];
      neighbor_len++;
    }

    Epetra_Util::Sort(true,neighbor_len,merit,0,0,1,&indices,0,0);

    // Number myself
    for(int k=0; k<NumEqns; k++)
      blockIndices[i + k] = num_lines;

    // Fire off a neighbor line search (nearest neighbor)
    if(neighbor_len > 2 && merit[1] < tol*merit[neighbor_len-1]) {      
      local_automatic_line_search(NumEqns,blockIndices,i,indices[1],num_lines,tol,itemp,dtemp);
    }
    // Fire off a neighbor line search (second nearest neighbor)
    if(neighbor_len > 3 && merit[2] < tol*merit[neighbor_len-1]) {
      local_automatic_line_search(NumEqns,blockIndices,i,indices[2],num_lines,tol,itemp,dtemp);
    }

    num_lines++;
  }
  
  // Cleanup
  delete [] cols;
  delete [] merit;
  delete [] itemp;
  delete [] dtemp;

  return num_lines;
}
// ============================================================================
int Ifpack_LinePartitioner::Compute_Blocks_AutoLine(int * blockIndices) const {
  double *xvals=xcoord_, *yvals=ycoord_, *zvals=zcoord_;
  int NumEqns = NumEqns_;
  double tol = threshold_;
  int N = NumMyRows();
  int allocated_space = MaxNumEntries();
    
  int * cols    = new int[2*allocated_space];
  int * indices = &cols[allocated_space];
  double * dist = new double[allocated_space];

  int * itemp   = new int[2*allocated_space];
  double *dtemp = new double[allocated_space];

  int num_lines = 0;

  for(int i=0; i<N; i+=NumEqns) {
    int nz=0;
    // Short circuit if I've already been blocked
    if(blockIndices[i] !=-1) continue;

    // Get neighbors and sort by distance
    Graph_->ExtractMyRowCopy(i,allocated_space,nz,cols);
    double x0 = (xvals) ? xvals[i/NumEqns] : 0.0;
    double y0 = (yvals) ? yvals[i/NumEqns] : 0.0;
    double z0 = (zvals) ? zvals[i/NumEqns] : 0.0;

    int neighbor_len=0;
    for(int j=0; j<nz; j+=NumEqns) {
      double mydist = 0.0;
      int nn = cols[j] / NumEqns;
      if(cols[j] >=N) continue; // Check for off-proc entries
      if(xvals) mydist += (x0 - xvals[nn]) * (x0 - xvals[nn]);
      if(yvals) mydist += (y0 - yvals[nn]) * (y0 - yvals[nn]);
      if(zvals) mydist += (z0 - zvals[nn]) * (z0 - zvals[nn]);
      dist[neighbor_len] = sqrt(mydist);
      indices[neighbor_len]=cols[j];
      neighbor_len++;
    }

    Epetra_Util::Sort(true,neighbor_len,dist,0,0,1,&indices,0,0);

    // Number myself
    for(int k=0; k<NumEqns; k++)
      blockIndices[i + k] = num_lines;

    // Fire off a neighbor line search (nearest neighbor)
    if(neighbor_len > 2 && dist[1]/dist[neighbor_len-1] < tol) {
      local_automatic_line_search(NumEqns,blockIndices,i,indices[1],num_lines,tol,itemp,dtemp);
    }
    // Fire off a neighbor line search (second nearest neighbor)
    if(neighbor_len > 3 && dist[2]/dist[neighbor_len-1] < tol) {
      local_automatic_line_search(NumEqns,blockIndices,i,indices[2],num_lines,tol,itemp,dtemp);
    }

    num_lines++;
  }
  
  // Cleanup
  delete [] cols;
  delete [] dist;
  delete [] itemp;
  delete [] dtemp;

  return num_lines;
}