Teuchos::RCP<Epetra_MapColoring>
Colorer::generateColMapColoring()
{
  Teuchos::RCP<Epetra_MapColoring> rowColorMap = generateRowMapColoring();

  // Color map has colored rows -- need colored columns
  Epetra_Import importer(*colmap_, *input_map_);

  Teuchos::RCP<Epetra_MapColoring> colorMap =
    Teuchos::rcp(new Epetra_MapColoring(*colmap_));

  colorMap->Import(*rowColorMap, importer, Insert);
  return (colorMap);
}
static int run_test(Teuchos::RCP<Epetra_CrsMatrix> matrix,
	  bool verbose,           // display the graph before & after
	  bool contract,          // set global number of partitions to 1/2 num procs
	  int partitioningType,   // hypergraph or graph partitioning, or simple
	  int vertexWeightType,   // use vertex weights?
	  int edgeWeightType,     // use edge/hyperedge weights?
	  int objectType)         // use isorropia's CrsMatrix or CrsGraph
{
  int rc=0, fail = 0;
#ifdef HAVE_EPETRAEXT
  int localProc = 0;
  double balance1, balance2, cutn1, cutn2, cutl1, cutl2;
  double balance3, cutn3, cutl3;
  double cutWgt1, cutWgt2, cutWgt3;
  int numCuts1, numCuts2, numCuts3, valid;
  int numPartitions = 0;
  int keepDenseEdges = 0;
  int numProcs = 1;

#ifdef HAVE_MPI
  const Epetra_MpiComm &Comm = dynamic_cast<const Epetra_MpiComm &>(matrix->Comm());
  localProc = Comm.MyPID();
  numProcs = Comm.NumProc();
#else
  const Epetra_SerialComm &Comm = dynamic_cast<const Epetra_SerialComm &>(matrix->Comm());
#endif

  int numRows = matrix->NumGlobalRows();

  if (numRows < (numProcs * 100)){
    // By default Zoltan throws out dense edges, defined as those
    // whose number of non-zeros exceeds 25% of the number of vertices.
    //
    // If dense edges are thrown out of a small matrix, there may be nothing left.
    keepDenseEdges = 1;
  }

  double myShareBefore = 1.0 / numProcs;
  double myShare = myShareBefore;

  if (contract){
    numPartitions = numProcs / 2;

    if (numPartitions > numRows)
      numPartitions = numRows;

    if (numPartitions > 0){
      if (localProc < numPartitions){
	myShare = 1.0 / numPartitions;
      }
      else{
	myShare = 0.0;
      }
    }
    else{
      contract = 0;
    }
  }

  // If we want Zoltan's or Isorropia's default weights, then we don't
  // need to supply a CostDescriber object to createBalancedCopy,
  // so we get to test the API functions that don't take a CostDescriber.

  bool noCosts = ((vertexWeightType == NO_APPLICATION_SUPPLIED_WEIGHTS) &&
		   (edgeWeightType == NO_APPLICATION_SUPPLIED_WEIGHTS));

  // Test the interface that has no parameters, if possible

  bool noParams =
    ((partitioningType == HYPERGRAPH_PARTITIONING) && // default, so requires no params
     (numPartitions == 0) &&                          // >0 would require a parameter
     (keepDenseEdges == 0));                          // >0 would require a parameter

  // Maps for original object
  const Epetra_Map &sourceRowMap = matrix->RowMap();
  const Epetra_Map &sourceRangeMap = matrix->RangeMap();
//   const Epetra_Map &sourceColMap = matrix->ColMap();
  const Epetra_Map &sourceDomainMap = matrix->DomainMap();

  int numCols = matrix->NumGlobalCols();
  int nMyRows = sourceRowMap.NumMyElements();
  int base = sourceRowMap.IndexBase();

  // Compute vertex and edge weights

  Isorropia::Epetra::CostDescriber costs;

  Teuchos::RCP<Epetra_Vector> vptr;

  Teuchos::RCP<Epetra_CrsMatrix> eptr;

  Teuchos::RCP<Epetra_Vector> hyperEdgeWeights;

  if (edgeWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS){

    if (partitioningType == GRAPH_PARTITIONING){

      // Create graph edge weights.

      eptr = Teuchos::rcp(new Epetra_CrsMatrix(*matrix));

      if (vertexWeightType == SUPPLY_EQUAL_WEIGHTS){
	eptr->PutScalar(1.0);   // set all nonzeros to 1.0
      }
      else{
	int maxRowSize = eptr->MaxNumEntries();
	double *newVal = NULL;
	if (maxRowSize > 0){
	  newVal = new double [maxRowSize];
	  for (int j=0; j<maxRowSize; j++){
	    newVal[j] = localProc + 1 + j;
	  }
	}
	int numEntries;
	int *idx;
	double *val;
	for (int i=0; i<nMyRows; i++){
	  rc = eptr->ExtractMyRowView(i, numEntries, val, idx);
	  for (int j=0; j<numEntries; j++){
	    val[j] = newVal[j];
	  }
	}
	if (newVal) delete [] newVal;
      }

      eptr->FillComplete(sourceDomainMap, sourceRangeMap);

      costs.setGraphEdgeWeights(eptr);
    }
    else{
      // Create hyperedge weights.  (Note that the list of hyperedges that a
      // process provides weights for has no relation to the columns
      // that it has non-zeroes for, or the rows that is has.  Hypergraphs
      // in general are not square.  Also more than one process can provide
      // a weight for the same edge.  Zoltan combines the weights according
      // to the value of the PHG_EDGE_WEIGHT_OPERATION parameter.  The default
      // for this parameter is to use the maximum edge weight provided by any
      // process for a given hyperedge.)

      Epetra_Map hyperEdgeMap(numCols, base, Comm);

      hyperEdgeWeights = Teuchos::rcp(new Epetra_Vector(hyperEdgeMap));

      int *edgeGIDs = NULL;
      double *weights = NULL;
      int numHEweights = hyperEdgeMap.NumMyElements();

      if (numHEweights){
	edgeGIDs = new int [numHEweights];
	weights = new double [numHEweights];

	if (edgeWeightType == SUPPLY_EQUAL_WEIGHTS){
	  for (int i=0; i<numHEweights; i++){
	    edgeGIDs[i] = hyperEdgeMap.GID(i);
	    weights[i] = 1.0;
	  }
	}
	else{
	  int hiVolumeStart = matrix->NumGlobalCols() / 3;
	  int hiVolumeEnd = hiVolumeStart * 2;
	  for (int i=0; i<numHEweights; i++){
	    edgeGIDs[i] = hyperEdgeMap.GID(i);
	    if ((edgeGIDs[i] < hiVolumeStart) || (edgeGIDs[i] >= hiVolumeEnd)){
	      weights[i] = 1.0;
	    }
	    else{
	      weights[i] = 3.0;
	    }
	  }
	}
	hyperEdgeWeights->ReplaceGlobalValues(numHEweights, weights, edgeGIDs);
      }

      if (weights){
	delete [] weights;
	delete [] edgeGIDs;
      }

      costs.setHypergraphEdgeWeights(hyperEdgeWeights);
    }
  }

  bool need_importer = false;

  if ((vertexWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS)){

    need_importer = true;  // to redistribute row weights

    double *val = NULL;

    if (nMyRows){
      val = new double [nMyRows];

      if (vertexWeightType == SUPPLY_EQUAL_WEIGHTS){
	for (int i=0; i<nMyRows; i++){
	  val[i] = 1.0;
	}
      }
      else if (vertexWeightType == SUPPLY_UNEQUAL_WEIGHTS){
	for (int i=0; i<nMyRows; i++){
	  val[i] = 1.0 + ((localProc+1) / 2);
	}
      }
    }

    vptr = Teuchos::rcp(new Epetra_Vector(Copy, sourceRowMap, val));

    if (val) delete [] val;

    costs.setVertexWeights(vptr);
  }

  // Calculate partition quality metrics before calling Zoltan

  if (partitioningType == GRAPH_PARTITIONING){
    rc = ispatest::compute_graph_metrics(matrix->Graph(), costs,
	     myShare, balance1, numCuts1, cutWgt1, cutn1, cutl1);
    if (contract){
      // balance wrt target of balancing weight over *all* procs
      rc = ispatest::compute_graph_metrics(matrix->Graph(), costs,
	     myShareBefore, balance3, numCuts3, cutWgt3, cutn3, cutl3);
    }
  }
  else{
    rc = ispatest::compute_hypergraph_metrics(matrix->Graph(), costs,
	     myShare, balance1, cutn1, cutl1);
    if (contract){
      // balance wrt target of balancing weight over *all* procs
      rc = ispatest::compute_hypergraph_metrics(matrix->Graph(), costs,
	     myShareBefore, balance3, cutn3, cutl3);
    }
  }

  if (rc){
    ERROREXIT((localProc==0), "Error in computing partitioning metrics")
  }

  Teuchos::ParameterList params;

#ifdef HAVE_ISORROPIA_ZOLTAN

  if (!noParams){

    // We're using Zoltan for partitioning and supplying
    // parameters, overriding defaults.

    Teuchos::ParameterList &sublist = params.sublist("Zoltan");

    if (partitioningType == GRAPH_PARTITIONING){
      params.set("PARTITIONING METHOD", "GRAPH");
      sublist.set("GRAPH_PACKAGE", "PHG");
    }
    else{
      params.set("PARTITIONING METHOD", "HYPERGRAPH");
      sublist.set("LB_APPROACH", "PARTITION");
      sublist.set("PHG_CUT_OBJECTIVE", "CONNECTIVITY");  // "cutl"
    }

    if (keepDenseEdges){
      // only throw out rows that have no zeroes, default is to
      // throw out if .25 or more of the columns are non-zero
      sublist.set("PHG_EDGE_SIZE_THRESHOLD", "1.0");
    }
     if (numPartitions > 0){
	// test #Partitions < #Processes
	std::ostringstream os;
	os << numPartitions;
	std::string s = os.str();
	//	sublist.set("NUM_GLOBAL_PARTS", s);
	params.set("NUM PARTS", s);
      }

      //sublist.set("DEBUG_LEVEL", "1"); // Zoltan will print out parameters
      //sublist.set("DEBUG_LEVEL", "5");   // proc 0 will trace Zoltan calls
      //sublist.set("DEBUG_MEMORY", "2");  // Zoltan will trace alloc & free
  }

#else
    ERROREXIT((localProc==0),
      "Zoltan partitioning required but Zoltan not available.")
#endif

  // Function scope values

  Teuchos::RCP<Epetra_Vector> newvwgts;
  Teuchos::RCP<Epetra_CrsMatrix> newewgts;

  // Function scope values required for LinearProblem

  Epetra_LinearProblem *problem = NULL;
  Epetra_Map *LHSmap = NULL;
  Epetra_MultiVector *RHS = NULL;
  Epetra_MultiVector *LHS = NULL;

  // Reference counted pointer to balanced object

  Epetra_CrsMatrix *matrixPtr=NULL;
  Epetra_CrsGraph *graphPtr=NULL;
  Epetra_RowMatrix *rowMatrixPtr=NULL;
  Epetra_LinearProblem *problemPtr=NULL;

  // Row map for balanced object
  const Epetra_BlockMap *targetBlockRowMap=NULL;  // for input CrsGraph
  const Epetra_Map *targetRowMap=NULL;            // for all other inputs

  // Column map for balanced object
  const Epetra_BlockMap *targetBlockColMap=NULL;  // for input CrsGraph
  const Epetra_Map *targetColMap=NULL;            // for all other inputs

  if (objectType == EPETRA_CRSMATRIX){
    if (noParams && noCosts){
      matrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix);
    }
    else if (noCosts){
      matrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix, params);
    }
    targetRowMap = &(matrixPtr->RowMap());
    targetColMap = &(matrixPtr->ColMap());
  }
  else if (objectType == EPETRA_CRSGRAPH){
    const Epetra_CrsGraph graph = matrix->Graph();
    if (noParams && noCosts){
      graphPtr = Isorropia::Epetra::createBalancedCopy(graph);
    }
    else if (noCosts){
      graphPtr = Isorropia::Epetra::createBalancedCopy(graph, params);
    }
    targetBlockRowMap = &(graphPtr->RowMap());
    targetBlockColMap = &(graphPtr->ColMap());
  }
  else if (objectType == EPETRA_ROWMATRIX){
    if (noParams && noCosts){
      rowMatrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix);
    }
    else if (noCosts){
      rowMatrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix, params);
    }
    targetRowMap = &(rowMatrixPtr->RowMatrixRowMap());
    targetColMap = &(rowMatrixPtr->RowMatrixColMap());
  }
  else if (objectType == EPETRA_LINEARPROBLEM){

    // Create a linear problem with this matrix.

    LHSmap = new Epetra_Map(numCols, base, Comm);

    int myRHSsize = sourceRowMap.NumMyElements();
    int myLHSsize = LHSmap->NumMyElements();

    int valSize = ((myRHSsize > myLHSsize) ? myRHSsize : myLHSsize);

    double *vals = NULL;

    if (valSize){
      vals = new double [valSize];
    }

    if (valSize){
      for (int i=0; i < valSize; i++){
	// put my rank in my portion of LHS and my portion of RHS
	vals[i] = localProc;
      }
    }

    RHS = new Epetra_MultiVector(Copy, sourceRowMap, vals, 1, 1);

    LHS = new Epetra_MultiVector(Copy, *LHSmap, vals, 1, 1);

    if (valSize){
      delete [] vals;
    }

    problem = new Epetra_LinearProblem(matrix.get(), LHS, RHS);

    Epetra_LinearProblem lp = *problem;

    if (lp.CheckInput()){
      ERROREXIT((localProc==0), "Error creating a LinearProblem");
    }
    if (noParams && noCosts){
      problemPtr = Isorropia::Epetra::createBalancedCopy(lp);
    }
    else if (noCosts){
      problemPtr = Isorropia::Epetra::createBalancedCopy(lp, params);
    }

    targetRowMap = &(problemPtr->GetMatrix()->RowMatrixRowMap());
    targetColMap = &(problemPtr->GetMatrix()->RowMatrixColMap());
  }

  // Redistribute the edge weights
  // Comment this out since we don't redistribute columns

  if (edgeWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS){

    if (partitioningType == GRAPH_PARTITIONING){

      Epetra_Import *importer = NULL;

      if (objectType == EPETRA_CRSGRAPH){
	newewgts = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *graphPtr));
	targetRowMap = &(newewgts->RowMap());
	targetColMap = &(newewgts->ColMap());
      }
      else{
	newewgts = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *targetRowMap, *targetColMap, 0));
      }

      importer = new Epetra_Import(*targetRowMap, sourceRowMap);
      newewgts->Import(*eptr, *importer, Insert);
      newewgts->FillComplete(*targetColMap, *targetRowMap);

      costs.setGraphEdgeWeights(newewgts);
    }
  }

  // Redistribute the vertex weights

  if ((vertexWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS)){

    Epetra_Import *importer = NULL;

    if (objectType == EPETRA_CRSGRAPH){
      newvwgts = Teuchos::rcp(new Epetra_Vector(*targetBlockRowMap));
      importer = new Epetra_Import(*targetBlockRowMap, sourceRowMap);
    }
    else{
      newvwgts = Teuchos::rcp(new Epetra_Vector(*targetRowMap));
      importer = new Epetra_Import(*targetRowMap, sourceRowMap);
    }

    newvwgts->Import(*vptr, *importer, Insert);
    costs.setVertexWeights(newvwgts);
  }

  if (localProc == 0){
    test_type(numPartitions, partitioningType, vertexWeightType, edgeWeightType, objectType);
  }

  if (verbose){

    // Picture of problem before balancing

    if (objectType == EPETRA_LINEARPROBLEM){

      ispatest::show_matrix("Before load balancing", *problem, Comm);
    }
    else{
      ispatest::show_matrix("Before load balancing", matrix->Graph(), Comm);
    }

    // Picture of problem after balancing

    if (objectType == EPETRA_LINEARPROBLEM){
      ispatest::show_matrix("After load balancing (x in Ax=b is not redistributed)", *problemPtr, Comm);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      ispatest::show_matrix("After load balancing", *rowMatrixPtr, Comm);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      ispatest::show_matrix("After load balancing", matrixPtr->Graph(), Comm);
    }
    else if (objectType == EPETRA_CRSGRAPH){
      ispatest::show_matrix("After load balancing", *graphPtr, Comm);
    }
  }

  // After partitioning, recompute the metrics

  if (partitioningType == GRAPH_PARTITIONING){
    if (objectType == EPETRA_LINEARPROBLEM){
      rc = ispatest::compute_graph_metrics(*(problemPtr->GetMatrix()), costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      rc = ispatest::compute_graph_metrics(*rowMatrixPtr, costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      rc = ispatest::compute_graph_metrics(matrixPtr->Graph(), costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else {
      rc = ispatest::compute_graph_metrics(*graphPtr, costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
  }
  else{
    if (objectType == EPETRA_LINEARPROBLEM){
      rc = ispatest::compute_hypergraph_metrics(*(problemPtr->GetMatrix()), costs,
	     myShare, balance2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      rc = ispatest::compute_hypergraph_metrics(*rowMatrixPtr, costs,
	     myShare, balance2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      rc = ispatest::compute_hypergraph_metrics(matrixPtr->Graph(), costs,
	     myShare, balance2, cutn2, cutl2);
    }
    else{
      rc = ispatest::compute_hypergraph_metrics(*graphPtr, costs,
	     myShare, balance2, cutn2, cutl2);
    }
  }

  if (rc){
    ERROREXIT((localProc==0), "Error in computing partitioning metrics")
  }

  std::string why;

  if (partitioningType == GRAPH_PARTITIONING){
    fail = (cutWgt2 > cutWgt1);
    why = "New weighted edge cuts are worse";

    if (localProc == 0){
      std::cout << "Before partitioning: Balance " << balance1 ;
      std::cout << " cutn " << cutn1 ;
      std::cout << " cutl " << cutl1 ;

      if (contract){
	std::cout << "  (wrt balancing over " << numPartitions << " partitions)" << std::endl;
	std::cout << "Before partitioning: Balance " << balance3 ;
	std::cout << " cutn " << cutn3 ;
	std::cout << " cutl " << cutl3 ;
	std::cout << "  (wrt balancing over " << numProcs << " partitions)" ;
      }
      std::cout << std::endl;

      std::cout << " Total edge cuts: " << numCuts1;
      std::cout << " Total weighted edge cuts: " << cutWgt1 << std::endl;
      std::cout << "After partitioning: Balance " << balance2 ;
      std::cout << " cutn " << cutn2 ;
      std::cout << " cutl " << cutl2 << std::endl;
      std::cout << " Total edge cuts: " << numCuts2;
      std::cout << " Total weighted edge cuts: " << cutWgt2 << std::endl;
    }
  }
  else{
      fail = (cutl2 > cutl1);
      why = "New cutl is worse";

    if (localProc == 0){
      std::cout << "Before partitioning: Balance " << balance1 ;
      std::cout << " cutn " << cutn1 ;
      std::cout << " cutl " << cutl1 ;
      if (contract){
	std::cout << "  (wrt balancing over " << numPartitions << " partitions)" << std::endl;
	std::cout << "Before partitioning: Balance " << balance3 ;
	std::cout << " cutn " << cutn3 ;
	std::cout << " cutl " << cutl3 ;
	std::cout << "  (wrt balancing over " << numProcs << " partitions)" ;
      }
      std::cout << std::endl;
      std::cout << "After partitioning: Balance " << balance2 ;
      std::cout << " cutn " << cutn2 ;
      std::cout << " cutl " << cutl2 << std::endl;
    }
  }

  if (fail){
    if (localProc == 0) std::cout << "ERROR: "+why << std::endl;
  }

  // Check that input matrix is valid.  This test constructs an "x"
  // with the matrix->DomainMap() and a "y" with matrix->RangeMap()
  // and then calculates y = Ax.

  if (objectType == EPETRA_LINEARPROBLEM){
    valid = ispatest::test_matrix_vector_multiply(*problemPtr);
  }
  else if (objectType == EPETRA_ROWMATRIX){
    valid = ispatest::test_row_matrix_vector_multiply(*rowMatrixPtr);
  }
  else if (objectType == EPETRA_CRSMATRIX){
    valid = ispatest::test_matrix_vector_multiply(*matrixPtr);
  }
  else{
    valid = ispatest::test_matrix_vector_multiply(*graphPtr);
  }

  if (!valid){
    if (localProc == 0) std::cout << "Rebalanced matrix is not a valid Epetra matrix" << std::endl;
    fail = 1;
  }
  else{
    if (localProc == 0) std::cout << "Rebalanced matrix is a valid Epetra matrix" << std::endl;
  }

  if (localProc == 0)
    std::cout << std::endl;



#else
  std::cout << "test_simple main: currently can only test "
	 << "with Epetra and EpetraExt enabled." << std::endl;
  rc = -1;
#endif

  return fail;
}
void
PeridigmNS::InterfaceData::WriteExodusOutput(int timeStep, const float & timeValue, Teuchos::RCP<Epetra_Vector> x, Teuchos::RCP<Epetra_Vector> y){

  int error_int = 0;

  int CPU_word_size = 0;
  int IO_word_size = 0;
  float version = 0;
  std::string outputFileNameStr = filename.str();
  std::vector<char> writable(outputFileNameStr.size() + 1);
  std::copy(outputFileNameStr.begin(), outputFileNameStr.end(), writable.begin());

  exoid = ex_open(&writable[0], EX_WRITE, &CPU_word_size, &IO_word_size, &version);

  error_int = ex_put_time(exoid, timeStep, &timeValue);
  TEUCHOS_TEST_FOR_EXCEPTION(error_int,std::logic_error, "ex_put_time(): Failure");

  float * quadValues = new float[numQuads];
  float * triValues = new float[numTris];

  // populate the quad values
  int quadIndex = 0;
  int triIndex = 0;
  for(int i=0;i<numOwnedPoints;++i){
    if(interfaceNodesMap->ElementSize(i)==4){
      quadValues[quadIndex] = (*interfaceAperture)[i];
      quadIndex++;
    }
    else if(interfaceNodesMap->ElementSize(i)==3){
      triValues[triIndex] = (*interfaceAperture)[i];
      triIndex++;
    }
    else{
      TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument,"size of this element is not recognized: " << interfaceNodesMap->ElementSize(i));
    }
  }

  int blockIndex = 0;
  const int varIndex = 1;
  blockIndex++;
  if(numQuads > 0){
    error_int = ex_put_elem_var(exoid, timeStep, varIndex, blockIndex, numQuads, &quadValues[0]);
    TEUCHOS_TEST_FOR_EXCEPTION(error_int,std::logic_error,"Failure ex_put_elem_var(): ");
  }
  blockIndex++;
  if(numTris > 0){
    error_int = ex_put_elem_var(exoid, timeStep, varIndex, blockIndex, numTris, &triValues[0]);
    TEUCHOS_TEST_FOR_EXCEPTION(error_int,std::logic_error,"Failure ex_put_elem_var(): ");
  }

  delete [] quadValues;
  delete [] triValues;

  // update the apertures...
  // import the mothership vectors x and y to the overlap epetra vectors
  Teuchos::RCP<const Epetra_Import> importer = Teuchos::rcp(new Epetra_Import(*elemOverlapMap, x->Map()));

  Teuchos::RCP<Epetra_Vector> xOverlap = Teuchos::rcp(new Epetra_Vector(*elemOverlapMap,true));
  xOverlap->Import(*x,*importer,Insert);
  Teuchos::RCP<Epetra_Vector> yOverlap = Teuchos::rcp(new Epetra_Vector(*elemOverlapMap,true));
  yOverlap->Import(*y,*importer,Insert);

  double *xValues;
  xOverlap->ExtractView( &xValues );
  double *yValues;
  yOverlap->ExtractView( &yValues );

  double xLeft=0,yLeft=0,zLeft=0,xRight=0,yRight=0,zRight=0;
  double XLeft=0,YLeft=0,ZLeft=0,XRight=0,YRight=0,ZRight=0;
  double X=0,Y=0;
  double dx=0,dy=0,dz=0,dX=0,dY=0,dZ=0;
  int elemIndexLeft=-1,elemIndexRight=-1,GIDLeft=-1,GIDRight=-1;

  for(int i=0;i<numOwnedPoints;++i){
    GIDLeft = elementLeft[i];
    GIDRight = elementRight[i];

    elemIndexLeft = xOverlap->Map().FirstPointInElement(elemOverlapMap->LID(GIDLeft));
    elemIndexRight = xOverlap->Map().FirstPointInElement(elemOverlapMap->LID(GIDRight));

    xLeft = xValues[elemIndexLeft+0];
    yLeft = xValues[elemIndexLeft+1];
    zLeft = xValues[elemIndexLeft+2];
    xRight = xValues[elemIndexRight+0];
    yRight = xValues[elemIndexRight+1];
    zRight = xValues[elemIndexRight+2];

    XLeft = yValues[elemIndexLeft+0];
    YLeft = yValues[elemIndexLeft+1];
    ZLeft = yValues[elemIndexLeft+2];
    XRight = yValues[elemIndexRight+0];
    YRight = yValues[elemIndexRight+1];
    ZRight = yValues[elemIndexRight+2];

    dx = xRight - xLeft;
    dy = yRight - yLeft;
    dz = zRight - zLeft;

    dX = XRight - XLeft;
    dY = YRight - YLeft;
    dZ = ZRight - ZLeft;

    X = std::sqrt(dx*dx + dy*dy + dz*dz);
    Y = std::sqrt(dX*dX + dY*dY + dZ*dZ);

    interfaceAperture->ReplaceMyValue(i,0,Y-X);
  }
  error_int = ex_update(exoid);
  TEUCHOS_TEST_FOR_EXCEPTION(error_int,std::logic_error,"Exodus file close failed.");
  error_int = ex_close(exoid);
  TEUCHOS_TEST_FOR_EXCEPTION(error_int,std::logic_error,"Exodus file close failed.");

}
std::pair<Teuchos::RCP<std::vector<std::size_t> >,
          Teuchos::RCP<std::vector<Teuchos::Tuple<double,3> > > >
getSideIdsAndCoords(const STK_Interface & mesh,
              const std::string & sideName, const std::string type_)
{
   Epetra_MpiComm Comm(mesh.getBulkData()->parallel());

   unsigned physicalDim = mesh.getDimension();
 
   // grab local IDs and coordinates on this side
   // and build local epetra vector
   //////////////////////////////////////////////////////////////////

   std::pair<Teuchos::RCP<std::vector<std::size_t> >,
             Teuchos::RCP<std::vector<Teuchos::Tuple<double,3> > > > sidePair =
          getLocalSideIdsAndCoords(mesh,sideName,type_);

   std::vector<std::size_t> & local_side_ids = *sidePair.first;
   std::vector<Teuchos::Tuple<double,3> > & local_side_coords = *sidePair.second;
   int nodeCount = local_side_ids.size();

   // build local Epetra objects
   Epetra_Map idMap(-1,nodeCount,0,Comm);
   Teuchos::RCP<Epetra_IntVector> localIdVec = Teuchos::rcp(new Epetra_IntVector(idMap));
   Teuchos::RCP<Epetra_MultiVector> localCoordVec = Teuchos::rcp(new Epetra_MultiVector(idMap,physicalDim));

   // copy local Ids into Epetra vector
   for(std::size_t n=0;n<local_side_ids.size();n++) {
      std::size_t nodeId = local_side_ids[n];
      Teuchos::Tuple<double,3> & coords = local_side_coords[n];

      (*localIdVec)[n] = nodeId;
      for(unsigned d=0;d<physicalDim;d++)
         (*(*localCoordVec)(d))[n] = coords[d];
   }

   // fully distribute epetra vector across all processors 
   // (these are "distributed" or "dist" objects)
   //////////////////////////////////////////////////////////////

   int dist_nodeCount = idMap.NumGlobalElements();

   // build global epetra objects
   Epetra_LocalMap distMap(dist_nodeCount,0,Comm);
   Teuchos::RCP<Epetra_IntVector> distIdVec = Teuchos::rcp(new Epetra_IntVector(distMap));
   Teuchos::RCP<Epetra_MultiVector> distCoordVec = Teuchos::rcp(new Epetra_MultiVector(distMap,physicalDim));

   // export to the localVec object from the "vector" object
   Epetra_Import importer(distMap,idMap);
   TEUCHOS_ASSERT(distIdVec->Import(*localIdVec,importer,Insert)==0);
   TEUCHOS_ASSERT(distCoordVec->Import(*localCoordVec,importer,Insert)==0);

   // convert back to generic stl vector objects
   ///////////////////////////////////////////////////////////

   Teuchos::RCP<std::vector<std::size_t> > dist_side_ids
      = Teuchos::rcp(new std::vector<std::size_t>(dist_nodeCount));
   Teuchos::RCP<std::vector<Teuchos::Tuple<double,3> > > dist_side_coords
      = Teuchos::rcp(new std::vector<Teuchos::Tuple<double,3> >(dist_nodeCount));

   // copy local Ids into Epetra vector
   for(std::size_t n=0;n<dist_side_ids->size();n++) {
      (*dist_side_ids)[n] = (*distIdVec)[n];

      Teuchos::Tuple<double,3> & coords = (*dist_side_coords)[n];
      for(unsigned d=0;d<physicalDim;d++)
         coords[d] = (*(*distCoordVec)(d))[n];
   }

   return std::make_pair(dist_side_ids,dist_side_coords);
}
AmesosBTFGlobal_LinearProblem::NewTypeRef
AmesosBTFGlobal_LinearProblem::
operator()( OriginalTypeRef orig )
{
  origObj_ = &orig;

  // Extract the matrix and vectors from the linear problem
  OldRHS_ = Teuchos::rcp( orig.GetRHS(), false );
  OldLHS_ = Teuchos::rcp( orig.GetLHS(), false );
  OldMatrix_ = Teuchos::rcp( dynamic_cast<Epetra_CrsMatrix *>( orig.GetMatrix() ), false );
	
  int nGlobal = OldMatrix_->NumGlobalRows(); 
  int n = OldMatrix_->NumMyRows();

  // Check if the matrix is on one processor.
  int myMatProc = -1, matProc = -1;
  int myPID = OldMatrix_->Comm().MyPID();
  int numProcs = OldMatrix_->Comm().NumProc();

  const Epetra_BlockMap& oldRowMap = OldMatrix_->RowMap();

  // Get some information about the parallel distribution.
  int maxMyRows = 0;
  std::vector<int> numGlobalElem( numProcs );
  OldMatrix_->Comm().GatherAll(&n, &numGlobalElem[0], 1);
  OldMatrix_->Comm().MaxAll(&n, &maxMyRows, 1);

  for (int proc=0; proc<numProcs; proc++) 
  {
    if (OldMatrix_->NumGlobalNonzeros() == OldMatrix_->NumMyNonzeros())
      myMatProc = myPID;
  }

  OldMatrix_->Comm().MaxAll( &myMatProc, &matProc, 1 );

  Teuchos::RCP<Epetra_CrsMatrix> serialMatrix;
  Teuchos::RCP<Epetra_Map> serialMap;	
  if( oldRowMap.DistributedGlobal() && matProc == -1) 
  {
    // The matrix is distributed and needs to be moved to processor zero.
    // Set the zero processor as the master.
    matProc = 0;
    serialMap = Teuchos::rcp( new Epetra_Map( Epetra_Util::Create_Root_Map( OldMatrix_->RowMap(), matProc ) ) );
    
    Epetra_Import serialImporter( *serialMap, OldMatrix_->RowMap() );
    serialMatrix = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *serialMap, 0 ) );
    serialMatrix->Import( *OldMatrix_, serialImporter, Insert );
    serialMatrix->FillComplete();
  }
  else {
    // The old matrix has already been moved to one processor (matProc).
    serialMatrix = OldMatrix_;
  }

  if( debug_ )
  {
    cout << "Original (serial) Matrix:\n";
    cout << *serialMatrix << endl;
  }

  // Obtain the current row and column orderings
  std::vector<int> origGlobalRows(nGlobal), origGlobalCols(nGlobal);
  serialMatrix->RowMap().MyGlobalElements( &origGlobalRows[0] );
  serialMatrix->ColMap().MyGlobalElements( &origGlobalCols[0] );
  
  // Perform reindexing on the full serial matrix (needed for BTF).
  Epetra_Map reIdxMap( serialMatrix->RowMap().NumGlobalElements(), serialMatrix->RowMap().NumMyElements(), 0, serialMatrix->Comm() );
  Teuchos::RCP<EpetraExt::ViewTransform<Epetra_CrsMatrix> > reIdxTrans =
    Teuchos::rcp( new EpetraExt::CrsMatrix_Reindex( reIdxMap ) );
  Epetra_CrsMatrix newSerialMatrix = (*reIdxTrans)( *serialMatrix );
  reIdxTrans->fwd();
  
  // Compute and apply BTF to the serial CrsMatrix and has been filtered by the threshold
  EpetraExt::AmesosBTF_CrsMatrix BTFTrans( threshold_, upperTri_, verbose_, debug_ );
  Epetra_CrsMatrix newSerialMatrixBTF = BTFTrans( newSerialMatrix );
  
  rowPerm_ = BTFTrans.RowPerm();
  colPerm_ = BTFTrans.ColPerm();
  blockPtr_ = BTFTrans.BlockPtr();
  numBlocks_ = BTFTrans.NumBlocks();
 
  if (myPID == matProc && verbose_) {
    bool isSym = true;
    for (int i=0; i<nGlobal; ++i) {
      if (rowPerm_[i] != colPerm_[i]) {
        isSym = false;
        break;
      }
    }
    std::cout << "The BTF permutation symmetry (0=false,1=true) is : " << isSym << std::endl;
  }
  
  // Compute the permutation w.r.t. the original row and column GIDs.
  std::vector<int> origGlobalRowsPerm(nGlobal), origGlobalColsPerm(nGlobal);
  if (myPID == matProc) {
    for (int i=0; i<nGlobal; ++i) {
      origGlobalRowsPerm[i] = origGlobalRows[ rowPerm_[i] ];
      origGlobalColsPerm[i] = origGlobalCols[ colPerm_[i] ];
    }
  }
  OldMatrix_->Comm().Broadcast( &origGlobalRowsPerm[0], nGlobal, matProc );
  OldMatrix_->Comm().Broadcast( &origGlobalColsPerm[0], nGlobal, matProc );

  // Generate the full serial matrix that imports according to the previously computed BTF.
  Epetra_CrsMatrix newSerialMatrixT( Copy, newSerialMatrixBTF.RowMap(), 0 );
  newSerialMatrixT.Import( newSerialMatrix, *(BTFTrans.Importer()), Insert );
  newSerialMatrixT.FillComplete();
  
  if( debug_ )
  {
    cout << "Original (serial) Matrix permuted via BTF:\n";
    cout << newSerialMatrixT << endl;
  }

  // Perform reindexing on the full serial matrix (needed for balancing).
  Epetra_Map reIdxMap2( newSerialMatrixT.RowMap().NumGlobalElements(), newSerialMatrixT.RowMap().NumMyElements(), 0, newSerialMatrixT.Comm() );
  Teuchos::RCP<EpetraExt::ViewTransform<Epetra_CrsMatrix> > reIdxTrans2 =
    Teuchos::rcp( new EpetraExt::CrsMatrix_Reindex( reIdxMap2 ) );
  Epetra_CrsMatrix tNewSerialMatrixT = (*reIdxTrans2)( newSerialMatrixT );
  reIdxTrans2->fwd();

  Teuchos::RCP<Epetra_Map> balancedMap;
  
  if (balance_ == "linear") {
    
    // Distribute block somewhat evenly across processors
    std::vector<int> rowDist(numProcs+1,0);
    int balRows = nGlobal / numProcs + 1;
    int numRows = balRows, currProc = 1;
    for ( int i=0; i<numBlocks_ || currProc < numProcs; ++i ) {
      if (blockPtr_[i] > numRows) {
	rowDist[currProc++] = blockPtr_[i-1];
	numRows = blockPtr_[i-1] + balRows;
      }      
    }
    rowDist[numProcs] = nGlobal;
   
    // Create new Map based on this linear distribution.
    int numMyBalancedRows = rowDist[myPID+1]-rowDist[myPID];

    NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, numMyBalancedRows, &origGlobalRowsPerm[ rowDist[myPID] ], 0, OldMatrix_->Comm() ) );
    // Right now we do not explicitly build the column map and assume the BTF permutation is symmetric!
    //NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, nGlobal, &colPerm_[0], 0, OldMatrix_->Comm() ) );
    
    if ( verbose_ ) 
      std::cout << "Processor " << myPID << " has " << numMyBalancedRows << " rows." << std::endl;    
    //balancedMap = Teuchos::rcp( new Epetra_Map( nGlobal, numMyBalancedRows, 0, serialMatrix->Comm() ) );
  }
  else if (balance_ == "isorropia") {
	
    // Compute block adjacency graph for partitioning.
    std::vector<double> weight;
    Teuchos::RCP<Epetra_CrsGraph> blkGraph;
    EpetraExt::BlockAdjacencyGraph adjGraph;
    blkGraph = adjGraph.compute( const_cast<Epetra_CrsGraph&>(tNewSerialMatrixT.Graph()), 
							numBlocks_, blockPtr_, weight, verbose_);
    Epetra_Vector rowWeights( View, blkGraph->Map(), &weight[0] );
    
    // Call Isorropia to rebalance this graph.
    Teuchos::RCP<Epetra_CrsGraph> balancedGraph =
      Isorropia::Epetra::create_balanced_copy( *blkGraph, rowWeights );
    
    int myNumBlkRows = balancedGraph->NumMyRows();    
    
    //std::vector<int> myGlobalElements(nGlobal);
    std::vector<int> newRangeElements(nGlobal), newDomainElements(nGlobal);
    int grid = 0, myElements = 0;
    for (int i=0; i<myNumBlkRows; ++i) {
      grid = balancedGraph->GRID( i );
      for (int j=blockPtr_[grid]; j<blockPtr_[grid+1]; ++j) {
	newRangeElements[myElements++] = origGlobalRowsPerm[j];
	//myGlobalElements[myElements++] = j;
      }
    }

    NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, myElements, &newRangeElements[0], 0, OldMatrix_->Comm() ) );
    // Right now we do not explicitly build the column map and assume the BTF permutation is symmetric!
    //NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, nGlobal, &colPerm_[0], 0, OldMatrix_->Comm() ) );
    //balancedMap = Teuchos::rcp( new Epetra_Map( nGlobal, myElements, &myGlobalElements[0], 0, serialMatrix->Comm() ) );

    if ( verbose_ ) 
      std::cout << "Processor " << myPID << " has " << myElements << " rows." << std::endl;
  }
  
  // Use New Domain and Range Maps to Generate Importer
  //for now, assume they start out as identical
  Epetra_Map OldRowMap = OldMatrix_->RowMap();
  Epetra_Map OldColMap = OldMatrix_->ColMap();
  
  if( debug_ )
  {
    cout << "New Row Map\n";
    cout << *NewRowMap_ << endl;
    //cout << "New Col Map\n";
    //cout << *NewColMap_ << endl;
  }

  // Generate New Graph
  // NOTE:  Right now we are creating the graph, assuming that the permutation is symmetric!
  // NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, *NewColMap_, 0 ) );
  NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, 0 ) );
  Importer_ = Teuchos::rcp( new Epetra_Import( *NewRowMap_, OldRowMap ) );
  Importer2_ = Teuchos::rcp( new Epetra_Import( OldRowMap, *NewRowMap_ ) );
  NewGraph_->Import( OldMatrix_->Graph(), *Importer_, Insert );
  NewGraph_->FillComplete();

  if( debug_ )
  {
    cout << "NewGraph\n";
    cout << *NewGraph_;
  }

  // Create new linear problem and import information from old linear problem
  NewMatrix_ = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *NewGraph_ ) );
  NewMatrix_->Import( *OldMatrix_, *Importer_, Insert );
  NewMatrix_->FillComplete();

  NewLHS_ = Teuchos::rcp( new Epetra_MultiVector( *NewRowMap_, OldLHS_->NumVectors() ) );
  NewLHS_->Import( *OldLHS_, *Importer_, Insert );
  
  NewRHS_ = Teuchos::rcp( new Epetra_MultiVector( *NewRowMap_, OldRHS_->NumVectors() ) );
  NewRHS_->Import( *OldRHS_, *Importer_, Insert );

  if( debug_ )
  {
    cout << "New Matrix\n";
    cout << *NewMatrix_ << endl;
  }

  newObj_ = new Epetra_LinearProblem( &*NewMatrix_, &*NewLHS_, &*NewRHS_ );

  return *newObj_;
}