int four_quads(const Epetra_Comm& Comm, bool preconstruct_graph, bool verbose)
{   
  if (verbose) {
    cout << "******************* four_quads ***********************"<<endl;
  }

  //This function assembles a matrix representing a finite-element mesh
  //of four 2-D quad elements. There are 9 nodes in the problem. The
  //same problem is assembled no matter how many processors are being used
  //(within reason). It may not work if more than 9 processors are used.
  //
  //  *------*------*
  // 6|     7|     8|
  //  | E2   | E3   |
  //  *------*------*
  // 3|     4|     5|
  //  | E0   | E1   |
  //  *------*------*
  // 0      1      2
  //
  //Nodes are denoted by * with node-numbers below and left of each node.
  //E0, E1 and so on are element-numbers.
  //
  //Each processor will contribute a sub-matrix of size 4x4, filled with 1's,
  //for each element. Thus, the coefficient value at position 0,0 should end up
  //being 1.0*numProcs, the value at position 4,4 should be 1.0*4*numProcs, etc.
  //
  //Depending on the number of processors being used, the locations of the
  //specific matrix positions (in terms of which processor owns them) will vary.
  //
  
  int numProcs = Comm.NumProc();
  
  int numNodes = 9;
  int numElems = 4;
  int numNodesPerElem = 4;

  int blockSize = 1;
  int indexBase = 0;

  //Create a map using epetra-defined linear distribution.
  Epetra_BlockMap map(numNodes, blockSize, indexBase, Comm);

  Epetra_CrsGraph* graph = NULL;

  int* nodes = new int[numNodesPerElem];
  int i, j, k, err = 0;

  if (preconstruct_graph) {
    graph = new Epetra_CrsGraph(Copy, map, 1);

    //we're going to fill the graph with indices, but remember it will only
    //accept indices in rows for which map.MyGID(row) is true.

    for(i=0; i<numElems; ++i) {
      switch(i) {
      case 0:
        nodes[0] = 0; nodes[1] = 1; nodes[2] = 4; nodes[3] = 3;
        break;
      case 1:
        nodes[0] = 1; nodes[1] = 2; nodes[2] = 5; nodes[3] = 4;
        break;
      case 2:
        nodes[0] = 3; nodes[1] = 4; nodes[2] = 7; nodes[3] = 6;
        break;
      case 3:
        nodes[0] = 4; nodes[1] = 5; nodes[2] = 8; nodes[3] = 7;
        break;
      }

      for(j=0; j<numNodesPerElem; ++j) {
        if (map.MyGID(nodes[j])) {
          err = graph->InsertGlobalIndices(nodes[j], numNodesPerElem,
                                           nodes);
          if (err<0) return(err);
        }
      }
    }

    EPETRA_CHK_ERR( graph->FillComplete() );
  }

  Epetra_FEVbrMatrix* A = NULL;

  if (preconstruct_graph) {
    A = new Epetra_FEVbrMatrix(Copy, *graph);
  }
  else {
    A = new Epetra_FEVbrMatrix(Copy, map, 1);
  }

  //EPETRA_CHK_ERR( A->PutScalar(0.0) );

  double* values_1d = new double[numNodesPerElem*numNodesPerElem];
  double** values_2d = new double*[numNodesPerElem];

  for(i=0; i<numNodesPerElem*numNodesPerElem; ++i) values_1d[i] = 1.0;

  int offset = 0;
  for(i=0; i<numNodesPerElem; ++i) {
    values_2d[i] = &(values_1d[offset]);
    offset += numNodesPerElem;
  }

  for(i=0; i<numElems; ++i) {
    switch(i) {
    case 0:
      nodes[0] = 0; nodes[1] = 1; nodes[2] = 4; nodes[3] = 3;
      break;

    case 1:
      nodes[0] = 1; nodes[1] = 2; nodes[2] = 5; nodes[3] = 4;
      break;

    case 2:
      nodes[0] = 3; nodes[1] = 4; nodes[2] = 7; nodes[3] = 6;
      break;

     case 3:
      nodes[0] = 4; nodes[1] = 5; nodes[2] = 8; nodes[3] = 7;
      break;
    }

    for(j=0; j<numNodesPerElem; ++j) {
      if (preconstruct_graph) {
	err = A->BeginSumIntoGlobalValues(nodes[j], numNodesPerElem, nodes);
	if (err<0) return(err);
      }
      else {
	err = A->BeginInsertGlobalValues(nodes[j], numNodesPerElem, nodes);
	if (err<0) return(err);
      }
    
      for(k=0; k<numNodesPerElem; ++k) {
	err = A->SubmitBlockEntry(values_1d, blockSize, blockSize, blockSize);
	if (err<0) return(err);
      }

      err = A->EndSubmitEntries();
      if (err<0) return(err);
    }
  }

  EPETRA_CHK_ERR( A->GlobalAssemble() );

  Epetra_FEVbrMatrix* Acopy = new Epetra_FEVbrMatrix(*A);

  if (verbose) {
    cout << "A:"<<*A << endl;
    cout << "Acopy:"<<*Acopy<<endl;
  }

  Epetra_Vector x(A->RowMap()), y(A->RowMap());

  x.PutScalar(1.0); y.PutScalar(0.0);

  Epetra_Vector x2(Acopy->RowMap()), y2(Acopy->RowMap());

  x2.PutScalar(1.0); y2.PutScalar(0.0);

  A->Multiply(false, x, y);

  Acopy->Multiply(false, x2, y2);

  double ynorm2, y2norm2;

  y.Norm2(&ynorm2);
  y2.Norm2(&y2norm2);
  if (ynorm2 != y2norm2) {
    cerr << "norm2(A*ones) != norm2(*Acopy*ones)"<<endl;
    return(-99);
  }

  Epetra_FEVbrMatrix* Acopy2 =
    new Epetra_FEVbrMatrix(Copy, A->RowMap(), A->ColMap(), 1);

  *Acopy2 = *Acopy;

  Epetra_Vector x3(Acopy->RowMap()), y3(Acopy->RowMap());

  x3.PutScalar(1.0); y3.PutScalar(0.0);

  Acopy2->Multiply(false, x3, y3);

  double y3norm2;
  y3.Norm2(&y3norm2);

  if (y3norm2 != y2norm2) {
    cerr << "norm2(Acopy*ones) != norm2(Acopy2*ones)"<<endl;
    return(-999);
  }

  int len = 20;
  int* indices = new int[len];
  double* values = new double[len];
  int numIndices;

  if (map.MyGID(0)) {
    int lid = map.LID(0);
    EPETRA_CHK_ERR( A->ExtractMyRowCopy(lid, len, numIndices,
					values, indices) );
    if (numIndices != 4) {
      return(-1);
    }
    if (indices[0] != lid) {
      return(-2);
    }

    if (values[0] != 1.0*numProcs) {
      cout << "ERROR: values[0] ("<<values[0]<<") should be "<<numProcs<<endl;
      return(-3);
    }
  }

  if (map.MyGID(4)) {
    int lid = map.LID(4);
    EPETRA_CHK_ERR( A->ExtractMyRowCopy(lid, len, numIndices,
					values, indices) );

    if (numIndices != 9) {
      return(-4);
    }
    int lcid = A->LCID(4);
//     if (indices[lcid] != 4) {
//       cout << "ERROR: indices[4] ("<<indices[4]<<") should be "
// 	   <<A->LCID(4)<<endl;
//       return(-5);
//     }
    if (values[lcid] != 4.0*numProcs) {
      cout << "ERROR: values["<<lcid<<"] ("<<values[lcid]<<") should be "
	   <<4*numProcs<<endl;
      return(-6);
    }
  }

  delete [] values_2d;
  delete [] values_1d;
  delete [] nodes;
  delete [] indices;
  delete [] values;

  delete A;
  delete Acopy2;
  delete Acopy;
  delete graph;

  return(0);
}
// Builds the graph of a block system from the sparsity of BaseMatrix and a
// per-block-row stencil.
//
// BaseMatrix - row matrix whose local rows supply the base sparsity pattern.
// RowStencil - RowStencil[i] lists the block-column offsets (relative to
//              RowIndices[i]) that block row i couples to.
// RowIndices - block-row index of each block row handled on this processor.
// GlobalComm - communicator used for the global map and the size reduction.
//
// Global ids are formed as baseGID + blockIndex * Offset, with Offset taken
// from TCalculateOffset on the base row map. The returned graph is allocated
// with new and FillComplete() has been called on it; this function keeps no
// reference to it, so releasing it is presumably the caller's job.
//
// int_type is the global-index integer type; it is not declared in this block
// and is assumed to come from the enclosing template context.
//
// NOTE(review): the return codes of ExtractMyRowCopy, InsertGlobalIndices and
// FillComplete are not checked here.
Epetra_CrsGraph * BlockUtility::TGenerateBlockGraph(
        const Epetra_RowMatrix & BaseMatrix,
        const vector< vector<int_type> > & RowStencil,
        const vector<int_type> & RowIndices,
        const Epetra_Comm & GlobalComm )
{

  const Epetra_BlockMap & BaseMap = BaseMatrix.RowMatrixRowMap();
  const Epetra_BlockMap & BaseColMap = BaseMatrix.RowMatrixColMap();
  int_type BaseIndex = (int_type) BaseMap.IndexBase64();
  int_type Offset = BlockUtility::TCalculateOffset<int_type>(BaseMap);

  //Get Base Global IDs
  int NumBlockRows = RowIndices.size();
  int Size = BaseMap.NumMyElements();
  int TotalSize = NumBlockRows * Size;
  vector<int_type> GIDs(Size);
  // Indexing element 0 of an empty vector is undefined, so hand out a null
  // pointer whenever a buffer has no elements (e.g. this processor owns no
  // base rows).
  int_type * const GIDsPtr =
    GIDs.empty() ? static_cast<int_type*>(0) : &GIDs[0];
  BaseMap.MyGlobalElements( GIDsPtr );

  // Replicate the base GIDs once per block row, shifted by that block row's
  // index times Offset.
  vector<int_type> GlobalGIDs( TotalSize );
  for( int i = 0; i < NumBlockRows; ++i )
  {
    for( int j = 0; j < Size; ++j )
      GlobalGIDs[i*Size+j] = GIDs[j] + RowIndices[i] * Offset;
  }
  int_type * const GlobalGIDsPtr =
    GlobalGIDs.empty() ? static_cast<int_type*>(0) : &GlobalGIDs[0];

  int_type GlobalSize;
  int_type TotalSize_int_type = TotalSize;
  GlobalComm.SumAll( &TotalSize_int_type, &GlobalSize, 1 );

  Epetra_Map GlobalMap( GlobalSize, TotalSize, GlobalGIDsPtr, BaseIndex, GlobalComm );

  int MaxIndices = BaseMatrix.MaxNumEntries();
  vector<int> Indices_local(MaxIndices);
  vector<int_type> Indices_global(MaxIndices);
  vector<double> Values(MaxIndices);
  int * const LocalPtr =
    Indices_local.empty() ? static_cast<int*>(0) : &Indices_local[0];
  int_type * const GlobalPtr =
    Indices_global.empty() ? static_cast<int_type*>(0) : &Indices_global[0];
  double * const ValuesPtr =
    Values.empty() ? static_cast<double*>(0) : &Values[0];
  int NumIndices;

  // Plain upcast to the base map type; no run-time type check is involved.
  Epetra_CrsGraph * GlobalGraph = new Epetra_CrsGraph( Copy,
                               static_cast<Epetra_BlockMap&>(GlobalMap),
                               0 );

  for( int i = 0; i < NumBlockRows; ++i )
  {
    int StencilSize = RowStencil[i].size();
    for( int j = 0; j < Size; ++j )
    {
      int_type GlobalRow = (int_type) GlobalMap.GID64(j+i*Size);

      // Fetch base row j and translate its local column ids to global ids.
      BaseMatrix.ExtractMyRowCopy( j, MaxIndices, NumIndices, ValuesPtr, LocalPtr );
      for( int l = 0; l < NumIndices; ++l ) Indices_global[l] = (int_type) BaseColMap.GID64(Indices_local[l]);

      for( int k = 0; k < StencilSize; ++k )
      {
        // Indices_global is shifted in place: undo the previous stencil
        // entry's shift and apply the current one in a single step, so after
        // this the columns equal baseGID + (RowIndices[i]+RowStencil[i][k])*Offset.
        int_type ColOffset = (RowIndices[i]+RowStencil[i][k]) * Offset;
        if( k > 0 ) ColOffset -= (RowIndices[i]+RowStencil[i][k-1]) * Offset;

        for( int l = 0; l < NumIndices; ++l )
          Indices_global[l] += ColOffset;

        GlobalGraph->InsertGlobalIndices( GlobalRow, NumIndices, GlobalPtr );
      }
    }
  }

  GlobalGraph->FillComplete();

  return GlobalGraph;
}
// Builds the graph of a block system from a base sparsity graph and a graph
// describing which blocks couple to each other.
//
// BaseGraph       - sparsity pattern replicated inside every block.
// LocalBlockGraph - block-level pattern; its local rows are the block rows
//                   handled here and its columns select the coupled blocks.
// GlobalComm      - communicator passed on to GenerateBlockMap.
//
// Column ids are formed as baseColumnGID + blockColumnGID * COffset, with
// COffset taken from TCalculateOffset on the base column map. Row, domain and
// range maps come from GenerateBlockMap and are deleted before returning. The
// returned graph is allocated with new and has been FillComplete'd; this
// function keeps no reference to it, so releasing it is presumably the
// caller's job.
//
// int_type is the global-index integer type; it is not declared in this block
// and is assumed to come from the enclosing template context.
//
// NOTE(review): the return codes of ExtractMyRowView, InsertGlobalIndices and
// FillComplete are not checked here.
Epetra_CrsGraph * BlockUtility::TGenerateBlockGraph(
        const Epetra_CrsGraph & BaseGraph,
        const Epetra_CrsGraph & LocalBlockGraph,
        const Epetra_Comm & GlobalComm )
{
  const Epetra_BlockMap & BaseRowMap = BaseGraph.RowMap();
  const Epetra_BlockMap & BaseColMap = BaseGraph.ColMap();
  int_type ROffset = BlockUtility::TCalculateOffset<int_type>(BaseRowMap);
  (void) ROffset; // Silence "unused variable" compiler warning.
  int_type COffset = BlockUtility::TCalculateOffset<int_type>(BaseColMap);

  //Get Base Global IDs
  const Epetra_BlockMap & BlockRowMap = LocalBlockGraph.RowMap();
  const Epetra_BlockMap & BlockColMap = LocalBlockGraph.ColMap();

  int NumBlockRows = BlockRowMap.NumMyElements();
  vector<int_type> RowIndices(NumBlockRows);
  // Indexing element 0 of an empty vector is undefined, so hand out a null
  // pointer whenever a buffer has no elements (e.g. no local block rows).
  int_type * const RowIndicesPtr =
    RowIndices.empty() ? static_cast<int_type*>(0) : &RowIndices[0];
  BlockRowMap.MyGlobalElements(RowIndicesPtr);

  int Size = BaseRowMap.NumMyElements();

  Epetra_Map *GlobalRowMap =
    GenerateBlockMap(BaseRowMap, BlockRowMap, GlobalComm);


  int MaxIndices = BaseGraph.MaxNumIndices();
  vector<int_type> Indices(MaxIndices);
  int_type * const IndicesPtr =
    Indices.empty() ? static_cast<int_type*>(0) : &Indices[0];

  // Plain upcast to the base map type; no run-time type check is involved.
  Epetra_CrsGraph * GlobalGraph = new Epetra_CrsGraph( Copy,
                               static_cast<Epetra_BlockMap&>(*GlobalRowMap),
                               0 );

  int NumBlockIndices, NumBaseIndices;
  int *BlockIndices, *BaseIndices;
  for( int i = 0; i < NumBlockRows; ++i )
  {
    // Blocks coupled to local block row i.
    LocalBlockGraph.ExtractMyRowView(i, NumBlockIndices, BlockIndices);

    for( int j = 0; j < Size; ++j )
    {
      int_type GlobalRow = (int_type) GlobalRowMap->GID64(j+i*Size);

      BaseGraph.ExtractMyRowView( j, NumBaseIndices, BaseIndices );
      for( int k = 0; k < NumBlockIndices; ++k )
      {
        // Shift applied to the base columns for the k-th coupled block.
        int_type ColOffset = (int_type) BlockColMap.GID64(BlockIndices[k]) * COffset;

        for( int l = 0; l < NumBaseIndices; ++l )
          Indices[l] = (int_type) BaseGraph.GCID64(BaseIndices[l]) + ColOffset;

        GlobalGraph->InsertGlobalIndices( GlobalRow, NumBaseIndices, IndicesPtr );
      }
    }
  }

  const Epetra_BlockMap & BaseDomainMap = BaseGraph.DomainMap();
  const Epetra_BlockMap & BaseRangeMap = BaseGraph.RangeMap();
  const Epetra_BlockMap & BlockDomainMap = LocalBlockGraph.DomainMap();
  const Epetra_BlockMap & BlockRangeMap = LocalBlockGraph.RangeMap();

  Epetra_Map *GlobalDomainMap =
    GenerateBlockMap(BaseDomainMap, BlockDomainMap, GlobalComm);
  Epetra_Map *GlobalRangeMap =
    GenerateBlockMap(BaseRangeMap, BlockRangeMap, GlobalComm);

  GlobalGraph->FillComplete(*GlobalDomainMap, *GlobalRangeMap);

  delete GlobalDomainMap;
  delete GlobalRangeMap;
  delete GlobalRowMap;

  return GlobalGraph;
}
int four_quads(const Epetra_Comm& Comm, bool preconstruct_graph, bool verbose)
{
  if (verbose) {
    cout << "******************* four_quads ***********************"<<endl;
  }

  //This function assembles a matrix representing a finite-element mesh
  //of four 2-D quad elements. There are 9 nodes in the problem. The
  //same problem is assembled no matter how many processors are being used
  //(within reason). It may not work if more than 9 processors are used.
  //
  //  *------*------*
  // 6|     7|     8|
  //  | E2   | E3   |
  //  *------*------*
  // 3|     4|     5|
  //  | E0   | E1   |
  //  *------*------*
  // 0      1      2
  //
  //Nodes are denoted by * with node-numbers below and left of each node.
  //E0, E1 and so on are element-numbers.
  //
  //Each processor will contribute a sub-matrix of size 4x4, filled with 1's,
  //for each element. Thus, the coefficient value at position 0,0 should end up
  //being 1.0*numProcs, the value at position 4,4 should be 1.0*4*numProcs, etc.
  //
  //Depending on the number of processors being used, the locations of the
  //specific matrix positions (in terms of which processor owns them) will vary.
  //

  int numProcs = Comm.NumProc();

  int numNodes = 9;
  int numElems = 4;
  int numNodesPerElem = 4;

  int indexBase = 0;

  //Create a map using epetra-defined linear distribution.
  Epetra_Map map(numNodes, indexBase, Comm);

  Epetra_CrsGraph* graph = NULL;

  int* nodes = new int[numNodesPerElem];
  int i, j, err = 0;

  if (preconstruct_graph) {
    graph = new Epetra_CrsGraph(Copy, map, 1);

    //we're going to fill the graph with indices, but remember it will only
    //accept indices in rows for which map.MyGID(row) is true.

    for(i=0; i<numElems; ++i) {
      switch(i) {
      case 0:
	nodes[0] = 0; nodes[1] = 1; nodes[2] = 4; nodes[3] = 3;
	break;
      case 1:
	nodes[0] = 1; nodes[1] = 2; nodes[2] = 5; nodes[3] = 4;
	break;
      case 2:
	nodes[0] = 3; nodes[1] = 4; nodes[2] = 7; nodes[3] = 6;
	break;
      case 3:
	nodes[0] = 4; nodes[1] = 5; nodes[2] = 8; nodes[3] = 7;
	break;
      }

      for(j=0; j<numNodesPerElem; ++j) {
	if (map.MyGID(nodes[j])) {
	  err = graph->InsertGlobalIndices(nodes[j], numNodesPerElem,
					   nodes);
	  if (err<0) return(err);
	}
      }
    }

    EPETRA_CHK_ERR( graph->FillComplete() );
  }

  Epetra_FECrsMatrix* A = NULL;

  if (preconstruct_graph) {
    A = new Epetra_FECrsMatrix(Copy, *graph);
  }
  else {
    A = new Epetra_FECrsMatrix(Copy, map, 1);
  }

  EPETRA_CHK_ERR( A->PutScalar(0.0) );

  double* values_1d = new double[numNodesPerElem*numNodesPerElem];
  double** values_2d = new double*[numNodesPerElem];

  for(i=0; i<numNodesPerElem*numNodesPerElem; ++i) values_1d[i] = 1.0;

  int offset = 0;
  for(i=0; i<numNodesPerElem; ++i) {
    values_2d[i] = &(values_1d[offset]);
    offset += numNodesPerElem;
  }

  int format = Epetra_FECrsMatrix::ROW_MAJOR;
  Epetra_IntSerialDenseVector epetra_nodes(View, nodes, numNodesPerElem);
  Epetra_SerialDenseMatrix epetra_values(View, values_1d, numNodesPerElem,
					 numNodesPerElem, numNodesPerElem);

  for(i=0; i<numElems; ++i) {
    switch(i) {
    case 0:
      nodes[0] = 0; nodes[1] = 1; nodes[2] = 4; nodes[3] = 3;
      if (preconstruct_graph) {
	err = A->SumIntoGlobalValues(epetra_nodes,
				     epetra_values, format);
	if (err<0) return(err);
      }
      else {
	err = A->InsertGlobalValues(epetra_nodes,
				    epetra_values, format);
	if (err<0) return(err);
      }
      break;

    case 1:
      nodes[0] = 1; nodes[1] = 2; nodes[2] = 5; nodes[3] = 4;
      if (preconstruct_graph) {
	err = A->SumIntoGlobalValues(nodes[0], numNodesPerElem, values_2d[0],
                                     nodes);
	err += A->SumIntoGlobalValues(nodes[1], numNodesPerElem, values_2d[1],
                                     nodes);
	err += A->SumIntoGlobalValues(nodes[2], numNodesPerElem, values_2d[2],
                                     nodes);
	err += A->SumIntoGlobalValues(nodes[3], numNodesPerElem, values_2d[3],
                                     nodes);
	if (err<0) return(err);
      }
      else {
	err = A->InsertGlobalValues(numNodesPerElem, nodes,
				    values_2d, format);
	if (err<0) return(err);
      }
      break;

    case 2:
      nodes[0] = 3; nodes[1] = 4; nodes[2] = 7; nodes[3] = 6;
      if (preconstruct_graph) {
	err = A->SumIntoGlobalValues(numNodesPerElem, nodes,
				     numNodesPerElem, nodes,
				     values_1d, format);
	if (err<0) return(err);
      }
      else {
	err = A->InsertGlobalValues(numNodesPerElem, nodes,
				    numNodesPerElem, nodes,
				    values_1d, format);
	if (err<0) return(err);
      }
      break;

     case 3:
      nodes[0] = 4; nodes[1] = 5; nodes[2] = 8; nodes[3] = 7;
      if (preconstruct_graph) {
	err = A->SumIntoGlobalValues(numNodesPerElem, nodes,
				     numNodesPerElem, nodes,
				     values_2d, format);
	if (err<0) return(err);
      }
      else {
	err = A->InsertGlobalValues(numNodesPerElem, nodes,
				    numNodesPerElem, nodes,
				    values_2d, format);
	if (err<0) return(err);
      }
      break;
    }
  }

  err = A->GlobalAssemble();
  if (err < 0) {
    return(err);
  }

  Epetra_Vector x(A->RowMap()), y(A->RowMap());

  x.PutScalar(1.0); y.PutScalar(0.0);

  Epetra_FECrsMatrix Acopy(*A);

  err = Acopy.GlobalAssemble();
  if (err < 0) {
    return(err);
  }

  bool the_same = epetra_test::compare_matrices(*A, Acopy);
  if (!the_same) {
    return(-1);
  }

  Epetra_FECrsMatrix Acopy2(Copy, A->RowMap(), A->ColMap(), 1);

  Acopy2 = Acopy;

  the_same = epetra_test::compare_matrices(*A, Acopy);
  if (!the_same) {
    return(-1);
  }

  int len = 20;
  int* indices = new int[len];
  double* values = new double[len];
  int numIndices;

  if (map.MyGID(0)) {
    EPETRA_CHK_ERR( A->ExtractGlobalRowCopy(0, len, numIndices,
					    values, indices) );
    if (numIndices != 4) {
      return(-1);
    }
    if (indices[0] != 0) {
      return(-2);
    }

    if (values[0] != 1.0*numProcs) {
      cout << "ERROR: values[0] ("<<values[0]<<") should be "<<numProcs<<endl;
      return(-3);
    }
  }

  if (map.MyGID(4)) {
    EPETRA_CHK_ERR( A->ExtractGlobalRowCopy(4, len, numIndices,
					    values, indices) );

    if (numIndices != 9) {
      return(-4);
    }
    int lcid = A->LCID(4);
    if (lcid<0) {
      return(-5);
    }
    if (values[lcid] != 4.0*numProcs) {
      cout << "ERROR: values["<<lcid<<"] ("<<values[lcid]<<") should be "
	   <<4*numProcs<<endl;
      return(-6);
    }
  }

  delete [] values_2d;
  delete [] values_1d;
  delete [] nodes;
  delete [] indices;
  delete [] values;

  delete A;
  delete graph;

  return(0);
}