Beispiel #1
0
void KfieldBC<PHAL::AlbanyTraits::Jacobian, Traits>::
evaluateFields(typename Traits::EvalData dirichletWorkset)
{

  Teuchos::RCP<Epetra_Vector> f = dirichletWorkset.f;
  Teuchos::RCP<Epetra_CrsMatrix> jac = dirichletWorkset.Jac;
  Teuchos::RCP<const Epetra_Vector> x = dirichletWorkset.x;

  RealType time = dirichletWorkset.current_time;

  const RealType j_coeff = dirichletWorkset.j_coeff;
  const std::vector<std::vector<int> >& nsNodes = 
    dirichletWorkset.nodeSets->find(this->nodeSetID)->second;
  const std::vector<double*>& nsNodeCoords = 
    dirichletWorkset.nodeSetCoords->find(this->nodeSetID)->second;

  RealType* matrixEntries;
  int*    matrixIndices;
  int     numEntries;
  RealType diag=j_coeff;
  bool fillResid = (f != Teuchos::null);

  int xlunk, ylunk; // local indicies into unknown vector
  double* coord;
  ScalarT Xval, Yval; 
  for (unsigned int inode = 0; inode < nsNodes.size(); inode++) 
  {
    xlunk = nsNodes[inode][0];
    ylunk = nsNodes[inode][1];
    coord = nsNodeCoords[inode];
    
    this->computeBCs(coord, Xval, Yval, time);
    
    // replace jac values for the X dof 
    jac->ExtractMyRowView(xlunk, numEntries, matrixEntries, matrixIndices);
    for (int i=0; i<numEntries; i++) matrixEntries[i]=0;
    jac->ReplaceMyValues(xlunk, 1, &diag, &xlunk);

    // replace jac values for the y dof
    jac->ExtractMyRowView(ylunk, numEntries, matrixEntries, matrixIndices);
    for (int i=0; i<numEntries; i++) matrixEntries[i]=0;
    jac->ReplaceMyValues(ylunk, 1, &diag, &ylunk);
    
    if (fillResid)
    {
      (*f)[xlunk] = ((*x)[xlunk] - Xval.val());
      (*f)[ylunk] = ((*x)[ylunk] - Yval.val());
    } 
  }
}
Beispiel #2
0
int main(int argc, char *argv[])
{
  // Initialize MPI
#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
#endif

  // Create a communicator for Epetra objects
#ifdef HAVE_MPI
  Epetra_MpiComm Comm( MPI_COMM_WORLD );
#else
  Epetra_SerialComm Comm;
#endif

  int * testInt = new int[100];
  delete [] testInt;

  bool verbose = false;

  if (argc > 1)
    if (argv[1][0]=='-' && argv[1][1]=='v')
      verbose = true;

  // Get the process ID and the total number of processors
  int MyPID = Comm.MyPID();
  int NumProc = Comm.NumProc();

  // Set up theolver options parameter list
  Teuchos::RCP<Teuchos::ParameterList> noxParamsPtr = Teuchos::rcp(new Teuchos::ParameterList);
  Teuchos::ParameterList & noxParams = *(noxParamsPtr.get());

  // Set up the printing utilities
  // Only print output if the "-v" flag is set on the command line
  Teuchos::ParameterList& printParams = noxParams.sublist("Printing");
  printParams.set("MyPID", MyPID);
  printParams.set("Output Precision", 5);
  printParams.set("Output Processor", 0);
  if( verbose )
    printParams.set("Output Information",
        NOX::Utils::OuterIteration +
        NOX::Utils::OuterIterationStatusTest +
        NOX::Utils::InnerIteration +
        NOX::Utils::Parameters +
        NOX::Utils::Details +
        NOX::Utils::Warning +
        NOX::Utils::TestDetails);
  else
    printParams.set("Output Information", NOX::Utils::Error +
        NOX::Utils::TestDetails);

  Teuchos::RCP<NOX::Utils> printing = Teuchos::rcp( new NOX::Utils(printParams) );

  // Identify the test problem
  if (printing->isPrintType(NOX::Utils::TestDetails))
    printing->out() << "Starting epetra/NOX_Operators/NOX_BroydenOp.exe" << std::endl;

  // Identify processor information
#ifdef HAVE_MPI
  if (printing->isPrintType(NOX::Utils::TestDetails))
  {
    printing->out() << "Parallel Run" << std::endl;
    printing->out() << "Number of processors = " << NumProc << std::endl;
    printing->out() << "Print Process = " << MyPID << std::endl;
  }
  Comm.Barrier();
  if (printing->isPrintType(NOX::Utils::TestDetails))
    printing->out() << "Process " << MyPID << " is alive!" << std::endl;
  Comm.Barrier();
#else
  if (printing->isPrintType(NOX::Utils::TestDetails))
    printing->out() << "Serial Run" << std::endl;
#endif

  int status = 0;

  // Create a TestCompare class
  NOX::Epetra::TestCompare tester( printing->out(), *printing);
  double abstol = 1.e-4;
  double reltol = 1.e-4 ;

  // Test NOX::Epetra::BroydenOperator
  int numGlobalElems = 3 * NumProc;
  Epetra_Map      broydenRowMap   ( numGlobalElems, 0, Comm );
  Epetra_Vector   broydenWorkVec  ( broydenRowMap );
  Epetra_CrsGraph broydenWorkGraph( Copy, broydenRowMap, 0 );
  std::vector<int> globalIndices(3);
  for( int lcol = 0; lcol < 3; ++lcol )
    globalIndices[lcol] = 3 * MyPID + lcol;

  std::vector<int> myGlobalIndices(2);

  // Row 1 structure
  myGlobalIndices[0] = globalIndices[0];
  myGlobalIndices[1] = globalIndices[2];
  broydenWorkGraph.InsertGlobalIndices( globalIndices[0], 2, &myGlobalIndices[0] );
  // Row 2 structure
  myGlobalIndices[0] = globalIndices[0];
  myGlobalIndices[1] = globalIndices[1];
  broydenWorkGraph.InsertGlobalIndices( globalIndices[1], 2, &myGlobalIndices[0] );
  // Row 3 structure
  myGlobalIndices[0] = globalIndices[1];
  myGlobalIndices[1] = globalIndices[2];
  broydenWorkGraph.InsertGlobalIndices( globalIndices[2], 2, &myGlobalIndices[0] );

  broydenWorkGraph.FillComplete();

  Teuchos::RCP<Epetra_CrsMatrix> broydenWorkMatrix =
    Teuchos::rcp( new Epetra_CrsMatrix( Copy, broydenWorkGraph ) );

  // Create an identity matrix
  broydenWorkVec.PutScalar(1.0);
  broydenWorkMatrix->ReplaceDiagonalValues(broydenWorkVec);

  NOX::Epetra::BroydenOperator broydenOp( noxParams, printing, broydenWorkVec, broydenWorkMatrix, true );

  broydenWorkVec[0] =  1.0;
  broydenWorkVec[1] = -1.0;
  broydenWorkVec[2] =  2.0;
  broydenOp.setStepVector( broydenWorkVec );

  broydenWorkVec[0] =  2.0;
  broydenWorkVec[1] =  1.0;
  broydenWorkVec[2] =  3.0;
  broydenOp.setYieldVector( broydenWorkVec );

  broydenOp.computeSparseBroydenUpdate();

  // Create the gold matrix for comparison
  Teuchos::RCP<Epetra_CrsMatrix> goldMatrix = Teuchos::rcp( new Epetra_CrsMatrix( Copy, broydenWorkGraph ) );

  int      numCols ;
  double * values  ;

  // Row 1 answers
  goldMatrix->ExtractMyRowView( 0, numCols, values );
  values[0] =  6.0 ;
  values[1] =  2.0 ;
  // Row 2 answers
  goldMatrix->ExtractMyRowView( 1, numCols, values );
  values[0] =  5.0 ;
  values[1] =  0.0 ;
  // Row 3 structure
  goldMatrix->ExtractMyRowView( 2, numCols, values );
  values[0] = -1.0 ;
  values[1] =  7.0 ;

  goldMatrix->Scale(0.2);

  status += tester.testCrsMatrices( broydenOp.getBroydenMatrix(), *goldMatrix, reltol, abstol,
                              "Broyden Sparse Operator Update Test" );


  // Now try a dense Broyden Update
  Epetra_CrsGraph broydenWorkGraph2( Copy, broydenRowMap, 0 );

  myGlobalIndices.resize(3);

  // All Rowsstructure
  myGlobalIndices[0] = globalIndices[0];
  myGlobalIndices[1] = globalIndices[1];
  myGlobalIndices[2] = globalIndices[2];
  broydenWorkGraph2.InsertGlobalIndices( globalIndices[0], 3, &myGlobalIndices[0] );
  broydenWorkGraph2.InsertGlobalIndices( globalIndices[1], 3, &myGlobalIndices[0] );
  broydenWorkGraph2.InsertGlobalIndices( globalIndices[2], 3, &myGlobalIndices[0] );

  broydenWorkGraph2.FillComplete();

  Teuchos::RCP<Epetra_CrsMatrix> broydenWorkMatrix2 = Teuchos::rcp( new Epetra_CrsMatrix( Copy, broydenWorkGraph2 ) );

  // Create an identity matrix
  broydenWorkVec.PutScalar(1.0);
  broydenWorkMatrix2->ReplaceDiagonalValues(broydenWorkVec);

  NOX::Epetra::BroydenOperator broydenOp2( noxParams, printing, broydenWorkVec, broydenWorkMatrix2, true );

  broydenWorkVec[0] =  1.0;
  broydenWorkVec[1] = -1.0;
  broydenWorkVec[2] =  2.0;
  broydenOp2.setStepVector( broydenWorkVec );

  broydenWorkVec[0] =  2.0;
  broydenWorkVec[1] =  1.0;
  broydenWorkVec[2] =  3.0;
  broydenOp2.setYieldVector( broydenWorkVec );

  broydenOp2.computeSparseBroydenUpdate();

  // Create the gold matrix for comparison
  Teuchos::RCP<Epetra_CrsMatrix> goldMatrix2 = Teuchos::rcp( new Epetra_CrsMatrix( Copy, broydenWorkGraph2 ) );

  // Row 1 answers
  goldMatrix2->ExtractMyRowView( 0, numCols, values );
  values[0] =  7.0 ;
  values[1] = -1.0 ;
  values[2] =  2.0 ;
  // Row 2 answers
  goldMatrix2->ExtractMyRowView( 1, numCols, values );
  values[0] =  2.0 ;
  values[1] =  4.0 ;
  values[2] =  4.0 ;
  // Row 3 structure
  goldMatrix2->ExtractMyRowView( 2, numCols, values );
  values[0] =  1.0 ;
  values[1] = -1.0 ;
  values[2] =  8.0 ;

  double scaleF = 1.0 / 6.0;
  goldMatrix2->Scale( scaleF );

  status += tester.testCrsMatrices( broydenOp2.getBroydenMatrix(), *goldMatrix2, reltol, abstol,
                              "Broyden Sparse Operator Update Test (Dense)" );

  // Now test the ability to remove active entries in the Broyden update
  Epetra_CrsGraph inactiveGraph( Copy, broydenRowMap, 0 );

  // Row 1 structure
  inactiveGraph.InsertGlobalIndices( globalIndices[0], 1, &myGlobalIndices[1] );
  // Row 2 structure
  inactiveGraph.InsertGlobalIndices( globalIndices[1], 1, &myGlobalIndices[2] );
  // Row 3 structure
  inactiveGraph.InsertGlobalIndices( globalIndices[2], 1, &myGlobalIndices[0] );

  inactiveGraph.FillComplete();

  // Inactivate entries in dense matrix to arrive again at the original sparse structure
  broydenOp2.removeEntriesFromBroydenUpdate( inactiveGraph );

#ifdef HAVE_NOX_DEBUG
  if( verbose )
    broydenOp2.outputActiveEntries();
#endif

  // Reset to the identity matrix
  broydenOp2.resetBroydenMatrix( *broydenWorkMatrix2 );

  // Step and Yield vectors are already set
  broydenOp2.computeSparseBroydenUpdate();


  status += tester.testCrsMatrices( broydenOp2.getBroydenMatrix(), *goldMatrix, reltol, abstol,
                              "Broyden Sparse Operator Update Test (Entry Removal)", false );


  // Summarize test results
  if( status == 0 )
    printing->out() << "Test passed!" << std::endl;
  else
    printing->out() << "Test failed!" << std::endl;

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

  // Final return value (0 = successfull, non-zero = failure)
  return status;

}
void panzer::ScatterDirichletResidual_BlockedEpetra<panzer::Traits::Jacobian, TRAITS,LO,GO>::
evaluateFields(typename TRAITS::EvalData workset)
{
    using Teuchos::RCP;
    using Teuchos::ArrayRCP;
    using Teuchos::ptrFromRef;
    using Teuchos::rcp_dynamic_cast;

    using Thyra::VectorBase;
    using Thyra::SpmdVectorBase;
    using Thyra::ProductVectorBase;
    using Thyra::BlockedLinearOpBase;

    typedef BlockedEpetraLinearObjContainer BLOC;

    std::vector<std::pair<int,GO> > GIDs;
    std::vector<int> LIDs;

    // for convenience pull out some objects from workset
    std::string blockId = this->wda(workset).block_id;
    const std::vector<std::size_t> & localCellIds = this->wda(workset).cell_local_ids;

    RCP<const BLOC> blockedContainer = blockedContainer_;
    RCP<ProductVectorBase<double> > r = rcp_dynamic_cast<ProductVectorBase<double> >(blockedContainer->get_f());
    Teuchos::RCP<BlockedLinearOpBase<double> > Jac = rcp_dynamic_cast<BlockedLinearOpBase<double> >(blockedContainer->get_A());

    int numFieldBlocks = globalIndexer_->getNumFieldBlocks();
    std::vector<int> blockOffsets(numFieldBlocks+1); // number of fields, plus a sentinnel
    for(int blk=0; blk<numFieldBlocks; blk++) {
        int blockOffset = globalIndexer_->getBlockGIDOffset(blockId,blk);
        blockOffsets[blk] = blockOffset;
    }

    std::unordered_map<std::pair<int,int>,Teuchos::RCP<Epetra_CrsMatrix>,panzer::pair_hash> jacEpetraBlocks;

    // NOTE: A reordering of these loops will likely improve performance
    //       The "getGIDFieldOffsets may be expensive.  However the
    //       "getElementGIDs" can be cheaper. However the lookup for LIDs
    //       may be more expensive!

    // scatter operation for each cell in workset
    for(std::size_t worksetCellIndex=0; worksetCellIndex<localCellIds.size(); ++worksetCellIndex) {
        std::size_t cellLocalId = localCellIds[worksetCellIndex];

        globalIndexer_->getElementGIDs(cellLocalId,GIDs);
        blockOffsets[numFieldBlocks] = GIDs.size();

        // caculate the local IDs for this element
        LIDs.resize(GIDs.size());
        for(std::size_t i=0; i<GIDs.size(); i++) {
            // used for doing local ID lookups
            RCP<const Epetra_Map> r_map = blockedContainer->getMapForBlock(GIDs[i].first);

            LIDs[i] = r_map->LID(GIDs[i].second);
        }

        std::vector<bool> is_owned(GIDs.size(), false);
        globalIndexer_->ownedIndices(GIDs,is_owned);

        // loop over each field to be scattered
        Teuchos::ArrayRCP<double> local_r, local_dc;
        for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
            int fieldNum = fieldIds_[fieldIndex];
            int blockRowIndex = globalIndexer_->getFieldBlock(fieldNum);

            RCP<SpmdVectorBase<double> > dc = rcp_dynamic_cast<SpmdVectorBase<double> >(dirichletCounter_->getNonconstVectorBlock(blockRowIndex));
            dc->getNonconstLocalData(ptrFromRef(local_dc));

            // grab local data for inputing
            RCP<SpmdVectorBase<double> > block_r = rcp_dynamic_cast<SpmdVectorBase<double> >(r->getNonconstVectorBlock(blockRowIndex));
            block_r->getNonconstLocalData(ptrFromRef(local_r));

            // this call "should" get the right ordering according to the Intrepid basis
            const std::pair<std::vector<int>,std::vector<int> > & indicePair
                = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_);
            const std::vector<int> & elmtOffset = indicePair.first;
            const std::vector<int> & basisIdMap = indicePair.second;

            // loop over basis functions
            for(std::size_t basis=0; basis<elmtOffset.size(); basis++) {
                int offset = elmtOffset[basis];
                int lid = LIDs[offset];
                if(lid<0) // not on this processor
                    continue;

                int basisId = basisIdMap[basis];

                if (checkApplyBC_)
                    if (!applyBC_[fieldIndex](worksetCellIndex,basisId))
                        continue;

                // zero out matrix row
                for(int blockColIndex=0; blockColIndex<numFieldBlocks; blockColIndex++) {
                    int start = blockOffsets[blockColIndex];
                    int end = blockOffsets[blockColIndex+1];

                    if(end-start<=0)
                        continue;

                    // check hash table for jacobian sub block
                    std::pair<int,int> blockIndex = std::make_pair(blockRowIndex,blockColIndex);
                    Teuchos::RCP<Epetra_CrsMatrix> subJac = jacEpetraBlocks[blockIndex];

                    // if you didn't find one before, add it to the hash table
                    if(subJac==Teuchos::null) {
                        Teuchos::RCP<Thyra::LinearOpBase<double> > tOp = Jac->getNonconstBlock(blockIndex.first,blockIndex.second);

                        // block operator is null, don't do anything (it is excluded)
                        if(Teuchos::is_null(tOp))
                            continue;

                        Teuchos::RCP<Epetra_Operator> eOp = Thyra::get_Epetra_Operator(*tOp);
                        subJac = rcp_dynamic_cast<Epetra_CrsMatrix>(eOp,true);
                        jacEpetraBlocks[blockIndex] = subJac;
                    }

                    int numEntries = 0;
                    int * rowIndices = 0;
                    double * rowValues = 0;

                    subJac->ExtractMyRowView(lid,numEntries,rowValues,rowIndices);

                    for(int i=0; i<numEntries; i++)
                        rowValues[i] = 0.0;
                }

                const ScalarT scatterField = (scatterFields_[fieldIndex])(worksetCellIndex,basisId);

                local_r[lid] = scatterField.val();
                local_dc[lid] = 1.0; // mark row as dirichlet

                // loop over the sensitivity indices: all DOFs on a cell
                std::vector<double> jacRow(scatterField.size(),0.0);

                for(int sensIndex=0; sensIndex<scatterField.size(); ++sensIndex)
                    jacRow[sensIndex] = scatterField.fastAccessDx(sensIndex);
                TEUCHOS_ASSERT(jacRow.size()==GIDs.size());

                for(int blockColIndex=0; blockColIndex<numFieldBlocks; blockColIndex++) {
                    int start = blockOffsets[blockColIndex];
                    int end = blockOffsets[blockColIndex+1];

                    if(end-start<=0)
                        continue;

                    // check hash table for jacobian sub block
                    std::pair<int,int> blockIndex = std::make_pair(blockRowIndex,blockColIndex);
                    Teuchos::RCP<Epetra_CrsMatrix> subJac = jacEpetraBlocks[blockIndex];

                    // if you didn't find one before, add it to the hash table
                    if(subJac==Teuchos::null) {
                        Teuchos::RCP<Thyra::LinearOpBase<double> > tOp = Jac->getNonconstBlock(blockIndex.first,blockIndex.second);

                        // block operator is null, don't do anything (it is excluded)
                        if(Teuchos::is_null(tOp))
                            continue;

                        Teuchos::RCP<Epetra_Operator> eOp = Thyra::get_Epetra_Operator(*tOp);
                        subJac = rcp_dynamic_cast<Epetra_CrsMatrix>(eOp,true);
                        jacEpetraBlocks[blockIndex] = subJac;
                    }

                    // Sum Jacobian
                    int err = subJac->ReplaceMyValues(lid, end-start, &jacRow[start],&LIDs[start]);
                    if(err!=0) {
                        std::stringstream ss;
                        ss << "Failed inserting row: " << GIDs[offset].second << " (" << lid << "): ";
                        for(int i=start; i<end; i++)
                            ss << GIDs[i].second << " (" << LIDs[i] << ") ";
                        ss << std::endl;
                        ss << "Into block " << blockRowIndex << ", " << blockColIndex << std::endl;

                        ss << "scatter field = ";
                        scatterFields_[fieldIndex].print(ss);
                        ss << std::endl;

                        TEUCHOS_TEST_FOR_EXCEPTION(err!=0,std::runtime_error,ss.str());
                    }

                }
            }
        }
    }
}
static int run_test(Teuchos::RCP<Epetra_CrsMatrix> matrix,
	  bool verbose,           // display the graph before & after
	  bool contract,          // set global number of partitions to 1/2 num procs
	  int partitioningType,   // hypergraph or graph partitioning, or simple
	  int vertexWeightType,   // use vertex weights?
	  int edgeWeightType,     // use edge/hyperedge weights?
	  int objectType)         // use isorropia's CrsMatrix or CrsGraph
{
  int rc=0, fail = 0;
#ifdef HAVE_EPETRAEXT
  int localProc = 0;
  double balance1, balance2, cutn1, cutn2, cutl1, cutl2;
  double balance3, cutn3, cutl3;
  double cutWgt1, cutWgt2, cutWgt3;
  int numCuts1, numCuts2, numCuts3, valid;
  int numPartitions = 0;
  int keepDenseEdges = 0;
  int numProcs = 1;

#ifdef HAVE_MPI
  const Epetra_MpiComm &Comm = dynamic_cast<const Epetra_MpiComm &>(matrix->Comm());
  localProc = Comm.MyPID();
  numProcs = Comm.NumProc();
#else
  const Epetra_SerialComm &Comm = dynamic_cast<const Epetra_SerialComm &>(matrix->Comm());
#endif

  int numRows = matrix->NumGlobalRows();

  if (numRows < (numProcs * 100)){
    // By default Zoltan throws out dense edges, defined as those
    // whose number of non-zeros exceeds 25% of the number of vertices.
    //
    // If dense edges are thrown out of a small matrix, there may be nothing left.
    keepDenseEdges = 1;
  }

  double myShareBefore = 1.0 / numProcs;
  double myShare = myShareBefore;

  if (contract){
    numPartitions = numProcs / 2;

    if (numPartitions > numRows)
      numPartitions = numRows;

    if (numPartitions > 0){
      if (localProc < numPartitions){
	myShare = 1.0 / numPartitions;
      }
      else{
	myShare = 0.0;
      }
    }
    else{
      contract = 0;
    }
  }

  // If we want Zoltan's or Isorropia's default weights, then we don't
  // need to supply a CostDescriber object to createBalancedCopy,
  // so we get to test the API functions that don't take a CostDescriber.

  bool noCosts = ((vertexWeightType == NO_APPLICATION_SUPPLIED_WEIGHTS) &&
		   (edgeWeightType == NO_APPLICATION_SUPPLIED_WEIGHTS));

  // Test the interface that has no parameters, if possible

  bool noParams =
    ((partitioningType == HYPERGRAPH_PARTITIONING) && // default, so requires no params
     (numPartitions == 0) &&                          // >0 would require a parameter
     (keepDenseEdges == 0));                          // >0 would require a parameter

  // Maps for original object
  const Epetra_Map &sourceRowMap = matrix->RowMap();
  const Epetra_Map &sourceRangeMap = matrix->RangeMap();
//   const Epetra_Map &sourceColMap = matrix->ColMap();
  const Epetra_Map &sourceDomainMap = matrix->DomainMap();

  int numCols = matrix->NumGlobalCols();
  int nMyRows = sourceRowMap.NumMyElements();
  int base = sourceRowMap.IndexBase();

  // Compute vertex and edge weights

  Isorropia::Epetra::CostDescriber costs;

  Teuchos::RCP<Epetra_Vector> vptr;

  Teuchos::RCP<Epetra_CrsMatrix> eptr;

  Teuchos::RCP<Epetra_Vector> hyperEdgeWeights;

  if (edgeWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS){

    if (partitioningType == GRAPH_PARTITIONING){

      // Create graph edge weights.

      eptr = Teuchos::rcp(new Epetra_CrsMatrix(*matrix));

      if (vertexWeightType == SUPPLY_EQUAL_WEIGHTS){
	eptr->PutScalar(1.0);   // set all nonzeros to 1.0
      }
      else{
	int maxRowSize = eptr->MaxNumEntries();
	double *newVal = NULL;
	if (maxRowSize > 0){
	  newVal = new double [maxRowSize];
	  for (int j=0; j<maxRowSize; j++){
	    newVal[j] = localProc + 1 + j;
	  }
	}
	int numEntries;
	int *idx;
	double *val;
	for (int i=0; i<nMyRows; i++){
	  rc = eptr->ExtractMyRowView(i, numEntries, val, idx);
	  for (int j=0; j<numEntries; j++){
	    val[j] = newVal[j];
	  }
	}
	if (newVal) delete [] newVal;
      }

      eptr->FillComplete(sourceDomainMap, sourceRangeMap);

      costs.setGraphEdgeWeights(eptr);
    }
    else{
      // Create hyperedge weights.  (Note that the list of hyperedges that a
      // process provides weights for has no relation to the columns
      // that it has non-zeroes for, or the rows that is has.  Hypergraphs
      // in general are not square.  Also more than one process can provide
      // a weight for the same edge.  Zoltan combines the weights according
      // to the value of the PHG_EDGE_WEIGHT_OPERATION parameter.  The default
      // for this parameter is to use the maximum edge weight provided by any
      // process for a given hyperedge.)

      Epetra_Map hyperEdgeMap(numCols, base, Comm);

      hyperEdgeWeights = Teuchos::rcp(new Epetra_Vector(hyperEdgeMap));

      int *edgeGIDs = NULL;
      double *weights = NULL;
      int numHEweights = hyperEdgeMap.NumMyElements();

      if (numHEweights){
	edgeGIDs = new int [numHEweights];
	weights = new double [numHEweights];

	if (edgeWeightType == SUPPLY_EQUAL_WEIGHTS){
	  for (int i=0; i<numHEweights; i++){
	    edgeGIDs[i] = hyperEdgeMap.GID(i);
	    weights[i] = 1.0;
	  }
	}
	else{
	  int hiVolumeStart = matrix->NumGlobalCols() / 3;
	  int hiVolumeEnd = hiVolumeStart * 2;
	  for (int i=0; i<numHEweights; i++){
	    edgeGIDs[i] = hyperEdgeMap.GID(i);
	    if ((edgeGIDs[i] < hiVolumeStart) || (edgeGIDs[i] >= hiVolumeEnd)){
	      weights[i] = 1.0;
	    }
	    else{
	      weights[i] = 3.0;
	    }
	  }
	}
	hyperEdgeWeights->ReplaceGlobalValues(numHEweights, weights, edgeGIDs);
      }

      if (weights){
	delete [] weights;
	delete [] edgeGIDs;
      }

      costs.setHypergraphEdgeWeights(hyperEdgeWeights);
    }
  }

  bool need_importer = false;

  if ((vertexWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS)){

    need_importer = true;  // to redistribute row weights

    double *val = NULL;

    if (nMyRows){
      val = new double [nMyRows];

      if (vertexWeightType == SUPPLY_EQUAL_WEIGHTS){
	for (int i=0; i<nMyRows; i++){
	  val[i] = 1.0;
	}
      }
      else if (vertexWeightType == SUPPLY_UNEQUAL_WEIGHTS){
	for (int i=0; i<nMyRows; i++){
	  val[i] = 1.0 + ((localProc+1) / 2);
	}
      }
    }

    vptr = Teuchos::rcp(new Epetra_Vector(Copy, sourceRowMap, val));

    if (val) delete [] val;

    costs.setVertexWeights(vptr);
  }

  // Calculate partition quality metrics before calling Zoltan

  if (partitioningType == GRAPH_PARTITIONING){
    rc = ispatest::compute_graph_metrics(matrix->Graph(), costs,
	     myShare, balance1, numCuts1, cutWgt1, cutn1, cutl1);
    if (contract){
      // balance wrt target of balancing weight over *all* procs
      rc = ispatest::compute_graph_metrics(matrix->Graph(), costs,
	     myShareBefore, balance3, numCuts3, cutWgt3, cutn3, cutl3);
    }
  }
  else{
    rc = ispatest::compute_hypergraph_metrics(matrix->Graph(), costs,
	     myShare, balance1, cutn1, cutl1);
    if (contract){
      // balance wrt target of balancing weight over *all* procs
      rc = ispatest::compute_hypergraph_metrics(matrix->Graph(), costs,
	     myShareBefore, balance3, cutn3, cutl3);
    }
  }

  if (rc){
    ERROREXIT((localProc==0), "Error in computing partitioning metrics")
  }

  Teuchos::ParameterList params;

#ifdef HAVE_ISORROPIA_ZOLTAN

  if (!noParams){

    // We're using Zoltan for partitioning and supplying
    // parameters, overriding defaults.

    Teuchos::ParameterList &sublist = params.sublist("Zoltan");

    if (partitioningType == GRAPH_PARTITIONING){
      params.set("PARTITIONING METHOD", "GRAPH");
      sublist.set("GRAPH_PACKAGE", "PHG");
    }
    else{
      params.set("PARTITIONING METHOD", "HYPERGRAPH");
      sublist.set("LB_APPROACH", "PARTITION");
      sublist.set("PHG_CUT_OBJECTIVE", "CONNECTIVITY");  // "cutl"
    }

    if (keepDenseEdges){
      // only throw out rows that have no zeroes, default is to
      // throw out if .25 or more of the columns are non-zero
      sublist.set("PHG_EDGE_SIZE_THRESHOLD", "1.0");
    }
     if (numPartitions > 0){
	// test #Partitions < #Processes
	std::ostringstream os;
	os << numPartitions;
	std::string s = os.str();
	//	sublist.set("NUM_GLOBAL_PARTS", s);
	params.set("NUM PARTS", s);
      }

      //sublist.set("DEBUG_LEVEL", "1"); // Zoltan will print out parameters
      //sublist.set("DEBUG_LEVEL", "5");   // proc 0 will trace Zoltan calls
      //sublist.set("DEBUG_MEMORY", "2");  // Zoltan will trace alloc & free
  }

#else
    ERROREXIT((localProc==0),
      "Zoltan partitioning required but Zoltan not available.")
#endif

  // Function scope values

  Teuchos::RCP<Epetra_Vector> newvwgts;
  Teuchos::RCP<Epetra_CrsMatrix> newewgts;

  // Function scope values required for LinearProblem

  Epetra_LinearProblem *problem = NULL;
  Epetra_Map *LHSmap = NULL;
  Epetra_MultiVector *RHS = NULL;
  Epetra_MultiVector *LHS = NULL;

  // Reference counted pointer to balanced object

  Epetra_CrsMatrix *matrixPtr=NULL;
  Epetra_CrsGraph *graphPtr=NULL;
  Epetra_RowMatrix *rowMatrixPtr=NULL;
  Epetra_LinearProblem *problemPtr=NULL;

  // Row map for balanced object
  const Epetra_BlockMap *targetBlockRowMap=NULL;  // for input CrsGraph
  const Epetra_Map *targetRowMap=NULL;            // for all other inputs

  // Column map for balanced object
  const Epetra_BlockMap *targetBlockColMap=NULL;  // for input CrsGraph
  const Epetra_Map *targetColMap=NULL;            // for all other inputs

  if (objectType == EPETRA_CRSMATRIX){
    if (noParams && noCosts){
      matrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix);
    }
    else if (noCosts){
      matrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix, params);
    }
    targetRowMap = &(matrixPtr->RowMap());
    targetColMap = &(matrixPtr->ColMap());
  }
  else if (objectType == EPETRA_CRSGRAPH){
    const Epetra_CrsGraph graph = matrix->Graph();
    if (noParams && noCosts){
      graphPtr = Isorropia::Epetra::createBalancedCopy(graph);
    }
    else if (noCosts){
      graphPtr = Isorropia::Epetra::createBalancedCopy(graph, params);
    }
    targetBlockRowMap = &(graphPtr->RowMap());
    targetBlockColMap = &(graphPtr->ColMap());
  }
  else if (objectType == EPETRA_ROWMATRIX){
    if (noParams && noCosts){
      rowMatrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix);
    }
    else if (noCosts){
      rowMatrixPtr = Isorropia::Epetra::createBalancedCopy(*matrix, params);
    }
    targetRowMap = &(rowMatrixPtr->RowMatrixRowMap());
    targetColMap = &(rowMatrixPtr->RowMatrixColMap());
  }
  else if (objectType == EPETRA_LINEARPROBLEM){

    // Create a linear problem with this matrix.

    LHSmap = new Epetra_Map(numCols, base, Comm);

    int myRHSsize = sourceRowMap.NumMyElements();
    int myLHSsize = LHSmap->NumMyElements();

    int valSize = ((myRHSsize > myLHSsize) ? myRHSsize : myLHSsize);

    double *vals = NULL;

    if (valSize){
      vals = new double [valSize];
    }

    if (valSize){
      for (int i=0; i < valSize; i++){
	// put my rank in my portion of LHS and my portion of RHS
	vals[i] = localProc;
      }
    }

    RHS = new Epetra_MultiVector(Copy, sourceRowMap, vals, 1, 1);

    LHS = new Epetra_MultiVector(Copy, *LHSmap, vals, 1, 1);

    if (valSize){
      delete [] vals;
    }

    problem = new Epetra_LinearProblem(matrix.get(), LHS, RHS);

    Epetra_LinearProblem lp = *problem;

    if (lp.CheckInput()){
      ERROREXIT((localProc==0), "Error creating a LinearProblem");
    }
    if (noParams && noCosts){
      problemPtr = Isorropia::Epetra::createBalancedCopy(lp);
    }
    else if (noCosts){
      problemPtr = Isorropia::Epetra::createBalancedCopy(lp, params);
    }

    targetRowMap = &(problemPtr->GetMatrix()->RowMatrixRowMap());
    targetColMap = &(problemPtr->GetMatrix()->RowMatrixColMap());
  }

  // Redistribute the edge weights
  // Comment this out since we don't redistribute columns

  if (edgeWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS){

    if (partitioningType == GRAPH_PARTITIONING){

      Epetra_Import *importer = NULL;

      if (objectType == EPETRA_CRSGRAPH){
	newewgts = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *graphPtr));
	targetRowMap = &(newewgts->RowMap());
	targetColMap = &(newewgts->ColMap());
      }
      else{
	newewgts = Teuchos::rcp(new Epetra_CrsMatrix(Copy, *targetRowMap, *targetColMap, 0));
      }

      importer = new Epetra_Import(*targetRowMap, sourceRowMap);
      newewgts->Import(*eptr, *importer, Insert);
      newewgts->FillComplete(*targetColMap, *targetRowMap);

      costs.setGraphEdgeWeights(newewgts);
    }
  }

  // Redistribute the vertex weights

  if ((vertexWeightType != NO_APPLICATION_SUPPLIED_WEIGHTS)){

    Epetra_Import *importer = NULL;

    if (objectType == EPETRA_CRSGRAPH){
      newvwgts = Teuchos::rcp(new Epetra_Vector(*targetBlockRowMap));
      importer = new Epetra_Import(*targetBlockRowMap, sourceRowMap);
    }
    else{
      newvwgts = Teuchos::rcp(new Epetra_Vector(*targetRowMap));
      importer = new Epetra_Import(*targetRowMap, sourceRowMap);
    }

    newvwgts->Import(*vptr, *importer, Insert);
    costs.setVertexWeights(newvwgts);
  }

  if (localProc == 0){
    test_type(numPartitions, partitioningType, vertexWeightType, edgeWeightType, objectType);
  }

  if (verbose){

    // Picture of problem before balancing

    if (objectType == EPETRA_LINEARPROBLEM){

      ispatest::show_matrix("Before load balancing", *problem, Comm);
    }
    else{
      ispatest::show_matrix("Before load balancing", matrix->Graph(), Comm);
    }

    // Picture of problem after balancing

    if (objectType == EPETRA_LINEARPROBLEM){
      ispatest::show_matrix("After load balancing (x in Ax=b is not redistributed)", *problemPtr, Comm);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      ispatest::show_matrix("After load balancing", *rowMatrixPtr, Comm);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      ispatest::show_matrix("After load balancing", matrixPtr->Graph(), Comm);
    }
    else if (objectType == EPETRA_CRSGRAPH){
      ispatest::show_matrix("After load balancing", *graphPtr, Comm);
    }
  }

  // After partitioning, recompute the metrics

  if (partitioningType == GRAPH_PARTITIONING){
    if (objectType == EPETRA_LINEARPROBLEM){
      rc = ispatest::compute_graph_metrics(*(problemPtr->GetMatrix()), costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      rc = ispatest::compute_graph_metrics(*rowMatrixPtr, costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      rc = ispatest::compute_graph_metrics(matrixPtr->Graph(), costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
    else {
      rc = ispatest::compute_graph_metrics(*graphPtr, costs,
	     myShare, balance2, numCuts2, cutWgt2, cutn2, cutl2);
    }
  }
  else{
    if (objectType == EPETRA_LINEARPROBLEM){
      rc = ispatest::compute_hypergraph_metrics(*(problemPtr->GetMatrix()), costs,
	     myShare, balance2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_ROWMATRIX){
      rc = ispatest::compute_hypergraph_metrics(*rowMatrixPtr, costs,
	     myShare, balance2, cutn2, cutl2);
    }
    else if (objectType == EPETRA_CRSMATRIX){
      rc = ispatest::compute_hypergraph_metrics(matrixPtr->Graph(), costs,
	     myShare, balance2, cutn2, cutl2);
    }
    else{
      rc = ispatest::compute_hypergraph_metrics(*graphPtr, costs,
	     myShare, balance2, cutn2, cutl2);
    }
  }

  if (rc){
    ERROREXIT((localProc==0), "Error in computing partitioning metrics")
  }

  std::string why;

  if (partitioningType == GRAPH_PARTITIONING){
    fail = (cutWgt2 > cutWgt1);
    why = "New weighted edge cuts are worse";

    if (localProc == 0){
      std::cout << "Before partitioning: Balance " << balance1 ;
      std::cout << " cutn " << cutn1 ;
      std::cout << " cutl " << cutl1 ;

      if (contract){
	std::cout << "  (wrt balancing over " << numPartitions << " partitions)" << std::endl;
	std::cout << "Before partitioning: Balance " << balance3 ;
	std::cout << " cutn " << cutn3 ;
	std::cout << " cutl " << cutl3 ;
	std::cout << "  (wrt balancing over " << numProcs << " partitions)" ;
      }
      std::cout << std::endl;

      std::cout << " Total edge cuts: " << numCuts1;
      std::cout << " Total weighted edge cuts: " << cutWgt1 << std::endl;
      std::cout << "After partitioning: Balance " << balance2 ;
      std::cout << " cutn " << cutn2 ;
      std::cout << " cutl " << cutl2 << std::endl;
      std::cout << " Total edge cuts: " << numCuts2;
      std::cout << " Total weighted edge cuts: " << cutWgt2 << std::endl;
    }
  }
  else{
      fail = (cutl2 > cutl1);
      why = "New cutl is worse";

    if (localProc == 0){
      std::cout << "Before partitioning: Balance " << balance1 ;
      std::cout << " cutn " << cutn1 ;
      std::cout << " cutl " << cutl1 ;
      if (contract){
	std::cout << "  (wrt balancing over " << numPartitions << " partitions)" << std::endl;
	std::cout << "Before partitioning: Balance " << balance3 ;
	std::cout << " cutn " << cutn3 ;
	std::cout << " cutl " << cutl3 ;
	std::cout << "  (wrt balancing over " << numProcs << " partitions)" ;
      }
      std::cout << std::endl;
      std::cout << "After partitioning: Balance " << balance2 ;
      std::cout << " cutn " << cutn2 ;
      std::cout << " cutl " << cutl2 << std::endl;
    }
  }

  if (fail){
    if (localProc == 0) std::cout << "ERROR: "+why << std::endl;
  }

  // Check that input matrix is valid.  This test constructs an "x"
  // with the matrix->DomainMap() and a "y" with matrix->RangeMap()
  // and then calculates y = Ax.

  if (objectType == EPETRA_LINEARPROBLEM){
    valid = ispatest::test_matrix_vector_multiply(*problemPtr);
  }
  else if (objectType == EPETRA_ROWMATRIX){
    valid = ispatest::test_row_matrix_vector_multiply(*rowMatrixPtr);
  }
  else if (objectType == EPETRA_CRSMATRIX){
    valid = ispatest::test_matrix_vector_multiply(*matrixPtr);
  }
  else{
    valid = ispatest::test_matrix_vector_multiply(*graphPtr);
  }

  if (!valid){
    if (localProc == 0) std::cout << "Rebalanced matrix is not a valid Epetra matrix" << std::endl;
    fail = 1;
  }
  else{
    if (localProc == 0) std::cout << "Rebalanced matrix is a valid Epetra matrix" << std::endl;
  }

  if (localProc == 0)
    std::cout << std::endl;



#else
  std::cout << "test_simple main: currently can only test "
	 << "with Epetra and EpetraExt enabled." << std::endl;
  rc = -1;
#endif

  return fail;
}
Beispiel #5
0
void panzer::ScatterDirichletResidual_Epetra<panzer::Traits::Jacobian, Traits,LO,GO>::
evaluateFields(typename Traits::EvalData workset)
{ 
   std::vector<GO> GIDs;
   std::vector<int> LIDs;
 
   // for convenience pull out some objects from workset
   std::string blockId = workset.block_id;
   const std::vector<std::size_t> & localCellIds = workset.cell_local_ids;

   Teuchos::RCP<const EpetraLinearObjContainer> epetraContainer = epetraContainer_;
   TEUCHOS_ASSERT(epetraContainer!=Teuchos::null);
   Teuchos::RCP<Epetra_Vector> r = epetraContainer->get_f(); 
   Teuchos::RCP<Epetra_CrsMatrix> Jac = epetraContainer->get_A();

   // NOTE: A reordering of these loops will likely improve performance
   //       The "getGIDFieldOffsets may be expensive.  However the
   //       "getElementGIDs" can be cheaper. However the lookup for LIDs
   //       may be more expensive!

   // scatter operation for each cell in workset
   for(std::size_t worksetCellIndex=0;worksetCellIndex<localCellIds.size();++worksetCellIndex) {
      std::size_t cellLocalId = localCellIds[worksetCellIndex];

      globalIndexer_->getElementGIDs(cellLocalId,GIDs); 

      if(r!=Teuchos::null) {
         // caculate the local IDs for this element
         LIDs.resize(GIDs.size());
         for(std::size_t i=0;i<GIDs.size();i++)
            LIDs[i] = r->Map().LID(GIDs[i]);
      }
      else {
         // caculate the local IDs for this element
         LIDs.resize(GIDs.size());
         for(std::size_t i=0;i<GIDs.size();i++)
            LIDs[i] = Jac->RowMap().LID(GIDs[i]);
      }

      // loop over each field to be scattered
      for(std::size_t fieldIndex = 0; fieldIndex < scatterFields_.size(); fieldIndex++) {
         int fieldNum = fieldIds_[fieldIndex];
   
         // this call "should" get the right ordering accordint to the Intrepid basis
         const std::pair<std::vector<int>,std::vector<int> > & indicePair 
               = globalIndexer_->getGIDFieldOffsets_closure(blockId,fieldNum, side_subcell_dim_, local_side_id_);
         const std::vector<int> & elmtOffset = indicePair.first;
         const std::vector<int> & basisIdMap = indicePair.second;
   
         // loop over basis functions
         for(std::size_t basis=0;basis<elmtOffset.size();basis++) {
            int offset = elmtOffset[basis];
            int lid = LIDs[offset];
            if(lid<0) // not on this processor
               continue;

            // zero out matrix row
            {
               int numEntries = 0;
               int * rowIndices = 0;
               double * rowValues = 0;

               Jac->ExtractMyRowView(lid,numEntries,rowValues,rowIndices);

               for(int i=0;i<numEntries;i++) {
                  if(preserveDiagonal_) {
                    if(lid!=rowIndices[i])
                      rowValues[i] = 0.0;
                  }
                  else
                    rowValues[i] = 0.0;
               }
            }
 
            int basisId = basisIdMap[basis];
            int gid = GIDs[offset];
            const ScalarT & scatterField = (scatterFields_[fieldIndex])(worksetCellIndex,basisId);
          
            if(r!=Teuchos::null) 
              (*r)[lid] = scatterField.val();
            if(dirichletCounter_!=Teuchos::null)
              (*dirichletCounter_)[lid] = 1.0; // mark row as dirichlet
    
            // loop over the sensitivity indices: all DOFs on a cell
            std::vector<double> jacRow(scatterField.size(),0.0);
    
            if(!preserveDiagonal_) {
              // this is the default case
              for(int sensIndex=0;sensIndex<scatterField.size();++sensIndex)
                 jacRow[sensIndex] = scatterField.fastAccessDx(sensIndex);
              TEUCHOS_ASSERT(jacRow.size()==GIDs.size());
    
              int err = Jac->ReplaceGlobalValues(gid, scatterField.size(), &jacRow[0],&GIDs[0]);
              TEUCHOS_ASSERT(err==0); 
            }
         }
      }
   }
}
// helper routines
bool SplitMatrix2x2(Teuchos::RCP<const Epetra_CrsMatrix> A,
    const Epetra_Map& A11rowmap,
    const Epetra_Map& A22rowmap,
    Teuchos::RCP<Epetra_CrsMatrix>& A11,
    Teuchos::RCP<Epetra_CrsMatrix>& A12,
    Teuchos::RCP<Epetra_CrsMatrix>& A21,
    Teuchos::RCP<Epetra_CrsMatrix>& A22)
{
  if (A==Teuchos::null)
  {
    std::cout << "ERROR: SplitMatrix2x2: A==null on entry" << std::endl;
    return false;
  }

  const Epetra_Comm& Comm   = A->Comm();
  const Epetra_Map&  A22map = A22rowmap;
  const Epetra_Map&  A11map = A11rowmap;

  //----------------------------- create a parallel redundant map of A22map
  std::map<int,int> a22gmap;
  {
    std::vector<int> a22global(A22map.NumGlobalElements());
    int count=0;
    for (int proc=0; proc<Comm.NumProc(); ++proc)
    {
      int length = 0;
      if (proc==Comm.MyPID())
      {
        for (int i=0; i<A22map.NumMyElements(); ++i)
        {
          a22global[count+length] = A22map.GID(i);
          ++length;
        }
      }
      Comm.Broadcast(&length,1,proc);
      Comm.Broadcast(&a22global[count],length,proc);
      count += length;
    }
    if (count != A22map.NumGlobalElements())
    {
      std::cout << "ERROR SplitMatrix2x2: mismatch in dimensions" << std::endl;
      return false;
    }

    // create the map
    for (int i=0; i<count; ++i)
      a22gmap[a22global[i]] = 1;
    a22global.clear();
  }

  //--------------------------------------------------- create matrix A22
  A22 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A22map,100));
  {
    std::vector<int>    a22gcindices(100);
    std::vector<double> a22values(100);
    for (int i=0; i<A->NumMyRows(); ++i)
    {
      const int grid = A->GRID(i);
      if (A22map.MyGID(grid)==false)
        continue;
      int     numentries;
      double* values;
      int*    cindices;
      int err = A->ExtractMyRowView(i,numentries,values,cindices);
      if (err)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl;
        return false;
      }
      if (numentries>(int)a22gcindices.size())
      {
        a22gcindices.resize(numentries);
        a22values.resize(numentries);
      }
      int count=0;
      for (int j=0; j<numentries; ++j)
      {
        const int gcid = A->ColMap().GID(cindices[j]);
        // see whether we have gcid in a22gmap
        std::map<int,int>::iterator curr = a22gmap.find(gcid);
        if (curr==a22gmap.end()) continue;
        //std::cout << gcid << " ";
        a22gcindices[count] = gcid;
        a22values[count]    = values[j];
        ++count;
      }
      //std::cout << std::endl; fflush(stdout);
      // add this filtered row to A22
      err = A22->InsertGlobalValues(grid,count,&a22values[0],&a22gcindices[0]);
      if (err<0)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl;
        return false;
      }

    } //for (int i=0; i<A->NumMyRows(); ++i)
    a22gcindices.clear();
    a22values.clear();
  }
  A22->FillComplete();
  A22->OptimizeStorage();

  //----------------------------------------------------- create matrix A11
  A11 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A11map,100));
  {
    std::vector<int>    a11gcindices(100);
    std::vector<double> a11values(100);
    for (int i=0; i<A->NumMyRows(); ++i)
    {
      const int grid = A->GRID(i);
      if (A11map.MyGID(grid)==false) continue;
      int     numentries;
      double* values;
      int*    cindices;
      int err = A->ExtractMyRowView(i,numentries,values,cindices);
      if (err)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl;
        return false;
      }
      if (numentries>(int)a11gcindices.size())
      {
        a11gcindices.resize(numentries);
        a11values.resize(numentries);
      }
      int count=0;
      for (int j=0; j<numentries; ++j)
      {
        const int gcid = A->ColMap().GID(cindices[j]);
        // see whether we have gcid as part of a22gmap
        std::map<int,int>::iterator curr = a22gmap.find(gcid);
        if (curr!=a22gmap.end()) continue;
        a11gcindices[count] = gcid;
        a11values[count] = values[j];
        ++count;
      }
      err = A11->InsertGlobalValues(grid,count,&a11values[0],&a11gcindices[0]);
      if (err<0)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl;
        return false;
      }

    } // for (int i=0; i<A->NumMyRows(); ++i)
    a11gcindices.clear();
    a11values.clear();
  }
  A11->FillComplete();
  A11->OptimizeStorage();

  //---------------------------------------------------- create matrix A12
  A12 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A11map,100));
  {
    std::vector<int>    a12gcindices(100);
    std::vector<double> a12values(100);
    for (int i=0; i<A->NumMyRows(); ++i)
    {
      const int grid = A->GRID(i);
      if (A11map.MyGID(grid)==false) continue;
      int     numentries;
      double* values;
      int*    cindices;
      int err = A->ExtractMyRowView(i,numentries,values,cindices);
      if (err)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl;
        return false;
      }

      if (numentries>(int)a12gcindices.size())
      {
        a12gcindices.resize(numentries);
        a12values.resize(numentries);
      }
      int count=0;
      for (int j=0; j<numentries; ++j)
      {
        const int gcid = A->ColMap().GID(cindices[j]);
        // see whether we have gcid as part of a22gmap
        std::map<int,int>::iterator curr = a22gmap.find(gcid);
        if (curr==a22gmap.end()) continue;
        a12gcindices[count] = gcid;
        a12values[count] = values[j];
        ++count;
      }
      err = A12->InsertGlobalValues(grid,count,&a12values[0],&a12gcindices[0]);
      if (err<0)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl;
        return false;
      }

    } // for (int i=0; i<A->NumMyRows(); ++i)
    a12values.clear();
    a12gcindices.clear();
  }
  A12->FillComplete(A22map,A11map);
  A12->OptimizeStorage();

  //----------------------------------------------------------- create A21
  A21 = Teuchos::rcp(new Epetra_CrsMatrix(Copy,A22map,100));
  {
    std::vector<int>    a21gcindices(100);
    std::vector<double> a21values(100);
    for (int i=0; i<A->NumMyRows(); ++i)
    {
      const int grid = A->GRID(i);
      if (A22map.MyGID(grid)==false) continue;
      int     numentries;
      double* values;
      int*    cindices;
      int err = A->ExtractMyRowView(i,numentries,values,cindices);
      if (err)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->ExtractMyRowView returned " << err << std::endl;
        return false;
      }

      if (numentries>(int)a21gcindices.size())
      {
        a21gcindices.resize(numentries);
        a21values.resize(numentries);
      }
      int count=0;
      for (int j=0; j<numentries; ++j)
      {
        const int gcid = A->ColMap().GID(cindices[j]);
        // see whether we have gcid as part of a22gmap
        std::map<int,int>::iterator curr = a22gmap.find(gcid);
        if (curr!=a22gmap.end()) continue;
        a21gcindices[count] = gcid;
        a21values[count] = values[j];
        ++count;
      }
      err = A21->InsertGlobalValues(grid,count,&a21values[0],&a21gcindices[0]);
      if (err<0)
      {
        std::cout << "ERROR: SplitMatrix2x2: A->InsertGlobalValues returned " << err << std::endl;
        return false;
      }

    } // for (int i=0; i<A->NumMyRows(); ++i)
    a21values.clear();
    a21gcindices.clear();
  }
  A21->FillComplete(A11map,A22map);
  A21->OptimizeStorage();

  //-------------------------------------------------------------- tidy up
  a22gmap.clear();
  return true;
}