// rebuild a single subblock Epetra_CrsMatrix
void rebuildSubBlock(int i,int j,const Epetra_CrsMatrix & A,const std::vector<std::pair<int,RCP<Epetra_Map> > > & subMaps,Epetra_CrsMatrix & mat)
{
   // get the number of variables families
   int numVarFamily = subMaps.size();

   TEUCHOS_ASSERT(i>=0 && i<numVarFamily);
   TEUCHOS_ASSERT(j>=0 && j<numVarFamily);
   TEUCHOS_ASSERT(mat.Filled());

   const Epetra_Map & gRowMap = *subMaps[i].second;
   const Epetra_Map & rowMap = *Teuchos::get_extra_data<RCP<Epetra_Map> >(subMaps[i].second,"contigMap");
   int colFamilyCnt = subMaps[j].first;

   // compute the number of global variables
   // and the row and column block offset
   int numGlobalVars = 0;
   int rowBlockOffset = 0;
   int colBlockOffset = 0;
   for(int k=0;k<numVarFamily;k++) {
      numGlobalVars += subMaps[k].first;
 
      // compute block offsets
      if(k<i) rowBlockOffset += subMaps[k].first;
      if(k<j) colBlockOffset += subMaps[k].first;
   }

   // copy all global rows to here
   Epetra_Import import(gRowMap,A.RowMap());
   Epetra_CrsMatrix localA(Copy,gRowMap,0);
   localA.Import(A,import,Insert);

   // clear out the old matrix
   mat.PutScalar(0.0);

   // get entry information
   int numMyRows = rowMap.NumMyElements();
   int maxNumEntries = A.GlobalMaxNumEntries();

   // for extraction
   std::vector<int> indices(maxNumEntries);
   std::vector<double> values(maxNumEntries);

   // for insertion
   std::vector<int> colIndices(maxNumEntries);
   std::vector<double> colValues(maxNumEntries);

   // insert each row into subblock
   // let FillComplete handle column distribution
   for(int localRow=0;localRow<numMyRows;localRow++) {
      int numEntries = -1; 
      int globalRow = gRowMap.GID(localRow);
      int contigRow = rowMap.GID(localRow);

      TEUCHOS_ASSERT(globalRow>=0);
      TEUCHOS_ASSERT(contigRow>=0);

      // extract a global row copy
      int err = localA.ExtractGlobalRowCopy(globalRow, maxNumEntries, numEntries, &values[0], &indices[0]);
      TEUCHOS_ASSERT(err==0);

      int numOwnedCols = 0;
      for(int localCol=0;localCol<numEntries;localCol++) {
         int globalCol = indices[localCol];

         // determinate which block this column ID is in
         int block = globalCol / numGlobalVars;
         
         bool inFamily = true; 
 
         // test the beginning of the block
         inFamily &= (block*numGlobalVars+colBlockOffset <= globalCol);
         inFamily &= ((block*numGlobalVars+colBlockOffset+colFamilyCnt) > globalCol);

         // is this column in the variable family
         if(inFamily) {
            int familyOffset = globalCol-(block*numGlobalVars+colBlockOffset);

            colIndices[numOwnedCols] = block*colFamilyCnt + familyOffset;
            colValues[numOwnedCols] = values[localCol];

            numOwnedCols++;
         }
      }

      // insert it into the new matrix
      mat.SumIntoGlobalValues(contigRow,numOwnedCols,&colValues[0],&colIndices[0]);
   }
}
// Exercises fei_trilinos::AztecDMSR_Matrix: allocation, filling, sumIntoRow,
// file write/read round trips, fillComplete and put, comparing the matrix
// contents against locally held reference values after each step.
//
// Returns 0 on success (and always when HAVE_FEI_AZTECOO is not defined);
// otherwise returns through ERReturn / CHK_ERR at the first failing check.
//
// The matrix and the coefficient table are held by owning objects so that
// the early-return paths taken by ERReturn / CHK_ERR release them as well.
int test_AztecWrappers::test1()
{
#ifdef HAVE_FEI_AZTECOO
    int localSize = 3, globalSize = localSize*numProcs_;
    int localOffset = localSize*localProc_;
    int i;

    // global ids of the rows owned by this processor
    std::vector<int> update(localSize);
    for(i=0; i<localSize; i++) update[i] = localOffset+i;

    fei::SharedPtr<fei_trilinos::Aztec_Map> map(
        new fei_trilinos::Aztec_Map(globalSize, localSize, &update[0], localOffset, comm_));

    // shared-pointer ownership: destroyed on every return path
    fei::SharedPtr<fei_trilinos::AztecDMSR_Matrix> matrix(
        new fei_trilinos::AztecDMSR_Matrix(map));

    // dense localSize x globalSize coefficient table handed to sumIntoRow;
    // the vectors own the storage, elemcoefPtrs provides the double** view
    std::vector<int> elemrows(localSize);
    std::vector<int> elemcols(globalSize);
    std::vector<std::vector<double> > elemcoefStorage(localSize, std::vector<double>(globalSize));
    std::vector<double*> elemcoefPtrs(localSize);
    for(int j=0; j<globalSize; ++j) elemcols[j] = j;
    for(i=0; i<localSize; ++i) {
        elemrows[i] = localOffset+i;
        elemcoefPtrs[i] = &elemcoefStorage[i][0];
        for(int j=0; j<globalSize; ++j) {
            elemcoefStorage[i][j] = (double)(localOffset+i+j);
        }
    }
    double** elemcoefs = &elemcoefPtrs[0];

    // reference copy of the matrix contents: value(row,col) = row+col
    std::vector<std::vector<int> > colIndices(localSize);
    std::vector<std::vector<double> > values(localSize);
    std::vector<int> rowLengths(localSize);
    int nnzeros = 0;

    for(i=0; i<localSize; i++) {
        int diagEntry = 0;
        int row = i+localOffset;
        for(int j=0; j<globalSize; j++) {
            int col = j;
            if (col == row) diagEntry = 1;
            colIndices[i].push_back(col);
            values[i].push_back((double)(row+col));
        }
        // row lengths passed to allocate() exclude the diagonal entry, while
        // the expected nonzero count adds one per row
        rowLengths[i] = colIndices[i].size() - diagEntry;
        nnzeros += rowLengths[i] + 1;
    }

    matrix->allocate( &rowLengths[0] );

    if (!(matrix->isAllocated())) {
        ERReturn(-1);
    }

    if (matrix->getNumNonZeros() != nnzeros) {
        ERReturn(-1);
    }

    CHK_ERR( fill_DMSR(*matrix, localOffset, colIndices, values, true) );

    int* rowinds = &elemrows[0];
    int* colinds = &elemcols[0];

    // elemcoefs holds the same row+col values, so summing them in doubles
    // every entry
    CHK_ERR( matrix->sumIntoRow(localSize, rowinds, globalSize, colinds, elemcoefs) );

    for(i=0; i<localSize; ++i) {
        for(int j=0; j<globalSize; ++j) values[i][j] *= 2.0;
    }

    CHK_ERR( compare_DMSR_contents(*matrix, localOffset, colIndices, values) );

    // restore the reference values and load them into the matrix again
    for(i=0; i<localSize; ++i) {
        for(int j=0; j<globalSize; ++j) values[i][j] /= 2.0;
    }

    CHK_ERR( fill_DMSR(*matrix, localOffset, colIndices, values, false) );

    CHK_ERR( compare_DMSR_contents(*matrix, localOffset, colIndices, values) );

    // file round trip before fillComplete
    if (matrix->writeToFile("A_Az_notFilled.mtx") != true) {
        ERReturn(-1);
    }

    if (matrix->readFromFile("A_Az_notFilled.mtx") != true) {
        ERReturn(-1);
    }

    CHK_ERR( compare_DMSR_contents(*matrix, localOffset, colIndices, values) );

    matrix->fillComplete();

    if (!(matrix->isFilled())) {
        ERReturn(-1);
    }

    if (matrix->writeToFile("A_Az_filled.mtx") != true) {
        ERReturn(-1);
    }

    CHK_ERR( compare_DMSR_contents(*matrix, localOffset, colIndices, values) );

    CHK_ERR( fill_DMSR(*matrix, localOffset, colIndices, values, false) );

    CHK_ERR( compare_DMSR_contents(*matrix, localOffset, colIndices, values) );

    // repeat the fill + sumIntoRow sequence on the filled matrix
    matrix->put(0.0);

    CHK_ERR( fill_DMSR(*matrix, localOffset, colIndices, values, true) );

    CHK_ERR( matrix->sumIntoRow(localSize, rowinds, globalSize, colinds, elemcoefs) );

    for(i=0; i<localSize; ++i) {
        for(int j=0; j<globalSize; ++j) values[i][j] *= 2.0;
    }

    CHK_ERR( compare_DMSR_contents(*matrix, localOffset, colIndices, values) );

    // file round trip after fillComplete
    if (matrix->writeToFile("A_Az_filled2.mtx") != true) {
        ERReturn(-1);
    }

    if (matrix->readFromFile("A_Az_filled2.mtx") != true) {
        ERReturn(-1);
    }

    CHK_ERR( compare_DMSR_contents(*matrix, localOffset, colIndices, values) );
#endif
    return(0);
}
inline void RandomSparseMatrixBuilder<Scalar>
::initOp(double onProcDensity,
  double offProcDensity)
{
  /* Builds op_ with a random sparsity pattern and random values.
   *
   * Each column of a local row is accepted when a random draw mapped
   * through 0.5*(r + 1) is below onProcDensity (columns inside this
   * processor's column range) or offProcDensity (all other columns).
   * A row is redrawn until it has at least one column. */
  int myRank = MPIComm::world().getRank();
  int numProcs = MPIComm::world().getNProc();

  RCP<MatrixFactory<double> > factory 
    = this->vecType().createMatrixFactory(this->domain(), this->range());

  int nGlobalCols = this->domain().dim();
  int nGlobalRows = this->range().dim();
  int nLocalCols = nGlobalCols / numProcs;
  int nLocalRows = nGlobalRows / numProcs;

  /* row / column ranges treated as local to this processor */
  int firstLocalRow = nLocalRows * myRank;
  int firstLocalCol = nLocalCols * myRank;
  int lastLocalCol = nLocalCols * (myRank+1) - 1;

  IncrementallyConfigurableMatrixFactory* icmf 
    = dynamic_cast<IncrementallyConfigurableMatrixFactory*>(factory.get());

  /* pass 1: draw the column pattern of every local row */
  Array<Array<int> > colIndices(nLocalRows);
  for (int r=0; r<nLocalRows; r++)
  {
    int row = firstLocalRow + r;
    Array<int>& rowCols = colIndices[r];

    /* rowCols starts out empty, so at least one sweep over the columns
     * is made; sweeps repeat until something was accepted */
    do
    {
      for (int c=0; c<nGlobalCols; c++)
      {
        const bool onProc = (c >= firstLocalCol && c <= lastLocalCol);
        double acceptProb = onProc ? onProcDensity : offProcDensity;
        double p = 0.5*(ScalarTraits<double>::random() + 1.0);

        if (p < acceptProb)
        {
          rowCols.append(c);
        }
      }
    }
    while (rowCols.size() == 0);

    icmf->initializeNonzerosInRow(row, rowCols.size(), &(rowCols[0]));
  }
  icmf->finalize();
      
  op_ = factory->createMatrix();
      
  RCP<LoadableMatrix<double> > loadable = op_.matrix();

  /* pass 2: add a random value at every position of the pattern */
  for (int r=0; r<nLocalRows; r++)
  {
    int row = firstLocalRow + r;
    const Array<int>& rowCols = colIndices[r];

    Array<Scalar> rowVals(rowCols.size());
    for (int k=0; k<rowCols.size(); k++)
    {
      rowVals[k] = ScalarTraits<Scalar>::random();
    }

    if (rowCols.size() == 0) continue;

    loadable->addToRow(row, rowCols.size(), 
      &(colIndices[r][0]), &(rowVals[0]));
  }
}
//----------------------------------------------------------------------------
int snl_fei::LinearSystem_General::enforceEssentialBC_LinSysCore()
{
  fei::Matrix* matptr = matrix_.get();
  fei::MatrixReducer* matred = dynamic_cast<fei::MatrixReducer*>(matptr);
  if (matred != NULL) {
    matptr = matred->getTargetMatrix().get();
  }

  fei::Matrix_Impl<LinearSystemCore>* lscmatrix =
    dynamic_cast<fei::Matrix_Impl<LinearSystemCore>*>(matptr);
  if (lscmatrix == 0) {
    return(-1);
  }

  int localsize = matrixGraph_->getRowSpace()->getNumIndices_Owned();
  fei::SharedPtr<fei::Reducer> reducer = matrixGraph_->getReducer();
  if (matrixGraph_->getGlobalNumSlaveConstraints() > 0) {
    localsize = reducer->getLocalReducedEqns().size();
  }

  fei::SharedPtr<fei::FillableMat> inner(new fei::FillableMat);
  bool zeroSharedRows = false;
  fei::SharedPtr<fei::Matrix_Impl<fei::FillableMat> > matrix;
  matrix.reset(new fei::Matrix_Impl<fei::FillableMat>(inner, matrixGraph_, localsize, zeroSharedRows));

  fei::SharedPtr<fei::SparseRowGraph> remoteGraph =
    matrixGraph_->getRemotelyOwnedGraphRows();

  if (!BCenforcement_no_column_mod_) {
    CHK_ERR( snl_fei::gatherRemoteEssBCs(*essBCvalues_, remoteGraph.get(), *matrix) );
  }

  unsigned numBCRows = inner->getNumRows();

  if (output_stream_ != NULL && output_level_ >= fei::BRIEF_LOGS) {
    FEI_OSTREAM& os = *output_stream_;
    os << "#enforceEssentialBC_LinSysCore RemEssBCs to enforce: "
       << numBCRows << FEI_ENDL;
  }

  if (numBCRows > 0 && !BCenforcement_no_column_mod_) {
    std::vector<int*> colIndices(numBCRows);
    std::vector<double*> coefs(numBCRows);
    std::vector<int> colIndLengths(numBCRows);

    fei::CSRMat csrmat(*inner);
    fei::SparseRowGraph& srg = csrmat.getGraph();

    int numEqns = csrmat.getNumRows();
    int* eqns = &(srg.rowNumbers[0]);
    int* rowOffsets = &(srg.rowOffsets[0]);

    for(int i=0; i<numEqns; ++i) {
      colIndices[i] = &(srg.packedColumnIndices[rowOffsets[i]]);
      coefs[i] = &(csrmat.getPackedCoefs()[rowOffsets[i]]);
      colIndLengths[i] = rowOffsets[i+1] - rowOffsets[i];
    }

    int** colInds = &colIndices[0];
    int* colIndLens = &colIndLengths[0];
    double** BCcoefs = &coefs[0];

    if (output_stream_ != NULL && output_level_ > fei::BRIEF_LOGS) {
      FEI_OSTREAM& os = *output_stream_;
      for(int i=0; i<numEqns; ++i) {
        os << "remBCeqn: " << eqns[i] << ", inds/coefs: ";
        for(int j=0; j<colIndLens[i]; ++j) {
          os << "("<<colInds[i][j]<<","<<BCcoefs[i][j]<<") ";
        }
        os << FEI_ENDL;
      }
    }

    int errcode = lscmatrix->getMatrix()->enforceRemoteEssBCs(numEqns,
							      eqns,
							      colInds,
							      colIndLens,
							      BCcoefs);
    if (errcode != 0) {
      return(errcode);
    }
  }

  int numEqns = essBCvalues_->size();
  if (numEqns > 0) {
    int* eqns = &(essBCvalues_->indices())[0];
    double* bccoefs = &(essBCvalues_->coefs())[0];
    std::vector<double> ones(numEqns, 1.0);

    return(lscmatrix->getMatrix()->enforceEssentialBC(eqns, &ones[0],
						    bccoefs, numEqns));
  }

  return(0);
}