C++ (Cpp) Epetra_CrsGraph::ExtractMyRowView 예제들

프로그래밍 언어: C++ (Cpp)

클래스/타입: Epetra_CrsGraph

메소드/함수: ExtractMyRowView

hotexamples.com에서의 예제들: 10

C++ (Cpp) Epetra_CrsGraph::ExtractMyRowView - 10개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C++ (Cpp)의 Epetra_CrsGraph::ExtractMyRowView에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

RowMap(13)

ExtractMyRowView(10)

FillComplete(10)

NumMyRows(8)

ColMap(8)

NumMyIndices(4)

ExtractGlobalRowCopy(4)

Comm(4)

InsertMyIndices(4)

InsertGlobalIndices(4)

ExtractMyRowCopy(3)

NumMyBlockRows(3)

GRID64(3)

GlobalMaxNumIndices(3)

Import(3)

MaxNumIndices(2)

LCID(2)

NumGlobalIndices(2)

NumGlobalNonzeros(2)

NumMyNonzeros(2)

NumGlobalRows64(2)

DomainMap(2)

NumGlobalCols(2)

FindGlobalIndexLoc(2)

RangeMap(2)

NumGlobalNonzeros64(2)

Importer(2)

NumGlobalRows(1)

ExtractGlobalRowView(1)

NumMyBlockCols(1)

NumMyCols(1)

OptimizeStorage(1)

RemoveMyIndices(1)

NumMyEntries(1)

NoDiagonal(1)

NumGlobalEntries(1)

NumGlobalCols64(1)

NumGlobalBlockRows64(1)

NumGlobalBlockRows(1)

NumGlobalBlockCols64(1)

MyLRID(1)

LRID(1)

IndicesAreLocal(1)

IndicesAreGlobal(1)

IndexBase64(1)

ImportMap(1)

GCID64(1)

Filled(1)

Export(1)

MyGRID(1)

예제 #1

파일 보기

파일: Ifpack_CrsRiluk.cpp 프로젝트: cakeisalie/oomphlib_003

//==============================================================================
int Ifpack_CrsRiluk::BlockGraph2PointGraph(const Epetra_CrsGraph & BG, Epetra_CrsGraph & PG, bool Upper) {

  if (!BG.IndicesAreLocal()) {EPETRA_CHK_ERR(-1);} // Must have done FillComplete on BG

  int * ColFirstPointInElementList = BG.RowMap().FirstPointInElementList();
  int * ColElementSizeList = BG.RowMap().ElementSizeList();
  if (BG.Importer()!=0) {
    ColFirstPointInElementList = BG.ImportMap().FirstPointInElementList();
    ColElementSizeList = BG.ImportMap().ElementSizeList();
  }

  int Length = (BG.MaxNumIndices()+1) * BG.ImportMap().MaxMyElementSize();
  vector<int> tmpIndices(Length);

  int BlockRow, BlockOffset, NumEntries;
  int NumBlockEntries;
  int * BlockIndices;

  int NumMyRows_tmp = PG.NumMyRows();

  for (int i=0; i<NumMyRows_tmp; i++) {
    EPETRA_CHK_ERR(BG.RowMap().FindLocalElementID(i, BlockRow, BlockOffset));
    EPETRA_CHK_ERR(BG.ExtractMyRowView(BlockRow, NumBlockEntries, BlockIndices));

    int * ptr = &tmpIndices[0]; // Set pointer to beginning of buffer

    int RowDim = BG.RowMap().ElementSize(BlockRow);
    NumEntries = 0;

    // This next line make sure that the off-diagonal entries in the block diagonal of the 
    // original block entry matrix are included in the nonzero pattern of the point graph
    if (Upper) {
      int jstart = i+1;
      int jstop = EPETRA_MIN(NumMyRows_tmp,i+RowDim-BlockOffset);
      for (int j= jstart; j< jstop; j++) {*ptr++ = j; NumEntries++;}
    }

    for (int j=0; j<NumBlockEntries; j++) {
      int ColDim = ColElementSizeList[BlockIndices[j]];
      NumEntries += ColDim;
      assert(NumEntries<=Length); // Sanity test
      int Index = ColFirstPointInElementList[BlockIndices[j]];
      for (int k=0; k < ColDim; k++) *ptr++ = Index++;
    }

    // This next line make sure that the off-diagonal entries in the block diagonal of the 
    // original block entry matrix are included in the nonzero pattern of the point graph
    if (!Upper) {
      int jstart = EPETRA_MAX(0,i-RowDim+1);
      int jstop = i;
      for (int j = jstart; j < jstop; j++) {*ptr++ = j; NumEntries++;}
    }

    EPETRA_CHK_ERR(PG.InsertMyIndices(i, NumEntries, &tmpIndices[0]));
  }

  SetAllocated(true);

  return(0);
}

예제 #2

파일 보기

파일: Petra_Converters_64.cpp 프로젝트: gahansen/Albany

//EpetraCrsMatrix_To_TpetraCrsMatrix: copies Epetra_CrsMatrix to its analogous Tpetra_CrsMatrix
Teuchos::RCP<Tpetra_CrsMatrix> Petra::EpetraCrsMatrix_To_TpetraCrsMatrix(const Epetra_CrsMatrix& epetraCrsMatrix_,
                                                               const Teuchos::RCP<const Teuchos::Comm<int> >& commT_)
{
  //get row map of Epetra::CrsMatrix & convert to Tpetra::Map
  auto tpetraRowMap_ = EpetraMap_To_TpetraMap(epetraCrsMatrix_.RowMap(), commT_);

  //get col map of Epetra::CrsMatrix & convert to Tpetra::Map
  auto tpetraColMap_ = EpetraMap_To_TpetraMap(epetraCrsMatrix_.ColMap(), commT_);

  //get CrsGraph of Epetra::CrsMatrix & convert to Tpetra::CrsGraph
  const Epetra_CrsGraph epetraCrsGraph_ = epetraCrsMatrix_.Graph();
  std::size_t maxEntries = epetraCrsGraph_.GlobalMaxNumIndices();
  Teuchos::RCP<Tpetra_CrsGraph> tpetraCrsGraph_ = Teuchos::rcp(new Tpetra_CrsGraph(tpetraRowMap_, tpetraColMap_, maxEntries));

  for (LO i=0; i<epetraCrsGraph_.NumMyRows(); i++) {
     LO NumEntries; LO *Indices;
     epetraCrsGraph_.ExtractMyRowView(i, NumEntries, Indices);
     tpetraCrsGraph_->insertLocalIndices(i, NumEntries, Indices);
  }
  tpetraCrsGraph_->fillComplete();

  //convert Epetra::CrsMatrix to Tpetra::CrsMatrix, after creating Tpetra::CrsMatrix based on above Tpetra::CrsGraph
  Teuchos::RCP<Tpetra_CrsMatrix> tpetraCrsMatrix_ = Teuchos::rcp(new Tpetra_CrsMatrix(tpetraCrsGraph_));
  tpetraCrsMatrix_->setAllToScalar(0.0);

  for (LO i=0; i<epetraCrsMatrix_.NumMyRows(); i++) {
     LO NumEntries; LO *Indices; ST *Values;
     epetraCrsMatrix_.ExtractMyRowView(i, NumEntries, Values, Indices);
     tpetraCrsMatrix_->replaceLocalValues(i, NumEntries, Values, Indices);
  }
  tpetraCrsMatrix_->fillComplete();

  return tpetraCrsMatrix_;

}

예제 #3

파일 보기

파일: N_TOP_Indexor.C 프로젝트: Xycedev/Xyce

//-----------------------------------------------------------------------------
// Function      : Indexor::setupAcceleratedMatrixIndexing
// Purpose       :
// Special Notes :
// Scope         : public
// Creator       : Rob Hoekstra, SNL, Parallel Computational Sciences
// Creation Date : 08/23/02
//-----------------------------------------------------------------------------
bool Indexor::setupAcceleratedMatrixIndexing( const std::string & graph_name )
{
  Epetra_CrsGraph * graph = 0;

  assert( pdsMgr_ != 0 );
  // Never, EVER do work inside an assert argument, or that work will not
  // be done when asserts are disabled.
  graph = pdsMgr_->getMatrixGraph( graph_name );
  assert( graph != 0 );

  int NumRows = graph->NumMyRows();
  matrixIndexMap_.clear();
  matrixIndexMap_.resize( NumRows );

  int NumElements;
  int * Elements;
  for( int i = 0; i < NumRows; ++i )
  {
    graph->ExtractMyRowView( i, NumElements, Elements );
    for( int j = 0; j < NumElements; ++j ) matrixIndexMap_[i][ Elements[j] ] = j;
  }

  accelMatrixIndex_ = true;

  return true;
}

예제 #4

파일 보기

파일: N_TOP_Indexor.C 프로젝트: Xycedev/Xyce

//-----------------------------------------------------------------------------
// Function      : Indexor::matrixGlobalToLocal
// Purpose       :
// Special Notes :
// Scope         : public
// Creator       : Rob Hoekstra, SNL, Parallel Computational Sciences
// Creation Date : 08/23/02
//-----------------------------------------------------------------------------
bool Indexor::matrixGlobalToLocal( const std::string & graph_name,
                                         const std::vector<int> & gids,
                                         std::vector< std::vector<int> > & stamp )
{
  Epetra_CrsGraph * graph = 0;

  assert( pdsMgr_ != 0 );
  // Never, EVER do work inside an assert argument, or that work will not
  // be done when asserts are disabled.
  graph = pdsMgr_->getMatrixGraph( graph_name );
  assert( graph != 0 );

  int numRows = stamp.size();

  int numElements;
  int * elements;

  if( accelMatrixIndex_ )
  {
    for( int i = 0; i < numRows; ++i )
    {
      int RowLID = graph->LRID(gids[i]);
      int NumCols = stamp[i].size();
      for( int j = 0; j < NumCols; ++j )
      {
        int lid = graph->LCID(stamp[i][j]);
        stamp[i][j] = matrixIndexMap_[RowLID][lid];
      }
    }
  }
  else
  {
    for( int i = 0; i < numRows; ++i )
    {
      graph->ExtractMyRowView( graph->LRID(gids[i]), numElements, elements );

      std::map<int,int> indexToOffsetMap;
      for( int j = 0; j < numElements; ++j ) indexToOffsetMap[ elements[j] ] = j;

      int numCols = stamp[i].size();
      for( int j = 0; j < numCols; ++j )
      {
        int lid = graph->LCID(stamp[i][j]);
//        assert( indexToOffsetMap.count(lid) );
        stamp[i][j] = indexToOffsetMap[lid];
      }
    }
  }

  return true;
}

예제 #5

파일 보기

void 
BroydenOperator::removeEntriesFromBroydenUpdate( const Epetra_CrsGraph & graph )
{

  int numRemoveIndices ;
  int * removeIndPtr   ;
  int ierr             ;

  cout << graph << endl;

  for( int row = 0; row < graph.NumMyRows(); ++row) 
  {
    ierr = graph.ExtractMyRowView( row, numRemoveIndices, removeIndPtr );
    if( ierr )
    {
      cout << "ERROR (" << ierr << ") : "
           << "NOX::Epetra::BroydenOperator::removeEntriesFromBroydenUpdate(...)"
           << " - Extract indices error for row --> " << row << endl;
      throw "NOX Broyden Operator Error";
    }

    if( 0 != numRemoveIndices )
    {
      // Create a map for quick queries
      map<int, bool> removeIndTable;
      for( int k = 0; k < numRemoveIndices; ++k )
        removeIndTable[ graph.ColMap().GID(removeIndPtr[k]) ] = true;

      // Get our matrix column indices for the current row
      int numOrigIndices = 0;
      int * indPtr;

      ierr = crsMatrix->Graph().ExtractMyRowView( row, numOrigIndices, indPtr );
      if( ierr )
      {
        cout << "ERROR (" << ierr << ") : "
             << "NOX::Epetra::BroydenOperator::removeEntriesFromBroydenUpdate(...)"
             << " - Extract indices error for row --> " << row << endl;
        throw "NOX Broyden Operator Error";
      }

      // Remove appropriate active entities
      if( retainedEntries.end() == retainedEntries.find(row) )
      {
        list<int> inds;

        for( int k = 0; k < numOrigIndices; ++k )
        {
          if( removeIndTable.end() == removeIndTable.find( crsMatrix->Graph().ColMap().GID(indPtr[k]) ) )
            inds.push_back(k);
        }

        retainedEntries[row] = inds;
      }
      else
      {
        list<int> & inds = retainedEntries[row];

        list<int>::iterator iter     = inds.begin() ,
                            iter_end = inds.end()    ;

        for( ; iter_end != iter; ++iter )
        {
          if( !removeIndTable[ *iter ] )
            inds.remove( *iter );
        }
      }

      entriesRemoved[row] = true;
    }
  }

  return;
}

예제 #6

파일 보기

파일: EpetraExt_BlockAdjacencyGraph.cpp 프로젝트: cakeisalie/oomphlib_003

  Teuchos::RCP<Epetra_CrsGraph> BlockAdjacencyGraph::compute( Epetra_CrsGraph& B, int nbrr, std::vector<int>&r, std::vector<double>& weights, bool verbose)
  {
    // Check if the graph is on one processor.
    int myMatProc = -1, matProc = -1;
    int myPID = B.Comm().MyPID();
    for (int proc=0; proc<B.Comm().NumProc(); proc++)
      {
	if (B.NumGlobalEntries() == B.NumMyEntries())
	  myMatProc = myPID;
      }
    B.Comm().MaxAll( &myMatProc, &matProc, 1 );
    
    if( matProc == -1)
      { cout << "FAIL for Global!  All CrsGraph entries must be on one processor!\n"; abort(); }
    
    int i= 0, j = 0, k, l = 0, p, pm, q = -1, ns;
    int tree_height;
    int error = -1;    /* error detected, possibly a problem with the input */
    int nrr;           /* number of rows in B */
    int nzM = 0;       /* number of edges in graph */
    int m = 0;         /* maximum number of nonzeros in any block row of B */
    int* colstack = 0; /* stack used to process each block row */
    int* bstree = 0;   /* binary search tree */
    std::vector<int> Mi, Mj, Mnum(nbrr+1,0);
    nrr = B.NumMyRows();
    if ( matProc == myPID && verbose )
      std::printf(" Matrix Size = %d      Number of Blocks = %d\n",nrr, nbrr);
    else
      nrr = -1;     /* Prevent processor from doing any computations */
    bstree = csr_bst(nbrr);  /* 0 : nbrr-1 */
    tree_height = ceil31log2(nbrr) + 1;
    error = -1;

    l = 0; j = 0; m = 0;
    for( i = 0; i < nrr; i++ ){
      if( i >= r[l+1] ){
	++l;                 /* new block row */
	m = EPETRA_MAX(m,j) ;   /* nonzeros in block row */
	j = B.NumGlobalIndices(i);
      }else{
	j += B.NumGlobalIndices(i);
      }
    }
    /* one more time for the final block */
     m = EPETRA_MAX(m,j) ;   /* nonzeros in block row */

    colstack = (int*) malloc( EPETRA_MAX(m,1) * sizeof(int) );
    // The compressed graph is actually computed twice,
    // due to concerns about memory limitations.  First, 
    // without memory allocation, just nzM is computed.  
    // Next Mj is allocated. Then, the second time, the
    // arrays are actually populated.
    nzM = 0; q = -1; l = 0;
    int * indices;
    int numEntries;
    for( i = 0; i <= nrr; i++ ){
      if( i >= r[l+1] ){
	if( q > 0 ) std::qsort(colstack,q+1,sizeof(int),compare_ints); /* sort stack */
	if( q >= 0 ) ns = 1; /* l, colstack[0] M */
	for( j=1; j<=q ; j++ ){ /* delete copies */
	  if( colstack[j] > colstack[j-1] ) ++ns;
	}
	nzM += ns; /*M->p[l+1] = M->p[l] + ns;*/
	++l;
	q = -1;
      }
      if( i < nrr ){
	B.ExtractMyRowView( i, numEntries, indices );
	for( k = 0; k < numEntries; k++){
	  j = indices[k];  ns = 0; p = 0;
	  while( (r[bstree[p]] > j)  ||  (j >= r[bstree[p]+1])  ){
	    if( r[bstree[p]] > j){
	      p = 2*p+1;
	    }else{
	      if( r[bstree[p]+1] <= j) p = 2*p+2;
	    }
	    ++ns;
	    if( p > nbrr || ns > tree_height ) {
	      error = j;
	      std::printf("error: p %d  nbrr %d  ns %d %d\n",p,nbrr,ns,j); break;
	    }
	  }
	  colstack[++q] = bstree[p];
	}
	//if( error >-1 ){ std::printf("%d\n",error); break; }
        // p > nbrr is a fatal error that is ignored
      }
    }
    
    if ( matProc == myPID && verbose )
      std::printf("nzM =  %d \n", nzM );
    Mi.resize( nzM );
    Mj.resize( nzM );
    q = -1; l = 0; pm = -1;
    for( i = 0; i <= nrr; i++ ){
      if( i >= r[l+1] ){
	if( q > 0 ) std::qsort(colstack,q+1,sizeof(colstack[0]),compare_ints); /* sort stack */
	if( q >= 0 ){
	  Mi[++pm] = l;
	  Mj[pm] = colstack[0];
	}
	for( j=1; j<=q ; j++ ){ /* delete copies */
	  if( colstack[j] > colstack[j-1] ){ /* l, colstack[j] */
	    Mi[++pm] = l;
	    Mj[pm] = colstack[j];
	  }
	}
	++l;
	Mnum[l] = pm + 1;
	
	/* sparse row format: M->p[l+1] = M->p[l] + ns; */
	q = -1;
      }
      if( i < nrr ){
	B.ExtractMyRowView( i, numEntries, indices );
	for( k = 0; k < numEntries; k++){
	  j = indices[k]; ns = 0; p = 0;
	  while( (r[bstree[p]] > j)  ||  (j >= r[bstree[p]+1])  ){
	    if( r[bstree[p]] > j){
	      p = 2*p+1;
	    }else{
	      if( r[bstree[p]+1] <= j) p = 2*p+2;
	    }
	    ++ns;
	  }
	  colstack[++q] = bstree[p];
	}
      }
    }
    if ( bstree ) free ( bstree );
    if ( colstack ) free( colstack );
    
    // Compute weights as number of rows in each block.
    weights.resize( nbrr );
    for( l=0; l<nbrr; l++) weights[l] = r[l+1] - r[l];
    
    // Compute Epetra_CrsGraph and return
    Teuchos::RCP<Epetra_Map> newMap;
    if ( matProc == myPID )
      newMap = Teuchos::rcp( new Epetra_Map(nbrr, nbrr, 0, B.Comm() ) );
    else
      newMap = Teuchos::rcp( new Epetra_Map( nbrr, 0, 0, B.Comm() ) );
    Teuchos::RCP<Epetra_CrsGraph> newGraph = Teuchos::rcp( new Epetra_CrsGraph( Copy, *newMap, 0 ) );
    for( l=0; l<newGraph->NumMyRows(); l++) {
      newGraph->InsertGlobalIndices( l, Mnum[l+1]-Mnum[l], &Mj[Mnum[l]] );
    }
    newGraph->FillComplete();
    
    return (newGraph);  
  }

예제 #7

파일 보기

파일: cxx_main.cpp 프로젝트: KineticTheory/Trilinos

int check(Epetra_CrsGraph& L, Epetra_CrsGraph& U, Ifpack_IlukGraph& LU,
          int NumGlobalRows1, int NumMyRows1, int LevelFill1, bool verbose) {
  using std::cout;
  using std::endl;

  int i, j;
  int NumIndices, * Indices;
  int NumIndices1, * Indices1;

  bool debug = true;

  Epetra_CrsGraph& L1 = LU.L_Graph();
  Epetra_CrsGraph& U1 = LU.U_Graph();

  // Test entries and count nonzeros

  int Nout = 0;

  for (i=0; i<LU.NumMyRows(); i++) {

    assert(L.ExtractMyRowView(i, NumIndices, Indices)==0);
    assert(L1.ExtractMyRowView(i, NumIndices1, Indices1)==0);
    assert(NumIndices==NumIndices1);
    for (j=0; j<NumIndices1; j++) {
      if (debug &&(Indices[j]!=Indices1[j])) {
        int MyPID = L.RowMap().Comm().MyPID();
        cout << "Proc " << MyPID
             << " Local Row = " << i
             << "  L.Indices["<< j <<"]  = " << Indices[j]
             << " L1.Indices["<< j <<"] = " << Indices1[j] << endl;
      }
      assert(Indices[j]==Indices1[j]);
    }
    Nout += (NumIndices-NumIndices1);

    assert(U.ExtractMyRowView(i, NumIndices, Indices)==0);
    assert(U1.ExtractMyRowView(i, NumIndices1, Indices1)==0);
    assert(NumIndices==NumIndices1);
    for (j=0; j<NumIndices1; j++)  {
      if (debug &&(Indices[j]!=Indices1[j])) {
        int MyPID = L.RowMap().Comm().MyPID();
        cout << "Proc " << MyPID
             << " Local Row = " << i
             << "  U.Indices["<< j <<"]  = " << Indices[j]
             << " U1.Indices["<< j <<"] = " << Indices1[j] << endl;
      }
      assert(Indices[j]==Indices1[j]);
    }
    Nout += (NumIndices-NumIndices1);
  }

  // Test query functions

  int NumGlobalRows = LU.NumGlobalRows();
  if (verbose) cout << "\n\nNumber of Global Rows = " << NumGlobalRows << endl<< endl;

  assert(NumGlobalRows==NumGlobalRows1);

  int NumGlobalNonzeros = LU.NumGlobalNonzeros();
  if (verbose) cout << "\n\nNumber of Global Nonzero entries = "
                    << NumGlobalNonzeros << endl<< endl;

  int NoutG = 0;

  L.RowMap().Comm().SumAll(&Nout, &NoutG, 1);

  assert(NumGlobalNonzeros==L.NumGlobalNonzeros()+U.NumGlobalNonzeros()-NoutG);

  int NumMyRows = LU.NumMyRows();
  if (verbose) cout << "\n\nNumber of Rows = " << NumMyRows << endl<< endl;

  assert(NumMyRows==NumMyRows1);

  int NumMyNonzeros = LU.NumMyNonzeros();
  if (verbose) cout << "\n\nNumber of Nonzero entries = " << NumMyNonzeros << endl<< endl;

  assert(NumMyNonzeros==L.NumMyNonzeros()+U.NumMyNonzeros()-Nout);

  if (verbose) cout << "\n\nLU check OK" << endl<< endl;

  return(0);
}

예제 #8

파일 보기

파일: ispatest_lbeval_utils.cpp 프로젝트: 00liujj/trilinos

void show_matrix(const char *txt, const Epetra_CrsGraph &graph, const Epetra_Comm &comm)
{
  int me = comm.MyPID();

  if (comm.NumProc() > 10){
    if (me == 0){
      std::cerr << txt << std::endl;
      std::cerr << "Printed matrix format only works for 10 or fewer processes" << std::endl;
    }
    return;
  }

  const Epetra_BlockMap &rowmap = graph.RowMap();
  const Epetra_BlockMap &colmap = graph.ColMap();

  int myRows = rowmap.NumMyElements();
  int numRows = graph.NumGlobalRows();
  int numCols = graph.NumGlobalCols();
  int base = rowmap.IndexBase();

  if ((numRows > 200) || (numCols > 500)){
    if (me == 0){
      std::cerr << txt << std::endl;
      std::cerr << "show_matrix: problem is too large to display" << std::endl;
    }
    return;
  }

  int *myA = new int [numRows * numCols];
  memset(myA, 0, sizeof(int) * numRows * numCols);

  int *myIndices;

  int *myRowGIDs = rowmap.MyGlobalElements();

  for (int i=0; i< myRows; i++){
    int myRowLID = rowmap.LID(myRowGIDs[i]);

    int numEntries = graph.NumMyIndices(myRowLID);

    if (numEntries > 0){
      int rc = graph.ExtractMyRowView(myRowLID, numEntries, myIndices);
      if (rc){
        std::cerr << txt << std::endl;
        std::cerr << "extract graph error" << std::endl;
        return;
      }

      int *row = myA + (numCols * (myRowGIDs[i] - base));

      for (int j=0; j < numEntries; j++){
        int gid = colmap.GID(myIndices[j]);
        row[gid-base] = me+1;
      }
    }
  }

  printMatrix(txt, myA, NULL, NULL, numRows, numCols, comm);

  delete [] myA;
}

예제 #9

파일 보기

파일: cxx_main.cpp 프로젝트: 00liujj/trilinos

//==============================================================================
int check(Epetra_CrsGraph& A, int NumMyRows1, long long NumGlobalRows1, int NumMyNonzeros1,
	  long long NumGlobalNonzeros1, long long* MyGlobalElements, bool verbose)
{
  (void)MyGlobalElements;
  int ierr = 0;
	int i;
	int j;
	int forierr = 0;
  int NumGlobalIndices;
  int NumMyIndices;
	int* MyViewIndices;
  int MaxNumIndices = A.MaxNumIndices();
  int* MyCopyIndices = new int[MaxNumIndices];
  long long* GlobalCopyIndices = new long long[MaxNumIndices];

  // Test query functions

  int NumMyRows = A.NumMyRows();
  if(verbose) cout << "Number of local Rows = " << NumMyRows << endl;

  EPETRA_TEST_ERR(!(NumMyRows==NumMyRows1),ierr);

  int NumMyNonzeros = A.NumMyNonzeros();
  if(verbose) cout << "Number of local Nonzero entries = " << NumMyNonzeros << endl;

  EPETRA_TEST_ERR(!(NumMyNonzeros==NumMyNonzeros1),ierr);

  long long NumGlobalRows = A.NumGlobalRows64();
  if(verbose) cout << "Number of global Rows = " << NumGlobalRows << endl;

  EPETRA_TEST_ERR(!(NumGlobalRows==NumGlobalRows1),ierr);

  long long NumGlobalNonzeros = A.NumGlobalNonzeros64();
  if(verbose) cout << "Number of global Nonzero entries = " << NumGlobalNonzeros << endl;

  EPETRA_TEST_ERR(!(NumGlobalNonzeros==NumGlobalNonzeros1),ierr);

  // GlobalRowView should be illegal (since we have local indices)

  EPETRA_TEST_ERR(!(A.ExtractGlobalRowView(A.RowMap().MaxMyGID64(), NumGlobalIndices, GlobalCopyIndices)==-2),ierr);

  // Other binary tests

  EPETRA_TEST_ERR(A.NoDiagonal(),ierr);
  EPETRA_TEST_ERR(!(A.Filled()),ierr);
  EPETRA_TEST_ERR(!(A.MyGRID(A.RowMap().MaxMyGID64())),ierr);
  EPETRA_TEST_ERR(!(A.MyGRID(A.RowMap().MinMyGID64())),ierr);
  EPETRA_TEST_ERR(A.MyGRID(1+A.RowMap().MaxMyGID64()),ierr);
  EPETRA_TEST_ERR(A.MyGRID(-1+A.RowMap().MinMyGID64()),ierr);
  EPETRA_TEST_ERR(!(A.MyLRID(0)),ierr);
  EPETRA_TEST_ERR(!(A.MyLRID(NumMyRows-1)),ierr);
  EPETRA_TEST_ERR(A.MyLRID(-1),ierr);
  EPETRA_TEST_ERR(A.MyLRID(NumMyRows),ierr);

  forierr = 0;
  for(i = 0; i < NumMyRows; i++) {
    long long Row = A.GRID64(i);
    A.ExtractGlobalRowCopy(Row, MaxNumIndices, NumGlobalIndices, GlobalCopyIndices);
    A.ExtractMyRowView(i, NumMyIndices, MyViewIndices);
    forierr += !(NumGlobalIndices==NumMyIndices);
    for(j = 1; j < NumMyIndices; j++) EPETRA_TEST_ERR(!(MyViewIndices[j-1]<MyViewIndices[j]),ierr);
    for(j = 0; j < NumGlobalIndices; j++) {
			forierr += !(GlobalCopyIndices[j]==A.GCID64(MyViewIndices[j]));
			forierr += !(A.LCID(GlobalCopyIndices[j])==MyViewIndices[j]);
    }
  }
  EPETRA_TEST_ERR(forierr,ierr);
  forierr = 0;
  for(i = 0; i < NumMyRows; i++) {
    long long Row = A.GRID64(i);
    A.ExtractGlobalRowCopy(Row, MaxNumIndices, NumGlobalIndices, GlobalCopyIndices);
    A.ExtractMyRowCopy(i, MaxNumIndices, NumMyIndices, MyCopyIndices);
    forierr += !(NumGlobalIndices==NumMyIndices);
    for(j = 1; j < NumMyIndices; j++)
			EPETRA_TEST_ERR(!(MyCopyIndices[j-1]<MyCopyIndices[j]),ierr);
    for(j = 0; j < NumGlobalIndices; j++) {
			forierr += !(GlobalCopyIndices[j]==A.GCID64(MyCopyIndices[j]));
			forierr += !(A.LCID(GlobalCopyIndices[j])==MyCopyIndices[j]);
    }

  }
  EPETRA_TEST_ERR(forierr,ierr);

  delete[] MyCopyIndices;
  delete[] GlobalCopyIndices;

  if(verbose) cout << "Rows sorted check OK" << endl;

  return(ierr);
}

예제 #10

파일 보기

파일: ml_MatrixFreePreconditioner.cpp 프로젝트: haripandey/trilinos

// ============================================================================ 
int ML_Epetra::MatrixFreePreconditioner::
Compute(const Epetra_CrsGraph& Graph, Epetra_MultiVector& NullSpace)
{
  Epetra_Time TotalTime(Comm());

  const int NullSpaceDim = NullSpace.NumVectors();
  // get parameters from the list
  std::string PrecType = List_.get("prec: type", "hybrid");
  std::string SmootherType = List_.get("smoother: type", "Jacobi");
  std::string ColoringType = List_.get("coloring: type", "JONES_PLASSMAN");
  int PolynomialDegree = List_.get("smoother: degree", 3);
  std::string DiagonalColoringType = List_.get("diagonal coloring: type", "JONES_PLASSMAN");
  int MaximumIterations = List_.get("eigen-analysis: max iters", 10);
  std::string EigenType_ = List_.get("eigen-analysis: type", "cg");
  double boost = List_.get("eigen-analysis: boost for lambda max", 1.0);
  int OutputLevel = List_.get("ML output", -47);
  if (OutputLevel == -47) OutputLevel =  List_.get("output", 10);
  omega_ = List_.get("smoother: damping", omega_);
  ML_Set_PrintLevel(OutputLevel);
  bool LowMemory = List_.get("low memory", true);
  double AllocationFactor = List_.get("AP allocation factor", 0.5);

  verbose_ = (MyPID() == 0 && ML_Get_PrintLevel() > 5);

  // ================ //
  // check parameters //
  // ================ //

  if (PrecType == "presmoother only")
    PrecType_ = ML_MFP_PRESMOOTHER_ONLY;
  else if (PrecType == "hybrid")
    PrecType_ = ML_MFP_HYBRID;
  else if (PrecType == "additive")
    PrecType_ = ML_MFP_ADDITIVE;
  else
    ML_CHK_ERR(-3); // not recognized

  if (SmootherType == "none")
    SmootherType_ = ML_MFP_NONE;
  else if (SmootherType == "Jacobi")
    SmootherType_ = ML_MFP_JACOBI;
  else if (SmootherType == "block Jacobi")
    SmootherType_ = ML_MFP_BLOCK_JACOBI;
  else if (SmootherType == "Chebyshev")
    SmootherType_ = ML_MFP_CHEBY;
  else
    ML_CHK_ERR(-4); // not recognized

  if (AllocationFactor <= 0.0)
    ML_CHK_ERR(-1); // should be positive

  // =============================== //
  // basic checkings and some output //
  // =============================== //
  
  int OperatorDomainPoints =  Operator_.OperatorDomainMap().NumGlobalPoints();
  int OperatorRangePoints =  Operator_.OperatorRangeMap().NumGlobalPoints();
  int GraphBlockRows = Graph.NumGlobalBlockRows();
  int GraphNnz = Graph.NumGlobalNonzeros();
  NumPDEEqns_ = OperatorRangePoints / GraphBlockRows;
  NumMyBlockRows_ = Graph.NumMyBlockRows();

  if (OperatorDomainPoints != OperatorRangePoints)
    ML_CHK_ERR(-1); // only square matrices

  if (OperatorRangePoints % NumPDEEqns_ != 0)
    ML_CHK_ERR(-2); // num PDEs seems not constant

  if (verbose_)
  {
    ML_print_line("=",78);
    std::cout << "*** " << std::endl;
    std::cout << "*** ML_Epetra::MatrixFreePreconditioner" << std::endl;
    std::cout << "***" << std::endl;
    std::cout << "Number of rows and columns      = " << OperatorDomainPoints << std::endl;
    std::cout << "Number of rows per processor    = " << OperatorDomainPoints / Comm().NumProc()
         << " (on average)" << std::endl;
    std::cout << "Number of rows in the graph     = " << GraphBlockRows << std::endl;
    std::cout << "Number of nonzeros in the graph = " << GraphNnz << std::endl;
    std::cout << "Processors used in computation  = " << Comm().NumProc() << std::endl;
    std::cout << "Number of PDE equations         = " << NumPDEEqns_ << std::endl;
    std::cout << "Null space dimension            = " << NullSpaceDim << std::endl;
    std::cout << "Preconditioner type             = " << PrecType << std::endl;
    std::cout << "Smoother type                   = " << SmootherType << std::endl;
    std::cout << "Coloring type                   = " << ColoringType << std::endl;
    std::cout << "Allocation factor               = " << AllocationFactor << std::endl;
    std::cout << "Number of V-cycles for C        = " << List_.sublist("ML list").get("cycle applications", 1) << std::endl;
    std::cout << std::endl;
  }

  ResetStartTime();

  // ==================================== //
  // compute the inverse of the diagonal, //
  // control that no elements are zero.   //
  // ==================================== //
  
  for (int i = 0; i < InvPointDiagonal_->MyLength(); ++i)
    if ((*InvPointDiagonal_)[i] != 0.0)
      (*InvPointDiagonal_)[i] = 1.0 / (*InvPointDiagonal_)[i];

  // ========================================================= //
  // Setup the smoother. I need to extract the block diagonal  //
  // only if block Jacobi is used. For Chebyshev, I scale with //
  // the point diagonal only. In this latter case, I need to   //
  // compute lambda_max of the scaled operator.                //
  // ========================================================= //
  
  // probes for the block diagonal of the matrix.
  if (SmootherType_ == ML_MFP_JACOBI ||
      SmootherType_ == ML_MFP_NONE)
  {
    // do-nothing here
  }
  else if (SmootherType_ == ML_MFP_BLOCK_JACOBI)
  {
    if (verbose_);
      std::cout << "Diagonal coloring type         = " << DiagonalColoringType << std::endl;
    ML_CHK_ERR(GetBlockDiagonal(Graph, DiagonalColoringType));

    AddAndResetStartTime("block diagonal construction", true);
  }
  else if (SmootherType_ == ML_MFP_CHEBY)
  {
    double lambda_min = 0.0;
    double lambda_max = 0.0;
    Teuchos::ParameterList IFPACKList;

    if (EigenType_ == "power-method")
    {
      ML_CHK_ERR(Ifpack_Chebyshev::PowerMethod(Operator_, *InvPointDiagonal_,
                                               MaximumIterations, lambda_max));
    }
    else if(EigenType_ == "cg")
    {
      ML_CHK_ERR(Ifpack_Chebyshev::CG(Operator_, *InvPointDiagonal_,
                                      MaximumIterations, lambda_min, 
                                      lambda_max));
    }
    else
      ML_CHK_ERR(-1); // not recognized

    if (verbose_)
    {
      std::cout << "Using Chebyshev smoother of degree " << PolynomialDegree << std::endl;
      std::cout << "Estimating eigenvalues using " <<  EigenType_ << std::endl;
      std::cout << "lambda_min = " << lambda_min << ", ";
      std::cout << "lambda_max = " << lambda_max << std::endl;
    }

    IFPACKList.set("chebyshev: min eigenvalue", lambda_min);
    IFPACKList.set("chebyshev: max eigenvalue", boost * lambda_max);
    // FIXME: this allocates a new std::vector inside
    IFPACKList.set("chebyshev: operator inv diagonal", InvPointDiagonal_.get());
    IFPACKList.set("chebyshev: degree", PolynomialDegree);

    PreSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_)));
    if (PreSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error?

    IFPACKList.set("chebyshev: zero starting solution", true);
    ML_CHK_ERR(PreSmoother_->SetParameters(IFPACKList));
    ML_CHK_ERR(PreSmoother_->Initialize());
    ML_CHK_ERR(PreSmoother_->Compute());

    PostSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_)));
    if (PostSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error?

    IFPACKList.set("chebyshev: zero starting solution", false);
    ML_CHK_ERR(PostSmoother_->SetParameters(IFPACKList));
    ML_CHK_ERR(PostSmoother_->Initialize());
    ML_CHK_ERR(PostSmoother_->Compute());
  }

  // ========================================================= //
  // building P and R for block graph. This is done by working //
  // on the Graph_ object. Support is provided for local       //
  // aggregation schemes only so that all is basically local.  //
  // Then, build the block graph coarse problem.               //
  // ========================================================= //
  
  // ML wrapper for Graph_
  ML_Operator* Graph_ML = ML_Operator_Create(Comm_ML());
  ML_Operator_WrapEpetraCrsGraph(const_cast<Epetra_CrsGraph*>(&Graph), Graph_ML);

  ML_Aggregate* BlockAggr_ML = 0;
  ML_Operator* BlockPtent_ML = 0, *BlockRtent_ML = 0,* CoarseGraph_ML = 0;

  if (verbose_) std::cout << std::endl;

  ML_CHK_ERR(Coarsen(Graph_ML, &BlockAggr_ML, &BlockPtent_ML, &BlockRtent_ML, 
                     &CoarseGraph_ML));

  if (verbose_) std::cout << std::endl;

  Epetra_CrsMatrix* GraphCoarse;
  ML_CHK_ERR(ML_Operator2EpetraCrsMatrix(CoarseGraph_ML, GraphCoarse));

  // used later to estimate the entries in AP
  ML_Operator* CoarseAP_ML = ML_Operator_Create(Comm_ML());
  ML_2matmult(Graph_ML, BlockPtent_ML, CoarseAP_ML, ML_CSR_MATRIX);

  int AP_MaxNnzRow, itmp = CoarseAP_ML->max_nz_per_row;
  Comm().MaxAll(&itmp, &AP_MaxNnzRow, 1);
  ML_Operator_Destroy(&CoarseAP_ML);

  int NumAggregates = BlockPtent_ML->invec_leng;
  ML_Operator_Destroy(&BlockRtent_ML);
  ML_Operator_Destroy(&CoarseGraph_ML);

  AddAndResetStartTime("construction of block C, R, and P", true);
  if (verbose_) std::cout << std::endl;

  // ================================================== //
  // coloring of block graph:                           //
  // - color of block row `i' is given by `ColorMap[i]' //
  // - number of colors is ColorMap.NumColors().        //
  // ================================================== //
  
  ResetStartTime();

  CrsGraph_MapColoring* MapColoringTransform;
  
  if (ColoringType == "JONES_PLASSMAN")
    MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::JONES_PLASSMAN,
                                                     0, false, 0);
  else if (ColoringType == "PSEUDO_PARALLEL")
    MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::PSEUDO_PARALLEL,
                                                     0, false, 0);
  else if (ColoringType == "GREEDY")
    MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::GREEDY,
                                                     0, false, 0);
  else if (ColoringType == "LUBY")
    MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::LUBY,
                                                     0, false, 0);
  else 
    ML_CHK_ERR(-1);

  Epetra_MapColoring* ColorMap = &(*MapColoringTransform)(const_cast<Epetra_CrsGraph&>(GraphCoarse->Graph()));

  // move the information from ColorMap to std::vector Colors
  const int NumColors = ColorMap->MaxNumColors();
  RefCountPtr<Epetra_IntSerialDenseVector> Colors = rcp(new Epetra_IntSerialDenseVector(GraphCoarse->Graph().NumMyRows()));
  for (int i = 0; i < GraphCoarse->Graph().NumMyRows(); ++i)
    (*Colors)[i] = (*ColorMap)[i];

  delete MapColoringTransform;
  delete ColorMap; ColorMap = 0;
  delete GraphCoarse;

  AddAndResetStartTime("coarse graph coloring", true);
  if (verbose_) std::cout << std::endl;

  // get some other information about the aggregates, to be used
  // in the QR factorization of the null space. NodesOfAggregate
  // contains the local ID of block rows contained in each aggregate.

  // FIXME: make it faster
  std::vector< std::vector<int> > NodesOfAggregate(NumAggregates);

  for (int i = 0; i < Graph.NumMyBlockRows(); ++i)
  {
    int AID = BlockAggr_ML->aggr_info[0][i];
    NodesOfAggregate[AID].push_back(i);
  }

  int MaxAggrSize = 0;
  for (int i = 0; i < NumAggregates; ++i)
  {
    const int& MySize = NodesOfAggregate[i].size();
    if (MySize > MaxAggrSize) MaxAggrSize = MySize;
  }

  // collect aggregate information, and mark all nodes that are
  // connected with each aggregate. These nodes will have a possible
  // nonzero entry after the matrix-matrix product between the Operator_
  // and the tentative prolongator.

  std::vector<vector<int> > aggregates(NumAggregates);
  std::vector<int>::iterator iter;

  for (int i = 0; i < NumAggregates; ++i)
    aggregates[i].reserve(MaxAggrSize);

  for (int i = 0; i < Graph.NumMyBlockRows(); ++i)
  {
    int AID = BlockAggr_ML->aggr_info[0][i];

    int NumEntries;
    int* Indices;

    Graph.ExtractMyRowView(i, NumEntries, Indices);

    for (int k = 0; k < NumEntries; ++k)
    {
      // FIXME: use hash??
      const int& GCID = Graph.ColMap().GID(Indices[k]);

      iter = find(aggregates[AID].begin(), aggregates[AID].end(), GCID);
      if (iter == aggregates[AID].end())
        aggregates[AID].push_back(GCID);
    }
  }
  
  int* BlockNodeList = Graph.ColMap().MyGlobalElements();

  // finally get rid of the ML_Aggregate structure.
  ML_Aggregate_Destroy(&BlockAggr_ML);

  const Epetra_Map& FineMap = Operator_.OperatorDomainMap();
  Epetra_Map CoarseMap(-1, NumAggregates * NullSpaceDim, 0, Comm());
  RefCountPtr<Epetra_Map> BlockNodeListMap = 
    rcp(new Epetra_Map(-1, Graph.ColMap().NumMyElements(),
                       BlockNodeList, 0, Comm()));

  std::vector<int> NodeList(Graph.ColMap().NumMyElements() * NumPDEEqns_);
  for (int i = 0; i < Graph.ColMap().NumMyElements(); ++i)
    for (int m = 0; m < NumPDEEqns_; ++m)
      NodeList[i * NumPDEEqns_ + m] = BlockNodeList[i] * NumPDEEqns_ + m;
  RefCountPtr<Epetra_Map> NodeListMap = 
    rcp(new Epetra_Map(-1, NodeList.size(), &NodeList[0], 0, Comm()));

  AddAndResetStartTime("data structures", true);

  // ====================== //
  // process the null space //
  // ====================== //

  // CHECKME
  Epetra_MultiVector NewNullSpace(CoarseMap, NullSpaceDim);
  NewNullSpace.PutScalar(0.0);

  if (NullSpaceDim == 1)
  {
    double* ns_ptr = NullSpace.Values();

    for (int AID = 0; AID < NumAggregates; ++AID)
    {
      double dtemp = 0.0;
      for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++)
        for (int m = 0; m < NumPDEEqns_; ++m)
        {
          const int& pos = NodesOfAggregate[AID][j] * NumPDEEqns_ + m;
          dtemp += (ns_ptr[pos] * ns_ptr[pos]);
        }
      dtemp = std::sqrt(dtemp);

      NewNullSpace[0][AID] = dtemp;

      dtemp = 1.0 / dtemp;

      for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++)
        for (int m = 0; m < NumPDEEqns_; ++m)
          ns_ptr[NodesOfAggregate[AID][j] * NumPDEEqns_ + m] *= dtemp;
    }
  }
  else
  {
    // FIXME
    std::vector<double> qr_ptr(MaxAggrSize * NumPDEEqns_ * MaxAggrSize * NumPDEEqns_);
    std::vector<double> tmp_ptr(MaxAggrSize * NumPDEEqns_ * NullSpaceDim);

    std::vector<double> work(NullSpaceDim);
    int info;

    for (int AID = 0; AID < NumAggregates; ++AID)
    {
      int MySize = NodesOfAggregate[AID].size();
      int MyFullSize = NodesOfAggregate[AID].size() * NumPDEEqns_;
      int lwork = NullSpaceDim;

      for (int k = 0; k < NullSpaceDim; ++k)
        for (int j = 0; j < MySize; ++j)
          for (int m = 0; m < NumPDEEqns_; ++m)
            qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m] = 
              NullSpace[k][NodesOfAggregate[AID][j] * NumPDEEqns_ + m];

      DGEQRF_F77(&MyFullSize, (int*)&NullSpaceDim, &qr_ptr[0], 
                 &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info);

      ML_CHK_ERR(info);

      if (work[0] > lwork) work.resize((int) work[0]);

      // the upper triangle of qr_tmp is now R, so copy that into the 
      //  new nullspace

      for (int j = 0; j < NullSpaceDim; j++)
        for (int k = j; k < NullSpaceDim; k++)
          NewNullSpace[k][AID * NullSpaceDim + j] = qr_ptr[j + MyFullSize * k];
		 
      // to get this block of P, need to run qr_tmp through another LAPACK 
      // function:

      DORGQR_F77(&MyFullSize, (int*)&NullSpaceDim, (int*)&NullSpaceDim, 
                 &qr_ptr[0], &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info);
      ML_CHK_ERR(info); // dgeqtr returned a non-zero

      if (work[0] > lwork) work.resize((int) work[0]);

      // insert the Q block into the null space

      for (int k = 0; k < NullSpaceDim; ++k)
        for (int j = 0; j < MySize; ++j)
          for (int m = 0; m < NumPDEEqns_; ++m)
          {
            int LRID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m;
            double& val = qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m];
            NullSpace[k][LRID] = val;
          }
    }
  }

  AddAndResetStartTime("null space setup", true);

  if (verbose_)
    std::cout << "Number of colors on processor " << Comm().MyPID() << " = "
        << NumColors << std::endl;
  if (verbose_)
    std::cout << "Maximum number of colors = " << NumColors << std::endl;

  RefCountPtr<Epetra_FECrsMatrix> AP;
  
  // try to get a good estimate of the nonzeros per row.
  // This is a compromize between efficiency -- that is, reduce
  // the memory allocation processes, and memory usage -- that, is
  // overestimating can actually kill the code. Basically, this is
  // all junk due to our dear friend, the Cray XT3.
  
  AP = rcp(new Epetra_FECrsMatrix(Copy, FineMap, (int)
                                  (AllocationFactor * AP_MaxNnzRow * NullSpaceDim)));
  if (AP.get() == 0) throw(-1);

  if (!LowMemory)
  {
    // ================================================= //
    // allocate one big chunk of memory, and use View    //             
    // to create Epetra_MultiVectors. Note that          //
    // NumColors * NullSpace can indeed be a quite large //
    // value. To reduce the memory consumption, both     //
    // ColoredAP and ExtColoredAP use the same memory    //
    // array.                                            //
    // ================================================= //
    
    Epetra_MultiVector* ColoredP;
    std::vector<double> ColoredAP_ptr;

    try
    {
      ColoredP = new Epetra_MultiVector(FineMap, NumColors * NullSpaceDim);
      ColoredAP_ptr.resize(NumColors * NullSpaceDim * NodeListMap->NumMyPoints());
    }
    catch (std::exception& rhs)
    {
      catch_message("the allocation of ColoredP", rhs.what(), __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }
    catch (...)
    {
      catch_message("the allocation of ColoredP", "", __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }

    int ColoredAP_LDA = NodeListMap->NumMyPoints();

    ColoredP->PutScalar(0.0);

    for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i)
    {
      int allocated = 1;
      int NumEntries;
      int Indices;
      double Values;
      int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated,
                                    &Indices,&Values,&NumEntries);
      if (ierr < 0)
        ML_CHK_ERR(-1);

      assert (NumEntries == 1); // this is the block P
      const int& Color = (*Colors)[Indices] - 1;
      for (int k = 0; k < NumPDEEqns_; ++k)
        for (int j = 0; j < NullSpaceDim; ++j)
          (*ColoredP)[(Color * NullSpaceDim + j)][i * NumPDEEqns_ + k] = 
            NullSpace[j][i * NumPDEEqns_ + k];
    }

    ML_Operator_Destroy(&BlockPtent_ML);

    Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), 
                                 &ColoredAP_ptr[0], ColoredAP_LDA, 
                                 NumColors * NullSpaceDim);
    // move ColoredAP into ColoredP. This should not be required.
    // but I prefer to skip strange games with View pointers
    Operator_.Apply(*ColoredP, ColoredAP);
    *ColoredP = ColoredAP;

    // FIXME: only if NumProc > 1
    Epetra_MultiVector ExtColoredAP(View, *NodeListMap, 
                                 &ColoredAP_ptr[0], ColoredAP_LDA, 
                                 NumColors * NullSpaceDim);

    try 
    {
      Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap());
      ExtColoredAP.Import(*ColoredP, Importer, Insert);
    }
    catch (std::exception& rhs)
    {
      catch_message("importing of ExtColoredAP", rhs.what(), __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }
    catch (...)
    {
      catch_message("importing of ExtColoredAP", "", __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }

    delete ColoredP;

    AddAndResetStartTime("computation of AP", true); 

    // populate the actual AP operator, skip some controls to make it faster

    for (int i = 0; i < NumAggregates; ++i)
    {
      for (int j = 0; j < (int) (aggregates[i].size()); ++j)
      {
        int GRID = aggregates[i][j];
        int LRID = BlockNodeListMap->LID(GRID); // this is the block ID
        //assert (LRID != -1);
        int GCID = CoarseMap.GID(i * NullSpaceDim);
        //assert (GCID != -1); 
        int color = (*Colors)[i] - 1;
        for (int k = 0; k < NumPDEEqns_; ++k)
          for (int j = 0; j < NullSpaceDim; ++j)
          {
            double val = ExtColoredAP[color * NullSpaceDim + j][LRID * NumPDEEqns_ + k];
            if (val != 0.0)
            {
              int GRID2 = GRID * NumPDEEqns_ + k;
              int GCID2 = GCID + j;
              AP->InsertGlobalValues(1, &GRID2, 1, &GCID2, &val);
              //if (ierr < 0) ML_CHK_ERR(ierr);
            }
          }
      }
    }
  }
  else
  {
    // =============================================================== //
    // apply the operator one color at-a-time. This requires NumColors //
    // cycles over BlockPtent. However, the memory requirements are    //
    // drastically reduced. As for low-memory == false, both ColoredAP //
    // and ExtColoredAP point to the same memory location.             //
    // =============================================================== //
    
    if (verbose_)
      std::cout << "Using low-memory computation for AP" << std::endl;

    Epetra_MultiVector ColoredP(FineMap, NullSpaceDim);
    std::vector<double> ColoredAP_ptr;
    try
    {
      ColoredAP_ptr.resize(NullSpaceDim * NodeListMap->NumMyPoints());
    }
    catch (std::exception& rhs)
    {
      catch_message("resizing of ColoredAP_pt", rhs.what(), __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }
    catch (...)
    {
      catch_message("resizing of ColoredAP_pt", "", __FILE__, __LINE__);
      ML_CHK_ERR(-1);
    }

    Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), 
                                 &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), 
                                 NullSpaceDim);
    Epetra_MultiVector ExtColoredAP(View, *NodeListMap, 
                                 &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), 
                                 NullSpaceDim);
    Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap());

    for (int ic = 0; ic < NumColors; ++ic)
    {
      if (ML_Get_PrintLevel() > 8 && Comm().MyPID() == 0)
      {
        if (ic % 20 == 0)
          std::cout << "Processing color " << flush;

        std::cout << ic << " " << flush;
        if (ic % 20 == 19 || ic == NumColors - 1)
          std::cout << std::endl;
        if (ic == NumColors - 1) std::cout << std::endl;
      }

      ColoredP.PutScalar(0.0);

      for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i)
      {
        int allocated = 1;
        int NumEntries;
        int Indices;
        double Values;
        int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated,
                                      &Indices,&Values,&NumEntries);
        if (ierr < 0 ||  // something strange in getrow
            NumEntries != 1) // this is the block P
          ML_CHK_ERR(-1);

        const int& Color = (*Colors)[Indices] - 1;
        if (Color != ic)
          continue; // skip this color for this cycle

        for (int k = 0; k < NumPDEEqns_; ++k)
          for (int j = 0; j < NullSpaceDim; ++j)
            ColoredP[j][i * NumPDEEqns_ + k] = 
              NullSpace[j][i * NumPDEEqns_ + k];
      }

      Operator_.Apply(ColoredP, ColoredAP);
      ColoredP = ColoredAP; // just to be safe

      ExtColoredAP.Import(ColoredP, Importer, Insert);

      // populate the actual AP operator, skip some controls to make it faster

      std::vector<int> InsertCols(NullSpaceDim * NumPDEEqns_);
      std::vector<double> InsertValues(NullSpaceDim * NumPDEEqns_);

      for (int i = 0; i < NumAggregates; ++i)
      {
        for (int j = 0; j < (int) (aggregates[i].size()); ++j)
        {
          int GRID = aggregates[i][j];
          int LRID = BlockNodeListMap->LID(GRID); // this is the block ID
          //assert (LRID != -1);
          int GCID = CoarseMap.GID(i * NullSpaceDim);
          //assert (GCID != -1); 
          int color = (*Colors)[i] - 1;
          if (color != ic) continue;

          for (int k = 0; k < NumPDEEqns_; ++k)
          {
            int count = 0;
            int GRID2 = GRID * NumPDEEqns_ + k;
            for (int j = 0; j < NullSpaceDim; ++j)
            {
              double val = ExtColoredAP[j][LRID * NumPDEEqns_ + k];
              if (val != 0.0)
              {
                InsertCols[count] = GCID + j;
                InsertValues[count] = val;
                ++count;
              }
            }
            AP->InsertGlobalValues(1, &GRID2, count, &InsertCols[0], 
                                   &InsertValues[0]);
          }
        }
      }
    }

    ML_Operator_Destroy(&BlockPtent_ML);
  }

  aggregates.resize(0);
  BlockNodeListMap = Teuchos::null;
  NodeListMap = Teuchos::null;

  Colors = Teuchos::null;

  AP->GlobalAssemble(false);
  AP->FillComplete(CoarseMap, FineMap);

#if 0
  try
  {
    AP->OptimizeStorage();
  }
  catch(...)
  {
    // a memory error was reported, typically ReportError.
    // We just continue with fingers crossed.
  }
#endif

  AddAndResetStartTime("computation of the final AP", true); 

  ML_Operator* AP_ML = ML_Operator_Create(Comm_ML());
  ML_Operator_WrapEpetraMatrix(AP.get(), AP_ML);

  // ======== //
  // create R //
  // ======== //
  
  std::vector<int> REntries(NumAggregates * NullSpaceDim);
  for (int AID = 0; AID < NumAggregates; ++AID)
  {
    for (int m = 0; m < NullSpaceDim; ++m)
      REntries[AID * NullSpaceDim + m] = NodesOfAggregate[AID].size() * NumPDEEqns_;
  }

  R_ = rcp(new Epetra_CrsMatrix(Copy, CoarseMap, &REntries[0], true));
  REntries.resize(0);

  for (int AID = 0; AID < NumAggregates; ++AID)
  {
    const int& MySize = NodesOfAggregate[AID].size();

    // FIXME: make it faster
    for (int j = 0; j < MySize; ++j)
      for (int m = 0; m < NumPDEEqns_; ++m)
        for (int k = 0; k < NullSpaceDim; ++k)
        {
          int LCID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m;
          int GCID = FineMap.GID(LCID);
          assert (GCID != -1);

          double& val = NullSpace[k][LCID];

          int GRID = CoarseMap.GID(AID * NullSpaceDim + k);
          int ierr = R_->InsertGlobalValues(GRID, 1, &val, &GCID);
          if (ierr < 0)
            ML_CHK_ERR(-1);
        }
  }

  NodesOfAggregate.resize(0);

  R_->FillComplete(FineMap, CoarseMap);
#if 0
  try
  {
    R_->OptimizeStorage();
  }
  catch(...)
  {
    // a memory error was reported, typically ReportError.
    // We just continue with fingers crossed.
  }
#endif

  ML_Operator* R_ML = ML_Operator_Create(Comm_ML());
  ML_Operator_WrapEpetraMatrix(R_.get(), R_ML);

  AddAndResetStartTime("computation of R", true); 

  // ======== //
  // Create C //
  // ======== //

  C_ML_ = ML_Operator_Create(Comm_ML());
  ML_2matmult(R_ML, AP_ML, C_ML_, ML_MSR_MATRIX);

  ML_Operator_Destroy(&AP_ML);
  ML_Operator_Destroy(&R_ML);
  AP = Teuchos::null;

  C_ = rcp(new ML_Epetra::RowMatrix(C_ML_, &Comm(), false));
  assert (R_->OperatorRangeMap().SameAs(C_->OperatorDomainMap()));

  TotalTime.ResetStartTime();

  AddAndResetStartTime("computation of C", true); 

  if (verbose_)
  {
    std::cout << "Matrix-free preconditioner built. Now building solver for C..." << std::endl; 
  }

  Teuchos::ParameterList& sublist = List_.sublist("ML list");
  sublist.set("PDE equations", NullSpaceDim);
  sublist.set("null space: type", "pre-computed");
  sublist.set("null space: dimension", NewNullSpace.NumVectors());
  sublist.set("null space: vectors", NewNullSpace.Values());

  MLP_ = rcp(new MultiLevelPreconditioner(*C_, sublist, true));

  assert (MLP_.get() != 0);

  IsComputed_ = true;

  AddAndResetStartTime("computation of the preconditioner for C", true); 

  if (verbose_)
  {
    std::cout << std::endl;
    std::cout << "Total CPU time for construction (all included) = ";
    std::cout << TotalCPUTime() << std::endl;
    ML_print_line("=",78);
  }

  return(0);
}