//============================================================================== int Ifpack_CrsRiluk::BlockGraph2PointGraph(const Epetra_CrsGraph & BG, Epetra_CrsGraph & PG, bool Upper) { if (!BG.IndicesAreLocal()) {EPETRA_CHK_ERR(-1);} // Must have done FillComplete on BG int * ColFirstPointInElementList = BG.RowMap().FirstPointInElementList(); int * ColElementSizeList = BG.RowMap().ElementSizeList(); if (BG.Importer()!=0) { ColFirstPointInElementList = BG.ImportMap().FirstPointInElementList(); ColElementSizeList = BG.ImportMap().ElementSizeList(); } int Length = (BG.MaxNumIndices()+1) * BG.ImportMap().MaxMyElementSize(); vector<int> tmpIndices(Length); int BlockRow, BlockOffset, NumEntries; int NumBlockEntries; int * BlockIndices; int NumMyRows_tmp = PG.NumMyRows(); for (int i=0; i<NumMyRows_tmp; i++) { EPETRA_CHK_ERR(BG.RowMap().FindLocalElementID(i, BlockRow, BlockOffset)); EPETRA_CHK_ERR(BG.ExtractMyRowView(BlockRow, NumBlockEntries, BlockIndices)); int * ptr = &tmpIndices[0]; // Set pointer to beginning of buffer int RowDim = BG.RowMap().ElementSize(BlockRow); NumEntries = 0; // This next line make sure that the off-diagonal entries in the block diagonal of the // original block entry matrix are included in the nonzero pattern of the point graph if (Upper) { int jstart = i+1; int jstop = EPETRA_MIN(NumMyRows_tmp,i+RowDim-BlockOffset); for (int j= jstart; j< jstop; j++) {*ptr++ = j; NumEntries++;} } for (int j=0; j<NumBlockEntries; j++) { int ColDim = ColElementSizeList[BlockIndices[j]]; NumEntries += ColDim; assert(NumEntries<=Length); // Sanity test int Index = ColFirstPointInElementList[BlockIndices[j]]; for (int k=0; k < ColDim; k++) *ptr++ = Index++; } // This next line make sure that the off-diagonal entries in the block diagonal of the // original block entry matrix are included in the nonzero pattern of the point graph if (!Upper) { int jstart = EPETRA_MAX(0,i-RowDim+1); int jstop = i; for (int j = jstart; j < jstop; j++) {*ptr++ = j; NumEntries++;} } 
EPETRA_CHK_ERR(PG.InsertMyIndices(i, NumEntries, &tmpIndices[0])); } SetAllocated(true); return(0); }
//EpetraCrsMatrix_To_TpetraCrsMatrix: copies Epetra_CrsMatrix to its analogous Tpetra_CrsMatrix Teuchos::RCP<Tpetra_CrsMatrix> Petra::EpetraCrsMatrix_To_TpetraCrsMatrix(const Epetra_CrsMatrix& epetraCrsMatrix_, const Teuchos::RCP<const Teuchos::Comm<int> >& commT_) { //get row map of Epetra::CrsMatrix & convert to Tpetra::Map auto tpetraRowMap_ = EpetraMap_To_TpetraMap(epetraCrsMatrix_.RowMap(), commT_); //get col map of Epetra::CrsMatrix & convert to Tpetra::Map auto tpetraColMap_ = EpetraMap_To_TpetraMap(epetraCrsMatrix_.ColMap(), commT_); //get CrsGraph of Epetra::CrsMatrix & convert to Tpetra::CrsGraph const Epetra_CrsGraph epetraCrsGraph_ = epetraCrsMatrix_.Graph(); std::size_t maxEntries = epetraCrsGraph_.GlobalMaxNumIndices(); Teuchos::RCP<Tpetra_CrsGraph> tpetraCrsGraph_ = Teuchos::rcp(new Tpetra_CrsGraph(tpetraRowMap_, tpetraColMap_, maxEntries)); for (LO i=0; i<epetraCrsGraph_.NumMyRows(); i++) { LO NumEntries; LO *Indices; epetraCrsGraph_.ExtractMyRowView(i, NumEntries, Indices); tpetraCrsGraph_->insertLocalIndices(i, NumEntries, Indices); } tpetraCrsGraph_->fillComplete(); //convert Epetra::CrsMatrix to Tpetra::CrsMatrix, after creating Tpetra::CrsMatrix based on above Tpetra::CrsGraph Teuchos::RCP<Tpetra_CrsMatrix> tpetraCrsMatrix_ = Teuchos::rcp(new Tpetra_CrsMatrix(tpetraCrsGraph_)); tpetraCrsMatrix_->setAllToScalar(0.0); for (LO i=0; i<epetraCrsMatrix_.NumMyRows(); i++) { LO NumEntries; LO *Indices; ST *Values; epetraCrsMatrix_.ExtractMyRowView(i, NumEntries, Values, Indices); tpetraCrsMatrix_->replaceLocalValues(i, NumEntries, Values, Indices); } tpetraCrsMatrix_->fillComplete(); return tpetraCrsMatrix_; }
//----------------------------------------------------------------------------- // Function : Indexor::setupAcceleratedMatrixIndexing // Purpose : // Special Notes : // Scope : public // Creator : Rob Hoekstra, SNL, Parallel Computational Sciences // Creation Date : 08/23/02 //----------------------------------------------------------------------------- bool Indexor::setupAcceleratedMatrixIndexing( const std::string & graph_name ) { Epetra_CrsGraph * graph = 0; assert( pdsMgr_ != 0 ); // Never, EVER do work inside an assert argument, or that work will not // be done when asserts are disabled. graph = pdsMgr_->getMatrixGraph( graph_name ); assert( graph != 0 ); int NumRows = graph->NumMyRows(); matrixIndexMap_.clear(); matrixIndexMap_.resize( NumRows ); int NumElements; int * Elements; for( int i = 0; i < NumRows; ++i ) { graph->ExtractMyRowView( i, NumElements, Elements ); for( int j = 0; j < NumElements; ++j ) matrixIndexMap_[i][ Elements[j] ] = j; } accelMatrixIndex_ = true; return true; }
//----------------------------------------------------------------------------- // Function : Indexor::matrixGlobalToLocal // Purpose : // Special Notes : // Scope : public // Creator : Rob Hoekstra, SNL, Parallel Computational Sciences // Creation Date : 08/23/02 //----------------------------------------------------------------------------- bool Indexor::matrixGlobalToLocal( const std::string & graph_name, const std::vector<int> & gids, std::vector< std::vector<int> > & stamp ) { Epetra_CrsGraph * graph = 0; assert( pdsMgr_ != 0 ); // Never, EVER do work inside an assert argument, or that work will not // be done when asserts are disabled. graph = pdsMgr_->getMatrixGraph( graph_name ); assert( graph != 0 ); int numRows = stamp.size(); int numElements; int * elements; if( accelMatrixIndex_ ) { for( int i = 0; i < numRows; ++i ) { int RowLID = graph->LRID(gids[i]); int NumCols = stamp[i].size(); for( int j = 0; j < NumCols; ++j ) { int lid = graph->LCID(stamp[i][j]); stamp[i][j] = matrixIndexMap_[RowLID][lid]; } } } else { for( int i = 0; i < numRows; ++i ) { graph->ExtractMyRowView( graph->LRID(gids[i]), numElements, elements ); std::map<int,int> indexToOffsetMap; for( int j = 0; j < numElements; ++j ) indexToOffsetMap[ elements[j] ] = j; int numCols = stamp[i].size(); for( int j = 0; j < numCols; ++j ) { int lid = graph->LCID(stamp[i][j]); // assert( indexToOffsetMap.count(lid) ); stamp[i][j] = indexToOffsetMap[lid]; } } } return true; }
void BroydenOperator::removeEntriesFromBroydenUpdate( const Epetra_CrsGraph & graph ) { int numRemoveIndices ; int * removeIndPtr ; int ierr ; cout << graph << endl; for( int row = 0; row < graph.NumMyRows(); ++row) { ierr = graph.ExtractMyRowView( row, numRemoveIndices, removeIndPtr ); if( ierr ) { cout << "ERROR (" << ierr << ") : " << "NOX::Epetra::BroydenOperator::removeEntriesFromBroydenUpdate(...)" << " - Extract indices error for row --> " << row << endl; throw "NOX Broyden Operator Error"; } if( 0 != numRemoveIndices ) { // Create a map for quick queries map<int, bool> removeIndTable; for( int k = 0; k < numRemoveIndices; ++k ) removeIndTable[ graph.ColMap().GID(removeIndPtr[k]) ] = true; // Get our matrix column indices for the current row int numOrigIndices = 0; int * indPtr; ierr = crsMatrix->Graph().ExtractMyRowView( row, numOrigIndices, indPtr ); if( ierr ) { cout << "ERROR (" << ierr << ") : " << "NOX::Epetra::BroydenOperator::removeEntriesFromBroydenUpdate(...)" << " - Extract indices error for row --> " << row << endl; throw "NOX Broyden Operator Error"; } // Remove appropriate active entities if( retainedEntries.end() == retainedEntries.find(row) ) { list<int> inds; for( int k = 0; k < numOrigIndices; ++k ) { if( removeIndTable.end() == removeIndTable.find( crsMatrix->Graph().ColMap().GID(indPtr[k]) ) ) inds.push_back(k); } retainedEntries[row] = inds; } else { list<int> & inds = retainedEntries[row]; list<int>::iterator iter = inds.begin() , iter_end = inds.end() ; for( ; iter_end != iter; ++iter ) { if( !removeIndTable[ *iter ] ) inds.remove( *iter ); } } entriesRemoved[row] = true; } } return; }
// Computes the block adjacency graph of B for the row/column partition r.
//
//   B       - graph whose entries must all live on a single processor; if no
//             processor owns all entries the function prints a message and aborts.
//   nbrr    - number of block rows.
//   r       - block boundaries: block l covers rows/columns r[l] .. r[l+1]-1.
//             The code reads r[0] .. r[nbrr].
//   weights - output, resized to nbrr; weights[l] = r[l+1] - r[l].
//   verbose - when true the owning processor prints the matrix size, the block
//             count and the number of edges found.
//
// Returns a fill-completed Epetra_CrsGraph with nbrr global rows, all of them
// on the owning processor, where row l lists the distinct blocks that contain
// a column index of some row of block l.
//
// Only the processor that owns all entries performs the computation; on every
// other processor nrr is set to -1 so the row loops are skipped. This no
// longer depends on `verbose`.
Teuchos::RCP<Epetra_CrsGraph> BlockAdjacencyGraph::compute( Epetra_CrsGraph& B, int nbrr, std::vector<int>&r, std::vector<double>& weights, bool verbose)
{
  // Check if the graph is on one processor.
  // (The test does not depend on a processor index, so one evaluation suffices.)
  int myMatProc = -1, matProc = -1;
  int myPID = B.Comm().MyPID();
  if (B.NumGlobalEntries() == B.NumMyEntries())
    myMatProc = myPID;
  B.Comm().MaxAll( &myMatProc, &matProc, 1 );

  if( matProc == -1)
    { cout << "FAIL for Global! All CrsGraph entries must be on one processor!\n"; abort(); }

  int i= 0, j = 0, k, l = 0, p, pm, q = -1, ns = 0;
  int tree_height;
  int error = -1;    /* error detected, possibly a problem with the input */
  int nrr;           /* number of rows in B */
  int nzM = 0;       /* number of edges in graph */
  int m = 0;         /* maximum number of nonzeros in any block row of B */
  int* colstack = 0; /* stack used to process each block row */
  int* bstree = 0;   /* binary search tree */
  std::vector<int> Mi, Mj, Mnum(nbrr+1,0);
  nrr = B.NumMyRows();
  if ( matProc == myPID ) {
    if ( verbose )
      std::printf(" Matrix Size = %d Number of Blocks = %d\n",nrr, nbrr);
  }
  else
    nrr = -1;     /* Prevent processor from doing any computations */
  bstree = csr_bst(nbrr);  /* 0 : nbrr-1 */
  tree_height = ceil31log2(nbrr) + 1;
  error = -1;

  /* Find the largest number of indices in any block row; this sizes colstack. */
  l = 0; j = 0; m = 0;
  for( i = 0; i < nrr; i++ ){
    if( i >= r[l+1] ){
      ++l;                 /* new block row */
      m = EPETRA_MAX(m,j) ;   /* nonzeros in block row */
      j = B.NumGlobalIndices(i);
    }else{
      j += B.NumGlobalIndices(i);
    }
  }
  /* one more time for the final block */
  m = EPETRA_MAX(m,j) ;   /* nonzeros in block row */

  colstack = (int*) malloc( EPETRA_MAX(m,1) * sizeof(int) );
  // The compressed graph is actually computed twice,
  // due to concerns about memory limitations.  First,
  // without memory allocation, just nzM is computed.
  // Next Mj is allocated. Then, the second time, the
  // arrays are actually populated.
  nzM = 0; q = -1; l = 0;
  int * indices;
  int numEntries;
  for( i = 0; i <= nrr; i++ ){
    if( i >= r[l+1] ){
      if( q > 0 ) std::qsort(colstack,q+1,sizeof(int),compare_ints); /* sort stack */
      /* An empty block row contributes no edges. ns must be reset here,
         otherwise the step counter left over from the tree search below
         (or an uninitialized value) would be added to nzM. */
      ns = ( q >= 0 ) ? 1 : 0; /* l, colstack[0] M */
      for( j=1; j<=q ; j++ ){ /* delete copies */
        if( colstack[j] > colstack[j-1] ) ++ns;
      }
      nzM += ns; /*M->p[l+1] = M->p[l] + ns;*/
      ++l;
      q = -1;
    }
    if( i < nrr ){
      B.ExtractMyRowView( i, numEntries, indices );
      for( k = 0; k < numEntries; k++){
        j = indices[k];  ns = 0; p = 0;
        /* Walk the search tree until the block containing column j is found. */
        while( (r[bstree[p]] > j) ||  (j >= r[bstree[p]+1])  ){
          if( r[bstree[p]] > j){
            p = 2*p+1;
          }else{
            if( r[bstree[p]+1] <= j) p = 2*p+2;
          }
          ++ns;
          if( p > nbrr || ns > tree_height ) {
            error = j;
            std::printf("error: p %d nbrr %d ns %d %d\n",p,nbrr,ns,j); break;
          }
        }
        colstack[++q] = bstree[p];
      }
      //if( error >-1 ){ std::printf("%d\n",error); break; }
      // p > nbrr is a fatal error that is ignored
    }
  }

  if ( matProc == myPID && verbose )
    std::printf("nzM = %d \n", nzM );
  Mi.resize( nzM );
  Mj.resize( nzM );

  /* Second pass: same traversal, now storing the distinct block columns. */
  q = -1; l = 0; pm = -1;
  for( i = 0; i <= nrr; i++ ){
    if( i >= r[l+1] ){
      if( q > 0 ) std::qsort(colstack,q+1,sizeof(colstack[0]),compare_ints); /* sort stack */
      if( q >= 0 ){
        Mi[++pm] = l;
        Mj[pm] = colstack[0];
      }
      for( j=1; j<=q ; j++ ){ /* delete copies */
        if( colstack[j] > colstack[j-1] ){ /* l, colstack[j] */
          Mi[++pm] = l;
          Mj[pm] = colstack[j];
        }
      }
      ++l;
      Mnum[l] = pm + 1;

      /* sparse row format: M->p[l+1] = M->p[l] + ns; */
      q = -1;
    }
    if( i < nrr ){
      B.ExtractMyRowView( i, numEntries, indices );
      for( k = 0; k < numEntries; k++){
        j = indices[k]; ns = 0; p = 0;
        while( (r[bstree[p]] > j) ||  (j >= r[bstree[p]+1])  ){
          if( r[bstree[p]] > j){
            p = 2*p+1;
          }else{
            if( r[bstree[p]+1] <= j) p = 2*p+2;
          }
          ++ns;
        }
        colstack[++q] = bstree[p];
      }
    }
  }
  if ( bstree ) free ( bstree );
  if ( colstack ) free( colstack );

  // Compute weights as number of rows in each block.
  weights.resize( nbrr );
  for( l=0; l<nbrr; l++) weights[l] = r[l+1] - r[l];

  // Compute Epetra_CrsGraph and return
  // (all nbrr rows live on the owning processor, none elsewhere)
  Teuchos::RCP<Epetra_Map> newMap;
  if ( matProc == myPID )
    newMap = Teuchos::rcp( new Epetra_Map(nbrr, nbrr, 0, B.Comm() ) );
  else
    newMap = Teuchos::rcp( new Epetra_Map( nbrr, 0, 0, B.Comm() ) );
  Teuchos::RCP<Epetra_CrsGraph> newGraph = Teuchos::rcp( new Epetra_CrsGraph( Copy, *newMap, 0 ) );
  for( l=0; l<newGraph->NumMyRows(); l++) {
    newGraph->InsertGlobalIndices( l, Mnum[l+1]-Mnum[l], &Mj[Mnum[l]] );
  }
  newGraph->FillComplete();

  return (newGraph);
}
int check(Epetra_CrsGraph& L, Epetra_CrsGraph& U, Ifpack_IlukGraph& LU, int NumGlobalRows1, int NumMyRows1, int LevelFill1, bool verbose) { using std::cout; using std::endl; int i, j; int NumIndices, * Indices; int NumIndices1, * Indices1; bool debug = true; Epetra_CrsGraph& L1 = LU.L_Graph(); Epetra_CrsGraph& U1 = LU.U_Graph(); // Test entries and count nonzeros int Nout = 0; for (i=0; i<LU.NumMyRows(); i++) { assert(L.ExtractMyRowView(i, NumIndices, Indices)==0); assert(L1.ExtractMyRowView(i, NumIndices1, Indices1)==0); assert(NumIndices==NumIndices1); for (j=0; j<NumIndices1; j++) { if (debug &&(Indices[j]!=Indices1[j])) { int MyPID = L.RowMap().Comm().MyPID(); cout << "Proc " << MyPID << " Local Row = " << i << " L.Indices["<< j <<"] = " << Indices[j] << " L1.Indices["<< j <<"] = " << Indices1[j] << endl; } assert(Indices[j]==Indices1[j]); } Nout += (NumIndices-NumIndices1); assert(U.ExtractMyRowView(i, NumIndices, Indices)==0); assert(U1.ExtractMyRowView(i, NumIndices1, Indices1)==0); assert(NumIndices==NumIndices1); for (j=0; j<NumIndices1; j++) { if (debug &&(Indices[j]!=Indices1[j])) { int MyPID = L.RowMap().Comm().MyPID(); cout << "Proc " << MyPID << " Local Row = " << i << " U.Indices["<< j <<"] = " << Indices[j] << " U1.Indices["<< j <<"] = " << Indices1[j] << endl; } assert(Indices[j]==Indices1[j]); } Nout += (NumIndices-NumIndices1); } // Test query functions int NumGlobalRows = LU.NumGlobalRows(); if (verbose) cout << "\n\nNumber of Global Rows = " << NumGlobalRows << endl<< endl; assert(NumGlobalRows==NumGlobalRows1); int NumGlobalNonzeros = LU.NumGlobalNonzeros(); if (verbose) cout << "\n\nNumber of Global Nonzero entries = " << NumGlobalNonzeros << endl<< endl; int NoutG = 0; L.RowMap().Comm().SumAll(&Nout, &NoutG, 1); assert(NumGlobalNonzeros==L.NumGlobalNonzeros()+U.NumGlobalNonzeros()-NoutG); int NumMyRows = LU.NumMyRows(); if (verbose) cout << "\n\nNumber of Rows = " << NumMyRows << endl<< endl; assert(NumMyRows==NumMyRows1); int NumMyNonzeros 
= LU.NumMyNonzeros(); if (verbose) cout << "\n\nNumber of Nonzero entries = " << NumMyNonzeros << endl<< endl; assert(NumMyNonzeros==L.NumMyNonzeros()+U.NumMyNonzeros()-Nout); if (verbose) cout << "\n\nLU check OK" << endl<< endl; return(0); }
void show_matrix(const char *txt, const Epetra_CrsGraph &graph, const Epetra_Comm &comm) { int me = comm.MyPID(); if (comm.NumProc() > 10){ if (me == 0){ std::cerr << txt << std::endl; std::cerr << "Printed matrix format only works for 10 or fewer processes" << std::endl; } return; } const Epetra_BlockMap &rowmap = graph.RowMap(); const Epetra_BlockMap &colmap = graph.ColMap(); int myRows = rowmap.NumMyElements(); int numRows = graph.NumGlobalRows(); int numCols = graph.NumGlobalCols(); int base = rowmap.IndexBase(); if ((numRows > 200) || (numCols > 500)){ if (me == 0){ std::cerr << txt << std::endl; std::cerr << "show_matrix: problem is too large to display" << std::endl; } return; } int *myA = new int [numRows * numCols]; memset(myA, 0, sizeof(int) * numRows * numCols); int *myIndices; int *myRowGIDs = rowmap.MyGlobalElements(); for (int i=0; i< myRows; i++){ int myRowLID = rowmap.LID(myRowGIDs[i]); int numEntries = graph.NumMyIndices(myRowLID); if (numEntries > 0){ int rc = graph.ExtractMyRowView(myRowLID, numEntries, myIndices); if (rc){ std::cerr << txt << std::endl; std::cerr << "extract graph error" << std::endl; return; } int *row = myA + (numCols * (myRowGIDs[i] - base)); for (int j=0; j < numEntries; j++){ int gid = colmap.GID(myIndices[j]); row[gid-base] = me+1; } } } printMatrix(txt, myA, NULL, NULL, numRows, numCols, comm); delete [] myA; }
//============================================================================== int check(Epetra_CrsGraph& A, int NumMyRows1, long long NumGlobalRows1, int NumMyNonzeros1, long long NumGlobalNonzeros1, long long* MyGlobalElements, bool verbose) { (void)MyGlobalElements; int ierr = 0; int i; int j; int forierr = 0; int NumGlobalIndices; int NumMyIndices; int* MyViewIndices; int MaxNumIndices = A.MaxNumIndices(); int* MyCopyIndices = new int[MaxNumIndices]; long long* GlobalCopyIndices = new long long[MaxNumIndices]; // Test query functions int NumMyRows = A.NumMyRows(); if(verbose) cout << "Number of local Rows = " << NumMyRows << endl; EPETRA_TEST_ERR(!(NumMyRows==NumMyRows1),ierr); int NumMyNonzeros = A.NumMyNonzeros(); if(verbose) cout << "Number of local Nonzero entries = " << NumMyNonzeros << endl; EPETRA_TEST_ERR(!(NumMyNonzeros==NumMyNonzeros1),ierr); long long NumGlobalRows = A.NumGlobalRows64(); if(verbose) cout << "Number of global Rows = " << NumGlobalRows << endl; EPETRA_TEST_ERR(!(NumGlobalRows==NumGlobalRows1),ierr); long long NumGlobalNonzeros = A.NumGlobalNonzeros64(); if(verbose) cout << "Number of global Nonzero entries = " << NumGlobalNonzeros << endl; EPETRA_TEST_ERR(!(NumGlobalNonzeros==NumGlobalNonzeros1),ierr); // GlobalRowView should be illegal (since we have local indices) EPETRA_TEST_ERR(!(A.ExtractGlobalRowView(A.RowMap().MaxMyGID64(), NumGlobalIndices, GlobalCopyIndices)==-2),ierr); // Other binary tests EPETRA_TEST_ERR(A.NoDiagonal(),ierr); EPETRA_TEST_ERR(!(A.Filled()),ierr); EPETRA_TEST_ERR(!(A.MyGRID(A.RowMap().MaxMyGID64())),ierr); EPETRA_TEST_ERR(!(A.MyGRID(A.RowMap().MinMyGID64())),ierr); EPETRA_TEST_ERR(A.MyGRID(1+A.RowMap().MaxMyGID64()),ierr); EPETRA_TEST_ERR(A.MyGRID(-1+A.RowMap().MinMyGID64()),ierr); EPETRA_TEST_ERR(!(A.MyLRID(0)),ierr); EPETRA_TEST_ERR(!(A.MyLRID(NumMyRows-1)),ierr); EPETRA_TEST_ERR(A.MyLRID(-1),ierr); EPETRA_TEST_ERR(A.MyLRID(NumMyRows),ierr); forierr = 0; for(i = 0; i < NumMyRows; i++) { long long Row = 
A.GRID64(i); A.ExtractGlobalRowCopy(Row, MaxNumIndices, NumGlobalIndices, GlobalCopyIndices); A.ExtractMyRowView(i, NumMyIndices, MyViewIndices); forierr += !(NumGlobalIndices==NumMyIndices); for(j = 1; j < NumMyIndices; j++) EPETRA_TEST_ERR(!(MyViewIndices[j-1]<MyViewIndices[j]),ierr); for(j = 0; j < NumGlobalIndices; j++) { forierr += !(GlobalCopyIndices[j]==A.GCID64(MyViewIndices[j])); forierr += !(A.LCID(GlobalCopyIndices[j])==MyViewIndices[j]); } } EPETRA_TEST_ERR(forierr,ierr); forierr = 0; for(i = 0; i < NumMyRows; i++) { long long Row = A.GRID64(i); A.ExtractGlobalRowCopy(Row, MaxNumIndices, NumGlobalIndices, GlobalCopyIndices); A.ExtractMyRowCopy(i, MaxNumIndices, NumMyIndices, MyCopyIndices); forierr += !(NumGlobalIndices==NumMyIndices); for(j = 1; j < NumMyIndices; j++) EPETRA_TEST_ERR(!(MyCopyIndices[j-1]<MyCopyIndices[j]),ierr); for(j = 0; j < NumGlobalIndices; j++) { forierr += !(GlobalCopyIndices[j]==A.GCID64(MyCopyIndices[j])); forierr += !(A.LCID(GlobalCopyIndices[j])==MyCopyIndices[j]); } } EPETRA_TEST_ERR(forierr,ierr); delete[] MyCopyIndices; delete[] GlobalCopyIndices; if(verbose) cout << "Rows sorted check OK" << endl; return(ierr); }
// ============================================================================ int ML_Epetra::MatrixFreePreconditioner:: Compute(const Epetra_CrsGraph& Graph, Epetra_MultiVector& NullSpace) { Epetra_Time TotalTime(Comm()); const int NullSpaceDim = NullSpace.NumVectors(); // get parameters from the list std::string PrecType = List_.get("prec: type", "hybrid"); std::string SmootherType = List_.get("smoother: type", "Jacobi"); std::string ColoringType = List_.get("coloring: type", "JONES_PLASSMAN"); int PolynomialDegree = List_.get("smoother: degree", 3); std::string DiagonalColoringType = List_.get("diagonal coloring: type", "JONES_PLASSMAN"); int MaximumIterations = List_.get("eigen-analysis: max iters", 10); std::string EigenType_ = List_.get("eigen-analysis: type", "cg"); double boost = List_.get("eigen-analysis: boost for lambda max", 1.0); int OutputLevel = List_.get("ML output", -47); if (OutputLevel == -47) OutputLevel = List_.get("output", 10); omega_ = List_.get("smoother: damping", omega_); ML_Set_PrintLevel(OutputLevel); bool LowMemory = List_.get("low memory", true); double AllocationFactor = List_.get("AP allocation factor", 0.5); verbose_ = (MyPID() == 0 && ML_Get_PrintLevel() > 5); // ================ // // check parameters // // ================ // if (PrecType == "presmoother only") PrecType_ = ML_MFP_PRESMOOTHER_ONLY; else if (PrecType == "hybrid") PrecType_ = ML_MFP_HYBRID; else if (PrecType == "additive") PrecType_ = ML_MFP_ADDITIVE; else ML_CHK_ERR(-3); // not recognized if (SmootherType == "none") SmootherType_ = ML_MFP_NONE; else if (SmootherType == "Jacobi") SmootherType_ = ML_MFP_JACOBI; else if (SmootherType == "block Jacobi") SmootherType_ = ML_MFP_BLOCK_JACOBI; else if (SmootherType == "Chebyshev") SmootherType_ = ML_MFP_CHEBY; else ML_CHK_ERR(-4); // not recognized if (AllocationFactor <= 0.0) ML_CHK_ERR(-1); // should be positive // =============================== // // basic checkings and some output // // 
=============================== // int OperatorDomainPoints = Operator_.OperatorDomainMap().NumGlobalPoints(); int OperatorRangePoints = Operator_.OperatorRangeMap().NumGlobalPoints(); int GraphBlockRows = Graph.NumGlobalBlockRows(); int GraphNnz = Graph.NumGlobalNonzeros(); NumPDEEqns_ = OperatorRangePoints / GraphBlockRows; NumMyBlockRows_ = Graph.NumMyBlockRows(); if (OperatorDomainPoints != OperatorRangePoints) ML_CHK_ERR(-1); // only square matrices if (OperatorRangePoints % NumPDEEqns_ != 0) ML_CHK_ERR(-2); // num PDEs seems not constant if (verbose_) { ML_print_line("=",78); std::cout << "*** " << std::endl; std::cout << "*** ML_Epetra::MatrixFreePreconditioner" << std::endl; std::cout << "***" << std::endl; std::cout << "Number of rows and columns = " << OperatorDomainPoints << std::endl; std::cout << "Number of rows per processor = " << OperatorDomainPoints / Comm().NumProc() << " (on average)" << std::endl; std::cout << "Number of rows in the graph = " << GraphBlockRows << std::endl; std::cout << "Number of nonzeros in the graph = " << GraphNnz << std::endl; std::cout << "Processors used in computation = " << Comm().NumProc() << std::endl; std::cout << "Number of PDE equations = " << NumPDEEqns_ << std::endl; std::cout << "Null space dimension = " << NullSpaceDim << std::endl; std::cout << "Preconditioner type = " << PrecType << std::endl; std::cout << "Smoother type = " << SmootherType << std::endl; std::cout << "Coloring type = " << ColoringType << std::endl; std::cout << "Allocation factor = " << AllocationFactor << std::endl; std::cout << "Number of V-cycles for C = " << List_.sublist("ML list").get("cycle applications", 1) << std::endl; std::cout << std::endl; } ResetStartTime(); // ==================================== // // compute the inverse of the diagonal, // // control that no elements are zero. 
// // ==================================== // for (int i = 0; i < InvPointDiagonal_->MyLength(); ++i) if ((*InvPointDiagonal_)[i] != 0.0) (*InvPointDiagonal_)[i] = 1.0 / (*InvPointDiagonal_)[i]; // ========================================================= // // Setup the smoother. I need to extract the block diagonal // // only if block Jacobi is used. For Chebyshev, I scale with // // the point diagonal only. In this latter case, I need to // // compute lambda_max of the scaled operator. // // ========================================================= // // probes for the block diagonal of the matrix. if (SmootherType_ == ML_MFP_JACOBI || SmootherType_ == ML_MFP_NONE) { // do-nothing here } else if (SmootherType_ == ML_MFP_BLOCK_JACOBI) { if (verbose_); std::cout << "Diagonal coloring type = " << DiagonalColoringType << std::endl; ML_CHK_ERR(GetBlockDiagonal(Graph, DiagonalColoringType)); AddAndResetStartTime("block diagonal construction", true); } else if (SmootherType_ == ML_MFP_CHEBY) { double lambda_min = 0.0; double lambda_max = 0.0; Teuchos::ParameterList IFPACKList; if (EigenType_ == "power-method") { ML_CHK_ERR(Ifpack_Chebyshev::PowerMethod(Operator_, *InvPointDiagonal_, MaximumIterations, lambda_max)); } else if(EigenType_ == "cg") { ML_CHK_ERR(Ifpack_Chebyshev::CG(Operator_, *InvPointDiagonal_, MaximumIterations, lambda_min, lambda_max)); } else ML_CHK_ERR(-1); // not recognized if (verbose_) { std::cout << "Using Chebyshev smoother of degree " << PolynomialDegree << std::endl; std::cout << "Estimating eigenvalues using " << EigenType_ << std::endl; std::cout << "lambda_min = " << lambda_min << ", "; std::cout << "lambda_max = " << lambda_max << std::endl; } IFPACKList.set("chebyshev: min eigenvalue", lambda_min); IFPACKList.set("chebyshev: max eigenvalue", boost * lambda_max); // FIXME: this allocates a new std::vector inside IFPACKList.set("chebyshev: operator inv diagonal", InvPointDiagonal_.get()); IFPACKList.set("chebyshev: degree", 
PolynomialDegree); PreSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_))); if (PreSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error? IFPACKList.set("chebyshev: zero starting solution", true); ML_CHK_ERR(PreSmoother_->SetParameters(IFPACKList)); ML_CHK_ERR(PreSmoother_->Initialize()); ML_CHK_ERR(PreSmoother_->Compute()); PostSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_))); if (PostSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error? IFPACKList.set("chebyshev: zero starting solution", false); ML_CHK_ERR(PostSmoother_->SetParameters(IFPACKList)); ML_CHK_ERR(PostSmoother_->Initialize()); ML_CHK_ERR(PostSmoother_->Compute()); } // ========================================================= // // building P and R for block graph. This is done by working // // on the Graph_ object. Support is provided for local // // aggregation schemes only so that all is basically local. // // Then, build the block graph coarse problem. // // ========================================================= // // ML wrapper for Graph_ ML_Operator* Graph_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraCrsGraph(const_cast<Epetra_CrsGraph*>(&Graph), Graph_ML); ML_Aggregate* BlockAggr_ML = 0; ML_Operator* BlockPtent_ML = 0, *BlockRtent_ML = 0,* CoarseGraph_ML = 0; if (verbose_) std::cout << std::endl; ML_CHK_ERR(Coarsen(Graph_ML, &BlockAggr_ML, &BlockPtent_ML, &BlockRtent_ML, &CoarseGraph_ML)); if (verbose_) std::cout << std::endl; Epetra_CrsMatrix* GraphCoarse; ML_CHK_ERR(ML_Operator2EpetraCrsMatrix(CoarseGraph_ML, GraphCoarse)); // used later to estimate the entries in AP ML_Operator* CoarseAP_ML = ML_Operator_Create(Comm_ML()); ML_2matmult(Graph_ML, BlockPtent_ML, CoarseAP_ML, ML_CSR_MATRIX); int AP_MaxNnzRow, itmp = CoarseAP_ML->max_nz_per_row; Comm().MaxAll(&itmp, &AP_MaxNnzRow, 1); ML_Operator_Destroy(&CoarseAP_ML); int NumAggregates = BlockPtent_ML->invec_leng; ML_Operator_Destroy(&BlockRtent_ML); ML_Operator_Destroy(&CoarseGraph_ML); 
AddAndResetStartTime("construction of block C, R, and P", true); if (verbose_) std::cout << std::endl; // ================================================== // // coloring of block graph: // // - color of block row `i' is given by `ColorMap[i]' // // - number of colors is ColorMap.NumColors(). // // ================================================== // ResetStartTime(); CrsGraph_MapColoring* MapColoringTransform; if (ColoringType == "JONES_PLASSMAN") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::JONES_PLASSMAN, 0, false, 0); else if (ColoringType == "PSEUDO_PARALLEL") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::PSEUDO_PARALLEL, 0, false, 0); else if (ColoringType == "GREEDY") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::GREEDY, 0, false, 0); else if (ColoringType == "LUBY") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::LUBY, 0, false, 0); else ML_CHK_ERR(-1); Epetra_MapColoring* ColorMap = &(*MapColoringTransform)(const_cast<Epetra_CrsGraph&>(GraphCoarse->Graph())); // move the information from ColorMap to std::vector Colors const int NumColors = ColorMap->MaxNumColors(); RefCountPtr<Epetra_IntSerialDenseVector> Colors = rcp(new Epetra_IntSerialDenseVector(GraphCoarse->Graph().NumMyRows())); for (int i = 0; i < GraphCoarse->Graph().NumMyRows(); ++i) (*Colors)[i] = (*ColorMap)[i]; delete MapColoringTransform; delete ColorMap; ColorMap = 0; delete GraphCoarse; AddAndResetStartTime("coarse graph coloring", true); if (verbose_) std::cout << std::endl; // get some other information about the aggregates, to be used // in the QR factorization of the null space. NodesOfAggregate // contains the local ID of block rows contained in each aggregate. 
// FIXME: make it faster std::vector< std::vector<int> > NodesOfAggregate(NumAggregates); for (int i = 0; i < Graph.NumMyBlockRows(); ++i) { int AID = BlockAggr_ML->aggr_info[0][i]; NodesOfAggregate[AID].push_back(i); } int MaxAggrSize = 0; for (int i = 0; i < NumAggregates; ++i) { const int& MySize = NodesOfAggregate[i].size(); if (MySize > MaxAggrSize) MaxAggrSize = MySize; } // collect aggregate information, and mark all nodes that are // connected with each aggregate. These nodes will have a possible // nonzero entry after the matrix-matrix product between the Operator_ // and the tentative prolongator. std::vector<vector<int> > aggregates(NumAggregates); std::vector<int>::iterator iter; for (int i = 0; i < NumAggregates; ++i) aggregates[i].reserve(MaxAggrSize); for (int i = 0; i < Graph.NumMyBlockRows(); ++i) { int AID = BlockAggr_ML->aggr_info[0][i]; int NumEntries; int* Indices; Graph.ExtractMyRowView(i, NumEntries, Indices); for (int k = 0; k < NumEntries; ++k) { // FIXME: use hash?? const int& GCID = Graph.ColMap().GID(Indices[k]); iter = find(aggregates[AID].begin(), aggregates[AID].end(), GCID); if (iter == aggregates[AID].end()) aggregates[AID].push_back(GCID); } } int* BlockNodeList = Graph.ColMap().MyGlobalElements(); // finally get rid of the ML_Aggregate structure. 
ML_Aggregate_Destroy(&BlockAggr_ML); const Epetra_Map& FineMap = Operator_.OperatorDomainMap(); Epetra_Map CoarseMap(-1, NumAggregates * NullSpaceDim, 0, Comm()); RefCountPtr<Epetra_Map> BlockNodeListMap = rcp(new Epetra_Map(-1, Graph.ColMap().NumMyElements(), BlockNodeList, 0, Comm())); std::vector<int> NodeList(Graph.ColMap().NumMyElements() * NumPDEEqns_); for (int i = 0; i < Graph.ColMap().NumMyElements(); ++i) for (int m = 0; m < NumPDEEqns_; ++m) NodeList[i * NumPDEEqns_ + m] = BlockNodeList[i] * NumPDEEqns_ + m; RefCountPtr<Epetra_Map> NodeListMap = rcp(new Epetra_Map(-1, NodeList.size(), &NodeList[0], 0, Comm())); AddAndResetStartTime("data structures", true); // ====================== // // process the null space // // ====================== // // CHECKME Epetra_MultiVector NewNullSpace(CoarseMap, NullSpaceDim); NewNullSpace.PutScalar(0.0); if (NullSpaceDim == 1) { double* ns_ptr = NullSpace.Values(); for (int AID = 0; AID < NumAggregates; ++AID) { double dtemp = 0.0; for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++) for (int m = 0; m < NumPDEEqns_; ++m) { const int& pos = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; dtemp += (ns_ptr[pos] * ns_ptr[pos]); } dtemp = std::sqrt(dtemp); NewNullSpace[0][AID] = dtemp; dtemp = 1.0 / dtemp; for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++) for (int m = 0; m < NumPDEEqns_; ++m) ns_ptr[NodesOfAggregate[AID][j] * NumPDEEqns_ + m] *= dtemp; } } else { // FIXME std::vector<double> qr_ptr(MaxAggrSize * NumPDEEqns_ * MaxAggrSize * NumPDEEqns_); std::vector<double> tmp_ptr(MaxAggrSize * NumPDEEqns_ * NullSpaceDim); std::vector<double> work(NullSpaceDim); int info; for (int AID = 0; AID < NumAggregates; ++AID) { int MySize = NodesOfAggregate[AID].size(); int MyFullSize = NodesOfAggregate[AID].size() * NumPDEEqns_; int lwork = NullSpaceDim; for (int k = 0; k < NullSpaceDim; ++k) for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m] = 
NullSpace[k][NodesOfAggregate[AID][j] * NumPDEEqns_ + m]; DGEQRF_F77(&MyFullSize, (int*)&NullSpaceDim, &qr_ptr[0], &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info); ML_CHK_ERR(info); if (work[0] > lwork) work.resize((int) work[0]); // the upper triangle of qr_tmp is now R, so copy that into the // new nullspace for (int j = 0; j < NullSpaceDim; j++) for (int k = j; k < NullSpaceDim; k++) NewNullSpace[k][AID * NullSpaceDim + j] = qr_ptr[j + MyFullSize * k]; // to get this block of P, need to run qr_tmp through another LAPACK // function: DORGQR_F77(&MyFullSize, (int*)&NullSpaceDim, (int*)&NullSpaceDim, &qr_ptr[0], &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info); ML_CHK_ERR(info); // dgeqtr returned a non-zero if (work[0] > lwork) work.resize((int) work[0]); // insert the Q block into the null space for (int k = 0; k < NullSpaceDim; ++k) for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) { int LRID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; double& val = qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m]; NullSpace[k][LRID] = val; } } } AddAndResetStartTime("null space setup", true); if (verbose_) std::cout << "Number of colors on processor " << Comm().MyPID() << " = " << NumColors << std::endl; if (verbose_) std::cout << "Maximum number of colors = " << NumColors << std::endl; RefCountPtr<Epetra_FECrsMatrix> AP; // try to get a good estimate of the nonzeros per row. // This is a compromize between efficiency -- that is, reduce // the memory allocation processes, and memory usage -- that, is // overestimating can actually kill the code. Basically, this is // all junk due to our dear friend, the Cray XT3. AP = rcp(new Epetra_FECrsMatrix(Copy, FineMap, (int) (AllocationFactor * AP_MaxNnzRow * NullSpaceDim))); if (AP.get() == 0) throw(-1); if (!LowMemory) { // ================================================= // // allocate one big chunk of memory, and use View // // to create Epetra_MultiVectors. 
Note that // // NumColors * NullSpace can indeed be a quite large // // value. To reduce the memory consumption, both // // ColoredAP and ExtColoredAP use the same memory // // array. // // ================================================= // Epetra_MultiVector* ColoredP; std::vector<double> ColoredAP_ptr; try { ColoredP = new Epetra_MultiVector(FineMap, NumColors * NullSpaceDim); ColoredAP_ptr.resize(NumColors * NullSpaceDim * NodeListMap->NumMyPoints()); } catch (std::exception& rhs) { catch_message("the allocation of ColoredP", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("the allocation of ColoredP", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } int ColoredAP_LDA = NodeListMap->NumMyPoints(); ColoredP->PutScalar(0.0); for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i) { int allocated = 1; int NumEntries; int Indices; double Values; int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated, &Indices,&Values,&NumEntries); if (ierr < 0) ML_CHK_ERR(-1); assert (NumEntries == 1); // this is the block P const int& Color = (*Colors)[Indices] - 1; for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) (*ColoredP)[(Color * NullSpaceDim + j)][i * NumPDEEqns_ + k] = NullSpace[j][i * NumPDEEqns_ + k]; } ML_Operator_Destroy(&BlockPtent_ML); Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), &ColoredAP_ptr[0], ColoredAP_LDA, NumColors * NullSpaceDim); // move ColoredAP into ColoredP. This should not be required. 
// but I prefer to skip strange games with View pointers Operator_.Apply(*ColoredP, ColoredAP); *ColoredP = ColoredAP; // FIXME: only if NumProc > 1 Epetra_MultiVector ExtColoredAP(View, *NodeListMap, &ColoredAP_ptr[0], ColoredAP_LDA, NumColors * NullSpaceDim); try { Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap()); ExtColoredAP.Import(*ColoredP, Importer, Insert); } catch (std::exception& rhs) { catch_message("importing of ExtColoredAP", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("importing of ExtColoredAP", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } delete ColoredP; AddAndResetStartTime("computation of AP", true); // populate the actual AP operator, skip some controls to make it faster for (int i = 0; i < NumAggregates; ++i) { for (int j = 0; j < (int) (aggregates[i].size()); ++j) { int GRID = aggregates[i][j]; int LRID = BlockNodeListMap->LID(GRID); // this is the block ID //assert (LRID != -1); int GCID = CoarseMap.GID(i * NullSpaceDim); //assert (GCID != -1); int color = (*Colors)[i] - 1; for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) { double val = ExtColoredAP[color * NullSpaceDim + j][LRID * NumPDEEqns_ + k]; if (val != 0.0) { int GRID2 = GRID * NumPDEEqns_ + k; int GCID2 = GCID + j; AP->InsertGlobalValues(1, &GRID2, 1, &GCID2, &val); //if (ierr < 0) ML_CHK_ERR(ierr); } } } } } else { // =============================================================== // // apply the operator one color at-a-time. This requires NumColors // // cycles over BlockPtent. However, the memory requirements are // // drastically reduced. As for low-memory == false, both ColoredAP // // and ExtColoredAP point to the same memory location. 
// // =============================================================== // if (verbose_) std::cout << "Using low-memory computation for AP" << std::endl; Epetra_MultiVector ColoredP(FineMap, NullSpaceDim); std::vector<double> ColoredAP_ptr; try { ColoredAP_ptr.resize(NullSpaceDim * NodeListMap->NumMyPoints()); } catch (std::exception& rhs) { catch_message("resizing of ColoredAP_pt", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("resizing of ColoredAP_pt", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), NullSpaceDim); Epetra_MultiVector ExtColoredAP(View, *NodeListMap, &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), NullSpaceDim); Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap()); for (int ic = 0; ic < NumColors; ++ic) { if (ML_Get_PrintLevel() > 8 && Comm().MyPID() == 0) { if (ic % 20 == 0) std::cout << "Processing color " << flush; std::cout << ic << " " << flush; if (ic % 20 == 19 || ic == NumColors - 1) std::cout << std::endl; if (ic == NumColors - 1) std::cout << std::endl; } ColoredP.PutScalar(0.0); for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i) { int allocated = 1; int NumEntries; int Indices; double Values; int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated, &Indices,&Values,&NumEntries); if (ierr < 0 || // something strange in getrow NumEntries != 1) // this is the block P ML_CHK_ERR(-1); const int& Color = (*Colors)[Indices] - 1; if (Color != ic) continue; // skip this color for this cycle for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) ColoredP[j][i * NumPDEEqns_ + k] = NullSpace[j][i * NumPDEEqns_ + k]; } Operator_.Apply(ColoredP, ColoredAP); ColoredP = ColoredAP; // just to be safe ExtColoredAP.Import(ColoredP, Importer, Insert); // populate the actual AP operator, skip some controls to make it faster std::vector<int> InsertCols(NullSpaceDim * NumPDEEqns_); 
std::vector<double> InsertValues(NullSpaceDim * NumPDEEqns_); for (int i = 0; i < NumAggregates; ++i) { for (int j = 0; j < (int) (aggregates[i].size()); ++j) { int GRID = aggregates[i][j]; int LRID = BlockNodeListMap->LID(GRID); // this is the block ID //assert (LRID != -1); int GCID = CoarseMap.GID(i * NullSpaceDim); //assert (GCID != -1); int color = (*Colors)[i] - 1; if (color != ic) continue; for (int k = 0; k < NumPDEEqns_; ++k) { int count = 0; int GRID2 = GRID * NumPDEEqns_ + k; for (int j = 0; j < NullSpaceDim; ++j) { double val = ExtColoredAP[j][LRID * NumPDEEqns_ + k]; if (val != 0.0) { InsertCols[count] = GCID + j; InsertValues[count] = val; ++count; } } AP->InsertGlobalValues(1, &GRID2, count, &InsertCols[0], &InsertValues[0]); } } } } ML_Operator_Destroy(&BlockPtent_ML); } aggregates.resize(0); BlockNodeListMap = Teuchos::null; NodeListMap = Teuchos::null; Colors = Teuchos::null; AP->GlobalAssemble(false); AP->FillComplete(CoarseMap, FineMap); #if 0 try { AP->OptimizeStorage(); } catch(...) { // a memory error was reported, typically ReportError. // We just continue with fingers crossed. 
} #endif AddAndResetStartTime("computation of the final AP", true); ML_Operator* AP_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraMatrix(AP.get(), AP_ML); // ======== // // create R // // ======== // std::vector<int> REntries(NumAggregates * NullSpaceDim); for (int AID = 0; AID < NumAggregates; ++AID) { for (int m = 0; m < NullSpaceDim; ++m) REntries[AID * NullSpaceDim + m] = NodesOfAggregate[AID].size() * NumPDEEqns_; } R_ = rcp(new Epetra_CrsMatrix(Copy, CoarseMap, &REntries[0], true)); REntries.resize(0); for (int AID = 0; AID < NumAggregates; ++AID) { const int& MySize = NodesOfAggregate[AID].size(); // FIXME: make it faster for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) for (int k = 0; k < NullSpaceDim; ++k) { int LCID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; int GCID = FineMap.GID(LCID); assert (GCID != -1); double& val = NullSpace[k][LCID]; int GRID = CoarseMap.GID(AID * NullSpaceDim + k); int ierr = R_->InsertGlobalValues(GRID, 1, &val, &GCID); if (ierr < 0) ML_CHK_ERR(-1); } } NodesOfAggregate.resize(0); R_->FillComplete(FineMap, CoarseMap); #if 0 try { R_->OptimizeStorage(); } catch(...) { // a memory error was reported, typically ReportError. // We just continue with fingers crossed. } #endif ML_Operator* R_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraMatrix(R_.get(), R_ML); AddAndResetStartTime("computation of R", true); // ======== // // Create C // // ======== // C_ML_ = ML_Operator_Create(Comm_ML()); ML_2matmult(R_ML, AP_ML, C_ML_, ML_MSR_MATRIX); ML_Operator_Destroy(&AP_ML); ML_Operator_Destroy(&R_ML); AP = Teuchos::null; C_ = rcp(new ML_Epetra::RowMatrix(C_ML_, &Comm(), false)); assert (R_->OperatorRangeMap().SameAs(C_->OperatorDomainMap())); TotalTime.ResetStartTime(); AddAndResetStartTime("computation of C", true); if (verbose_) { std::cout << "Matrix-free preconditioner built. Now building solver for C..." 
<< std::endl; } Teuchos::ParameterList& sublist = List_.sublist("ML list"); sublist.set("PDE equations", NullSpaceDim); sublist.set("null space: type", "pre-computed"); sublist.set("null space: dimension", NewNullSpace.NumVectors()); sublist.set("null space: vectors", NewNullSpace.Values()); MLP_ = rcp(new MultiLevelPreconditioner(*C_, sublist, true)); assert (MLP_.get() != 0); IsComputed_ = true; AddAndResetStartTime("computation of the preconditioner for C", true); if (verbose_) { std::cout << std::endl; std::cout << "Total CPU time for construction (all included) = "; std::cout << TotalCPUTime() << std::endl; ML_print_line("=",78); } return(0); }