//============================================================================== BlockCrsMatrix::BlockCrsMatrix( const Epetra_CrsGraph & BaseGraph, const Epetra_CrsGraph & LocalBlockGraph, const Epetra_Comm & GlobalComm ) : Epetra_CrsMatrix( Copy, *(BlockUtility::GenerateBlockGraph( BaseGraph, LocalBlockGraph, GlobalComm )) ), BaseGraph_( BaseGraph ), #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES RowStencil_int_( ), RowIndices_int_( ), #endif #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES RowStencil_LL_( ), RowIndices_LL_( ), #endif ROffset_(BlockUtility::CalculateOffset64(BaseGraph.RowMap())), COffset_(BlockUtility::CalculateOffset64(BaseGraph.ColMap())) { #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES if(Epetra_CrsMatrix::RowMatrixRowMap().GlobalIndicesInt() && LocalBlockGraph.RowMap().GlobalIndicesInt()) BlockUtility::GenerateRowStencil(LocalBlockGraph, RowIndices_int_, RowStencil_int_); else #endif #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES if(Epetra_CrsMatrix::RowMatrixRowMap().GlobalIndicesLongLong() && LocalBlockGraph.RowMap().GlobalIndicesLongLong()) BlockUtility::GenerateRowStencil(LocalBlockGraph, RowIndices_LL_, RowStencil_LL_); else #endif throw "EpetraExt::BlockCrsMatrix::BlockCrsMatrix: Error, Global indices unknown."; }
int compute_graph_metrics(const Epetra_CrsGraph &graph, Isorropia::Epetra::CostDescriber &costs, double &myGoalWeight, double &balance, int &numCuts, double &cutWgt, double &cutn, double &cutl) { const Epetra_BlockMap & rmap = graph.RowMap(); const Epetra_BlockMap & cmap = graph.ColMap(); int maxEdges = cmap.NumMyElements(); std::vector<std::vector<int> > myRows(rmap.NumMyElements()); if (maxEdges > 0){ int numEdges = 0; int *nborLID = new int [maxEdges]; for (int i=0; i<rmap.NumMyElements(); i++){ graph.ExtractMyRowCopy(i, maxEdges, numEdges, nborLID); std::vector<int> cols(numEdges); for (int j=0; j<numEdges; j++){ cols[j] = nborLID[j]; } myRows[i] = cols; } delete [] nborLID; } return compute_graph_metrics(rmap, cmap, myRows, costs, myGoalWeight, balance, numCuts, cutWgt, cutn, cutl); }
int compute_hypergraph_metrics(const Epetra_CrsGraph &graph, Isorropia::Epetra::CostDescriber &costs, double &myGoalWeight, double &balance, double &cutn, double &cutl) // output { return compute_hypergraph_metrics(graph.RowMap(), graph.ColMap(), graph.NumGlobalCols(), costs, myGoalWeight, balance, cutn, cutl); }
BlockCrsMatrix::BlockCrsMatrix( const Epetra_CrsGraph & BaseGraph, const vector< vector<long long> > & RowStencil, const vector<long long> & RowIndices, const Epetra_Comm & GlobalComm ) : Epetra_CrsMatrix( Copy, *(BlockUtility::GenerateBlockGraph( BaseGraph, RowStencil, RowIndices, GlobalComm )) ), BaseGraph_( BaseGraph ), RowStencil_LL_( RowStencil ), RowIndices_LL_( RowIndices ), ROffset_(BlockUtility::CalculateOffset64(BaseGraph.RowMap())), COffset_(BlockUtility::CalculateOffset64(BaseGraph.ColMap())) { }
BlockCrsMatrix::BlockCrsMatrix( const Epetra_CrsGraph & BaseGraph, const vector<int> & RowStencil, int RowIndex, const Epetra_Comm & GlobalComm ) : Epetra_CrsMatrix( Copy, *(BlockUtility::GenerateBlockGraph( BaseGraph, vector< vector<int> >(1,RowStencil), vector<int>(1,RowIndex), GlobalComm )) ), BaseGraph_( BaseGraph ), RowStencil_int_( vector< vector<int> >(1,RowStencil) ), RowIndices_int_( vector<int>(1,RowIndex) ), ROffset_(BlockUtility::CalculateOffset64(BaseGraph.RowMap())), COffset_(BlockUtility::CalculateOffset64(BaseGraph.ColMap())) { }
void BroydenOperator::removeEntriesFromBroydenUpdate( const Epetra_CrsGraph & graph ) { int numRemoveIndices ; int * removeIndPtr ; int ierr ; cout << graph << endl; for( int row = 0; row < graph.NumMyRows(); ++row) { ierr = graph.ExtractMyRowView( row, numRemoveIndices, removeIndPtr ); if( ierr ) { cout << "ERROR (" << ierr << ") : " << "NOX::Epetra::BroydenOperator::removeEntriesFromBroydenUpdate(...)" << " - Extract indices error for row --> " << row << endl; throw "NOX Broyden Operator Error"; } if( 0 != numRemoveIndices ) { // Create a map for quick queries map<int, bool> removeIndTable; for( int k = 0; k < numRemoveIndices; ++k ) removeIndTable[ graph.ColMap().GID(removeIndPtr[k]) ] = true; // Get our matrix column indices for the current row int numOrigIndices = 0; int * indPtr; ierr = crsMatrix->Graph().ExtractMyRowView( row, numOrigIndices, indPtr ); if( ierr ) { cout << "ERROR (" << ierr << ") : " << "NOX::Epetra::BroydenOperator::removeEntriesFromBroydenUpdate(...)" << " - Extract indices error for row --> " << row << endl; throw "NOX Broyden Operator Error"; } // Remove appropriate active entities if( retainedEntries.end() == retainedEntries.find(row) ) { list<int> inds; for( int k = 0; k < numOrigIndices; ++k ) { if( removeIndTable.end() == removeIndTable.find( crsMatrix->Graph().ColMap().GID(indPtr[k]) ) ) inds.push_back(k); } retainedEntries[row] = inds; } else { list<int> & inds = retainedEntries[row]; list<int>::iterator iter = inds.begin() , iter_end = inds.end() ; for( ; iter_end != iter; ++iter ) { if( !removeIndTable[ *iter ] ) inds.remove( *iter ); } } entriesRemoved[row] = true; } } return; }
void show_matrix(const char *txt, const Epetra_CrsGraph &graph, const Epetra_Comm &comm) { int me = comm.MyPID(); if (comm.NumProc() > 10){ if (me == 0){ std::cerr << txt << std::endl; std::cerr << "Printed matrix format only works for 10 or fewer processes" << std::endl; } return; } const Epetra_BlockMap &rowmap = graph.RowMap(); const Epetra_BlockMap &colmap = graph.ColMap(); int myRows = rowmap.NumMyElements(); int numRows = graph.NumGlobalRows(); int numCols = graph.NumGlobalCols(); int base = rowmap.IndexBase(); if ((numRows > 200) || (numCols > 500)){ if (me == 0){ std::cerr << txt << std::endl; std::cerr << "show_matrix: problem is too large to display" << std::endl; } return; } int *myA = new int [numRows * numCols]; memset(myA, 0, sizeof(int) * numRows * numCols); int *myIndices; int *myRowGIDs = rowmap.MyGlobalElements(); for (int i=0; i< myRows; i++){ int myRowLID = rowmap.LID(myRowGIDs[i]); int numEntries = graph.NumMyIndices(myRowLID); if (numEntries > 0){ int rc = graph.ExtractMyRowView(myRowLID, numEntries, myIndices); if (rc){ std::cerr << txt << std::endl; std::cerr << "extract graph error" << std::endl; return; } int *row = myA + (numCols * (myRowGIDs[i] - base)); for (int j=0; j < numEntries; j++){ int gid = colmap.GID(myIndices[j]); row[gid-base] = me+1; } } } printMatrix(txt, myA, NULL, NULL, numRows, numCols, comm); delete [] myA; }
// ============================================================================ int ML_Epetra::MatrixFreePreconditioner:: Compute(const Epetra_CrsGraph& Graph, Epetra_MultiVector& NullSpace) { Epetra_Time TotalTime(Comm()); const int NullSpaceDim = NullSpace.NumVectors(); // get parameters from the list std::string PrecType = List_.get("prec: type", "hybrid"); std::string SmootherType = List_.get("smoother: type", "Jacobi"); std::string ColoringType = List_.get("coloring: type", "JONES_PLASSMAN"); int PolynomialDegree = List_.get("smoother: degree", 3); std::string DiagonalColoringType = List_.get("diagonal coloring: type", "JONES_PLASSMAN"); int MaximumIterations = List_.get("eigen-analysis: max iters", 10); std::string EigenType_ = List_.get("eigen-analysis: type", "cg"); double boost = List_.get("eigen-analysis: boost for lambda max", 1.0); int OutputLevel = List_.get("ML output", -47); if (OutputLevel == -47) OutputLevel = List_.get("output", 10); omega_ = List_.get("smoother: damping", omega_); ML_Set_PrintLevel(OutputLevel); bool LowMemory = List_.get("low memory", true); double AllocationFactor = List_.get("AP allocation factor", 0.5); verbose_ = (MyPID() == 0 && ML_Get_PrintLevel() > 5); // ================ // // check parameters // // ================ // if (PrecType == "presmoother only") PrecType_ = ML_MFP_PRESMOOTHER_ONLY; else if (PrecType == "hybrid") PrecType_ = ML_MFP_HYBRID; else if (PrecType == "additive") PrecType_ = ML_MFP_ADDITIVE; else ML_CHK_ERR(-3); // not recognized if (SmootherType == "none") SmootherType_ = ML_MFP_NONE; else if (SmootherType == "Jacobi") SmootherType_ = ML_MFP_JACOBI; else if (SmootherType == "block Jacobi") SmootherType_ = ML_MFP_BLOCK_JACOBI; else if (SmootherType == "Chebyshev") SmootherType_ = ML_MFP_CHEBY; else ML_CHK_ERR(-4); // not recognized if (AllocationFactor <= 0.0) ML_CHK_ERR(-1); // should be positive // =============================== // // basic checkings and some output // // =============================== // int OperatorDomainPoints = Operator_.OperatorDomainMap().NumGlobalPoints(); int OperatorRangePoints = Operator_.OperatorRangeMap().NumGlobalPoints(); int GraphBlockRows = Graph.NumGlobalBlockRows(); int GraphNnz = Graph.NumGlobalNonzeros(); NumPDEEqns_ = OperatorRangePoints / GraphBlockRows; NumMyBlockRows_ = Graph.NumMyBlockRows(); if (OperatorDomainPoints != OperatorRangePoints) ML_CHK_ERR(-1); // only square matrices if (OperatorRangePoints % NumPDEEqns_ != 0) ML_CHK_ERR(-2); // num PDEs seems not constant if (verbose_) { ML_print_line("=",78); std::cout << "*** " << std::endl; std::cout << "*** ML_Epetra::MatrixFreePreconditioner" << std::endl; std::cout << "***" << std::endl; std::cout << "Number of rows and columns = " << OperatorDomainPoints << std::endl; std::cout << "Number of rows per processor = " << OperatorDomainPoints / Comm().NumProc() << " (on average)" << std::endl; std::cout << "Number of rows in the graph = " << GraphBlockRows << std::endl; std::cout << "Number of nonzeros in the graph = " << GraphNnz << std::endl; std::cout << "Processors used in computation = " << Comm().NumProc() << std::endl; std::cout << "Number of PDE equations = " << NumPDEEqns_ << std::endl; std::cout << "Null space dimension = " << NullSpaceDim << std::endl; std::cout << "Preconditioner type = " << PrecType << std::endl; std::cout << "Smoother type = " << SmootherType << std::endl; std::cout << "Coloring type = " << ColoringType << std::endl; std::cout << "Allocation factor = " << AllocationFactor << std::endl; std::cout << "Number of V-cycles for C = " << List_.sublist("ML list").get("cycle applications", 1) << std::endl; std::cout << std::endl; } ResetStartTime(); // ==================================== // // compute the inverse of the diagonal, // // control that no elements are zero. // // ==================================== // for (int i = 0; i < InvPointDiagonal_->MyLength(); ++i) if ((*InvPointDiagonal_)[i] != 0.0) (*InvPointDiagonal_)[i] = 1.0 / (*InvPointDiagonal_)[i]; // ========================================================= // // Setup the smoother. I need to extract the block diagonal // // only if block Jacobi is used. For Chebyshev, I scale with // // the point diagonal only. In this latter case, I need to // // compute lambda_max of the scaled operator. // // ========================================================= // // probes for the block diagonal of the matrix. if (SmootherType_ == ML_MFP_JACOBI || SmootherType_ == ML_MFP_NONE) { // do-nothing here } else if (SmootherType_ == ML_MFP_BLOCK_JACOBI) { if (verbose_); std::cout << "Diagonal coloring type = " << DiagonalColoringType << std::endl; ML_CHK_ERR(GetBlockDiagonal(Graph, DiagonalColoringType)); AddAndResetStartTime("block diagonal construction", true); } else if (SmootherType_ == ML_MFP_CHEBY) { double lambda_min = 0.0; double lambda_max = 0.0; Teuchos::ParameterList IFPACKList; if (EigenType_ == "power-method") { ML_CHK_ERR(Ifpack_Chebyshev::PowerMethod(Operator_, *InvPointDiagonal_, MaximumIterations, lambda_max)); } else if(EigenType_ == "cg") { ML_CHK_ERR(Ifpack_Chebyshev::CG(Operator_, *InvPointDiagonal_, MaximumIterations, lambda_min, lambda_max)); } else ML_CHK_ERR(-1); // not recognized if (verbose_) { std::cout << "Using Chebyshev smoother of degree " << PolynomialDegree << std::endl; std::cout << "Estimating eigenvalues using " << EigenType_ << std::endl; std::cout << "lambda_min = " << lambda_min << ", "; std::cout << "lambda_max = " << lambda_max << std::endl; } IFPACKList.set("chebyshev: min eigenvalue", lambda_min); IFPACKList.set("chebyshev: max eigenvalue", boost * lambda_max); // FIXME: this allocates a new std::vector inside IFPACKList.set("chebyshev: operator inv diagonal", InvPointDiagonal_.get()); IFPACKList.set("chebyshev: degree", PolynomialDegree); PreSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_))); if (PreSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error? IFPACKList.set("chebyshev: zero starting solution", true); ML_CHK_ERR(PreSmoother_->SetParameters(IFPACKList)); ML_CHK_ERR(PreSmoother_->Initialize()); ML_CHK_ERR(PreSmoother_->Compute()); PostSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_))); if (PostSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error? IFPACKList.set("chebyshev: zero starting solution", false); ML_CHK_ERR(PostSmoother_->SetParameters(IFPACKList)); ML_CHK_ERR(PostSmoother_->Initialize()); ML_CHK_ERR(PostSmoother_->Compute()); } // ========================================================= // // building P and R for block graph. This is done by working // // on the Graph_ object. Support is provided for local // // aggregation schemes only so that all is basically local. // // Then, build the block graph coarse problem. // // ========================================================= // // ML wrapper for Graph_ ML_Operator* Graph_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraCrsGraph(const_cast<Epetra_CrsGraph*>(&Graph), Graph_ML); ML_Aggregate* BlockAggr_ML = 0; ML_Operator* BlockPtent_ML = 0, *BlockRtent_ML = 0,* CoarseGraph_ML = 0; if (verbose_) std::cout << std::endl; ML_CHK_ERR(Coarsen(Graph_ML, &BlockAggr_ML, &BlockPtent_ML, &BlockRtent_ML, &CoarseGraph_ML)); if (verbose_) std::cout << std::endl; Epetra_CrsMatrix* GraphCoarse; ML_CHK_ERR(ML_Operator2EpetraCrsMatrix(CoarseGraph_ML, GraphCoarse)); // used later to estimate the entries in AP ML_Operator* CoarseAP_ML = ML_Operator_Create(Comm_ML()); ML_2matmult(Graph_ML, BlockPtent_ML, CoarseAP_ML, ML_CSR_MATRIX); int AP_MaxNnzRow, itmp = CoarseAP_ML->max_nz_per_row; Comm().MaxAll(&itmp, &AP_MaxNnzRow, 1); ML_Operator_Destroy(&CoarseAP_ML); int NumAggregates = BlockPtent_ML->invec_leng; ML_Operator_Destroy(&BlockRtent_ML); ML_Operator_Destroy(&CoarseGraph_ML); AddAndResetStartTime("construction of block C, R, and P", true); if (verbose_) std::cout << std::endl; // ================================================== // // coloring of block graph: // // - color of block row `i' is given by `ColorMap[i]' // // - number of colors is ColorMap.NumColors(). // // ================================================== // ResetStartTime(); CrsGraph_MapColoring* MapColoringTransform; if (ColoringType == "JONES_PLASSMAN") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::JONES_PLASSMAN, 0, false, 0); else if (ColoringType == "PSEUDO_PARALLEL") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::PSEUDO_PARALLEL, 0, false, 0); else if (ColoringType == "GREEDY") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::GREEDY, 0, false, 0); else if (ColoringType == "LUBY") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::LUBY, 0, false, 0); else ML_CHK_ERR(-1); Epetra_MapColoring* ColorMap = &(*MapColoringTransform)(const_cast<Epetra_CrsGraph&>(GraphCoarse->Graph())); // move the information from ColorMap to std::vector Colors const int NumColors = ColorMap->MaxNumColors(); RefCountPtr<Epetra_IntSerialDenseVector> Colors = rcp(new Epetra_IntSerialDenseVector(GraphCoarse->Graph().NumMyRows())); for (int i = 0; i < GraphCoarse->Graph().NumMyRows(); ++i) (*Colors)[i] = (*ColorMap)[i]; delete MapColoringTransform; delete ColorMap; ColorMap = 0; delete GraphCoarse; AddAndResetStartTime("coarse graph coloring", true); if (verbose_) std::cout << std::endl; // get some other information about the aggregates, to be used // in the QR factorization of the null space. NodesOfAggregate // contains the local ID of block rows contained in each aggregate. // FIXME: make it faster std::vector< std::vector<int> > NodesOfAggregate(NumAggregates); for (int i = 0; i < Graph.NumMyBlockRows(); ++i) { int AID = BlockAggr_ML->aggr_info[0][i]; NodesOfAggregate[AID].push_back(i); } int MaxAggrSize = 0; for (int i = 0; i < NumAggregates; ++i) { const int& MySize = NodesOfAggregate[i].size(); if (MySize > MaxAggrSize) MaxAggrSize = MySize; } // collect aggregate information, and mark all nodes that are // connected with each aggregate. These nodes will have a possible // nonzero entry after the matrix-matrix product between the Operator_ // and the tentative prolongator. std::vector<vector<int> > aggregates(NumAggregates); std::vector<int>::iterator iter; for (int i = 0; i < NumAggregates; ++i) aggregates[i].reserve(MaxAggrSize); for (int i = 0; i < Graph.NumMyBlockRows(); ++i) { int AID = BlockAggr_ML->aggr_info[0][i]; int NumEntries; int* Indices; Graph.ExtractMyRowView(i, NumEntries, Indices); for (int k = 0; k < NumEntries; ++k) { // FIXME: use hash?? const int& GCID = Graph.ColMap().GID(Indices[k]); iter = find(aggregates[AID].begin(), aggregates[AID].end(), GCID); if (iter == aggregates[AID].end()) aggregates[AID].push_back(GCID); } } int* BlockNodeList = Graph.ColMap().MyGlobalElements(); // finally get rid of the ML_Aggregate structure. ML_Aggregate_Destroy(&BlockAggr_ML); const Epetra_Map& FineMap = Operator_.OperatorDomainMap(); Epetra_Map CoarseMap(-1, NumAggregates * NullSpaceDim, 0, Comm()); RefCountPtr<Epetra_Map> BlockNodeListMap = rcp(new Epetra_Map(-1, Graph.ColMap().NumMyElements(), BlockNodeList, 0, Comm())); std::vector<int> NodeList(Graph.ColMap().NumMyElements() * NumPDEEqns_); for (int i = 0; i < Graph.ColMap().NumMyElements(); ++i) for (int m = 0; m < NumPDEEqns_; ++m) NodeList[i * NumPDEEqns_ + m] = BlockNodeList[i] * NumPDEEqns_ + m; RefCountPtr<Epetra_Map> NodeListMap = rcp(new Epetra_Map(-1, NodeList.size(), &NodeList[0], 0, Comm())); AddAndResetStartTime("data structures", true); // ====================== // // process the null space // // ====================== // // CHECKME Epetra_MultiVector NewNullSpace(CoarseMap, NullSpaceDim); NewNullSpace.PutScalar(0.0); if (NullSpaceDim == 1) { double* ns_ptr = NullSpace.Values(); for (int AID = 0; AID < NumAggregates; ++AID) { double dtemp = 0.0; for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++) for (int m = 0; m < NumPDEEqns_; ++m) { const int& pos = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; dtemp += (ns_ptr[pos] * ns_ptr[pos]); } dtemp = std::sqrt(dtemp); NewNullSpace[0][AID] = dtemp; dtemp = 1.0 / dtemp; for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++) for (int m = 0; m < NumPDEEqns_; ++m) ns_ptr[NodesOfAggregate[AID][j] * NumPDEEqns_ + m] *= dtemp; } } else { // FIXME std::vector<double> qr_ptr(MaxAggrSize * NumPDEEqns_ * MaxAggrSize * NumPDEEqns_); std::vector<double> tmp_ptr(MaxAggrSize * NumPDEEqns_ * NullSpaceDim); std::vector<double> work(NullSpaceDim); int info; for (int AID = 0; AID < NumAggregates; ++AID) { int MySize = NodesOfAggregate[AID].size(); int MyFullSize = NodesOfAggregate[AID].size() * NumPDEEqns_; int lwork = NullSpaceDim; for (int k = 0; k < NullSpaceDim; ++k) for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m] = NullSpace[k][NodesOfAggregate[AID][j] * NumPDEEqns_ + m]; DGEQRF_F77(&MyFullSize, (int*)&NullSpaceDim, &qr_ptr[0], &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info); ML_CHK_ERR(info); if (work[0] > lwork) work.resize((int) work[0]); // the upper triangle of qr_tmp is now R, so copy that into the // new nullspace for (int j = 0; j < NullSpaceDim; j++) for (int k = j; k < NullSpaceDim; k++) NewNullSpace[k][AID * NullSpaceDim + j] = qr_ptr[j + MyFullSize * k]; // to get this block of P, need to run qr_tmp through another LAPACK // function: DORGQR_F77(&MyFullSize, (int*)&NullSpaceDim, (int*)&NullSpaceDim, &qr_ptr[0], &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info); ML_CHK_ERR(info); // dgeqtr returned a non-zero if (work[0] > lwork) work.resize((int) work[0]); // insert the Q block into the null space for (int k = 0; k < NullSpaceDim; ++k) for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) { int LRID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; double& val = qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m]; NullSpace[k][LRID] = val; } } } AddAndResetStartTime("null space setup", true); if (verbose_) std::cout << "Number of colors on processor " << Comm().MyPID() << " = " << NumColors << std::endl; if (verbose_) std::cout << "Maximum number of colors = " << NumColors << std::endl; RefCountPtr<Epetra_FECrsMatrix> AP; // try to get a good estimate of the nonzeros per row. // This is a compromize between efficiency -- that is, reduce // the memory allocation processes, and memory usage -- that, is // overestimating can actually kill the code. Basically, this is // all junk due to our dear friend, the Cray XT3. AP = rcp(new Epetra_FECrsMatrix(Copy, FineMap, (int) (AllocationFactor * AP_MaxNnzRow * NullSpaceDim))); if (AP.get() == 0) throw(-1); if (!LowMemory) { // ================================================= // // allocate one big chunk of memory, and use View // // to create Epetra_MultiVectors. Note that // // NumColors * NullSpace can indeed be a quite large // // value. To reduce the memory consumption, both // // ColoredAP and ExtColoredAP use the same memory // // array. // // ================================================= // Epetra_MultiVector* ColoredP; std::vector<double> ColoredAP_ptr; try { ColoredP = new Epetra_MultiVector(FineMap, NumColors * NullSpaceDim); ColoredAP_ptr.resize(NumColors * NullSpaceDim * NodeListMap->NumMyPoints()); } catch (std::exception& rhs) { catch_message("the allocation of ColoredP", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("the allocation of ColoredP", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } int ColoredAP_LDA = NodeListMap->NumMyPoints(); ColoredP->PutScalar(0.0); for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i) { int allocated = 1; int NumEntries; int Indices; double Values; int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated, &Indices,&Values,&NumEntries); if (ierr < 0) ML_CHK_ERR(-1); assert (NumEntries == 1); // this is the block P const int& Color = (*Colors)[Indices] - 1; for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) (*ColoredP)[(Color * NullSpaceDim + j)][i * NumPDEEqns_ + k] = NullSpace[j][i * NumPDEEqns_ + k]; } ML_Operator_Destroy(&BlockPtent_ML); Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), &ColoredAP_ptr[0], ColoredAP_LDA, NumColors * NullSpaceDim); // move ColoredAP into ColoredP. This should not be required. // but I prefer to skip strange games with View pointers Operator_.Apply(*ColoredP, ColoredAP); *ColoredP = ColoredAP; // FIXME: only if NumProc > 1 Epetra_MultiVector ExtColoredAP(View, *NodeListMap, &ColoredAP_ptr[0], ColoredAP_LDA, NumColors * NullSpaceDim); try { Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap()); ExtColoredAP.Import(*ColoredP, Importer, Insert); } catch (std::exception& rhs) { catch_message("importing of ExtColoredAP", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("importing of ExtColoredAP", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } delete ColoredP; AddAndResetStartTime("computation of AP", true); // populate the actual AP operator, skip some controls to make it faster for (int i = 0; i < NumAggregates; ++i) { for (int j = 0; j < (int) (aggregates[i].size()); ++j) { int GRID = aggregates[i][j]; int LRID = BlockNodeListMap->LID(GRID); // this is the block ID //assert (LRID != -1); int GCID = CoarseMap.GID(i * NullSpaceDim); //assert (GCID != -1); int color = (*Colors)[i] - 1; for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) { double val = ExtColoredAP[color * NullSpaceDim + j][LRID * NumPDEEqns_ + k]; if (val != 0.0) { int GRID2 = GRID * NumPDEEqns_ + k; int GCID2 = GCID + j; AP->InsertGlobalValues(1, &GRID2, 1, &GCID2, &val); //if (ierr < 0) ML_CHK_ERR(ierr); } } } } } else { // =============================================================== // // apply the operator one color at-a-time. This requires NumColors // // cycles over BlockPtent. However, the memory requirements are // // drastically reduced. As for low-memory == false, both ColoredAP // // and ExtColoredAP point to the same memory location. // // =============================================================== // if (verbose_) std::cout << "Using low-memory computation for AP" << std::endl; Epetra_MultiVector ColoredP(FineMap, NullSpaceDim); std::vector<double> ColoredAP_ptr; try { ColoredAP_ptr.resize(NullSpaceDim * NodeListMap->NumMyPoints()); } catch (std::exception& rhs) { catch_message("resizing of ColoredAP_pt", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("resizing of ColoredAP_pt", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), NullSpaceDim); Epetra_MultiVector ExtColoredAP(View, *NodeListMap, &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), NullSpaceDim); Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap()); for (int ic = 0; ic < NumColors; ++ic) { if (ML_Get_PrintLevel() > 8 && Comm().MyPID() == 0) { if (ic % 20 == 0) std::cout << "Processing color " << flush; std::cout << ic << " " << flush; if (ic % 20 == 19 || ic == NumColors - 1) std::cout << std::endl; if (ic == NumColors - 1) std::cout << std::endl; } ColoredP.PutScalar(0.0); for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i) { int allocated = 1; int NumEntries; int Indices; double Values; int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated, &Indices,&Values,&NumEntries); if (ierr < 0 || // something strange in getrow NumEntries != 1) // this is the block P ML_CHK_ERR(-1); const int& Color = (*Colors)[Indices] - 1; if (Color != ic) continue; // skip this color for this cycle for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) ColoredP[j][i * NumPDEEqns_ + k] = NullSpace[j][i * NumPDEEqns_ + k]; } Operator_.Apply(ColoredP, ColoredAP); ColoredP = ColoredAP; // just to be safe ExtColoredAP.Import(ColoredP, Importer, Insert); // populate the actual AP operator, skip some controls to make it faster std::vector<int> InsertCols(NullSpaceDim * NumPDEEqns_); std::vector<double> InsertValues(NullSpaceDim * NumPDEEqns_); for (int i = 0; i < NumAggregates; ++i) { for (int j = 0; j < (int) (aggregates[i].size()); ++j) { int GRID = aggregates[i][j]; int LRID = BlockNodeListMap->LID(GRID); // this is the block ID //assert (LRID != -1); int GCID = CoarseMap.GID(i * NullSpaceDim); //assert (GCID != -1); int color = (*Colors)[i] - 1; if (color != ic) continue; for (int k = 0; k < NumPDEEqns_; ++k) { int count = 0; int GRID2 = GRID * NumPDEEqns_ + k; for (int j = 0; j < NullSpaceDim; ++j) { double val = ExtColoredAP[j][LRID * NumPDEEqns_ + k]; if (val != 0.0) { InsertCols[count] = GCID + j; InsertValues[count] = val; ++count; } } AP->InsertGlobalValues(1, &GRID2, count, &InsertCols[0], &InsertValues[0]); } } } } ML_Operator_Destroy(&BlockPtent_ML); } aggregates.resize(0); BlockNodeListMap = Teuchos::null; NodeListMap = Teuchos::null; Colors = Teuchos::null; AP->GlobalAssemble(false); AP->FillComplete(CoarseMap, FineMap); #if 0 try { AP->OptimizeStorage(); } catch(...) { // a memory error was reported, typically ReportError. // We just continue with fingers crossed. } #endif AddAndResetStartTime("computation of the final AP", true); ML_Operator* AP_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraMatrix(AP.get(), AP_ML); // ======== // // create R // // ======== // std::vector<int> REntries(NumAggregates * NullSpaceDim); for (int AID = 0; AID < NumAggregates; ++AID) { for (int m = 0; m < NullSpaceDim; ++m) REntries[AID * NullSpaceDim + m] = NodesOfAggregate[AID].size() * NumPDEEqns_; } R_ = rcp(new Epetra_CrsMatrix(Copy, CoarseMap, &REntries[0], true)); REntries.resize(0); for (int AID = 0; AID < NumAggregates; ++AID) { const int& MySize = NodesOfAggregate[AID].size(); // FIXME: make it faster for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) for (int k = 0; k < NullSpaceDim; ++k) { int LCID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; int GCID = FineMap.GID(LCID); assert (GCID != -1); double& val = NullSpace[k][LCID]; int GRID = CoarseMap.GID(AID * NullSpaceDim + k); int ierr = R_->InsertGlobalValues(GRID, 1, &val, &GCID); if (ierr < 0) ML_CHK_ERR(-1); } } NodesOfAggregate.resize(0); R_->FillComplete(FineMap, CoarseMap); #if 0 try { R_->OptimizeStorage(); } catch(...) { // a memory error was reported, typically ReportError. // We just continue with fingers crossed. } #endif ML_Operator* R_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraMatrix(R_.get(), R_ML); AddAndResetStartTime("computation of R", true); // ======== // // Create C // // ======== // C_ML_ = ML_Operator_Create(Comm_ML()); ML_2matmult(R_ML, AP_ML, C_ML_, ML_MSR_MATRIX); ML_Operator_Destroy(&AP_ML); ML_Operator_Destroy(&R_ML); AP = Teuchos::null; C_ = rcp(new ML_Epetra::RowMatrix(C_ML_, &Comm(), false)); assert (R_->OperatorRangeMap().SameAs(C_->OperatorDomainMap())); TotalTime.ResetStartTime(); AddAndResetStartTime("computation of C", true); if (verbose_) { std::cout << "Matrix-free preconditioner built. Now building solver for C..." << std::endl; } Teuchos::ParameterList& sublist = List_.sublist("ML list"); sublist.set("PDE equations", NullSpaceDim); sublist.set("null space: type", "pre-computed"); sublist.set("null space: dimension", NewNullSpace.NumVectors()); sublist.set("null space: vectors", NewNullSpace.Values()); MLP_ = rcp(new MultiLevelPreconditioner(*C_, sublist, true)); assert (MLP_.get() != 0); IsComputed_ = true; AddAndResetStartTime("computation of the preconditioner for C", true); if (verbose_) { std::cout << std::endl; std::cout << "Total CPU time for construction (all included) = "; std::cout << TotalCPUTime() << std::endl; ML_print_line("=",78); } return(0); }