//EpetraCrsMatrix_To_TpetraCrsMatrix: copies Epetra_CrsMatrix to its analogous Tpetra_CrsMatrix Teuchos::RCP<Tpetra_CrsMatrix> Petra::EpetraCrsMatrix_To_TpetraCrsMatrix(const Epetra_CrsMatrix& epetraCrsMatrix_, const Teuchos::RCP<const Teuchos::Comm<int> >& commT_) { //get row map of Epetra::CrsMatrix & convert to Tpetra::Map auto tpetraRowMap_ = EpetraMap_To_TpetraMap(epetraCrsMatrix_.RowMap(), commT_); //get col map of Epetra::CrsMatrix & convert to Tpetra::Map auto tpetraColMap_ = EpetraMap_To_TpetraMap(epetraCrsMatrix_.ColMap(), commT_); //get CrsGraph of Epetra::CrsMatrix & convert to Tpetra::CrsGraph const Epetra_CrsGraph epetraCrsGraph_ = epetraCrsMatrix_.Graph(); std::size_t maxEntries = epetraCrsGraph_.GlobalMaxNumIndices(); Teuchos::RCP<Tpetra_CrsGraph> tpetraCrsGraph_ = Teuchos::rcp(new Tpetra_CrsGraph(tpetraRowMap_, tpetraColMap_, maxEntries)); for (LO i=0; i<epetraCrsGraph_.NumMyRows(); i++) { LO NumEntries; LO *Indices; epetraCrsGraph_.ExtractMyRowView(i, NumEntries, Indices); tpetraCrsGraph_->insertLocalIndices(i, NumEntries, Indices); } tpetraCrsGraph_->fillComplete(); //convert Epetra::CrsMatrix to Tpetra::CrsMatrix, after creating Tpetra::CrsMatrix based on above Tpetra::CrsGraph Teuchos::RCP<Tpetra_CrsMatrix> tpetraCrsMatrix_ = Teuchos::rcp(new Tpetra_CrsMatrix(tpetraCrsGraph_)); tpetraCrsMatrix_->setAllToScalar(0.0); for (LO i=0; i<epetraCrsMatrix_.NumMyRows(); i++) { LO NumEntries; LO *Indices; ST *Values; epetraCrsMatrix_.ExtractMyRowView(i, NumEntries, Values, Indices); tpetraCrsMatrix_->replaceLocalValues(i, NumEntries, Values, Indices); } tpetraCrsMatrix_->fillComplete(); return tpetraCrsMatrix_; }
// Probes in_mat through an Isorropia prober built on in_mat's own graph and
// reports whether probing succeeded.
//
// \param in_mat      matrix to probe; its graph defines the probing pattern.
// \param build_list  when true, an (empty) "ZOLTAN" sublist is placed in the
//                    parameter list given to the prober.
// \return false if the probe call reports an error. With HAVE_EPETRAEXT the
//         result is true only when ||in_mat - probed||_inf / ||in_mat||_inf
//         is below 1e-12; without it, a successful probe returns true.
bool probing_test(Epetra_CrsMatrix & in_mat, bool build_list){
  const Epetra_CrsGraph & pattern = in_mat.Graph();

  Teuchos::ParameterList proberParams, zoltanParams;
  if (build_list) {
    //    zoltanParams.set("DISTANCE","2");
    proberParams.set("ZOLTAN", zoltanParams);
  }

  // Non-owning RCP: the graph belongs to in_mat.
  Isorropia::Epetra::Prober prober(Teuchos::rcp<const Epetra_CrsGraph>(&pattern, false),
                                   proberParams);
  Epetra_CrsMatrix out_mat(Copy, pattern);

  const int probeStatus = prober.probe(in_mat, out_mat);
  if (probeStatus != 0) {
    printf("ERROR: probing failed\n");
    return false;
  }

#ifdef HAVE_EPETRAEXT
  // out_mat <- 1*in_mat + (-1)*out_mat, i.e. the difference matrix.
  EpetraExt::MatrixMatrix::Add(in_mat, false, 1, out_mat, -1);
  const double relDiff = out_mat.NormInf() / in_mat.NormInf();
  if (in_mat.Comm().MyPID() == 0)
    printf("diff norm = %22.16e\n", relDiff);
  return relDiff < 1e-12;
#else
  return true;
#endif
}
// Performs any setup that can be done once to reduce the cost of the applyInverse function int applyInverseSetup(Epetra_CrsMatrix &A, Ifpack_CrsRiluk * & M) { int LevelFill = 4; int Overlap = 0; Ifpack_IlukGraph * IlukGraph = new Ifpack_IlukGraph(A.Graph(), LevelFill, Overlap); assert(IlukGraph->ConstructFilledGraph()==0); M = new Ifpack_CrsRiluk(A, *IlukGraph); M->SetFlopCounter(A); assert(M->InitValues()==0); assert(M->Factor()==0); return(0); }
//========================================================================= // NOTE: This method should be removed and replaced with calls to Epetra_Util_ExtractHbData() int Epetra_LinearProblemRedistor::ExtractHbData(int & M, int & N, int & nz, int * & ptr, int * & ind, double * & val, int & Nrhs, double * & rhs, int & ldrhs, double * & lhs, int & ldlhs) const { Epetra_CrsMatrix * RedistMatrix = dynamic_cast<Epetra_CrsMatrix *>(RedistProblem_->GetMatrix()); if (RedistMatrix==0) EPETRA_CHK_ERR(-1); // This matrix is zero or not an Epetra_CrsMatrix if (!RedistMatrix->IndicesAreContiguous()) { // Data must be contiguous for this to work EPETRA_CHK_ERR(-2); } M = RedistMatrix->NumMyRows(); N = RedistMatrix->NumMyCols(); nz = RedistMatrix->NumMyNonzeros(); val = (*RedistMatrix)[0]; // Dangerous, but cheap and effective way to access first element in const Epetra_CrsGraph & RedistGraph = RedistMatrix->Graph(); ind = RedistGraph[0]; // list of values and indices Epetra_MultiVector * LHS = RedistProblem_->GetLHS(); Epetra_MultiVector * RHS = RedistProblem_->GetRHS(); Nrhs = RHS->NumVectors(); if (Nrhs>1) { if (!RHS->ConstantStride()) {EPETRA_CHK_ERR(-3)}; // Must have strided vectors if (!LHS->ConstantStride()) {EPETRA_CHK_ERR(-4)}; // Must have strided vectors } ldrhs = RHS->Stride(); rhs = (*RHS)[0]; // Dangerous but effective (again) ldlhs = LHS->Stride(); lhs = (*LHS)[0]; // Finally build ptr vector if (ptr_==0) { ptr_ = new int[M+1]; ptr_[0] = 0; for (int i=0; i<M; i++) ptr_[i+1] = ptr_[i] + RedistGraph.NumMyIndices(i); } ptr = ptr_; return(0); }
// Checks that a matrix built from another matrix's graph stays usable after
// the original matrix has been destroyed.
//
// A diagonal matrix A is created on the heap and fill-completed. B is then
// constructed from A's graph, A is deleted, and B's diagonal entries are
// replaced. A is heap-allocated on purpose so that it can be destroyed while
// B is still alive.
//
// \param Comm  communicator used to build the row map.
// \return 0 on success, otherwise the first non-zero error code returned by
//         InsertGlobalValues (on A) or ReplaceGlobalValues (on B).
int check_graph_sharing(Epetra_Comm& Comm)
{
  int numLocalElems = 5;
  int localProc = Comm.MyPID();
  int firstElem = localProc*numLocalElems;
  int err;
  Epetra_Map map(-1, numLocalElems, 0, Comm);

  Epetra_CrsMatrix* A = new Epetra_CrsMatrix(Copy, map, 1);

  for (int i=0; i<numLocalElems; ++i) {
    int row = firstElem+i;
    int col = row;
    double val = 1.0;

    err = A->InsertGlobalValues(row, 1, &val, &col);
    if (err != 0) {
      cerr << "A->InsertGlobalValues("<<row<<") returned err="<<err<<endl;
      delete A;  // do not leak A on the error path
      return(err);
    }
  }

  A->FillComplete(false);

  Epetra_CrsMatrix B(Copy, A->Graph());

  // B must keep working once the matrix that created the graph is gone.
  delete A;

  for (int i=0; i<numLocalElems; ++i) {
    int row = firstElem+i;
    int col = row;
    double val = 1.0;

    err = B.ReplaceGlobalValues(row, 1, &val, &col);
    if (err != 0) {
      // Report the call that actually failed.
      cerr << "B.ReplaceGlobalValues("<<row<<") returned err="<<err<<endl;
      return(err);
    }
  }

  return(0);
}
/*----------------------------------------------------------------------*
 |  Constructor (public)                                     m.gee 12/04|
 |  IMPORTANT:                                                          |
 |  No matter on which level we are here, the vector xfine is ALWAYS    |
 |  a fine grid vector here!                                            |
 |                                                                      |
 |  Steps performed:                                                    |
 |   1. store the scalar arguments in the corresponding members         |
 |   2. obtain a deep copy of this level's operator graph (graph_)      |
 |   3. create the coarse interface and restrict xfine to this level    |
 |   4. color the graph (collapsed coloring, falling back to the        |
 |      standard coloring if the collapsed one returns NULL)            |
 |   5. build the FiniteDifferenceColoring operator (FD_), compute the  |
 |      Jacobian and keep a pointer to the underlying matrix in A_      |
 |                                                                      |
 |  Failures are reported on cout and signalled with `throw -1`.        |
 *----------------------------------------------------------------------*/
ML_NOX::ML_Nox_MatrixfreeLevel::ML_Nox_MatrixfreeLevel(int level, int nlevel, int plevel, ML* ml,
                                                       ML_Aggregate* ag, Epetra_CrsMatrix** P,
                                                       ML_NOX::Ml_Nox_Fineinterface& interface,
                                                       const Epetra_Comm& comm, const Epetra_Vector& xfine,
                                                       double fd_alpha, double fd_beta, bool fd_centered,
                                                       bool isDiagonalOnly, int bsize)
: fineinterface_(interface),
  comm_(comm)
{
   level_ = level;
   nlevel_ = nlevel;
   ml_printlevel_ = plevel;
   ml_ = ml;
   ag_ = ag;
   fd_alpha_ = fd_alpha;
   fd_beta_ = fd_beta;
   fd_centered_ = fd_centered;
   isDiagonalOnly_ = isDiagonalOnly;
   A_ = 0;
   coarseinterface_= 0;
   bsize_ = bsize;

   // we need the graph of the operator on this level. On the fine grid we can just ask the
   // fineinterface for it, on the coarser levels it has to be extracted from the ML-hierarchy
   if (level_==0)
   {
      // the Epetra_CrsGraph-copy-constructor shares data with the original one.
      // We want a really deep copy here so we cannot use it
      // graph_ will be given to the FiniteDifferencing class and will be destroyed by it
      graph_ = ML_NOX::deepcopy_graph(interface.getGraph());
   }
   else
   {
      // Note that ML has no understanding of global indices, so it makes up new GIDs
      // (This also holds for the Prolongators P)
      Epetra_CrsMatrix* tmpMat = 0;
      int maxnnz = 0;
      double cputime = 0.0;
      ML_Operator2EpetraCrsMatrix(&(ml_->Amat[level_]), tmpMat, maxnnz, false, cputime);
      // copy the graph
      // NOTE(review): tmpMat is dereferenced here before the null check two lines
      // below; this presumably relies on ML_Operator2EpetraCrsMatrix always
      // producing a matrix — confirm.
      double t0 = GetClock();
      graph_ = ML_NOX::deepcopy_graph(&(tmpMat->Graph()));
      // delete the copy of the Epetra_CrsMatrix
      if (tmpMat) delete tmpMat; tmpMat = 0;
      double t1 = GetClock();
      if (ml_printlevel_ > 0 && 0 == comm_.MyPID())
         cout << "matrixfreeML (level " << level_ << "): extraction/copy of Graph in " << cputime+t1-t0 << " sec\n"
              << " max-nonzeros in Graph: " << maxnnz << "\n";
   }

   // create this levels coarse interface
   coarseinterface_ = new ML_NOX::Nox_CoarseProblem_Interface(fineinterface_,level_,ml_printlevel_,
                                                              P,&(graph_->RowMap()),nlevel_);

   // restrict the xfine-vector to this level
   // (xthis is deleted further down in this constructor)
   Epetra_Vector* xthis = coarseinterface_->restrict_fine_to_this(xfine);
   if (!xthis)
   {
      cout << "**ERR**: ML_Epetra::ML_Nox_MatrixfreeLevel::ML_Nox_MatrixfreeLevel:\n"
           << "**ERR**: ML_Epetra::Nox_CoarseProblem_Interface::restrict_fine_to_this returned NULL on level " << level_ << "\n"
           << "**ERR**: file/line: " << __FILE__ << "/" << __LINE__ << "\n"; throw -1;
   }
   // xc lives on the row map of graph_; it is filled from xthis below
   Epetra_Vector* xc = new Epetra_Vector(graph_->RowMap(),false);
   // FIXME: after intensive testing, this test might be obsolete
   // (the map comparison and the import fallback are currently compiled out,
   //  so xthis is always copied into xc with Update)
#if 0
   bool samemap = xc->Map().PointSameAs(xthis->Map());
   if (samemap)
   {
#endif
      xc->Update(1.0,*xthis,0.0);
#if 0
   }
   else
   {
      cout << "**WRN** Maps are not equal in\n"
           << "**WRN** file/line: " << __FILE__ << "/" << __LINE__ << "\n";
      // import the xthis vector in the Map that ML produced for graph_
      Epetra_Import* importer = new Epetra_Import(graph_->RowMap(),xthis->Map());
      int ierr = xc->Import(*xthis,*importer,Insert);
      if (ierr)
      {
         cout << "**ERR**: ML_Epetra::ML_Nox_MatrixfreeLevel::ML_Nox_MatrixfreeLevel:\n"
              << "**ERR**: export from xthis to xc returned err=" << ierr <<"\n"
              << "**ERR**: file/line: " << __FILE__ << "/" << __LINE__ << "\n"; throw -1;
      }
      if (importer) delete importer; importer = 0;
   }
#endif
   if (xthis) delete xthis; xthis = 0;

   // create the coloring of the graph
   if (ml_printlevel_>0 && comm_.MyPID()==0)
   {
      cout << "matrixfreeML (level " << level_ << "): Entering Coloring on level " << level_ << "\n";
      fflush(stdout);
   }
   double t0 = GetClock();
   // try the collapsed coloring first; if it returns NULL use the standard coloring
   colorMap_ = ML_NOX::ML_Nox_collapsedcoloring(graph_,bsize_,isDiagonalOnly,ml_printlevel_);
   if (!colorMap_) colorMap_ = ML_NOX::ML_Nox_standardcoloring(graph_,isDiagonalOnly);
   colorMapIndex_ = new EpetraExt::CrsGraph_MapColoringIndex(*colorMap_);
   // colorcolumns_ stores the address of the object returned by applying the transform to graph_
   colorcolumns_ = &(*colorMapIndex_)(*graph_);
   double t1 = GetClock();
   if (ml_printlevel_>0 && comm_.MyPID()==0)
   {
      cout << "matrixfreeML (level " << level_ << "): Proc " << comm_.MyPID() <<" Coloring time is " << (t1-t0) << " sec\n";
      fflush(stdout);
   }

   // construct the FiniteDifferenceColoring-Matrix
   if (ml_printlevel_>0 && comm_.MyPID()==0)
   {
      cout << "matrixfreeML (level " << level_ << "): Entering Construction FD-Operator on level " << level_ << "\n";
      fflush(stdout);
   }
   t0 = GetClock();
   // note the argument order in both variants: fd_beta_ is passed before fd_alpha_
#if 1 // FD-operator with coloring
   FD_ = new NOX::EpetraNew::FiniteDifferenceColoring(*coarseinterface_, *xc, *graph_, *colorMap_, *colorcolumns_, true, isDiagonalOnly_, fd_beta_,fd_alpha_);
#else // FD-operator without coloring
   FD_ = new NOX::EpetraNew::FiniteDifference(*coarseinterface_, *xc, *graph_, fd_beta_,fd_alpha_);
#endif
   // set differencing method
   if (fd_centered_)
      FD_->setDifferenceMethod(NOX::EpetraNew::FiniteDifferenceColoring::Centered);

   // a return value of false is treated as failure
   bool err = FD_->computeJacobian(*xc);
   if (err==false)
   {
      cout << "**ERR**: ML_NOX::ML_Nox_MatrixfreeLevel::ML_Nox_MatrixfreeLevel:\n"
           << "**ERR**: NOX::Epetra::FiniteDifferenceColoring returned an error on level " << level_ << "\n"
           << "**ERR**: file/line: " << __FILE__ << "/" << __LINE__ << "\n"; throw -1;
   }

   // print number of calls to the coarse interface
   if (ml_printlevel_>0 && comm_.MyPID()==0)
      cout << "matrixfreeML (level " << level_ << "): Calls to coarse-computeF in FD-Operator: " << coarseinterface_->numcallscomputeF() << "\n";

   t1 = GetClock();
   if (ml_printlevel_>0 && comm_.MyPID()==0)
   {
      cout << "matrixfreeML (level " << level_ << "): Proc " << comm_.MyPID() <<" colored Finite Differencing time is " << (t1-t0) << " sec\n";
      fflush(stdout);
   }

   // get ref to computed Epetra_CrsMatrix
   // (A_ is 0 if the underlying matrix is not an Epetra_CrsMatrix)
   A_ = dynamic_cast<Epetra_CrsMatrix*>(&(FD_->getUnderlyingMatrix()));

   // set counter for number of calls to the coarseinterface_->computeF back to zero
   coarseinterface_->resetnumcallscomputeF();

   // tidy up
   if (xc) delete xc; xc = 0;

   return;
}
/*----------------------------------------------------------------------*
 |  recreate this level (public)                             m.gee 01/05|
 |  this function assumes, that the graph of the fine level problem has |
 |  not changed since call to the constructor and therefore             |
 |  the graph and it's coloring do not have to be recomputed            |
 |  (the new graph is nevertheless compared against the stored one;     |
 |   if they differ, the coarse interface and the coloring are rebuilt) |
 |  IMPORTANT:                                                          |
 |  No matter on which level we are here, the vector xfine is ALWAYS    |
 |  a fine grid vector here!                                            |
 |                                                                      |
 |  level and nlevel must equal the values stored at construction,      |
 |  otherwise the error is printed and -1 is thrown.                    |
 |  Returns true on success; failures are signalled with `throw -1`.    |
 *----------------------------------------------------------------------*/
bool ML_NOX::ML_Nox_MatrixfreeLevel::recreateLevel(int level, int nlevel, int plevel, ML* ml,
                                                   ML_Aggregate* ag, Epetra_CrsMatrix** P,
                                                   ML_NOX::Ml_Nox_Fineinterface& interface,
                                                   const Epetra_Comm& comm, const Epetra_Vector& xfine)
{
   // make some tests
   if (level != level_)
   {
      cout << "**ERR**: ML_Epetra::ML_Nox_MatrixfreeLevel::recreateLevel:\n"
           << "**ERR**: level_ " << level_ << " not equal level " << level << "\n"
           << "**ERR**: file/line: " << __FILE__ << "/" << __LINE__ << "\n"; throw -1;
   }
   if (nlevel != nlevel_)
   {
      cout << "**ERR**: ML_Epetra::ML_Nox_MatrixfreeLevel::recreateLevel:\n"
           << "**ERR**: nlevel_ " << nlevel_ << " not equal nlevel " << nlevel << "\n"
           << "**ERR**: file/line: " << __FILE__ << "/" << __LINE__ << "\n"; throw -1;
   }
   // printlevel might have changed
   ml_printlevel_ = plevel;
   ml_ = ml;
   ag_ = ag;

   destroyP(); // safer to use the new Ps
   setP(NULL);

   // we need the graph of the operator on this level. On the fine grid we can just ask the
   // fineinterface for it, on the coarser levels it has to be extracted from the ML-hierarchy
   // `same` records whether the new graph matches the one stored in graph_
   bool same;
   if (level_==0)
   {
      const Epetra_CrsGraph* graph = interface.getGraph();
      // check whether the old graph matches the new one
      // (must happen before destroyFD(), which destroys graph_)
      same = compare_graphs(graph,graph_);
      destroyFD(); // we are here to recompute the FD-operator (this destroys graph_)
      graph_ = ML_NOX::deepcopy_graph(graph);
   }
   else
   {
      // Note that ML has no understanding of global indices, so it makes up new GIDs
      // (This also holds for the Prolongators P)
      Epetra_CrsMatrix* tmpMat = 0;
      int maxnnz = 0;
      double cputime = 0.0;
      ML_Operator2EpetraCrsMatrix(&(ml_->Amat[level_]), tmpMat, maxnnz, false, cputime);
      // get a view from the graph
      // NOTE(review): tmpMat is dereferenced here although it is null-checked
      // only when it is deleted below — confirm it can never be NULL.
      const Epetra_CrsGraph& graph = tmpMat->Graph();
      // compare the graph to the existing one
      same = compare_graphs(&graph,graph_);
      destroyFD(); // we are here to recompute the FD-operator (this destroys graph_)
      double t0 = GetClock();
      graph_ = ML_NOX::deepcopy_graph(&graph);
      // delete the copy of the Epetra_CrsMatrix
      if (tmpMat) delete tmpMat; tmpMat = 0;
      double t1 = GetClock();
      if (ml_printlevel_ > 0 && 0 == comm_.MyPID())
         cout << "matrixfreeML (level " << level_ << "): extraction/copy of Graph in " << cputime+t1-t0 << " sec\n"
              << " max-nonzeros in Graph: " << maxnnz << "\n";
   }

   // recreate this levels coarse interface
   // (reuse the existing one if the graph is unchanged, otherwise replace it)
   if (same)
      coarseinterface_->recreate(ml_printlevel_,P,&(graph_->RowMap()));
   else
   {
      delete coarseinterface_;
      coarseinterface_ = new ML_NOX::Nox_CoarseProblem_Interface(fineinterface_,level_,ml_printlevel_,
                                                                 P,&(graph_->RowMap()),nlevel_);
   }

   // restrict the xfine-vector to this level
   // (xthis is deleted further down in this function)
   Epetra_Vector* xthis = coarseinterface_->restrict_fine_to_this(xfine);
   if (!xthis)
   {
      cout << "**ERR**: ML_Epetra::ML_Nox_MatrixfreeLevel::ML_Nox_MatrixfreeLevel:\n"
           << "**ERR**: ML_Epetra::Nox_CoarseProblem_Interface::restrict_fine_to_this returned NULL on level " << level_ << "\n"
           << "**ERR**: file/line: " << __FILE__ << "/" << __LINE__ << "\n"; throw -1;
   }
   // xc lives on the row map of graph_; it is filled from xthis below
   Epetra_Vector* xc = new Epetra_Vector(graph_->RowMap(),false);
   // FIXME: after intensive testing, this test might be obsolete
   // (the map comparison and the import fallback are currently compiled out,
   //  so xthis is always copied into xc with Update)
#if 0
   bool samemap = xc->Map().PointSameAs(xthis->Map());
   if (samemap)
   {
#endif
      xc->Update(1.0,*xthis,0.0);
#if 0
   }
   else
   {
      cout << "**WRN** Maps are not equal in\n"
           << "**WRN** file/line: " << __FILE__ << "/" << __LINE__ << "\n";
      // import the xthis vector in the Map that ML produced for graph_
      Epetra_Import* importer = new Epetra_Import(graph_->RowMap(),xthis->Map());
      int ierr = xc->Import(*xthis,*importer,Insert);
      if (ierr)
      {
         cout << "**ERR**: ML_Epetra::ML_Nox_MatrixfreeLevel::ML_Nox_MatrixfreeLevel:\n"
              << "**ERR**: export from xthis to xc returned err=" << ierr <<"\n"
              << "**ERR**: file/line: " << __FILE__ << "/" << __LINE__ << "\n"; throw -1;
      }
      if (importer) delete importer; importer = 0;
   }
#endif
   if (xthis) delete xthis; xthis = 0;

   // create the coloring of the graph
   if (ml_printlevel_>0 && comm_.MyPID()==0)
   {
      cout << "matrixfreeML (level " << level_ << "): Entering Recoloring on level " << level_ << "\n";
      fflush(stdout);
   }
   double t0 = GetClock();

   if (!same) // the graph has obviously changed, so we need to recolor
   {
      // drop the old coloring objects before creating new ones
      if (colorMap_) delete colorMap_; colorMap_ = 0;
      if (colorMapIndex_) delete colorMapIndex_; colorMapIndex_ = 0;
      if (colorcolumns_) delete colorcolumns_; colorcolumns_ = 0;

      // try the collapsed coloring first; if it returns NULL use the standard coloring
      colorMap_ = ML_NOX::ML_Nox_collapsedcoloring(graph_,bsize_,isDiagonalOnly_,ml_printlevel_);
      if (!colorMap_) colorMap_ = ML_NOX::ML_Nox_standardcoloring(graph_,isDiagonalOnly_);
      colorMapIndex_ = new EpetraExt::CrsGraph_MapColoringIndex(*colorMap_);
      colorcolumns_ = &(*colorMapIndex_)(*graph_);
   }
   else if (ml_printlevel_>0 && comm_.MyPID()==0)
      cout << "matrixfreeML (level " << level_ << "): Reusing existing Coloring on level " << level_ << "\n";

   double t1 = GetClock();
   // printed on every process (no MyPID check) when the print level exceeds 5
   if (ml_printlevel_>5)
   {
      cout << "matrixfreeML (level " << level_ << "): Proc " << comm_.MyPID() <<" (Re)Coloring time is " << (t1-t0) << " sec\n";
      fflush(stdout);
   }

#if 0
   // print the colorMap_
   if (comm_.MyPID()==0)
      cout << "colorMap_\n";
   cout << *colorMap_;
   for (int i=0; i<colorcolumns_->size(); i++)
   {
      if (comm_.MyPID()==0)
         cout << "the " << i << " th colorcolumn_ - vector\n";
      cout << colorcolumns_->operator[](i);
   }
#endif

   // construct the FiniteDifferenceColoring-Matrix
   if (ml_printlevel_>0 && comm_.MyPID()==0)
   {
      cout << "matrixfreeML (level " << level_ << "): Entering Construction FD-Operator on level " << level_ << "\n";
      fflush(stdout);
   }

   t0 = GetClock();
   // note the argument order in both variants: fd_beta_ is passed before fd_alpha_
#if 1 // FD-operator with coloring (see the #if 1 in ml_nox_matrixfreelevel.H as well!)
   FD_ = new NOX::EpetraNew::FiniteDifferenceColoring(*coarseinterface_, *xc, *graph_, *colorMap_, *colorcolumns_, true, isDiagonalOnly_, fd_beta_,fd_alpha_);
#else // FD-operator without coloring
   FD_ = new NOX::EpetraNew::FiniteDifference(*coarseinterface_, *xc, *graph_, fd_beta_,fd_alpha_);
#endif
   // set differencing method
   if (fd_centered_)
   {
      FD_->setDifferenceMethod(NOX::EpetraNew::FiniteDifferenceColoring::Centered);
   }

   // a return value of false is treated as failure
   bool err = FD_->computeJacobian(*xc);
   if (err==false)
   {
      cout << "**ERR**: ML_Epetra::ML_Nox_MatrixfreeLevel::ML_Nox_MatrixfreeLevel:\n"
           << "**ERR**: NOX::Epetra::FiniteDifferenceColoring returned an error on level " << level_ << "\n"
           << "**ERR**: file/line: " << __FILE__ << "/" << __LINE__ << "\n"; throw -1;
   }

   t1 = GetClock();
   if (ml_printlevel_>5)
      cout << "matrixfreeML (level " << level_ << "): Proc " << comm_.MyPID() <<" Finite Differencing operator constr. in " << (t1-t0) << " sec\n";

   // get ref to computed Epetra_CrsMatrix
   // (A_ is 0 if the underlying matrix is not an Epetra_CrsMatrix)
   A_ = dynamic_cast<Epetra_CrsMatrix*>(&(FD_->getUnderlyingMatrix()));

   // print number of calls to the coarse interface
   if (ml_printlevel_>5 && comm_.MyPID()==0)
      cout << "matrixfreeML (level " << level_ << "): Calls to coarse-computeF in FD-Operator: " << coarseinterface_->numcallscomputeF() << "\n";

   // set counter for number of calls to the coarseinterface_->computeF back to zero
   coarseinterface_->resetnumcallscomputeF();

   // tidy up
   if (xc) delete xc; xc = 0;

   return true;
}
//=============================================================================
// Performs the symbolic phase for DSCPACK:
//   1. obtains (or builds) an Epetra_CrsGraph for the problem matrix,
//   2. replicates that graph on every process,
//   3. converts the replicated graph to the (Ap, Ai) row-pointer / column-index arrays,
//   4. chooses the number of DSCPACK processes, opens the DSC grid, and calls
//      DSC_Order and DSC_SFactor on the processes that belong to the grid.
// Timer 0 is used for the conversion and symbolic-factorization timings,
// timer 1 for the overhead timing.
// Returns 0 on success; errors are returned through AMESOS_CHK_ERR.
int Amesos_Dscpack::PerformSymbolicFactorization()
{
  ResetTimer(0);
  ResetTimer(1);

  MyPID_ = Comm().MyPID();
  NumProcs_ = Comm().NumProc();

  Epetra_RowMatrix *RowMatrixA = Problem_->GetMatrix();
  if (RowMatrixA == 0)
    AMESOS_CHK_ERR(-1);

  const Epetra_Map& OriginalMap = RowMatrixA->RowMatrixRowMap() ;
  // reference dynamic_cast: throws std::bad_cast if Comm() is not an Epetra_MpiComm
  const Epetra_MpiComm& comm1 = dynamic_cast<const Epetra_MpiComm &> (Comm());
  int numrows = RowMatrixA->NumGlobalRows();
  int numentries = RowMatrixA->NumGlobalNonzeros();

  Teuchos::RCP<Epetra_CrsGraph> Graph;

  Epetra_CrsMatrix* CastCrsMatrixA = dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA);

  if (CastCrsMatrixA)
  {
    // the matrix already has a graph: wrap it in a non-owning RCP
    Graph = Teuchos::rcp(const_cast<Epetra_CrsGraph*>(&(CastCrsMatrixA->Graph())), false);
  }
  else
  {
    // generic row matrix: build a graph row by row, translating the local
    // column indices to global ones
    int MaxNumEntries = RowMatrixA->MaxNumEntries();
    Graph = Teuchos::rcp(new Epetra_CrsGraph(Copy, OriginalMap, MaxNumEntries));

    std::vector<int> Indices(MaxNumEntries);
    std::vector<double> Values(MaxNumEntries);

    for (int i = 0 ; i < RowMatrixA->NumMyRows() ; ++i)
    {
      int NumEntries;
      // NOTE(review): the return codes of ExtractMyRowCopy and
      // InsertGlobalIndices are not checked here.
      RowMatrixA->ExtractMyRowCopy(i, MaxNumEntries, NumEntries,
                                   &Values[0], &Indices[0]);

      for (int j = 0 ; j < NumEntries ; ++j)
        Indices[j] = RowMatrixA->RowMatrixColMap().GID(Indices[j]);

      int GlobalRow = RowMatrixA->RowMatrixRowMap().GID(i);
      Graph->InsertGlobalIndices(GlobalRow, NumEntries, &Indices[0]);
    }

    Graph->FillComplete();
  }

  //
  //  Create a replicated map and graph
  //  (every process lists all global rows 0..numrows-1 in its map)
  //
  std::vector<int> AllIDs( numrows ) ;
  for ( int i = 0; i < numrows ; i++ ) AllIDs[i] = i ;

  Epetra_Map ReplicatedMap( -1, numrows, &AllIDs[0], 0, Comm());
  Epetra_Import ReplicatedImporter(ReplicatedMap, OriginalMap);
  Epetra_CrsGraph ReplicatedGraph( Copy, ReplicatedMap, 0 );

  AMESOS_CHK_ERR(ReplicatedGraph.Import(*Graph, ReplicatedImporter, Insert));
  AMESOS_CHK_ERR(ReplicatedGraph.FillComplete());

  //
  //  Convert the matrix to Ap, Ai
  //  (Ap[r] is the offset of row r in Ai, Ap[numrows] the total entry count)
  //
  std::vector <int> Replicates(numrows);
  std::vector <int> Ap(numrows + 1);
  std::vector <int> Ai(EPETRA_MAX(numrows, numentries));

  for( int i = 0 ; i < numrows; i++ ) Replicates[i] = 1;

  int NumEntriesPerRow ;
  int *ColIndices = 0 ;
  int Ai_index = 0 ;
  for ( int MyRow = 0; MyRow <numrows; MyRow++ ) {
    AMESOS_CHK_ERR( ReplicatedGraph.ExtractMyRowView( MyRow, NumEntriesPerRow, ColIndices ) );
    Ap[MyRow] = Ai_index ;
    for ( int j = 0; j < NumEntriesPerRow; j++ ) {
      Ai[Ai_index] = ColIndices[j] ;
      Ai_index++;
    }
  }
  assert( Ai_index == numentries ) ;
  Ap[ numrows ] = Ai_index ;

  MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0);

  ResetTimer(0);

  //
  //  Call Dscpack Symbolic Factorization
  //
  int OrderCode = 2;
  std::vector<double> MyANonZ;

  // reset the DSC_Order outputs before the call below
  NumLocalNonz = 0 ;
  GlobalStructNewColNum = 0 ;
  GlobalStructNewNum = 0 ;
  GlobalStructOwner = 0 ;
  LocalStructOldNum = 0 ;

  NumGlobalCols = 0 ;

  // MS // Have to define the maximum number of processes to be used
  // MS // This is only a suggestion as Dscpack uses a number of processes that is a power of 2

  int NumGlobalNonzeros = GetProblem()->GetMatrix()->NumGlobalNonzeros();
  int NumRows = GetProblem()->GetMatrix()->NumGlobalRows();

  // optimal value for MaxProcs == -1

  int OptNumProcs1 = 1+EPETRA_MAX( NumRows/10000, NumGlobalNonzeros/1000000 );
  OptNumProcs1 = EPETRA_MIN(NumProcs_,OptNumProcs1 );

  // optimal value for MaxProcs == -2

  int OptNumProcs2 = (int)sqrt(1.0 * NumProcs_);
  if( OptNumProcs2 < 1 ) OptNumProcs2 = 1;

  // fix the value of MaxProcs
  // (only the special values -1, -2 and -3 are translated; any other value is left as is)

  switch (MaxProcs_)
  {
  case -1:
    MaxProcs_ = EPETRA_MIN(OptNumProcs1, NumProcs_);
    break;
  case -2:
    MaxProcs_ = EPETRA_MIN(OptNumProcs2, NumProcs_);
    break;
  case -3:
    MaxProcs_ = NumProcs_;
    break;
  }

#if 0
  if (MyDscRank>=0 && A_and_LU_built) {
    DSC_ReFactorInitialize(PrivateDscpackData_->MyDSCObject);
  }
#endif
  //  if ( ! A_and_LU_built ) {
  //    DSC_End( PrivateDscpackData_->MyDSCObject ) ;
  //    PrivateDscpackData_->MyDSCObject = DSC_Begin() ;
  //  }

  // MS // here I continue with the old code...

  OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);

  // double DscNumProcs while the doubled value still fits under
  // min(MaxProcs_, DscMax); the result is a power of two, at least 1
  DscNumProcs = 1 ;
  int DscMax = DSC_Analyze( numrows, &Ap[0], &Ai[0], &Replicates[0] );

  while ( DscNumProcs * 2 <=EPETRA_MIN( MaxProcs_, DscMax ) ) DscNumProcs *= 2 ;

  MyDscRank = -1;
  DSC_Open0( PrivateDscpackData_->MyDSCObject_, DscNumProcs, &MyDscRank, comm1.Comm()) ;

  NumLocalCols = 0 ; // This is for those processes not in the Dsc grid
  if ( MyDscRank >= 0 ) {
    assert( MyPID_ == MyDscRank ) ;
    AMESOS_CHK_ERR( DSC_Order ( PrivateDscpackData_->MyDSCObject_, OrderCode, numrows, &Ap[0], &Ai[0],
                                &Replicates[0], &NumGlobalCols, &NumLocalStructs,
                                &NumLocalCols, &NumLocalNonz,
                                &GlobalStructNewColNum, &GlobalStructNewNum,
                                &GlobalStructOwner, &LocalStructOldNum ) ) ;
    assert( NumGlobalCols == numrows ) ;
    assert( NumLocalCols == NumLocalStructs ) ;
  }

  if ( MyDscRank >= 0 ) {
    int MaxSingleBlock;

    // NOTE(review): the "5 Terabytes" reading of this value presumably assumes
    // DSC_SFactor takes the limit in megabytes — confirm against DSCPACK docs.
    const int Limit = 5000000 ; // Memory Limit set to 5 Terabytes
    AMESOS_CHK_ERR( DSC_SFactor ( PrivateDscpackData_->MyDSCObject_, &TotalMemory_,
                                  &MaxSingleBlock, Limit, DSC_LBLAS3, DSC_DBLAS2 ) ) ;

  }

  //  A_and_LU_built = true; // If you uncomment this, TestOptions fails

  SymFactTime_ = AddTime("Total symbolic factorization time", SymFactTime_, 0);

  return(0);
}
int MyCreateCrsMatrix( char *in_filename, const Epetra_Comm &Comm, Epetra_Map *& readMap, const bool transpose, const bool distribute, bool& symmetric, Epetra_CrsMatrix *& Matrix ) { Epetra_CrsMatrix * readA = 0; Epetra_Vector * readx = 0; Epetra_Vector * readb = 0; Epetra_Vector * readxexact = 0; // // This hack allows TestOptions to be run from either the test/TestOptions/ directory or from // the test/ directory (as it is in nightly testing and in make "run-tests") // FILE *in_file = fopen( in_filename, "r"); char *filename; if (in_file == NULL ) filename = &in_filename[1] ; // Strip off ithe "." from // "../" and try again else { filename = in_filename ; fclose( in_file ); } symmetric = false ; std::string FileName = filename ; int FN_Size = FileName.size() ; std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size ); std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size ); if ( LastFiveBytes == ".triU" ) { // Call routine to read in unsymmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( filename, false, Comm, readMap, readA, readx, readb, readxexact) ); symmetric = false; } else { if ( LastFiveBytes == ".triS" ) { // Call routine to read in symmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( filename, true, Comm, readMap, readA, readx, readb, readxexact) ); symmetric = true; } else { if ( LastFourBytes == ".mtx" ) { EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( filename, Comm, readMap, readA, readx, readb, readxexact) ); FILE* in_file = fopen( filename, "r"); assert (in_file != NULL) ; // Checked in Trilinos_Util_CountMatrixMarket() const int BUFSIZE = 800 ; char buffer[BUFSIZE] ; fgets( buffer, BUFSIZE, in_file ) ; // Pick symmetry info off of this string std::string headerline1 = buffer; #ifdef TFLOP if ( headerline1.find("symmetric") < BUFSIZE ) symmetric = true; #else if ( headerline1.find("symmetric") != std::string::npos) symmetric = true; #endif 
fclose(in_file); } else { // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra( filename, Comm, readMap, readA, readx, readb, readxexact) ; if ( LastFourBytes == ".rsa" ) symmetric = true ; } } } if ( readb ) delete readb; if ( readx ) delete readx; if ( readxexact ) delete readxexact; Epetra_CrsMatrix *serialA ; Epetra_CrsMatrix *transposeA; if ( transpose ) { transposeA = new Epetra_CrsMatrix( Copy, *readMap, 0 ); assert( CrsMatrixTranspose( readA, transposeA ) == 0 ); serialA = transposeA ; delete readA; readA = 0 ; } else { serialA = readA ; } assert( (void *) &serialA->Graph() ) ; assert( (void *) &serialA->RowMap() ) ; assert( serialA->RowMap().SameAs(*readMap) ) ; if ( distribute ) { // Create uniform distributed map Epetra_Map DistMap(readMap->NumGlobalElements(), 0, Comm); // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter( *readMap, DistMap ); Epetra_CrsMatrix *Amat = new Epetra_CrsMatrix( Copy, DistMap, 0 ); Amat->Export(*serialA, exporter, Add); assert(Amat->FillComplete()==0); Matrix = Amat; // // Make sure that deleting Amat->RowMap() will delete map // // Bug: We can't manage to delete map his way anyway, // and this fails on tranposes, so for now I just accept // the memory loss. // assert( &(Amat->RowMap()) == map ) ; delete readMap; readMap = 0 ; delete serialA; } else { Matrix = serialA; } return 0; }
//========================================================================= RowMatrix_Transpose::NewTypeRef RowMatrix_Transpose:: operator()( OriginalTypeRef orig ) { origObj_ = &orig; int i, j, err; if( !TransposeRowMap_ ) { if( IgnoreNonLocalCols_ ) TransposeRowMap_ = (Epetra_Map *) &(orig.OperatorRangeMap()); // Should be replaced with refcount = else TransposeRowMap_ = (Epetra_Map *) &(orig.OperatorDomainMap()); // Should be replaced with refcount = } // This routine will work for any RowMatrix object, but will attempt cast the matrix to a CrsMatrix if // possible (because we can then use a View of the matrix and graph, which is much cheaper). // First get the local indices to count how many nonzeros will be in the // transpose graph on each processor Epetra_CrsMatrix * OrigCrsMatrix = dynamic_cast<Epetra_CrsMatrix*>(&orig); OrigMatrixIsCrsMatrix_ = (OrigCrsMatrix!=0); // If this pointer is non-zero, the cast to CrsMatrix worked NumMyRows_ = orig.NumMyRows(); NumMyCols_ = orig.NumMyCols(); TransNumNz_ = new int[NumMyCols_]; TransIndices_ = new int*[NumMyCols_]; TransValues_ = new double*[NumMyCols_]; TransMyGlobalEquations_ = new int[NumMyCols_]; int NumIndices; if (OrigMatrixIsCrsMatrix_) { const Epetra_CrsGraph & OrigGraph = OrigCrsMatrix->Graph(); // Get matrix graph for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; for (i=0; i<NumMyRows_; i++) { err = OrigGraph.ExtractMyRowView(i, NumIndices, Indices_); // Get view of ith row if (err != 0) throw OrigGraph.ReportError("ExtractMyRowView failed",err); for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]]; } } else // Original is not a CrsMatrix { MaxNumEntries_ = 0; int NumEntries; for (i=0; i<NumMyRows_; i++) { orig.NumMyRowEntries(i, NumEntries); MaxNumEntries_ = EPETRA_MAX(MaxNumEntries_, NumEntries); } Indices_ = new int[MaxNumEntries_]; Values_ = new double[MaxNumEntries_]; for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; for (i=0; i<NumMyRows_; i++) { err = orig.ExtractMyRowCopy(i, MaxNumEntries_, 
NumIndices, Values_, Indices_); if (err != 0) { std::cerr << "ExtractMyRowCopy failed."<<std::endl; throw err; } for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]]; } } // Most of remaining code is common to both cases for(i=0; i<NumMyCols_; i++) { NumIndices = TransNumNz_[i]; if (NumIndices>0) { TransIndices_[i] = new int[NumIndices]; TransValues_[i] = new double[NumIndices]; } } // Now copy values and global indices into newly create transpose storage for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; // Reset transpose NumNz counter for (i=0; i<NumMyRows_; i++) { if (OrigMatrixIsCrsMatrix_) err = OrigCrsMatrix->ExtractMyRowView(i, NumIndices, Values_, Indices_); else err = orig.ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_); if (err != 0) { std::cerr << "ExtractMyRowCopy failed."<<std::endl; throw err; } int ii = orig.RowMatrixRowMap().GID(i); for (j=0; j<NumIndices; j++) { int TransRow = Indices_[j]; int loc = TransNumNz_[TransRow]; TransIndices_[TransRow][loc] = ii; TransValues_[TransRow][loc] = Values_[j]; ++TransNumNz_[TransRow]; // increment counter into current transpose row } } // Build Transpose matrix with some rows being shared across processors. 
// We will use a view here since the matrix will not be used for anything else const Epetra_Map & TransMap = orig.RowMatrixColMap(); Epetra_CrsMatrix TempTransA1(View, TransMap, TransNumNz_); TransMap.MyGlobalElements(TransMyGlobalEquations_); for (i=0; i<NumMyCols_; i++) { err = TempTransA1.InsertGlobalValues(TransMyGlobalEquations_[i], TransNumNz_[i], TransValues_[i], TransIndices_[i]); if (err < 0) throw TempTransA1.ReportError("InsertGlobalValues failed.",err); } // Note: The following call to FillComplete is currently necessary because // some global constants that are needed by the Export () are computed in this routine err = TempTransA1.FillComplete(orig.OperatorRangeMap(),*TransposeRowMap_, false); if (err != 0) { throw TempTransA1.ReportError("FillComplete failed.",err); } // Now that transpose matrix with shared rows is entered, create a new matrix that will // get the transpose with uniquely owned rows (using the same row distribution as A). if( IgnoreNonLocalCols_ ) TransposeMatrix_ = new Epetra_CrsMatrix(Copy, *TransposeRowMap_, *TransposeRowMap_, 0); else TransposeMatrix_ = new Epetra_CrsMatrix(Copy, *TransposeRowMap_,0); // Create an Export object that will move TempTransA around TransposeExporter_ = new Epetra_Export(TransMap, *TransposeRowMap_); err = TransposeMatrix_->Export(TempTransA1, *TransposeExporter_, Add); if (err != 0) throw TransposeMatrix_->ReportError("Export failed.",err); err = TransposeMatrix_->FillComplete(orig.OperatorRangeMap(),*TransposeRowMap_); if (err != 0) throw TransposeMatrix_->ReportError("FillComplete failed.",err); if (MakeDataContiguous_) { err = TransposeMatrix_->MakeDataContiguous(); if (err != 0) throw TransposeMatrix_->ReportError("MakeDataContiguous failed.",err); } newObj_ = TransposeMatrix_; return *newObj_; }
int check(Epetra_CrsMatrix& A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int* MyGlobalElements, bool verbose) { (void)MyGlobalElements; int ierr = 0, forierr = 0; int NumGlobalIndices; int NumMyIndices; int* MyViewIndices = 0; int* GlobalViewIndices = 0; double* MyViewValues = 0; double* GlobalViewValues = 0; int MaxNumIndices = A.Graph().MaxNumIndices(); int* MyCopyIndices = new int[MaxNumIndices]; int* GlobalCopyIndices = new int[MaxNumIndices]; double* MyCopyValues = new double[MaxNumIndices]; double* GlobalCopyValues = new double[MaxNumIndices]; // Test query functions int NumMyRows = A.NumMyRows(); if (verbose) cout << "\n\nNumber of local Rows = " << NumMyRows << endl<< endl; EPETRA_TEST_ERR(!(NumMyRows==NumMyRows1),ierr); int NumMyNonzeros = A.NumMyNonzeros(); if (verbose) cout << "\n\nNumber of local Nonzero entries = " << NumMyNonzeros << endl<< endl; EPETRA_TEST_ERR(!(NumMyNonzeros==NumMyNonzeros1),ierr); int NumGlobalRows = A.NumGlobalRows(); if (verbose) cout << "\n\nNumber of global Rows = " << NumGlobalRows << endl<< endl; EPETRA_TEST_ERR(!(NumGlobalRows==NumGlobalRows1),ierr); int NumGlobalNonzeros = A.NumGlobalNonzeros(); if (verbose) cout << "\n\nNumber of global Nonzero entries = " << NumGlobalNonzeros << endl<< endl; EPETRA_TEST_ERR(!(NumGlobalNonzeros==NumGlobalNonzeros1),ierr); // GlobalRowView should be illegal (since we have local indices) EPETRA_TEST_ERR(!(A.ExtractGlobalRowView(A.RowMap().MaxMyGID(), NumGlobalIndices, GlobalViewValues, GlobalViewIndices)==-2),ierr); // Other binary tests EPETRA_TEST_ERR(A.NoDiagonal(),ierr); EPETRA_TEST_ERR(!(A.Filled()),ierr); EPETRA_TEST_ERR(!(A.MyGRID(A.RowMap().MaxMyGID())),ierr); EPETRA_TEST_ERR(!(A.MyGRID(A.RowMap().MinMyGID())),ierr); EPETRA_TEST_ERR(A.MyGRID(1+A.RowMap().MaxMyGID()),ierr); EPETRA_TEST_ERR(A.MyGRID(-1+A.RowMap().MinMyGID()),ierr); EPETRA_TEST_ERR(!(A.MyLRID(0)),ierr); EPETRA_TEST_ERR(!(A.MyLRID(NumMyRows-1)),ierr); 
EPETRA_TEST_ERR(A.MyLRID(-1),ierr); EPETRA_TEST_ERR(A.MyLRID(NumMyRows),ierr); forierr = 0; for (int i = 0; i < NumMyRows; i++) { int Row = A.GRID(i); A.ExtractGlobalRowCopy(Row, MaxNumIndices, NumGlobalIndices, GlobalCopyValues, GlobalCopyIndices); A.ExtractMyRowView(i, NumMyIndices, MyViewValues, MyViewIndices); // this is where the problem comes from forierr += !(NumGlobalIndices == NumMyIndices); for(int j = 1; j < NumMyIndices; j++) { forierr += !(MyViewIndices[j-1] < MyViewIndices[j]); // this is where the test fails } for(int j = 0; j < NumGlobalIndices; j++) { forierr += !(GlobalCopyIndices[j] == A.GCID(MyViewIndices[j])); forierr += !(A.LCID(GlobalCopyIndices[j]) == MyViewIndices[j]); forierr += !(GlobalCopyValues[j] == MyViewValues[j]); } } EPETRA_TEST_ERR(forierr,ierr); forierr = 0; for (int i = 0; i < NumMyRows; i++) { int Row = A.GRID(i); A.ExtractGlobalRowCopy(Row, MaxNumIndices, NumGlobalIndices, GlobalCopyValues, GlobalCopyIndices); A.ExtractMyRowCopy(i, MaxNumIndices, NumMyIndices, MyCopyValues, MyCopyIndices); forierr += !(NumGlobalIndices == NumMyIndices); for (int j = 1; j < NumMyIndices; j++) forierr += !(MyCopyIndices[j-1] < MyCopyIndices[j]); for (int j = 0; j < NumGlobalIndices; j++) { forierr += !(GlobalCopyIndices[j] == A.GCID(MyCopyIndices[j])); forierr += !(A.LCID(GlobalCopyIndices[j]) == MyCopyIndices[j]); forierr += !(GlobalCopyValues[j] == MyCopyValues[j]); } } EPETRA_TEST_ERR(forierr,ierr); delete [] MyCopyIndices; delete [] GlobalCopyIndices; delete [] MyCopyValues; delete [] GlobalCopyValues; if (verbose) cout << "\n\nRows sorted check OK" << endl<< endl; return (ierr); }
int main(int argc, char *argv[]) { #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm (MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif cout << Comm << endl; int MyPID = Comm.MyPID(); bool verbose = false; bool verbose1 = true; if (MyPID==0) verbose = true; if(argc < 2 && verbose) { cerr << "Usage: " << argv[0] << " HB_filename [level_fill [level_overlap [absolute_threshold [ relative_threshold]]]]" << endl << "where:" << endl << "HB_filename - filename and path of a Harwell-Boeing data set" << endl << "level_fill - The amount of fill to use for ILU(k) preconditioner (default 0)" << endl << "level_overlap - The amount of overlap used for overlapping Schwarz subdomains (default 0)" << endl << "absolute_threshold - The minimum value to place on the diagonal prior to factorization (default 0.0)" << endl << "relative_threshold - The relative amount to perturb the diagonal prior to factorization (default 1.0)" << endl << endl << "To specify a non-default value for one of these parameters, you must specify all" << endl << " preceding values but not any subsequent parameters. 
Example:" << endl << "ifpackHpcSerialMsr.exe mymatrix.hpc 1 - loads mymatrix.hpc, uses level fill of one, all other values are defaults" << endl << endl; return(1); } // Uncomment the next three lines to debug in mpi mode //int tmp; //if (MyPID==0) cin >> tmp; //Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact); // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, map); Epetra_CrsMatrix A(Copy, map, 0); Epetra_Vector x(map); Epetra_Vector b(map); Epetra_Vector xexact(map); Epetra_Time FillTimer(Comm); x.Export(*readx, exporter, Add); b.Export(*readb, exporter, Add); xexact.Export(*readxexact, exporter, Add); Comm.Barrier(); double vectorRedistributeTime = FillTimer.ElapsedTime(); A.Export(*readA, exporter, Add); Comm.Barrier(); double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime; assert(A.FillComplete()==0); Comm.Barrier(); double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime; if (Comm.MyPID()==0) { cout << "\n\n****************************************************" << endl; cout << "\n Vector redistribute time (sec) = " << vectorRedistributeTime<< endl; cout << " Matrix redistribute time (sec) = " << matrixRedistributeTime << endl; cout << " Transform to Local time (sec) = " << fillCompleteTime << endl<< endl; } Epetra_Vector tmp1(*readMap); Epetra_Vector tmp2(map); readA->Multiply(false, *readxexact, tmp1); A.Multiply(false, xexact, tmp2); double residual; tmp1.Norm2(&residual); if (verbose) cout << "Norm of Ax from file = " << residual << endl; tmp2.Norm2(&residual); if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl; //cout << "A from 
file = " << *readA << endl << endl << endl; //cout << "A after dist = " << A << endl << endl << endl; delete readA; delete readx; delete readb; delete readxexact; delete readMap; Comm.Barrier(); bool smallProblem = false; if (A.RowMap().NumGlobalElements()<100) smallProblem = true; if (smallProblem) cout << "Original Matrix = " << endl << A << endl; x.PutScalar(0.0); Epetra_LinearProblem FullProblem(&A, &x, &b); double normb, norma; b.NormInf(&normb); norma = A.NormInf(); if (verbose) cout << "Inf norm of Original Matrix = " << norma << endl << "Inf norm of Original RHS = " << normb << endl; Epetra_Time ReductionTimer(Comm); Epetra_CrsSingletonFilter SingletonFilter; Comm.Barrier(); double reduceInitTime = ReductionTimer.ElapsedTime(); SingletonFilter.Analyze(&A); Comm.Barrier(); double reduceAnalyzeTime = ReductionTimer.ElapsedTime() - reduceInitTime; if (SingletonFilter.SingletonsDetected()) cout << "Singletons found" << endl; else { cout << "Singletons not found" << endl; exit(1); } SingletonFilter.ConstructReducedProblem(&FullProblem); Comm.Barrier(); double reduceConstructTime = ReductionTimer.ElapsedTime() - reduceInitTime; double totalReduceTime = ReductionTimer.ElapsedTime(); if (verbose) cout << "\n\n****************************************************" << endl << " Reduction init time (sec) = " << reduceInitTime<< endl << " Reduction Analyze time (sec) = " << reduceAnalyzeTime << endl << " Construct Reduced Problem time (sec) = " << reduceConstructTime << endl << " Reduction Total time (sec) = " << totalReduceTime << endl<< endl; Statistics(SingletonFilter); Epetra_LinearProblem * ReducedProblem = SingletonFilter.ReducedProblem(); Epetra_CrsMatrix * Ap = dynamic_cast<Epetra_CrsMatrix *>(ReducedProblem->GetMatrix()); Epetra_Vector * bp = (*ReducedProblem->GetRHS())(0); Epetra_Vector * xp = (*ReducedProblem->GetLHS())(0); if (smallProblem) cout << " Reduced Matrix = " << endl << *Ap << endl << " LHS before sol = " << endl << *xp << endl << " RHS = " << endl 
<< *bp << endl; // Construct ILU preconditioner double elapsed_time, total_flops, MFLOPs; Epetra_Time timer(Comm); int LevelFill = 0; if (argc > 2) LevelFill = atoi(argv[2]); if (verbose) cout << "Using Level Fill = " << LevelFill << endl; int Overlap = 0; if (argc > 3) Overlap = atoi(argv[3]); if (verbose) cout << "Using Level Overlap = " << Overlap << endl; double Athresh = 0.0; if (argc > 4) Athresh = atof(argv[4]); if (verbose) cout << "Using Absolute Threshold Value of = " << Athresh << endl; double Rthresh = 1.0; if (argc > 5) Rthresh = atof(argv[5]); if (verbose) cout << "Using Relative Threshold Value of = " << Rthresh << endl; Ifpack_IlukGraph * IlukGraph = 0; Ifpack_CrsRiluk * ILUK = 0; if (LevelFill>-1) { elapsed_time = timer.ElapsedTime(); IlukGraph = new Ifpack_IlukGraph(Ap->Graph(), LevelFill, Overlap); assert(IlukGraph->ConstructFilledGraph()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl; Epetra_Flops fact_counter; elapsed_time = timer.ElapsedTime(); ILUK = new Ifpack_CrsRiluk(*IlukGraph); ILUK->SetFlopCounter(fact_counter); ILUK->SetAbsoluteThreshold(Athresh); ILUK->SetRelativeThreshold(Rthresh); //assert(ILUK->InitValues()==0); int initerr = ILUK->InitValues(*Ap); if (initerr!=0) { cout << endl << Comm << endl << " InitValues error = " << initerr; if (initerr==1) cout << " Zero diagonal found, warning error only"; cout << endl << endl; } assert(ILUK->Factor()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = ILUK->Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute preconditioner values = " << elapsed_time << endl << "MFLOPS for Factorization = " << MFLOPs << endl; //cout << *ILUK << endl; double Condest; ILUK->Condest(false, Condest); if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl; } int Maxiter = 100; double Tolerance = 1.0E-8; Epetra_Flops counter; 
Ap->SetFlopCounter(counter); xp->SetFlopCounter(*Ap); bp->SetFlopCounter(*Ap); if (ILUK!=0) ILUK->SetFlopCounter(*Ap); elapsed_time = timer.ElapsedTime(); double normreducedb, normreduceda; bp->NormInf(&normreducedb); normreduceda = Ap->NormInf(); if (verbose) cout << "Inf norm of Reduced Matrix = " << normreduceda << endl << "Inf norm of Reduced RHS = " << normreducedb << endl; BiCGSTAB(*Ap, *xp, *bp, ILUK, Maxiter, Tolerance, &residual, verbose); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = counter.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute solution = " << elapsed_time << endl << "Number of operations in solve = " << total_flops << endl << "MFLOPS for Solve = " << MFLOPs<< endl << endl; SingletonFilter.ComputeFullSolution(); if (smallProblem) cout << " Reduced LHS after sol = " << endl << *xp << endl << " Full LHS after sol = " << endl << x << endl << " Full Exact LHS = " << endl << xexact << endl; Epetra_Vector resid(x); resid.Update(1.0, x, -1.0, xexact, 0.0); // resid = xcomp - xexact resid.Norm2(&residual); double normx, normxexact; x.Norm2(&normx); xexact.Norm2(&normxexact); if (verbose) cout << "2-norm of computed solution = " << normx << endl << "2-norm of exact solution = " << normxexact << endl << "2-norm of difference between computed and exact solution = " << residual << endl; if (verbose1 && residual>1.0e-5) { if (verbose) cout << "Difference between computed and exact solution appears large..." << endl << "Computing norm of A times this difference. 
If this norm is small, then matrix is singular" << endl; Epetra_Vector bdiff(b); assert(A.Multiply(false, resid, bdiff)==0); assert(bdiff.Norm2(&residual)==0); if (verbose) cout << "2-norm of A times difference between computed and exact solution = " << residual << endl; } if (verbose) cout << "********************************************************" << endl << " Solving again with 2*Ax=2*b" << endl << "********************************************************" << endl; A.Scale(1.0); // A = 2*A b.Scale(1.0); // b = 2*b x.PutScalar(0.0); b.NormInf(&normb); norma = A.NormInf(); if (verbose) cout << "Inf norm of Original Matrix = " << norma << endl << "Inf norm of Original RHS = " << normb << endl; double updateReducedProblemTime = ReductionTimer.ElapsedTime(); SingletonFilter.UpdateReducedProblem(&FullProblem); Comm.Barrier(); updateReducedProblemTime = ReductionTimer.ElapsedTime() - updateReducedProblemTime; if (verbose) cout << "\n\n****************************************************" << endl << " Update Reduced Problem time (sec) = " << updateReducedProblemTime<< endl << "****************************************************" << endl; Statistics(SingletonFilter); if (LevelFill>-1) { Epetra_Flops fact_counter; elapsed_time = timer.ElapsedTime(); int initerr = ILUK->InitValues(*Ap); if (initerr!=0) { cout << endl << Comm << endl << " InitValues error = " << initerr; if (initerr==1) cout << " Zero diagonal found, warning error only"; cout << endl << endl; } assert(ILUK->Factor()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = ILUK->Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute preconditioner values = " << elapsed_time << endl << "MFLOPS for Factorization = " << MFLOPs << endl; double Condest; ILUK->Condest(false, Condest); if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl; } bp->NormInf(&normreducedb); normreduceda = Ap->NormInf(); if (verbose) cout << 
"Inf norm of Reduced Matrix = " << normreduceda << endl << "Inf norm of Reduced RHS = " << normreducedb << endl; BiCGSTAB(*Ap, *xp, *bp, ILUK, Maxiter, Tolerance, &residual, verbose); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = counter.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute solution = " << elapsed_time << endl << "Number of operations in solve = " << total_flops << endl << "MFLOPS for Solve = " << MFLOPs<< endl << endl; SingletonFilter.ComputeFullSolution(); if (smallProblem) cout << " Reduced LHS after sol = " << endl << *xp << endl << " Full LHS after sol = " << endl << x << endl << " Full Exact LHS = " << endl << xexact << endl; resid.Update(1.0, x, -1.0, xexact, 0.0); // resid = xcomp - xexact resid.Norm2(&residual); x.Norm2(&normx); xexact.Norm2(&normxexact); if (verbose) cout << "2-norm of computed solution = " << normx << endl << "2-norm of exact solution = " << normxexact << endl << "2-norm of difference between computed and exact solution = " << residual << endl; if (verbose1 && residual>1.0e-5) { if (verbose) cout << "Difference between computed and exact solution appears large..." << endl << "Computing norm of A times this difference. If this norm is small, then matrix is singular" << endl; Epetra_Vector bdiff(b); assert(A.Multiply(false, resid, bdiff)==0); assert(bdiff.Norm2(&residual)==0); if (verbose) cout << "2-norm of A times difference between computed and exact solution = " << residual << endl; } if (ILUK!=0) delete ILUK; if (IlukGraph!=0) delete IlukGraph; #ifdef EPETRA_MPI MPI_Finalize() ; #endif return 0 ; }
// ============================================================================ int ML_Epetra::MatrixFreePreconditioner:: Compute(const Epetra_CrsGraph& Graph, Epetra_MultiVector& NullSpace) { Epetra_Time TotalTime(Comm()); const int NullSpaceDim = NullSpace.NumVectors(); // get parameters from the list std::string PrecType = List_.get("prec: type", "hybrid"); std::string SmootherType = List_.get("smoother: type", "Jacobi"); std::string ColoringType = List_.get("coloring: type", "JONES_PLASSMAN"); int PolynomialDegree = List_.get("smoother: degree", 3); std::string DiagonalColoringType = List_.get("diagonal coloring: type", "JONES_PLASSMAN"); int MaximumIterations = List_.get("eigen-analysis: max iters", 10); std::string EigenType_ = List_.get("eigen-analysis: type", "cg"); double boost = List_.get("eigen-analysis: boost for lambda max", 1.0); int OutputLevel = List_.get("ML output", -47); if (OutputLevel == -47) OutputLevel = List_.get("output", 10); omega_ = List_.get("smoother: damping", omega_); ML_Set_PrintLevel(OutputLevel); bool LowMemory = List_.get("low memory", true); double AllocationFactor = List_.get("AP allocation factor", 0.5); verbose_ = (MyPID() == 0 && ML_Get_PrintLevel() > 5); // ================ // // check parameters // // ================ // if (PrecType == "presmoother only") PrecType_ = ML_MFP_PRESMOOTHER_ONLY; else if (PrecType == "hybrid") PrecType_ = ML_MFP_HYBRID; else if (PrecType == "additive") PrecType_ = ML_MFP_ADDITIVE; else ML_CHK_ERR(-3); // not recognized if (SmootherType == "none") SmootherType_ = ML_MFP_NONE; else if (SmootherType == "Jacobi") SmootherType_ = ML_MFP_JACOBI; else if (SmootherType == "block Jacobi") SmootherType_ = ML_MFP_BLOCK_JACOBI; else if (SmootherType == "Chebyshev") SmootherType_ = ML_MFP_CHEBY; else ML_CHK_ERR(-4); // not recognized if (AllocationFactor <= 0.0) ML_CHK_ERR(-1); // should be positive // =============================== // // basic checkings and some output // // 
=============================== // int OperatorDomainPoints = Operator_.OperatorDomainMap().NumGlobalPoints(); int OperatorRangePoints = Operator_.OperatorRangeMap().NumGlobalPoints(); int GraphBlockRows = Graph.NumGlobalBlockRows(); int GraphNnz = Graph.NumGlobalNonzeros(); NumPDEEqns_ = OperatorRangePoints / GraphBlockRows; NumMyBlockRows_ = Graph.NumMyBlockRows(); if (OperatorDomainPoints != OperatorRangePoints) ML_CHK_ERR(-1); // only square matrices if (OperatorRangePoints % NumPDEEqns_ != 0) ML_CHK_ERR(-2); // num PDEs seems not constant if (verbose_) { ML_print_line("=",78); std::cout << "*** " << std::endl; std::cout << "*** ML_Epetra::MatrixFreePreconditioner" << std::endl; std::cout << "***" << std::endl; std::cout << "Number of rows and columns = " << OperatorDomainPoints << std::endl; std::cout << "Number of rows per processor = " << OperatorDomainPoints / Comm().NumProc() << " (on average)" << std::endl; std::cout << "Number of rows in the graph = " << GraphBlockRows << std::endl; std::cout << "Number of nonzeros in the graph = " << GraphNnz << std::endl; std::cout << "Processors used in computation = " << Comm().NumProc() << std::endl; std::cout << "Number of PDE equations = " << NumPDEEqns_ << std::endl; std::cout << "Null space dimension = " << NullSpaceDim << std::endl; std::cout << "Preconditioner type = " << PrecType << std::endl; std::cout << "Smoother type = " << SmootherType << std::endl; std::cout << "Coloring type = " << ColoringType << std::endl; std::cout << "Allocation factor = " << AllocationFactor << std::endl; std::cout << "Number of V-cycles for C = " << List_.sublist("ML list").get("cycle applications", 1) << std::endl; std::cout << std::endl; } ResetStartTime(); // ==================================== // // compute the inverse of the diagonal, // // control that no elements are zero. 
// // ==================================== // for (int i = 0; i < InvPointDiagonal_->MyLength(); ++i) if ((*InvPointDiagonal_)[i] != 0.0) (*InvPointDiagonal_)[i] = 1.0 / (*InvPointDiagonal_)[i]; // ========================================================= // // Setup the smoother. I need to extract the block diagonal // // only if block Jacobi is used. For Chebyshev, I scale with // // the point diagonal only. In this latter case, I need to // // compute lambda_max of the scaled operator. // // ========================================================= // // probes for the block diagonal of the matrix. if (SmootherType_ == ML_MFP_JACOBI || SmootherType_ == ML_MFP_NONE) { // do-nothing here } else if (SmootherType_ == ML_MFP_BLOCK_JACOBI) { if (verbose_); std::cout << "Diagonal coloring type = " << DiagonalColoringType << std::endl; ML_CHK_ERR(GetBlockDiagonal(Graph, DiagonalColoringType)); AddAndResetStartTime("block diagonal construction", true); } else if (SmootherType_ == ML_MFP_CHEBY) { double lambda_min = 0.0; double lambda_max = 0.0; Teuchos::ParameterList IFPACKList; if (EigenType_ == "power-method") { ML_CHK_ERR(Ifpack_Chebyshev::PowerMethod(Operator_, *InvPointDiagonal_, MaximumIterations, lambda_max)); } else if(EigenType_ == "cg") { ML_CHK_ERR(Ifpack_Chebyshev::CG(Operator_, *InvPointDiagonal_, MaximumIterations, lambda_min, lambda_max)); } else ML_CHK_ERR(-1); // not recognized if (verbose_) { std::cout << "Using Chebyshev smoother of degree " << PolynomialDegree << std::endl; std::cout << "Estimating eigenvalues using " << EigenType_ << std::endl; std::cout << "lambda_min = " << lambda_min << ", "; std::cout << "lambda_max = " << lambda_max << std::endl; } IFPACKList.set("chebyshev: min eigenvalue", lambda_min); IFPACKList.set("chebyshev: max eigenvalue", boost * lambda_max); // FIXME: this allocates a new std::vector inside IFPACKList.set("chebyshev: operator inv diagonal", InvPointDiagonal_.get()); IFPACKList.set("chebyshev: degree", 
PolynomialDegree); PreSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_))); if (PreSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error? IFPACKList.set("chebyshev: zero starting solution", true); ML_CHK_ERR(PreSmoother_->SetParameters(IFPACKList)); ML_CHK_ERR(PreSmoother_->Initialize()); ML_CHK_ERR(PreSmoother_->Compute()); PostSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_))); if (PostSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error? IFPACKList.set("chebyshev: zero starting solution", false); ML_CHK_ERR(PostSmoother_->SetParameters(IFPACKList)); ML_CHK_ERR(PostSmoother_->Initialize()); ML_CHK_ERR(PostSmoother_->Compute()); } // ========================================================= // // building P and R for block graph. This is done by working // // on the Graph_ object. Support is provided for local // // aggregation schemes only so that all is basically local. // // Then, build the block graph coarse problem. // // ========================================================= // // ML wrapper for Graph_ ML_Operator* Graph_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraCrsGraph(const_cast<Epetra_CrsGraph*>(&Graph), Graph_ML); ML_Aggregate* BlockAggr_ML = 0; ML_Operator* BlockPtent_ML = 0, *BlockRtent_ML = 0,* CoarseGraph_ML = 0; if (verbose_) std::cout << std::endl; ML_CHK_ERR(Coarsen(Graph_ML, &BlockAggr_ML, &BlockPtent_ML, &BlockRtent_ML, &CoarseGraph_ML)); if (verbose_) std::cout << std::endl; Epetra_CrsMatrix* GraphCoarse; ML_CHK_ERR(ML_Operator2EpetraCrsMatrix(CoarseGraph_ML, GraphCoarse)); // used later to estimate the entries in AP ML_Operator* CoarseAP_ML = ML_Operator_Create(Comm_ML()); ML_2matmult(Graph_ML, BlockPtent_ML, CoarseAP_ML, ML_CSR_MATRIX); int AP_MaxNnzRow, itmp = CoarseAP_ML->max_nz_per_row; Comm().MaxAll(&itmp, &AP_MaxNnzRow, 1); ML_Operator_Destroy(&CoarseAP_ML); int NumAggregates = BlockPtent_ML->invec_leng; ML_Operator_Destroy(&BlockRtent_ML); ML_Operator_Destroy(&CoarseGraph_ML); 
AddAndResetStartTime("construction of block C, R, and P", true); if (verbose_) std::cout << std::endl; // ================================================== // // coloring of block graph: // // - color of block row `i' is given by `ColorMap[i]' // // - number of colors is ColorMap.NumColors(). // // ================================================== // ResetStartTime(); CrsGraph_MapColoring* MapColoringTransform; if (ColoringType == "JONES_PLASSMAN") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::JONES_PLASSMAN, 0, false, 0); else if (ColoringType == "PSEUDO_PARALLEL") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::PSEUDO_PARALLEL, 0, false, 0); else if (ColoringType == "GREEDY") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::GREEDY, 0, false, 0); else if (ColoringType == "LUBY") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::LUBY, 0, false, 0); else ML_CHK_ERR(-1); Epetra_MapColoring* ColorMap = &(*MapColoringTransform)(const_cast<Epetra_CrsGraph&>(GraphCoarse->Graph())); // move the information from ColorMap to std::vector Colors const int NumColors = ColorMap->MaxNumColors(); RefCountPtr<Epetra_IntSerialDenseVector> Colors = rcp(new Epetra_IntSerialDenseVector(GraphCoarse->Graph().NumMyRows())); for (int i = 0; i < GraphCoarse->Graph().NumMyRows(); ++i) (*Colors)[i] = (*ColorMap)[i]; delete MapColoringTransform; delete ColorMap; ColorMap = 0; delete GraphCoarse; AddAndResetStartTime("coarse graph coloring", true); if (verbose_) std::cout << std::endl; // get some other information about the aggregates, to be used // in the QR factorization of the null space. NodesOfAggregate // contains the local ID of block rows contained in each aggregate. 
// FIXME: make it faster std::vector< std::vector<int> > NodesOfAggregate(NumAggregates); for (int i = 0; i < Graph.NumMyBlockRows(); ++i) { int AID = BlockAggr_ML->aggr_info[0][i]; NodesOfAggregate[AID].push_back(i); } int MaxAggrSize = 0; for (int i = 0; i < NumAggregates; ++i) { const int& MySize = NodesOfAggregate[i].size(); if (MySize > MaxAggrSize) MaxAggrSize = MySize; } // collect aggregate information, and mark all nodes that are // connected with each aggregate. These nodes will have a possible // nonzero entry after the matrix-matrix product between the Operator_ // and the tentative prolongator. std::vector<vector<int> > aggregates(NumAggregates); std::vector<int>::iterator iter; for (int i = 0; i < NumAggregates; ++i) aggregates[i].reserve(MaxAggrSize); for (int i = 0; i < Graph.NumMyBlockRows(); ++i) { int AID = BlockAggr_ML->aggr_info[0][i]; int NumEntries; int* Indices; Graph.ExtractMyRowView(i, NumEntries, Indices); for (int k = 0; k < NumEntries; ++k) { // FIXME: use hash?? const int& GCID = Graph.ColMap().GID(Indices[k]); iter = find(aggregates[AID].begin(), aggregates[AID].end(), GCID); if (iter == aggregates[AID].end()) aggregates[AID].push_back(GCID); } } int* BlockNodeList = Graph.ColMap().MyGlobalElements(); // finally get rid of the ML_Aggregate structure. 
ML_Aggregate_Destroy(&BlockAggr_ML); const Epetra_Map& FineMap = Operator_.OperatorDomainMap(); Epetra_Map CoarseMap(-1, NumAggregates * NullSpaceDim, 0, Comm()); RefCountPtr<Epetra_Map> BlockNodeListMap = rcp(new Epetra_Map(-1, Graph.ColMap().NumMyElements(), BlockNodeList, 0, Comm())); std::vector<int> NodeList(Graph.ColMap().NumMyElements() * NumPDEEqns_); for (int i = 0; i < Graph.ColMap().NumMyElements(); ++i) for (int m = 0; m < NumPDEEqns_; ++m) NodeList[i * NumPDEEqns_ + m] = BlockNodeList[i] * NumPDEEqns_ + m; RefCountPtr<Epetra_Map> NodeListMap = rcp(new Epetra_Map(-1, NodeList.size(), &NodeList[0], 0, Comm())); AddAndResetStartTime("data structures", true); // ====================== // // process the null space // // ====================== // // CHECKME Epetra_MultiVector NewNullSpace(CoarseMap, NullSpaceDim); NewNullSpace.PutScalar(0.0); if (NullSpaceDim == 1) { double* ns_ptr = NullSpace.Values(); for (int AID = 0; AID < NumAggregates; ++AID) { double dtemp = 0.0; for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++) for (int m = 0; m < NumPDEEqns_; ++m) { const int& pos = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; dtemp += (ns_ptr[pos] * ns_ptr[pos]); } dtemp = std::sqrt(dtemp); NewNullSpace[0][AID] = dtemp; dtemp = 1.0 / dtemp; for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++) for (int m = 0; m < NumPDEEqns_; ++m) ns_ptr[NodesOfAggregate[AID][j] * NumPDEEqns_ + m] *= dtemp; } } else { // FIXME std::vector<double> qr_ptr(MaxAggrSize * NumPDEEqns_ * MaxAggrSize * NumPDEEqns_); std::vector<double> tmp_ptr(MaxAggrSize * NumPDEEqns_ * NullSpaceDim); std::vector<double> work(NullSpaceDim); int info; for (int AID = 0; AID < NumAggregates; ++AID) { int MySize = NodesOfAggregate[AID].size(); int MyFullSize = NodesOfAggregate[AID].size() * NumPDEEqns_; int lwork = NullSpaceDim; for (int k = 0; k < NullSpaceDim; ++k) for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m] = 
NullSpace[k][NodesOfAggregate[AID][j] * NumPDEEqns_ + m]; DGEQRF_F77(&MyFullSize, (int*)&NullSpaceDim, &qr_ptr[0], &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info); ML_CHK_ERR(info); if (work[0] > lwork) work.resize((int) work[0]); // the upper triangle of qr_tmp is now R, so copy that into the // new nullspace for (int j = 0; j < NullSpaceDim; j++) for (int k = j; k < NullSpaceDim; k++) NewNullSpace[k][AID * NullSpaceDim + j] = qr_ptr[j + MyFullSize * k]; // to get this block of P, need to run qr_tmp through another LAPACK // function: DORGQR_F77(&MyFullSize, (int*)&NullSpaceDim, (int*)&NullSpaceDim, &qr_ptr[0], &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info); ML_CHK_ERR(info); // dgeqtr returned a non-zero if (work[0] > lwork) work.resize((int) work[0]); // insert the Q block into the null space for (int k = 0; k < NullSpaceDim; ++k) for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) { int LRID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; double& val = qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m]; NullSpace[k][LRID] = val; } } } AddAndResetStartTime("null space setup", true); if (verbose_) std::cout << "Number of colors on processor " << Comm().MyPID() << " = " << NumColors << std::endl; if (verbose_) std::cout << "Maximum number of colors = " << NumColors << std::endl; RefCountPtr<Epetra_FECrsMatrix> AP; // try to get a good estimate of the nonzeros per row. // This is a compromize between efficiency -- that is, reduce // the memory allocation processes, and memory usage -- that, is // overestimating can actually kill the code. Basically, this is // all junk due to our dear friend, the Cray XT3. AP = rcp(new Epetra_FECrsMatrix(Copy, FineMap, (int) (AllocationFactor * AP_MaxNnzRow * NullSpaceDim))); if (AP.get() == 0) throw(-1); if (!LowMemory) { // ================================================= // // allocate one big chunk of memory, and use View // // to create Epetra_MultiVectors. 
Note that // // NumColors * NullSpace can indeed be a quite large // // value. To reduce the memory consumption, both // // ColoredAP and ExtColoredAP use the same memory // // array. // // ================================================= // Epetra_MultiVector* ColoredP; std::vector<double> ColoredAP_ptr; try { ColoredP = new Epetra_MultiVector(FineMap, NumColors * NullSpaceDim); ColoredAP_ptr.resize(NumColors * NullSpaceDim * NodeListMap->NumMyPoints()); } catch (std::exception& rhs) { catch_message("the allocation of ColoredP", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("the allocation of ColoredP", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } int ColoredAP_LDA = NodeListMap->NumMyPoints(); ColoredP->PutScalar(0.0); for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i) { int allocated = 1; int NumEntries; int Indices; double Values; int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated, &Indices,&Values,&NumEntries); if (ierr < 0) ML_CHK_ERR(-1); assert (NumEntries == 1); // this is the block P const int& Color = (*Colors)[Indices] - 1; for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) (*ColoredP)[(Color * NullSpaceDim + j)][i * NumPDEEqns_ + k] = NullSpace[j][i * NumPDEEqns_ + k]; } ML_Operator_Destroy(&BlockPtent_ML); Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), &ColoredAP_ptr[0], ColoredAP_LDA, NumColors * NullSpaceDim); // move ColoredAP into ColoredP. This should not be required. 
// but I prefer to skip strange games with View pointers Operator_.Apply(*ColoredP, ColoredAP); *ColoredP = ColoredAP; // FIXME: only if NumProc > 1 Epetra_MultiVector ExtColoredAP(View, *NodeListMap, &ColoredAP_ptr[0], ColoredAP_LDA, NumColors * NullSpaceDim); try { Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap()); ExtColoredAP.Import(*ColoredP, Importer, Insert); } catch (std::exception& rhs) { catch_message("importing of ExtColoredAP", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("importing of ExtColoredAP", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } delete ColoredP; AddAndResetStartTime("computation of AP", true); // populate the actual AP operator, skip some controls to make it faster for (int i = 0; i < NumAggregates; ++i) { for (int j = 0; j < (int) (aggregates[i].size()); ++j) { int GRID = aggregates[i][j]; int LRID = BlockNodeListMap->LID(GRID); // this is the block ID //assert (LRID != -1); int GCID = CoarseMap.GID(i * NullSpaceDim); //assert (GCID != -1); int color = (*Colors)[i] - 1; for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) { double val = ExtColoredAP[color * NullSpaceDim + j][LRID * NumPDEEqns_ + k]; if (val != 0.0) { int GRID2 = GRID * NumPDEEqns_ + k; int GCID2 = GCID + j; AP->InsertGlobalValues(1, &GRID2, 1, &GCID2, &val); //if (ierr < 0) ML_CHK_ERR(ierr); } } } } } else { // =============================================================== // // apply the operator one color at-a-time. This requires NumColors // // cycles over BlockPtent. However, the memory requirements are // // drastically reduced. As for low-memory == false, both ColoredAP // // and ExtColoredAP point to the same memory location. 
// // =============================================================== // if (verbose_) std::cout << "Using low-memory computation for AP" << std::endl; Epetra_MultiVector ColoredP(FineMap, NullSpaceDim); std::vector<double> ColoredAP_ptr; try { ColoredAP_ptr.resize(NullSpaceDim * NodeListMap->NumMyPoints()); } catch (std::exception& rhs) { catch_message("resizing of ColoredAP_pt", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("resizing of ColoredAP_pt", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), NullSpaceDim); Epetra_MultiVector ExtColoredAP(View, *NodeListMap, &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), NullSpaceDim); Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap()); for (int ic = 0; ic < NumColors; ++ic) { if (ML_Get_PrintLevel() > 8 && Comm().MyPID() == 0) { if (ic % 20 == 0) std::cout << "Processing color " << flush; std::cout << ic << " " << flush; if (ic % 20 == 19 || ic == NumColors - 1) std::cout << std::endl; if (ic == NumColors - 1) std::cout << std::endl; } ColoredP.PutScalar(0.0); for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i) { int allocated = 1; int NumEntries; int Indices; double Values; int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated, &Indices,&Values,&NumEntries); if (ierr < 0 || // something strange in getrow NumEntries != 1) // this is the block P ML_CHK_ERR(-1); const int& Color = (*Colors)[Indices] - 1; if (Color != ic) continue; // skip this color for this cycle for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) ColoredP[j][i * NumPDEEqns_ + k] = NullSpace[j][i * NumPDEEqns_ + k]; } Operator_.Apply(ColoredP, ColoredAP); ColoredP = ColoredAP; // just to be safe ExtColoredAP.Import(ColoredP, Importer, Insert); // populate the actual AP operator, skip some controls to make it faster std::vector<int> InsertCols(NullSpaceDim * NumPDEEqns_); 
std::vector<double> InsertValues(NullSpaceDim * NumPDEEqns_); for (int i = 0; i < NumAggregates; ++i) { for (int j = 0; j < (int) (aggregates[i].size()); ++j) { int GRID = aggregates[i][j]; int LRID = BlockNodeListMap->LID(GRID); // this is the block ID //assert (LRID != -1); int GCID = CoarseMap.GID(i * NullSpaceDim); //assert (GCID != -1); int color = (*Colors)[i] - 1; if (color != ic) continue; for (int k = 0; k < NumPDEEqns_; ++k) { int count = 0; int GRID2 = GRID * NumPDEEqns_ + k; for (int j = 0; j < NullSpaceDim; ++j) { double val = ExtColoredAP[j][LRID * NumPDEEqns_ + k]; if (val != 0.0) { InsertCols[count] = GCID + j; InsertValues[count] = val; ++count; } } AP->InsertGlobalValues(1, &GRID2, count, &InsertCols[0], &InsertValues[0]); } } } } ML_Operator_Destroy(&BlockPtent_ML); } aggregates.resize(0); BlockNodeListMap = Teuchos::null; NodeListMap = Teuchos::null; Colors = Teuchos::null; AP->GlobalAssemble(false); AP->FillComplete(CoarseMap, FineMap); #if 0 try { AP->OptimizeStorage(); } catch(...) { // a memory error was reported, typically ReportError. // We just continue with fingers crossed. 
} #endif AddAndResetStartTime("computation of the final AP", true); ML_Operator* AP_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraMatrix(AP.get(), AP_ML); // ======== // // create R // // ======== // std::vector<int> REntries(NumAggregates * NullSpaceDim); for (int AID = 0; AID < NumAggregates; ++AID) { for (int m = 0; m < NullSpaceDim; ++m) REntries[AID * NullSpaceDim + m] = NodesOfAggregate[AID].size() * NumPDEEqns_; } R_ = rcp(new Epetra_CrsMatrix(Copy, CoarseMap, &REntries[0], true)); REntries.resize(0); for (int AID = 0; AID < NumAggregates; ++AID) { const int& MySize = NodesOfAggregate[AID].size(); // FIXME: make it faster for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) for (int k = 0; k < NullSpaceDim; ++k) { int LCID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; int GCID = FineMap.GID(LCID); assert (GCID != -1); double& val = NullSpace[k][LCID]; int GRID = CoarseMap.GID(AID * NullSpaceDim + k); int ierr = R_->InsertGlobalValues(GRID, 1, &val, &GCID); if (ierr < 0) ML_CHK_ERR(-1); } } NodesOfAggregate.resize(0); R_->FillComplete(FineMap, CoarseMap); #if 0 try { R_->OptimizeStorage(); } catch(...) { // a memory error was reported, typically ReportError. // We just continue with fingers crossed. } #endif ML_Operator* R_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraMatrix(R_.get(), R_ML); AddAndResetStartTime("computation of R", true); // ======== // // Create C // // ======== // C_ML_ = ML_Operator_Create(Comm_ML()); ML_2matmult(R_ML, AP_ML, C_ML_, ML_MSR_MATRIX); ML_Operator_Destroy(&AP_ML); ML_Operator_Destroy(&R_ML); AP = Teuchos::null; C_ = rcp(new ML_Epetra::RowMatrix(C_ML_, &Comm(), false)); assert (R_->OperatorRangeMap().SameAs(C_->OperatorDomainMap())); TotalTime.ResetStartTime(); AddAndResetStartTime("computation of C", true); if (verbose_) { std::cout << "Matrix-free preconditioner built. Now building solver for C..." 
<< std::endl; } Teuchos::ParameterList& sublist = List_.sublist("ML list"); sublist.set("PDE equations", NullSpaceDim); sublist.set("null space: type", "pre-computed"); sublist.set("null space: dimension", NewNullSpace.NumVectors()); sublist.set("null space: vectors", NewNullSpace.Values()); MLP_ = rcp(new MultiLevelPreconditioner(*C_, sublist, true)); assert (MLP_.get() != 0); IsComputed_ = true; AddAndResetStartTime("computation of the preconditioner for C", true); if (verbose_) { std::cout << std::endl; std::cout << "Total CPU time for construction (all included) = "; std::cout << TotalCPUTime() << std::endl; ML_print_line("=",78); } return(0); }
//========================================================================= int Epetra_RowMatrixTransposer::CreateTranspose (const bool MakeDataContiguous, Epetra_CrsMatrix *& TransposeMatrix, Epetra_Map * TransposeRowMap_in) { // FIXME long long int i, j; if (TransposeCreated_) DeleteData(); // Get rid of existing data first if (TransposeRowMap_in==0) TransposeRowMap_ = (Epetra_Map *) &(OrigMatrix_->OperatorDomainMap()); // Should be replaced with refcount = else TransposeRowMap_ = TransposeRowMap_in; // This routine will work for any RowMatrix object, but will attempt cast the matrix to a CrsMatrix if // possible (because we can then use a View of the matrix and graph, which is much cheaper). // First get the local indices to count how many nonzeros will be in the // transpose graph on each processor Epetra_CrsMatrix * OrigCrsMatrix = dynamic_cast<Epetra_CrsMatrix *>(OrigMatrix_); OrigMatrixIsCrsMatrix_ = (OrigCrsMatrix!=0); // If this pointer is non-zero, the cast to CrsMatrix worked NumMyRows_ = OrigMatrix_->NumMyRows(); NumMyCols_ = OrigMatrix_->NumMyCols(); NumMyRows_ = OrigMatrix_->NumMyRows(); TransNumNz_ = new int[NumMyCols_]; TransIndices_ = new int*[NumMyCols_]; TransValues_ = new double*[NumMyCols_]; int NumIndices; if (OrigMatrixIsCrsMatrix_) { const Epetra_CrsGraph & OrigGraph = OrigCrsMatrix->Graph(); // Get matrix graph for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; for (i=0; i<NumMyRows_; i++) { EPETRA_CHK_ERR(OrigGraph.ExtractMyRowView(i, NumIndices, Indices_)); // Get view of ith row for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]]; } } else { // OrigMatrix is not a CrsMatrix MaxNumEntries_ = 0; int NumEntries; for (i=0; i<NumMyRows_; i++) { OrigMatrix_->NumMyRowEntries(i, NumEntries); MaxNumEntries_ = EPETRA_MAX(MaxNumEntries_, NumEntries); } Indices_ = new int[MaxNumEntries_]; Values_ = new double[MaxNumEntries_]; for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; for (i=0; i<NumMyRows_; i++) { // Get ith row 
EPETRA_CHK_ERR(OrigMatrix_->ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_)); for (j=0; j<NumIndices; j++) ++TransNumNz_[Indices_[j]]; } } // Most of remaining code is common to both cases for(i=0; i<NumMyCols_; i++) { NumIndices = TransNumNz_[i]; if (NumIndices>0) { TransIndices_[i] = new int[NumIndices]; TransValues_[i] = new double[NumIndices]; } } // Now copy values and global indices into newly created transpose storage for (i=0;i<NumMyCols_; i++) TransNumNz_[i] = 0; // Reset transpose NumNz counter for (i=0; i<NumMyRows_; i++) { if (OrigMatrixIsCrsMatrix_) { EPETRA_CHK_ERR(OrigCrsMatrix->ExtractMyRowView(i, NumIndices, Values_, Indices_)); } else { EPETRA_CHK_ERR(OrigMatrix_->ExtractMyRowCopy(i, MaxNumEntries_, NumIndices, Values_, Indices_)); } int ii = OrigMatrix_->RowMatrixRowMap().GID64(i); // FIXME long long for (j=0; j<NumIndices; j++) { int TransRow = Indices_[j]; int loc = TransNumNz_[TransRow]; TransIndices_[TransRow][loc] = ii; TransValues_[TransRow][loc] = Values_[j]; ++TransNumNz_[TransRow]; // increment counter into current transpose row } } // Build Transpose matrix with some rows being shared across processors. 
// We will use a view here since the matrix will not be used for anything else const Epetra_Map & TransMap = OrigMatrix_->RowMatrixColMap(); Epetra_CrsMatrix TempTransA1(View, TransMap, TransNumNz_); TransMyGlobalEquations_ = new int[NumMyCols_]; TransMap.MyGlobalElements(TransMyGlobalEquations_); /* Add rows one-at-a-time */ for (i=0; i<NumMyCols_; i++) { EPETRA_CHK_ERR(TempTransA1.InsertGlobalValues(TransMyGlobalEquations_[i], TransNumNz_[i], TransValues_[i], TransIndices_[i])); } // Note: The following call to FillComplete is currently necessary because // some global constants that are needed by the Export () are computed in this routine const Epetra_Map& domain_map = OrigMatrix_->OperatorDomainMap(); const Epetra_Map& range_map = OrigMatrix_->OperatorRangeMap(); EPETRA_CHK_ERR(TempTransA1.FillComplete(range_map, domain_map, false)); // Now that transpose matrix with shared rows is entered, create a new matrix that will // get the transpose with uniquely owned rows (using the same row distribution as A). TransposeMatrix_ = new Epetra_CrsMatrix(Copy, *TransposeRowMap_,0); // Create an Export object that will move TempTransA around TransposeExporter_ = new Epetra_Export(TransMap, *TransposeRowMap_); EPETRA_CHK_ERR(TransposeMatrix_->Export(TempTransA1, *TransposeExporter_, Add)); EPETRA_CHK_ERR(TransposeMatrix_->FillComplete(range_map, domain_map)); if (MakeDataContiguous) { EPETRA_CHK_ERR(TransposeMatrix_->MakeDataContiguous()); } TransposeMatrix = TransposeMatrix_; TransposeCreated_ = true; return(0); }
// Transforms a linear problem by permuting it to block triangular form (BTF)
// and redistributing the permuted rows across processors.
//
// Steps, in order:
//   1. Pull matrix, LHS and RHS out of `orig` (non-owning RCPs).
//   2. If the matrix is distributed and not already held by one processor,
//      import it onto processor 0.
//   3. Reindex the serial matrix, apply the BTF transform, and record the
//      permutations (rowPerm_, colPerm_) and block layout (blockPtr_, numBlocks_).
//   4. Express the permutation in the original global ids and broadcast it.
//   5. Build NewRowMap_ according to balance_ ("linear" or "isorropia").
//   6. Import graph, matrix, LHS and RHS into the new row distribution.
//
// orig    - the linear problem to transform; its matrix is dynamic_cast to
//           Epetra_CrsMatrix.
// Returns a reference to the newly allocated Epetra_LinearProblem (newObj_).
//
// NOTE: the new graph is built from the row map only; the code assumes the
// BTF permutation is symmetric.
AmesosBTFGlobal_LinearProblem::NewTypeRef
AmesosBTFGlobal_LinearProblem::
operator()( OriginalTypeRef orig )
{
  origObj_ = &orig;

  // Extract the matrix and vectors from the linear problem
  OldRHS_ = Teuchos::rcp( orig.GetRHS(), false );
  OldLHS_ = Teuchos::rcp( orig.GetLHS(), false );
  // NOTE(review): the dynamic_cast result is not checked; if the problem's
  // matrix is not an Epetra_CrsMatrix, OldMatrix_ is null and is dereferenced
  // on the next line.
  OldMatrix_ = Teuchos::rcp( dynamic_cast<Epetra_CrsMatrix *>( orig.GetMatrix() ), false );

  int nGlobal = OldMatrix_->NumGlobalRows();
  int n = OldMatrix_->NumMyRows();

  // Check if the matrix is on one processor.
  int myMatProc = -1, matProc = -1;
  int myPID = OldMatrix_->Comm().MyPID();
  int numProcs = OldMatrix_->Comm().NumProc();

  const Epetra_BlockMap& oldRowMap = OldMatrix_->RowMap();

  // Get some information about the parallel distribution.
  // (The gathered values are not read later in this function; the calls are
  // kept so the communication pattern is unchanged.)
  int maxMyRows = 0;
  std::vector<int> numGlobalElem( numProcs );
  OldMatrix_->Comm().GatherAll(&n, &numGlobalElem[0], 1);
  OldMatrix_->Comm().MaxAll(&n, &maxMyRows, 1);

  // A processor that holds every nonzero owns the whole matrix.
  if (OldMatrix_->NumGlobalNonzeros() == OldMatrix_->NumMyNonzeros())
    myMatProc = myPID;

  // matProc stays -1 when no processor holds the full matrix.
  OldMatrix_->Comm().MaxAll( &myMatProc, &matProc, 1 );

  Teuchos::RCP<Epetra_CrsMatrix> serialMatrix;
  Teuchos::RCP<Epetra_Map> serialMap;
  if( oldRowMap.DistributedGlobal() && matProc == -1)
  {
    // The matrix is distributed and needs to be moved to processor zero.
    // Set the zero processor as the master.
    matProc = 0;
    serialMap = Teuchos::rcp( new Epetra_Map( Epetra_Util::Create_Root_Map( OldMatrix_->RowMap(), matProc ) ) );

    Epetra_Import serialImporter( *serialMap, OldMatrix_->RowMap() );
    serialMatrix = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *serialMap, 0 ) );
    serialMatrix->Import( *OldMatrix_, serialImporter, Insert );
    serialMatrix->FillComplete();
  }
  else {
    // The old matrix has already been moved to one processor (matProc).
    serialMatrix = OldMatrix_;
  }

  if( debug_ )
  {
    std::cout << "Original (serial) Matrix:\n";
    std::cout << *serialMatrix << std::endl;
  }

  // Obtain the current row and column orderings
  std::vector<int> origGlobalRows(nGlobal), origGlobalCols(nGlobal);
  serialMatrix->RowMap().MyGlobalElements( &origGlobalRows[0] );
  serialMatrix->ColMap().MyGlobalElements( &origGlobalCols[0] );

  // Perform reindexing on the full serial matrix (needed for BTF).
  Epetra_Map reIdxMap( serialMatrix->RowMap().NumGlobalElements(), serialMatrix->RowMap().NumMyElements(), 0, serialMatrix->Comm() );
  Teuchos::RCP<EpetraExt::ViewTransform<Epetra_CrsMatrix> > reIdxTrans =
    Teuchos::rcp( new EpetraExt::CrsMatrix_Reindex( reIdxMap ) );
  Epetra_CrsMatrix newSerialMatrix = (*reIdxTrans)( *serialMatrix );
  reIdxTrans->fwd();

  // Compute and apply BTF to the serial CrsMatrix and has been filtered by the threshold
  EpetraExt::AmesosBTF_CrsMatrix BTFTrans( threshold_, upperTri_, verbose_, debug_ );
  Epetra_CrsMatrix newSerialMatrixBTF = BTFTrans( newSerialMatrix );

  rowPerm_ = BTFTrans.RowPerm();
  colPerm_ = BTFTrans.ColPerm();
  blockPtr_ = BTFTrans.BlockPtr();
  numBlocks_ = BTFTrans.NumBlocks();

  if (myPID == matProc && verbose_) {
    bool isSym = true;
    for (int i=0; i<nGlobal; ++i) {
      if (rowPerm_[i] != colPerm_[i]) {
        isSym = false;
        break;
      }
    }
    std::cout << "The BTF permutation symmetry (0=false,1=true) is : " << isSym << std::endl;
  }

  // Compute the permutation w.r.t. the original row and column GIDs.
  std::vector<int> origGlobalRowsPerm(nGlobal), origGlobalColsPerm(nGlobal);
  if (myPID == matProc) {
    for (int i=0; i<nGlobal; ++i) {
      origGlobalRowsPerm[i] = origGlobalRows[ rowPerm_[i] ];
      origGlobalColsPerm[i] = origGlobalCols[ colPerm_[i] ];
    }
  }
  OldMatrix_->Comm().Broadcast( &origGlobalRowsPerm[0], nGlobal, matProc );
  OldMatrix_->Comm().Broadcast( &origGlobalColsPerm[0], nGlobal, matProc );

  // Generate the full serial matrix that imports according to the previously computed BTF.
  Epetra_CrsMatrix newSerialMatrixT( Copy, newSerialMatrixBTF.RowMap(), 0 );
  newSerialMatrixT.Import( newSerialMatrix, *(BTFTrans.Importer()), Insert );
  newSerialMatrixT.FillComplete();

  if( debug_ )
  {
    std::cout << "Original (serial) Matrix permuted via BTF:\n";
    std::cout << newSerialMatrixT << std::endl;
  }

  // Perform reindexing on the full serial matrix (needed for balancing).
  Epetra_Map reIdxMap2( newSerialMatrixT.RowMap().NumGlobalElements(), newSerialMatrixT.RowMap().NumMyElements(), 0, newSerialMatrixT.Comm() );
  Teuchos::RCP<EpetraExt::ViewTransform<Epetra_CrsMatrix> > reIdxTrans2 =
    Teuchos::rcp( new EpetraExt::CrsMatrix_Reindex( reIdxMap2 ) );
  Epetra_CrsMatrix tNewSerialMatrixT = (*reIdxTrans2)( newSerialMatrixT );
  reIdxTrans2->fwd();

  // NOTE(review): if balance_ is neither "linear" nor "isorropia", this
  // function does not assign NewRowMap_ before it is dereferenced below.
  if (balance_ == "linear") {

    // Distribute block somewhat evenly across processors.
    // rowDist[p] is the first permuted row owned by processor p;
    // rowDist[numProcs] is the end marker.
    std::vector<int> rowDist(numProcs+1,0);
    int balRows = nGlobal / numProcs + 1;
    int numRows = balRows, currProc = 1;

    // Walk the block boundaries and start a new processor whenever the next
    // boundary would exceed the current row budget.  The walk stops once the
    // boundaries are exhausted OR every processor has a start row, so neither
    // blockPtr_ nor rowDist is indexed past its end.  Index 0 is skipped
    // because a split there would need blockPtr_[-1].
    // Assumes blockPtr_[numBlocks_] is the end-of-last-block entry, as the
    // blockPtr_[grid+1] access in the isorropia branch also does - TODO confirm.
    for ( int i=1; i<=numBlocks_ && currProc<numProcs; ++i ) {
      if (blockPtr_[i] > numRows) {
        rowDist[currProc++] = blockPtr_[i-1];
        numRows = blockPtr_[i-1] + balRows;
      }
    }
    // Processors that did not receive a split point get an empty row range.
    for ( ; currProc<numProcs; ++currProc ) {
      rowDist[currProc] = nGlobal;
    }
    rowDist[numProcs] = nGlobal;

    // Create new Map based on this linear distribution.
    int numMyBalancedRows = rowDist[myPID+1]-rowDist[myPID];

    // Pointer arithmetic (rather than operator[]) keeps this valid when this
    // processor's range is empty and starts at nGlobal.
    NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, numMyBalancedRows, &origGlobalRowsPerm[0] + rowDist[myPID], 0, OldMatrix_->Comm() ) );
    // Right now we do not explicitly build the column map and assume the BTF permutation is symmetric!
    //NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, nGlobal, &colPerm_[0], 0, OldMatrix_->Comm() ) );

    if ( verbose_ )
      std::cout << "Processor " << myPID << " has " << numMyBalancedRows << " rows." << std::endl;
  }
  else if (balance_ == "isorropia") {

    // Compute block adjacency graph for partitioning.
    std::vector<double> weight;
    Teuchos::RCP<Epetra_CrsGraph> blkGraph;
    EpetraExt::BlockAdjacencyGraph adjGraph;
    blkGraph = adjGraph.compute( const_cast<Epetra_CrsGraph&>(tNewSerialMatrixT.Graph()),
                                 numBlocks_, blockPtr_, weight, verbose_);
    Epetra_Vector rowWeights( View, blkGraph->Map(), &weight[0] );

    // Call Isorropia to rebalance this graph.
    Teuchos::RCP<Epetra_CrsGraph> balancedGraph =
      Isorropia::Epetra::create_balanced_copy( *blkGraph, rowWeights );

    int myNumBlkRows = balancedGraph->NumMyRows();

    // Expand each locally owned block into the original row ids it contains.
    std::vector<int> newRangeElements(nGlobal);
    int grid = 0, myElements = 0;
    for (int i=0; i<myNumBlkRows; ++i) {
      grid = balancedGraph->GRID( i );
      for (int j=blockPtr_[grid]; j<blockPtr_[grid+1]; ++j) {
        newRangeElements[myElements++] = origGlobalRowsPerm[j];
      }
    }

    NewRowMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, myElements, &newRangeElements[0], 0, OldMatrix_->Comm() ) );
    // Right now we do not explicitly build the column map and assume the BTF permutation is symmetric!
    //NewColMap_ = Teuchos::rcp( new Epetra_Map( nGlobal, nGlobal, &colPerm_[0], 0, OldMatrix_->Comm() ) );

    if ( verbose_ )
      std::cout << "Processor " << myPID << " has " << myElements << " rows." << std::endl;
  }

  // Use New Domain and Range Maps to Generate Importer
  //for now, assume they start out as identical
  Epetra_Map OldRowMap = OldMatrix_->RowMap();

  if( debug_ )
  {
    std::cout << "New Row Map\n";
    std::cout << *NewRowMap_ << std::endl;
    //cout << "New Col Map\n";
    //cout << *NewColMap_ << endl;
  }

  // Generate New Graph
  // NOTE: Right now we are creating the graph, assuming that the permutation is symmetric!
  // NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, *NewColMap_, 0 ) );
  NewGraph_ = Teuchos::rcp( new Epetra_CrsGraph( Copy, *NewRowMap_, 0 ) );
  Importer_ = Teuchos::rcp( new Epetra_Import( *NewRowMap_, OldRowMap ) );   // old -> new distribution
  Importer2_ = Teuchos::rcp( new Epetra_Import( OldRowMap, *NewRowMap_ ) );  // new -> old distribution
  NewGraph_->Import( OldMatrix_->Graph(), *Importer_, Insert );
  NewGraph_->FillComplete();

  if( debug_ )
  {
    std::cout << "NewGraph\n";
    std::cout << *NewGraph_;
  }

  // Create new linear problem and import information from old linear problem
  NewMatrix_ = Teuchos::rcp( new Epetra_CrsMatrix( Copy, *NewGraph_ ) );
  NewMatrix_->Import( *OldMatrix_, *Importer_, Insert );
  NewMatrix_->FillComplete();

  NewLHS_ = Teuchos::rcp( new Epetra_MultiVector( *NewRowMap_, OldLHS_->NumVectors() ) );
  NewLHS_->Import( *OldLHS_, *Importer_, Insert );

  NewRHS_ = Teuchos::rcp( new Epetra_MultiVector( *NewRowMap_, OldRHS_->NumVectors() ) );
  NewRHS_->Import( *OldRHS_, *Importer_, Insert );

  if( debug_ )
  {
    std::cout << "New Matrix\n";
    std::cout << *NewMatrix_ << std::endl;
  }

  newObj_ = new Epetra_LinearProblem( &*NewMatrix_, &*NewLHS_, &*NewRHS_ );

  return *newObj_;
}