void ModeLaplace1DQ1::memoryInfo() const { int myPid = MyComm.MyPID(); Epetra_RowMatrix *Mat = dynamic_cast<Epetra_RowMatrix *>(M); if ((myPid == 0) && (Mat)) { cout << " Total number of nonzero entries in mass matrix = "; cout.width(15); cout << Mat->NumGlobalNonzeros() << endl; double memSize = Mat->NumGlobalNonzeros()*(sizeof(double) + sizeof(int)); memSize += 2*Mat->NumGlobalRows()*sizeof(int); cout << " Memory requested for mass matrix per processor = (EST) "; cout.precision(2); cout.width(6); cout.setf(ios::fixed, ios::floatfield); cout << memSize/1024.0/1024.0/MyComm.NumProc() << " MB " << endl; cout << endl; } Mat = dynamic_cast<Epetra_RowMatrix *>(K); if ((myPid == 0) && (Mat)) { cout << " Total number of nonzero entries in stiffness matrix = "; cout.width(15); cout << Mat->NumGlobalNonzeros() << endl; double memSize = Mat->NumGlobalNonzeros()*(sizeof(double) + sizeof(int)); memSize += 2*Mat->NumGlobalRows()*sizeof(int); cout << " Memory requested for stiffness matrix per processor = (EST) "; cout.precision(2); cout.width(6); cout.setf(ios::fixed, ios::floatfield); cout << memSize/1024.0/1024.0/MyComm.NumProc() << " MB " << endl; cout << endl; } }
void show_matrix(const char *txt, const Epetra_RowMatrix &matrix, const Epetra_Comm &comm) { int me = comm.MyPID(); if (comm.NumProc() > 10){ if (me == 0){ std::cout << txt << std::endl; std::cout << "Printed matrix format only works for 10 or fewer processes" << std::endl; } return; } int numRows = matrix.NumGlobalRows(); int numCols = matrix.NumGlobalCols(); if ((numRows > 200) || (numCols > 500)){ if (me == 0){ std::cerr << txt << std::endl; std::cerr << "show_matrix: problem is too large to display" << std::endl; } return; } int *myA = new int [numRows * numCols]; make_my_A(matrix, myA, comm); printMatrix(txt, myA, NULL, NULL, numRows, numCols, comm); delete [] myA; }
void show_matrix(const char *txt, const Epetra_LinearProblem &problem, const Epetra_Comm &comm) { int me = comm.MyPID(); if (comm.NumProc() > 10){ if (me == 0){ std::cout << txt << std::endl; std::cout << "Printed matrix format only works for 10 or fewer processes" << std::endl; } return; } Epetra_RowMatrix *matrix = problem.GetMatrix(); Epetra_MultiVector *lhs = problem.GetLHS(); Epetra_MultiVector *rhs = problem.GetRHS(); int numRows = matrix->NumGlobalRows(); int numCols = matrix->NumGlobalCols(); if ((numRows > 200) || (numCols > 500)){ if (me == 0){ std::cerr << txt << std::endl; std::cerr << "show_matrix: problem is too large to display" << std::endl; } return; } int *myA = new int [numRows * numCols]; make_my_A(*matrix, myA, comm); int *myX = new int [numCols]; int *myB = new int [numRows]; memset(myX, 0, sizeof(int) * numCols); memset(myB, 0, sizeof(int) * numRows); const Epetra_BlockMap &lhsMap = lhs->Map(); const Epetra_BlockMap &rhsMap = rhs->Map(); int base = lhsMap.IndexBase(); for (int j=0; j < lhsMap.NumMyElements(); j++){ int colGID = lhsMap.GID(j); myX[colGID - base] = me + 1; } for (int i=0; i < rhsMap.NumMyElements(); i++){ int rowGID = rhsMap.GID(i); myB[rowGID - base] = me + 1; } printMatrix(txt, myA, myX, myB, numRows, numCols, comm); delete [] myA; delete [] myX; delete [] myB; }
// ============================================================================ void EpetraExt::XMLWriter:: Write(const std::string& Label, const Epetra_RowMatrix& Matrix) { TEUCHOS_TEST_FOR_EXCEPTION(IsOpen_ == false, std::logic_error, "No file has been opened"); int Rows = Matrix.NumGlobalRows(); int Cols = Matrix.NumGlobalRows(); int Nonzeros = Matrix.NumGlobalNonzeros(); if (Comm_.MyPID() == 0) { std::ofstream of(FileName_.c_str(), std::ios::app); of << "<PointMatrix Label=\"" << Label << '"' << " Rows=\"" << Rows << '"' << " Columns=\"" << Cols<< '"' << " Nonzeros=\"" << Nonzeros << '"' << " Type=\"double\" StartingIndex=\"0\">" << std::endl; } int Length = Matrix.MaxNumEntries(); std::vector<int> Indices(Length); std::vector<double> Values(Length); for (int iproc = 0; iproc < Comm_.NumProc(); iproc++) { if (iproc == Comm_.MyPID()) { std::ofstream of(FileName_.c_str(), std::ios::app); of.precision(15); for (int i = 0; i < Matrix.NumMyRows(); ++i) { int NumMyEntries; Matrix.ExtractMyRowCopy(i, Length, NumMyEntries, &Values[0], &Indices[0]); int GRID = Matrix.RowMatrixRowMap().GID(i); for (int j = 0; j < NumMyEntries; ++j) of << GRID << " " << Matrix.RowMatrixColMap().GID(Indices[j]) << " " << std::setiosflags(std::ios::scientific) << Values[j] << std::endl; } of.close(); } Comm_.Barrier(); } if (Comm_.MyPID() == 0) { std::ofstream of(FileName_.c_str(), std::ios::app); of << "</PointMatrix>" << std::endl; of.close(); } }
int SchwarzSolver::solve() { // compute some statistics for the original problem double condest = -1; Epetra_LinearProblem problem(_stiffnessMatrix.get(), _lhs.get(), _rhs.get()); AztecOO solverForConditionEstimate(problem); solverForConditionEstimate.SetAztecOption(AZ_solver, AZ_cg_condnum); solverForConditionEstimate.ConstructPreconditioner(condest); Epetra_RowMatrix *A = problem.GetMatrix(); double norminf = A->NormInf(); double normone = A->NormOne(); if (_printToConsole) { cout << "\n Inf-norm of stiffness matrix before scaling = " << norminf; cout << "\n One-norm of stiffness matrix before scaling = " << normone << endl << endl; cout << "Condition number estimate: " << condest << endl; } AztecOO solver(problem); int otherRows = A->NumGlobalRows() - A->NumMyRows(); int overlapLevel = std::min(otherRows,_overlapLevel); solver.SetAztecOption(AZ_precond, AZ_dom_decomp); // additive schwarz solver.SetAztecOption(AZ_overlap, overlapLevel); // level of overlap for schwarz solver.SetAztecOption(AZ_type_overlap, AZ_symmetric); solver.SetAztecOption(AZ_solver, AZ_cg_condnum); // more expensive than AZ_cg, but allows estimate of condition # solver.SetAztecOption(AZ_subdomain_solve, AZ_ilut); // TODO: look these up (copied from example) solver.SetAztecParam(AZ_ilut_fill, 1.0); // TODO: look these up (copied from example) solver.SetAztecParam(AZ_drop, 0.0); // TODO: look these up (copied from example) int solveResult = solver.Iterate(_maxIters,_tol); // int solveResult = solver.AdaptiveIterate(_maxIters,1,_tol); // an experiment (was Iterate()) norminf = A->NormInf(); normone = A->NormOne(); condest = solver.Condest(); int numIters = solver.NumIters(); if (_printToConsole) { cout << "\n Inf-norm of stiffness matrix after scaling = " << norminf; cout << "\n One-norm of stiffness matrix after scaling = " << normone << endl << endl; cout << "Condition number estimate: " << condest << endl; cout << "Num iterations: " << numIters << endl; } return solveResult; }
static int make_my_A(const Epetra_RowMatrix &matrix, int *myA, const Epetra_Comm &comm) { int me = comm.MyPID(); const Epetra_Map &rowmap = matrix.RowMatrixRowMap(); const Epetra_Map &colmap = matrix.RowMatrixColMap(); int myRows = matrix.NumMyRows(); int numRows = matrix.NumGlobalRows(); int numCols = matrix.NumGlobalCols(); int base = rowmap.IndexBase(); int maxRow = matrix.MaxNumEntries(); memset(myA, 0, sizeof(int) * numRows * numCols); int *myIndices = new int [maxRow]; double *tmp = new double [maxRow]; int rowLen = 0; for (int i=0; i< myRows; i++){ int rc = matrix.ExtractMyRowCopy(i, maxRow, rowLen, tmp, myIndices); if (rc){ if (me == 0){ std::cout << "Error in make_my_A" << std::endl; } return 1; } int *row = myA + (numCols * (rowmap.GID(i) - base)); for (int j=0; j < rowLen; j++){ int colGID = colmap.GID(myIndices[j]); row[colGID - base] = me + 1; } } if (maxRow){ delete [] myIndices; delete [] tmp; } return 0; }
//============================================================================= int Amesos_Dscpack::Solve() { if (IsNumericFactorizationOK_ == false) AMESOS_CHK_ERR(NumericFactorization()); ResetTimer(0); ResetTimer(1); Epetra_RowMatrix *RowMatrixA = Problem_->GetMatrix(); if (RowMatrixA == 0) AMESOS_CHK_ERR(-1); // MS // some checks on matrix size if (RowMatrixA->NumGlobalRows() != RowMatrixA->NumGlobalCols()) AMESOS_CHK_ERR(-1); // Convert vector b to a vector in the form that DSCPACK needs it // Epetra_MultiVector* vecX = Problem_->GetLHS(); Epetra_MultiVector* vecB = Problem_->GetRHS(); if ((vecX == 0) || (vecB == 0)) AMESOS_CHK_ERR(-1); // something wrong with input int NumVectors = vecX->NumVectors(); if (NumVectors != vecB->NumVectors()) AMESOS_CHK_ERR(-2); double *dscmapXvalues ; int dscmapXlda ; Epetra_MultiVector dscmapX(DscRowMap(),NumVectors) ; int ierr; AMESOS_CHK_ERR(dscmapX.ExtractView(&dscmapXvalues,&dscmapXlda)); assert (dscmapXlda == NumLocalCols); double *dscmapBvalues ; int dscmapBlda ; Epetra_MultiVector dscmapB(DscRowMap(), NumVectors ) ; ierr = dscmapB.ExtractView( &dscmapBvalues, &dscmapBlda ); AMESOS_CHK_ERR(ierr); assert( dscmapBlda == NumLocalCols ) ; AMESOS_CHK_ERR(dscmapB.Import(*vecB, Importer(), Insert)); VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0); ResetTimer(0); // MS // now solve the problem std::vector<double> ValuesInNewOrder( NumLocalCols ) ; OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); if ( MyDscRank >= 0 ) { for ( int j =0 ; j < NumVectors; j++ ) { for ( int i = 0; i < NumLocalCols; i++ ) { ValuesInNewOrder[i] = dscmapBvalues[DscColMap().LID( LocalStructOldNum[i] ) +j*dscmapBlda ] ; } AMESOS_CHK_ERR( DSC_InputRhsLocalVec ( PrivateDscpackData_->MyDSCObject_, &ValuesInNewOrder[0], NumLocalCols ) ) ; AMESOS_CHK_ERR( DSC_Solve ( PrivateDscpackData_->MyDSCObject_ ) ) ; AMESOS_CHK_ERR( DSC_GetLocalSolution ( PrivateDscpackData_->MyDSCObject_, &ValuesInNewOrder[0], NumLocalCols ) ) ; for ( int i = 0; i < NumLocalCols; i++ ) { dscmapXvalues[DscColMap().LID( LocalStructOldNum[i] ) +j*dscmapXlda ] = ValuesInNewOrder[i]; } } } SolveTime_ = AddTime("Total solve time", SolveTime_, 0); ResetTimer(0); ResetTimer(1); vecX->Export( dscmapX, Importer(), Insert ) ; VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0); if (ComputeTrueResidual_) ComputeTrueResidual(*(GetProblem()->GetMatrix()), *vecX, *vecB, false, "Amesos_Dscpack"); if (ComputeVectorNorms_) ComputeVectorNorms(*vecX, *vecB, "Amesos_Dscpack"); OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); NumSolve_++; return(0) ; }
//============================================================================= int Amesos_Dscpack::PerformNumericFactorization() { ResetTimer(0); ResetTimer(1); Epetra_RowMatrix* RowMatrixA = Problem_->GetMatrix(); if (RowMatrixA == 0) AMESOS_CHK_ERR(-1); const Epetra_Map& OriginalMap = RowMatrixA->RowMatrixRowMap() ; int numrows = RowMatrixA->NumGlobalRows(); assert( numrows == RowMatrixA->NumGlobalCols() ); // // Call Dscpack to perform Numeric Factorization // std::vector<double> MyANonZ; #if 0 if ( IsNumericFactorizationOK_ ) { DSC_ReFactorInitialize(PrivateDscpackData_->MyDSCObject); } #endif DscRowMap_ = Teuchos::rcp(new Epetra_Map(numrows, NumLocalCols, LocalStructOldNum, 0, Comm())); if (DscRowMap_.get() == 0) AMESOS_CHK_ERR(-1); Importer_ = rcp(new Epetra_Import(DscRowMap(), OriginalMap)); // // Import from the CrsMatrix // Epetra_CrsMatrix DscMat(Copy, DscRowMap(), 0); AMESOS_CHK_ERR(DscMat.Import(*RowMatrixA, Importer(), Insert)); AMESOS_CHK_ERR(DscMat.FillComplete()); DscColMap_ = Teuchos::rcp(new Epetra_Map(DscMat.RowMatrixColMap())); assert( MyDscRank >= 0 || NumLocalNonz == 0 ) ; assert( MyDscRank >= 0 || NumLocalCols == 0 ) ; assert( MyDscRank >= 0 || NumGlobalCols == 0 ) ; MyANonZ.resize( NumLocalNonz ) ; int NonZIndex = 0 ; int max_num_entries = DscMat.MaxNumEntries() ; std::vector<int> col_indices( max_num_entries ) ; std::vector<double> mat_values( max_num_entries ) ; assert( NumLocalCols == DscRowMap().NumMyElements() ) ; std::vector<int> my_global_elements( NumLocalCols ) ; AMESOS_CHK_ERR(DscRowMap().MyGlobalElements( &my_global_elements[0] ) ) ; std::vector<int> GlobalStructOldColNum( NumGlobalCols ) ; typedef std::pair<int, double> Data; std::vector<Data> sort_array(max_num_entries); std::vector<int> sort_indices(max_num_entries); for ( int i = 0; i < NumLocalCols ; i++ ) { assert( my_global_elements[i] == LocalStructOldNum[i] ) ; int num_entries_this_row; #ifdef USE_LOCAL AMESOS_CHK_ERR( DscMat.ExtractMyRowCopy( i, max_num_entries, num_entries_this_row, &mat_values[0], &col_indices[0] ) ) ; #else AMESOS_CHK_ERR( DscMat.ExtractGlobalRowCopy( DscMat.GRID(i), max_num_entries, num_entries_this_row, &mat_values[0], &col_indices[0] ) ) ; #endif int OldRowNumber = LocalStructOldNum[i] ; if (GlobalStructOwner[ OldRowNumber ] == -1) AMESOS_CHK_ERR(-1); int NewRowNumber = GlobalStructNewColNum[ my_global_elements[ i ] ] ; // // Sort the column elements // for ( int j = 0; j < num_entries_this_row; j++ ) { #ifdef USE_LOCAL sort_array[j].first = GlobalStructNewColNum[ DscMat.GCID( col_indices[j])] ; sort_indices[j] = GlobalStructNewColNum[ DscMat.GCID( col_indices[j])] ; #else sort_array[j].first = GlobalStructNewColNum[ col_indices[j] ]; #endif sort_array[j].second = mat_values[j] ; } sort(&sort_array[0], &sort_array[num_entries_this_row]); for ( int j = 0; j < num_entries_this_row; j++ ) { int NewColNumber = sort_array[j].first ; if ( NewRowNumber <= NewColNumber ) MyANonZ[ NonZIndex++ ] = sort_array[j].second ; #ifndef USE_LOCAL assert( NonZIndex <= NumLocalNonz ); // This assert can fail on non-symmetric matrices #endif } } OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); if ( MyDscRank >= 0 ) { const int SchemeCode = 1; #ifndef USE_LOCAL assert( NonZIndex == NumLocalNonz ); #endif AMESOS_CHK_ERR( DSC_NFactor ( PrivateDscpackData_->MyDSCObject_, SchemeCode, &MyANonZ[0], DSC_LLT, DSC_LBLAS3, DSC_DBLAS2 ) ) ; } // if ( MyDscRank >= 0 ) IsNumericFactorizationOK_ = true ; NumFactTime_ = AddTime("Total numeric factorization time", NumFactTime_, 0); return(0); }
//============================================================================= int Amesos_Dscpack::PerformSymbolicFactorization() { ResetTimer(0); ResetTimer(1); MyPID_ = Comm().MyPID(); NumProcs_ = Comm().NumProc(); Epetra_RowMatrix *RowMatrixA = Problem_->GetMatrix(); if (RowMatrixA == 0) AMESOS_CHK_ERR(-1); const Epetra_Map& OriginalMap = RowMatrixA->RowMatrixRowMap() ; const Epetra_MpiComm& comm1 = dynamic_cast<const Epetra_MpiComm &> (Comm()); int numrows = RowMatrixA->NumGlobalRows(); int numentries = RowMatrixA->NumGlobalNonzeros(); Teuchos::RCP<Epetra_CrsGraph> Graph; Epetra_CrsMatrix* CastCrsMatrixA = dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA); if (CastCrsMatrixA) { Graph = Teuchos::rcp(const_cast<Epetra_CrsGraph*>(&(CastCrsMatrixA->Graph())), false); } else { int MaxNumEntries = RowMatrixA->MaxNumEntries(); Graph = Teuchos::rcp(new Epetra_CrsGraph(Copy, OriginalMap, MaxNumEntries)); std::vector<int> Indices(MaxNumEntries); std::vector<double> Values(MaxNumEntries); for (int i = 0 ; i < RowMatrixA->NumMyRows() ; ++i) { int NumEntries; RowMatrixA->ExtractMyRowCopy(i, MaxNumEntries, NumEntries, &Values[0], &Indices[0]); for (int j = 0 ; j < NumEntries ; ++j) Indices[j] = RowMatrixA->RowMatrixColMap().GID(Indices[j]); int GlobalRow = RowMatrixA->RowMatrixRowMap().GID(i); Graph->InsertGlobalIndices(GlobalRow, NumEntries, &Indices[0]); } Graph->FillComplete(); } // // Create a replicated map and graph // std::vector<int> AllIDs( numrows ) ; for ( int i = 0; i < numrows ; i++ ) AllIDs[i] = i ; Epetra_Map ReplicatedMap( -1, numrows, &AllIDs[0], 0, Comm()); Epetra_Import ReplicatedImporter(ReplicatedMap, OriginalMap); Epetra_CrsGraph ReplicatedGraph( Copy, ReplicatedMap, 0 ); AMESOS_CHK_ERR(ReplicatedGraph.Import(*Graph, ReplicatedImporter, Insert)); AMESOS_CHK_ERR(ReplicatedGraph.FillComplete()); // // Convert the matrix to Ap, Ai // std::vector <int> Replicates(numrows); std::vector <int> Ap(numrows + 1); std::vector <int> Ai(EPETRA_MAX(numrows, numentries)); for( int i = 0 ; i < numrows; i++ ) Replicates[i] = 1; int NumEntriesPerRow ; int *ColIndices = 0 ; int Ai_index = 0 ; for ( int MyRow = 0; MyRow <numrows; MyRow++ ) { AMESOS_CHK_ERR( ReplicatedGraph.ExtractMyRowView( MyRow, NumEntriesPerRow, ColIndices ) ); Ap[MyRow] = Ai_index ; for ( int j = 0; j < NumEntriesPerRow; j++ ) { Ai[Ai_index] = ColIndices[j] ; Ai_index++; } } assert( Ai_index == numentries ) ; Ap[ numrows ] = Ai_index ; MtxConvTime_ = AddTime("Total matrix conversion time", MtxConvTime_, 0); ResetTimer(0); // // Call Dscpack Symbolic Factorization // int OrderCode = 2; std::vector<double> MyANonZ; NumLocalNonz = 0 ; GlobalStructNewColNum = 0 ; GlobalStructNewNum = 0 ; GlobalStructOwner = 0 ; LocalStructOldNum = 0 ; NumGlobalCols = 0 ; // MS // Have to define the maximum number of processes to be used // MS // This is only a suggestion as Dscpack uses a number of processes that is a power of 2 int NumGlobalNonzeros = GetProblem()->GetMatrix()->NumGlobalNonzeros(); int NumRows = GetProblem()->GetMatrix()->NumGlobalRows(); // optimal value for MaxProcs == -1 int OptNumProcs1 = 1+EPETRA_MAX( NumRows/10000, NumGlobalNonzeros/1000000 ); OptNumProcs1 = EPETRA_MIN(NumProcs_,OptNumProcs1 ); // optimal value for MaxProcs == -2 int OptNumProcs2 = (int)sqrt(1.0 * NumProcs_); if( OptNumProcs2 < 1 ) OptNumProcs2 = 1; // fix the value of MaxProcs switch (MaxProcs_) { case -1: MaxProcs_ = EPETRA_MIN(OptNumProcs1, NumProcs_); break; case -2: MaxProcs_ = EPETRA_MIN(OptNumProcs2, NumProcs_); break; case -3: MaxProcs_ = NumProcs_; break; } #if 0 if (MyDscRank>=0 && A_and_LU_built) { DSC_ReFactorInitialize(PrivateDscpackData_->MyDSCObject); } #endif // if ( ! A_and_LU_built ) { // DSC_End( PrivateDscpackData_->MyDSCObject ) ; // PrivateDscpackData_->MyDSCObject = DSC_Begin() ; // } // MS // here I continue with the old code... OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); DscNumProcs = 1 ; int DscMax = DSC_Analyze( numrows, &Ap[0], &Ai[0], &Replicates[0] ); while ( DscNumProcs * 2 <=EPETRA_MIN( MaxProcs_, DscMax ) ) DscNumProcs *= 2 ; MyDscRank = -1; DSC_Open0( PrivateDscpackData_->MyDSCObject_, DscNumProcs, &MyDscRank, comm1.Comm()) ; NumLocalCols = 0 ; // This is for those processes not in the Dsc grid if ( MyDscRank >= 0 ) { assert( MyPID_ == MyDscRank ) ; AMESOS_CHK_ERR( DSC_Order ( PrivateDscpackData_->MyDSCObject_, OrderCode, numrows, &Ap[0], &Ai[0], &Replicates[0], &NumGlobalCols, &NumLocalStructs, &NumLocalCols, &NumLocalNonz, &GlobalStructNewColNum, &GlobalStructNewNum, &GlobalStructOwner, &LocalStructOldNum ) ) ; assert( NumGlobalCols == numrows ) ; assert( NumLocalCols == NumLocalStructs ) ; } if ( MyDscRank >= 0 ) { int MaxSingleBlock; const int Limit = 5000000 ; // Memory Limit set to 5 Terabytes AMESOS_CHK_ERR( DSC_SFactor ( PrivateDscpackData_->MyDSCObject_, &TotalMemory_, &MaxSingleBlock, Limit, DSC_LBLAS3, DSC_DBLAS2 ) ) ; } // A_and_LU_built = true; // If you uncomment this, TestOptions fails SymFactTime_ = AddTime("Total symbolic factorization time", SymFactTime_, 0); return(0); }
int CopyRowMatrix(mxArray* matlabA, const Epetra_RowMatrix& A) { int valueCount = 0; //int* valueCount = &temp; Epetra_Map map = A.RowMatrixRowMap(); const Epetra_Comm & comm = map.Comm(); int numProc = comm.NumProc(); if (numProc==1) DoCopyRowMatrix(matlabA, valueCount, A); else { int numRows = map.NumMyElements(); //cout << "creating allGidsMap\n"; Epetra_Map allGidsMap(-1, numRows, 0,comm); //cout << "done creating allGidsMap\n"; Epetra_IntVector allGids(allGidsMap); for (int i=0; i<numRows; i++) allGids[i] = map.GID(i); // Now construct a RowMatrix on PE 0 by strip-mining the rows of the input matrix A. int numChunks = numProc; int stripSize = allGids.GlobalLength()/numChunks; int remainder = allGids.GlobalLength()%numChunks; int curStart = 0; int curStripSize = 0; Epetra_IntSerialDenseVector importGidList; int numImportGids = 0; if (comm.MyPID()==0) importGidList.Size(stripSize+1); // Set size of vector to max needed for (int i=0; i<numChunks; i++) { if (comm.MyPID()==0) { // Only PE 0 does this part curStripSize = stripSize; if (i<remainder) curStripSize++; // handle leftovers for (int j=0; j<curStripSize; j++) importGidList[j] = j + curStart; curStart += curStripSize; } // The following import map will be non-trivial only on PE 0. //cout << "creating importGidMap\n"; Epetra_Map importGidMap(-1, curStripSize, importGidList.Values(), 0, comm); //cout << "done creating importGidMap\n"; Epetra_Import gidImporter(importGidMap, allGidsMap); Epetra_IntVector importGids(importGidMap); if (importGids.Import(allGids, gidImporter, Insert)) return(-1); // importGids now has a list of GIDs for the current strip of matrix rows. // Use these values to build another importer that will get rows of the matrix. // The following import map will be non-trivial only on PE 0. //cout << "creating importMap\n"; //cout << "A.RowMatrixRowMap().MinAllGID: " << A.RowMatrixRowMap().MinAllGID() << "\n"; Epetra_Map importMap(-1, importGids.MyLength(), importGids.Values(), A.RowMatrixRowMap().MinAllGID(), comm); //cout << "done creating importMap\n"; Epetra_Import importer(importMap, map); Epetra_CrsMatrix importA(Copy, importMap, 0); if (importA.Import(A, importer, Insert)) return(-1); if (importA.FillComplete()) return(-1); // Finally we are ready to write this strip of the matrix to ostream if (DoCopyRowMatrix(matlabA, valueCount, importA)) return(-1); } } if (A.RowMatrixRowMap().Comm().MyPID() == 0) { // set max cap int* matlabAcolumnIndicesPtr = mxGetJc(matlabA); matlabAcolumnIndicesPtr[A.NumGlobalRows()] = valueCount; } return(0); }
int DoCopyRowMatrix(mxArray* matlabA, int& valueCount, const Epetra_RowMatrix& A) { //cout << "doing DoCopyRowMatrix\n"; int ierr = 0; int numRows = A.NumGlobalRows(); //cout << "numRows: " << numRows << "\n"; Epetra_Map rowMap = A.RowMatrixRowMap(); Epetra_Map colMap = A.RowMatrixColMap(); int minAllGID = rowMap.MinAllGID(); const Epetra_Comm & comm = rowMap.Comm(); //cout << "did global setup\n"; if (comm.MyPID()!=0) { if (A.NumMyRows()!=0) ierr = -1; if (A.NumMyCols()!=0) ierr = -1; } else { // declare and get initial values of all matlabA pointers double* matlabAvaluesPtr = mxGetPr(matlabA); int* matlabAcolumnIndicesPtr = mxGetJc(matlabA); int* matlabArowIndicesPtr = mxGetIr(matlabA); // set all matlabA pointers to the proper offset matlabAvaluesPtr += valueCount; matlabArowIndicesPtr += valueCount; if (numRows!=A.NumMyRows()) ierr = -1; Epetra_SerialDenseVector values(A.MaxNumEntries()); Epetra_IntSerialDenseVector indices(A.MaxNumEntries()); //cout << "did proc0 setup\n"; for (int i=0; i<numRows; i++) { //cout << "extracting a row\n"; int I = rowMap.GID(i); int numEntries = 0; if (A.ExtractMyRowCopy(i, values.Length(), numEntries, values.Values(), indices.Values())) return(-1); matlabAcolumnIndicesPtr[I - minAllGID] = valueCount; // set the starting index of column I double* serialValuesPtr = values.Values(); for (int j=0; j<numEntries; j++) { int J = colMap.GID(indices[j]); *matlabAvaluesPtr = *serialValuesPtr++; *matlabArowIndicesPtr = J; // increment matlabA pointers matlabAvaluesPtr++; matlabArowIndicesPtr++; valueCount++; } } //cout << "proc0 row extraction for this chunck is done\n"; } /* if (comm.MyPID() == 0) { cout << "printing matlabA pointers\n"; double* matlabAvaluesPtr = mxGetPr(matlabA); int* matlabAcolumnIndicesPtr = mxGetJc(matlabA); int* matlabArowIndicesPtr = mxGetIr(matlabA); for(int i=0; i < numRows; i++) { for(int j=0; j < A.MaxNumEntries(); j++) { cout << "*matlabAvaluesPtr: " << *matlabAvaluesPtr++ << " *matlabAcolumnIndicesPtr: " << *matlabAcolumnIndicesPtr++ << " *matlabArowIndicesPtr" << *matlabArowIndicesPtr++ << "\n"; } } cout << "done printing matlabA pointers\n"; } */ int ierrGlobal; comm.MinAll(&ierr, &ierrGlobal, 1); // If any processor has -1, all return -1 return(ierrGlobal); }
int check(Epetra_RowMatrix& A, Epetra_RowMatrix & B, bool verbose) { int ierr = 0; EPETRA_TEST_ERR(!A.Comm().NumProc()==B.Comm().NumProc(),ierr); EPETRA_TEST_ERR(!A.Comm().MyPID()==B.Comm().MyPID(),ierr); EPETRA_TEST_ERR(!A.Filled()==B.Filled(),ierr); EPETRA_TEST_ERR(!A.HasNormInf()==B.HasNormInf(),ierr); EPETRA_TEST_ERR(!A.LowerTriangular()==B.LowerTriangular(),ierr); EPETRA_TEST_ERR(!A.Map().SameAs(B.Map()),ierr); EPETRA_TEST_ERR(!A.MaxNumEntries()==B.MaxNumEntries(),ierr); EPETRA_TEST_ERR(!A.NumGlobalCols()==B.NumGlobalCols(),ierr); EPETRA_TEST_ERR(!A.NumGlobalDiagonals()==B.NumGlobalDiagonals(),ierr); EPETRA_TEST_ERR(!A.NumGlobalNonzeros()==B.NumGlobalNonzeros(),ierr); EPETRA_TEST_ERR(!A.NumGlobalRows()==B.NumGlobalRows(),ierr); EPETRA_TEST_ERR(!A.NumMyCols()==B.NumMyCols(),ierr); EPETRA_TEST_ERR(!A.NumMyDiagonals()==B.NumMyDiagonals(),ierr); EPETRA_TEST_ERR(!A.NumMyNonzeros()==B.NumMyNonzeros(),ierr); for (int i=0; i<A.NumMyRows(); i++) { int nA, nB; A.NumMyRowEntries(i,nA); B.NumMyRowEntries(i,nB); EPETRA_TEST_ERR(!nA==nB,ierr); } EPETRA_TEST_ERR(!A.NumMyRows()==B.NumMyRows(),ierr); EPETRA_TEST_ERR(!A.OperatorDomainMap().SameAs(B.OperatorDomainMap()),ierr); EPETRA_TEST_ERR(!A.OperatorRangeMap().SameAs(B.OperatorRangeMap()),ierr); EPETRA_TEST_ERR(!A.RowMatrixColMap().SameAs(B.RowMatrixColMap()),ierr); EPETRA_TEST_ERR(!A.RowMatrixRowMap().SameAs(B.RowMatrixRowMap()),ierr); EPETRA_TEST_ERR(!A.UpperTriangular()==B.UpperTriangular(),ierr); EPETRA_TEST_ERR(!A.UseTranspose()==B.UseTranspose(),ierr); int NumVectors = 5; { // No transpose case Epetra_MultiVector X(A.OperatorDomainMap(), NumVectors); Epetra_MultiVector YA1(A.OperatorRangeMap(), NumVectors); Epetra_MultiVector YA2(YA1); Epetra_MultiVector YB1(YA1); Epetra_MultiVector YB2(YA1); X.Random(); bool transA = false; A.SetUseTranspose(transA); B.SetUseTranspose(transA); A.Apply(X,YA1); A.Multiply(transA, X, YA2); EPETRA_TEST_ERR(checkMultiVectors(YA1,YA2,"A Multiply and A Apply", verbose),ierr); B.Apply(X,YB1); EPETRA_TEST_ERR(checkMultiVectors(YA1,YB1,"A Multiply and B Multiply", verbose),ierr); B.Multiply(transA, X, YB2); EPETRA_TEST_ERR(checkMultiVectors(YA1,YB2,"A Multiply and B Apply", verbose), ierr); } { // transpose case Epetra_MultiVector X(A.OperatorRangeMap(), NumVectors); Epetra_MultiVector YA1(A.OperatorDomainMap(), NumVectors); Epetra_MultiVector YA2(YA1); Epetra_MultiVector YB1(YA1); Epetra_MultiVector YB2(YA1); X.Random(); bool transA = true; A.SetUseTranspose(transA); B.SetUseTranspose(transA); A.Apply(X,YA1); A.Multiply(transA, X, YA2); EPETRA_TEST_ERR(checkMultiVectors(YA1,YA2, "A Multiply and A Apply (transpose)", verbose),ierr); B.Apply(X,YB1); EPETRA_TEST_ERR(checkMultiVectors(YA1,YB1, "A Multiply and B Multiply (transpose)", verbose),ierr); B.Multiply(transA, X,YB2); EPETRA_TEST_ERR(checkMultiVectors(YA1,YB2, "A Multiply and B Apply (transpose)", verbose),ierr); } Epetra_Vector diagA(A.RowMatrixRowMap()); EPETRA_TEST_ERR(A.ExtractDiagonalCopy(diagA),ierr); Epetra_Vector diagB(B.RowMatrixRowMap()); EPETRA_TEST_ERR(B.ExtractDiagonalCopy(diagB),ierr); EPETRA_TEST_ERR(checkMultiVectors(diagA,diagB, "ExtractDiagonalCopy", verbose),ierr); Epetra_Vector rowA(A.RowMatrixRowMap()); EPETRA_TEST_ERR(A.InvRowSums(rowA),ierr); Epetra_Vector rowB(B.RowMatrixRowMap()); EPETRA_TEST_ERR(B.InvRowSums(rowB),ierr) EPETRA_TEST_ERR(checkMultiVectors(rowA,rowB, "InvRowSums", verbose),ierr); Epetra_Vector colA(A.RowMatrixColMap()); EPETRA_TEST_ERR(A.InvColSums(colA),ierr); Epetra_Vector colB(B.RowMatrixColMap()); EPETRA_TEST_ERR(B.InvColSums(colB),ierr); EPETRA_TEST_ERR(checkMultiVectors(colA,colB, "InvColSums", verbose),ierr); EPETRA_TEST_ERR(checkValues(A.NormInf(), B.NormInf(), "NormInf before scaling", verbose), ierr); EPETRA_TEST_ERR(checkValues(A.NormOne(), B.NormOne(), "NormOne before scaling", verbose),ierr); EPETRA_TEST_ERR(A.RightScale(colA),ierr); EPETRA_TEST_ERR(B.RightScale(colB),ierr); EPETRA_TEST_ERR(A.LeftScale(rowA),ierr); EPETRA_TEST_ERR(B.LeftScale(rowB),ierr); EPETRA_TEST_ERR(checkValues(A.NormInf(), B.NormInf(), "NormInf after scaling", verbose), ierr); EPETRA_TEST_ERR(checkValues(A.NormOne(), B.NormOne(), "NormOne after scaling", verbose),ierr); vector<double> valuesA(A.MaxNumEntries()); vector<int> indicesA(A.MaxNumEntries()); vector<double> valuesB(B.MaxNumEntries()); vector<int> indicesB(B.MaxNumEntries()); return(0); for (int i=0; i<A.NumMyRows(); i++) { int nA, nB; EPETRA_TEST_ERR(A.ExtractMyRowCopy(i, A.MaxNumEntries(), nA, &valuesA[0], &indicesA[0]),ierr); EPETRA_TEST_ERR(B.ExtractMyRowCopy(i, B.MaxNumEntries(), nB, &valuesB[0], &indicesB[0]),ierr); EPETRA_TEST_ERR(!nA==nB,ierr); for (int j=0; j<nA; j++) { double curVal = valuesA[j]; int curIndex = indicesA[j]; bool notfound = true; int jj = 0; while (notfound && jj< nB) { if (!checkValues(curVal, valuesB[jj])) notfound = false; jj++; } EPETRA_TEST_ERR(notfound, ierr); vector<int>::iterator p = find(indicesB.begin(),indicesB.end(),curIndex); // find curIndex in indicesB EPETRA_TEST_ERR(p==indicesB.end(), ierr); } } if (verbose) cout << "RowMatrix Methods check OK" << endl; return (ierr); }
int Amesos_Scalapack::RedistributeA( ) { if( debug_ == 1 ) std::cout << "Entering `RedistributeA()'" << std::endl; Time_->ResetStartTime(); Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator()); EPETRA_CHK_ERR( RowMatrixA == 0 ) ; const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ; int NumberOfProcesses = Comm().NumProc() ; // // Compute a uniform distribution as ScaLAPACK would want it // MyFirstElement - The first element which this processor would have // NumExpectedElemetns - The number of elements which this processor would have // int NumRows_ = RowMatrixA->NumGlobalRows() ; int NumColumns_ = RowMatrixA->NumGlobalCols() ; if ( MaxProcesses_ > 0 ) { NumberOfProcesses = EPETRA_MIN( NumberOfProcesses, MaxProcesses_ ) ; } else { int ProcessNumHeuristic = (1+NumRows_/200)*(1+NumRows_/200); NumberOfProcesses = EPETRA_MIN( NumberOfProcesses, ProcessNumHeuristic ); } if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:171" << std::endl; // // Create the ScaLAPACK data distribution. // The TwoD data distribution is created in a completely different // manner and is not transposed (whereas the SaLAPACK 1D data // distribution was transposed) // if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:163" << std::endl; Comm().Barrier(); if ( TwoD_distribution_ ) { if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:166" << std::endl; Comm().Barrier(); npcol_ = EPETRA_MIN( NumberOfProcesses, EPETRA_MAX ( 2, (int) sqrt( NumberOfProcesses * 0.5 ) ) ) ; nprow_ = NumberOfProcesses / npcol_ ; // // Create the map for FatA - our first intermediate matrix // int NumMyElements = RowMatrixA->RowMatrixRowMap().NumMyElements() ; std::vector<int> MyGlobalElements( NumMyElements ); RowMatrixA->RowMatrixRowMap().MyGlobalElements( &MyGlobalElements[0] ) ; int NumMyColumns = RowMatrixA->RowMatrixColMap().NumMyElements() ; std::vector<int> MyGlobalColumns( NumMyColumns ); RowMatrixA->RowMatrixColMap().MyGlobalElements( &MyGlobalColumns[0] ) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:194" << std::endl; std::vector<int> MyFatElements( NumMyElements * npcol_ ); for( int LocalRow=0; LocalRow<NumMyElements; LocalRow++ ) { for (int i = 0 ; i < npcol_; i++ ){ MyFatElements[LocalRow*npcol_+i] = MyGlobalElements[LocalRow]*npcol_+i; } } Epetra_Map FatInMap( npcol_*NumRows_, NumMyElements*npcol_, &MyFatElements[0], 0, Comm() ); // // Create FatIn, our first intermediate matrix // Epetra_CrsMatrix FatIn( Copy, FatInMap, 0 ); std::vector<std::vector<int> > FatColumnIndices(npcol_,std::vector<int>(1)); std::vector<std::vector<double> > FatMatrixValues(npcol_,std::vector<double>(1)); std::vector<int> FatRowPtrs(npcol_); // A FatRowPtrs[i] = the number // of entries in local row LocalRow*npcol_ + i if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:219" << std::endl; // mypcol_ = iam_%npcol_; myprow_ = (iam_/npcol_)%nprow_; if ( iam_ >= nprow_ * npcol_ ) { myprow_ = nprow_; mypcol_ = npcol_; } // Each row is split into npcol_ rows, with each of the // new rows containing only those elements belonging to // its process column (in the ScaLAPACK 2D process grid) // int MaxNumIndices = RowMatrixA->MaxNumEntries(); int NumIndices; std::vector<int> ColumnIndices(MaxNumIndices); std::vector<double> MatrixValues(MaxNumIndices); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:232 NumMyElements = " << NumMyElements << std::endl; nb_ = grid_nb_; for( int LocalRow=0; LocalRow<NumMyElements; ++LocalRow ) { RowMatrixA->ExtractMyRowCopy( LocalRow, MaxNumIndices, NumIndices, &MatrixValues[0], &ColumnIndices[0] ); for (int i=0; i<npcol_; i++ ) FatRowPtrs[i] = 0 ; // // Deal the individual matrix entries out to the row owned by // the process to which this matrix entry will belong. // for( int i=0 ; i<NumIndices ; ++i ) { int GlobalCol = MyGlobalColumns[ ColumnIndices[i] ]; int pcol_i = pcolnum( GlobalCol, nb_, npcol_ ) ; if ( FatRowPtrs[ pcol_i ]+1 >= FatColumnIndices[ pcol_i ].size() ) { FatColumnIndices[ pcol_i ]. resize( 2 * FatRowPtrs[ pcol_i ]+1 ); FatMatrixValues[ pcol_i ]. resize( 2 * FatRowPtrs[ pcol_i ]+1 ); } FatColumnIndices[pcol_i][FatRowPtrs[pcol_i]] = GlobalCol ; FatMatrixValues[pcol_i][FatRowPtrs[pcol_i]] = MatrixValues[i]; FatRowPtrs[ pcol_i ]++; } // // Insert each of the npcol_ rows individually // for ( int pcol_i = 0 ; pcol_i < npcol_ ; pcol_i++ ) { FatIn.InsertGlobalValues( MyGlobalElements[LocalRow]*npcol_ + pcol_i, FatRowPtrs[ pcol_i ], &FatMatrixValues[ pcol_i ][0], &FatColumnIndices[ pcol_i ][0] ); } } FatIn.FillComplete( false ); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:260" << std::endl; if ( debug_ == 1) std::cout << "Amesos_Scalapack.cpp:265B" << " iam_ = " << iam_ << " nb_ = " << nb_ << " nprow_ = " << nprow_ << " npcol_ = " << npcol_ << std::endl; // // Compute the map for our second intermediate matrix, FatOut // // Compute directly int UniformRows = ( NumRows_ / ( nprow_ * nb_ ) ) * nb_ ; int AllExcessRows = NumRows_ - UniformRows * nprow_ ; int OurExcessRows = EPETRA_MIN( nb_, AllExcessRows - ( myprow_ * nb_ ) ) ; OurExcessRows = EPETRA_MAX( 0, OurExcessRows ); NumOurRows_ = UniformRows + OurExcessRows ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:277" << std::endl; int UniformColumns = ( NumColumns_ / ( npcol_ * nb_ ) ) * nb_ ; int AllExcessColumns = NumColumns_ - UniformColumns * npcol_ ; int OurExcessColumns = EPETRA_MIN( nb_, AllExcessColumns - ( mypcol_ * nb_ ) ) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:281" << std::endl; OurExcessColumns = EPETRA_MAX( 0, OurExcessColumns ); NumOurColumns_ = UniformColumns + OurExcessColumns ; if ( iam_ >= nprow_ * npcol_ ) { UniformRows = 0; NumOurRows_ = 0; NumOurColumns_ = 0; } Comm().Barrier(); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:295" << std::endl; #if 0 // Compute using ScaLAPACK's numroc routine, assert agreement int izero = 0; // All matrices start at process 0 int NumRocSays = numroc_( &NumRows_, &nb_, &myprow_, &izero, &nprow_ ); assert( NumOurRows_ == NumRocSays ); #endif // // Compute the rows which this process row owns in the ScaLAPACK 2D // process grid. // std::vector<int> AllOurRows(NumOurRows_); int RowIndex = 0 ; int BlockRow = 0 ; for ( ; BlockRow < UniformRows / nb_ ; BlockRow++ ) { for ( int RowOffset = 0; RowOffset < nb_ ; RowOffset++ ) { AllOurRows[RowIndex++] = BlockRow*nb_*nprow_ + myprow_*nb_ + RowOffset ; } } Comm().Barrier(); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:315" << std::endl; assert ( BlockRow == UniformRows / nb_ ) ; for ( int RowOffset = 0; RowOffset < OurExcessRows ; RowOffset++ ) { AllOurRows[RowIndex++] = BlockRow*nb_*nprow_ + myprow_*nb_ + RowOffset ; } assert( RowIndex == NumOurRows_ ); // // Distribute those rows amongst all the processes in that process row // This is an artificial distribution with the following properties: // 1) It is a 1D data distribution (each row belogs entirely to // a single process // 2) All data which will eventually belong to a given process row, // is entirely contained within the processes in that row. // Comm().Barrier(); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:312" << std::endl; // // Compute MyRows directly // std::vector<int>MyRows(NumOurRows_); RowIndex = 0 ; BlockRow = 0 ; for ( ; BlockRow < UniformRows / nb_ ; BlockRow++ ) { for ( int RowOffset = 0; RowOffset < nb_ ; RowOffset++ ) { MyRows[RowIndex++] = BlockRow*nb_*nprow_*npcol_ + myprow_*nb_*npcol_ + RowOffset*npcol_ + mypcol_ ; } } Comm().Barrier(); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:326" << std::endl; assert ( BlockRow == UniformRows / nb_ ) ; for ( int RowOffset = 0; RowOffset < OurExcessRows ; RowOffset++ ) { MyRows[RowIndex++] = BlockRow*nb_*nprow_*npcol_ + myprow_*nb_*npcol_ + RowOffset*npcol_ + mypcol_ ; } Comm().Barrier(); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:334" << std::endl; Comm().Barrier(); for (int i=0; i < NumOurRows_; i++ ) { assert( MyRows[i] == AllOurRows[i]*npcol_+mypcol_ ); } Comm().Barrier(); if ( debug_ == 1) std::cout << "Amesos_Scalapack.cpp:340" << " iam_ = " << iam_ << " myprow_ = " << myprow_ << " mypcol_ = " << mypcol_ << " NumRows_ = " << NumRows_ << " NumOurRows_ = " << NumOurRows_ << std::endl; Comm().Barrier(); Epetra_Map FatOutMap( npcol_*NumRows_, NumOurRows_, &MyRows[0], 0, Comm() ); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:344" << std::endl; Comm().Barrier(); if ( FatOut_ ) delete FatOut_ ; FatOut_ = new Epetra_CrsMatrix( Copy, FatOutMap, 0 ) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:348" << std::endl; Epetra_Export ExportToFatOut( FatInMap, FatOutMap ) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:360" << std::endl; FatOut_->Export( FatIn, ExportToFatOut, Add ); FatOut_->FillComplete( false ); // // Create a map to allow us to redistribute the vectors X and B // Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator()); const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ; assert( NumGlobalElements_ == OriginalMap.NumGlobalElements() ) ; int NumMyVecElements = 0 ; if ( mypcol_ == 0 ) { NumMyVecElements = NumOurRows_; } if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:385" << std::endl; if (VectorMap_) { delete VectorMap_ ; VectorMap_ = 0 ; } VectorMap_ = new Epetra_Map( NumGlobalElements_, NumMyVecElements, &AllOurRows[0], 0, Comm() ); if ( debug_ == 1) std::cout << "iam_ = " << iam_ << " Amesos_Scalapack.cpp:393 debug_ = " << debug_ << std::endl; } else { nprow_ = 1 ; npcol_ = NumberOfProcesses / nprow_ ; assert ( nprow_ * npcol_ == NumberOfProcesses ) ; m_per_p_ = ( NumRows_ + NumberOfProcesses - 1 ) / NumberOfProcesses ; int MyFirstElement = EPETRA_MIN( iam_ * m_per_p_, NumRows_ ) ; int MyFirstNonElement = EPETRA_MIN( (iam_+1) * m_per_p_, NumRows_ ) ; int NumExpectedElements = MyFirstNonElement - MyFirstElement ; assert( NumRows_ == RowMatrixA->NumGlobalRows() ) ; if ( ScaLAPACK1DMap_ ) delete( ScaLAPACK1DMap_ ) ; ScaLAPACK1DMap_ = new Epetra_Map( NumRows_, NumExpectedElements, 0, Comm() ); if ( ScaLAPACK1DMatrix_ ) delete( ScaLAPACK1DMatrix_ ) ; ScaLAPACK1DMatrix_ = new Epetra_CrsMatrix(Copy, *ScaLAPACK1DMap_, 0); Epetra_Export ExportToScaLAPACK1D_( OriginalMap, *ScaLAPACK1DMap_); ScaLAPACK1DMatrix_->Export( *RowMatrixA, ExportToScaLAPACK1D_, Add ); ScaLAPACK1DMatrix_->FillComplete( false ) ; } if ( debug_ == 1) std::cout << "iam_ = " << iam_ << " Amesos_Scalapack.cpp:417 debug_ = " << debug_ << std::endl; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:402" << " nprow_ = " << nprow_ << " npcol_ = " << npcol_ << std::endl ; int info; const int zero = 0 ; if ( ictxt_ == -1313 ) { ictxt_ = 0 ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:408" << std::endl; SL_INIT_F77(&ictxt_, &nprow_, &npcol_) ; } if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:410A" << std::endl; int nprow; int npcol; int myrow; int mycol; BLACS_GRIDINFO_F77(&ictxt_, &nprow, &npcol, &myrow, &mycol) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "iam_ = " << iam_ << " Amesos_Scalapack.cpp:410" << std::endl; if ( iam_ < nprow_ * npcol_ ) { assert( nprow == nprow_ ) ; if ( npcol != npcol_ ) std::cout << "Amesos_Scalapack.cpp:430 npcol = " << npcol << " npcol_ = " << npcol_ << std::endl ; assert( npcol == npcol_ ) ; if ( TwoD_distribution_ ) { assert( myrow == myprow_ ) ; assert( mycol == mypcol_ ) ; lda_ = EPETRA_MAX(1,NumOurRows_) ; } else { assert( myrow == 0 ) ; assert( mycol == iam_ ) ; nb_ = m_per_p_; lda_ = EPETRA_MAX(1,NumGlobalElements_); } if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp: " << __LINE__ << " TwoD_distribution_ = " << TwoD_distribution_ << " NumGlobalElements_ = " << NumGlobalElements_ << " debug_ = " << debug_ << " nb_ = " << nb_ << " lda_ = " << lda_ << " nprow_ = " << nprow_ << " npcol_ = " << npcol_ << " myprow_ = " << myprow_ << " mypcol_ = " << mypcol_ << " iam_ = " << iam_ << std::endl ; AMESOS_PRINT( myprow_ ); DESCINIT_F77(DescA_, &NumGlobalElements_, &NumGlobalElements_, &nb_, &nb_, &zero, &zero, &ictxt_, &lda_, &info) ; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:441" << std::endl; assert( info == 0 ) ; } else { DescA_[0] = -13; if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:458 nprow = " << nprow << std::endl; assert( nprow == -1 ) ; } if ( debug_ == 1) std::cout << "Amesos_Scalapack.cpp:446" << std::endl; MatTime_ += Time_->ElapsedTime(); return 0; }