//========================================================================== int Ifpack_CrsRiluk::Factor() { // if (!Allocated()) return(-1); // This test is not needed at this time. All constructors allocate. if (!ValuesInitialized()) return(-2); // Must have values initialized. if (Factored()) return(-3); // Can't have already computed factors. SetValuesInitialized(false); // MinMachNum should be officially defined, for now pick something a little // bigger than IEEE underflow value double MinDiagonalValue = Epetra_MinDouble; double MaxDiagonalValue = 1.0/MinDiagonalValue; int ierr = 0; int i, j, k; int * LI=0, * UI = 0; double * LV=0, * UV = 0; int NumIn, NumL, NumU; // Get Maximun Row length int MaxNumEntries = L_->MaxNumEntries() + U_->MaxNumEntries() + 1; vector<int> InI(MaxNumEntries); // Allocate temp space vector<double> InV(MaxNumEntries); vector<int> colflag(NumMyCols()); double *DV; ierr = D_->ExtractView(&DV); // Get view of diagonal #ifdef IFPACK_FLOPCOUNTERS int current_madds = 0; // We will count multiply-add as they happen #endif // Now start the factorization. // Need some integer workspace and pointers int NumUU; int * UUI; double * UUV; for (j=0; j<NumMyCols(); j++) colflag[j] = - 1; for(i=0; i<NumMyRows(); i++) { // Fill InV, InI with current row of L, D and U combined NumIn = MaxNumEntries; EPETRA_CHK_ERR(L_->ExtractMyRowCopy(i, NumIn, NumL, &InV[0], &InI[0])); LV = &InV[0]; LI = &InI[0]; InV[NumL] = DV[i]; // Put in diagonal InI[NumL] = i; EPETRA_CHK_ERR(U_->ExtractMyRowCopy(i, NumIn-NumL-1, NumU, &InV[NumL+1], &InI[NumL+1])); NumIn = NumL+NumU+1; UV = &InV[NumL+1]; UI = &InI[NumL+1]; // Set column flags for (j=0; j<NumIn; j++) colflag[InI[j]] = j; double diagmod = 0.0; // Off-diagonal accumulator for (int jj=0; jj<NumL; jj++) { j = InI[jj]; double multiplier = InV[jj]; // current_mults++; InV[jj] *= DV[j]; EPETRA_CHK_ERR(U_->ExtractMyRowView(j, NumUU, UUV, UUI)); // View of row above if (RelaxValue_==0.0) { for (k=0; k<NumUU; k++) { int kk = colflag[UUI[k]]; if (kk>-1) { InV[kk] -= multiplier*UUV[k]; #ifdef IFPACK_FLOPCOUNTERS current_madds++; #endif } } } else { for (k=0; k<NumUU; k++) { int kk = colflag[UUI[k]]; if (kk>-1) InV[kk] -= multiplier*UUV[k]; else diagmod -= multiplier*UUV[k]; #ifdef IFPACK_FLOPCOUNTERS current_madds++; #endif } } } if (NumL) { EPETRA_CHK_ERR(L_->ReplaceMyValues(i, NumL, LV, LI)); // Replace current row of L } DV[i] = InV[NumL]; // Extract Diagonal value if (RelaxValue_!=0.0) { DV[i] += RelaxValue_*diagmod; // Add off diagonal modifications // current_madds++; } if (fabs(DV[i]) > MaxDiagonalValue) { if (DV[i] < 0) DV[i] = - MinDiagonalValue; else DV[i] = MinDiagonalValue; } else DV[i] = 1.0/DV[i]; // Invert diagonal value for (j=0; j<NumU; j++) UV[j] *= DV[i]; // Scale U by inverse of diagonal if (NumU) { EPETRA_CHK_ERR(U_->ReplaceMyValues(i, NumU, UV, UI)); // Replace current row of L and U } // Reset column flags for (j=0; j<NumIn; j++) colflag[InI[j]] = -1; } // Validate that the L and U factors are actually lower and upper triangular if( !L_->LowerTriangular() ) EPETRA_CHK_ERR(-2); if( !U_->UpperTriangular() ) EPETRA_CHK_ERR(-3); #ifdef IFPACK_FLOPCOUNTERS // Add up flops double current_flops = 2 * current_madds; double total_flops = 0; EPETRA_CHK_ERR(Graph_.L_Graph().RowMap().Comm().SumAll(¤t_flops, &total_flops, 1)); // Get total madds across all PEs // Now count the rest total_flops += (double) L_->NumGlobalNonzeros(); // Accounts for multiplier above total_flops += (double) D_->GlobalLength(); // Accounts for reciprocal of diagonal if (RelaxValue_!=0.0) total_flops += 2 * (double)D_->GlobalLength(); // Accounts for relax update of diag UpdateFlops(total_flops); // Update flop count #endif SetFactored(true); return(ierr); }
//============================================================================= void Epetra_MsrMatrix::Print(ostream& os) const { int MyPID = RowMatrixRowMap().Comm().MyPID(); int NumProc = RowMatrixRowMap().Comm().NumProc(); for (int iproc=0; iproc < NumProc; iproc++) { if (MyPID==iproc) { /* const Epetra_fmtflags olda = os.setf(ios::right,ios::adjustfield); const Epetra_fmtflags oldf = os.setf(ios::scientific,ios::floatfield); const int oldp = os.precision(12); */ if (MyPID==0) { os << "\nNumber of Global Rows = "; os << NumGlobalRows(); os << endl; os << "Number of Global Cols = "; os << NumGlobalCols(); os << endl; os << "Number of Global Diagonals = "; os << NumGlobalDiagonals(); os << endl; os << "Number of Global Nonzeros = "; os << NumGlobalNonzeros(); os << endl; if (LowerTriangular()) os << " ** Matrix is Lower Triangular **"; os << endl; if (UpperTriangular()) os << " ** Matrix is Upper Triangular **"; os << endl; } os << "\nNumber of My Rows = "; os << NumMyRows(); os << endl; os << "Number of My Cols = "; os << NumMyCols(); os << endl; os << "Number of My Diagonals = "; os << NumMyDiagonals(); os << endl; os << "Number of My Nonzeros = "; os << NumMyNonzeros(); os << endl; os << endl; os << flush; // Reset os flags /* os.setf(olda,ios::adjustfield); os.setf(oldf,ios::floatfield); os.precision(oldp); */ } // Do a few global ops to give I/O a chance to complete Comm().Barrier(); Comm().Barrier(); Comm().Barrier(); } {for (int iproc=0; iproc < NumProc; iproc++) { if (MyPID==iproc) { int i, j; if (MyPID==0) { os.width(8); os << " Processor "; os.width(10); os << " Row Index "; os.width(10); os << " Col Index "; os.width(20); os << " Value "; os << endl; } for (i=0; i<NumMyRows_; i++) { int Row = RowMatrixRowMap().GID(i); // Get global row number int NumEntries = GetRow(i); // ith row is now in Values_ and Indices_ for (j = 0; j < NumEntries ; j++) { os.width(8); os << MyPID ; os << " "; os.width(10); os << Row ; os << " "; os.width(10); os << RowMatrixColMap().GID(Indices_[j]); os << " "; os.width(20); os << Values_[j]; os << " "; os << endl; } } os << flush; } // Do a few global ops to give I/O a chance to complete RowMatrixRowMap().Comm().Barrier(); RowMatrixRowMap().Comm().Barrier(); RowMatrixRowMap().Comm().Barrier(); }} return; }
//============================================================================= int Epetra_FastCrsMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { // // This function forms the product Y = A * Y or Y = A' * X // if (X.NumVectors()==1 && Y.NumVectors()==1) { double * xp = (double *) X[0]; double * yp = (double *) Y[0]; Epetra_Vector x(View, X.Map(), xp); Epetra_Vector y(View, Y.Map(), yp); return(Multiply(TransA, x, y)); } if (!Filled()) EPETRA_CHK_ERR(-1); // Matrix must be filled. int i, j, k; int * NumEntriesPerRow = NumEntriesPerRow_; int ** Indices = Indices_; double ** Values = Values_; double **Xp = (double**)X.Pointers(); double **Yp = (double**)Y.Pointers(); int NumVectors = X.NumVectors(); int NumMyCols_ = NumMyCols(); // Need to better manage the Import and Export vectors: // - Need accessor functions // - Need to make the NumVector match (use a View to do this) // - Need to look at RightScale and ColSum routines too. if (!TransA) { // If we have a non-trivial importer, we must import elements that are permuted or are on other processors if (Importer()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),NumVectors); // Create import vector if needed ImportVector_->Import(X, *Importer(), Insert); Xp = (double**)ImportVector_->Pointers(); } // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors if (Exporter()!=0) { if (ExportVector_!=0) { if (ExportVector_->NumVectors()!=NumVectors) { delete ExportVector_; ExportVector_= 0;} } if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),NumVectors); // Create Export vector if needed Yp = (double**)ExportVector_->Pointers(); } // Do actual computation for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues = *Values++; for (k=0; k<NumVectors; k++) { double sum = 0.0; for (j=0; j < NumEntries; j++) sum += RowValues[j] * Xp[k][RowIndices[j]]; Yp[k][i] = sum; } } if (Exporter()!=0) Y.Export(*ExportVector_, *Exporter(), Add); // Fill Y with Values from export vector } else { // Transpose operation // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors if (Exporter()!=0) { if (ExportVector_!=0) { if (ExportVector_->NumVectors()!=NumVectors) { delete ExportVector_; ExportVector_= 0;} } if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),NumVectors); // Create Export vector if needed ExportVector_->Import(X, *Exporter(), Insert); Xp = (double**)ExportVector_->Pointers(); } // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors if (Importer()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),NumVectors); // Create import vector if needed Yp = (double**)ImportVector_->Pointers(); } // Do actual computation for (k=0; k<NumVectors; k++) for (i=0; i < NumMyCols_; i++) Yp[k][i] = 0.0; // Initialize y for transpose multiply for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues = *Values++; for (k=0; k<NumVectors; k++) { for (j=0; j < NumEntries; j++) Yp[k][RowIndices[j]] += RowValues[j] * Xp[k][i]; } } if (Importer()!=0) Y.Export(*ImportVector_, *Importer(), Add); // Fill Y with Values from export vector } UpdateFlops(2*NumVectors*NumGlobalNonzeros64()); return(0); }
//============================================================================= int Epetra_FastCrsMatrix::Solve(bool Upper, bool Trans, bool UnitDiagonal, const Epetra_Vector& x, Epetra_Vector& y) const { // // This function find y such that Ly = x or Uy = x or the transpose cases. // if (!Filled()) EPETRA_CHK_ERR(-1); // Matrix must be filled. if ((Upper) && (!UpperTriangular())) EPETRA_CHK_ERR(-2); if ((!Upper) && (!LowerTriangular())) EPETRA_CHK_ERR(-3); if ((!UnitDiagonal) && (NoDiagonal())) EPETRA_CHK_ERR(-4); // If matrix has no diagonal, we must use UnitDiagonal if ((!UnitDiagonal) && (NumMyDiagonals()<NumMyRows_)) EPETRA_CHK_ERR(-5); // Need each row to have a diagonal int i, j, j0; int * NumEntriesPerRow = NumEntriesPerRow_; int ** Indices = Indices_; double ** Values = Values_; int NumMyCols_ = NumMyCols(); // If upper, point to last row if ((Upper && !Trans) || (!Upper && Trans)) { NumEntriesPerRow += NumMyRows_-1; Indices += NumMyRows_-1; Values += NumMyRows_-1; } double *xp = (double*)x.Values(); double *yp = (double*)y.Values(); if (!Trans) { if (Upper) { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=NumMyRows_-1; i >=0; i--) { int NumEntries = *NumEntriesPerRow--; int * RowIndices = *Indices--; double * RowValues = *Values--; double sum = 0.0; for (j=j0; j < NumEntries; j++) sum += RowValues[j] * yp[RowIndices[j]]; if (UnitDiagonal) yp[i] = xp[i] - sum; else yp[i] = (xp[i] - sum)/RowValues[0]; } } else { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++ - j0; int * RowIndices = *Indices++; double * RowValues = *Values++; double sum = 0.0; for (j=0; j < NumEntries; j++) sum += RowValues[j] * yp[RowIndices[j]]; if (UnitDiagonal) yp[i] = xp[i] - sum; else yp[i] = (xp[i] - sum)/RowValues[NumEntries]; } } } // *********** Transpose case ******************************* else { if (xp!=yp) for (i=0; i < NumMyCols_; i++) yp[i] = xp[i]; // Initialize y for transpose solve if (Upper) { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues = *Values++; if (!UnitDiagonal) yp[i] = yp[i]/RowValues[0]; for (j=j0; j < NumEntries; j++) yp[RowIndices[j]] -= RowValues[j] * yp[i]; } } else { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=NumMyRows_-1; i >= 0; i--) { int NumEntries = *NumEntriesPerRow-- - j0; int * RowIndices = *Indices--; double * RowValues = *Values--; if (!UnitDiagonal) yp[i] = yp[i]/RowValues[NumEntries]; for (j=0; j < NumEntries; j++) yp[RowIndices[j]] -= RowValues[j] * yp[i]; } } } UpdateFlops(2*NumGlobalNonzeros64()); return(0); }
//============================================================================= int Epetra_FastCrsMatrix::Multiply(bool TransA, const Epetra_Vector& x, Epetra_Vector& y) const { // // This function forms the product y = A * x or y = A' * x // int i, j; double * xp = (double*)x.Values(); double *yp = (double*)y.Values(); int NumMyCols_ = NumMyCols(); if (!TransA) { // If we have a non-trivial importer, we must import elements that are permuted or are on other processors if (Importer()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=1) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),1); // Create import vector if needed ImportVector_->Import(x, *Importer(), Insert); xp = (double*)ImportVector_->Values(); } // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors if (Exporter()!=0) { if (ExportVector_!=0) { if (ExportVector_->NumVectors()!=1) { delete ExportVector_; ExportVector_= 0;} } if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),1); // Create Export vector if needed yp = (double*)ExportVector_->Values(); } // Do actual computation for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues = *Values++; double sum = 0.0; for (j=0; j < NumEntries; j++) sum += RowValues[j] * xp[RowIndices[j]]; yp[i] = sum; } if (Exporter()!=0) y.Export(*ExportVector_, *Exporter(), Add); // Fill y with Values from export vector } else { // Transpose operation // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors if (Exporter()!=0) { if (ExportVector_!=0) { if (ExportVector_->NumVectors()!=1) { delete ExportVector_; ExportVector_= 0;} } if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),1); // Create Export vector if needed ExportVector_->Import(x, *Exporter(), Insert); xp = (double*)ExportVector_->Values(); } // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors if (Importer()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=1) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),1); // Create import vector if needed yp = (double*)ImportVector_->Values(); } // Do actual computation for (i=0; i < NumMyCols_; i++) yp[i] = 0.0; // Initialize y for transpose multiply for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues = *Values++; for (j=0; j < NumEntries; j++) yp[RowIndices[j]] += RowValues[j] * xp[i]; } if (Importer()!=0) y.Export(*ImportVector_, *Importer(), Add); // Fill y with Values from export vector } UpdateFlops(2*NumGlobalNonzeros64()); return(0); }