bool NewtonSolver(const DataStructure& Mesh, const sparse& Diff, tensor& x, vector& dt, int Order) { int theta, GOrder, Npts = Mesh.GetNverts(), max_iters = 20; vector cm(1,.0); if(t >= (Order-1) * dt0) GOrder = Order; else { theta = (t + dt0)/dt0; GOrder = theta; } vector BDF1 = Construct_BDF(GOrder,dt), BDF2(GOrder,.0); matrix RHS1(Nc,Npts), RHS2(Nc,Npts); for(int j = 0; j < Nc; j++) for(int i = 1; i < GOrder + 1; i++) RHS1[j] -= BDF1(i) * x(j)[i]; if(GOrder > 1) { BDF2 = Construct_BDF(GOrder-1,dt); for(int j = 0; j < Nc; j++) for(int i = 1; i < GOrder; i++) RHS2[j] -= BDF2(i) * x(j)[i]; } vector x1 = x(0)[0], x2 = x(1)[0], x3 = x(2)[0], x4 = x(3)[0], x5 = x(4)[0]; double error = 1.0, tol = pow(10.0,-6.0), est = .0, abs_error = 0.0001, rel_error = abs_error; int iters = 0; while(error > tol) { iters++; error = FixedPointStep(Mesh,Diff,RHS1,x1,x2,x3,x4,x5,BDF1(0)); if(iters > max_iters) break; } cout << "FPI iters = " << iters << endl; x(0)[Order+2] = x1; x(1)[Order+2] = x2; x(2)[Order+2] = x3; x(3)[Order+2] = x4; x(4)[Order+2] = x5; if(GOrder > 1 && iters <= max_iters) { vector err(Nc*Npts,.0); matrix deriv1 = RHS1, deriv2 = RHS2; for(int i = 0; i < Nc; i++) { deriv1[i] -= BDF1(0) * x(i)[Order+2]; deriv2[i] -= BDF2(0) * x(i)[Order+2]; } for(int i = 0; i < Nc; i++) for(int j = 0; j < Npts; j++) if(fabs(deriv1[i](j)) > pow(10.0,-8.0)) err(i*Npts+j) = (fabs(deriv1[i](j)) - fabs(deriv2[i](j))); // / fabs(deriv1[i](j)); est = err.Norm(); double deno = .0; for(int i = 0; i < Nc; i++) deno += deriv1[i].Norm(); est = est / deno; } if(iters > max_iters) { cout << "Failure to converge with attempted timestep = " << dt(0) << endl; est = rel_error * 2.0; } if( est < rel_error) { for(int i = 0; i < Nc; i++) x(i)[0] = x(i)[Order+2]; for(int i = 0; i < Nc; i++) for(int j = Order; j > 0; j--) x(i)[j] = x(i)[j-1]; t += dt(0); vector time(1,dt(0)); cm(0) = x(2)[0].Amax(); time.Write("adaptive.dat"); cm.Write("cmax.dat"); for(int i = Order - 1; i > 0; i--) dt(i) = dt(i-1); cout << "Newton Adaptive BDF Solver Converged with c_max = " << cm << " at time " << t << endl; } if(GOrder > 1 && cm(0) < (c3 - dc)) dt(0) = min(tmax,0.8 * dt(0) * pow(rel_error/est,1.0/(GOrder))); else if(cm(0) >= (c3 - dc)) dt(0) = min(tmin,0.8 * dt(0) * pow(rel_error/est,1.0/GOrder)); return 0; }
int check(Epetra_SerialDenseSolver &solver, double * A1, int LDA1, int N1, int NRHS1, double OneNorm1, double * B1, int LDB1, double * X1, int LDX1, bool Transpose, bool verbose) { int i; bool OK; // Test query functions int M= solver.M(); if (verbose) cout << "\n\nNumber of Rows = " << M << endl<< endl; assert(M==N1); int N= solver.N(); if (verbose) cout << "\n\nNumber of Equations = " << N << endl<< endl; assert(N==N1); int LDA = solver.LDA(); if (verbose) cout << "\n\nLDA = " << LDA << endl<< endl; assert(LDA==LDA1); int LDB = solver.LDB(); if (verbose) cout << "\n\nLDB = " << LDB << endl<< endl; assert(LDB==LDB1); int LDX = solver.LDX(); if (verbose) cout << "\n\nLDX = " << LDX << endl<< endl; assert(LDX==LDX1); int NRHS = solver.NRHS(); if (verbose) cout << "\n\nNRHS = " << NRHS << endl<< endl; assert(NRHS==NRHS1); assert(solver.ANORM()==-1.0); assert(solver.RCOND()==-1.0); if (!solver.A_Equilibrated() && !solver.B_Equilibrated()) { assert(solver.ROWCND()==-1.0); assert(solver.COLCND()==-1.0); assert(solver.AMAX()==-1.0); } // Other binary tests assert(!solver.Factored()); assert(solver.Transpose()==Transpose); assert(!solver.SolutionErrorsEstimated()); assert(!solver.Inverted()); assert(!solver.ReciprocalConditionEstimated()); assert(!solver.Solved()); assert(!solver.SolutionRefined()); int ierr = solver.Factor(); assert(ierr>-1); if (ierr!=0) return(ierr); // Factorization failed due to poor conditioning. double rcond; ierr = solver.ReciprocalConditionEstimate(rcond); assert(ierr==0); if (verbose) { double rcond1 = 1.0/exp(3.5*((double)N)); if (N==1) rcond1 = 1.0; cout << "\n\nRCOND = "<< rcond << " should be approx = " << rcond1 << endl << endl; } ierr = solver.Solve(); assert(ierr>-1); if (ierr!=0 && verbose) cout << "LAPACK rules suggest system should be equilibrated." << endl; assert(solver.Factored()); assert(solver.Transpose()==Transpose); assert(solver.ReciprocalConditionEstimated()); assert(solver.Solved()); if (solver.SolutionErrorsEstimated()) { if (verbose) { cout << "\n\nFERR[0] = "<< solver.FERR()[0] << endl; cout << "\n\nBERR[0] = "<< solver.BERR()[0] << endl<< endl; } } double * resid = new double[NRHS]; OK = Residual(N, NRHS, A1, LDA1, solver.Transpose(), solver.X(), solver.LDX(), B1, LDB1, resid); if (verbose) { if (!OK) cout << "************* Residual do not meet tolerance *************" << endl; /* if (solver.A_Equilibrated()) { double * R = solver.R(); double * C = solver.C(); for (i=0; i<solver.M(); i++) cout << "R[" << i <<"] = "<< R[i] << endl; for (i=0; i<solver.N(); i++) cout << "C[" << i <<"] = "<< C[i] << endl; } */ cout << "\n\nResiduals using factorization to solve" << endl; for (i=0; i<NRHS; i++) cout << "Residual[" << i <<"] = "<< resid[i] << endl; cout << endl; } ierr = solver.Invert(); assert(ierr>-1); assert(solver.Inverted()); assert(!solver.Factored()); assert(solver.Transpose()==Transpose); Epetra_SerialDenseMatrix RHS1(Copy, B1, LDB1, N, NRHS); Epetra_SerialDenseMatrix LHS1(Copy, X1, LDX1, N, NRHS); assert(solver.SetVectors(LHS1, RHS1)==0); assert(!solver.Solved()); assert(solver.Solve()>-1); OK = Residual(N, NRHS, A1, LDA1, solver.Transpose(), solver.X(), solver.LDX(), B1, LDB1, resid); if (verbose) { if (!OK) cout << "************* Residual do not meet tolerance *************" << endl; cout << "Residuals using inverse to solve" << endl; for (i=0; i<NRHS; i++) cout << "Residual[" << i <<"] = "<< resid[i] << endl; cout << endl; } delete [] resid; return(0); }
//========================================================================== int Ifpack_ICT::Compute() { if (!IsInitialized()) IFPACK_CHK_ERR(Initialize()); Time_.ResetStartTime(); IsComputed_ = false; NumMyRows_ = A_.NumMyRows(); int Length = A_.MaxNumEntries(); vector<int> RowIndices(Length); vector<double> RowValues(Length); bool distributed = (Comm().NumProc() > 1)?true:false; if (distributed) { SerialComm_ = Teuchos::rcp(new Epetra_SerialComm); SerialMap_ = Teuchos::rcp(new Epetra_Map(NumMyRows_, 0, *SerialComm_)); assert (SerialComm_.get() != 0); assert (SerialMap_.get() != 0); } else SerialMap_ = Teuchos::rcp(const_cast<Epetra_Map*>(&A_.RowMatrixRowMap()), false); int RowNnz; #ifdef IFPACK_FLOPCOUNTERS double flops = 0.0; #endif H_ = Teuchos::rcp(new Epetra_CrsMatrix(Copy,*SerialMap_,0)); if (H_.get() == 0) IFPACK_CHK_ERR(-5); // memory allocation error // get A(0,0) element and insert it (after sqrt) IFPACK_CHK_ERR(A_.ExtractMyRowCopy(0,Length,RowNnz, &RowValues[0],&RowIndices[0])); // skip off-processor elements if (distributed) { int count = 0; for (int i = 0 ;i < RowNnz ; ++i) { if (RowIndices[i] < NumMyRows_){ RowIndices[count] = RowIndices[i]; RowValues[count] = RowValues[i]; ++count; } else continue; } RowNnz = count; } // modify diagonal double diag_val = 0.0; for (int i = 0 ;i < RowNnz ; ++i) { if (RowIndices[i] == 0) { double& v = RowValues[i]; diag_val = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v; break; } } diag_val = sqrt(diag_val); int diag_idx = 0; EPETRA_CHK_ERR(H_->InsertGlobalValues(0,1,&diag_val, &diag_idx)); // The 10 is just a small constant to limit collisons as the actual keys // we store are the indices and not integers // [0..A_.MaxNumEntries()*LevelofFill()]. Ifpack_HashTable Hash( 10 * A_.MaxNumEntries() * LevelOfFill(), 1); // start factorization for line 1 for (int row_i = 1 ; row_i < NumMyRows_ ; ++row_i) { // get row `row_i' of the matrix IFPACK_CHK_ERR(A_.ExtractMyRowCopy(row_i,Length,RowNnz, &RowValues[0],&RowIndices[0])); // skip off-processor elements if (distributed) { int count = 0; for (int i = 0 ;i < RowNnz ; ++i) { if (RowIndices[i] < NumMyRows_){ RowIndices[count] = RowIndices[i]; RowValues[count] = RowValues[i]; ++count; } else continue; } RowNnz = count; } // number of nonzeros in this row are defined as the nonzeros // of the matrix, plus the level of fill int LOF = (int)(LevelOfFill() * RowNnz); if (LOF == 0) LOF = 1; // convert line `row_i' into hash for fast access Hash.reset(); double h_ii = 0.0; for (int i = 0 ; i < RowNnz ; ++i) { if (RowIndices[i] == row_i) { double& v = RowValues[i]; h_ii = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v; } else if (RowIndices[i] < row_i) { Hash.set(RowIndices[i], RowValues[i], true); } } // form element (row_i, col_j) // I start from the first row that has a nonzero column // index in row_i. for (int col_j = RowIndices[0] ; col_j < row_i ; ++col_j) { double h_ij = 0.0, h_jj = 0.0; // note: get() returns 0.0 if col_j is not found h_ij = Hash.get(col_j); // get pointers to row `col_j' int* ColIndices; double* ColValues; int ColNnz; H_->ExtractGlobalRowView(col_j, ColNnz, ColValues, ColIndices); for (int k = 0 ; k < ColNnz ; ++k) { int col_k = ColIndices[k]; if (col_k == col_j) h_jj = ColValues[k]; else { double xxx = Hash.get(col_k); if (xxx != 0.0) { h_ij -= ColValues[k] * xxx; #ifdef IFPACK_FLOPCOUNTERS flops += 2.0; #endif } } } h_ij /= h_jj; if (IFPACK_ABS(h_ij) > DropTolerance_) { Hash.set(col_j, h_ij); } #ifdef IFPACK_FLOPCOUNTERS // only approx ComputeFlops_ += 2.0 * flops + 1.0; #endif } int size = Hash.getNumEntries(); vector<double> AbsRow(size); int count = 0; // +1 because I use the extra position for diagonal in insert vector<int> keys(size + 1); vector<double> values(size + 1); Hash.arrayify(&keys[0], &values[0]); for (int i = 0 ; i < size ; ++i) { AbsRow[i] = IFPACK_ABS(values[i]); } count = size; double cutoff = 0.0; if (count > LOF) { nth_element(AbsRow.begin(), AbsRow.begin() + LOF, AbsRow.begin() + count, std::greater<double>()); cutoff = AbsRow[LOF]; } for (int i = 0 ; i < size ; ++i) { h_ii -= values[i] * values[i]; } if (h_ii < 0.0) h_ii = 1e-12;; h_ii = sqrt(h_ii); #ifdef IFPACK_FLOPCOUNTERS // only approx, + 1 == sqrt ComputeFlops_ += 2 * size + 1; #endif double DiscardedElements = 0.0; count = 0; for (int i = 0 ; i < size ; ++i) { if (IFPACK_ABS(values[i]) > cutoff) { values[count] = values[i]; keys[count] = keys[i]; ++count; } else DiscardedElements += values[i]; } if (RelaxValue() != 0.0) { DiscardedElements *= RelaxValue(); h_ii += DiscardedElements; } values[count] = h_ii; keys[count] = row_i; ++count; H_->InsertGlobalValues(row_i, count, &(values[0]), (int*)&(keys[0])); } IFPACK_CHK_ERR(H_->FillComplete()); #if 0 // to check the complete factorization Epetra_Vector LHS(Matrix().RowMatrixRowMap()); Epetra_Vector RHS1(Matrix().RowMatrixRowMap()); Epetra_Vector RHS2(Matrix().RowMatrixRowMap()); Epetra_Vector RHS3(Matrix().RowMatrixRowMap()); LHS.Random(); Matrix().Multiply(false,LHS,RHS1); H_->Multiply(true,LHS,RHS2); H_->Multiply(false,RHS2,RHS3); RHS1.Update(-1.0, RHS3, 1.0); cout << endl; cout << RHS1; #endif int MyNonzeros = H_->NumGlobalNonzeros(); Comm().SumAll(&MyNonzeros, &GlobalNonzeros_, 1); IsComputed_ = true; #ifdef IFPACK_FLOPCOUNTERS double TotalFlops; // sum across all the processors A_.Comm().SumAll(&flops, &TotalFlops, 1); ComputeFlops_ += TotalFlops; #endif ++NumCompute_; ComputeTime_ += Time_.ElapsedTime(); return(0); }