static void mg3P(double **u, double *v, double **r, double a[4], double c[4], int n1, int n2, int n3, int k) { /*-------------------------------------------------------------------- c-------------------------------------------------------------------*/ /*-------------------------------------------------------------------- c multigrid V-cycle routine c-------------------------------------------------------------------*/ int j; /*-------------------------------------------------------------------- c down cycle. c restrict the residual from the find grid to the coarse c-------------------------------------------------------------------*/ for (k = lt; k >= lb+1; k--) { j = k-1; rprj3(r[k], m1[k], m2[k], m3[k], r[j], m1[j], m2[j], m3[j], k); } k = lb; /*-------------------------------------------------------------------- c compute an approximate solution on the coarsest grid c-------------------------------------------------------------------*/ zero3(u[k], m1[k], m2[k], m3[k]); psinv(r[k], u[k], m1[k], m2[k], m3[k], c, k); for (k = lb+1; k <= lt-1; k++) { j = k-1; /*-------------------------------------------------------------------- c prolongate from level k-1 to k c-------------------------------------------------------------------*/ zero3(u[k], m1[k], m2[k], m3[k]); interp(u[j], m1[j], m2[j], m3[j], u[k], m1[k], m2[k], m3[k], k); /*-------------------------------------------------------------------- c compute residual for level k c-------------------------------------------------------------------*/ resid(u[k], r[k], r[k], m1[k], m2[k], m3[k], a, k); /*-------------------------------------------------------------------- c apply smoother c-------------------------------------------------------------------*/ psinv(r[k], u[k], m1[k], m2[k], m3[k], c, k); } j = lt - 1; k = lt; interp(u[j], m1[j], m2[j], m3[j], u[lt], n1, n2, n3, k); resid(u[lt], v, r[lt], n1, n2, n3, a, k); psinv(r[lt], u[lt], n1, n2, n3, c, k); }
/// One Jacobi relaxation sweep: a_phi += relaxCoeff * residual(a_phi, a_rhs).
///
/// Replaces an earlier in-place Gauss-Seidel-like method. Two temporary
/// level-sized holders are allocated on every call, which is wasteful in
/// memory but keeps the update simple and easy to verify.
///
/// @param a_phi  solution, updated in place
/// @param a_rhs  right-hand side handed to residual()
void
EBPoissonOp::
levelJacobi(LevelData<EBCellFAB>&       a_phi,
            const LevelData<EBCellFAB>& a_rhs)
{
  CH_TIME("EBPoissonOp::levelJacobi");

  // Hardcoded to a single variable (component), like some other code
  // in this class.
  const int numComps = 1;

  EBCellFactory cellFactory(m_eblg.getEBISL());

  // Residual for the current iterate; the trailing 'true' is forwarded
  // as-is (presumably the homogeneous-BC flag -- confirm in residual()).
  LevelData<EBCellFAB> correction(m_eblg.getDBL(), numComps,
                                  m_ghostCellsRHS, cellFactory);
  residual(correction, a_phi, a_rhs, true);

  // Relaxation coefficient supplied by getJacobiRelaxCoeff().
  LevelData<EBCellFAB> weights(m_eblg.getDBL(), numComps,
                               m_ghostCellsRHS, cellFactory);
  getJacobiRelaxCoeff(weights);

  // correction <- weights * correction ;  a_phi <- a_phi + 1.0 * correction
  EBLevelDataOps::scale(correction, weights);
  EBLevelDataOps::incr(a_phi, correction, 1.0);
}
void TrdHSegmentR::calChi2(){ Chi2=0.; for(int i=0;i<nTrdRawHit();i++){ TRDHitRZD rzd=TRDHitRZD(*pTrdRawHit(i)); Chi2+=pow(resid(rzd.r,rzd.z,rzd.d)/ (0.62/sqrt(12.)),2); } }
// Returns len minus the current resid() count.
// Asserts that resid() does not exceed len.
size_t
suiolite::capacity () const
{
  const int used = resid ();
  assert (used <= len);
  return len - used;
}
/*
 * Thin wrapper around resid(): forwards every argument unchanged and
 * appends a trailing 0. Whatever resid() writes through 'residuals' and
 * 'rms' is the result; this function itself always returns 0.
 */
int residuals_b_predicts_a(double ax[], double ay[], double bx[], double by[],
                           int use[], int n, double residuals[], double *rms)
{
    /* Last argument of resid(). NOTE(review): presumably selects the
       "b predicts a" direction -- confirm against resid(). */
    const int mode = 0;

    resid(ax, ay, bx, by, use, n, residuals, rms, mode);

    return 0;
}
bool AmesosBTFGlobal_LinearProblem:: rvs() { // cout << "AmesosBTFGlobal_LinearProblem: NewLHS_" << endl; // cout << *NewLHS_ << endl; OldLHS_->Import( *NewLHS_, *Importer2_, Insert ); int numrhs = OldLHS_->NumVectors(); std::vector<double> actual_resids( numrhs ), rhs_norm( numrhs ); Epetra_MultiVector resid( OldLHS_->Map(), numrhs ); OldMatrix_->Apply( *OldLHS_, resid ); resid.Update( -1.0, *OldRHS_, 1.0 ); resid.Norm2( &actual_resids[0] ); OldRHS_->Norm2( &rhs_norm[0] ); if (OldLHS_->Comm().MyPID() == 0 ) { for (int i=0; i<numrhs; i++ ) { std::cout << "Problem " << i << " (in AmesosBTFGlobal): \t" << actual_resids[i]/rhs_norm[i] << std::endl; } } //cout << "AmesosBTFGlobal_LinearProblem: OldLHS_" << endl; //cout << *OldLHS_ << endl; return true; }
// Simple Power method algorithm double power_method(const Epetra_CrsMatrix& A) { // variable needed for iteration double lambda = 0.0; int niters = A.RowMap().NumGlobalElements()*10; double tolerance = 1.0e-10; // Create vectors Epetra_Vector q(A.RowMap()); Epetra_Vector z(A.RowMap()); Epetra_Vector resid(A.RowMap()); // Fill z with random Numbers z.Random(); // variable needed for iteration double normz; double residual = 0; int iter = 0; while (iter==0 || (iter < niters && residual > tolerance)) { z.Norm2(&normz); // Compute 2-norm of z q.Scale(1.0/normz, z); A.Multiply(false, q, z); // Compute z = A*q q.Dot(z, &lambda); // Approximate maximum eigenvalue if (iter%10==0 || iter+1==niters) { // Compute A*q - lambda*q every 10 iterations resid.Update(1.0, z, -lambda, q, 0.0); resid.Norm2(&residual); if (q.Map().Comm().MyPID()==0) std::cout << "Iter = " << iter << " Lambda = " << lambda << " Two-norm of A*q - lambda*q = " << residual << std::endl; } iter++; } return(lambda); }
int TpetraLinearSolver::residual_norm(int whichNorm, Teuchos::RCP<LinSys::Vector> sln, double& norm) { LinSys::Vector resid(rhs_->getMap()); ThrowRequire(! (sln.is_null() || rhs_.is_null() ) ); if (matrix_->isFillActive() ) { // FIXME //!matrix_->fillComplete(map_, map_); throw std::runtime_error("residual_norm"); } matrix_->apply(*sln, resid); LinSys::OneDVector rhs = rhs_->get1dViewNonConst (); LinSys::OneDVector res = resid.get1dViewNonConst (); for (int i=0; i<rhs.size(); ++i) res[i] -= rhs[i]; if ( whichNorm == 0 ) norm = resid.normInf(); else if ( whichNorm == 1 ) norm = resid.norm1(); else if ( whichNorm == 2 ) norm = resid.norm2(); else return 1; return 0; }
/* -----------------------------------------------------------------------------------
 * Solve: M x = b
 *
 * Factorises the complex sparse matrix held in spmat with UMFPACK
 * (symbolic + numeric), solves for x, and prints the max-norm of the
 * residual to stdout.
 *
 *   spmat            matrix in the Ap/Ai/Ax/Az layout expected by umfpack_zl_*
 *   b_real, b_imag   right-hand side (real / imaginary parts)
 *   x_real, x_imag   receive the solution (real / imaginary parts)
 *
 * Returns 0 on success and -1 if any UMFPACK stage reports an error
 * (status < 0). The Symbolic and Numeric objects are released on every
 * path, so nothing is leaked per call.
 */
int
gsl_sparse_matrix_complex_LU_solve(gsl_sparse_matrix_complex *spmat,
                                   double *b_real, double *b_imag,
                                   double *x_real, double *x_imag)
{
    void *Symbolic = NULL;
    void *Numeric = NULL;
    int status;

    //gsl_sparse_matrix_complex_print_col(spmat);

    /* --- symbolic factorization --- */
    status = umfpack_zl_symbolic(spmat->n, spmat->n, spmat->Ap, spmat->Ai,
                                 spmat->Ax, spmat->Az, &Symbolic,
                                 spmat->Control, spmat->Info);
    if (status < 0)
    {
        umfpack_zl_report_info(spmat->Control, spmat->Info);
        //umfpack_zl_report_status(spmat->Control, status);
        fprintf(stderr, "%s: umfpack_zl_symbolic failed\n", __PRETTY_FUNCTION__);
        /* Freeing is a no-op if no object was produced. */
        umfpack_zl_free_symbolic(&Symbolic);
        return -1;
    }

    //printf("%s: Symbolic factorization of A: ", __PRETTY_FUNCTION__);
    //umfpack_zl_report_symbolic(Symbolic, spmat->Control);

    /* --- numeric factorization --- */
    status = umfpack_zl_numeric(spmat->Ap, spmat->Ai, spmat->Ax, spmat->Az,
                                Symbolic, &Numeric,
                                spmat->Control, spmat->Info);

    /* The symbolic object is only an input to the numeric stage. */
    umfpack_zl_free_symbolic(&Symbolic);

    if (status < 0)
    {
        umfpack_zl_report_info(spmat->Control, spmat->Info);
        umfpack_zl_report_status(spmat->Control, status);
        fprintf(stderr, "%s: umfpack_zl_numeric failed", __PRETTY_FUNCTION__);
        umfpack_zl_free_numeric(&Numeric);
        return -1;
    }

    //printf("%s: Numeric factorization of A: ", __PRETTY_FUNCTION__);
    //umfpack_zl_report_numeric(Numeric, spmat->Control);

    /* --- Solve M x = b --- */
    status = umfpack_zl_solve(UMFPACK_A, spmat->Ap, spmat->Ai, spmat->Ax, spmat->Az,
                              x_real, x_imag, b_real, b_imag,
                              Numeric, spmat->Control, spmat->Info);

    /* The factors are not needed once the solve has run. */
    umfpack_zl_free_numeric(&Numeric);

    //umfpack_zl_report_info(spmat->Control, spmat->Info);
    //umfpack_zl_report_status(spmat->Control, status);
    if (status < 0)
    {
        /* Report the failure to the caller instead of returning success
           with an unusable x. */
        fprintf(stderr, "%s: umfpack_zl_solve failed\n", __PRETTY_FUNCTION__);
        return -1;
    }

    //printf("%s: x (solution of Ax=b): ") ;
    //umfpack_zl_report_vector(spmat->n, x_real, x_imag, spmat->Control);

    {
        double rnorm = resid(FALSE, spmat->Ap, spmat->Ai, spmat->Ax, spmat->Az,
                             x_real, x_imag, b_real, b_imag, spmat->n);
        printf ("maxnorm of residual: %g\n\n", rnorm) ;
    }

    return 0;
}
bool IterativeSolvers::pcg(const IRCMatrix &A, Vector &x, const Vector &b, const Preconditioner &M) { /*! Solves Ax=b using the preconditioned conjugate gradient method. */ const idx N = x.getLength(); real resid(100.0); Vector p(N), z(N), q(N); real alpha; real normr(0); real normb = norm(b); real rho(0), rho_1(0), beta(0); Vector r = b - A * x; if (normb == 0.0) normb = 1; resid = norm(r) / normb; if (resid <= IterativeSolvers::toler) { IterativeSolvers::toler = resid; IterativeSolvers::maxIter = 0; return true; } // MAIN LOOP idx i = 1; for (; i <= IterativeSolvers::maxIter; i++) { M.solveMxb(z, r); rho = dot(r, z); if (i == 1) p = z; else { beta = rho / rho_1; aypx(beta, p, z); // p = beta*p + z; } // CALCULATES q = A*p AND dp = dot(q,p) real dp = multiply_dot(A, p, q); alpha = rho / dp; normr = 0; #ifdef USES_OPENMP #pragma omp parallel for reduction(+:normr) #endif for (idx j = 0 ; j < N ; ++j) { x[j] += alpha * p[j]; // x + alpha(0) * p; r[j] -= alpha * q[j]; // r - alpha(0) * q; normr += r[j] * r[j]; } normr = sqrt(normr); resid = normr / normb; if (resid <= IterativeSolvers::toler) { IterativeSolvers::toler = resid; IterativeSolvers::maxIter = i; return true; } rho_1 = rho; } IterativeSolvers::toler = resid; return false; }
// Replaces the buffer with a freshly allocated one of min(ns,
// SUIOLITE_MAX_BUFLEN) bytes and calls clear().
// Asserts that resid() is 0 and that bytes_read is 0 beforehand.
// NOTE(review): ns is narrowed to int by min<int> -- confirm callers never
// pass values above INT_MAX.
void
suiolite::grow (size_t ns)
{
  assert (resid () == 0);
  assert (bytes_read == 0);

  // Drop the old storage first, then size and allocate the new one.
  xfree (buf);

  const int newlen = min<int> (ns, SUIOLITE_MAX_BUFLEN);
  len = newlen;
  buf = static_cast<char *> (xmalloc (len));

  clear ();
}
//--------------------------------------------------------- void EulerShock2D::Report(bool bForce) //--------------------------------------------------------- { CurvedEuler2D::Report(bForce); if (tstep>=1 && tstep <= resid.size()) { // calculate residual // resid(tstep) = sqrt(sum(sum(sum((Q-oldQ).^2)))/(4*K*Np)); // resid(tstep) = sqrt(sum(sum(sum((Q-oldQ).^2)))/(4*K*Np))/dt; DMat Qresid = sqr(Q-oldQ); double d4KNp = double(4*K*Np); resid(tstep) = sqrt(Qresid.sum()/d4KNp); if (eScramInlet == sim_type) { // scale residual resid(tstep) /= dt; } } }
/// Solves (this matrix) * X = B with the conjugate gradient method,
/// starting from the zero vector.
///
/// @param B              right-hand side
/// @param epsilon        stop once the 2-norm of the residual is <= epsilon
/// @param niter          maximum number of iterations
/// @param printMessages  print progress to std::cout
/// @param messageStep    print every messageStep-th iteration; 0 disables
///                       the periodic progress lines (it used to divide
///                       by zero)
/// @return the last iterate X (converged or not)
DoubleVector RowDoubleMatrix::conjugateGradient(const DoubleVector &B, double epsilon, unsigned int niter, bool printMessages, unsigned int messageStep) const
{
    DoubleVector X(size_, 0.0);  // initial guess: the zero vector
    DoubleVector resid(size_);   // residual
    DoubleVector direction;      // search direction
    DoubleVector temp(size_);    // scratch storage for product(direction)
    double resid_norm;           // norm of the residual
    double alpha;
    double beta;
    double resid_resid, resid_resid_new;

    residual(X, B, resid);
    direction = resid;
    resid_norm = resid.norm_2();

    if (printMessages)
        std::cout << "Начальная невязка: " << resid_norm << std::endl;

    if (resid_norm > epsilon)
    {
        resid_resid = resid * resid;
        for (unsigned int i = 0; i < niter; i++)
        {
            product(direction, temp);
            // std::cout << direction.norm_2() << " " << temp.norm_2() << std::endl;

            alpha = (resid_resid) / (direction * temp);
            X += alpha * direction;
            resid -= alpha * temp;

            resid_resid_new = resid * resid;
            resid_norm = sqrt(resid_resid_new);
            if (resid_norm <= epsilon)
            {
                if (printMessages)
                    std::cout << "Решение найдено. Итераций: " << i << ", невязка: " << resid_norm << std::endl;
                break;
            }

            // messageStep == 0 would make the modulo undefined, so it is
            // treated as "no periodic output".
            if (printMessages && messageStep != 0 && (i % messageStep == 0))
                std::cout << i << ", невязка: " << resid_norm << std::endl;

            beta = (resid_resid_new) / (resid_resid);
            // d = r + d*beta
            direction.scale(beta);
            direction += resid;
            //
            resid_resid = resid_resid_new;
        }
    }
    return X;
}
void suiolite::rembytes (ssize_t nbytes) { ssize_t rd = resid (); assert (rd >= nbytes); bool docall = full () && nbytes > 0 && scb; int len2 = bep - rp; if (nbytes >= len2) { nbytes -= len2; rp = buf + nbytes; dep[1] = dep[0]; dep[0] = buf; } else { rp += nbytes; } if (docall) (*scb) (); }
// Power method with a caller-supplied iteration cap and tolerance.
//
//   A          matrix; its flop counter (if any) is attached to the work
//              vectors
//   lambda     receives the current eigenvalue estimate each iteration
//   niters     maximum number of iterations
//   tolerance  stop once the last computed residual norm drops below it
//   verbose    print progress whenever the residual is recomputed
//
// The residual ||A*q - lambda*q||_2 is recomputed on every 100th iteration
// and on the final one; the convergence test in between uses the most
// recently computed value.
// Returns 0 on convergence, 1 otherwise.
int power_method(Epetra_CrsMatrix& A, double &lambda, int niters, double tolerance, bool verbose)
{
  Epetra_Vector q(A.RowMap());
  Epetra_Vector z(A.RowMap());
  Epetra_Vector resid(A.RowMap());

  Epetra_Flops * counter = A.GetFlopCounter();
  if (counter != 0) {
    q.SetFlopCounter(A);
    z.SetFlopCounter(A);
    resid.SetFlopCounter(A);
  }

  // Random starting vector
  z.Random();

  double normz, residual;

  int iter = 0;
  while (iter < niters) {
    z.Norm2(&normz);            // 2-norm of z
    q.Scale(1.0/normz, z);      // q = z / ||z||
    A.Multiply(false, q, z);    // z = A*q
    q.Dot(z, &lambda);          // approximate maximum eigenvalue

    const bool refresh = (iter%100 == 0) || (iter+1 == niters);
    if (refresh) {
      resid.Update(1.0, z, -lambda, q, 0.0);   // A*q - lambda*q
      resid.Norm2(&residual);
      if (verbose)
        cout << "Iter = " << iter << " Lambda = " << lambda
             << " Residual of A*q - lambda*q = " << residual << endl;
    }

    if (residual < tolerance) {
      return(0);                // converged
    }
    iter++;
  }

  return(1);                    // iteration cap reached
}
int checkResults(Epetra_RowMatrix * A, Epetra_CrsMatrix * transA, Epetra_Vector * xexact, bool verbose) { int n = A->NumGlobalRows(); if (n<100) cout << "A transpose = " << endl << *transA << endl; Epetra_Vector x1(View,A->OperatorDomainMap(), &((*xexact)[0])); Epetra_Vector b1(A->OperatorRangeMap()); A->SetUseTranspose(true); Epetra_Time timer(A->Comm()); double start = timer.ElapsedTime(); A->Apply(x1, b1); if (verbose) cout << "\nTime to compute b1: matvec with original matrix using transpose flag = " << timer.ElapsedTime() - start << endl; if (n<100) cout << "b1 = " << endl << b1 << endl; Epetra_Vector x2(View,transA->OperatorRangeMap(), &((*xexact)[0])); Epetra_Vector b2(transA->OperatorDomainMap()); start = timer.ElapsedTime(); transA->Multiply(false, x2, b2); if (verbose) cout << "\nTime to compute b2: matvec with transpose matrix = " << timer.ElapsedTime() - start << endl; if (n<100) cout << "b1 = " << endl << b1 << endl; double residual; Epetra_Vector resid(A->OperatorDomainMap()); resid.Update(1.0, b1, -1.0, b2, 0.0); resid.Norm2(&residual); if (verbose) cout << "Norm of b1 - b2 = " << residual << endl; int ierr = 0; if (residual > 1.0e-10) ierr++; if (ierr!=0 && verbose) cerr << "Status: Test failed" << endl; else if (verbose) cerr << "Status: Test passed" << endl; return(ierr); }
int NineNodeMixedQuad::addInertiaLoadToUnbalance(const Vector &accel) { static const int numberGauss = 9 ; static const int numberNodes = 9 ; static const int ndf = 2 ; int i; // check to see if have mass int haveRho = 0; for (i = 0; i < numberGauss; i++) { if (materialPointers[i]->getRho() != 0.0) haveRho = 1; } if (haveRho == 0) return 0; // Compute mass matrix int tangFlag = 1 ; formInertiaTerms( tangFlag ) ; // store computed RV fro nodes in resid vector int count = 0; for (i=0; i<numberNodes; i++) { const Vector &Raccel = nodePointers[i]->getRV(accel); for (int j=0; j<ndf; j++) resid(count++) = Raccel(i); } // create the load vector if one does not exist if (load == 0) load = new Vector(numberNodes*ndf); // add -M * RV(accel) to the load vector load->addMatrixVector(1.0, mass, resid, -1.0); return 0; }
void ZeroLengthContact3D::formResidAndTangent( int tang_flag ) { // trial displacement vectors Vector DispTrialS(3); // trial disp for slave node Vector DispTrialM(3); // trial disp for master node // trial frictional force vectors (in local coordinate) Vector t_trial(2); double TtrNorm; // Coulomb friction law surface double Phi; int i, j; //zero stiffness and residual stiff.Zero( ) ; resid.Zero( ) ; // detect contact and set flag ContactFlag = contactDetect(); //opserr<<this->getTag()<< " ZeroLengthContact3D::ContactFlag=" << ContactFlag<<endln; if (ContactFlag == 1) // contacted { // contact presure; pressure = Kn*gap; // Kn : normal penalty DispTrialS=nodePointers[0]->getTrialDisp(); DispTrialM=nodePointers[1]->getTrialDisp(); //nodal displacements double ul[6]; ul[0]=DispTrialS(0); ul[1]=DispTrialS(1); ul[2]=DispTrialS(2); ul[3]=DispTrialM(0); ul[4]=DispTrialM(1); ul[5]=DispTrialM(2); t_trial.Zero(); xi.Zero(); for (i=0; i<6; i++){ xi(0) += T1(i)*ul[i]; xi(1) += T2(i)*ul[i]; } // Compute trial shear force for (i=0; i<2; i++) t_trial(i)=Kt * (xi(i)-stickPt(i)); //Kt: tangential penalty TtrNorm=t_trial.Norm(); // Coulomb friction law, trial state Phi = TtrNorm - (fs * pressure + cohesion); // add cohesion if (Phi <= 0 ) { // stick case //opserr<< "stick ...." << endln; if ( tang_flag == 1 ) { // stiff for (i=0; i<6; i++) { for (j=0; j<6; j++) { stiff(i,j) = Kn*(N(i)*N(j)) + Kt*(T1(i)*T1(j)+T2(i)*T2(j)); } } } //endif tang_flag // force for (i=0; i<6; i++) resid(i)= (-1*pressure)*N(i) + t_trial(0)*T1(i) + t_trial(1)*T2(i) ; // resid(i)= (-1*pressure)*N(i) - t_trial(0)*T1(i) - t_trial(1)*T2(i) ; } // end if stick else { // slide case, non-symmetric stiff ContactFlag=2; // set the contactFlag for sliding // opserr<< "sliding ...." << endln; if ( tang_flag == 1 ) { // stiff double Pt1, Pt2; Pt1=t_trial(0)/TtrNorm; Pt2=t_trial(1)/TtrNorm; double C1=fs*Kn; double C2=Kt*(fs*pressure+cohesion)/TtrNorm; // add cohesion, sept. 
7, 2005 for (i=0; i<6; i++) { for (j=0; j<6; j++) { stiff(i,j) = Kn*(N(i)*N(j)) - C1*(Pt1*T1(i)*N(j)+Pt2*T2(i)*N(j)) + C2*( (1-Pt1*Pt1)*T1(i)*T1(j) - Pt1*Pt2 *T1(i)*T2(j) - Pt1*Pt2 *T2(i)*T1(j) + (1-Pt1*Pt2)*T2(i)*T2(j) ); } //endfor i } //endfor j } // endif tang_flag // force double t1, t2; t1 = (fs*pressure+cohesion) * t_trial(0)/TtrNorm; // add cohesion t2 = (fs*pressure+cohesion) * t_trial(1)/TtrNorm; // add cohesion //opserr<<"gap=" << gap <<endln; //opserr<<"pressure= "<<pressure <<endln; for (i=0; i<6; i++) { resid(i) = (-1*pressure)*N(i)+t1*T1(i)+t2*T2(i) ; // resid(i) = (-1*pressure)*N(i)-t1*T1(i)-t2*T2(i) ; } } //endif slide } // endif ContactFlag // for NOT contact, do nothing, stiff and resid are zeroes }
int main(int argc, char *argv[]) { int ierr = 0, i, forierr = 0; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); int rank; // My process ID MPI_Comm_rank(MPI_COMM_WORLD, &rank); Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else int rank = 0; Epetra_SerialComm Comm; #endif bool verbose = false; // Check if we should print results to standard out if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; int verbose_int = verbose ? 1 : 0; Comm.Broadcast(&verbose_int, 1, 0); verbose = verbose_int==1 ? true : false; // char tmp; // if (rank==0) cout << "Press any key to continue..."<< endl; // if (rank==0) cin >> tmp; // Comm.Barrier(); Comm.SetTracebackMode(0); // This should shut down any error traceback reporting int MyPID = Comm.MyPID(); int NumProc = Comm.NumProc(); if(verbose && MyPID==0) cout << Epetra_Version() << endl << endl; if (verbose) cout << "Processor "<<MyPID<<" of "<< NumProc << " is alive."<<endl; // Redefine verbose to only print on PE 0 if(verbose && rank!=0) verbose = false; int NumMyEquations = 10000; long long NumGlobalEquations = (NumMyEquations * NumProc) + EPETRA_MIN(NumProc,3); if(MyPID < 3) NumMyEquations++; // Construct a Map that puts approximately the same Number of equations on each processor Epetra_Map Map(NumGlobalEquations, NumMyEquations, 0LL, Comm); // Get update list and number of local equations from newly created Map vector<long long> MyGlobalElements(Map.NumMyElements()); Map.MyGlobalElements(&MyGlobalElements[0]); // Create an integer vector NumNz that is used to build the Petra Matrix. 
// NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation on this processor vector<int> NumNz(NumMyEquations); // We are building a tridiagonal matrix where each row has (-1 2 -1) // So we need 2 off-diagonal terms (except for the first and last equation) for(i = 0; i < NumMyEquations; i++) if((MyGlobalElements[i] == 0) || (MyGlobalElements[i] == NumGlobalEquations - 1)) NumNz[i] = 1; else NumNz[i] = 2; // Create a Epetra_Matrix Epetra_CrsMatrix A(Copy, Map, &NumNz[0]); EPETRA_TEST_ERR(A.IndicesAreGlobal(),ierr); EPETRA_TEST_ERR(A.IndicesAreLocal(),ierr); // Add rows one-at-a-time // Need some vectors to help // Off diagonal Values will always be -1 vector<double> Values(2); Values[0] = -1.0; Values[1] = -1.0; vector<long long> Indices(2); double two = 2.0; int NumEntries; forierr = 0; for(i = 0; i < NumMyEquations; i++) { if(MyGlobalElements[i] == 0) { Indices[0] = 1; NumEntries = 1; } else if (MyGlobalElements[i] == NumGlobalEquations-1) { Indices[0] = NumGlobalEquations-2; NumEntries = 1; } else { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; NumEntries = 2; } forierr += !(A.InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0])==0); forierr += !(A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i])>0); // Put in the diagonal entry } EPETRA_TEST_ERR(forierr,ierr); // Finish up A.FillComplete(); A.OptimizeStorage(); Epetra_JadMatrix JadA(A); Epetra_JadMatrix JadA1(A); Epetra_JadMatrix JadA2(A); // Create vectors for Power method Epetra_Vector q(Map); Epetra_Vector z(Map); z.Random(); Epetra_Vector resid(Map); Epetra_Flops flopcounter; A.SetFlopCounter(flopcounter); q.SetFlopCounter(A); z.SetFlopCounter(A); resid.SetFlopCounter(A); JadA.SetFlopCounter(A); JadA1.SetFlopCounter(A); JadA2.SetFlopCounter(A); if (verbose) cout << "=======================================" << endl << "Testing Jad using CrsMatrix as input..." 
<< endl << "=======================================" << endl; A.ResetFlops(); powerMethodTests(A, JadA, Map, q, z, resid, verbose); // Increase diagonal dominance if (verbose) cout << "\n\nIncreasing the magnitude of first diagonal term and solving again\n\n" << endl; if (A.MyGlobalRow(0)) { int numvals = A.NumGlobalEntries(0); vector<double> Rowvals(numvals); vector<long long> Rowinds(numvals); A.ExtractGlobalRowCopy(0, numvals, numvals, &Rowvals[0], &Rowinds[0]); // Get A[0,0] for (i=0; i<numvals; i++) if (Rowinds[i] == 0) Rowvals[i] *= 10.0; A.ReplaceGlobalValues(0, numvals, &Rowvals[0], &Rowinds[0]); } JadA.UpdateValues(A); A.ResetFlops(); powerMethodTests(A, JadA, Map, q, z, resid, verbose); if (verbose) cout << "================================================================" << endl << "Testing Jad using Jad matrix as input matrix for construction..." << endl << "================================================================" << endl; JadA1.ResetFlops(); powerMethodTests(JadA1, JadA2, Map, q, z, resid, verbose); #ifdef EPETRA_MPI MPI_Finalize() ; #endif return ierr ; }
// // Amesos_TestMultiSolver.cpp reads in a matrix in Harwell-Boeing format, // calls one of the sparse direct solvers, using blocked right hand sides // and computes the error and residual. // // TestSolver ignores the Harwell-Boeing right hand sides, creating // random right hand sides instead. // // Amesos_TestMultiSolver can test either A x = b or A^T x = b. // This can be a bit confusing because sparse direct solvers // use compressed column storage - the transpose of Trilinos' // sparse row storage. // // Matrices: // readA - Serial. As read from the file. // transposeA - Serial. The transpose of readA. // serialA - if (transpose) then transposeA else readA // distributedA - readA distributed to all processes // passA - if ( distributed ) then distributedA else serialA // // int Amesos_TestMultiSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, SparseSolverType SparseSolver, bool transpose, int special, AMESOS_MatrixType matrix_type ) { int iam = Comm.MyPID() ; // int hatever; // if ( iam == 0 ) std::cin >> hatever ; Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; std::string FileName = matrix_file ; int FN_Size = FileName.size() ; std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size ); std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size ); bool NonContiguousMap = false; if ( LastFiveBytes == ".triU" ) { NonContiguousMap = true; // Call routine to read in unsymmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFiveBytes == ".triS" ) { NonContiguousMap = true; // Call routine to read in symmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFourBytes == ".mtx" 
) { EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ); } else { // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ; } } } Epetra_CrsMatrix transposeA(Copy, *readMap, 0); Epetra_CrsMatrix *serialA ; if ( transpose ) { assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); serialA = &transposeA ; } else { serialA = readA ; } // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); Epetra_Map* map_; if( NonContiguousMap ) { // // map gives us NumMyElements and MyFirstElement; // int NumGlobalElements = readMap->NumGlobalElements(); int NumMyElements = map.NumMyElements(); int MyFirstElement = map.MinMyGID(); std::vector<int> MapMap_( NumGlobalElements ); readMap->MyGlobalElements( &MapMap_[0] ) ; Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm); } else { map_ = new Epetra_Map( map ) ; } // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, *map_); Epetra_CrsMatrix A(Copy, *map_, 0); Epetra_RowMatrix * passA = 0; Epetra_MultiVector * passx = 0; Epetra_MultiVector * passb = 0; Epetra_MultiVector * passxexact = 0; Epetra_MultiVector * passresid = 0; Epetra_MultiVector * passtmp = 0; Epetra_MultiVector x(*map_,numsolves); Epetra_MultiVector b(*map_,numsolves); Epetra_MultiVector xexact(*map_,numsolves); Epetra_MultiVector resid(*map_,numsolves); Epetra_MultiVector tmp(*map_,numsolves); Epetra_MultiVector serialx(*readMap,numsolves); Epetra_MultiVector serialb(*readMap,numsolves); Epetra_MultiVector serialxexact(*readMap,numsolves); Epetra_MultiVector serialresid(*readMap,numsolves); Epetra_MultiVector serialtmp(*readMap,numsolves); bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; if ( distribute_matrix ) { // // Initialize x, b and xexact to the 
values read in from the file // A.Export(*serialA, exporter, Add); Comm.Barrier(); assert(A.FillComplete()==0); Comm.Barrier(); passA = &A; passx = &x; passb = &b; passxexact = &xexact; passresid = &resid; passtmp = &tmp; } else { passA = serialA; passx = &serialx; passb = &serialb; passxexact = &serialxexact; passresid = &serialresid; passtmp = &serialtmp; } passxexact->SetSeed(131) ; passxexact->Random(); passx->SetSeed(11231) ; passx->Random(); passb->PutScalar( 0.0 ); passA->Multiply( transpose, *passxexact, *passb ) ; Epetra_MultiVector CopyB( *passb ) ; double Anorm = passA->NormInf() ; SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ; Epetra_LinearProblem Problem( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ); double max_resid = 0.0; for ( int j = 0 ; j < special+1 ; j++ ) { Epetra_Time TotalTime( Comm ) ; if ( false ) { #ifdef TEST_UMFPACK unused code } else if ( SparseSolver == UMFPACK ) { UmfpackOO umfpack( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; umfpack.SetTrans( transpose ) ; umfpack.Solve() ; #endif #ifdef TEST_SUPERLU } else if ( SparseSolver == SuperLU ) { SuperluserialOO superluserial( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; superluserial.SetPermc( SuperLU_permc ) ; superluserial.SetTrans( transpose ) ; superluserial.SetUseDGSSV( special == 0 ) ; superluserial.Solve() ; #endif #ifdef HAVE_AMESOS_SLUD } else if ( SparseSolver == SuperLUdist ) { SuperludistOO superludist( Problem ) ; superludist.SetTrans( transpose ) ; EPETRA_CHK_ERR( superludist.Solve( true ) ) ; #endif #ifdef HAVE_AMESOS_SLUD2 } else if ( SparseSolver == SuperLUdist2 ) { Superludist2_OO superludist2( Problem ) ; superludist2.SetTrans( transpose ) ; EPETRA_CHK_ERR( superludist2.Solve( true ) ) ; #endif #ifdef TEST_SPOOLES } else if ( SparseSolver == SPOOLES ) { SpoolesOO spooles( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, 
(Epetra_MultiVector *) passb ) ; spooles.SetTrans( transpose ) ; spooles.Solve() ; #endif #ifdef HAVE_AMESOS_DSCPACK } else if ( SparseSolver == DSCPACK ) { Teuchos::ParameterList ParamList ; Amesos_Dscpack dscpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( dscpack.Solve( ) ); #endif #ifdef HAVE_AMESOS_UMFPACK } else if ( SparseSolver == UMFPACK ) { Teuchos::ParameterList ParamList ; Amesos_Umfpack umfpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( umfpack.Solve( ) ); #endif #ifdef HAVE_AMESOS_KLU } else if ( SparseSolver == KLU ) { Teuchos::ParameterList ParamList ; Amesos_Klu klu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( klu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( klu.NumericFactorization( ) ); EPETRA_CHK_ERR( klu.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARAKLETE } else if ( SparseSolver == PARAKLETE ) { Teuchos::ParameterList ParamList ; Amesos_Paraklete paraklete( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( paraklete.SymbolicFactorization( ) ); EPETRA_CHK_ERR( paraklete.NumericFactorization( ) ); EPETRA_CHK_ERR( paraklete.Solve( ) ); #endif #ifdef HAVE_AMESOS_SLUS } else if ( SparseSolver == SuperLU ) { Epetra_SLU superluserial( &Problem ) ; EPETRA_CHK_ERR( superluserial.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superluserial.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superluserial.NumericFactorization( ) ); EPETRA_CHK_ERR( superluserial.Solve( ) ); #endif #ifdef HAVE_AMESOS_LAPACK } else if ( SparseSolver == LAPACK ) { Teuchos::ParameterList ParamList ; ParamList.set( "MaxProcs", -3 ); Amesos_Lapack 
lapack( Problem ) ; EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( lapack.SymbolicFactorization( ) ); EPETRA_CHK_ERR( lapack.NumericFactorization( ) ); EPETRA_CHK_ERR( lapack.Solve( ) ); #endif #ifdef HAVE_AMESOS_TAUCS } else if ( SparseSolver == TAUCS ) { Teuchos::ParameterList ParamList ; Amesos_Taucs taucs( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); EPETRA_CHK_ERR( taucs.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARDISO } else if ( SparseSolver == PARDISO ) { Teuchos::ParameterList ParamList ; Amesos_Pardiso pardiso( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); EPETRA_CHK_ERR( pardiso.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARKLETE } else if ( SparseSolver == PARKLETE ) { Teuchos::ParameterList ParamList ; Amesos_Parklete parklete( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( parklete.SetParameters( ParamList ) ); EPETRA_CHK_ERR( parklete.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( parklete.SymbolicFactorization( ) ); EPETRA_CHK_ERR( parklete.NumericFactorization( ) ); EPETRA_CHK_ERR( parklete.Solve( ) ); #endif #ifdef HAVE_AMESOS_MUMPS } else if ( SparseSolver == MUMPS ) { Teuchos::ParameterList ParamList ; Amesos_Mumps mumps( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); EPETRA_CHK_ERR( mumps.Solve( ) ); #endif #ifdef HAVE_AMESOS_SCALAPACK } else if ( SparseSolver == SCALAPACK ) { Teuchos::ParameterList 
ParamList ; Amesos_Scalapack scalapack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); EPETRA_CHK_ERR( scalapack.Solve( ) ); #endif #ifdef HAVE_AMESOS_SUPERLUDIST } else if ( SparseSolver == SUPERLUDIST ) { Teuchos::ParameterList ParamList ; Amesos_Superludist superludist( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superludist.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superludist.NumericFactorization( ) ); EPETRA_CHK_ERR( superludist.Solve( ) ); #endif #ifdef HAVE_AMESOS_SUPERLU } else if ( SparseSolver == SUPERLU ) { Teuchos::ParameterList ParamList ; Amesos_Superlu superlu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superlu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superlu.NumericFactorization( ) ); EPETRA_CHK_ERR( superlu.Solve( ) ); #endif #ifdef TEST_SPOOLESSERIAL } else if ( SparseSolver == SPOOLESSERIAL ) { SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; spoolesserial.Solve() ; #endif } else { SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ; std::cerr << "\n\n#################### Requested solver not available (Or not tested with blocked RHS) on this platform #####################\n" << std::endl ; } SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); // SparseDirectTimingVars::SS_Result.Set_First_Time( 0.0 ); // SparseDirectTimingVars::SS_Result.Set_Middle_Time( 0.0 ); // SparseDirectTimingVars::SS_Result.Set_Last_Time( 0.0 ); // // Compute the error = 
norm(xcomp - xexact ) // std::vector <double> error(numsolves) ; double max_error = 0.0; passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0); passresid->Norm2(&error[0]); for ( int i = 0 ; i< numsolves; i++ ) if ( error[i] > max_error ) max_error = error[i] ; SparseDirectTimingVars::SS_Result.Set_Error(max_error) ; // passxexact->Norm2(&error[0] ) ; // passx->Norm2(&error ) ; // // Compute the residual = norm(Ax - b) // std::vector <double> residual(numsolves) ; passtmp->PutScalar(0.0); passA->Multiply( transpose, *passx, *passtmp); passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); // passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); passresid->Norm2(&residual[0]); for ( int i = 0 ; i< numsolves; i++ ) if ( residual[i] > max_resid ) max_resid = residual[i] ; SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ; std::vector <double> bnorm(numsolves); passb->Norm2( &bnorm[0] ) ; SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ; std::vector <double> xnorm(numsolves); passx->Norm2( &xnorm[0] ) ; SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ; if ( false && iam == 0 ) { std::cout << " Amesos_TestMutliSolver.cpp " << std::endl ; for ( int i = 0 ; i< numsolves && i < 10 ; i++ ) { std::cout << "i=" << i << " error = " << error[i] << " xnorm = " << xnorm[i] << " residual = " << residual[i] << " bnorm = " << bnorm[i] << std::endl ; } std::cout << std::endl << " max_resid = " << max_resid ; std::cout << " max_error = " << max_error << std::endl ; std::cout << " Get_residual() again = " << SparseDirectTimingVars::SS_Result.Get_Residual() << std::endl ; } } delete readA; delete readx; delete readb; delete readxexact; delete readMap; delete map_; Comm.Barrier(); return 0 ; }
void mglin(float ***u, int n, int ncycle){ /* Full Multigrid Algorithm for solution of the steady state heat equation with forcing. On input u[1..n][1..n] contains the right-hand side ρ, while on output it returns the solution. The dimension n must be of the form 2j + 1 for some integer j. (j is actually the number of grid levels used in the solution, called ng below.) ncycle is the number of V-cycles to be used at each level. */ unsigned int j,jcycle,jj,jpost,jpre,nf,ng=0,ngrid,nn; /*** setup multigrid jagged arrays ***/ float ***iu[NGMAX+1]; /* stores solution at each grid level */ float ***irhs[NGMAX+1]; /* stores rhs at each grid level */ float ***ires[NGMAX+1]; /* stores residual at each grid level */ float ***irho[NGMAX+1]; /* stores rhs during intial solution of FMG */ /*** use bitshift to find the number of grid levels, stored in ng ***/ nn=n; while (nn >>= 1) ng++; /*** some simple input checks ***/ if (n != 1+(1L << ng)) nrerror("n-1 must be a power of 2 in mglin."); if (ng > NGMAX) nrerror("increase NGMAX in mglin."); /***restrict solution to next coarsest grid (irho[ng-1])***/ nn=n/2+1; ngrid=ng-1; irho[ngrid]=f3tensor(1,nn,1,nn,1,nn); rstrct(irho[ngrid],u,nn);/* coarsens rhs (u at this point) to irho on mesh size nn */ /***continue setting up coarser grids down to coarsest level***/ while (nn > 3) { nn=nn/2+1; irho[--ngrid]=f3tensor(1,nn,1,nn,1,nn); rstrct(irho[ngrid],irho[ngrid+1],nn); } /***now setup and solve coarsest level iu[1],irhs[1] ***/ nn=3; iu[1]=f3tensor(1,nn,1,nn,1,nn); irhs[1]=f3tensor(1,nn,1,nn,1,nn); slvsml(iu[1],irho[1]); /* solve the small system directly */ free_f3tensor(irho[1],1,nn,1,nn,1,nn); ngrid=ng; /* reset ngrid to original size */ for (j=2;j<=ngrid;j++) { /* loop over coarse to fine, starting at level 2 */ printf("at grid level %d\n",j); nn=2*nn-1; iu[j]=f3tensor(1,nn,1,nn,1,nn); /* setup grids for lhs,rhs, and residual */ irhs[j]=f3tensor(1,nn,1,nn,1,nn); ires[j]=f3tensor(1,nn,1,nn,1,nn); interp(iu[j],iu[j-1],nn); /* irho 
contains rhs except on fine grid where it is in u */ copy(irhs[j],(j != ngrid ? irho[j] : u),nn); /* v-cycle at current grid level */ for (jcycle=1;jcycle<=ncycle;jcycle++) { /* nf is # points on finest grid for current v-sweep */ nf=nn; for (jj=j;jj>=2;jj--) { for (jpre=1;jpre<=NPRE;jpre++) /* NPRE g-s sweeps on the finest (relatively) scale */ relax(iu[jj],iu[jj-1],irhs[jj],nf); //need iu[jj-1] for jacobi resid(ires[jj],iu[jj],irhs[jj],nf); /* compute res on finest scale, store in ires */ nf=nf/2+1; /* next coarsest scale */ rstrct(irhs[jj-1],ires[jj],nf); /* restrict residuals as rhs of next coarsest scale */ fill0(iu[jj-1],nf); /* set the initial solution guess to zero */ } slvsml(iu[1],irhs[1]); /* solve the small problem exactly */ nf=3; /* fine scale now n=3 */ for (jj=2;jj<=j;jj++) { /* work way back up to current finest grid */ nf=2*nf-1; /* next finest scale */ addint(iu[jj],iu[jj-1],ires[jj],nf); /* inter error and add to previous solution guess */ for (jpost=1;jpost<=NPOST;jpost++) /* do NPOST g-s sweeps */ relax(iu[jj],iu[jj-1],irhs[jj],nf); } } } copy(u,iu[ngrid],n); /* copy solution into input array (implicitly returned) */ /*** clean up memory ***/ for (nn=n,j=ng;j>=2;j--,nn=nn/2+1) { free_f3tensor(ires[j],1,nn,1,nn,1,nn); free_f3tensor(irhs[j],1,nn,1,nn,1,nn); free_f3tensor(iu[j],1,nn,1,nn,1,nn); if (j != ng) free_f3tensor(irho[j],1,nn,1,nn,1,nn); } free_f3tensor(irhs[1],1,3,1,3,1,3); free_f3tensor(iu[1],1,3,1,3,1,3); }
//get residual with inertia terms const XC::Vector &XC::Twenty_Node_Brick::getResistingForceIncInertia(void) const { static XC::Vector res(60); // printf("getResistingForceIncInertia()\n"); int i, j; static double a[60]; for(i=0; i<nenu; i++) { const XC::Vector &accel = theNodes[i]->getTrialAccel(); if( 3 != accel.Size() ) { std::cerr << "XC::Twenty_Node_Brick::getResistingForceIncInertia matrix and vector sizes are incompatable\n"; exit(-1); } a[i*3] = accel(0); a[i*3+1] = accel(1); a[i*3+2] = accel(2); } // Compute the current resisting force this->getResistingForce(); // std::cerr<<"K "<<resid<<std::endl; // Compute the mass matrix this->getMass(); for(i = 0; i < 60; i++) { for(j = 0; j < 60; j++){ resid(i) += mass(i,j)*a[j]; } } // printf("\n"); //std::cerr<<"K+M "<<P<<std::endl; for(i=0; i<nenu; i++) { const XC::Vector &vel = theNodes[i]->getTrialVel(); if( 3!= vel.Size() ) { std::cerr << "XC::Twenty_Node_Brick::getResistingForceIncInertia matrix and vector sizes are incompatable\n"; exit(-1); } a[i*3] = vel(0); a[i*3+1] = vel(1); a[i*3+2] = vel(2); } this->getDamp(); for(i = 0; i < 60; i++) { for(j = 0; j < 60; j++) { resid(i) += damp(i,j)*a[j]; } } // std::cerr<<"Pd"<<Pd<<std::endl; res = resid; // std::cerr<<"res "<<res<<std::endl; // exit(-1); if(isDead()) res*=dead_srf; return res; }
//get residual const XC::Vector &XC::Twenty_Node_Brick::getResistingForce(void) const { int i, j, k, k1; double xsj; static XC::Matrix B(6, 3); double volume = 0.; // printf("calling getResistingForce()\n"); resid.Zero(); //compute basis vectors and local nodal coordinates computeBasis( ) ; //gauss loop to compute and save shape functions for( i = 0; i < nintu; i++ ) { // compute Jacobian and global shape functions Jacobian3d(i, xsj, 0); //volume element to also be saved dvolu[i] = wu[i] * xsj ; volume += dvolu[i]; } // end for i //printf("volume = %f\n", volume); // Loop over the integration points for(i = 0; i < nintu; i++) { // Get material stress response const XC::Vector &sigma = physicalProperties[i]->getStress(); // Perform numerical integration on internal force //P = P + (B^ sigma) * intWt(i)*intWt(j) * detJ; //P.addMatrixTransposeVector(1.0, B, sigma, intWt(i)*intWt(j)*detJ); for(j = 0; j < nenu; j++) { B(0,0) = shgu[0][j][i]; B(0,1) = 0.; B(0,2) = 0.; B(1,0) = 0.; B(1,1) = shgu[1][j][i]; B(1,2) = 0.; B(2,0) = 0.; B(2,1) = 0.; B(2,2) = shgu[2][j][i]; B(3,0) = shgu[1][j][i]; B(3,1) = shgu[0][j][i]; B(3,2) = 0.; B(4,0) = 0.; B(4,1) = shgu[2][j][i]; B(4,2) = shgu[1][j][i]; B(5,0) = shgu[2][j][i]; B(5,1) = 0.; B(5,2) = shgu[0][j][i]; for(k = 0; k < 3; k++) { for(k1 = 0; k1 < 6; k1++) resid(j*3+k) += dvolu[i]*(B(k1,k)*sigma(k1)); } // Subtract equiv. body forces from the nodes //P = P - (N^ b) * intWt(i)*intWt(j) * detJ; //P.addMatrixTransposeVector(1.0, N, b, -intWt(i)*intWt(j)*detJ); double r = mixtureRho(i); resid(j*3) -= dvolu[i]*(shgu[3][j][i]*r*bf[0]); resid(j*3+1) -= dvolu[i]*(shgu[3][j][i]*r*bf[1]); resid(j*3+2) -= dvolu[i]*(shgu[3][j][i]*r*bf[2]); } } // Subtract other external nodal loads ... P_res = P_int - P_ext // std::cerr<<"resid before:"<<resid<<std::endl; if(!load.isEmpty()) resid-= load; // std::cerr<<"resid "<<resid<<std::endl; if(isDead()) resid*=dead_srf; return resid ; }
void ZeroLengthInterface2D::formLocalResidAndTangent( int tang_flag , int slave, int master1, int master2, int stage) { // trial frictional force vectors (in local coordinate) double t_trial; double TtrNorm; // Coulomb friction law surface double Phi; int i, j; // set the first value to zero pressure(slave) = 0; t_trial=0; // int IsContact; // detect contact and set flag ContactFlag = contactDetect(slave,master1,master2, stage); if (ContactFlag == 1) // contacted { // create a vector for converting local matrix to global GlobalResidAndTangentOrder(slave, master1, master2); // contact presure; pressure(slave) = Kn * normal_gap(slave); // pressure is positive if in contact double ng = normal_gap(slave); t_trial = Kt * (shear_gap(slave) - stored_shear_gap(slave)); // trial shear force // Coulomb friction law, trial state //TtrNorm=t_trial.Norm(); TtrNorm = sqrt(t_trial * t_trial); Phi = TtrNorm - fc * pressure(slave); if (Phi <= 0 ) { // stick case if ( tang_flag == 1 ) { // stiff for (i = 0; i < 6; i++) { for (j = 0; j < 6; j++) { stiff(loctoglob[i],loctoglob[j]) += Kn * (N(i) * N(j)) + Kt * (T(i) * T(j)); //2D } } } //endif tang_flag // force for (i = 0; i < 6; i++) resid(loctoglob[i]) += pressure(slave) * N(i) + t_trial * T(i); //2D } // end if stick else { // slide case, non-symmetric stiff ContactFlag=2; // set the contactFlag for sliding if ( tang_flag == 1 ) { // stiff for (i = 0; i < 6; i++) { for (j = 0; j < 6; j++) { stiff(loctoglob[i],loctoglob[j]) += Kn * (N(i) * N(j)) - fc * Kn * (t_trial / TtrNorm) * T(i) * N(j); //2D } //endfor i } //endfor j // force } // endif tang_flag double shear = fc * pressure(slave) * (t_trial/TtrNorm); for (i = 0; i < 6; i++) resid(loctoglob[i]) += (pressure(slave) * N(i)) + (shear * T(i)) ; //2D } //endif slide } // endif ContactFlag==1 }
// Inverse iteration on A using the operator set up by applyInverseSetup().
// On return lambda holds the reciprocal of the last Rayleigh-type estimate
// q.z (i.e. the estimate for A itself rather than A(inv)).
// Returns 0 when the monitored residual dropped below the tolerance,
// 1 otherwise. The residual is refreshed only every 10th step and on the
// last step; the convergence test in between uses the most recent value.
int invIteration(Epetra_CrsMatrix& A, double &lambda, bool verbose) {

  Ifpack_CrsRiluk * M;
  applyInverseSetup(A, M);

  Epetra_Vector q(A.RowMap());
  Epetra_Vector z(A.RowMap());
  Epetra_Vector resid(A.RowMap());

  // Share A's flop counter (if it has one) with the work vectors.
  Epetra_Flops * flops = A.GetFlopCounter();
  if (flops != 0) {
    q.SetFlopCounter(A);
    z.SetFlopCounter(A);
    resid.SetFlopCounter(A);
  }

  // Random starting vector.
  z.Random();

  // Iteration state.
  double normz, residual;
  const int maxIters = 100;
  const double tol = 1.0E-6;
  int status = 1;

  int iter = 0;
  while (iter < maxIters) {
    if (verbose)
      cout << endl
           << " ***** Performing step " << iter << " of inverse iteration ***** " << endl;

    z.Norm2(&normz);                   // 2-norm of z
    q.Scale(1.0/normz, z);             // q = z / ||z||
    applyInverse(A, z, q, M, verbose); // compute z such that Az = q
    q.Dot(z, &lambda);                 // eigenvalue estimate for A(inv)

    const bool report = (iter % 10 == 0) || (iter + 1 == maxIters);
    if (report) {
      resid.Update(1.0, z, -lambda, q, 0.0); // A(inv)*q - lambda*q
      resid.Norm2(&residual);
      cout << endl
           << "***** Inverse Iteration Step " << iter+1 << endl;
      cout << " Lambda = " << 1.0/lambda << endl;
      cout << " Residual of A(inv)*q - lambda*q = " << residual << endl;
    }

    if (residual < tol) {
      status = 0;
      break;
    }
    ++iter;
  }

  // lambda is the largest eigenvalue of A(inv). 1/lambda is smallest eigenvalue of A.
  lambda = 1.0/lambda;

  // Compute A*q - lambda*q explicitly.
  A.Multiply(false, q, z);
  resid.Update(1.0, z, -lambda, q, 0.0);
  resid.Norm2(&residual);
  cout << " Explicitly computed residual of A*q - lambda*q = " << residual << endl;

  applyInverseDestroy(M);
  return(status);
}
//----------------------------------------------------------------------------- ResourcePackage* Device::create_resource_package(const char* name) { ResourceId resid("package", name); return create_resource_package((StringId64) resid.name); }
int main(int argc, char *argv[]) { /*------------------------------------------------------------------------- c k is the current level. It is passed down through subroutine args c and is NOT global. it is the current iteration c------------------------------------------------------------------------*/ int k, it; double t, tinit, mflops; int nthreads = 1; /*------------------------------------------------------------------------- c These arrays are in common because they are quite large c and probably shouldn''t be allocated on the stack. They c are always passed as subroutine args. c------------------------------------------------------------------------*/ double **u, *v, **r; double a[4], c[4]; double rnm2, rnmu; double epsilon = 1.0e-8; int n1, n2, n3, nit; double verify_value; boolean verified; int i, j, l; FILE *fp; timer_clear(T_BENCH); timer_clear(T_INIT); timer_start(T_INIT); /*---------------------------------------------------------------------- c Read in and broadcast input data c---------------------------------------------------------------------*/ printf("\n\n NAS Parallel Benchmarks 2.3 OpenMP C version" " - MG Benchmark\n\n"); fp = fopen("mg.input", "r"); if (fp != NULL) { printf(" Reading from input file mg.input\n"); fscanf(fp, "%d", <); while(fgetc(fp) != '\n'); fscanf(fp, "%d%d%d", &nx[lt], &ny[lt], &nz[lt]); while(fgetc(fp) != '\n'); fscanf(fp, "%d", &nit); while(fgetc(fp) != '\n'); for (i = 0; i <= 7; i++) { fscanf(fp, "%d", &debug_vec[i]); } fclose(fp); } else { printf(" No input file. 
Using compiled defaults\n"); lt = LT_DEFAULT; nit = NIT_DEFAULT; nx[lt] = NX_DEFAULT; ny[lt] = NY_DEFAULT; nz[lt] = NZ_DEFAULT; for (i = 0; i <= 7; i++) { debug_vec[i] = DEBUG_DEFAULT; } } if ( (nx[lt] != ny[lt]) || (nx[lt] != nz[lt]) ) { Class = 'U'; } else if( nx[lt] == 32 && nit == 4 ) { Class = 'S'; } else if( nx[lt] == 64 && nit == 40 ) { Class = 'W'; } else if( nx[lt] == 256 && nit == 20 ) { Class = 'B'; } else if( nx[lt] == 512 && nit == 20 ) { Class = 'C'; } else if( nx[lt] == 256 && nit == 4 ) { Class = 'A'; } else { Class = 'U'; } /*-------------------------------------------------------------------- c Use these for debug info: c--------------------------------------------------------------------- c debug_vec(0) = 1 !=> report all norms c debug_vec(1) = 1 !=> some setup information c debug_vec(1) = 2 !=> more setup information c debug_vec(2) = k => at level k or below, show result of resid c debug_vec(3) = k => at level k or below, show result of psinv c debug_vec(4) = k => at level k or below, show result of rprj c debug_vec(5) = k => at level k or below, show result of interp c debug_vec(6) = 1 => (unused) c debug_vec(7) = 1 => (unused) c-------------------------------------------------------------------*/ a[0] = -8.0/3.0; a[1] = 0.0; a[2] = 1.0/6.0; a[3] = 1.0/12.0; if (Class == 'A' || Class == 'S' || Class =='W') { /*-------------------------------------------------------------------- c Coefficients for the S(a) smoother c-------------------------------------------------------------------*/ c[0] = -3.0/8.0; c[1] = 1.0/32.0; c[2] = -1.0/64.0; c[3] = 0.0; } else { /*-------------------------------------------------------------------- c Coefficients for the S(b) smoother c-------------------------------------------------------------------*/ c[0] = -3.0/17.0; c[1] = 1.0/33.0; c[2] = -1.0/61.0; c[3] = 0.0; } lb = 1; setup(&n1,&n2,&n3,lt); /* Allocate the data arrays * 3d arrays are flattened and allocated as a contiguous block * 4d arrays are allocated as 
separate 3d blocks */ u = (double **)malloc((lt+1)*sizeof(double *)); for (l=lt; l >=1; l--) u[l] = (double *)malloc(m3[l]*m2[l]*m1[l]*sizeof(double)); v = (double *)malloc(m3[lt]*m2[lt]*m1[lt]*sizeof(double)); r = (double **)malloc((lt+1)*sizeof(double *)); for (l=lt; l >=1; l--) r[l] = (double *)malloc(m3[l]*m2[l]*m1[l]*sizeof(double)); // Array v can be treated using a standard OpenACC data region #pragma acc data create(v[0:m3[lt]*m2[lt]*m1[lt]]) copyin(a[0:4],c[0:4]) { #ifdef _OPENACC //**************************************************************** /* Now manually deep-create arrays u,r on the GPU using the Cray extended * runtime API, instead of using a data region */ double **acc_u = (double **)cray_acc_create(u,(lt+1)*sizeof(double *)); for (l=lt; l >=1; l--) { double *acc_ul = (double *)cray_acc_create(u[l],m3[l]*m2[l]*m1[l]*sizeof(double)); SET_ACC_PTR(acc_u[l], acc_ul); } double **acc_r = (double **)cray_acc_create(r,(lt+1)*sizeof(double *)); for (l=lt; l >=1; l--) { double *acc_rl = (double *)cray_acc_create(r[l],m3[l]*m2[l]*m1[l]*sizeof(double)); SET_ACC_PTR(acc_r[l], acc_rl); } //**************************************************************** #endif /* _OPENACC */ #pragma omp parallel { zero3(u[lt],n1,n2,n3); } zran3(v,n1,n2,n3,nx[lt],ny[lt],lt); #pragma omp parallel { norm2u3(v,n1,n2,n3,&rnm2,&rnmu,nx[lt],ny[lt],nz[lt]); #pragma omp single { /* printf("\n norms of random v are\n"); printf(" %4d%19.12e%19.12e\n", 0, rnm2, rnmu); printf(" about to evaluate resid, k= %d\n", lt);*/ printf(" Size: %3dx%3dx%3d (class %1c)\n", nx[lt], ny[lt], nz[lt], Class); printf(" Iterations: %3d\n", nit); } resid(u[lt],v,r[lt],n1,n2,n3,a,lt); norm2u3(r[lt],n1,n2,n3,&rnm2,&rnmu,nx[lt],ny[lt],nz[lt]); /*c--------------------------------------------------------------------- c One iteration for startup c---------------------------------------------------------------------*/ mg3P(u,v,r,a,c,n1,n2,n3,lt); resid(u[lt],v,r[lt],n1,n2,n3,a,lt); #pragma omp single 
setup(&n1,&n2,&n3,lt); zero3(u[lt],n1,n2,n3); } /* pragma omp parallel */ zran3(v,n1,n2,n3,nx[lt],ny[lt],lt); timer_stop(T_INIT); timer_start(T_BENCH); #pragma omp parallel firstprivate(nit) private(it) { resid(u[lt],v,r[lt],n1,n2,n3,a,lt); norm2u3(r[lt],n1,n2,n3,&rnm2,&rnmu,nx[lt],ny[lt],nz[lt]); for ( it = 1; it <= nit; it++) { mg3P(u,v,r,a,c,n1,n2,n3,lt); resid(u[lt],v,r[lt],n1,n2,n3,a,lt); } norm2u3(r[lt],n1,n2,n3,&rnm2,&rnmu,nx[lt],ny[lt],nz[lt]); #if defined(_OPENMP) #pragma omp master nthreads = omp_get_num_threads(); #endif } /* pragma omp parallel */ timer_stop(T_BENCH); t = timer_read(T_BENCH); tinit = timer_read(T_INIT); verified = FALSE; verify_value = 0.0; printf(" Initialization time: %15.3f seconds\n", tinit); printf(" Benchmark completed\n"); if (Class != 'U') { if (Class == 'S') { verify_value = 0.530770700573e-04; } else if (Class == 'W') { verify_value = 0.250391406439e-17; /* 40 iterations*/ /* 0.183103168997d-044 iterations*/ } else if (Class == 'A') { verify_value = 0.2433365309e-5; } else if (Class == 'B') { verify_value = 0.180056440132e-5; } else if (Class == 'C') { verify_value = 0.570674826298e-06; } if ( fabs( rnm2 - verify_value ) <= epsilon ) { verified = TRUE; printf(" VERIFICATION SUCCESSFUL\n"); printf(" L2 Norm is %20.12e\n", rnm2); printf(" Error is %20.12e\n", rnm2 - verify_value); } else { verified = FALSE; printf(" VERIFICATION FAILED\n"); printf(" L2 Norm is %20.12e\n", rnm2); printf(" The correct L2 Norm is %20.12e\n", verify_value); } } else { verified = FALSE; printf(" Problem size unknown\n"); printf(" NO VERIFICATION PERFORMED\n"); } if ( t != 0.0 ) { int nn = nx[lt]*ny[lt]*nz[lt]; mflops = 58.*nit*nn*1.0e-6 / t; } else { mflops = 0.0; } c_print_results("MG", Class, nx[lt], ny[lt], nz[lt], nit, nthreads, t, mflops, " floating point", verified, NPBVERSION, COMPILETIME, CS1, CS2, CS3, CS4, CS5, CS6, CS7); // I should probably deep-free the manually deep-created accelerator data here } //acc end data }
int main(int argc, char *argv[]) { #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm (MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif cout << Comm << endl; int MyPID = Comm.MyPID(); bool verbose = false; if (MyPID==0) verbose = true; if(argc < 2 && verbose) { cerr << "Usage: " << argv[0] << " HB_filename [level_fill [level_overlap [absolute_threshold [ relative_threshold]]]]" << endl << "where:" << endl << "HB_filename - filename and path of a Harwell-Boeing data set" << endl << "level_fill - The amount of fill to use for ILU(k) preconditioner (default 0)" << endl << "level_overlap - The amount of overlap used for overlapping Schwarz subdomains (default 0)" << endl << "absolute_threshold - The minimum value to place on the diagonal prior to factorization (default 0.0)" << endl << "relative_threshold - The relative amount to perturb the diagonal prior to factorization (default 1.0)" << endl << endl << "To specify a non-default value for one of these parameters, you must specify all" << endl << " preceding values but not any subsequent parameters. 
Example:" << endl << "ifpackHbSerialMsr.exe mymatrix.hb 1 - loads mymatrix.hb, uses level fill of one, all other values are defaults" << endl << endl; return(1); } // Uncomment the next three lines to debug in mpi mode //int tmp; //if (MyPID==0) cin >> tmp; //Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact); // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, map); Epetra_CrsMatrix A(Copy, map, 0); Epetra_Vector x(map); Epetra_Vector b(map); Epetra_Vector xexact(map); Epetra_Time FillTimer(Comm); x.Export(*readx, exporter, Add); b.Export(*readb, exporter, Add); xexact.Export(*readxexact, exporter, Add); Comm.Barrier(); double vectorRedistributeTime = FillTimer.ElapsedTime(); A.Export(*readA, exporter, Add); Comm.Barrier(); double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime; assert(A.FillComplete()==0); Comm.Barrier(); double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime; if (Comm.MyPID()==0) { cout << "\n\n****************************************************" << endl; cout << "\n Vector redistribute time (sec) = " << vectorRedistributeTime<< endl; cout << " Matrix redistribute time (sec) = " << matrixRedistributeTime << endl; cout << " Transform to Local time (sec) = " << fillCompleteTime << endl<< endl; } Epetra_Vector tmp1(*readMap); Epetra_Vector tmp2(map); readA->Multiply(false, *readxexact, tmp1); A.Multiply(false, xexact, tmp2); double residual; tmp1.Norm2(&residual); if (verbose) cout << "Norm of Ax from file = " << residual << endl; tmp2.Norm2(&residual); if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl; //cout << "A from file 
= " << *readA << endl << endl << endl; //cout << "A after dist = " << A << endl << endl << endl; delete readA; delete readx; delete readb; delete readxexact; delete readMap; Comm.Barrier(); // Construct ILU preconditioner double elapsed_time, total_flops, MFLOPs; Epetra_Time timer(Comm); int LevelFill = 0; if (argc > 2) LevelFill = atoi(argv[2]); if (verbose) cout << "Using Level Fill = " << LevelFill << endl; int Overlap = 0; if (argc > 3) Overlap = atoi(argv[3]); if (verbose) cout << "Using Level Overlap = " << Overlap << endl; double Athresh = 0.0; if (argc > 4) Athresh = atof(argv[4]); if (verbose) cout << "Using Absolute Threshold Value of = " << Athresh << endl; double Rthresh = 1.0; if (argc > 5) Rthresh = atof(argv[5]); if (verbose) cout << "Using Relative Threshold Value of = " << Rthresh << endl; Ifpack_IlukGraph * IlukGraph = 0; Ifpack_CrsRiluk * ILUK = 0; if (LevelFill>-1) { elapsed_time = timer.ElapsedTime(); IlukGraph = new Ifpack_IlukGraph(A.Graph(), LevelFill, Overlap); assert(IlukGraph->ConstructFilledGraph()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl; Epetra_Flops fact_counter; elapsed_time = timer.ElapsedTime(); ILUK = new Ifpack_CrsRiluk(*IlukGraph); ILUK->SetFlopCounter(fact_counter); ILUK->SetAbsoluteThreshold(Athresh); ILUK->SetRelativeThreshold(Rthresh); //assert(ILUK->InitValues()==0); int initerr = ILUK->InitValues(A); if (initerr!=0) cout << Comm << "InitValues error = " << initerr; assert(ILUK->Factor()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = ILUK->Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute preconditioner values = " << elapsed_time << endl << "MFLOPS for Factorization = " << MFLOPs << endl; //cout << *ILUK << endl; } double Condest; ILUK->Condest(false, Condest); if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl; int Maxiter 
= 500; double Tolerance = 1.0E-14; Epetra_Vector xcomp(map); Epetra_Vector resid(map); Epetra_Flops counter; A.SetFlopCounter(counter); xcomp.SetFlopCounter(A); b.SetFlopCounter(A); resid.SetFlopCounter(A); ILUK->SetFlopCounter(A); elapsed_time = timer.ElapsedTime(); BiCGSTAB(A, xcomp, b, ILUK, Maxiter, Tolerance, &residual, verbose); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = counter.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute solution = " << elapsed_time << endl << "Number of operations in solve = " << total_flops << endl << "MFLOPS for Solve = " << MFLOPs<< endl << endl; resid.Update(1.0, xcomp, -1.0, xexact, 0.0); // resid = xcomp - xexact resid.Norm2(&residual); if (verbose) cout << "Norm of the difference between exact and computed solutions = " << residual << endl; if (ILUK!=0) delete ILUK; if (IlukGraph!=0) delete IlukGraph; #ifdef EPETRA_MPI MPI_Finalize() ; #endif return 0 ; }
// Solves the factored system in place: on return rhs_in holds the MA57
// solution. rhs_in must actually be a SimpleVector (it is dynamic_cast to
// one). The ma57dd call is repeated, first switching to iterative
// refinement and then refactoring with a larger threshold-pivoting
// parameter, until the residual infinity-norm is below
// kPrecision*(1 + ||rhs||_inf), refactoring is ruled out, or 10
// refactorizations have been done. Afterwards the same right-hand side is
// solved with ma27 and the two solutions / residual norms are printed for
// comparison.
void Ma57Solver::solve( OoqpVector& rhs_in )
{
  // Copies of the incoming right-hand side: one is overwritten by the ma27
  // solve below, the other is kept to evaluate residuals.
  SimpleVector x_ma27(n), rhs_sav(n);
  x_ma27.copyFrom(rhs_in);
  rhs_sav.copyFrom(rhs_in);

//   int job = 0; // Solve using A
//   int one = 1;

//   SimpleVectorHandle work( new SimpleVector(n) );
//   SimpleVector & rhs = dynamic_cast<SimpleVector &>(rhs_in);

//   double * drhs = rhs.elements();
//   double * dwork = work->elements();

//   int * iwork = new int[n];

//   rusage before;
//   getrusage( RUSAGE_SELF, &before );

//   FNAME(ma57cd)( &job, &n,
//                  fact, &lfact, ifact, &lifact,
//                  &one, drhs, &n,
//                  dwork, &n, iwork,
//                  icntl, info );

//   rusage after;
//   getrusage( RUSAGE_SELF, &after );
//   cout << "Solution with the factored matrix took "
//        << (double) (after.ru_utime.tv_sec - before.ru_utime.tv_sec)
//        + (after.ru_utime.tv_usec - before.ru_utime.tv_usec) / 1000000.0
//        << " seconds.\n";

//   delete [] iwork;

  int job = 0;
  // icntl[8]: 1 = no iterative refinement, 10 = iterative refinement
  icntl[8] = freshFactor ? 1 : 10;

  // MIKE: are these structure ever released??
  SimpleVectorHandle x( new SimpleVector(n) );
  SimpleVectorHandle resid( new SimpleVector(n) );
  SimpleVectorHandle work( new SimpleVector(5 * n) );
  SimpleVector & rhs = dynamic_cast<SimpleVector &>(rhs_in);

  // Raw buffers handed to the Fortran routine.
  double * drhs   = rhs.elements();
  double * dx     = x->elements();
  double * dresid = resid->elements();
  double * dwork  = work->elements();
  int * iwork = new_iworkn(n);

  /*static int s = 0;
  int mype; MPI_Comm_rank(MPI_COMM_WORLD,&mype);
  if (mype == 0 && s==105) {
    printf("RHS IN\n\n");
    for (int i = 0; i < n; i++) {
      printf("%d: %.10E\n", i, rhs[i]);
    }
  }*/

#ifdef HAVE_GETRUSAGE
  rusage before;
  if( gOoqpPrintLevel >= 100 ) {
    getrusage( RUSAGE_SELF, &before );
  }
#endif

  bool finished = false;
  int refactorCount = 0;
  bool refactorRuledOut = (kThresholdPivoting > kThresholdPivotingMax);

  while( !finished && refactorCount < 10 ) {
    icntl[9]=1;//condition number
    FNAME(ma57dd)( &job, &n, &nnz, M, irowM, jcolM,
                   fact, &lfact, ifact, &lifact,
                   drhs, dx, dresid,
                   dwork, iwork,
                   icntl, cntl, info, rinfo );

    if( resid->infnorm() < kPrecision*( 1 + rhs.infnorm() ) ) {
      // Residuals are acceptable: keep this solution.
      finished = true;
      cout << "Ma57: relative norm of residuals for linear system: "
           << resid->infnorm()/rhs.infnorm() << endl;
      cout << "Ma57: condition number: " << rinfo[10] << " "
           << rinfo[11] << " " << rinfo[12] << endl;
      continue;
    }

    // Residuals are not good enough.
    if( freshFactor ) {
      // No iterative refinement was used so far; enable it for the next pass.
      job = 2;
      icntl[8] = 10;
      // Mark this factorization as stale.
      freshFactor = 0;
      // And grow more pessimistic about the next factorization.
      if( kThresholdPivoting >= kThresholdPivotingMax ) {
        // Already factored with the largest pivot tolerance we are
        // willing to use.
        refactorRuledOut = true;
      } else {
        // Future factorizations use a higher threshold-pivoting parameter.
        kThresholdPivoting *= kThresholdPivotingFactor;
        if( kThresholdPivoting > kThresholdPivotingMax )
          kThresholdPivoting = kThresholdPivotingMax;
        this->setThresholdPivoting();
        cout << "Setting ThresholdPivoting parameter to "
             << kThresholdPivoting << " for future factorizations" << endl;
      }
    } else if( refactorRuledOut ) {
      // A refactor may have been tried, but the pivot tolerance is above
      // our limit: give up and accept the current solution.
      finished = true;
    } else {
      // Iterative refinement has been tried and the threshold-pivoting
      // parameter has already been raised: refactor.
      cout << "Refactoring with Threshold Pivoting parameter"
           << kThresholdPivoting << endl;
      this->matrixChanged();
      refactorCount++;
      // Be optimistic about the next factorization.
      job = 0;
      icntl[8] = 1;
    }
  } // end while not finished

  //!
  SimpleVector& x_ma57 = *x;

  // Cross-check: solve the saved right-hand side with ma27 as well.
  ma27->matrixChanged();
  ma27->solve(x_ma27);

  // Report entries where the two solutions differ noticeably.
  for(int idx=0; idx<x_ma57.length(); idx++) {
    if(fabs((x_ma57[idx]-x_ma27[idx])/(1+x_ma27[idx])) >1) {
      printf("[%6d] ma57:%22.14e ma27:%22.14e \n", idx, x_ma57[idx], x_ma27[idx]);
    }
  }

  // Residual norm of the MA57 solution against the saved right-hand side.
  SimpleVector resid_ma57(n);
  resid_ma57.copyFrom(rhs_sav);
  sgm_->mult(1.0, resid_ma57.elements(), 1, -1.0, x_ma57.elements(), 1);
  cout << "MA57 resid.nrm=" << resid_ma57.infnorm()
       << " rhs.nrm=" << rhs_sav.infnorm()
       << " n=" << n << endl;

  // Residual norm of the MA27 solution.
  SimpleVector resid_ma27(n);
  resid_ma27.copyFrom(rhs_sav);
  sgm_->mult(-1.0, resid_ma27.elements(), 1, 1.0, x_ma27.elements(), 1);
  cout << "MA27 resid.nrm=" << resid_ma27.infnorm() << endl;

  // Hand the MA57 solution back through the right-hand-side vector.
  rhs.copyFrom( *x );
  //rhs.copyFrom(x_ma27);

  //delete [] iwork; //it is cached now
}
int main (int argc, char **argv) { double Info [UMFPACK_INFO], Control [UMFPACK_CONTROL], *Ax, *Cx, *Lx, *Ux, *W, t [2], *Dx, rnorm, *Rb, *y, *Rs ; double *Az, *Lz, *Uz, *Dz, *Cz, *Rbz, *yz ; int *Ap, *Ai, *Cp, *Ci, row, col, p, lnz, unz, nr, nc, *Lp, *Li, *Ui, *Up, *P, *Q, *Lj, i, j, k, anz, nfr, nchains, *Qinit, fnpiv, lnz1, unz1, nz1, status, *Front_npivcol, *Front_parent, *Chain_start, *Wi, *Pinit, n1, *Chain_maxrows, *Chain_maxcols, *Front_1strow, *Front_leftmostdesc, nzud, do_recip ; void *Symbolic, *Numeric ; /* ---------------------------------------------------------------------- */ /* initializations */ /* ---------------------------------------------------------------------- */ umfpack_tic (t) ; printf ("\nUMFPACK V%d.%d (%s) demo: _zi_ version\n", UMFPACK_MAIN_VERSION, UMFPACK_SUB_VERSION, UMFPACK_DATE) ; /* get the default control parameters */ umfpack_zi_defaults (Control) ; /* change the default print level for this demo */ /* (otherwise, nothing will print) */ Control [UMFPACK_PRL] = 6 ; /* print the license agreement */ umfpack_zi_report_status (Control, UMFPACK_OK) ; Control [UMFPACK_PRL] = 5 ; /* print the control parameters */ umfpack_zi_report_control (Control) ; /* ---------------------------------------------------------------------- */ /* print A and b, and convert A to column-form */ /* ---------------------------------------------------------------------- */ /* print the right-hand-side */ printf ("\nb: ") ; (void) umfpack_zi_report_vector (n, b, bz, Control) ; /* print the triplet form of the matrix */ printf ("\nA: ") ; (void) umfpack_zi_report_triplet (n, n, nz, Arow, Acol, Aval, Avalz, Control) ; /* convert to column form */ nz1 = MAX (nz,1) ; /* ensure arrays are not of size zero. 
*/ Ap = (int *) malloc ((n+1) * sizeof (int)) ; Ai = (int *) malloc (nz1 * sizeof (int)) ; Ax = (double *) malloc (nz1 * sizeof (double)) ; Az = (double *) malloc (nz1 * sizeof (double)) ; if (!Ap || !Ai || !Ax || !Az) { error ("out of memory") ; } status = umfpack_zi_triplet_to_col (n, n, nz, Arow, Acol, Aval, Avalz, Ap, Ai, Ax, Az, (int *) NULL) ; if (status < 0) { umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_triplet_to_col failed") ; } /* print the column-form of A */ printf ("\nA: ") ; (void) umfpack_zi_report_matrix (n, n, Ap, Ai, Ax, Az, 1, Control) ; /* ---------------------------------------------------------------------- */ /* symbolic factorization */ /* ---------------------------------------------------------------------- */ status = umfpack_zi_symbolic (n, n, Ap, Ai, Ax, Az, &Symbolic, Control, Info) ; if (status < 0) { umfpack_zi_report_info (Control, Info) ; umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_symbolic failed") ; } /* print the symbolic factorization */ printf ("\nSymbolic factorization of A: ") ; (void) umfpack_zi_report_symbolic (Symbolic, Control) ; /* ---------------------------------------------------------------------- */ /* numeric factorization */ /* ---------------------------------------------------------------------- */ status = umfpack_zi_numeric (Ap, Ai, Ax, Az, Symbolic, &Numeric, Control, Info) ; if (status < 0) { umfpack_zi_report_info (Control, Info) ; umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_numeric failed") ; } /* print the numeric factorization */ printf ("\nNumeric factorization of A: ") ; (void) umfpack_zi_report_numeric (Numeric, Control) ; /* ---------------------------------------------------------------------- */ /* solve Ax=b */ /* ---------------------------------------------------------------------- */ status = umfpack_zi_solve (UMFPACK_A, Ap, Ai, Ax, Az, x, xz, b, bz, Numeric, Control, Info) ; umfpack_zi_report_info (Control, Info) ; 
umfpack_zi_report_status (Control, status) ; if (status < 0) { error ("umfpack_zi_solve failed") ; } printf ("\nx (solution of Ax=b): ") ; (void) umfpack_zi_report_vector (n, x, xz, Control) ; rnorm = resid (FALSE, Ap, Ai, Ax, Az) ; printf ("maxnorm of residual: %g\n\n", rnorm) ; /* ---------------------------------------------------------------------- */ /* compute the determinant */ /* ---------------------------------------------------------------------- */ status = umfpack_zi_get_determinant (x, xz, r, Numeric, Info) ; umfpack_zi_report_status (Control, status) ; if (status < 0) { error ("umfpack_zi_get_determinant failed") ; } printf ("determinant: (%g", x [0]) ; printf ("+ (%g)i", xz [0]) ; /* complex */ printf (") * 10^(%g)\n", r [0]) ; /* ---------------------------------------------------------------------- */ /* solve Ax=b, broken down into steps */ /* ---------------------------------------------------------------------- */ /* Rb = R*b */ Rb = (double *) malloc (n * sizeof (double)) ; Rbz = (double *) malloc (n * sizeof (double)) ; y = (double *) malloc (n * sizeof (double)) ; yz = (double *) malloc (n * sizeof (double)) ; if (!Rb || !y) error ("out of memory") ; if (!Rbz || !yz) error ("out of memory") ; status = umfpack_zi_scale (Rb, Rbz, b, bz, Numeric) ; if (status < 0) error ("umfpack_zi_scale failed") ; /* solve Ly = P*(Rb) */ status = umfpack_zi_solve (UMFPACK_Pt_L, Ap, Ai, Ax, Az, y, yz, Rb, Rbz, Numeric, Control, Info) ; if (status < 0) error ("umfpack_zi_solve failed") ; /* solve UQ'x=y */ status = umfpack_zi_solve (UMFPACK_U_Qt, Ap, Ai, Ax, Az, x, xz, y, yz, Numeric, Control, Info) ; if (status < 0) error ("umfpack_zi_solve failed") ; printf ("\nx (solution of Ax=b, solve is split into 3 steps): ") ; (void) umfpack_zi_report_vector (n, x, xz, Control) ; rnorm = resid (FALSE, Ap, Ai, Ax, Az) ; printf ("maxnorm of residual: %g\n\n", rnorm) ; free (Rb) ; free (Rbz) ; free (y) ; free (yz) ; /* 
---------------------------------------------------------------------- */ /* solve A'x=b */ /* ---------------------------------------------------------------------- */ /* note that this is the complex conjugate transpose, A' */ status = umfpack_zi_solve (UMFPACK_At, Ap, Ai, Ax, Az, x, xz, b, bz, Numeric, Control, Info) ; umfpack_zi_report_info (Control, Info) ; if (status < 0) { error ("umfpack_zi_solve failed") ; } printf ("\nx (solution of A'x=b): ") ; (void) umfpack_zi_report_vector (n, x, xz, Control) ; rnorm = resid (TRUE, Ap, Ai, Ax, Az) ; printf ("maxnorm of residual: %g\n\n", rnorm) ; /* ---------------------------------------------------------------------- */ /* modify one numerical value in the column-form of A */ /* ---------------------------------------------------------------------- */ /* change A (1,4), look for row index 1 in column 4. */ row = 1 ; col = 4 ; for (p = Ap [col] ; p < Ap [col+1] ; p++) { if (row == Ai [p]) { printf ("\nchanging A (%d,%d) to zero\n", row, col) ; Ax [p] = 0.0 ; Az [p] = 0.0 ; break ; } } printf ("\nmodified A: ") ; (void) umfpack_zi_report_matrix (n, n, Ap, Ai, Ax, Az, 1, Control) ; /* ---------------------------------------------------------------------- */ /* redo the numeric factorization */ /* ---------------------------------------------------------------------- */ /* The pattern (Ap and Ai) hasn't changed, so the symbolic factorization */ /* doesn't have to be redone, no matter how much we change Ax. */ /* We don't need the Numeric object any more, so free it. */ umfpack_zi_free_numeric (&Numeric) ; /* Note that a memory leak would have occurred if the old Numeric */ /* had not been free'd with umfpack_zi_free_numeric above. 
*/ status = umfpack_zi_numeric (Ap, Ai, Ax, Az, Symbolic, &Numeric, Control, Info) ; if (status < 0) { umfpack_zi_report_info (Control, Info) ; umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_numeric failed") ; } printf ("\nNumeric factorization of modified A: ") ; (void) umfpack_zi_report_numeric (Numeric, Control) ; /* ---------------------------------------------------------------------- */ /* solve Ax=b, with the modified A */ /* ---------------------------------------------------------------------- */ status = umfpack_zi_solve (UMFPACK_A, Ap, Ai, Ax, Az, x, xz, b, bz, Numeric, Control, Info) ; umfpack_zi_report_info (Control, Info) ; if (status < 0) { umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_solve failed") ; } printf ("\nx (with modified A): ") ; (void) umfpack_zi_report_vector (n, x, xz, Control) ; rnorm = resid (FALSE, Ap, Ai, Ax, Az) ; printf ("maxnorm of residual: %g\n\n", rnorm) ; /* ---------------------------------------------------------------------- */ /* modify all of the numerical values of A, but not the pattern */ /* ---------------------------------------------------------------------- */ for (col = 0 ; col < n ; col++) { for (p = Ap [col] ; p < Ap [col+1] ; p++) { row = Ai [p] ; printf ("changing ") ; /* complex: */ printf ("real part of ") ; printf ("A (%d,%d) from %g", row, col, Ax [p]) ; Ax [p] = Ax [p] + col*10 - row ; printf (" to %g\n", Ax [p]) ; } } printf ("\ncompletely modified A (same pattern): ") ; (void) umfpack_zi_report_matrix (n, n, Ap, Ai, Ax, Az, 1, Control) ; /* ---------------------------------------------------------------------- */ /* save the Symbolic object to file, free it, and load it back in */ /* ---------------------------------------------------------------------- */ /* use the default filename, "symbolic.umf" */ printf ("\nSaving symbolic object:\n") ; status = umfpack_zi_save_symbolic (Symbolic, (char *) NULL) ; if (status < 0) { umfpack_zi_report_status (Control, status) ; 
error ("umfpack_zi_save_symbolic failed") ; } printf ("\nFreeing symbolic object:\n") ; umfpack_zi_free_symbolic (&Symbolic) ; printf ("\nLoading symbolic object:\n") ; status = umfpack_zi_load_symbolic (&Symbolic, (char *) NULL) ; if (status < 0) { umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_load_symbolic failed") ; } printf ("\nDone loading symbolic object\n") ; /* ---------------------------------------------------------------------- */ /* redo the numeric factorization */ /* ---------------------------------------------------------------------- */ umfpack_zi_free_numeric (&Numeric) ; status = umfpack_zi_numeric (Ap, Ai, Ax, Az, Symbolic, &Numeric, Control, Info) ; if (status < 0) { umfpack_zi_report_info (Control, Info) ; umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_numeric failed") ; } printf ("\nNumeric factorization of completely modified A: ") ; (void) umfpack_zi_report_numeric (Numeric, Control) ; /* ---------------------------------------------------------------------- */ /* solve Ax=b, with the modified A */ /* ---------------------------------------------------------------------- */ status = umfpack_zi_solve (UMFPACK_A, Ap, Ai, Ax, Az, x, xz, b, bz, Numeric, Control, Info) ; umfpack_zi_report_info (Control, Info) ; if (status < 0) { umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_solve failed") ; } printf ("\nx (with completely modified A): ") ; (void) umfpack_zi_report_vector (n, x, xz, Control) ; rnorm = resid (FALSE, Ap, Ai, Ax, Az) ; printf ("maxnorm of residual: %g\n\n", rnorm) ; /* ---------------------------------------------------------------------- */ /* free the symbolic and numeric factorization */ /* ---------------------------------------------------------------------- */ umfpack_zi_free_symbolic (&Symbolic) ; umfpack_zi_free_numeric (&Numeric) ; /* ---------------------------------------------------------------------- */ /* C = transpose of A */ /* 
---------------------------------------------------------------------- */ Cp = (int *) malloc ((n+1) * sizeof (int)) ; Ci = (int *) malloc (nz1 * sizeof (int)) ; Cx = (double *) malloc (nz1 * sizeof (double)) ; Cz = (double *) malloc (nz1 * sizeof (double)) ; if (!Cp || !Ci || !Cx || !Cz) { error ("out of memory") ; } status = umfpack_zi_transpose (n, n, Ap, Ai, Ax, Az, (int *) NULL, (int *) NULL, Cp, Ci, Cx, Cz, TRUE) ; if (status < 0) { umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_transpose failed: ") ; } printf ("\nC (transpose of A): ") ; (void) umfpack_zi_report_matrix (n, n, Cp, Ci, Cx, Cz, 1, Control) ; /* ---------------------------------------------------------------------- */ /* symbolic factorization of C */ /* ---------------------------------------------------------------------- */ status = umfpack_zi_symbolic (n, n, Cp, Ci, Cx, Cz, &Symbolic, Control, Info) ; if (status < 0) { umfpack_zi_report_info (Control, Info) ; umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_symbolic failed") ; } printf ("\nSymbolic factorization of C: ") ; (void) umfpack_zi_report_symbolic (Symbolic, Control) ; /* ---------------------------------------------------------------------- */ /* copy the contents of Symbolic into user arrays print them */ /* ---------------------------------------------------------------------- */ printf ("\nGet the contents of the Symbolic object for C:\n") ; printf ("(compare with umfpack_zi_report_symbolic output, above)\n") ; Pinit = (int *) malloc ((n+1) * sizeof (int)) ; Qinit = (int *) malloc ((n+1) * sizeof (int)) ; Front_npivcol = (int *) malloc ((n+1) * sizeof (int)) ; Front_1strow = (int *) malloc ((n+1) * sizeof (int)) ; Front_leftmostdesc = (int *) malloc ((n+1) * sizeof (int)) ; Front_parent = (int *) malloc ((n+1) * sizeof (int)) ; Chain_start = (int *) malloc ((n+1) * sizeof (int)) ; Chain_maxrows = (int *) malloc ((n+1) * sizeof (int)) ; Chain_maxcols = (int *) malloc ((n+1) * sizeof (int)) ; if 
(!Pinit || !Qinit || !Front_npivcol || !Front_parent || !Chain_start || !Chain_maxrows || !Chain_maxcols || !Front_1strow || !Front_leftmostdesc) { error ("out of memory") ; } status = umfpack_zi_get_symbolic (&nr, &nc, &n1, &anz, &nfr, &nchains, Pinit, Qinit, Front_npivcol, Front_parent, Front_1strow, Front_leftmostdesc, Chain_start, Chain_maxrows, Chain_maxcols, Symbolic) ; if (status < 0) { error ("symbolic factorization invalid") ; } printf ("From the Symbolic object, C is of dimension %d-by-%d\n", nr, nc); printf (" with nz = %d, number of fronts = %d,\n", nz, nfr) ; printf (" number of frontal matrix chains = %d\n", nchains) ; printf ("\nPivot columns in each front, and parent of each front:\n") ; k = 0 ; for (i = 0 ; i < nfr ; i++) { fnpiv = Front_npivcol [i] ; printf (" Front %d: parent front: %d number of pivot cols: %d\n", i, Front_parent [i], fnpiv) ; for (j = 0 ; j < fnpiv ; j++) { col = Qinit [k] ; printf ( " %d-th pivot column is column %d in original matrix\n", k, col) ; k++ ; } } printf ("\nNote that the column ordering, above, will be refined\n") ; printf ("in the numeric factorization below. 
The assignment of pivot\n") ; printf ("columns to frontal matrices will always remain unchanged.\n") ; printf ("\nTotal number of pivot columns in frontal matrices: %d\n", k) ; printf ("\nFrontal matrix chains:\n") ; for (j = 0 ; j < nchains ; j++) { printf (" Frontal matrices %d to %d are factorized in a single\n", Chain_start [j], Chain_start [j+1] - 1) ; printf (" working array of size %d-by-%d\n", Chain_maxrows [j], Chain_maxcols [j]) ; } /* ---------------------------------------------------------------------- */ /* numeric factorization of C */ /* ---------------------------------------------------------------------- */ status = umfpack_zi_numeric (Cp, Ci, Cx, Cz, Symbolic, &Numeric, Control, Info) ; if (status < 0) { error ("umfpack_zi_numeric failed") ; } printf ("\nNumeric factorization of C: ") ; (void) umfpack_zi_report_numeric (Numeric, Control) ; /* ---------------------------------------------------------------------- */ /* extract the LU factors of C and print them */ /* ---------------------------------------------------------------------- */ if (umfpack_zi_get_lunz (&lnz, &unz, &nr, &nc, &nzud, Numeric) < 0) { error ("umfpack_zi_get_lunz failed") ; } /* ensure arrays are not of zero size */ lnz1 = MAX (lnz,1) ; unz1 = MAX (unz,1) ; Lp = (int *) malloc ((n+1) * sizeof (int)) ; Lj = (int *) malloc (lnz1 * sizeof (int)) ; Lx = (double *) malloc (lnz1 * sizeof (double)) ; Lz = (double *) malloc (lnz1 * sizeof (double)) ; Up = (int *) malloc ((n+1) * sizeof (int)) ; Ui = (int *) malloc (unz1 * sizeof (int)) ; Ux = (double *) malloc (unz1 * sizeof (double)) ; Uz = (double *) malloc (unz1 * sizeof (double)) ; P = (int *) malloc (n * sizeof (int)) ; Q = (int *) malloc (n * sizeof (int)) ; Dx = (double *) NULL ; /* D vector not requested */ Dz = (double *) NULL ; Rs = (double *) malloc (n * sizeof (double)) ; if (!Lp || !Lj || !Lx || !Lz || !Up || !Ui || !Ux || !Uz || !P || !Q || !Rs) { error ("out of memory") ; } status = umfpack_zi_get_numeric (Lp, Lj, 
Lx, Lz, Up, Ui, Ux, Uz, P, Q, Dx, Dz, &do_recip, Rs, Numeric) ; if (status < 0) { error ("umfpack_zi_get_numeric failed") ; } printf ("\nL (lower triangular factor of C): ") ; (void) umfpack_zi_report_matrix (n, n, Lp, Lj, Lx, Lz, 0, Control) ; printf ("\nU (upper triangular factor of C): ") ; (void) umfpack_zi_report_matrix (n, n, Up, Ui, Ux, Uz, 1, Control) ; printf ("\nP: ") ; (void) umfpack_zi_report_perm (n, P, Control) ; printf ("\nQ: ") ; (void) umfpack_zi_report_perm (n, Q, Control) ; printf ("\nScale factors: row i of A is to be ") ; if (do_recip) { printf ("multiplied by the ith scale factor\n") ; } else { printf ("divided by the ith scale factor\n") ; } for (i = 0 ; i < n ; i++) printf ("%d: %g\n", i, Rs [i]) ; /* ---------------------------------------------------------------------- */ /* convert L to triplet form and print it */ /* ---------------------------------------------------------------------- */ /* Note that L is in row-form, so it is the row indices that are created */ /* by umfpack_zi_col_to_triplet. 
*/ printf ("\nConverting L to triplet form, and printing it:\n") ; Li = (int *) malloc (lnz1 * sizeof (int)) ; if (!Li) { error ("out of memory") ; } if (umfpack_zi_col_to_triplet (n, Lp, Li) < 0) { error ("umfpack_zi_col_to_triplet failed") ; } printf ("\nL, in triplet form: ") ; (void) umfpack_zi_report_triplet (n, n, lnz, Li, Lj, Lx, Lz, Control) ; /* ---------------------------------------------------------------------- */ /* save the Numeric object to file, free it, and load it back in */ /* ---------------------------------------------------------------------- */ /* use the default filename, "numeric.umf" */ printf ("\nSaving numeric object:\n") ; status = umfpack_zi_save_numeric (Numeric, (char *) NULL) ; if (status < 0) { umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_save_numeric failed") ; } printf ("\nFreeing numeric object:\n") ; umfpack_zi_free_numeric (&Numeric) ; printf ("\nLoading numeric object:\n") ; status = umfpack_zi_load_numeric (&Numeric, (char *) NULL) ; if (status < 0) { umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_load_numeric failed") ; } printf ("\nDone loading numeric object\n") ; /* ---------------------------------------------------------------------- */ /* solve C'x=b */ /* ---------------------------------------------------------------------- */ status = umfpack_zi_solve (UMFPACK_At, Cp, Ci, Cx, Cz, x, xz, b, bz, Numeric, Control, Info) ; umfpack_zi_report_info (Control, Info) ; if (status < 0) { umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_solve failed") ; } printf ("\nx (solution of C'x=b): ") ; (void) umfpack_zi_report_vector (n, x, xz, Control) ; rnorm = resid (TRUE, Cp, Ci, Cx, Cz) ; printf ("maxnorm of residual: %g\n\n", rnorm) ; /* ---------------------------------------------------------------------- */ /* solve C'x=b again, using umfpack_zi_wsolve instead */ /* ---------------------------------------------------------------------- */ printf ("\nSolving C'x=b again, 
using umfpack_zi_wsolve instead:\n") ; Wi = (int *) malloc (n * sizeof (int)) ; W = (double *) malloc (10*n * sizeof (double)) ; if (!Wi || !W) { error ("out of memory") ; } status = umfpack_zi_wsolve (UMFPACK_At, Cp, Ci, Cx, Cz, x, xz, b, bz, Numeric, Control, Info, Wi, W) ; umfpack_zi_report_info (Control, Info) ; if (status < 0) { umfpack_zi_report_status (Control, status) ; error ("umfpack_zi_wsolve failed") ; } printf ("\nx (solution of C'x=b): ") ; (void) umfpack_zi_report_vector (n, x, xz, Control) ; rnorm = resid (TRUE, Cp, Ci, Cx, Cz) ; printf ("maxnorm of residual: %g\n\n", rnorm) ; /* ---------------------------------------------------------------------- */ /* free everything */ /* ---------------------------------------------------------------------- */ /* This is not strictly required since the process is exiting and the */ /* system will reclaim the memory anyway. It's useful, though, just as */ /* a list of what is currently malloc'ed by this program. Plus, it's */ /* always a good habit to explicitly free whatever you malloc. */ free (Ap) ; free (Ai) ; free (Ax) ; free (Az) ; free (Cp) ; free (Ci) ; free (Cx) ; free (Cz) ; free (Pinit) ; free (Qinit) ; free (Front_npivcol) ; free (Front_1strow) ; free (Front_leftmostdesc) ; free (Front_parent) ; free (Chain_start) ; free (Chain_maxrows) ; free (Chain_maxcols) ; free (Lp) ; free (Lj) ; free (Lx) ; free (Lz) ; free (Up) ; free (Ui) ; free (Ux) ; free (Uz) ; free (P) ; free (Q) ; free (Li) ; free (Wi) ; free (W) ; umfpack_zi_free_symbolic (&Symbolic) ; umfpack_zi_free_numeric (&Numeric) ; /* ---------------------------------------------------------------------- */ /* print the total time spent in this demo */ /* ---------------------------------------------------------------------- */ umfpack_toc (t) ; printf ("\numfpack_zi_demo complete.\nTotal time: %5.2f seconds" " (CPU time), %5.2f seconds (wallclock time)\n", t [1], t [0]) ; return (0) ; }