VPUBLIC void Vdpbsl(double *abd, int *lda, int *n, int *m, double *b) { double t; int k, kb, la, lb, lm; MAT2(abd, *lda, 1); for (k=1; k<=*n; k++) { lm = VMIN2(k-1, *m); la = *m + 1 - lm; lb = k - lm; t = Vddot(lm, RAT2(abd, la, k ), 1, RAT(b, lb), 1 ); VAT(b, k) = (VAT(b, k) - t) / VAT2(abd, *m+1, k); } // Solve R*X = Y for (kb=1; kb<=*n; kb++) { k = *n + 1 - kb; lm = VMIN2(k-1, *m); la = *m + 1 - lm; lb = k - lm; VAT(b, k) /= VAT2(abd, *m+1, k); t = -VAT(b, k); Vdaxpy(lm, t, RAT2(abd, la, k), 1, RAT(b, lb), 1); } }
VPUBLIC void Vprtmatd(int *nx, int *ny, int *nz, int *ipc, double *rpc, double *ac) { int numdia; MAT2(ac, *nx * *ny * *nz, 1); WARN_UNTESTED; // Do the printing numdia = VAT(ipc, 11); if (numdia == 7) { Vprtmatd7(nx, ny, nz, ipc, rpc, RAT2(ac, 1, 1), RAT2(ac, 1, 2), RAT2(ac, 1, 3), RAT2(ac, 1, 4)); } else if (numdia == 27) { Vprtmatd27(nx, ny, nz, ipc, rpc, RAT2(ac, 1, 1), RAT2(ac, 1, 2), RAT2(ac, 1, 3), RAT2(ac, 1, 4), RAT2(ac, 1, 5), RAT2(ac, 1, 6), RAT2(ac, 1, 7), RAT2(ac, 1, 8), RAT2(ac, 1, 9), RAT2(ac, 1, 10), RAT2(ac, 1, 11), RAT2(ac, 1, 12), RAT2(ac, 1, 13), RAT2(ac, 1, 14)); } else { Vnm_print(2, "Vprtmatd: invalid stencil type given: %d\n", numdia); } }
VPUBLIC void Vazeros(int *nx, int *ny, int *nz, double *x) { int i, n; int nproc = 1; n = *nx * *ny * *nz; #pragma omp parallel for private(i) for (i=1; i<=n; i++) VAT(x, i) = 0.0; }
VPUBLIC void Vaxrand(int *nx, int *ny, int *nz, double *x) { int n, i, ii, ipara, ivect, iflag; int nproc = 1; double xdum; WARN_UNTESTED; // Find parallel loops (ipara), remainder (ivect) n = *nx * *ny * *nz; ipara = n / nproc; ivect = n % nproc; iflag = 1; xdum = (double)(VRAND); // Do parallel loops for (ii=1; ii<=nproc; ii++) for (i=1+(ipara*(ii-1)); i<=ipara*ii; i++) VAT(x, i) = (double)(VRAND); // Do vector loops for (i=ipara*nproc+1; i<=n; i++) VAT(x, i) = (double)(VRAND); }
VPUBLIC void Vnewton(int *nx, int *ny, int *nz, double *x, int *iz, double *w0, double *w1, double *w2, double *w3, int *istop, int *itmax, int *iters, int *ierror, int *nlev, int *ilev, int *nlev_real, int *mgsolv, int *iok, int *iinfo, double *epsiln, double *errtol, double *omega, int *nu1, int *nu2, int *mgsmoo, double *cprime, double *rhs, double *xtmp, int *ipc, double *rpc, double *pc, double *ac, double *cc, double *fc, double *tru) { int level, lev; int itmax_s, iters_s, ierror_s, iok_s, iinfo_s, istop_s; double errtol_s, ord, bigc; double rsden, rsnrm, orsnrm; double xnorm_old, xnorm_new, damp, xnorm_med, xnorm_den; double rho_max, rho_min, rho_max_mod, rho_min_mod, errtol_p; int iter_d, itmax_d, mode, idamp, ipkey; int itmax_p, iters_p, iok_p, iinfo_p; // Utility and temproary parameters double alpha; MAT2(iz, 50, 1); // Recover level information level = 1; lev = (*ilev - 1) + level; // Do some i/o if requested if (*iinfo > 1) { VMESSAGE3("Starting: (%d, %d, %d)", *nx, *ny, *nz); } if (*iok != 0) { Vprtstp(*iok, -1, 0.0, 0.0, 0.0); } /************************************************************** *** note: if (iok!=0) then: use a stopping test. *** *** else: use just the itmax to stop iteration. *** ************************************************************** *** istop=0 most efficient (whatever it is) *** *** istop=1 relative residual *** *** istop=2 rms difference of successive iterates *** *** istop=3 relative true error (provided for testing) *** **************************************************************/ // Compute denominator for stopping criterion if (*istop == 0) { rsden = 1.0; } else if (*istop == 1) { // Compute initial residual with zero initial guess // this is analogous to the linear case where one can // simply take norm of rhs for a zero initial guess Vazeros(nx, ny, nz, w1); Vnmresid(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), RAT( fc, VAT2(iz, 1, lev)), w1, w2, w3); rsden = Vxnrm1(nx, ny, nz, w2); } else if (*istop == 2) { rsden = VSQRT( *nx * *ny * *nz); } else if (*istop == 3) { rsden = Vxnrm2(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev))); } else if (*istop == 4) { rsden = Vxnrm2(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev))); } else if (*istop == 5) { Vnmatvec(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), RAT(tru, VAT2(iz, 1, lev)), w1, w2); rsden = VSQRT(Vxdot(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)), w1)); } else { VABORT_MSG1("Bad istop value: %d\n", *istop); } if (rsden == 0.0) { rsden = 1.0; VWARN_MSG0(rsden != 0, "rhs is zero"); } rsnrm = rsden; orsnrm = rsnrm; if (*iok != 0) { Vprtstp(*iok, 0, rsnrm, rsden, orsnrm); } /********************************************************************* *** begin newton iteration *********************************************************************/ // Now compute residual with the initial guess Vnmresid(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), RAT( fc, VAT2(iz, 1, lev)), RAT( x, VAT2(iz, 1, lev)), w0, w2); xnorm_old = Vxnrm1(nx, ny, nz, w0); if (*iok != 0) { xnorm_den = rsden; } else { xnorm_den = xnorm_old; } /********************************************************************* *** begin the loop *********************************************************************/ // Setup for the looping VMESSAGE0("Damping enabled"); idamp = 1; *iters = 0; //30 while(1) { (*iters)++; // Save iterate if stop test will use it on next iter if (*istop == 2) { Vxcopy(nx, ny, nz, RAT(x, VAT2(iz, 1, lev)), RAT(tru, VAT2(iz, 1, lev))); } // Compute the current jacobian system and rhs ipkey = VAT(ipc, 10); Vgetjac(nx, ny, nz, nlev_real, iz, ilev, &ipkey, x, w0, cprime, rhs, cc, pc); // Determine number of correct digits in current residual // Algorithm 5.3 in the thesis, test version (1') // Global-superlinear convergence bigc = 1.0; ord = 2.0; /* NAB 06-18-01: If complex problems are not converging, set this to * machine epsilon. This makes it use the exact jacobian rather than * the appropriate form (as here) */ errtol_s = VMIN2((0.9 * xnorm_old), (bigc * VPOW(xnorm_old, ord))); VMESSAGE1("Using errtol_s: %f", errtol_s); // Do a linear multigrid solve of the newton equations Vazeros(nx, ny, nz, RAT(xtmp, VAT2(iz, 1, lev))); itmax_s = 1000; istop_s = 0; iters_s = 0; ierror_s = 0; // NAB 06-18-01 -- What this used to be: iok_s = 0; iinfo_s = 0; if ((*iinfo >= 2) && (*ilev == 1)) iok_s = 2; // What it's changed to: if (*iinfo >= 2) iinfo_s = *iinfo; iok_s = 2; // End of NAB hack. Vmvcs(nx, ny, nz, xtmp, iz, w0, w1, w2, w3, &istop_s, &itmax_s, &iters_s, &ierror_s, nlev, ilev, nlev_real, mgsolv, &iok_s, &iinfo_s, epsiln, &errtol_s, omega, nu1, nu2, mgsmoo, ipc, rpc, pc, ac, cprime, rhs, tru); /************************************************************** *** note: rhs and cprime are now available as temp vectors *** **************************************************************/ // If damping is still enabled -- doit if (idamp == 1) { // Try the correction Vxcopy(nx, ny, nz, RAT(x, VAT2(iz, 1, lev)), w1); damp = 1.0; Vxaxpy(nx, ny, nz, &damp, RAT(xtmp, VAT2(iz, 1, lev)), w1); Vnmresid(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), RAT( fc, VAT2(iz, 1, lev)), w1, w0, RAT(rhs, VAT2(iz, 1, lev))); xnorm_new = Vxnrm1(nx, ny, nz, w0); // Damping is still enabled -- doit damp = 1.0; iter_d = 0; itmax_d = 10; mode = 0; VMESSAGE1("Attempting damping, relres = %f", xnorm_new / xnorm_den); while(iter_d < itmax_d) { if (mode == 0) { if (xnorm_new < xnorm_old) { mode = 1; } } else if (xnorm_new > xnorm_med) { break; } // Keep old soln and residual around, and its norm Vxcopy(nx, ny, nz, w1, w2); Vxcopy(nx, ny, nz, w0, w3); xnorm_med = xnorm_new; // New damped correction, residual, and its norm Vxcopy(nx, ny, nz, RAT(x, VAT2(iz, 1, lev)), w1); damp = damp / 2.0; Vxaxpy(nx, ny, nz, &damp, RAT(xtmp, VAT2(iz, 1, lev)), w1); Vnmresid(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), RAT( fc, VAT2(iz, 1, lev)), w1, w0, RAT(rhs, VAT2(iz, 1, lev))); xnorm_new = Vxnrm1(nx, ny, nz, w0); // Next iter... iter_d = iter_d + 1; VMESSAGE1("Attempting damping, relres = %f", xnorm_new / xnorm_den); } Vxcopy(nx, ny, nz, w2, RAT(x, VAT2(iz, 1, lev))); Vxcopy(nx, ny, nz, w3, w0); xnorm_new = xnorm_med; xnorm_old = xnorm_new; VMESSAGE1("Damping accepted, relres = %f", xnorm_new / xnorm_den); // Determine whether or not to disable damping if ((iter_d - 1) == 0) { VMESSAGE0("Damping disabled"); idamp = 0; } } else { // Damping is disabled -- accept the newton step damp = 1.0; Vxaxpy(nx, ny, nz, &damp, RAT(xtmp, VAT2(iz, 1, lev)), RAT(x, VAT2(iz, 1, lev))); Vnmresid(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), RAT( fc, VAT2(iz, 1, lev)), RAT( x, VAT2(iz, 1, lev)), w0, RAT(rhs, VAT2(iz, 1, lev))); xnorm_new = Vxnrm1(nx, ny, nz, w0); xnorm_old = xnorm_new; } // Compute/check the current stopping test *** if (iok != 0) { orsnrm = rsnrm; if (*istop == 0) { rsnrm = xnorm_new; } else if (*istop == 1) { rsnrm = xnorm_new; } else if (*istop == 2) { Vxcopy(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)), w1); alpha = -1.0; Vxaxpy(nx, ny, nz, &alpha, RAT(x, VAT2(iz, 1, lev)), w1); rsnrm = Vxnrm1(nx, ny, nz, w1); } else if (*istop == 3) { Vxcopy(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)), w1); alpha = -1.0; Vxaxpy(nx, ny, nz, &alpha, RAT(x, VAT2(iz, 1, lev)), w1); rsnrm = Vxnrm2(nx, ny, nz, w1); } else if (*istop == 4) { Vxcopy(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)), w1); alpha = -1.0; Vxaxpy(nx, ny, nz, &alpha, RAT(x, VAT2(iz, 1, lev)), w1); rsnrm = Vxnrm2(nx, ny, nz, w1); } else if (*istop == 5) { Vxcopy(nx, ny, nz, RAT(tru, VAT2(iz, 1, lev)), w1); alpha = -1.0; Vxaxpy(nx, ny, nz, &alpha, RAT(x, VAT2(iz, 1, lev)), w1); Vnmatvec(nx, ny, nz, RAT(ipc, VAT2(iz, 5, lev)), RAT(rpc, VAT2(iz, 6, lev)), RAT( ac, VAT2(iz, 7, lev)), RAT( cc, VAT2(iz, 1, lev)), w1, w2, w3); rsnrm = VSQRT(Vxdot(nx, ny, nz, w1, w2)); } else { VABORT_MSG1("Bad istop value: %d", *istop); } Vprtstp(*iok, *iters, rsnrm, rsden, orsnrm); if ((rsnrm/rsden) <= *errtol) break; } // Check iteration count *** if (*iters >= *itmax) break; } // Condition estimate of final jacobian if (*iinfo > 2) { Vnm_print(2, "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"); Vnm_print(2, "% Vnewton: JACOBIAN ANALYSIS ==> (%d, %d, %d)\n", *nx, *ny, *nz ); // Largest eigenvalue of the jacobian matrix Vnm_print(2, "% Vnewton: Power calculating rho(JAC)\n"); itmax_p = 1000; errtol_p = 1.0e-4; iters_p = 0; iinfo_p = *iinfo; Vpower(nx, ny, nz, iz, ilev, ipc, rpc, ac, cprime, w0, w1, w2, w3, &rho_max, &rho_max_mod, &errtol_p, &itmax_p, &iters_p, &iinfo_p); Vnm_print(2, "% Vnewton: power iters = %d\n", iters_p); Vnm_print(2, "% Vnewton: power eigmax = %d\n", rho_max); Vnm_print(2, "% Vnewton: power (MODEL) = %d\n", rho_max_mod); // Smallest eigenvalue of the system matrix A *** Vnm_print(2, "% Vnewton: ipower calculating lambda_min(JAC)...\n"); itmax_p = 1000; errtol_p = 1.0e-4; iters_p = 0; iinfo_p = *iinfo; Vazeros(nx, ny, nz, xtmp); Vipower(nx, ny, nz, xtmp, iz, w0, w1, w2, w3, rhs, &rho_min, &rho_min_mod, &errtol_p, &itmax_p, &iters_p, nlev, ilev, nlev_real, mgsolv, &iok_p, &iinfo_p, epsiln, errtol, omega, nu1, nu2, mgsmoo, ipc, rpc, pc, ac, cprime, tru); Vnm_print(2, "% Vnewton: ipower iters = %d\n", iters_p); Vnm_print(2, "% Vnewton: ipower eigmin = %d\n", rho_min); Vnm_print(2, "% Vnewton: ipower (MODEL) = %d\n", rho_min_mod); // Condition number estimate Vnm_print(2, "% Vnewton: condition number = %f\n", (double)rho_max / rho_min); Vnm_print(2, "% Vnewton: condition (MODEL) = %f\n", (double)rho_max_mod / rho_min_mod); Vnm_print(2, "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"); } }
VPUBLIC void Vdaxpy(int n, double da, double *dx, int incx, double *dy, int incy) { int i, ix, iy, m, mp1; if (n <= 0) return; if (da == 0) return; if (incx == 1 && incy == 1) { m = n % 4; if (m != 0) { for (i=1; i<=m; i++) VAT(dy, i) += da * VAT(dx, i); } if (n < 4) return; mp1 = m + 1; for (i=mp1; i<=n; i+=4) { VAT(dy, i ) += da * VAT(dx, i ); VAT(dy, i+1) += da * VAT(dx, i+1); VAT(dy, i+2) += da * VAT(dx, i+2); VAT(dy, i+3) += da * VAT(dx, i+3); } } else { ix = 1; if (incx < 0 ) ix = (-n + 1) * incx + 1; iy = 1; if (incy < 0 ) iy = (-n + 1) * incy + 1; for (i=1; i<=n; i++) { VAT(dy, iy) += da * VAT(dx, ix); ix += incx; iy += incy; } } }
VPUBLIC double Vddot(int n, double *dx, int incx, double *dy, int incy) { double dtemp; int i, ix, iy, m, mp1; double ddot = 0.0; dtemp = 0.0; if (n <= 0) return ddot; if (incx == 1 && incy == 1) { m = n % 5; if (m != 0) { for (i=1; i<=m; i++) dtemp += VAT(dx, i) * VAT(dy, i); if (n < 5) { ddot = dtemp; return ddot; } } mp1 = m + 1; for (i=mp1; i<=n; i+=5) dtemp += VAT(dx, i) * VAT(dy, i) + VAT(dx, i+1) * VAT(dy, i+1) + VAT(dx, i+2) * VAT(dy, i+2) + VAT(dx, i+3) * VAT(dy, i+3) + VAT(dx, i+4) * VAT(dy, i+4); } else { ix = 1; if (incx < 0) ix = (-n + 1) * incx + 1; iy = 1; if (incy < 0) iy = (-n + 1) * incy + 1; for (i=1; i<=n; i++) { ix += incx; iy += incy; } } ddot = dtemp; return ddot; }