static int CVDiagSetup(CVodeMem cv_mem, int convfail, N_Vector ypred, N_Vector fpred, booleantype *jcurPtr, N_Vector vtemp1, N_Vector vtemp2, N_Vector vtemp3) { realtype r; N_Vector ftemp, y; booleantype invOK; CVDiagMem cvdiag_mem; int retval; cvdiag_mem = (CVDiagMem) lmem; /* Rename work vectors for use as temporary values of y and f */ ftemp = vtemp1; y = vtemp2; /* Form y with perturbation = FRACT*(func. iter. correction) */ r = FRACT * rl1; N_VLinearSum(h, fpred, -ONE, zn[1], ftemp); N_VLinearSum(r, ftemp, ONE, ypred, y); /* Evaluate f at perturbed y */ retval = f(tn, y, M, cv_mem->cv_user_data); nfeDI++; if (retval < 0) { cvProcessError(cv_mem, CVDIAG_RHSFUNC_UNRECVR, "CVDIAG", "CVDiagSetup", MSGDG_RHSFUNC_FAILED); last_flag = CVDIAG_RHSFUNC_UNRECVR; return(-1); } if (retval > 0) { last_flag = CVDIAG_RHSFUNC_RECVR; return(1); } /* Construct M = I - gamma*J with J = diag(deltaf_i/deltay_i) */ N_VLinearSum(ONE, M, -ONE, fpred, M); N_VLinearSum(FRACT, ftemp, -h, M, M); N_VProd(ftemp, ewt, y); /* Protect against deltay_i being at roundoff level */ N_VCompare(uround, y, bit); N_VAddConst(bit, -ONE, bitcomp); N_VProd(ftemp, bit, y); N_VLinearSum(FRACT, y, -ONE, bitcomp, y); N_VDiv(M, y, M); N_VProd(M, bit, M); N_VLinearSum(ONE, M, -ONE, bitcomp, M); /* Invert M with test for zero components */ invOK = N_VInvTest(M, M); if (!invOK) { last_flag = CVDIAG_INV_FAIL; return(1); } /* Set jcur = TRUE, save gamma in gammasv, and return */ *jcurPtr = TRUE; gammasv = gamma; last_flag = CVDIAG_SUCCESS; return(0); }
static real KINScSteplength(KINMem kin_mem, N_Vector ucur, N_Vector ss, N_Vector usc) { N_VInv(usc, vtemp1); N_VAbs(ucur, vtemp2); N_VLinearSum(ONE, vtemp1, ONE, vtemp2, vtemp1); N_VDiv(ss, vtemp1, vtemp1); return(N_VMaxNorm(vtemp1)); }
void N_VDiv_SensWrapper(N_Vector x, N_Vector y, N_Vector z) { int i; for (i=0; i < NV_NVECS_SW(x); i++) N_VDiv(NV_VEC_SW(x,i), NV_VEC_SW(y,i), NV_VEC_SW(z,i)); return; }
static int KINConstraint(KINMem kin_mem) { real mxchange; N_VLinearSum(ONE, uu, ONE, pp, vtemp1); /* this vector.c routine returns TRUE if all products v1[i]*v2[i] are * positive (with the proviso that all products which would result from * v1[i]=0. are ignored) , and FALSE otherwise (e.g. at least one such * product is negative) */ if (N_VConstrProdPos(constraints, vtemp1)) return(0); N_VDiv(pp, uu, vtemp2); mxchange = N_VMaxNorm(vtemp2); if (mxchange >= relu) { stepl = POINT9 * relu / mxchange; return(1); } return(0); }
int SUNLinSolSolve_SPBCGS(SUNLinearSolver S, SUNMatrix A, N_Vector x, N_Vector b, realtype delta) { /* local data and shortcut variables */ realtype alpha, beta, omega, omega_denom, beta_num, beta_denom, r_norm, rho; N_Vector r_star, r, p, q, u, Ap, vtemp; booleantype preOnLeft, preOnRight, scale_x, scale_b, converged; int l, l_max, ier; void *A_data, *P_data; N_Vector sx, sb; ATimesFn atimes; PSolveFn psolve; realtype *res_norm; int *nli; /* local variables for fused vector operations */ realtype cv[3]; N_Vector Xv[3]; /* Make local shorcuts to solver variables. */ if (S == NULL) return(SUNLS_MEM_NULL); l_max = SPBCGS_CONTENT(S)->maxl; r_star = SPBCGS_CONTENT(S)->r_star; r = SPBCGS_CONTENT(S)->r; p = SPBCGS_CONTENT(S)->p; q = SPBCGS_CONTENT(S)->q; u = SPBCGS_CONTENT(S)->u; Ap = SPBCGS_CONTENT(S)->Ap; vtemp = SPBCGS_CONTENT(S)->vtemp; sb = SPBCGS_CONTENT(S)->s1; sx = SPBCGS_CONTENT(S)->s2; A_data = SPBCGS_CONTENT(S)->ATData; P_data = SPBCGS_CONTENT(S)->PData; atimes = SPBCGS_CONTENT(S)->ATimes; psolve = SPBCGS_CONTENT(S)->Psolve; nli = &(SPBCGS_CONTENT(S)->numiters); res_norm = &(SPBCGS_CONTENT(S)->resnorm); /* Initialize counters and convergence flag */ *nli = 0; converged = SUNFALSE; /* set booleantype flags for internal solver options */ preOnLeft = ( (PRETYPE(S) == PREC_LEFT) || (PRETYPE(S) == PREC_BOTH) ); preOnRight = ( (PRETYPE(S) == PREC_RIGHT) || (PRETYPE(S) == PREC_BOTH) ); scale_x = (sx != NULL); scale_b = (sb != NULL); /* Set r_star to initial (unscaled) residual r_0 = b - A*x_0 */ if (N_VDotProd(x, x) == ZERO) N_VScale(ONE, b, r_star); else { ier = atimes(A_data, x, r_star); if (ier != 0) { LASTFLAG(S) = (ier < 0) ? SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; return(LASTFLAG(S)); } N_VLinearSum(ONE, b, -ONE, r_star, r_star); } /* Apply left preconditioner and b-scaling to r_star = r_0 */ if (preOnLeft) { ier = psolve(P_data, r_star, r, delta, PREC_LEFT); if (ier != 0) { LASTFLAG(S) = (ier < 0) ? SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; return(LASTFLAG(S)); } } else N_VScale(ONE, r_star, r); if (scale_b) N_VProd(sb, r, r_star); else N_VScale(ONE, r, r_star); /* Initialize beta_denom to the dot product of r0 with r0 */ beta_denom = N_VDotProd(r_star, r_star); /* Set r_norm to L2 norm of r_star = sb P1_inv r_0, and return if small */ *res_norm = r_norm = rho = SUNRsqrt(beta_denom); if (r_norm <= delta) { LASTFLAG(S) = SUNLS_SUCCESS; return(LASTFLAG(S)); } /* Copy r_star to r and p */ N_VScale(ONE, r_star, r); N_VScale(ONE, r_star, p); /* Begin main iteration loop */ for(l = 0; l < l_max; l++) { (*nli)++; /* Generate Ap = A-tilde p, where A-tilde = sb P1_inv A P2_inv sx_inv */ /* Apply x-scaling: vtemp = sx_inv p */ if (scale_x) N_VDiv(p, sx, vtemp); else N_VScale(ONE, p, vtemp); /* Apply right preconditioner: vtemp = P2_inv sx_inv p */ if (preOnRight) { N_VScale(ONE, vtemp, Ap); ier = psolve(P_data, Ap, vtemp, delta, PREC_RIGHT); if (ier != 0) { LASTFLAG(S) = (ier < 0) ? SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; return(LASTFLAG(S)); } } /* Apply A: Ap = A P2_inv sx_inv p */ ier = atimes(A_data, vtemp, Ap ); if (ier != 0) { LASTFLAG(S) = (ier < 0) ? SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; return(LASTFLAG(S)); } /* Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */ if (preOnLeft) { ier = psolve(P_data, Ap, vtemp, delta, PREC_LEFT); if (ier != 0) { LASTFLAG(S) = (ier < 0) ? SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; return(LASTFLAG(S)); } } else N_VScale(ONE, Ap, vtemp); /* Apply b-scaling: Ap = sb P1_inv A P2_inv sx_inv p */ if (scale_b) N_VProd(sb, vtemp, Ap); else N_VScale(ONE, vtemp, Ap); /* Calculate alpha = <r,r_star>/<Ap,r_star> */ alpha = ((beta_denom / N_VDotProd(Ap, r_star))); /* Update q = r - alpha*Ap = r - alpha*(sb P1_inv A P2_inv sx_inv p) */ N_VLinearSum(ONE, r, -alpha, Ap, q); /* Generate u = A-tilde q */ /* Apply x-scaling: vtemp = sx_inv q */ if (scale_x) N_VDiv(q, sx, vtemp); else N_VScale(ONE, q, vtemp); /* Apply right preconditioner: vtemp = P2_inv sx_inv q */ if (preOnRight) { N_VScale(ONE, vtemp, u); ier = psolve(P_data, u, vtemp, delta, PREC_RIGHT); if (ier != 0) { LASTFLAG(S) = (ier < 0) ? SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; return(LASTFLAG(S)); } } /* Apply A: u = A P2_inv sx_inv u */ ier = atimes(A_data, vtemp, u ); if (ier != 0) { LASTFLAG(S) = (ier < 0) ? SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC; return(LASTFLAG(S)); } /* Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */ if (preOnLeft) { ier = psolve(P_data, u, vtemp, delta, PREC_LEFT); if (ier != 0) { LASTFLAG(S) = (ier < 0) ? SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; return(LASTFLAG(S)); } } else N_VScale(ONE, u, vtemp); /* Apply b-scaling: u = sb P1_inv A P2_inv sx_inv u */ if (scale_b) N_VProd(sb, vtemp, u); else N_VScale(ONE, vtemp, u); /* Calculate omega = <u,q>/<u,u> */ omega_denom = N_VDotProd(u, u); if (omega_denom == ZERO) omega_denom = ONE; omega = (N_VDotProd(u, q) / omega_denom); /* Update x = x + alpha*p + omega*q */ cv[0] = ONE; Xv[0] = x; cv[1] = alpha; Xv[1] = p; cv[2] = omega; Xv[2] = q; ier = N_VLinearCombination(3, cv, Xv, x); if (ier != SUNLS_SUCCESS) return(SUNLS_VECTOROP_ERR); /* Update the residual r = q - omega*u */ N_VLinearSum(ONE, q, -omega, u, r); /* Set rho = norm(r) and check convergence */ *res_norm = rho = SUNRsqrt(N_VDotProd(r, r)); if (rho <= delta) { converged = SUNTRUE; break; } /* Not yet converged, continue iteration */ /* Update beta = <rnew,r_star> / <rold,r_start> * alpha / omega */ beta_num = N_VDotProd(r, r_star); beta = ((beta_num / beta_denom) * (alpha / omega)); /* Update p = r + beta*(p - omega*Ap) = beta*p - beta*omega*Ap + r */ cv[0] = beta; Xv[0] = p; cv[1] = -alpha*(beta_num / beta_denom); Xv[1] = Ap; cv[2] = ONE; Xv[2] = r; ier = N_VLinearCombination(3, cv, Xv, p); if (ier != SUNLS_SUCCESS) return(SUNLS_VECTOROP_ERR); /* udpate beta_denom for next iteration */ beta_denom = beta_num; } /* Main loop finished */ if ((converged == SUNTRUE) || (rho < r_norm)) { /* Apply the x-scaling and right preconditioner: x = P2_inv sx_inv x */ if (scale_x) N_VDiv(x, sx, x); if (preOnRight) { ier = psolve(P_data, x, vtemp, delta, PREC_RIGHT); if (ier != 0) { LASTFLAG(S) = (ier < 0) ? SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC; return(LASTFLAG(S)); } N_VScale(ONE, vtemp, x); } if (converged == SUNTRUE) LASTFLAG(S) = SUNLS_SUCCESS; else LASTFLAG(S) = SUNLS_RES_REDUCED; return(LASTFLAG(S)); } else { LASTFLAG(S) = SUNLS_CONV_FAIL; return(LASTFLAG(S)); } }
int SpgmrSolve(SpgmrMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, int gstype, real delta, int max_restarts, void *P_data, N_Vector sx, N_Vector sb, ATimesFn atimes, PSolveFn psolve, real *res_norm, int *nli, int *nps) { N_Vector *V, xcor, vtemp; real **Hes, *givens, *yg; real s_r0_norm, beta, rotation_product, r_norm, s_product, rho; boole preOnLeft, preOnRight, scale_x, scale_b, converged; int i, j, k, l, l_plus_1, l_max, krydim, ier, ntries; if (mem == NULL) return(SPGMR_MEM_NULL); /* Make local copies of mem variables */ l_max = mem->l_max; V = mem->V; Hes = mem->Hes; givens = mem->givens; xcor = mem->xcor; yg = mem->yg; vtemp = mem->vtemp; *nli = *nps = 0; /* Initialize counters */ converged = FALSE; /* Initialize converged flag */ if (max_restarts < 0) max_restarts = 0; if ((pretype != LEFT) && (pretype != RIGHT) && (pretype != BOTH)) pretype = NONE; preOnLeft = ((pretype == LEFT) || (pretype == BOTH)); preOnRight = ((pretype == RIGHT) || (pretype == BOTH)); scale_x = (sx != NULL); scale_b = (sb != NULL); /* Set vtemp and V[0] to initial (unscaled) residual r_0 = b - A*x_0 */ if (N_VDotProd(x, x) == ZERO) { N_VScale(ONE, b, vtemp); } else { if (atimes(A_data, x, vtemp) != 0) return(SPGMR_ATIMES_FAIL); N_VLinearSum(ONE, b, -ONE, vtemp, vtemp); } N_VScale(ONE, vtemp, V[0]); /* Apply b-scaling to vtemp, get L2 norm of sb r_0, and return if small */ /* if (scale_b) N_VProd(sb, vtemp, vtemp); s_r0_norm = RSqrt(N_VDotProd(vtemp, vtemp)); if (s_r0_norm <= delta) return(SPGMR_SUCCESS); */ /* Apply left preconditioner and b-scaling to V[0] = r_0 */ if (preOnLeft) { ier = psolve(P_data, V[0], vtemp, LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, V[0], vtemp); } if (scale_b) { N_VProd(sb, vtemp, V[0]); } else { N_VScale(ONE, vtemp, V[0]); } /* Set r_norm = beta to L2 norm of V[0] = sb P1_inv r_0, and return if small */ *res_norm = r_norm = beta = RSqrt(N_VDotProd(V[0], V[0])); if (r_norm <= delta) return(SPGMR_SUCCESS); /* Set xcor = 0 */ N_VConst(ZERO, xcor); /* Begin outer iterations: up to (max_restarts + 1) attempts */ for (ntries = 0; ntries <= max_restarts; ntries++) { /* Initialize the Hessenberg matrix Hes and Givens rotation product. Normalize the initial vector V[0]. */ for (i=0; i <= l_max; i++) for (j=0; j < l_max; j++) Hes[i][j] = ZERO; rotation_product = ONE; N_VScale(ONE/r_norm, V[0], V[0]); /* Inner loop: generate Krylov sequence and Arnoldi basis */ for(l=0; l < l_max; l++) { (*nli)++; krydim = l_plus_1 = l + 1; /* Generate A-tilde V[l], where A-tilde = sb P1_inv A P2_inv sx_inv */ /* Apply x-scaling: vtemp = sx_inv V[l] */ if (scale_x) { N_VDiv(V[l], sx, vtemp); } else { N_VScale(ONE, V[l], vtemp); } /* Apply right precoditioner: vtemp = P2_inv sx_inv V[l] */ N_VScale(ONE, vtemp, V[l_plus_1]); if (preOnRight) { ier = psolve(P_data, V[l_plus_1], vtemp, RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } /* Apply A: V[l+1] = A P2_inv sx_inv V[l] */ if (atimes(A_data, vtemp, V[l_plus_1] ) != 0) return(SPGMR_ATIMES_FAIL); /* Apply left preconditioning: vtemp = P1_inv A P2_inv sx_inv V[l] */ if (preOnLeft) { ier = psolve(P_data, V[l_plus_1], vtemp, LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, V[l_plus_1], vtemp); } /* Apply b-scaling: V[l+1] = sb P1_inv A P2_inv sx_inv V[l] */ if (scale_b) { N_VProd(sb, vtemp, V[l_plus_1]); } else { N_VScale(ONE, vtemp, V[l_plus_1]); } /* Orthogonalize V[l+1] against previous V[i]: V[l+1] = w_tilde. */ if (gstype == CLASSICAL_GS) { if (ClassicalGS(V, Hes, l_plus_1, l_max, &(Hes[l_plus_1][l]), vtemp, yg) != 0) return(SPGMR_GS_FAIL); } else { if (ModifiedGS(V, Hes, l_plus_1, l_max, &(Hes[l_plus_1][l])) != 0) return(SPGMR_GS_FAIL); } /* Update the QR factorization of Hes */ if(QRfact(krydim, Hes, givens, l) != 0 ) return(SPGMR_QRFACT_FAIL); /* Update residual norm estimate; break if convergence test passes */ rotation_product *= givens[2*l+1]; if ((*res_norm = rho = ABS(rotation_product*r_norm)) <= delta) { converged = TRUE; break; } /* Normalize V[l+1] with norm value from the Gram-Schmidt */ N_VScale(ONE/Hes[l_plus_1][l], V[l_plus_1], V[l_plus_1]); } /* Inner loop is done. Compute the new correction vector xcor */ /* Construct g, then solve for y */ yg[0] = r_norm; for (i=1; i <= krydim; i++) yg[i]=ZERO; if (QRsol(krydim, Hes, givens, yg) != 0) return(SPGMR_QRSOL_FAIL); /* Add correction vector V_l y to xcor */ for (k=0; k < krydim; k++) N_VLinearSum(yg[k], V[k], ONE, xcor, xcor); /* If converged, construct the final solution vector x */ if (converged) { /* Apply x-scaling and right precond.: vtemp = P2_inv sx_inv xcor */ if (scale_x) N_VDiv(xcor, sx, xcor); if (preOnRight) { ier = psolve(P_data, xcor, vtemp, RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, xcor, vtemp); } /* Add correction to initial x to get final solution x, and return */ N_VLinearSum(ONE, x, ONE, vtemp, x); return(SPGMR_SUCCESS); } /* Not yet converged; if allowed, prepare for restart */ if (ntries == max_restarts) break; /* Construct last column of Q in yg */ s_product = ONE; for (i=krydim; i > 0; i--) { yg[i] = s_product*givens[2*i-2]; s_product *= givens[2*i-1]; } yg[0] = s_product; /* Scale r_norm and yg */ r_norm *= s_product; for (i=0; i <= krydim; i++) yg[i] *= r_norm; r_norm = ABS(r_norm); /* Multiply yg by V_(krydim+1) to get last residual vector; restart */ N_VScale(yg[0], V[0], V[0]); for( k=1; k <= krydim; k++) N_VLinearSum(yg[k], V[k], ONE, V[0], V[0]); } /* Failed to converge, even after allowed restarts. If the residual norm was reduced below its initial value, compute and return x anyway. Otherwise return failure flag. */ if (rho < beta) { /* Apply the x-scaling and right precond.: vtemp = P2_inv sx_inv xcor */ if (scale_x) N_VDiv(xcor, sx, xcor); if (preOnRight) { ier = psolve(P_data, xcor, vtemp, RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, xcor, vtemp); } /* Add vtemp to initial x to get final solution x, and return */ N_VLinearSum(ONE, x, ONE, vtemp, x); return(SPGMR_RES_REDUCED); } return(SPGMR_CONV_FAIL); }
int SpbcgSolve(SpbcgMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, realtype delta, void *P_data, N_Vector sx, N_Vector sb, ATimesFn atimes, PSolveFn psolve, realtype *res_norm, int *nli, int *nps) { realtype alpha, beta, omega, omega_denom, beta_num, beta_denom, r_norm, rho; N_Vector r_star, r, p, q, u, Ap, vtemp; booleantype preOnLeft, preOnRight, scale_x, scale_b, converged; int l, l_max, ier; if (mem == NULL) return(SPBCG_MEM_NULL); /* Make local copies of mem variables */ l_max = mem->l_max; r_star = mem->r_star; r = mem->r; p = mem->p; q = mem->q; u = mem->u; Ap = mem->Ap; vtemp = mem->vtemp; *nli = *nps = 0; /* Initialize counters */ converged = FALSE; /* Initialize converged flag */ if ((pretype != PREC_LEFT) && (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) pretype = PREC_NONE; preOnLeft = ((pretype == PREC_BOTH) || (pretype == PREC_LEFT)); preOnRight = ((pretype == PREC_BOTH) || (pretype == PREC_RIGHT)); scale_x = (sx != NULL); scale_b = (sb != NULL); /* Set r_star to initial (unscaled) residual r_0 = b - A*x_0 */ if (N_VDotProd(x, x) == ZERO) N_VScale(ONE, b, r_star); else { ier = atimes(A_data, x, r_star); if (ier != 0) return((ier < 0) ? SPBCG_ATIMES_FAIL_UNREC : SPBCG_ATIMES_FAIL_REC); N_VLinearSum(ONE, b, -ONE, r_star, r_star); } /* Apply left preconditioner and b-scaling to r_star = r_0 */ if (preOnLeft) { ier = psolve(P_data, r_star, r, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); } else N_VScale(ONE, r_star, r); if (scale_b) N_VProd(sb, r, r_star); else N_VScale(ONE, r, r_star); /* Initialize beta_denom to the dot product of r0 with r0 */ beta_denom = N_VDotProd(r_star, r_star); /* Set r_norm to L2 norm of r_star = sb P1_inv r_0, and return if small */ *res_norm = r_norm = rho = SUNRsqrt(beta_denom); if (r_norm <= delta) return(SPBCG_SUCCESS); /* Copy r_star to r and p */ N_VScale(ONE, r_star, r); N_VScale(ONE, r_star, p); /* Begin main iteration loop */ for(l = 0; l < l_max; l++) { (*nli)++; /* Generate Ap = A-tilde p, where A-tilde = sb P1_inv A P2_inv sx_inv */ /* Apply x-scaling: vtemp = sx_inv p */ if (scale_x) N_VDiv(p, sx, vtemp); else N_VScale(ONE, p, vtemp); /* Apply right preconditioner: vtemp = P2_inv sx_inv p */ if (preOnRight) { N_VScale(ONE, vtemp, Ap); ier = psolve(P_data, Ap, vtemp, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); } /* Apply A: Ap = A P2_inv sx_inv p */ ier = atimes(A_data, vtemp, Ap ); if (ier != 0) return((ier < 0) ? SPBCG_ATIMES_FAIL_UNREC : SPBCG_ATIMES_FAIL_REC); /* Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */ if (preOnLeft) { ier = psolve(P_data, Ap, vtemp, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); } else N_VScale(ONE, Ap, vtemp); /* Apply b-scaling: Ap = sb P1_inv A P2_inv sx_inv p */ if (scale_b) N_VProd(sb, vtemp, Ap); else N_VScale(ONE, vtemp, Ap); /* Calculate alpha = <r,r_star>/<Ap,r_star> */ alpha = ((beta_denom / N_VDotProd(Ap, r_star))); /* Update q = r - alpha*Ap = r - alpha*(sb P1_inv A P2_inv sx_inv p) */ N_VLinearSum(ONE, r, -alpha, Ap, q); /* Generate u = A-tilde q */ /* Apply x-scaling: vtemp = sx_inv q */ if (scale_x) N_VDiv(q, sx, vtemp); else N_VScale(ONE, q, vtemp); /* Apply right preconditioner: vtemp = P2_inv sx_inv q */ if (preOnRight) { N_VScale(ONE, vtemp, u); ier = psolve(P_data, u, vtemp, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); } /* Apply A: u = A P2_inv sx_inv u */ ier = atimes(A_data, vtemp, u ); if (ier != 0) return((ier < 0) ? SPBCG_ATIMES_FAIL_UNREC : SPBCG_ATIMES_FAIL_REC); /* Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */ if (preOnLeft) { ier = psolve(P_data, u, vtemp, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); } else N_VScale(ONE, u, vtemp); /* Apply b-scaling: u = sb P1_inv A P2_inv sx_inv u */ if (scale_b) N_VProd(sb, vtemp, u); else N_VScale(ONE, vtemp, u); /* Calculate omega = <u,q>/<u,u> */ omega_denom = N_VDotProd(u, u); if (omega_denom == ZERO) omega_denom = ONE; omega = (N_VDotProd(u, q) / omega_denom); /* Update x = x + alpha*p + omega*q */ N_VLinearSum(alpha, p, omega, q, vtemp); N_VLinearSum(ONE, x, ONE, vtemp, x); /* Update the residual r = q - omega*u */ N_VLinearSum(ONE, q, -omega, u, r); /* Set rho = norm(r) and check convergence */ *res_norm = rho = SUNRsqrt(N_VDotProd(r, r)); if (rho <= delta) { converged = TRUE; break; } /* Not yet converged, continue iteration */ /* Update beta = <rnew,r_star> / <rold,r_start> * alpha / omega */ beta_num = N_VDotProd(r, r_star); beta = ((beta_num / beta_denom) * (alpha / omega)); beta_denom = beta_num; /* Update p = r + beta*(p - omega*Ap) */ N_VLinearSum(ONE, p, -omega, Ap, vtemp); N_VLinearSum(ONE, r, beta, vtemp, p); } /* Main loop finished */ if ((converged == TRUE) || (rho < r_norm)) { /* Apply the x-scaling and right preconditioner: x = P2_inv sx_inv x */ if (scale_x) N_VDiv(x, sx, x); if (preOnRight) { ier = psolve(P_data, x, vtemp, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); N_VScale(ONE, vtemp, x); } if (converged == TRUE) return(SPBCG_SUCCESS); else return(SPBCG_RES_REDUCED); } else return(SPBCG_CONV_FAIL); }
/*---------------------------------------------------------------- Function : SpfgmrSolve ---------------------------------------------------------------*/ int SpfgmrSolve(SpfgmrMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, int gstype, realtype delta, int max_restarts, int maxit, void *P_data, N_Vector s1, N_Vector s2, ATimesFn atimes, PSolveFn psolve, realtype *res_norm, int *nli, int *nps) { N_Vector *V, *Z, xcor, vtemp; realtype **Hes, *givens, *yg; realtype beta, rotation_product, r_norm, s_product, rho; booleantype preOnRight, scale1, scale2, converged; int i, j, k, l, l_max, krydim, ier, ntries; if (mem == NULL) return(SPFGMR_MEM_NULL); /* Initialize some variables */ krydim = 0; /* Make local copies of mem variables. */ l_max = mem->l_max; V = mem->V; Z = mem->Z; Hes = mem->Hes; givens = mem->givens; xcor = mem->xcor; yg = mem->yg; vtemp = mem->vtemp; *nli = *nps = 0; /* Initialize counters */ converged = SUNFALSE; /* Initialize converged flag */ /* If maxit is greater than l_max, then set maxit=l_max */ if (maxit > l_max) maxit = l_max; /* Check for legal value of max_restarts */ if (max_restarts < 0) max_restarts = 0; /* Set preconditioning flag (enabling any preconditioner implies right preconditioning, since FGMRES does not support left preconditioning) */ preOnRight = ((pretype == PREC_RIGHT) || (pretype == PREC_BOTH) || (pretype == PREC_LEFT)); /* Set scaling flags */ scale1 = (s1 != NULL); scale2 = (s2 != NULL); /* Set vtemp to initial (unscaled) residual r_0 = b - A*x_0. */ if (N_VDotProd(x, x) == ZERO) { N_VScale(ONE, b, vtemp); } else { ier = atimes(A_data, x, vtemp); if (ier != 0) return((ier < 0) ? SPFGMR_ATIMES_FAIL_UNREC : SPFGMR_ATIMES_FAIL_REC); N_VLinearSum(ONE, b, -ONE, vtemp, vtemp); } /* Apply left scaling to vtemp = r_0 to fill V[0]. */ if (scale1) { N_VProd(s1, vtemp, V[0]); } else { N_VScale(ONE, vtemp, V[0]); } /* Set r_norm = beta to L2 norm of V[0] = s1 r_0, and return if small */ *res_norm = r_norm = beta = SUNRsqrt(N_VDotProd(V[0], V[0])); if (r_norm <= delta) return(SPFGMR_SUCCESS); /* Initialize rho to avoid compiler warning message */ rho = beta; /* Set xcor = 0. */ N_VConst(ZERO, xcor); /* Begin outer iterations: up to (max_restarts + 1) attempts. */ for (ntries=0; ntries<=max_restarts; ntries++) { /* Initialize the Hessenberg matrix Hes and Givens rotation product. Normalize the initial vector V[0]. */ for (i=0; i<=l_max; i++) for (j=0; j<l_max; j++) Hes[i][j] = ZERO; rotation_product = ONE; N_VScale(ONE/r_norm, V[0], V[0]); /* Inner loop: generate Krylov sequence and Arnoldi basis. */ for (l=0; l<maxit; l++) { (*nli)++; krydim = l + 1; /* Generate A-tilde V[l], where A-tilde = s1 A P_inv s2_inv. */ /* Apply right scaling: vtemp = s2_inv V[l]. */ if (scale2) N_VDiv(V[l], s2, vtemp); else N_VScale(ONE, V[l], vtemp); /* Apply right preconditioner: vtemp = Z[l] = P_inv s2_inv V[l]. */ if (preOnRight) { N_VScale(ONE, vtemp, V[l+1]); ier = psolve(P_data, V[l+1], vtemp, delta, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPFGMR_PSOLVE_FAIL_UNREC : SPFGMR_PSOLVE_FAIL_REC); } N_VScale(ONE, vtemp, Z[l]); /* Apply A: V[l+1] = A P_inv s2_inv V[l]. */ ier = atimes(A_data, vtemp, V[l+1]); if (ier != 0) return((ier < 0) ? SPFGMR_ATIMES_FAIL_UNREC : SPFGMR_ATIMES_FAIL_REC); /* Apply left scaling: V[l+1] = s1 A P_inv s2_inv V[l]. */ if (scale1) N_VProd(s1, V[l+1], V[l+1]); /* Orthogonalize V[l+1] against previous V[i]: V[l+1] = w_tilde. */ if (gstype == CLASSICAL_GS) { if (ClassicalGS(V, Hes, l+1, l_max, &(Hes[l+1][l]), vtemp, yg) != 0) return(SPFGMR_GS_FAIL); } else { if (ModifiedGS(V, Hes, l+1, l_max, &(Hes[l+1][l])) != 0) return(SPFGMR_GS_FAIL); } /* Update the QR factorization of Hes. */ if(QRfact(krydim, Hes, givens, l) != 0 ) return(SPFGMR_QRFACT_FAIL); /* Update residual norm estimate; break if convergence test passes. */ rotation_product *= givens[2*l+1]; *res_norm = rho = SUNRabs(rotation_product*r_norm); if (rho <= delta) { converged = SUNTRUE; break; } /* Normalize V[l+1] with norm value from the Gram-Schmidt routine. */ N_VScale(ONE/Hes[l+1][l], V[l+1], V[l+1]); } /* Inner loop is done. Compute the new correction vector xcor. */ /* Construct g, then solve for y. */ yg[0] = r_norm; for (i=1; i<=krydim; i++) yg[i]=ZERO; if (QRsol(krydim, Hes, givens, yg) != 0) return(SPFGMR_QRSOL_FAIL); /* Add correction vector Z_l y to xcor. */ for (k=0; k<krydim; k++) N_VLinearSum(yg[k], Z[k], ONE, xcor, xcor); /* If converged, construct the final solution vector x and return. */ if (converged) { N_VLinearSum(ONE, x, ONE, xcor, x); return(SPFGMR_SUCCESS); } /* Not yet converged; if allowed, prepare for restart. */ if (ntries == max_restarts) break; /* Construct last column of Q in yg. */ s_product = ONE; for (i=krydim; i>0; i--) { yg[i] = s_product*givens[2*i-2]; s_product *= givens[2*i-1]; } yg[0] = s_product; /* Scale r_norm and yg. */ r_norm *= s_product; for (i=0; i<=krydim; i++) yg[i] *= r_norm; r_norm = SUNRabs(r_norm); /* Multiply yg by V_(krydim+1) to get last residual vector; restart. */ N_VScale(yg[0], V[0], V[0]); for (k=1; k<=krydim; k++) N_VLinearSum(yg[k], V[k], ONE, V[0], V[0]); } /* Failed to converge, even after allowed restarts. If the residual norm was reduced below its initial value, compute and return x anyway. Otherwise return failure flag. */ if (rho < beta) { N_VLinearSum(ONE, x, ONE, xcor, x); return(SPFGMR_RES_REDUCED); } return(SPFGMR_CONV_FAIL); }
int SptfqmrSolve(SptfqmrMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, realtype delta, void *P_data, N_Vector sx, N_Vector sb, ATimesFn atimes, PSolveFn psolve, realtype *res_norm, int *nli, int *nps) { realtype alpha, tau, eta, beta, c, sigma, v_bar, omega; realtype rho[2]; realtype r_init_norm, r_curr_norm; realtype temp_val; booleantype preOnLeft, preOnRight, scale_x, scale_b, converged; booleantype b_ok; int n, m, ier; /* Exit immediately if memory pointer is NULL */ if (mem == NULL) return(SPTFQMR_MEM_NULL); temp_val = r_curr_norm = -ONE; /* Initialize to avoid compiler warnings */ *nli = *nps = 0; /* Initialize counters */ converged = FALSE; /* Initialize convergence flag */ b_ok = FALSE; if ((pretype != PREC_LEFT) && (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) pretype = PREC_NONE; preOnLeft = ((pretype == PREC_BOTH) || (pretype == PREC_LEFT)); preOnRight = ((pretype == PREC_BOTH) || (pretype == PREC_RIGHT)); scale_x = (sx != NULL); scale_b = (sb != NULL); /* Set r_star to initial (unscaled) residual r_star = r_0 = b - A*x_0 */ /* NOTE: if x == 0 then just set residual to b and continue */ if (N_VDotProd(x, x) == ZERO) N_VScale(ONE, b, r_star); else { ier = atimes(A_data, x, r_star); if (ier != 0) return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC); N_VLinearSum(ONE, b, -ONE, r_star, r_star); } /* Apply left preconditioner and b-scaling to r_star (or really just r_0) */ if (preOnLeft) { ier = psolve(P_data, r_star, vtemp1, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, r_star, vtemp1); if (scale_b) N_VProd(sb, vtemp1, r_star); else N_VScale(ONE, vtemp1, r_star); /* Initialize rho[0] */ /* NOTE: initialized here to reduce number of computations - avoid need to compute r_star^T*r_star twice, and avoid needlessly squaring values */ rho[0] = N_VDotProd(r_star, r_star); /* Compute norm of initial residual (r_0) to see if we really need to do anything */ *res_norm = r_init_norm = RSqrt(rho[0]); if (r_init_norm <= delta) return(SPTFQMR_SUCCESS); /* Set v_ = A*r_0 (preconditioned and scaled) */ if (scale_x) N_VDiv(r_star, sx, vtemp1); else N_VScale(ONE, r_star, vtemp1); if (preOnRight) { N_VScale(ONE, vtemp1, v_); ier = psolve(P_data, v_, vtemp1, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } ier = atimes(A_data, vtemp1, v_); if (ier != 0) return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC); if (preOnLeft) { ier = psolve(P_data, v_, vtemp1, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, v_, vtemp1); if (scale_b) N_VProd(sb, vtemp1, v_); else N_VScale(ONE, vtemp1, v_); /* Initialize remaining variables */ N_VScale(ONE, r_star, r_[0]); N_VScale(ONE, r_star, u_); N_VScale(ONE, r_star, p_); N_VConst(ZERO, d_); tau = r_init_norm; v_bar = eta = ZERO; /* START outer loop */ for (n = 0; n < l_max; ++n) { /* Increment linear iteration counter */ (*nli)++; /* sigma = r_star^T*v_ */ sigma = N_VDotProd(r_star, v_); /* alpha = rho[0]/sigma */ alpha = rho[0]/sigma; /* q_ = u_-alpha*v_ */ N_VLinearSum(ONE, u_, -alpha, v_, q_); /* r_[1] = r_[0]-alpha*A*(u_+q_) */ N_VLinearSum(ONE, u_, ONE, q_, r_[1]); if (scale_x) N_VDiv(r_[1], sx, r_[1]); if (preOnRight) { N_VScale(ONE, r_[1], vtemp1); ier = psolve(P_data, vtemp1, r_[1], PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } ier = atimes(A_data, r_[1], vtemp1); if (ier != 0) return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC); if (preOnLeft) { ier = psolve(P_data, vtemp1, r_[1], PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, vtemp1, r_[1]); if (scale_b) N_VProd(sb, r_[1], vtemp1); else N_VScale(ONE, r_[1], vtemp1); N_VLinearSum(ONE, r_[0], -alpha, vtemp1, r_[1]); /* START inner loop */ for (m = 0; m < 2; ++m) { /* d_ = [*]+(v_bar^2*eta/alpha)*d_ */ /* NOTES: * (1) [*] = u_ if m == 0, and q_ if m == 1 * (2) using temp_val reduces the number of required computations * if the inner loop is executed twice */ if (m == 0) { temp_val = RSqrt(N_VDotProd(r_[1], r_[1])); omega = RSqrt(RSqrt(N_VDotProd(r_[0], r_[0]))*temp_val); N_VLinearSum(ONE, u_, SQR(v_bar)*eta/alpha, d_, d_); } else { omega = temp_val; N_VLinearSum(ONE, q_, SQR(v_bar)*eta/alpha, d_, d_); } /* v_bar = omega/tau */ v_bar = omega/tau; /* c = (1+v_bar^2)^(-1/2) */ c = ONE / RSqrt(ONE+SQR(v_bar)); /* tau = tau*v_bar*c */ tau = tau*v_bar*c; /* eta = c^2*alpha */ eta = SQR(c)*alpha; /* x = x+eta*d_ */ N_VLinearSum(ONE, x, eta, d_, x); /* Check for convergence... */ /* NOTE: just use approximation to norm of residual, if possible */ *res_norm = r_curr_norm = tau*RSqrt(m+1); /* Exit inner loop if iteration has converged based upon approximation to norm of current residual */ if (r_curr_norm <= delta) { converged = TRUE; break; } /* Decide if actual norm of residual vector should be computed */ /* NOTES: * (1) if r_curr_norm > delta, then check if actual residual norm * is OK (recall we first compute an approximation) * (2) if r_curr_norm >= r_init_norm and m == 1 and n == l_max, then * compute actual residual norm to see if the iteration can be * saved * (3) the scaled and preconditioned right-hand side of the given * linear system (denoted by b) is only computed once, and the * result is stored in vtemp3 so it can be reused - reduces the * number of psovles if using left preconditioning */ if ((r_curr_norm > delta) || (r_curr_norm >= r_init_norm && m == 1 && n == l_max)) { /* Compute norm of residual ||b-A*x||_2 (preconditioned and scaled) */ if (scale_x) N_VDiv(x, sx, vtemp1); else N_VScale(ONE, x, vtemp1); if (preOnRight) { ier = psolve(P_data, vtemp1, vtemp2, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_UNREC); N_VScale(ONE, vtemp2, vtemp1); } ier = atimes(A_data, vtemp1, vtemp2); if (ier != 0) return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC); if (preOnLeft) { ier = psolve(P_data, vtemp2, vtemp1, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, vtemp2, vtemp1); if (scale_b) N_VProd(sb, vtemp1, vtemp2); else N_VScale(ONE, vtemp1, vtemp2); /* Only precondition and scale b once (result saved for reuse) */ if (!b_ok) { b_ok = TRUE; if (preOnLeft) { ier = psolve(P_data, b, vtemp3, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, b, vtemp3); if (scale_b) N_VProd(sb, vtemp3, vtemp3); } N_VLinearSum(ONE, vtemp3, -ONE, vtemp2, vtemp1); *res_norm = r_curr_norm = RSqrt(N_VDotProd(vtemp1, vtemp1)); /* Exit inner loop if inequality condition is satisfied (meaning exit if we have converged) */ if (r_curr_norm <= delta) { converged = TRUE; break; } } } /* END inner loop */ /* If converged, then exit outer loop as well */ if (converged == TRUE) break; /* rho[1] = r_star^T*r_[1] */ rho[1] = N_VDotProd(r_star, r_[1]); /* beta = rho[1]/rho[0] */ beta = rho[1]/rho[0]; /* u_ = r_[1]+beta*q_ */ N_VLinearSum(ONE, r_[1], beta, q_, u_); /* p_ = u_+beta*(q_+beta*p_) */ N_VLinearSum(beta, q_, SQR(beta), p_, p_); N_VLinearSum(ONE, u_, ONE, p_, p_); /* v_ = A*p_ */ if (scale_x) N_VDiv(p_, sx, vtemp1); else N_VScale(ONE, p_, vtemp1); if (preOnRight) { N_VScale(ONE, vtemp1, v_); ier = psolve(P_data, v_, vtemp1, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } ier = atimes(A_data, vtemp1, v_); if (ier != 0) return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC); if (preOnLeft) { ier = psolve(P_data, v_, vtemp1, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, v_, vtemp1); if (scale_b) N_VProd(sb, vtemp1, v_); else N_VScale(ONE, vtemp1, v_); /* Shift variable values */ /* NOTE: reduces storage requirements */ N_VScale(ONE, r_[1], r_[0]); rho[0] = rho[1]; } /* END outer loop */ /* Determine return value */ /* If iteration converged or residual was reduced, then return current iterate (x) */ if ((converged == TRUE) || (r_curr_norm < r_init_norm)) { if (scale_x) N_VDiv(x, sx, x); if (preOnRight) { ier = psolve(P_data, x, vtemp1, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_UNREC); N_VScale(ONE, vtemp1, x); } if (converged == TRUE) return(SPTFQMR_SUCCESS); else return(SPTFQMR_RES_REDUCED); } /* Otherwise, return error code */ else return(SPTFQMR_CONV_FAIL); }
/* ---------------------------------------------------------------------- * SUNLinSol_SPGMR Linear Solver Testing Routine * * We run multiple tests to exercise this solver: * 1. simple tridiagonal system (no preconditioning) * 2. simple tridiagonal system (Jacobi preconditioning) * 3. tridiagonal system w/ scale vector s1 (no preconditioning) * 4. tridiagonal system w/ scale vector s1 (Jacobi preconditioning) * 5. tridiagonal system w/ scale vector s2 (no preconditioning) * 6. tridiagonal system w/ scale vector s2 (Jacobi preconditioning) * * Note: We construct a tridiagonal matrix Ahat, a random solution xhat, * and a corresponding rhs vector bhat = Ahat*xhat, such that each * of these is unit-less. To test row/column scaling, we use the * matrix A = S1-inverse Ahat S2, rhs vector b = S1-inverse bhat, * and solution vector x = (S2-inverse) xhat; hence the linear * system has rows scaled by S1-inverse and columns scaled by S2, * where S1 and S2 are the diagonal matrices with entries from the * vectors s1 and s2, the 'scaling' vectors supplied to SPGMR * having strictly positive entries. When this is combined with * preconditioning, assume that Phat is the desired preconditioner * for Ahat, then our preconditioning matrix P \approx A should be * left prec: P-inverse \approx S1-inverse Ahat-inverse S1 * right prec: P-inverse \approx S2-inverse Ahat-inverse S2. * Here we use a diagonal preconditioner D, so the S*-inverse * and S* in the product cancel one another. * --------------------------------------------------------------------*/ int main(int argc, char *argv[]) { int fails=0; /* counter for test failures */ int passfail=0; /* overall pass/fail flag */ SUNLinearSolver LS; /* linear solver object */ N_Vector xhat, x, b; /* test vectors */ UserData ProbData; /* problem data structure */ int gstype, pretype, maxl, print_timing; sunindextype i; realtype *vecdata; double tol; /* check inputs: local problem size, timing flag */ if (argc < 7) { printf("ERROR: SIX (6) Inputs required:\n"); printf(" Problem size should be >0\n"); printf(" Gram-Schmidt orthogonalization type should be 1 or 2\n"); printf(" Preconditioning type should be 1 or 2\n"); printf(" Maximum Krylov subspace dimension should be >0\n"); printf(" Solver tolerance should be >0\n"); printf(" timing output flag should be 0 or 1 \n"); return 1; } ProbData.N = atol(argv[1]); problem_size = ProbData.N; if (ProbData.N <= 0) { printf("ERROR: Problem size must be a positive integer\n"); return 1; } gstype = atoi(argv[2]); if ((gstype < 1) || (gstype > 2)) { printf("ERROR: Gram-Schmidt orthogonalization type must be either 1 or 2\n"); return 1; } pretype = atoi(argv[3]); if ((pretype < 1) || (pretype > 2)) { printf("ERROR: Preconditioning type must be either 1 or 2\n"); return 1; } maxl = atoi(argv[4]); if (maxl <= 0) { printf("ERROR: Maximum Krylov subspace dimension must be a positive integer\n"); return 1; } tol = atof(argv[5]); if (tol <= ZERO) { printf("ERROR: Solver tolerance must be a positive real number\n"); return 1; } print_timing = atoi(argv[6]); SetTiming(print_timing); printf("\nSPGMR linear solver test:\n"); printf(" Problem size = %ld\n", (long int) ProbData.N); printf(" Gram-Schmidt orthogonalization type = %i\n", gstype); printf(" Preconditioning type = %i\n", pretype); printf(" Maximum Krylov subspace dimension = %i\n", maxl); printf(" Solver Tolerance = %"GSYM"\n", tol); printf(" timing output flag = %i\n\n", print_timing); /* Create vectors */ x = N_VNew_Serial(ProbData.N); if (check_flag(x, "N_VNew_Serial", 0)) return 1; xhat = N_VNew_Serial(ProbData.N); if (check_flag(xhat, "N_VNew_Serial", 0)) return 1; b = N_VNew_Serial(ProbData.N); if (check_flag(b, "N_VNew_Serial", 0)) return 1; ProbData.d = N_VNew_Serial(ProbData.N); if (check_flag(ProbData.d, "N_VNew_Serial", 0)) return 1; ProbData.s1 = N_VNew_Serial(ProbData.N); if (check_flag(ProbData.s1, "N_VNew_Serial", 0)) return 1; ProbData.s2 = N_VNew_Serial(ProbData.N); if (check_flag(ProbData.s2, "N_VNew_Serial", 0)) return 1; /* Fill xhat vector with uniform random data in [1,2] */ vecdata = N_VGetArrayPointer(xhat); for (i=0; i<ProbData.N; i++) vecdata[i] = ONE + urand(); /* Fill Jacobi vector with matrix diagonal */ N_VConst(FIVE, ProbData.d); /* Create SPGMR linear solver */ LS = SUNLinSol_SPGMR(x, pretype, maxl); fails += Test_SUNLinSolGetType(LS, SUNLINEARSOLVER_ITERATIVE, 0); fails += Test_SUNLinSolSetATimes(LS, &ProbData, ATimes, 0); fails += Test_SUNLinSolSetPreconditioner(LS, &ProbData, PSetup, PSolve, 0); fails += Test_SUNLinSolSetScalingVectors(LS, ProbData.s1, ProbData.s2, 0); fails += Test_SUNLinSolInitialize(LS, 0); fails += Test_SUNLinSolSpace(LS, 0); fails += SUNLinSol_SPGMRSetGSType(LS, gstype); if (fails) { printf("FAIL: SUNLinSol_SPGMR module failed %i initialization tests\n\n", fails); return 1; } else { printf("SUCCESS: SUNLinSol_SPGMR module passed all initialization tests\n\n"); } /*** Test 1: simple Poisson-like solve (no preconditioning) ***/ /* set scaling vectors */ N_VConst(ONE, ProbData.s1); N_VConst(ONE, ProbData.s2); /* Fill x vector with scaled version */ N_VDiv(xhat,ProbData.s2,x); /* Fill b vector with result of matrix-vector product */ fails = ATimes(&ProbData, x, b); if (check_flag(&fails, "ATimes", 1)) return 1; /* Run tests with this setup */ fails += SUNLinSol_SPGMRSetPrecType(LS, PREC_NONE); fails += Test_SUNLinSolSetup(LS, NULL, 0); fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0); fails += Test_SUNLinSolLastFlag(LS, 0); fails += Test_SUNLinSolNumIters(LS, 0); fails += Test_SUNLinSolResNorm(LS, 0); fails += Test_SUNLinSolResid(LS, 0); /* Print result */ if (fails) { printf("FAIL: SUNLinSol_SPGMR module, problem 1, failed %i tests\n\n", fails); passfail += 1; } else { printf("SUCCESS: SUNLinSol_SPGMR module, problem 1, passed all tests\n\n"); } /*** Test 2: simple Poisson-like solve (Jacobi preconditioning) ***/ /* set scaling vectors */ N_VConst(ONE, ProbData.s1); N_VConst(ONE, ProbData.s2); /* Fill x vector with scaled version */ N_VDiv(xhat,ProbData.s2,x); /* Fill b vector with result of matrix-vector product */ fails = ATimes(&ProbData, x, b); if (check_flag(&fails, "ATimes", 1)) return 1; /* Run tests with this setup */ fails += SUNLinSol_SPGMRSetPrecType(LS, pretype); fails += Test_SUNLinSolSetup(LS, NULL, 0); fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0); fails += Test_SUNLinSolLastFlag(LS, 0); fails += Test_SUNLinSolNumIters(LS, 0); fails += Test_SUNLinSolResNorm(LS, 0); fails += Test_SUNLinSolResid(LS, 0); /* Print result */ if (fails) { printf("FAIL: SUNLinSol_SPGMR module, problem 2, failed %i tests\n\n", fails); passfail += 1; } else { printf("SUCCESS: SUNLinSol_SPGMR module, problem 2, passed all tests\n\n"); } /*** Test 3: Poisson-like solve w/ scaled rows (no preconditioning) ***/ /* set scaling vectors */ vecdata = N_VGetArrayPointer(ProbData.s1); for (i=0; i<ProbData.N; i++) vecdata[i] = ONE + THOUSAND*urand(); N_VConst(ONE, ProbData.s2); /* Fill x vector with scaled version */ N_VDiv(xhat,ProbData.s2,x); /* Fill b vector with result of matrix-vector product */ fails = ATimes(&ProbData, x, b); if (check_flag(&fails, "ATimes", 1)) return 1; /* Run tests with this setup */ fails += SUNLinSol_SPGMRSetPrecType(LS, PREC_NONE); fails += Test_SUNLinSolSetup(LS, NULL, 0); fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0); fails += Test_SUNLinSolLastFlag(LS, 0); fails += Test_SUNLinSolNumIters(LS, 0); fails += Test_SUNLinSolResNorm(LS, 0); fails += Test_SUNLinSolResid(LS, 0); /* Print result */ if (fails) { printf("FAIL: SUNLinSol_SPGMR module, problem 3, failed %i tests\n\n", fails); passfail += 1; } else { printf("SUCCESS: SUNLinSol_SPGMR module, problem 3, passed all tests\n\n"); } /*** Test 4: Poisson-like solve w/ scaled rows (Jacobi preconditioning) ***/ /* set scaling vectors */ vecdata = N_VGetArrayPointer(ProbData.s1); for (i=0; i<ProbData.N; i++) vecdata[i] = ONE + THOUSAND*urand(); N_VConst(ONE, ProbData.s2); /* Fill x vector with scaled version */ N_VDiv(xhat,ProbData.s2,x); /* Fill b vector with result of matrix-vector product */ fails = ATimes(&ProbData, x, b); if (check_flag(&fails, "ATimes", 1)) return 1; /* Run tests with this setup */ fails += SUNLinSol_SPGMRSetPrecType(LS, pretype); fails += Test_SUNLinSolSetup(LS, NULL, 0); fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0); fails += Test_SUNLinSolLastFlag(LS, 0); fails += Test_SUNLinSolNumIters(LS, 0); fails += Test_SUNLinSolResNorm(LS, 0); fails += Test_SUNLinSolResid(LS, 0); /* Print result */ if (fails) { printf("FAIL: SUNLinSol_SPGMR module, problem 4, failed %i tests\n\n", fails); passfail += 1; } else { printf("SUCCESS: SUNLinSol_SPGMR module, problem 4, passed all tests\n\n"); } /*** Test 5: Poisson-like solve w/ scaled columns (no preconditioning) ***/ /* set scaling vectors */ N_VConst(ONE, ProbData.s1); vecdata = N_VGetArrayPointer(ProbData.s2); for (i=0; i<ProbData.N; i++) vecdata[i] = ONE + THOUSAND*urand(); /* Fill x vector with scaled version */ N_VDiv(xhat,ProbData.s2,x); /* Fill b vector with result of matrix-vector product */ fails = ATimes(&ProbData, x, b); if (check_flag(&fails, "ATimes", 1)) return 1; /* Run tests with this setup */ fails += SUNLinSol_SPGMRSetPrecType(LS, PREC_NONE); fails += Test_SUNLinSolSetup(LS, NULL, 0); fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0); fails += Test_SUNLinSolLastFlag(LS, 0); fails += Test_SUNLinSolNumIters(LS, 0); fails += Test_SUNLinSolResNorm(LS, 0); fails += Test_SUNLinSolResid(LS, 0); /* Print result */ if (fails) { printf("FAIL: SUNLinSol_SPGMR module, problem 5, failed %i tests\n\n", fails); passfail += 1; } else { printf("SUCCESS: SUNLinSol_SPGMR module, problem 5, passed all tests\n\n"); } /*** Test 6: Poisson-like solve w/ scaled columns (Jacobi preconditioning) ***/ /* set scaling vector, Jacobi solver vector */ N_VConst(ONE, ProbData.s1); vecdata = N_VGetArrayPointer(ProbData.s2); for (i=0; i<ProbData.N; i++) vecdata[i] = ONE + THOUSAND*urand(); /* Fill x vector with scaled version */ N_VDiv(xhat,ProbData.s2,x); /* Fill b vector with result of matrix-vector product */ fails = ATimes(&ProbData, x, b); if (check_flag(&fails, "ATimes", 1)) return 1; /* Run tests with this setup */ fails += SUNLinSol_SPGMRSetPrecType(LS, pretype); fails += Test_SUNLinSolSetup(LS, NULL, 0); fails += Test_SUNLinSolSolve(LS, NULL, x, b, tol, 0); fails += Test_SUNLinSolLastFlag(LS, 0); fails += Test_SUNLinSolNumIters(LS, 0); fails += Test_SUNLinSolResNorm(LS, 0); fails += Test_SUNLinSolResid(LS, 0); /* Print result */ if (fails) { printf("FAIL: SUNLinSol_SPGMR module, problem 6, failed %i tests\n\n", fails); passfail += 1; } else { printf("SUCCESS: SUNLinSol_SPGMR module, problem 6, passed all tests\n\n"); } /* Free solver and vectors */ SUNLinSolFree(LS); N_VDestroy(x); N_VDestroy(xhat); N_VDestroy(b); N_VDestroy(ProbData.d); N_VDestroy(ProbData.s1); N_VDestroy(ProbData.s2); return(passfail); }