/*
 * KINSpilsDQJtimes
 *
 * Approximates the Jacobian-vector product by the difference quotient
 *
 *     Jv = [ func(u + sigma*v) - fval ] / sigma
 *
 * The increment sigma is built from the uscale-scaled vectors, using the
 * expression from p. 469 of the Brown and Saad paper.
 *
 *   v      vector to be multiplied
 *   Jv     output vector; also used as scratch while sigma is computed
 *   u      current iterate
 *   new_u  not referenced by this routine
 *   data   KINSOL memory block (cast to KINMem)
 *
 * Returns 0 on success, otherwise the nonzero value returned by func.
 * The counter nfes is incremented for every call to func.
 */
int KINSpilsDQJtimes(N_Vector v, N_Vector Jv, N_Vector u,
                     booleantype *new_u, void *data)
{
  KINMem kin_mem;
  KINSpilsMem kinspils_mem;
  realtype dot_uv, dot_vv, l1_v, direction;
  realtype sigma, sigma_inv;
  int retval;

  /* data is kin_mem */
  kin_mem = (KINMem) data;
  kinspils_mem = (KINSpilsMem) lmem;

  /* vtemp1 = Du*v and Jv = Du*u (Jv is only temporary storage here) */
  N_VProd(v, uscale, vtemp1);
  N_VProd(u, uscale, Jv);

  /* (Du*u).(Du*v), (Du*v).(Du*v) and the L1 norm of Du*v */
  dot_uv = N_VDotProd(Jv, vtemp1);
  dot_vv = N_VDotProd(vtemp1, vtemp1);
  l1_v   = N_VL1Norm(vtemp1);

  /* sigma carries the sign of (Du*u).(Du*v) */
  if (dot_uv >= ZERO) direction = ONE;
  else                direction = -ONE;

  /* this expression for sigma is from p. 469, Brown and Saad paper */
  sigma = direction*sqrt_relfunc*SUNMAX(SUNRabs(dot_uv),l1_v)/dot_vv;
  sigma_inv = ONE/sigma;

  /* vtemp1 = u + sigma*v, the point at which func is evaluated */
  N_VLinearSum(ONE, u, sigma, v, vtemp1);

  /* vtemp2 = func(u + sigma*v) */
  retval = func(vtemp1, vtemp2, user_data);
  nfes++;
  if (retval != 0) return(retval);

  /* Jv = (vtemp2 - fval) / sigma */
  N_VLinearSum(sigma_inv, vtemp2, -sigma_inv, fval, Jv);

  return(0);
}
/*
 * ClassicalGS
 *
 * Classical Gram-Schmidt orthogonalization of v[k] against the previous
 * vectors v[i0], ..., v[k-1], where i0 = max(k-p, 0), implemented with the
 * fused operations N_VDotProdMulti and N_VLinearCombination.
 *
 *   v            array of vectors; v[k] is overwritten with the
 *                orthogonalized vector
 *   h            coefficient matrix; h[i][k-1] receives the coefficient
 *                of v[i] for i = i0, ..., k-1
 *   k            index of the vector being orthogonalized
 *   p            number of previous vectors to orthogonalize against
 *   new_vk_norm  output: 2-norm of the orthogonalized v[k]
 *   stemp        scalar workspace, at least k-i0+1 entries
 *   vtemp        vector-pointer workspace, at least k-i0+1 entries
 *
 * Returns 0 on success and -1 if a fused vector operation fails.
 *
 * All indices into h and v are offset by i0 so that the routine is also
 * correct for incomplete orthogonalization (p < k); for i0 == 0 the
 * behaviour is the same as before.
 */
int ClassicalGS(N_Vector *v, realtype **h, int k, int p,
                realtype *new_vk_norm, realtype *stemp, N_Vector *vtemp)
{
  int  i, i0, k_minus_1, nprev, retval;
  realtype vk_norm;

  k_minus_1 = k - 1;
  i0 = SUNMAX(k-p,0);
  nprev = k - i0;              /* number of vectors v[i0], ..., v[k-1] */

  /* Perform Classical Gram-Schmidt */

  /* stemp[j] = v[k].v[i0+j] for j = 0..nprev; stemp[nprev] = v[k].v[k] */
  retval = N_VDotProdMulti(nprev+1, v[k], v+i0, stemp);
  if (retval != 0) return(-1);

  vk_norm = SUNRsqrt(stemp[nprev]);

  /* Walk downwards so that shifting stemp by one slot does not clobber
     entries that are still to be read */
  for (i=nprev-1; i >= 0; i--) {
    h[i0+i][k_minus_1] = stemp[i];
    stemp[i+1] = -stemp[i];
    vtemp[i+1] = v[i0+i];
  }
  stemp[0] = ONE;
  vtemp[0] = v[k];

  /* v[k] = v[k] - sum_i h[i][k-1] * v[i] */
  retval = N_VLinearCombination(nprev+1, stemp, vtemp, v[k]);
  if (retval != 0) return(-1);

  /* Compute the norm of the new vector at v[k] */

  *new_vk_norm = SUNRsqrt(N_VDotProd(v[k], v[k]));

  /* Reorthogonalize if necessary */

  if ((FACTOR * (*new_vk_norm)) < vk_norm) {

    /* stemp[1..nprev] = v[k].v[i0..k-1] */
    retval = N_VDotProdMulti(nprev, v[k], v+i0, stemp+1);
    if (retval != 0) return(-1);

    stemp[0] = ONE;
    vtemp[0] = v[k];
    for (i=i0; i < k; i++) {
      h[i][k_minus_1] += stemp[i-i0+1];
      stemp[i-i0+1] = -stemp[i-i0+1];
      vtemp[i-i0+1] = v[i];
    }

    /* Only the nprev+1 entries filled above take part in the update */
    retval = N_VLinearCombination(nprev+1, stemp, vtemp, v[k]);
    if (retval != 0) return(-1);

    *new_vk_norm = SUNRsqrt(N_VDotProd(v[k],v[k]));
  }

  return(0);
}
/*
 * ModifiedGS
 *
 * Modified Gram-Schmidt orthogonalization of v[k] against v[i0..k-1],
 * with i0 = max(k-p, 0). The coefficients are stored in column k-1 of h
 * and the 2-norm of the resulting v[k] is returned in *new_vk_norm.
 *
 * A second, corrective pass is made only when the new norm is so small
 * that adding it to FACTOR times the norm of the input v[k] leaves that
 * product unchanged in floating point.
 *
 * Always returns 0.
 */
int ModifiedGS(N_Vector *v, realtype **h, int k, int p,
               realtype *new_vk_norm)
{
  int j, col, first;
  realtype norm_in, scaled, corr, corr_sq_sum, remainder;

  norm_in = RSqrt(N_VDotProd(v[k],v[k]));
  col = k - 1;
  first = MAX(k-p, 0);

  /* First pass: remove the component along each previous vector in turn */
  for (j = first; j < k; j++) {
    h[j][col] = N_VDotProd(v[j], v[k]);
    N_VLinearSum(ONE, v[k], -h[j][col], v[j], v[k]);
  }

  /* Norm of the orthogonalized vector */
  *new_vk_norm = RSqrt(N_VDotProd(v[k], v[k]));

  /* Done unless the new norm is negligible next to FACTOR * norm_in */
  scaled = FACTOR * norm_in;
  if ((scaled + (*new_vk_norm)) != scaled) return(0);

  /* Second pass: reorthogonalize, skipping corrections that are
     negligible relative to FACTOR times the stored coefficient */
  corr_sq_sum = ZERO;
  for (j = first; j < k; j++) {
    corr = N_VDotProd(v[j], v[k]);
    scaled = FACTOR * h[j][col];
    if ((scaled + corr) != scaled) {
      h[j][col] += corr;
      N_VLinearSum(ONE, v[k],-corr, v[j], v[k]);
      corr_sq_sum += SQR(corr);
    }
  }

  /* Downdate the norm by the corrections that were applied */
  if (corr_sq_sum != ZERO) {
    remainder = SQR(*new_vk_norm) - corr_sq_sum;
    if (remainder > ZERO) *new_vk_norm = RSqrt(remainder);
    else                  *new_vk_norm = ZERO;
  }

  return(0);
}
/*
 * ClassicalGS
 *
 * Classical Gram-Schmidt orthogonalization of v[k] against v[i0..k-1],
 * with i0 = max(k-p, 0). All dot products are taken with the incoming
 * v[k] before any of them is subtracted. Coefficients go to column k-1
 * of h and the 2-norm of the result to *new_vk_norm.
 *
 * When FACTOR times the new norm is below the original norm, a
 * reorthogonalization step is made: the correction is accumulated in
 * temp (coefficients kept in s) and subtracted from v[k] in one go.
 *
 * Always returns 0.
 */
int ClassicalGS(N_Vector *v, realtype **h, int k, int p,
                realtype *new_vk_norm, N_Vector temp, realtype *s)
{
  int j, col, first;
  realtype norm_in;

  col = k - 1;

  /* Norm of the vector as passed in */
  norm_in = RSqrt(N_VDotProd(v[k], v[k]));

  first = MAX(k-p, 0);

  /* Coefficients first ... */
  for (j = first; j < k; j++)
    h[j][col] = N_VDotProd(v[j], v[k]);

  /* ... then the projections are removed */
  for (j = first; j < k; j++)
    N_VLinearSum(ONE, v[k], -h[j][col], v[j], v[k]);

  /* Norm of the orthogonalized vector */
  *new_vk_norm = RSqrt(N_VDotProd(v[k], v[k]));

  /* Nothing more to do unless reorthogonalization is called for */
  if (!((FACTOR * (*new_vk_norm)) < norm_in)) return(0);

  for (j = first; j < k; j++)
    s[j] = N_VDotProd(v[j], v[k]);

  /* temp = sum_j s[j]*v[j]; the first term initializes temp */
  for (j = first; j < k; j++) {
    if (j == first) N_VScale(s[j], v[j], temp);
    else            N_VLinearSum(s[j], v[j], ONE, temp, temp);
    h[j][col] += s[j];
  }

  N_VLinearSum(ONE, v[k], -ONE, temp, v[k]);

  *new_vk_norm = RSqrt(N_VDotProd(v[k],v[k]));

  return(0);
}
/*
 * kinLapackBandSolve
 *
 * Solves the banded linear system for the step x using the factors held
 * in J and pivots (LAPACK dgbtrs), then updates sJpnorm and sfdotJp.
 *
 *   kin_mem   KINSOL memory block
 *   x         output: solution vector
 *   b         right-hand side on entry; overwritten with fscale*fscale*b
 *   res_norm  not referenced by this routine
 *
 * Returns 0 on success and -1 if dgbtrs reports an error.
 */
static int kinLapackBandSolve(KINMem kin_mem, N_Vector x, N_Vector b, realtype *res_norm)
{
  KINDlsMem kindls_mem;
  realtype *xd;
  int ier, one = 1;

  kindls_mem = (KINDlsMem) lmem;

  /* Copy the right-hand side into x */
  N_VScale(ONE, b, x);
  xd = N_VGetArrayPointer(x);

  /* Back-solve and get solution in x */
  dgbtrs_f77("N", &n, &ml, &mu, &one, J->data, &(J->ldim), pivots, xd, &n, &ier, 1);

  /* dgbtrs reports failure through a negative INFO (illegal argument)
     and never returns a positive value, so treat any nonzero INFO as an
     error instead of testing for ier > 0 */
  if (ier != 0) return(-1);

  /* Compute the terms Jpnorm and sfdotJp for use in the global strategy
   * routines and in KINForcingTerm. Both of these terms are subsequently
   * corrected if the step is reduced by constraints or the line search.
   *
   * sJpnorm is the norm of the scaled product (scaled by fscale) of
   * the current Jacobian matrix J and the step vector p.
   *
   * sfdotJp is the dot product of the scaled f vector and the scaled
   * vector J*p, where the scaling uses fscale.
   */
  sJpnorm = N_VWL2Norm(b,fscale);

  /* b is scaled by fscale twice before the dot product with fval */
  N_VProd(b, fscale, b);
  N_VProd(b, fscale, b);
  sfdotJp = N_VDotProd(fval, b);

  last_flag = KINDLS_SUCCESS;

  return(0);
}
/*
 * kinDenseSolve
 *
 * Solves the dense linear system with the factors held in J/lpivots and
 * computes the term sFdotJp used by the linesearch routine.
 *
 *   x        output: solution vector
 *   b        right-hand side on entry; overwritten with fscale*fscale*b
 *   sJpnorm  not written by this routine
 *   sFdotJp  output: dot product of fval with the doubly scaled b
 *
 * Always returns 0 and sets last_flag to KINDLS_SUCCESS.
 */
static int kinDenseSolve(KINMem kin_mem, N_Vector x, N_Vector b,
                         realtype *sJpnorm, realtype *sFdotJp)
{
  KINDlsMem kindls_mem;
  realtype *x_data;
  int pass;

  kindls_mem = (KINDlsMem) lmem;

  /* x starts as a copy of the right-hand side and is solved in place */
  N_VScale(ONE, b, x);
  x_data = N_VGetArrayPointer(x);
  DenseGETRS(J, lpivots, x_data);

  /* sFdotJp is the dot product of the scaled f vector and the scaled
     vector J*p, where the scaling uses fscale; it is corrected later if
     the step is reduced by constraints or the linesearch. b is scaled
     by fscale twice before the dot product with fval. */
  for (pass = 0; pass < 2; pass++)
    N_VProd(b, fscale, b);
  *sFdotJp = N_VDotProd(fval, b);

  last_flag = KINDLS_SUCCESS;

  return(0);
}
/*
 * GlobalVectorLength_LapDense
 *
 * Workaround for the missing "number of entries" query in the N_Vector
 * API (replace if such a routine is ever added): a clone of y is filled
 * with ones and its dot product with itself is taken as the length.
 *
 * Returns the length cast to sunindextype, or -1 if the clone could not
 * be created.
 */
sunindextype GlobalVectorLength_LapDense(N_Vector y)
{
  N_Vector ones;
  realtype count;

  ones = N_VClone(y);
  if (ones == NULL) return(-1);

  /* <1,1> adds one per entry */
  N_VConst(ONE, ones);
  count = N_VDotProd(ones, ones);

  N_VDestroy(ones);

  return( (sunindextype) count );
}
/*
 * N_VDotProd_SensWrapper
 *
 * Dot product of two wrapper vectors: the sum of N_VDotProd over the
 * corresponding member vectors. The number of members is taken from x.
 */
realtype N_VDotProd_SensWrapper(N_Vector x, N_Vector y)
{
  realtype total = ZERO;
  int is = 0;

  while (is < NV_NVECS_SW(x)) {
    total += N_VDotProd(NV_VEC_SW(x,is), NV_VEC_SW(y,is));
    is++;
  }

  return(total);
}
/*
 * KINDenseSolve
 *
 * Solves the dense linear system with the factors held in J/pivots, then
 * updates sJpnorm and sfdotJp for the global strategy routines and
 * KINForcingTerm.
 *
 *   x         output: solution vector
 *   b         right-hand side on entry; overwritten with fscale*fscale*b
 *   res_norm  not referenced by this routine
 *
 * Always returns 0 and sets last_flag to KINDENSE_SUCCESS.
 */
static int KINDenseSolve(KINMem kin_mem, N_Vector x, N_Vector b, realtype *res_norm)
{
  KINDenseMem kindense_mem;
  realtype *x_data;
  int pass;

  kindense_mem = (KINDenseMem) lmem;

  /* x starts as a copy of the right-hand side and is solved in place */
  N_VScale(ONE, b, x);
  x_data = N_VGetArrayPointer(x);
  DenseGETRS(J, pivots, x_data);

  /* sJpnorm: fscale-weighted L2 norm of b, taken before b is rescaled.
     sfdotJp: dot product of fval with b after b has been scaled by
     fscale twice. Both are corrected later if the step is reduced by
     constraints or the line search. */
  sJpnorm = N_VWL2Norm(b,fscale);
  for (pass = 0; pass < 2; pass++)
    N_VProd(b, fscale, b);
  sfdotJp = N_VDotProd(fval, b);

  last_flag = KINDENSE_SUCCESS;

  return(0);
}
int CVSpbcg(void *cvode_mem, int pretype, int maxl) { CVodeMem cv_mem; CVSpilsMem cvspils_mem; SpbcgMem spbcg_mem; int mxl; /* Return immediately if cvode_mem is NULL */ if (cvode_mem == NULL) { CVProcessError(NULL, CVSPILS_MEM_NULL, "CVSPBCG", "CVSpbcg", MSGS_CVMEM_NULL); return(CVSPILS_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; /* Check if N_VDotProd is present */ if (vec_tmpl->ops->nvdotprod == NULL) { CVProcessError(cv_mem, CVSPILS_ILL_INPUT, "CVSPBCG", "CVSpbcg", MSGS_BAD_NVECTOR); return(CVSPILS_ILL_INPUT); } if (lfree != NULL) lfree(cv_mem); /* Set four main function fields in cv_mem */ linit = CVSpbcgInit; lsetup = CVSpbcgSetup; lsolve = CVSpbcgSolve; lfree = CVSpbcgFree; /* Get memory for CVSpilsMemRec */ cvspils_mem = NULL; cvspils_mem = (CVSpilsMem) malloc(sizeof(CVSpilsMemRec)); if (cvspils_mem == NULL) { CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL); return(CVSPILS_MEM_FAIL); } /* Set ILS type */ cvspils_mem->s_type = SPILS_SPBCG; /* Set Spbcg parameters that have been passed in call sequence */ cvspils_mem->s_pretype = pretype; mxl = cvspils_mem->s_maxl = (maxl <= 0) ? 
CVSPILS_MAXL : maxl; /* Set default values for the rest of the Spbcg parameters */ cvspils_mem->s_delt = CVSPILS_DELT; cvspils_mem->s_P_data = NULL; cvspils_mem->s_pset = NULL; cvspils_mem->s_psolve = NULL; cvspils_mem->s_jtimes = CVSpilsDQJtimes; cvspils_mem->s_j_data = cvode_mem; cvspils_mem->s_last_flag = CVSPILS_SUCCESS; setupNonNull = FALSE; /* Check for legal pretype */ if ((pretype != PREC_NONE) && (pretype != PREC_LEFT) && (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) { CVProcessError(cv_mem, CVSPILS_ILL_INPUT, "CVSPBCG", "CVSpbcg", MSGS_BAD_PRETYPE); return(CVSPILS_ILL_INPUT); } /* Allocate memory for ytemp and x */ ytemp = NULL; ytemp = N_VClone(vec_tmpl); if (ytemp == NULL) { CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL); free(cvspils_mem); cvspils_mem = NULL; return(CVSPILS_MEM_FAIL); } x = NULL; x = N_VClone(vec_tmpl); if (x == NULL) { CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL); N_VDestroy(ytemp); free(cvspils_mem); cvspils_mem = NULL; return(CVSPILS_MEM_FAIL); } /* Compute sqrtN from a dot product */ N_VConst(ONE, ytemp); sqrtN = RSqrt(N_VDotProd(ytemp, ytemp)); /* Call SpbcgMalloc to allocate workspace for Spbcg */ spbcg_mem = NULL; spbcg_mem = SpbcgMalloc(mxl, vec_tmpl); if (spbcg_mem == NULL) { CVProcessError(cv_mem, CVSPILS_MEM_FAIL, "CVSPBCG", "CVSpbcg", MSGS_MEM_FAIL); N_VDestroy(ytemp); N_VDestroy(x); free(cvspils_mem); cvspils_mem = NULL; return(CVSPILS_MEM_FAIL); } /* Attach SPBCG memory to spils memory structure */ spils_mem = (void *) spbcg_mem; /* Attach linear solver memory to integrator memory */ lmem = cvspils_mem; return(CVSPILS_SUCCESS); }
int CVSpgmr(void *cvode_mem, int pretype, int maxl) { CVodeMem cv_mem; CVSpgmrMem cvspgmr_mem; int mxl; /* Return immediately if cvode_mem is NULL */ if (cvode_mem == NULL) { fprintf(stderr, MSGS_CVMEM_NULL); return(CVSPGMR_MEM_NULL); } cv_mem = (CVodeMem) cvode_mem; /* Check if N_VDotProd is present */ if(vec_tmpl->ops->nvdotprod == NULL) { if(errfp!=NULL) fprintf(errfp, MSGS_BAD_NVECTOR); return(CVSPGMR_ILL_INPUT); } if (lfree != NULL) lfree(cv_mem); /* Set four main function fields in cv_mem */ linit = CVSpgmrInit; lsetup = CVSpgmrSetup; lsolve = CVSpgmrSolve; lfree = CVSpgmrFree; /* Get memory for CVSpgmrMemRec */ cvspgmr_mem = (CVSpgmrMem) malloc(sizeof(CVSpgmrMemRec)); if (cvspgmr_mem == NULL) { if(errfp!=NULL) fprintf(errfp, MSGS_MEM_FAIL); return(CVSPGMR_MEM_FAIL); } /* Set Spgmr parameters that have been passed in call sequence */ cvspgmr_mem->g_pretype = pretype; mxl = cvspgmr_mem->g_maxl = (maxl <= 0) ? CVSPGMR_MAXL : maxl; /* Set default values for the rest of the Spgmr parameters */ cvspgmr_mem->g_gstype = MODIFIED_GS; cvspgmr_mem->g_delt = CVSPGMR_DELT; cvspgmr_mem->g_P_data = NULL; cvspgmr_mem->g_pset = NULL; cvspgmr_mem->g_psolve = NULL; cvspgmr_mem->g_jtimes = CVSpgmrDQJtimes; cvspgmr_mem->g_j_data = cvode_mem; cvspgmr_mem->g_last_flag = CVSPGMR_SUCCESS; setupNonNull = FALSE; /* Check for legal pretype */ if ((pretype != PREC_NONE) && (pretype != PREC_LEFT) && (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) { if(errfp!=NULL) fprintf(errfp, MSGS_BAD_PRETYPE); return(CVSPGMR_ILL_INPUT); } /* Allocate memory for ytemp and x */ ytemp = N_VClone(vec_tmpl); if (ytemp == NULL) { if(errfp!=NULL) fprintf(errfp, MSGS_MEM_FAIL); return(CVSPGMR_MEM_FAIL); } x = N_VClone(vec_tmpl); if (x == NULL) { if(errfp!=NULL) fprintf(errfp, MSGS_MEM_FAIL); N_VDestroy(ytemp); return(CVSPGMR_MEM_FAIL); } /* Compute sqrtN from a dot product */ N_VConst(ONE, ytemp); sqrtN = RSqrt( N_VDotProd(ytemp, ytemp) ); /* Call SpgmrMalloc to allocate workspace for Spgmr */ spgmr_mem = 
SpgmrMalloc(mxl, vec_tmpl); if (spgmr_mem == NULL) { if(errfp!=NULL) fprintf(errfp, MSGS_MEM_FAIL); N_VDestroy(ytemp); N_VDestroy(x); return(CVSPGMR_MEM_FAIL); } /* Attach linear solver memory to integrator memory */ lmem = cvspgmr_mem; return(CVSPGMR_SUCCESS); }
/*--------------------------------------------------------------- ARKSpbcg: This routine initializes the memory record and sets various function fields specific to the Spbcg linear solver module. ARKSpbcg first calls the existing lfree routine if this is not NULL. It then sets the ark_linit, ark_lsetup, ark_lsolve, ark_lfree fields in (*arkode_mem) to be ARKSpbcgInit, ARKSpbcgSetup, ARKSpbcgSolve, and ARKSpbcgFree, respectively. It allocates memory for a structure of type ARKSpilsMemRec and sets the ark_lmem field in (*arkode_mem) to the address of this structure. It sets setupNonNull in (*arkode_mem), and sets various fields in the ARKSpilsMemRec structure. Finally, ARKSpbcg allocates memory for ytemp and x, and calls SpbcgMalloc to allocate memory for the Spbcg solver. ---------------------------------------------------------------*/ int ARKSpbcg(void *arkode_mem, int pretype, int maxl) { ARKodeMem ark_mem; ARKSpilsMem arkspils_mem; SpbcgMem spbcg_mem; int mxl; /* Return immediately if arkode_mem is NULL */ if (arkode_mem == NULL) { arkProcessError(NULL, ARKSPILS_MEM_NULL, "ARKSPBCG", "ARKSpbcg", MSGS_ARKMEM_NULL); return(ARKSPILS_MEM_NULL); } ark_mem = (ARKodeMem) arkode_mem; /* Check if N_VDotProd and N_VProd are present */ if ((ark_mem->ark_tempv->ops->nvdotprod == NULL) || (ark_mem->ark_tempv->ops->nvprod == NULL)) { arkProcessError(ark_mem, ARKSPILS_ILL_INPUT, "ARKSPBCG", "ARKSpbcg", MSGS_BAD_NVECTOR); return(ARKSPILS_ILL_INPUT); } if (ark_mem->ark_lfree != NULL) ark_mem->ark_lfree(ark_mem); /* Set four main function fields in ark_mem */ ark_mem->ark_linit = ARKSpbcgInit; ark_mem->ark_lsetup = ARKSpbcgSetup; ark_mem->ark_lsolve = ARKSpbcgSolve; ark_mem->ark_lfree = ARKSpbcgFree; ark_mem->ark_lsolve_type = 0; /* Get memory for ARKSpilsMemRec */ arkspils_mem = NULL; arkspils_mem = (ARKSpilsMem) malloc(sizeof(struct ARKSpilsMemRec)); if (arkspils_mem == NULL) { arkProcessError(ark_mem, ARKSPILS_MEM_FAIL, "ARKSPBCG", "ARKSpbcg", MSGS_MEM_FAIL); 
return(ARKSPILS_MEM_FAIL); } /* Set ILS type */ arkspils_mem->s_type = SPILS_SPBCG; /* Set Spbcg parameters that have been passed in call sequence */ arkspils_mem->s_pretype = pretype; mxl = arkspils_mem->s_maxl = (maxl <= 0) ? ARKSPILS_MAXL : maxl; /* Set defaults for Jacobian-related fields */ arkspils_mem->s_jtimesDQ = TRUE; arkspils_mem->s_jtimes = NULL; arkspils_mem->s_j_data = NULL; /* Set defaults for preconditioner-related fields */ arkspils_mem->s_pset = NULL; arkspils_mem->s_psolve = NULL; arkspils_mem->s_pfree = NULL; arkspils_mem->s_P_data = ark_mem->ark_user_data; /* Initialize counters */ arkspils_mem->s_npe = arkspils_mem->s_nli = 0; arkspils_mem->s_nps = arkspils_mem->s_ncfl = 0; arkspils_mem->s_nstlpre = arkspils_mem->s_njtimes = 0; arkspils_mem->s_nfes = 0; /* Set default values for the rest of the Spbcg parameters */ arkspils_mem->s_eplifac = ARKSPILS_EPLIN; arkspils_mem->s_last_flag = ARKSPILS_SUCCESS; ark_mem->ark_setupNonNull = FALSE; /* Check for legal pretype */ if ((pretype != PREC_NONE) && (pretype != PREC_LEFT) && (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) { arkProcessError(ark_mem, ARKSPILS_ILL_INPUT, "ARKSPBCG", "ARKSpbcg", MSGS_BAD_PRETYPE); free(arkspils_mem); arkspils_mem = NULL; return(ARKSPILS_ILL_INPUT); } /* Allocate memory for ytemp and x */ arkspils_mem->s_ytemp = N_VClone(ark_mem->ark_tempv); if (arkspils_mem->s_ytemp == NULL) { arkProcessError(ark_mem, ARKSPILS_MEM_FAIL, "ARKSPBCG", "ARKSpbcg", MSGS_MEM_FAIL); free(arkspils_mem); arkspils_mem = NULL; return(ARKSPILS_MEM_FAIL); } arkspils_mem->s_x = N_VClone(ark_mem->ark_tempv); if (arkspils_mem->s_x == NULL) { arkProcessError(ark_mem, ARKSPILS_MEM_FAIL, "ARKSPBCG", "ARKSpbcg", MSGS_MEM_FAIL); N_VDestroy(arkspils_mem->s_ytemp); free(arkspils_mem); arkspils_mem = NULL; return(ARKSPILS_MEM_FAIL); } /* Compute sqrtN from a dot product */ N_VConst(ONE, arkspils_mem->s_ytemp); arkspils_mem->s_sqrtN = SUNRsqrt(N_VDotProd(arkspils_mem->s_ytemp, arkspils_mem->s_ytemp)); /* 
Call SpbcgMalloc to allocate workspace for Spbcg */ spbcg_mem = NULL; spbcg_mem = SpbcgMalloc(mxl, ark_mem->ark_tempv); if (spbcg_mem == NULL) { arkProcessError(ark_mem, ARKSPILS_MEM_FAIL, "ARKSPBCG", "ARKSpbcg", MSGS_MEM_FAIL); N_VDestroy(arkspils_mem->s_ytemp); N_VDestroy(arkspils_mem->s_x); free(arkspils_mem); arkspils_mem = NULL; return(ARKSPILS_MEM_FAIL); } /* Attach SPBCG memory to spils memory structure */ arkspils_mem->s_spils_mem = (void *) spbcg_mem; /* Attach linear solver memory to integrator memory */ ark_mem->ark_lmem = arkspils_mem; return(ARKSPILS_SUCCESS); }
/*
 * KINSptfqmrSolve
 *
 * Solves the linear system for the step xx with SptfqmrSolve, starting
 * from xx = 0, and then computes sJpnorm and sFdotJp.
 *
 *   xx       output: solution vector (initial guess is set to zero here)
 *   bb       right-hand side on entry; overwritten during the
 *            computation of sJpnorm and sFdotJp
 *   sJpnorm  output: fscale-weighted L2 norm of KINSpilsAtimes applied
 *            to xx
 *   sFdotJp  output: dot product of fval with that product after it has
 *            been scaled by fscale twice
 *
 * The counters nli, nps and ncfl are updated and last_flag records the
 * outcome. Returns 0 on success, 1 on a recoverable failure and -1 on
 * an unrecoverable one.
 */
static int KINSptfqmrSolve(KINMem kin_mem, N_Vector xx, N_Vector bb,
                           realtype *sJpnorm, realtype *sFdotJp)
{
  KINSpilsMem kinspils_mem;
  SptfqmrMem sptfqmr_mem;
  realtype res_norm;
  int ret, lin_its, prec_solves;

  kinspils_mem = (KINSpilsMem) lmem;
  sptfqmr_mem = (SptfqmrMem) spils_mem;

  /* Start from xx = 0; bb already holds the right-hand side */
  N_VConst(ZERO, xx);

  new_uu = TRUE;  /* set flag required for user Jacobian routine */

  ret = SptfqmrSolve(sptfqmr_mem, kin_mem, xx, bb, pretype, eps,
                     kin_mem, fscale, fscale, KINSpilsAtimes,
                     KINSpilsPSolve, &res_norm, &lin_its, &prec_solves);

  /* Update nli, nps and ncfl (nni is updated in the KINSol main
     iteration loop) */
  nli += (long int) lin_its;
  nps += (long int) prec_solves;

  if (printfl > 2)
    KINPrintInfo(kin_mem, PRNT_NLI, "KINSPTFQMR", "KINSptfqmrSolve", INFO_NLI, lin_its);

  if (ret != 0) ncfl++;
  last_flag = ret;

  /* Map the failure returns of SptfqmrSolve onto 1 (recoverable) or -1
     (unrecoverable); any code not listed continues below */
  if ( (ret != 0) && (ret != SPTFQMR_RES_REDUCED) ) {

    if ( (ret == SPTFQMR_PSOLVE_FAIL_REC) ||
         (ret == SPTFQMR_ATIMES_FAIL_REC) )
      return(1);

    if ( (ret == SPTFQMR_CONV_FAIL) ||
         (ret == SPTFQMR_MEM_NULL) ||
         (ret == SPTFQMR_ATIMES_FAIL_UNREC) ||
         (ret == SPTFQMR_PSOLVE_FAIL_UNREC) )
      return(-1);
  }

  /* Compute sJpnorm and sFdotJp for the linesearch routine and
     KINForcingTerm; both are corrected later if the step is reduced by
     constraints or the linesearch. bb is overwritten by the product
     computed by KINSpilsAtimes from xx. */
  ret = KINSpilsAtimes(kin_mem, xx, bb);
  if (ret != 0) {
    last_flag = (ret > 0) ? SPTFQMR_ATIMES_FAIL_REC : SPTFQMR_ATIMES_FAIL_UNREC;
    return((ret > 0) ? 1 : -1);
  }

  *sJpnorm = N_VWL2Norm(bb, fscale);

  /* bb is scaled by fscale twice before the dot product with fval */
  N_VProd(bb, fscale, bb);
  N_VProd(bb, fscale, bb);
  *sFdotJp = N_VDotProd(fval, bb);

  if (printfl > 2)
    KINPrintInfo(kin_mem, PRNT_EPS, "KINSPTFQMR", "KINSptfqmrSolve", INFO_EPS, res_norm, eps);

  return(0);
}
/*
 * SUNLinSolSolve_SPBCGS
 *
 * Runs the SPBCGS (scaled, preconditioned Bi-CGStab) iteration stored in
 * S. The operator is applied through the ATimes routine held in the
 * solver content, optionally combined with left/right preconditioning
 * (Psolve) and with the scaling vectors s1 (applied to the residual)
 * and s2 (applied to the solution).
 *
 *   S      linear solver object; its content supplies work vectors,
 *          callbacks, scaling vectors and receives numiters/resnorm
 *   A      not referenced by this routine
 *   x      initial guess on entry; updated in place by the iteration
 *   b      right-hand side
 *   delta  tolerance on the 2-norm of the scaled, preconditioned residual
 *
 * Returns (and, except where noted, stores in LASTFLAG(S)):
 *   SUNLS_MEM_NULL             S is NULL (nothing stored)
 *   SUNLS_SUCCESS              residual norm <= delta
 *   SUNLS_RES_REDUCED          not converged, but final norm < initial norm
 *   SUNLS_CONV_FAIL            not converged and residual not reduced
 *   SUNLS_ATIMES_FAIL_UNREC / SUNLS_ATIMES_FAIL_REC
 *                              ATimes returned a negative / positive value
 *   SUNLS_PSOLVE_FAIL_UNREC / SUNLS_PSOLVE_FAIL_REC
 *                              Psolve returned a negative / positive value
 *   SUNLS_VECTOROP_ERR         N_VLinearCombination failed (LASTFLAG(S)
 *                              is not updated on this path)
 */
int SUNLinSolSolve_SPBCGS(SUNLinearSolver S, SUNMatrix A, N_Vector x,
                          N_Vector b, realtype delta)
{
  /* local data and shortcut variables */
  realtype alpha, beta, omega, omega_denom, beta_num, beta_denom, r_norm, rho;
  N_Vector r_star, r, p, q, u, Ap, vtemp;
  booleantype preOnLeft, preOnRight, scale_x, scale_b, converged;
  int l, l_max, ier;
  void *A_data, *P_data;
  N_Vector sx, sb;
  ATimesFn atimes;
  PSolveFn psolve;
  realtype *res_norm;
  int *nli;

  /* local variables for fused vector operations */
  realtype cv[3];
  N_Vector Xv[3];

  /* Make local shortcuts to solver variables. */
  if (S == NULL) return(SUNLS_MEM_NULL);
  l_max        = SPBCGS_CONTENT(S)->maxl;
  r_star       = SPBCGS_CONTENT(S)->r_star;
  r            = SPBCGS_CONTENT(S)->r;
  p            = SPBCGS_CONTENT(S)->p;
  q            = SPBCGS_CONTENT(S)->q;
  u            = SPBCGS_CONTENT(S)->u;
  Ap           = SPBCGS_CONTENT(S)->Ap;
  vtemp        = SPBCGS_CONTENT(S)->vtemp;
  sb           = SPBCGS_CONTENT(S)->s1;
  sx           = SPBCGS_CONTENT(S)->s2;
  A_data       = SPBCGS_CONTENT(S)->ATData;
  P_data       = SPBCGS_CONTENT(S)->PData;
  atimes       = SPBCGS_CONTENT(S)->ATimes;
  psolve       = SPBCGS_CONTENT(S)->Psolve;
  nli          = &(SPBCGS_CONTENT(S)->numiters);
  res_norm     = &(SPBCGS_CONTENT(S)->resnorm);

  /* Initialize counters and convergence flag */
  *nli = 0;
  converged = SUNFALSE;

  /* set booleantype flags for internal solver options */
  preOnLeft  = ( (PRETYPE(S) == PREC_LEFT) ||
                 (PRETYPE(S) == PREC_BOTH) );
  preOnRight = ( (PRETYPE(S) == PREC_RIGHT) ||
                 (PRETYPE(S) == PREC_BOTH) );
  /* a NULL scaling vector means that scaling is skipped */
  scale_x = (sx != NULL);
  scale_b = (sb != NULL);

  /* Set r_star to initial (unscaled) residual r_0 = b - A*x_0;
     the product A*x_0 is skipped when x_0 is exactly zero */

  if (N_VDotProd(x, x) == ZERO) N_VScale(ONE, b, r_star);
  else {
    ier = atimes(A_data, x, r_star);
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC;
      return(LASTFLAG(S));
    }
    N_VLinearSum(ONE, b, -ONE, r_star, r_star);
  }

  /* Apply left preconditioner and b-scaling to r_star = r_0 */

  if (preOnLeft) {
    ier = psolve(P_data, r_star, r, delta, PREC_LEFT);
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
      return(LASTFLAG(S));
    }
  }
  else N_VScale(ONE, r_star, r);

  if (scale_b) N_VProd(sb, r, r_star);
  else N_VScale(ONE, r, r_star);

  /* Initialize beta_denom to the dot product of r0 with r0 */

  beta_denom = N_VDotProd(r_star, r_star);

  /* Set r_norm to L2 norm of r_star = sb P1_inv r_0, and
     return if small */

  *res_norm = r_norm = rho = SUNRsqrt(beta_denom);
  if (r_norm <= delta) {
    LASTFLAG(S) = SUNLS_SUCCESS;
    return(LASTFLAG(S));
  }

  /* Copy r_star to r and p */

  N_VScale(ONE, r_star, r);
  N_VScale(ONE, r_star, p);

  /* Begin main iteration loop */

  for(l = 0; l < l_max; l++) {

    (*nli)++;

    /* Generate Ap = A-tilde p, where A-tilde = sb P1_inv A P2_inv sx_inv */

    /*   Apply x-scaling: vtemp = sx_inv p */

    if (scale_x) N_VDiv(p, sx, vtemp);
    else N_VScale(ONE, p, vtemp);

    /*   Apply right preconditioner: vtemp = P2_inv sx_inv p
         (Ap serves as the input buffer for psolve) */

    if (preOnRight) {
      N_VScale(ONE, vtemp, Ap);
      ier = psolve(P_data, Ap, vtemp, delta, PREC_RIGHT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return(LASTFLAG(S));
      }
    }

    /*   Apply A: Ap = A P2_inv sx_inv p */

    ier = atimes(A_data, vtemp, Ap );
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC;
      return(LASTFLAG(S));
    }

    /*   Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */

    if (preOnLeft) {
      ier = psolve(P_data, Ap, vtemp, delta, PREC_LEFT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return(LASTFLAG(S));
      }
    }
    else N_VScale(ONE, Ap, vtemp);

    /*   Apply b-scaling: Ap = sb P1_inv A P2_inv sx_inv p */

    if (scale_b) N_VProd(sb, vtemp, Ap);
    else N_VScale(ONE, vtemp, Ap);


    /* Calculate alpha = <r,r_star>/<Ap,r_star> */
    /* NOTE(review): the denominator <Ap,r_star> is not checked for zero */

    alpha = ((beta_denom / N_VDotProd(Ap, r_star)));

    /* Update q = r - alpha*Ap = r - alpha*(sb P1_inv A P2_inv sx_inv p) */

    N_VLinearSum(ONE, r, -alpha, Ap, q);

    /* Generate u = A-tilde q */

    /*   Apply x-scaling: vtemp = sx_inv q */

    if (scale_x) N_VDiv(q, sx, vtemp);
    else N_VScale(ONE, q, vtemp);

    /*   Apply right preconditioner: vtemp = P2_inv sx_inv q
         (u serves as the input buffer for psolve) */

    if (preOnRight) {
      N_VScale(ONE, vtemp, u);
      ier = psolve(P_data, u, vtemp, delta, PREC_RIGHT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return(LASTFLAG(S));
      }
    }

    /*   Apply A: u = A P2_inv sx_inv q */

    ier = atimes(A_data, vtemp, u );
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC;
      return(LASTFLAG(S));
    }

    /*   Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv q */

    if (preOnLeft) {
      ier = psolve(P_data, u, vtemp, delta, PREC_LEFT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return(LASTFLAG(S));
      }
    }
    else N_VScale(ONE, u, vtemp);

    /*   Apply b-scaling: u = sb P1_inv A P2_inv sx_inv q */

    if (scale_b) N_VProd(sb, vtemp, u);
    else N_VScale(ONE, vtemp, u);


    /* Calculate omega = <u,q>/<u,u>; a zero <u,u> is replaced by one
       so that the division is always defined */

    omega_denom = N_VDotProd(u, u);
    if (omega_denom == ZERO) omega_denom = ONE;
    omega = (N_VDotProd(u, q) / omega_denom);

    /* Update x = x + alpha*p + omega*q */
    cv[0] = ONE;
    Xv[0] = x;

    cv[1] = alpha;
    Xv[1] = p;

    cv[2] = omega;
    Xv[2] = q;

    /* this early return does not record a code in LASTFLAG(S) */
    ier = N_VLinearCombination(3, cv, Xv, x);
    if (ier != SUNLS_SUCCESS) return(SUNLS_VECTOROP_ERR);

    /* Update the residual r = q - omega*u */

    N_VLinearSum(ONE, q, -omega, u, r);

    /* Set rho = norm(r) and check convergence */

    *res_norm = rho = SUNRsqrt(N_VDotProd(r, r));
    if (rho <= delta) {
      converged = SUNTRUE;
      break;
    }

    /* Not yet converged, continue iteration */
    /*   Update beta = <rnew,r_star> / <rold,r_star> * alpha / omega */
    /*   NOTE(review): omega is not checked for zero before this division */

    beta_num = N_VDotProd(r, r_star);
    beta = ((beta_num / beta_denom) * (alpha / omega));

    /*   Update p = r + beta*(p - omega*Ap) = beta*p - beta*omega*Ap + r;
         the coefficient of Ap is written as -alpha*(beta_num/beta_denom),
         which equals -beta*omega */
    cv[0] = beta;
    Xv[0] = p;

    cv[1] = -alpha*(beta_num / beta_denom);
    Xv[1] = Ap;

    cv[2] = ONE;
    Xv[2] = r;

    /* this early return does not record a code in LASTFLAG(S) */
    ier = N_VLinearCombination(3, cv, Xv, p);
    if (ier != SUNLS_SUCCESS) return(SUNLS_VECTOROP_ERR);

    /*   update beta_denom for next iteration */
    beta_denom = beta_num;
  }

  /* Main loop finished */

  if ((converged == SUNTRUE) || (rho < r_norm)) {

    /* Apply the x-scaling and right preconditioner: x = P2_inv sx_inv x */

    if (scale_x) N_VDiv(x, sx, x);
    if (preOnRight) {
      ier = psolve(P_data, x, vtemp, delta, PREC_RIGHT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return(LASTFLAG(S));
      }
      N_VScale(ONE, vtemp, x);
    }

    /* converged, or at least the residual norm was reduced */
    if (converged == SUNTRUE) LASTFLAG(S) = SUNLS_SUCCESS;
    else LASTFLAG(S) = SUNLS_RES_REDUCED;
    return(LASTFLAG(S));

  }
  else {
    /* neither converged nor reduced; x is returned without the final
       scaling / right-preconditioning step */
    LASTFLAG(S) = SUNLS_CONV_FAIL;
    return(LASTFLAG(S));
  }
}
int IDASpbcg(void *ida_mem, int maxl) { IDAMem IDA_mem; IDASpilsMem idaspils_mem; SpbcgMem spbcg_mem; int flag, maxl1; /* Return immediately if ida_mem is NULL */ if (ida_mem == NULL) { IDAProcessError(NULL, IDASPILS_MEM_NULL, "IDASPBCG", "IDASpbcg", MSGS_IDAMEM_NULL); return(IDASPILS_MEM_NULL); } IDA_mem = (IDAMem) ida_mem; /* Check if N_VDotProd is present */ if (vec_tmpl->ops->nvdotprod == NULL) { IDAProcessError(NULL, IDASPILS_ILL_INPUT, "IDASPBCG", "IDASpbcg", MSGS_BAD_NVECTOR); return(IDASPILS_ILL_INPUT); } if (lfree != NULL) flag = lfree((IDAMem) ida_mem); /* Set five main function fields in ida_mem */ linit = IDASpbcgInit; lsetup = IDASpbcgSetup; lsolve = IDASpbcgSolve; lperf = IDASpbcgPerf; lfree = IDASpbcgFree; /* Get memory for IDASpilsMemRec */ idaspils_mem = NULL; idaspils_mem = (IDASpilsMem) malloc(sizeof(struct IDASpilsMemRec)); if (idaspils_mem == NULL) { IDAProcessError(NULL, IDASPILS_MEM_FAIL, "IDASPBCG", "IDASpbcg", MSGS_MEM_FAIL); return(IDASPILS_MEM_FAIL); } /* Set ILS type */ idaspils_mem->s_type = SPILS_SPBCG; /* Set SPBCG parameters that were passed in call sequence */ maxl1 = (maxl <= 0) ? 
IDA_SPILS_MAXL : maxl; idaspils_mem->s_maxl = maxl1; /* Set defaults for Jacobian-related fileds */ jtimesDQ = TRUE; jtimes = NULL; jdata = NULL; /* Set defaults for preconditioner-related fields */ idaspils_mem->s_pset = NULL; idaspils_mem->s_psolve = NULL; idaspils_mem->s_pfree = NULL; idaspils_mem->s_pdata = IDA_mem->ida_user_data; /* Set default values for the rest of the Spbcg parameters */ idaspils_mem->s_eplifac = PT05; idaspils_mem->s_dqincfac = ONE; idaspils_mem->s_last_flag = IDASPILS_SUCCESS; /* Set setupNonNull to FALSE */ setupNonNull = FALSE; /* Allocate memory for ytemp, yptemp, and xx */ ytemp = N_VClone(vec_tmpl); if (ytemp == NULL) { IDAProcessError(NULL, IDASPILS_MEM_FAIL, "IDASPBCG", "IDASpbcg", MSGS_MEM_FAIL); free(idaspils_mem); idaspils_mem = NULL; return(IDASPILS_MEM_FAIL); } yptemp = N_VClone(vec_tmpl); if (yptemp == NULL) { IDAProcessError(NULL, IDASPILS_MEM_FAIL, "IDASPBCG", "IDASpbcg", MSGS_MEM_FAIL); N_VDestroy(ytemp); free(idaspils_mem); idaspils_mem = NULL; return(IDASPILS_MEM_FAIL); } xx = N_VClone(vec_tmpl); if (xx == NULL) { IDAProcessError(NULL, IDASPILS_MEM_FAIL, "IDASPBCG", "IDASpbcg", MSGS_MEM_FAIL); N_VDestroy(ytemp); N_VDestroy(yptemp); free(idaspils_mem); idaspils_mem = NULL; return(IDASPILS_MEM_FAIL); } /* Compute sqrtN from a dot product */ N_VConst(ONE, ytemp); sqrtN = SUNRsqrt(N_VDotProd(ytemp, ytemp)); /* Call SpbcgMalloc to allocate workspace for Spbcg */ spbcg_mem = NULL; spbcg_mem = SpbcgMalloc(maxl1, vec_tmpl); if (spbcg_mem == NULL) { IDAProcessError(NULL, IDASPILS_MEM_FAIL, "IDASPBCG", "IDASpbcg", MSGS_MEM_FAIL); N_VDestroy(ytemp); N_VDestroy(yptemp); N_VDestroy(xx); free(idaspils_mem); idaspils_mem = NULL; return(IDASPILS_MEM_FAIL); } /* Attach SPBCG memory to spils memory structure */ spils_mem = (void *)spbcg_mem; /* Attach linear solver memory to the integrator memory */ lmem = idaspils_mem; return(IDASPILS_SUCCESS); }
int SpgmrSolve(SpgmrMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, int gstype, real delta, int max_restarts, void *P_data, N_Vector sx, N_Vector sb, ATimesFn atimes, PSolveFn psolve, real *res_norm, int *nli, int *nps) { N_Vector *V, xcor, vtemp; real **Hes, *givens, *yg; real s_r0_norm, beta, rotation_product, r_norm, s_product, rho; boole preOnLeft, preOnRight, scale_x, scale_b, converged; int i, j, k, l, l_plus_1, l_max, krydim, ier, ntries; if (mem == NULL) return(SPGMR_MEM_NULL); /* Make local copies of mem variables */ l_max = mem->l_max; V = mem->V; Hes = mem->Hes; givens = mem->givens; xcor = mem->xcor; yg = mem->yg; vtemp = mem->vtemp; *nli = *nps = 0; /* Initialize counters */ converged = FALSE; /* Initialize converged flag */ if (max_restarts < 0) max_restarts = 0; if ((pretype != LEFT) && (pretype != RIGHT) && (pretype != BOTH)) pretype = NONE; preOnLeft = ((pretype == LEFT) || (pretype == BOTH)); preOnRight = ((pretype == RIGHT) || (pretype == BOTH)); scale_x = (sx != NULL); scale_b = (sb != NULL); /* Set vtemp and V[0] to initial (unscaled) residual r_0 = b - A*x_0 */ if (N_VDotProd(x, x) == ZERO) { N_VScale(ONE, b, vtemp); } else { if (atimes(A_data, x, vtemp) != 0) return(SPGMR_ATIMES_FAIL); N_VLinearSum(ONE, b, -ONE, vtemp, vtemp); } N_VScale(ONE, vtemp, V[0]); /* Apply b-scaling to vtemp, get L2 norm of sb r_0, and return if small */ /* if (scale_b) N_VProd(sb, vtemp, vtemp); s_r0_norm = RSqrt(N_VDotProd(vtemp, vtemp)); if (s_r0_norm <= delta) return(SPGMR_SUCCESS); */ /* Apply left preconditioner and b-scaling to V[0] = r_0 */ if (preOnLeft) { ier = psolve(P_data, V[0], vtemp, LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? 
SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, V[0], vtemp); } if (scale_b) { N_VProd(sb, vtemp, V[0]); } else { N_VScale(ONE, vtemp, V[0]); } /* Set r_norm = beta to L2 norm of V[0] = sb P1_inv r_0, and return if small */ *res_norm = r_norm = beta = RSqrt(N_VDotProd(V[0], V[0])); if (r_norm <= delta) return(SPGMR_SUCCESS); /* Set xcor = 0 */ N_VConst(ZERO, xcor); /* Begin outer iterations: up to (max_restarts + 1) attempts */ for (ntries = 0; ntries <= max_restarts; ntries++) { /* Initialize the Hessenberg matrix Hes and Givens rotation product. Normalize the initial vector V[0]. */ for (i=0; i <= l_max; i++) for (j=0; j < l_max; j++) Hes[i][j] = ZERO; rotation_product = ONE; N_VScale(ONE/r_norm, V[0], V[0]); /* Inner loop: generate Krylov sequence and Arnoldi basis */ for(l=0; l < l_max; l++) { (*nli)++; krydim = l_plus_1 = l + 1; /* Generate A-tilde V[l], where A-tilde = sb P1_inv A P2_inv sx_inv */ /* Apply x-scaling: vtemp = sx_inv V[l] */ if (scale_x) { N_VDiv(V[l], sx, vtemp); } else { N_VScale(ONE, V[l], vtemp); } /* Apply right precoditioner: vtemp = P2_inv sx_inv V[l] */ N_VScale(ONE, vtemp, V[l_plus_1]); if (preOnRight) { ier = psolve(P_data, V[l_plus_1], vtemp, RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } /* Apply A: V[l+1] = A P2_inv sx_inv V[l] */ if (atimes(A_data, vtemp, V[l_plus_1] ) != 0) return(SPGMR_ATIMES_FAIL); /* Apply left preconditioning: vtemp = P1_inv A P2_inv sx_inv V[l] */ if (preOnLeft) { ier = psolve(P_data, V[l_plus_1], vtemp, LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, V[l_plus_1], vtemp); } /* Apply b-scaling: V[l+1] = sb P1_inv A P2_inv sx_inv V[l] */ if (scale_b) { N_VProd(sb, vtemp, V[l_plus_1]); } else { N_VScale(ONE, vtemp, V[l_plus_1]); } /* Orthogonalize V[l+1] against previous V[i]: V[l+1] = w_tilde. 
*/ if (gstype == CLASSICAL_GS) { if (ClassicalGS(V, Hes, l_plus_1, l_max, &(Hes[l_plus_1][l]), vtemp, yg) != 0) return(SPGMR_GS_FAIL); } else { if (ModifiedGS(V, Hes, l_plus_1, l_max, &(Hes[l_plus_1][l])) != 0) return(SPGMR_GS_FAIL); } /* Update the QR factorization of Hes */ if(QRfact(krydim, Hes, givens, l) != 0 ) return(SPGMR_QRFACT_FAIL); /* Update residual norm estimate; break if convergence test passes */ rotation_product *= givens[2*l+1]; if ((*res_norm = rho = ABS(rotation_product*r_norm)) <= delta) { converged = TRUE; break; } /* Normalize V[l+1] with norm value from the Gram-Schmidt */ N_VScale(ONE/Hes[l_plus_1][l], V[l_plus_1], V[l_plus_1]); } /* Inner loop is done. Compute the new correction vector xcor */ /* Construct g, then solve for y */ yg[0] = r_norm; for (i=1; i <= krydim; i++) yg[i]=ZERO; if (QRsol(krydim, Hes, givens, yg) != 0) return(SPGMR_QRSOL_FAIL); /* Add correction vector V_l y to xcor */ for (k=0; k < krydim; k++) N_VLinearSum(yg[k], V[k], ONE, xcor, xcor); /* If converged, construct the final solution vector x */ if (converged) { /* Apply x-scaling and right precond.: vtemp = P2_inv sx_inv xcor */ if (scale_x) N_VDiv(xcor, sx, xcor); if (preOnRight) { ier = psolve(P_data, xcor, vtemp, RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? 
SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, xcor, vtemp); } /* Add correction to initial x to get final solution x, and return */ N_VLinearSum(ONE, x, ONE, vtemp, x); return(SPGMR_SUCCESS); } /* Not yet converged; if allowed, prepare for restart */ if (ntries == max_restarts) break; /* Construct last column of Q in yg */ s_product = ONE; for (i=krydim; i > 0; i--) { yg[i] = s_product*givens[2*i-2]; s_product *= givens[2*i-1]; } yg[0] = s_product; /* Scale r_norm and yg */ r_norm *= s_product; for (i=0; i <= krydim; i++) yg[i] *= r_norm; r_norm = ABS(r_norm); /* Multiply yg by V_(krydim+1) to get last residual vector; restart */ N_VScale(yg[0], V[0], V[0]); for( k=1; k <= krydim; k++) N_VLinearSum(yg[k], V[k], ONE, V[0], V[0]); } /* Failed to converge, even after allowed restarts. If the residual norm was reduced below its initial value, compute and return x anyway. Otherwise return failure flag. */ if (rho < beta) { /* Apply the x-scaling and right precond.: vtemp = P2_inv sx_inv xcor */ if (scale_x) N_VDiv(xcor, sx, xcor); if (preOnRight) { ier = psolve(P_data, xcor, vtemp, RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPGMR_PSOLVE_FAIL_UNREC : SPGMR_PSOLVE_FAIL_REC); } else { N_VScale(ONE, xcor, vtemp); } /* Add vtemp to initial x to get final solution x, and return */ N_VLinearSum(ONE, x, ONE, vtemp, x); return(SPGMR_RES_REDUCED); } return(SPGMR_CONV_FAIL); }
/*
 * KINSpbcgSolve
 *
 * Solves the linear system for the current nonlinear iteration by calling
 * SpbcgSolve with a zero initial guess, then (on a usable solve) computes
 * sJpnorm and sfdotJp for the global strategy and forcing-term routines.
 *
 *   kin_mem   KINSOL memory block
 *   xx        out: solution vector (zeroed here before the solve)
 *   bb        in: right-hand side; overwritten with J*xx and then rescaled
 *   res_norm  out: residual norm reported by SpbcgSolve
 *
 * Returns 0 when SpbcgSolve reports SPBCG_SUCCESS or SPBCG_RES_REDUCED and
 * the follow-up Jacobian-vector product succeeds, 1 for a recoverable
 * failure, -1 for an unrecoverable one.  last_flag records the SPBCG_* code
 * that determined the return value.
 *
 * The SpbcgSolve status is interpreted before KINSpilsAtimes is called, so a
 * failed solve is never masked by a later successful J*p product.
 *
 * NOTE(review): lmem, spils_mem, pretype, eps, fscale, nli, nps, ncfl,
 * printfl, new_uu, sJpnorm, sfdotJp, fval and last_flag are presumably
 * accessor macros keyed on the locals kin_mem / kinspils_mem -- confirm.
 */
static int KINSpbcgSolve(KINMem kin_mem, N_Vector xx, N_Vector bb,
                         realtype *res_norm)
{
  KINSpilsMem kinspils_mem;
  SpbcgMem spbcg_mem;
  int ret, nli_inc, nps_inc;

  kinspils_mem = (KINSpilsMem) lmem;
  spbcg_mem = (SpbcgMem) spils_mem;

  /* Set initial guess to xx = 0. bb is set, by the routine
     calling KINSpbcgSolve, to the RHS vector for the system
     to be solved. */
  N_VConst(ZERO, xx);

  new_uu = TRUE;  /* set flag required for user Jacobian routine */

  /* call SpbcgSolve */
  ret = SpbcgSolve(spbcg_mem, kin_mem, xx, bb, pretype, eps,
                   kin_mem, fscale, fscale, KINSpilsAtimes,
                   KINSpilsPSolve, res_norm, &nli_inc, &nps_inc);

  /* increment counters nli, nps, and ncfl
     (nni is updated in the KINSol main iteration loop) */
  nli = nli + (long int) nli_inc;
  nps = nps + (long int) nps_inc;

  if (printfl > 2)
    KINPrintInfo(kin_mem, PRNT_NLI, "KINSPBCG", "KINSpbcgSolve", INFO_NLI, nli_inc);

  if (ret != 0) ncfl++;
  last_flag = ret;

  /* Interpret return value from SpbcgSolve before anything can overwrite it */
  switch (ret) {

  case SPBCG_SUCCESS:
  case SPBCG_RES_REDUCED:
    /* usable solution: fall out of the switch and compute the J*p terms */
    break;

  case SPBCG_PSOLVE_FAIL_REC:
  case SPBCG_ATIMES_FAIL_REC:
    return(1);

  case SPBCG_CONV_FAIL:
  case SPBCG_MEM_NULL:
  case SPBCG_ATIMES_FAIL_UNREC:
  case SPBCG_PSOLVE_FAIL_UNREC:
    return(-1);

  default:
    /* any other code is treated as unrecoverable */
    return(-1);
  }

  /* Compute the terms sJpnorm and sfdotJp for use in the global strategy
     routines and in KINForcingTerm. Both of these terms are subsequently
     corrected if the step is reduced by constraints or the line search.

     sJpnorm is the norm of the scaled product (scaled by fscale) of
     the current Jacobian matrix J and the step vector p.

     sfdotJp is the dot product of the scaled f vector and the scaled
     vector J*p, where the scaling uses fscale. */
  ret = KINSpilsAtimes(kin_mem, xx, bb);
  if (ret > 0) {
    last_flag = SPBCG_ATIMES_FAIL_REC;
    return(1);
  }
  if (ret < 0) {
    last_flag = SPBCG_ATIMES_FAIL_UNREC;
    return(-1);
  }

  sJpnorm = N_VWL2Norm(bb, fscale);
  /* bb is multiplied by fscale twice before the dot product with fval */
  N_VProd(bb, fscale, bb);
  N_VProd(bb, fscale, bb);
  sfdotJp = N_VDotProd(fval, bb);

  if (printfl > 2)
    KINPrintInfo(kin_mem, PRNT_EPS, "KINSPBCG", "KINSpbcgSolve", INFO_EPS, *res_norm, eps);

  return(0);
}
int CPSptfqmr(void *cpode_mem, int pretype, int maxl) { CPodeMem cp_mem; CPSpilsMem cpspils_mem; SptfqmrMem sptfqmr_mem; int mxl; /* Return immediately if cpode_mem is NULL */ if (cpode_mem == NULL) { cpProcessError(NULL, CPSPILS_MEM_NULL, "CPSPTFQMR", "CPSptfqmr", MSGS_CPMEM_NULL); return(CPSPILS_MEM_NULL); } cp_mem = (CPodeMem) cpode_mem; /* Check if N_VDotProd is present */ if (vec_tmpl->ops->nvdotprod == NULL) { cpProcessError(cp_mem, CPSPILS_ILL_INPUT, "CPSPTFQMR", "CPSptfqmr", MSGS_BAD_NVECTOR); return(CPSPILS_ILL_INPUT); } if (lfree != NULL) lfree(cp_mem); /* Set four main function fields in cp_mem */ linit = cpSptfqmrInit; lsetup = cpSptfqmrSetup; lsolve = cpSptfqmrSolve; lfree = cpSptfqmrFree; /* Get memory for CPSpilsMemRec */ cpspils_mem = NULL; cpspils_mem = (CPSpilsMem) malloc(sizeof(CPSpilsMemRec)); if (cpspils_mem == NULL) { cpProcessError(cp_mem, CPSPILS_MEM_FAIL, "CPSPTFQMR", "CPSptfqmr", MSGS_MEM_FAIL); return(CPSPILS_MEM_FAIL); } /* Set ILS type */ cpspils_mem->s_type = SPILS_SPTFQMR; /* Set Sptfqmr parameters that have been passed in call sequence */ cpspils_mem->s_pretype = pretype; mxl = cpspils_mem->s_maxl = (maxl <= 0) ? 
CPSPILS_MAXL : maxl; /* Set default values for the rest of the Sptfqmr parameters */ cpspils_mem->s_delt = CPSPILS_DELT; cpspils_mem->s_psetE = NULL; cpspils_mem->s_psetI = NULL; cpspils_mem->s_pslvE = NULL; cpspils_mem->s_pslvI = NULL; cpspils_mem->s_jtvE = NULL; cpspils_mem->s_jtvI = NULL; cpspils_mem->s_P_data = NULL; cpspils_mem->s_j_data = NULL; cpspils_mem->s_last_flag = CPSPILS_SUCCESS; lsetup_exists = FALSE; /* Check for legal pretype */ if ((pretype != PREC_NONE) && (pretype != PREC_LEFT) && (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) { cpProcessError(cp_mem, CPSPILS_ILL_INPUT, "CPSPTFQMR", "CPSptfqmr", MSGS_BAD_PRETYPE); free(cpspils_mem); return(CPSPILS_ILL_INPUT); } /* Alocate memory */ sptfqmr_mem = NULL; ytemp = NULL; yptemp = NULL; x = NULL; /* Call SptfqmrMalloc to allocate workspace for Sptfqmr */ sptfqmr_mem = SptfqmrMalloc(mxl, vec_tmpl); if (sptfqmr_mem == NULL) { cpProcessError(cp_mem, CPSPILS_MEM_FAIL, "CPSPTFQMR", "CPSptfqmr", MSGS_MEM_FAIL); free(cpspils_mem); return(CPSPILS_MEM_FAIL); } /* Allocate memory for x, ytemp and (if needed) yptemp */ x = N_VClone(vec_tmpl); if (x == NULL) { cpProcessError(cp_mem, CPSPILS_MEM_FAIL, "CPSPTFQMR", "CPSptfqmr", MSGS_MEM_FAIL); SptfqmrFree(sptfqmr_mem); free(cpspils_mem); return(CPSPILS_MEM_FAIL); } ytemp = N_VClone(vec_tmpl); if (ytemp == NULL) { cpProcessError(cp_mem, CPSPILS_MEM_FAIL, "CPSPTFQMR", "CPSptfqmr", MSGS_MEM_FAIL); SptfqmrFree(sptfqmr_mem); N_VDestroy(x); free(cpspils_mem); return(CPSPILS_MEM_FAIL); } if (ode_type == CP_IMPL) { yptemp = N_VClone(vec_tmpl); if (yptemp == NULL) { cpProcessError(cp_mem, CPSPILS_MEM_FAIL, "CPSPTFQMR", "CPSptfqmr", MSGS_MEM_FAIL); SptfqmrFree(sptfqmr_mem); N_VDestroy(x); N_VDestroy(ytemp); free(cpspils_mem); return(CPSPILS_MEM_FAIL); } } /* Compute sqrtN from a dot product */ N_VConst(ONE, ytemp); sqrtN = RSqrt(N_VDotProd(ytemp, ytemp)); /* Attach SPTFQMR memory to spils memory structure */ spils_mem = (void *) sptfqmr_mem; /* Attach linear solver memory 
to integrator memory */ lmem = cpspils_mem; return(CPSPILS_SUCCESS); }
/*
 * SUNLinSolSolve_PCG
 *
 * Preconditioned conjugate gradient iteration for A x = b.
 *
 *   S      solver object; its content supplies maxl, the work vectors
 *          r, p, z, Ap, the optional scaling vector s, the ATimes/Psolve
 *          callbacks with their data, and pretype
 *   nul    unused by this routine
 *   x      initial guess on entry, updated in place each iteration
 *   b      right-hand side
 *   delta  tolerance on the (optionally scaled) residual 2-norm
 *
 * The iteration count and residual norm are written into the solver
 * content; the return value is also stored in LASTFLAG(S).
 *
 * Returns SUNLS_MEM_NULL (S is NULL), SUNLS_SUCCESS, SUNLS_RES_REDUCED,
 * SUNLS_CONV_FAIL, or an SUNLS_ATIMES_FAIL_* / SUNLS_PSOLVE_FAIL_* code
 * (UNREC for a negative callback return, REC for a positive one).
 */
int SUNLinSolSolve_PCG(SUNLinearSolver S, SUNMatrix nul, N_Vector x,
                       N_Vector b, realtype delta)
{
  realtype alpha, beta, r0_norm, rho, rz, rz_old;
  N_Vector r, p, z, Ap, w;
  booleantype UsePrec, UseScaling;
  int l, l_max, pretype, ier;
  void *A_data, *P_data;
  ATimesFn atimes;
  PSolveFn psolve;
  realtype *res_norm;
  int *nli;

  if (S == NULL) {
    return SUNLS_MEM_NULL;
  }

  /* Local shortcuts into the solver content */
  l_max    = PCG_CONTENT(S)->maxl;
  r        = PCG_CONTENT(S)->r;
  p        = PCG_CONTENT(S)->p;
  z        = PCG_CONTENT(S)->z;
  Ap       = PCG_CONTENT(S)->Ap;
  w        = PCG_CONTENT(S)->s;
  A_data   = PCG_CONTENT(S)->ATData;
  P_data   = PCG_CONTENT(S)->PData;
  atimes   = PCG_CONTENT(S)->ATimes;
  psolve   = PCG_CONTENT(S)->Psolve;
  pretype  = PCG_CONTENT(S)->pretype;
  nli      = &(PCG_CONTENT(S)->numiters);
  res_norm = &(PCG_CONTENT(S)->resnorm);

  /* No iterations performed yet */
  *nli = 0;

  /* Any of the three preconditioning types enables the preconditioner;
     scaling is enabled by a non-NULL scaling vector */
  UsePrec = ( (pretype == PREC_BOTH) ||
              (pretype == PREC_LEFT) ||
              (pretype == PREC_RIGHT) );
  UseScaling = (w != NULL);

  /* r = r_0 = b - A*x_0; the product is skipped when x_0 is exactly zero */
  if (N_VDotProd(x, x) != ZERO) {
    ier = atimes(A_data, x, r);
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC;
      return LASTFLAG(S);
    }
    N_VLinearSum(ONE, b, -ONE, r, r);
  } else {
    N_VScale(ONE, b, r);
  }

  /* rho = (scaled) 2-norm of r, using Ap as scratch; done if already small */
  if (UseScaling) {
    N_VProd(r, w, Ap);
  } else {
    N_VScale(ONE, r, Ap);
  }
  rho = SUNRsqrt(N_VDotProd(Ap, Ap));
  r0_norm = rho;
  *res_norm = rho;
  if (rho <= delta) {
    LASTFLAG(S) = SUNLS_SUCCESS;
    return LASTFLAG(S);
  }

  /* z = P^{-1} r (or z = r without preconditioning) */
  if (UsePrec) {
    ier = psolve(P_data, r, z, delta, PREC_LEFT);
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
      return LASTFLAG(S);
    }
  } else {
    N_VScale(ONE, r, z);
  }

  /* rz = <r,z>, first search direction p = z */
  rz = N_VDotProd(r, z);
  N_VScale(ONE, z, p);

  /* Main iteration */
  for (l = 0; l < l_max; l++) {

    (*nli)++;

    /* Ap = A*p */
    ier = atimes(A_data, p, Ap);
    if (ier != 0) {
      LASTFLAG(S) = (ier < 0) ?
        SUNLS_ATIMES_FAIL_UNREC : SUNLS_ATIMES_FAIL_REC;
      return LASTFLAG(S);
    }

    /* alpha = <r,z> / <Ap,p> */
    alpha = rz / N_VDotProd(Ap, p);

    /* x = x + alpha*p,  r = r - alpha*Ap */
    N_VLinearSum(ONE, x, alpha, p, x);
    N_VLinearSum(ONE, r, -alpha, Ap, r);

    /* New residual norm (Ap is reused as scratch); stop once it is small */
    if (UseScaling) {
      N_VProd(r, w, Ap);
    } else {
      N_VScale(ONE, r, Ap);
    }
    rho = SUNRsqrt(N_VDotProd(Ap, Ap));
    *res_norm = rho;
    if (rho <= delta) {
      LASTFLAG(S) = SUNLS_SUCCESS;
      return LASTFLAG(S);
    }

    /* z = P^{-1} r */
    if (UsePrec) {
      ier = psolve(P_data, r, z, delta, PREC_LEFT);
      if (ier != 0) {
        LASTFLAG(S) = (ier < 0) ?
          SUNLS_PSOLVE_FAIL_UNREC : SUNLS_PSOLVE_FAIL_REC;
        return LASTFLAG(S);
      }
    } else {
      N_VScale(ONE, r, z);
    }

    /* beta = <r,z>_new / <r,z>_old */
    rz_old = rz;
    rz = N_VDotProd(r, z);
    beta = rz / rz_old;

    /* p = z + beta*p */
    N_VLinearSum(ONE, z, beta, p, p);
  }

  /* Iteration limit reached without meeting the tolerance: report whether
     the residual norm at least dropped below its initial value */
  if (rho < r0_norm) {
    LASTFLAG(S) = SUNLS_RES_REDUCED;
  } else {
    LASTFLAG(S) = SUNLS_CONV_FAIL;
  }
  return LASTFLAG(S);
}
int SpbcgSolve(SpbcgMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, realtype delta, void *P_data, N_Vector sx, N_Vector sb, ATimesFn atimes, PSolveFn psolve, realtype *res_norm, int *nli, int *nps) { realtype alpha, beta, omega, omega_denom, beta_num, beta_denom, r_norm, rho; N_Vector r_star, r, p, q, u, Ap, vtemp; booleantype preOnLeft, preOnRight, scale_x, scale_b, converged; int l, l_max, ier; if (mem == NULL) return(SPBCG_MEM_NULL); /* Make local copies of mem variables */ l_max = mem->l_max; r_star = mem->r_star; r = mem->r; p = mem->p; q = mem->q; u = mem->u; Ap = mem->Ap; vtemp = mem->vtemp; *nli = *nps = 0; /* Initialize counters */ converged = FALSE; /* Initialize converged flag */ if ((pretype != PREC_LEFT) && (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) pretype = PREC_NONE; preOnLeft = ((pretype == PREC_BOTH) || (pretype == PREC_LEFT)); preOnRight = ((pretype == PREC_BOTH) || (pretype == PREC_RIGHT)); scale_x = (sx != NULL); scale_b = (sb != NULL); /* Set r_star to initial (unscaled) residual r_0 = b - A*x_0 */ if (N_VDotProd(x, x) == ZERO) N_VScale(ONE, b, r_star); else { ier = atimes(A_data, x, r_star); if (ier != 0) return((ier < 0) ? SPBCG_ATIMES_FAIL_UNREC : SPBCG_ATIMES_FAIL_REC); N_VLinearSum(ONE, b, -ONE, r_star, r_star); } /* Apply left preconditioner and b-scaling to r_star = r_0 */ if (preOnLeft) { ier = psolve(P_data, r_star, r, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? 
SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); } else N_VScale(ONE, r_star, r); if (scale_b) N_VProd(sb, r, r_star); else N_VScale(ONE, r, r_star); /* Initialize beta_denom to the dot product of r0 with r0 */ beta_denom = N_VDotProd(r_star, r_star); /* Set r_norm to L2 norm of r_star = sb P1_inv r_0, and return if small */ *res_norm = r_norm = rho = SUNRsqrt(beta_denom); if (r_norm <= delta) return(SPBCG_SUCCESS); /* Copy r_star to r and p */ N_VScale(ONE, r_star, r); N_VScale(ONE, r_star, p); /* Begin main iteration loop */ for(l = 0; l < l_max; l++) { (*nli)++; /* Generate Ap = A-tilde p, where A-tilde = sb P1_inv A P2_inv sx_inv */ /* Apply x-scaling: vtemp = sx_inv p */ if (scale_x) N_VDiv(p, sx, vtemp); else N_VScale(ONE, p, vtemp); /* Apply right preconditioner: vtemp = P2_inv sx_inv p */ if (preOnRight) { N_VScale(ONE, vtemp, Ap); ier = psolve(P_data, Ap, vtemp, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); } /* Apply A: Ap = A P2_inv sx_inv p */ ier = atimes(A_data, vtemp, Ap ); if (ier != 0) return((ier < 0) ? SPBCG_ATIMES_FAIL_UNREC : SPBCG_ATIMES_FAIL_REC); /* Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */ if (preOnLeft) { ier = psolve(P_data, Ap, vtemp, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? 
SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); } else N_VScale(ONE, Ap, vtemp); /* Apply b-scaling: Ap = sb P1_inv A P2_inv sx_inv p */ if (scale_b) N_VProd(sb, vtemp, Ap); else N_VScale(ONE, vtemp, Ap); /* Calculate alpha = <r,r_star>/<Ap,r_star> */ alpha = ((beta_denom / N_VDotProd(Ap, r_star))); /* Update q = r - alpha*Ap = r - alpha*(sb P1_inv A P2_inv sx_inv p) */ N_VLinearSum(ONE, r, -alpha, Ap, q); /* Generate u = A-tilde q */ /* Apply x-scaling: vtemp = sx_inv q */ if (scale_x) N_VDiv(q, sx, vtemp); else N_VScale(ONE, q, vtemp); /* Apply right preconditioner: vtemp = P2_inv sx_inv q */ if (preOnRight) { N_VScale(ONE, vtemp, u); ier = psolve(P_data, u, vtemp, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); } /* Apply A: u = A P2_inv sx_inv u */ ier = atimes(A_data, vtemp, u ); if (ier != 0) return((ier < 0) ? SPBCG_ATIMES_FAIL_UNREC : SPBCG_ATIMES_FAIL_REC); /* Apply left preconditioner: vtemp = P1_inv A P2_inv sx_inv p */ if (preOnLeft) { ier = psolve(P_data, u, vtemp, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? 
SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); } else N_VScale(ONE, u, vtemp); /* Apply b-scaling: u = sb P1_inv A P2_inv sx_inv u */ if (scale_b) N_VProd(sb, vtemp, u); else N_VScale(ONE, vtemp, u); /* Calculate omega = <u,q>/<u,u> */ omega_denom = N_VDotProd(u, u); if (omega_denom == ZERO) omega_denom = ONE; omega = (N_VDotProd(u, q) / omega_denom); /* Update x = x + alpha*p + omega*q */ N_VLinearSum(alpha, p, omega, q, vtemp); N_VLinearSum(ONE, x, ONE, vtemp, x); /* Update the residual r = q - omega*u */ N_VLinearSum(ONE, q, -omega, u, r); /* Set rho = norm(r) and check convergence */ *res_norm = rho = SUNRsqrt(N_VDotProd(r, r)); if (rho <= delta) { converged = TRUE; break; } /* Not yet converged, continue iteration */ /* Update beta = <rnew,r_star> / <rold,r_start> * alpha / omega */ beta_num = N_VDotProd(r, r_star); beta = ((beta_num / beta_denom) * (alpha / omega)); beta_denom = beta_num; /* Update p = r + beta*(p - omega*Ap) */ N_VLinearSum(ONE, p, -omega, Ap, vtemp); N_VLinearSum(ONE, r, beta, vtemp, p); } /* Main loop finished */ if ((converged == TRUE) || (rho < r_norm)) { /* Apply the x-scaling and right preconditioner: x = P2_inv sx_inv x */ if (scale_x) N_VDiv(x, sx, x); if (preOnRight) { ier = psolve(P_data, x, vtemp, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPBCG_PSOLVE_FAIL_UNREC : SPBCG_PSOLVE_FAIL_REC); N_VScale(ONE, vtemp, x); } if (converged == TRUE) return(SPBCG_SUCCESS); else return(SPBCG_RES_REDUCED); } else return(SPBCG_CONV_FAIL); }
/* Main Program */ int main() { /* general problem parameters */ realtype T0 = RCONST(0.0); /* initial time */ realtype Tf = RCONST(1.0); /* final time */ int Nt = 10; /* total number of output times */ realtype rtol = 1.e-6; /* relative tolerance */ realtype atol = 1.e-10; /* absolute tolerance */ UserData udata = NULL; realtype *data; sunindextype N = 201; /* spatial mesh size */ realtype k = 0.5; /* heat conductivity */ sunindextype i; /* general problem variables */ int flag; /* reusable error-checking flag */ N_Vector y = NULL; /* empty vector for storing solution */ SUNLinearSolver LS = NULL; /* empty linear solver object */ void *arkode_mem = NULL; /* empty ARKode memory structure */ FILE *FID, *UFID; realtype t, dTout, tout; int iout; long int nst, nst_a, nfe, nfi, nsetups, nli, nJv, nlcf, nni, ncfn, netf; /* allocate and fill udata structure */ udata = (UserData) malloc(sizeof(*udata)); udata->N = N; udata->k = k; udata->dx = RCONST(1.0)/(1.0*N-1.0); /* mesh spacing */ /* Initial problem output */ printf("\n1D Heat PDE test problem:\n"); printf(" N = %li\n", (long int) udata->N); printf(" diffusion coefficient: k = %"GSYM"\n", udata->k); /* Initialize data structures */ y = N_VNew_Serial(N); /* Create serial vector for solution */ if (check_flag((void *) y, "N_VNew_Serial", 0)) return 1; N_VConst(0.0, y); /* Set initial conditions */ /* Call ARKStepCreate to initialize the ARK timestepper module and specify the right-hand side function in y'=f(t,y), the inital time T0, and the initial dependent variable vector y. Note: since this problem is fully implicit, we set f_E to NULL and f_I to f. 
*/ arkode_mem = ARKStepCreate(NULL, f, T0, y); if (check_flag((void *) arkode_mem, "ARKStepCreate", 0)) return 1; /* Set routines */ flag = ARKStepSetUserData(arkode_mem, (void *) udata); /* Pass udata to user functions */ if (check_flag(&flag, "ARKStepSetUserData", 1)) return 1; flag = ARKStepSetMaxNumSteps(arkode_mem, 10000); /* Increase max num steps */ if (check_flag(&flag, "ARKStepSetMaxNumSteps", 1)) return 1; flag = ARKStepSetPredictorMethod(arkode_mem, 1); /* Specify maximum-order predictor */ if (check_flag(&flag, "ARKStepSetPredictorMethod", 1)) return 1; flag = ARKStepSStolerances(arkode_mem, rtol, atol); /* Specify tolerances */ if (check_flag(&flag, "ARKStepSStolerances", 1)) return 1; /* Initialize PCG solver -- no preconditioning, with up to N iterations */ LS = SUNLinSol_PCG(y, 0, N); if (check_flag((void *)LS, "SUNLinSol_PCG", 0)) return 1; /* Linear solver interface -- set user-supplied J*v routine (no 'jtsetup' required) */ flag = ARKStepSetLinearSolver(arkode_mem, LS, NULL); /* Attach linear solver to ARKStep */ if (check_flag(&flag, "ARKStepSetLinearSolver", 1)) return 1; flag = ARKStepSetJacTimes(arkode_mem, NULL, Jac); /* Set the Jacobian routine */ if (check_flag(&flag, "ARKStepSetJacTimes", 1)) return 1; /* Specify linearly implicit RHS, with non-time-dependent Jacobian */ flag = ARKStepSetLinear(arkode_mem, 0); if (check_flag(&flag, "ARKStepSetLinear", 1)) return 1; /* output mesh to disk */ FID=fopen("heat_mesh.txt","w"); for (i=0; i<N; i++) fprintf(FID," %.16"ESYM"\n", udata->dx*i); fclose(FID); /* Open output stream for results, access data array */ UFID=fopen("heat1D.txt","w"); data = N_VGetArrayPointer(y); /* output initial condition to disk */ for (i=0; i<N; i++) fprintf(UFID," %.16"ESYM"", data[i]); fprintf(UFID,"\n"); /* Main time-stepping loop: calls ARKStepEvolve to perform the integration, then prints results. 
Stops when the final time has been reached */ t = T0; dTout = (Tf-T0)/Nt; tout = T0+dTout; printf(" t ||u||_rms\n"); printf(" -------------------------\n"); printf(" %10.6"FSYM" %10.6"FSYM"\n", t, SUNRsqrt(N_VDotProd(y,y)/N)); for (iout=0; iout<Nt; iout++) { flag = ARKStepEvolve(arkode_mem, tout, y, &t, ARK_NORMAL); /* call integrator */ if (check_flag(&flag, "ARKStepEvolve", 1)) break; printf(" %10.6"FSYM" %10.6"FSYM"\n", t, SUNRsqrt(N_VDotProd(y,y)/N)); /* print solution stats */ if (flag >= 0) { /* successful solve: update output time */ tout += dTout; tout = (tout > Tf) ? Tf : tout; } else { /* unsuccessful solve: break */ fprintf(stderr,"Solver failure, stopping integration\n"); break; } /* output results to disk */ for (i=0; i<N; i++) fprintf(UFID," %.16"ESYM"", data[i]); fprintf(UFID,"\n"); } printf(" -------------------------\n"); fclose(UFID); /* Print some final statistics */ flag = ARKStepGetNumSteps(arkode_mem, &nst); check_flag(&flag, "ARKStepGetNumSteps", 1); flag = ARKStepGetNumStepAttempts(arkode_mem, &nst_a); check_flag(&flag, "ARKStepGetNumStepAttempts", 1); flag = ARKStepGetNumRhsEvals(arkode_mem, &nfe, &nfi); check_flag(&flag, "ARKStepGetNumRhsEvals", 1); flag = ARKStepGetNumLinSolvSetups(arkode_mem, &nsetups); check_flag(&flag, "ARKStepGetNumLinSolvSetups", 1); flag = ARKStepGetNumErrTestFails(arkode_mem, &netf); check_flag(&flag, "ARKStepGetNumErrTestFails", 1); flag = ARKStepGetNumNonlinSolvIters(arkode_mem, &nni); check_flag(&flag, "ARKStepGetNumNonlinSolvIters", 1); flag = ARKStepGetNumNonlinSolvConvFails(arkode_mem, &ncfn); check_flag(&flag, "ARKStepGetNumNonlinSolvConvFails", 1); flag = ARKStepGetNumLinIters(arkode_mem, &nli); check_flag(&flag, "ARKStepGetNumLinIters", 1); flag = ARKStepGetNumJtimesEvals(arkode_mem, &nJv); check_flag(&flag, "ARKStepGetNumJtimesEvals", 1); flag = ARKStepGetNumLinConvFails(arkode_mem, &nlcf); check_flag(&flag, "ARKStepGetNumLinConvFails", 1); printf("\nFinal Solver Statistics:\n"); printf(" Internal solver 
steps = %li (attempted = %li)\n", nst, nst_a); printf(" Total RHS evals: Fe = %li, Fi = %li\n", nfe, nfi); printf(" Total linear solver setups = %li\n", nsetups); printf(" Total linear iterations = %li\n", nli); printf(" Total number of Jacobian-vector products = %li\n", nJv); printf(" Total number of linear solver convergence failures = %li\n", nlcf); printf(" Total number of Newton iterations = %li\n", nni); printf(" Total number of nonlinear solver convergence failures = %li\n", ncfn); printf(" Total number of error test failures = %li\n", netf); /* Clean up and return with successful completion */ N_VDestroy(y); /* Free vectors */ free(udata); /* Free user data */ ARKStepFree(&arkode_mem); /* Free integrator memory */ SUNLinSolFree(LS); /* Free linear solver */ return 0; }
/*-----------------------------------------------------------------
  cvLsSolve

  This routine interfaces between CVode and the generic
  SUNLinearSolver object LS, by setting the appropriate tolerance
  and scaling vectors, calling the solver, and accumulating
  statistics from the solve for use/reporting by CVode.

  Arguments:
    cv_mem  integrator memory; cv_mem->cv_lmem must be attached
    b       right-hand side on entry, overwritten with the solution
    weight  weight vector used for the norm test and for scaling
    ynow    current y, stored for the Atimes/Psolve interface routines
    fnow    current f, stored for the Atimes/Psolve interface routines

  Return value:
    0   success (including SUNLS_RES_REDUCED on nonlinear iteration 0)
    1   recoverable failure
   -1   unrecoverable failure
    CVLS_LMEM_NULL / CVLS_SUNLS_FAIL for interface-level errors, or the
    non-zero return value of the user's jtsetup routine.
  -----------------------------------------------------------------*/
int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight,
              N_Vector ynow, N_Vector fnow)
{
  CVLsMem  cvls_mem;
  realtype bnorm, deltar, delta, w_mean;
  int      curiter, nli_inc, retval, LSType;

  /* access CVLsMem structure */
  if (cv_mem->cv_lmem==NULL) {
    cvProcessError(cv_mem, CVLS_LMEM_NULL, "CVLS",
                   "cvLsSolve", MSG_LS_LMEM_NULL);
    return(CVLS_LMEM_NULL);
  }
  cvls_mem = (CVLsMem) cv_mem->cv_lmem;

  /* Retrieve the LS type */
  LSType = SUNLinSolGetType(cvls_mem->LS);

  /* get current nonlinear solver iteration; curiter starts at 0 so it has
     a defined value even if the query does not set it (its status is not
     checked here) */
  curiter = 0;
  retval = SUNNonlinSolGetCurIter(cv_mem->NLS, &curiter);

  /* If the linear solver is iterative:
     test norm(b), if small, return x = 0 or x = b;
     set linear solver tolerance (in left/right scaled 2-norm) */
  if ( (LSType == SUNLINEARSOLVER_ITERATIVE) ||
       (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE) ) {
    deltar = cvls_mem->eplifac * cv_mem->cv_tq[4];
    bnorm = N_VWrmsNorm(b, weight);
    if (bnorm <= deltar) {
      if (curiter > 0) N_VConst(ZERO, b);
      cvls_mem->last_flag = CVLS_SUCCESS;
      return(cvls_mem->last_flag);
    }
    delta = deltar * cvls_mem->sqrtN;
  } else {
    delta = ZERO;
  }

  /* Set vectors ycur and fcur for use by the Atimes and Psolve
     interface routines */
  cvls_mem->ycur = ynow;
  cvls_mem->fcur = fnow;

  /* Set initial guess x = 0 to LS */
  N_VConst(ZERO, cvls_mem->x);

  /* Set scaling vectors for LS to use (if applicable) */
  if (cvls_mem->LS->ops->setscalingvectors) {
    retval = SUNLinSolSetScalingVectors(cvls_mem->LS,
                                        weight,
                                        weight);
    if (retval != SUNLS_SUCCESS) {
      cvProcessError(cv_mem, CVLS_SUNLS_FAIL, "CVLS", "cvLsSolve",
                     "Error in calling SUNLinSolSetScalingVectors");
      cvls_mem->last_flag = CVLS_SUNLS_FAIL;
      return(cvls_mem->last_flag);
    }

  } else if ( (LSType == SUNLINEARSOLVER_ITERATIVE) ||
              (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE) ) {

    /* If solver is iterative and does not support scaling vectors, update the
       tolerance in an attempt to account for weight vector.  We make the
       following assumptions:
         1. w_i = w_mean, for i=0,...,n-1 (i.e. the weights are homogeneous)
         2. the linear solver uses a basic 2-norm to measure convergence
       Hence (using the notation from sunlinsol_spgmr.h, with S = diag(w)),
             || bbar - Abar xbar ||_2 < tol
         <=> || S b - S A x ||_2 < tol
         <=> || S (b - A x) ||_2 < tol
         <=> \sum_{i=0}^{n-1} (w_i (b - A x)_i)^2 < tol^2
         <=> w_mean^2 \sum_{i=0}^{n-1} (b - A x)_i^2 < tol^2
         <=> \sum_{i=0}^{n-1} (b - A x)_i^2 < tol^2 / w_mean^2
         <=> || b - A x ||_2 < tol / w_mean
       So we compute w_mean = ||w||_RMS = ||w||_2 / sqrt(n), and scale
       the desired tolerance accordingly. */
    w_mean = SUNRsqrt( N_VDotProd(weight, weight) ) / cvls_mem->sqrtN;
    delta /= w_mean;

  }

  /* If a user-provided jtsetup routine is supplied, call that here.
     On failure, report the jtsetup return code itself (held in last_flag),
     not whatever retval was left over from an earlier call. */
  if (cvls_mem->jtsetup) {
    cvls_mem->last_flag = cvls_mem->jtsetup(cv_mem->cv_tn, ynow, fnow,
                                            cvls_mem->jt_data);
    cvls_mem->njtsetup++;
    if (cvls_mem->last_flag != 0) {
      cvProcessError(cv_mem, cvls_mem->last_flag, "CVLS",
                     "cvLsSolve", MSG_LS_JTSETUP_FAILED);
      return(cvls_mem->last_flag);
    }
  }

  /* Call solver, and copy x to b */
  retval = SUNLinSolSolve(cvls_mem->LS, cvls_mem->A, cvls_mem->x, b, delta);
  N_VScale(ONE, cvls_mem->x, b);

  /* If using a direct or matrix-iterative solver, BDF method, and gamma has changed,
     scale the correction to account for change in gamma */
  if ( ((LSType == SUNLINEARSOLVER_DIRECT) ||
        (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE)) &&
       (cv_mem->cv_lmm == CV_BDF) &&
       (cv_mem->cv_gamrat != ONE) )
    N_VScale(TWO/(ONE + cv_mem->cv_gamrat), b, b);

  /* Retrieve statistics from iterative linear solvers */
  nli_inc = 0;
  if ( ((LSType == SUNLINEARSOLVER_ITERATIVE) ||
        (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE)) &&
       (cvls_mem->LS->ops->numiters) )
    nli_inc = SUNLinSolNumIters(cvls_mem->LS);

  /* Increment counters nli and ncfl */
  cvls_mem->nli += nli_inc;
  if (retval != SUNLS_SUCCESS) cvls_mem->ncfl++;

  /* Interpret solver return value  */
  cvls_mem->last_flag = retval;

  switch(retval) {

  case SUNLS_SUCCESS:
    return(0);
    break;
  case SUNLS_RES_REDUCED:
    /* allow reduction but not solution on first Newton iteration,
       otherwise return with a recoverable failure */
    if (curiter == 0) return(0);
    else              return(1);
    break;
  case SUNLS_CONV_FAIL:
  case SUNLS_ATIMES_FAIL_REC:
  case SUNLS_PSOLVE_FAIL_REC:
  case SUNLS_PACKAGE_FAIL_REC:
  case SUNLS_QRFACT_FAIL:
  case SUNLS_LUFACT_FAIL:
    return(1);
    break;
  case SUNLS_MEM_NULL:
  case SUNLS_ILL_INPUT:
  case SUNLS_MEM_FAIL:
  case SUNLS_GS_FAIL:
  case SUNLS_QRSOL_FAIL:
    return(-1);
    break;
  case SUNLS_PACKAGE_FAIL_UNREC:
    cvProcessError(cv_mem, SUNLS_PACKAGE_FAIL_UNREC, "CVLS",
                   "cvLsSolve",
                    "Failure in SUNLinSol external package");
    return(-1);
    break;
  case SUNLS_ATIMES_FAIL_UNREC:
    cvProcessError(cv_mem, SUNLS_ATIMES_FAIL_UNREC, "CVLS",
                   "cvLsSolve", MSG_LS_JTIMES_FAILED);
    return(-1);
    break;
  case SUNLS_PSOLVE_FAIL_UNREC:
    cvProcessError(cv_mem, SUNLS_PSOLVE_FAIL_UNREC, "CVLS",
                   "cvLsSolve", MSG_LS_PSOLVE_FAILED);
    return(-1);
    break;
  }

  /* any return code not listed above is treated as success */
  return(0);
}
int SptfqmrSolve(SptfqmrMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, realtype delta, void *P_data, N_Vector sx, N_Vector sb, ATimesFn atimes, PSolveFn psolve, realtype *res_norm, int *nli, int *nps) { realtype alpha, tau, eta, beta, c, sigma, v_bar, omega; realtype rho[2]; realtype r_init_norm, r_curr_norm; realtype temp_val; booleantype preOnLeft, preOnRight, scale_x, scale_b, converged; booleantype b_ok; int n, m, ier; /* Exit immediately if memory pointer is NULL */ if (mem == NULL) return(SPTFQMR_MEM_NULL); temp_val = r_curr_norm = -ONE; /* Initialize to avoid compiler warnings */ *nli = *nps = 0; /* Initialize counters */ converged = FALSE; /* Initialize convergence flag */ b_ok = FALSE; if ((pretype != PREC_LEFT) && (pretype != PREC_RIGHT) && (pretype != PREC_BOTH)) pretype = PREC_NONE; preOnLeft = ((pretype == PREC_BOTH) || (pretype == PREC_LEFT)); preOnRight = ((pretype == PREC_BOTH) || (pretype == PREC_RIGHT)); scale_x = (sx != NULL); scale_b = (sb != NULL); /* Set r_star to initial (unscaled) residual r_star = r_0 = b - A*x_0 */ /* NOTE: if x == 0 then just set residual to b and continue */ if (N_VDotProd(x, x) == ZERO) N_VScale(ONE, b, r_star); else { ier = atimes(A_data, x, r_star); if (ier != 0) return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC); N_VLinearSum(ONE, b, -ONE, r_star, r_star); } /* Apply left preconditioner and b-scaling to r_star (or really just r_0) */ if (preOnLeft) { ier = psolve(P_data, r_star, vtemp1, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? 
SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, r_star, vtemp1); if (scale_b) N_VProd(sb, vtemp1, r_star); else N_VScale(ONE, vtemp1, r_star); /* Initialize rho[0] */ /* NOTE: initialized here to reduce number of computations - avoid need to compute r_star^T*r_star twice, and avoid needlessly squaring values */ rho[0] = N_VDotProd(r_star, r_star); /* Compute norm of initial residual (r_0) to see if we really need to do anything */ *res_norm = r_init_norm = RSqrt(rho[0]); if (r_init_norm <= delta) return(SPTFQMR_SUCCESS); /* Set v_ = A*r_0 (preconditioned and scaled) */ if (scale_x) N_VDiv(r_star, sx, vtemp1); else N_VScale(ONE, r_star, vtemp1); if (preOnRight) { N_VScale(ONE, vtemp1, v_); ier = psolve(P_data, v_, vtemp1, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } ier = atimes(A_data, vtemp1, v_); if (ier != 0) return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC); if (preOnLeft) { ier = psolve(P_data, v_, vtemp1, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, v_, vtemp1); if (scale_b) N_VProd(sb, vtemp1, v_); else N_VScale(ONE, vtemp1, v_); /* Initialize remaining variables */ N_VScale(ONE, r_star, r_[0]); N_VScale(ONE, r_star, u_); N_VScale(ONE, r_star, p_); N_VConst(ZERO, d_); tau = r_init_norm; v_bar = eta = ZERO; /* START outer loop */ for (n = 0; n < l_max; ++n) { /* Increment linear iteration counter */ (*nli)++; /* sigma = r_star^T*v_ */ sigma = N_VDotProd(r_star, v_); /* alpha = rho[0]/sigma */ alpha = rho[0]/sigma; /* q_ = u_-alpha*v_ */ N_VLinearSum(ONE, u_, -alpha, v_, q_); /* r_[1] = r_[0]-alpha*A*(u_+q_) */ N_VLinearSum(ONE, u_, ONE, q_, r_[1]); if (scale_x) N_VDiv(r_[1], sx, r_[1]); if (preOnRight) { N_VScale(ONE, r_[1], vtemp1); ier = psolve(P_data, vtemp1, r_[1], PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? 
SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } ier = atimes(A_data, r_[1], vtemp1); if (ier != 0) return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC); if (preOnLeft) { ier = psolve(P_data, vtemp1, r_[1], PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, vtemp1, r_[1]); if (scale_b) N_VProd(sb, r_[1], vtemp1); else N_VScale(ONE, r_[1], vtemp1); N_VLinearSum(ONE, r_[0], -alpha, vtemp1, r_[1]); /* START inner loop */ for (m = 0; m < 2; ++m) { /* d_ = [*]+(v_bar^2*eta/alpha)*d_ */ /* NOTES: * (1) [*] = u_ if m == 0, and q_ if m == 1 * (2) using temp_val reduces the number of required computations * if the inner loop is executed twice */ if (m == 0) { temp_val = RSqrt(N_VDotProd(r_[1], r_[1])); omega = RSqrt(RSqrt(N_VDotProd(r_[0], r_[0]))*temp_val); N_VLinearSum(ONE, u_, SQR(v_bar)*eta/alpha, d_, d_); } else { omega = temp_val; N_VLinearSum(ONE, q_, SQR(v_bar)*eta/alpha, d_, d_); } /* v_bar = omega/tau */ v_bar = omega/tau; /* c = (1+v_bar^2)^(-1/2) */ c = ONE / RSqrt(ONE+SQR(v_bar)); /* tau = tau*v_bar*c */ tau = tau*v_bar*c; /* eta = c^2*alpha */ eta = SQR(c)*alpha; /* x = x+eta*d_ */ N_VLinearSum(ONE, x, eta, d_, x); /* Check for convergence... 
*/ /* NOTE: just use approximation to norm of residual, if possible */ *res_norm = r_curr_norm = tau*RSqrt(m+1); /* Exit inner loop if iteration has converged based upon approximation to norm of current residual */ if (r_curr_norm <= delta) { converged = TRUE; break; } /* Decide if actual norm of residual vector should be computed */ /* NOTES: * (1) if r_curr_norm > delta, then check if actual residual norm * is OK (recall we first compute an approximation) * (2) if r_curr_norm >= r_init_norm and m == 1 and n == l_max, then * compute actual residual norm to see if the iteration can be * saved * (3) the scaled and preconditioned right-hand side of the given * linear system (denoted by b) is only computed once, and the * result is stored in vtemp3 so it can be reused - reduces the * number of psovles if using left preconditioning */ if ((r_curr_norm > delta) || (r_curr_norm >= r_init_norm && m == 1 && n == l_max)) { /* Compute norm of residual ||b-A*x||_2 (preconditioned and scaled) */ if (scale_x) N_VDiv(x, sx, vtemp1); else N_VScale(ONE, x, vtemp1); if (preOnRight) { ier = psolve(P_data, vtemp1, vtemp2, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_UNREC); N_VScale(ONE, vtemp2, vtemp1); } ier = atimes(A_data, vtemp1, vtemp2); if (ier != 0) return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC); if (preOnLeft) { ier = psolve(P_data, vtemp2, vtemp1, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, vtemp2, vtemp1); if (scale_b) N_VProd(sb, vtemp1, vtemp2); else N_VScale(ONE, vtemp1, vtemp2); /* Only precondition and scale b once (result saved for reuse) */ if (!b_ok) { b_ok = TRUE; if (preOnLeft) { ier = psolve(P_data, b, vtemp3, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? 
SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, b, vtemp3); if (scale_b) N_VProd(sb, vtemp3, vtemp3); } N_VLinearSum(ONE, vtemp3, -ONE, vtemp2, vtemp1); *res_norm = r_curr_norm = RSqrt(N_VDotProd(vtemp1, vtemp1)); /* Exit inner loop if inequality condition is satisfied (meaning exit if we have converged) */ if (r_curr_norm <= delta) { converged = TRUE; break; } } } /* END inner loop */ /* If converged, then exit outer loop as well */ if (converged == TRUE) break; /* rho[1] = r_star^T*r_[1] */ rho[1] = N_VDotProd(r_star, r_[1]); /* beta = rho[1]/rho[0] */ beta = rho[1]/rho[0]; /* u_ = r_[1]+beta*q_ */ N_VLinearSum(ONE, r_[1], beta, q_, u_); /* p_ = u_+beta*(q_+beta*p_) */ N_VLinearSum(beta, q_, SQR(beta), p_, p_); N_VLinearSum(ONE, u_, ONE, p_, p_); /* v_ = A*p_ */ if (scale_x) N_VDiv(p_, sx, vtemp1); else N_VScale(ONE, p_, vtemp1); if (preOnRight) { N_VScale(ONE, vtemp1, v_); ier = psolve(P_data, v_, vtemp1, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } ier = atimes(A_data, vtemp1, v_); if (ier != 0) return((ier < 0) ? SPTFQMR_ATIMES_FAIL_UNREC : SPTFQMR_ATIMES_FAIL_REC); if (preOnLeft) { ier = psolve(P_data, v_, vtemp1, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_REC); } else N_VScale(ONE, v_, vtemp1); if (scale_b) N_VProd(sb, vtemp1, v_); else N_VScale(ONE, vtemp1, v_); /* Shift variable values */ /* NOTE: reduces storage requirements */ N_VScale(ONE, r_[1], r_[0]); rho[0] = rho[1]; } /* END outer loop */ /* Determine return value */ /* If iteration converged or residual was reduced, then return current iterate (x) */ if ((converged == TRUE) || (r_curr_norm < r_init_norm)) { if (scale_x) N_VDiv(x, sx, x); if (preOnRight) { ier = psolve(P_data, x, vtemp1, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? 
SPTFQMR_PSOLVE_FAIL_UNREC : SPTFQMR_PSOLVE_FAIL_UNREC); N_VScale(ONE, vtemp1, x); } if (converged == TRUE) return(SPTFQMR_SUCCESS); else return(SPTFQMR_RES_REDUCED); } /* Otherwise, return error code */ else return(SPTFQMR_CONV_FAIL); }
/*--------------------------------------------------------------- Function : PcgSolve --------------------------------------------------------------*/ int PcgSolve(PcgMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, realtype delta, void *P_data, N_Vector w, ATimesFn atimes, PSolveFn psolve, realtype *res_norm, int *nli, int *nps) { realtype alpha, beta, r0_norm, rho, rz, rz_old; N_Vector r, p, z, Ap; booleantype UsePrec, converged; int l, l_max, ier; if (mem == NULL) return(PCG_MEM_NULL); /* Make local copies of mem variables */ l_max = mem->l_max; r = mem->r; p = mem->p; z = mem->z; Ap = mem->Ap; /* Initialize counters and converged flag */ *nli = *nps = 0; converged = FALSE; /* Set preconditioning flag */ UsePrec = ((pretype == PREC_BOTH) || (pretype == PREC_LEFT) || (pretype == PREC_RIGHT)); /* Set r to initial residual r_0 = b - A*x_0 */ if (N_VDotProd(x, x) == ZERO) N_VScale(ONE, b, r); else { ier = atimes(A_data, x, r); if (ier != 0) return((ier < 0) ? PCG_ATIMES_FAIL_UNREC : PCG_ATIMES_FAIL_REC); N_VLinearSum(ONE, b, -ONE, r, r); } /* Set rho to L2 norm of r, and return if small */ *res_norm = r0_norm = rho = N_VWrmsNorm(r,w); if (rho <= delta) return(PCG_SUCCESS); /* Apply preconditioner and b-scaling to r = r_0 */ if (UsePrec) { ier = psolve(P_data, r, z, PREC_LEFT); /* z = P^{-1}r */ (*nps)++; if (ier != 0) return((ier < 0) ? PCG_PSOLVE_FAIL_UNREC : PCG_PSOLVE_FAIL_REC); } else N_VScale(ONE, r, z); /* Initialize rz to <r,z> */ rz = N_VDotProd(r, z); /* Copy z to p */ N_VScale(ONE, z, p); /* Begin main iteration loop */ for(l=0; l<l_max; l++) { /* increment counter */ (*nli)++; /* Generate Ap = A*p */ ier = atimes(A_data, p, Ap ); if (ier != 0) return((ier < 0) ? 
PCG_ATIMES_FAIL_UNREC : PCG_ATIMES_FAIL_REC); /* Calculate alpha = <r,z> / <Ap,p> */ alpha = rz / N_VDotProd(Ap, p); /* Update x = x + alpha*p */ N_VLinearSum(ONE, x, alpha, p, x); /* Update r = r - alpha*Ap */ N_VLinearSum(ONE, r, -alpha, Ap, r); /* Set rho and check convergence */ *res_norm = rho = N_VWrmsNorm(r, w); if (rho <= delta) { converged = TRUE; break; } /* Apply preconditioner: z = P^{-1}*r */ if (UsePrec) { ier = psolve(P_data, r, z, PREC_LEFT); (*nps)++; if (ier != 0) return((ier < 0) ? PCG_PSOLVE_FAIL_UNREC : PCG_PSOLVE_FAIL_REC); } else N_VScale(ONE, r, z); /* update rz */ rz_old = rz; rz = N_VDotProd(r, z); /* Calculate beta = <r,z> / <r_old,z_old> */ beta = rz / rz_old; /* Update p = z + beta*p */ N_VLinearSum(ONE, z, beta, p, p); } /* Main loop finished, return with result */ if (converged == TRUE) return(PCG_SUCCESS); if (rho < r0_norm) return(PCG_RES_REDUCED); return(PCG_CONV_FAIL); }
/*---------------------------------------------------------------
  CVodeSetLinearSolver specifies the linear solver

  Attaches the SUNLinearSolver LS (and, optionally, the SUNMatrix A)
  to the integrator memory cvode_mem. Any linear solver interface
  already attached is released through cv_lfree first; then a fresh
  CVLsMemRec is allocated, filled with defaults and stored in
  cv_mem->cv_lmem.

  Returns:
    CVLS_SUCCESS     the interface was attached
    CVLS_MEM_NULL    cvode_mem is NULL
    CVLS_ILL_INPUT   LS is NULL, LS or the integrator's template
                     vector lacks a required operation, or LS and A
                     are incompatible (iterative LS without a
                     setatimes operation; direct or matrix-iterative
                     LS with A == NULL)
    CVLS_MEM_FAIL    an allocation or clone failed
    CVLS_SUNLS_FAIL  SUNLinSolSetATimes or
                     SUNLinSolSetPreconditioner reported an error
  ---------------------------------------------------------------*/
int CVodeSetLinearSolver(void *cvode_mem, SUNLinearSolver LS,
                         SUNMatrix A)
{
  CVodeMem cv_mem;
  CVLsMem  cvls_mem;
  int      retval, LSType;

  /* Return immediately if either cvode_mem or LS inputs are NULL */
  if (cvode_mem == NULL) {
    cvProcessError(NULL, CVLS_MEM_NULL, "CVLS",
                   "CVodeSetLinearSolver", MSG_LS_CVMEM_NULL);
    return(CVLS_MEM_NULL);
  }
  if (LS == NULL) {
    cvProcessError(NULL, CVLS_ILL_INPUT, "CVLS",
                   "CVodeSetLinearSolver",
                   "LS must be non-NULL");
    return(CVLS_ILL_INPUT);
  }
  cv_mem = (CVodeMem) cvode_mem;

  /* Test if solver is compatible with LS interface */
  if ( (LS->ops->gettype == NULL) ||
       (LS->ops->initialize == NULL) ||
       (LS->ops->setup == NULL) ||
       (LS->ops->solve == NULL) ) {
    cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVLS",
                   "CVodeSetLinearSolver",
                   "LS object is missing a required operation");
    return(CVLS_ILL_INPUT);
  }

  /* Test if vector is compatible with LS interface */
  if ( (cv_mem->cv_tempv->ops->nvconst == NULL) ||
       (cv_mem->cv_tempv->ops->nvdotprod == NULL) ) {
    cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVLS",
                   "CVodeSetLinearSolver", MSG_LS_BAD_NVECTOR);
    return(CVLS_ILL_INPUT);
  }

  /* Retrieve the LS type */
  LSType = SUNLinSolGetType(LS);

  /* Check for compatible LS type, matrix and "atimes" support */
  if ((LSType == SUNLINEARSOLVER_ITERATIVE) && (LS->ops->setatimes == NULL)) {
    cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVLS", "CVodeSetLinearSolver",
                   "Incompatible inputs: iterative LS must support ATimes routine");
    return(CVLS_ILL_INPUT);
  }
  if ((LSType == SUNLINEARSOLVER_DIRECT) && (A == NULL)) {
    cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVLS", "CVodeSetLinearSolver",
                   "Incompatible inputs: direct LS requires non-NULL matrix");
    return(CVLS_ILL_INPUT);
  }
  if ((LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE) && (A == NULL)) {
    cvProcessError(cv_mem, CVLS_ILL_INPUT, "CVLS", "CVodeSetLinearSolver",
                   "Incompatible inputs: matrix-iterative LS requires non-NULL matrix");
    return(CVLS_ILL_INPUT);
  }

  /* free any existing system solver attached to CVode */
  if (cv_mem->cv_lfree)  cv_mem->cv_lfree(cv_mem);

  /* Set four main system linear solver function fields in cv_mem */
  cv_mem->cv_linit  = cvLsInitialize;
  cv_mem->cv_lsetup = cvLsSetup;
  cv_mem->cv_lsolve = cvLsSolve;
  cv_mem->cv_lfree  = cvLsFree;

  /* Allocate memory for CVLsMemRec */
  cvls_mem = (CVLsMem) malloc(sizeof(struct CVLsMemRec));
  if (cvls_mem == NULL) {
    cvProcessError(cv_mem, CVLS_MEM_FAIL, "CVLS",
                   "CVodeSetLinearSolver", MSG_LS_MEM_FAIL);
    return(CVLS_MEM_FAIL);
  }
  /* Zero every field so that pointers not set below (A, savedJ, ytemp, x)
     start out NULL */
  memset(cvls_mem, 0, sizeof(struct CVLsMemRec));

  /* set SUNLinearSolver pointer */
  cvls_mem->LS = LS;

  /* Set defaults for Jacobian-related fields */
  if (A != NULL) {
    cvls_mem->jacDQ  = SUNTRUE;
    cvls_mem->jac    = cvLsDQJac;
    cvls_mem->J_data = cv_mem;
  } else {
    cvls_mem->jacDQ  = SUNFALSE;
    cvls_mem->jac    = NULL;
    cvls_mem->J_data = NULL;
  }
  cvls_mem->jtimesDQ = SUNTRUE;
  cvls_mem->jtsetup  = NULL;
  cvls_mem->jtimes   = cvLsDQJtimes;
  cvls_mem->jt_data  = cv_mem;

  /* Set defaults for preconditioner-related fields */
  cvls_mem->pset   = NULL;
  cvls_mem->psolve = NULL;
  cvls_mem->pfree  = NULL;
  cvls_mem->P_data = cv_mem->cv_user_data;

  /* Initialize counters */
  cvLsInitializeCounters(cvls_mem);

  /* Set default values for the rest of the LS parameters */
  cvls_mem->msbj      = CVLS_MSBJ;
  cvls_mem->jbad      = SUNTRUE;
  cvls_mem->eplifac   = CVLS_EPLIN;
  cvls_mem->last_flag = CVLS_SUCCESS;

  /* If LS supports ATimes, attach CVLs routine */
  if (LS->ops->setatimes) {
    retval = SUNLinSolSetATimes(LS, cv_mem, cvLsATimes);
    if (retval != SUNLS_SUCCESS) {
      cvProcessError(cv_mem, CVLS_SUNLS_FAIL, "CVLS",
                     "CVodeSetLinearSolver",
                     "Error in calling SUNLinSolSetATimes");
      free(cvls_mem); cvls_mem = NULL;
      return(CVLS_SUNLS_FAIL);
    }
  }

  /* If LS supports preconditioning, initialize pset/psol to NULL */
  if (LS->ops->setpreconditioner) {
    retval = SUNLinSolSetPreconditioner(LS, cv_mem, NULL, NULL);
    if (retval != SUNLS_SUCCESS) {
      cvProcessError(cv_mem, CVLS_SUNLS_FAIL, "CVLS",
                     "CVodeSetLinearSolver",
                     "Error in calling SUNLinSolSetPreconditioner");
      free(cvls_mem); cvls_mem = NULL;
      return(CVLS_SUNLS_FAIL);
    }
  }

  /* When using a non-NULL SUNMatrix object, store pointer to A and
     create saved_J */
  if (A != NULL) {
    cvls_mem->A = A;
    cvls_mem->savedJ = SUNMatClone(A);
    if (cvls_mem->savedJ == NULL) {
      cvProcessError(cv_mem, CVLS_MEM_FAIL, "CVLS",
                     "CVodeSetLinearSolver", MSG_LS_MEM_FAIL);
      free(cvls_mem); cvls_mem = NULL;
      return(CVLS_MEM_FAIL);
    }
  }

  /* Allocate memory for ytemp and x */
  cvls_mem->ytemp = N_VClone(cv_mem->cv_tempv);
  if (cvls_mem->ytemp == NULL) {
    cvProcessError(cv_mem, CVLS_MEM_FAIL, "CVLS",
                   "CVodeSetLinearSolver", MSG_LS_MEM_FAIL);
    /* savedJ is still NULL when no matrix was supplied; only destroy a
       matrix that was actually cloned */
    if (cvls_mem->savedJ != NULL) SUNMatDestroy(cvls_mem->savedJ);
    free(cvls_mem); cvls_mem = NULL;
    return(CVLS_MEM_FAIL);
  }

  cvls_mem->x = N_VClone(cv_mem->cv_tempv);
  if (cvls_mem->x == NULL) {
    cvProcessError(cv_mem, CVLS_MEM_FAIL, "CVLS",
                   "CVodeSetLinearSolver", MSG_LS_MEM_FAIL);
    /* same guard as above: savedJ is NULL when A == NULL */
    if (cvls_mem->savedJ != NULL) SUNMatDestroy(cvls_mem->savedJ);
    N_VDestroy(cvls_mem->ytemp);
    free(cvls_mem); cvls_mem = NULL;
    return(CVLS_MEM_FAIL);
  }

  /* For iterative LS, compute sqrtN from a dot product: ytemp is filled
     with ones, so sqrt(ytemp . ytemp) is the square root of its length */
  if ( (LSType == SUNLINEARSOLVER_ITERATIVE) ||
       (LSType == SUNLINEARSOLVER_MATRIX_ITERATIVE) ) {
    N_VConst(ONE, cvls_mem->ytemp);
    cvls_mem->sqrtN = SUNRsqrt( N_VDotProd(cvls_mem->ytemp,
                                           cvls_mem->ytemp) );
  }

  /* Attach linear solver memory to integrator memory */
  cv_mem->cv_lmem = cvls_mem;

  return(CVLS_SUCCESS);
}
/*---------------------------------------------------------------- Function : SpfgmrSolve ---------------------------------------------------------------*/ int SpfgmrSolve(SpfgmrMem mem, void *A_data, N_Vector x, N_Vector b, int pretype, int gstype, realtype delta, int max_restarts, int maxit, void *P_data, N_Vector s1, N_Vector s2, ATimesFn atimes, PSolveFn psolve, realtype *res_norm, int *nli, int *nps) { N_Vector *V, *Z, xcor, vtemp; realtype **Hes, *givens, *yg; realtype beta, rotation_product, r_norm, s_product, rho; booleantype preOnRight, scale1, scale2, converged; int i, j, k, l, l_max, krydim, ier, ntries; if (mem == NULL) return(SPFGMR_MEM_NULL); /* Initialize some variables */ krydim = 0; /* Make local copies of mem variables. */ l_max = mem->l_max; V = mem->V; Z = mem->Z; Hes = mem->Hes; givens = mem->givens; xcor = mem->xcor; yg = mem->yg; vtemp = mem->vtemp; *nli = *nps = 0; /* Initialize counters */ converged = SUNFALSE; /* Initialize converged flag */ /* If maxit is greater than l_max, then set maxit=l_max */ if (maxit > l_max) maxit = l_max; /* Check for legal value of max_restarts */ if (max_restarts < 0) max_restarts = 0; /* Set preconditioning flag (enabling any preconditioner implies right preconditioning, since FGMRES does not support left preconditioning) */ preOnRight = ((pretype == PREC_RIGHT) || (pretype == PREC_BOTH) || (pretype == PREC_LEFT)); /* Set scaling flags */ scale1 = (s1 != NULL); scale2 = (s2 != NULL); /* Set vtemp to initial (unscaled) residual r_0 = b - A*x_0. */ if (N_VDotProd(x, x) == ZERO) { N_VScale(ONE, b, vtemp); } else { ier = atimes(A_data, x, vtemp); if (ier != 0) return((ier < 0) ? SPFGMR_ATIMES_FAIL_UNREC : SPFGMR_ATIMES_FAIL_REC); N_VLinearSum(ONE, b, -ONE, vtemp, vtemp); } /* Apply left scaling to vtemp = r_0 to fill V[0]. 
*/ if (scale1) { N_VProd(s1, vtemp, V[0]); } else { N_VScale(ONE, vtemp, V[0]); } /* Set r_norm = beta to L2 norm of V[0] = s1 r_0, and return if small */ *res_norm = r_norm = beta = SUNRsqrt(N_VDotProd(V[0], V[0])); if (r_norm <= delta) return(SPFGMR_SUCCESS); /* Initialize rho to avoid compiler warning message */ rho = beta; /* Set xcor = 0. */ N_VConst(ZERO, xcor); /* Begin outer iterations: up to (max_restarts + 1) attempts. */ for (ntries=0; ntries<=max_restarts; ntries++) { /* Initialize the Hessenberg matrix Hes and Givens rotation product. Normalize the initial vector V[0]. */ for (i=0; i<=l_max; i++) for (j=0; j<l_max; j++) Hes[i][j] = ZERO; rotation_product = ONE; N_VScale(ONE/r_norm, V[0], V[0]); /* Inner loop: generate Krylov sequence and Arnoldi basis. */ for (l=0; l<maxit; l++) { (*nli)++; krydim = l + 1; /* Generate A-tilde V[l], where A-tilde = s1 A P_inv s2_inv. */ /* Apply right scaling: vtemp = s2_inv V[l]. */ if (scale2) N_VDiv(V[l], s2, vtemp); else N_VScale(ONE, V[l], vtemp); /* Apply right preconditioner: vtemp = Z[l] = P_inv s2_inv V[l]. */ if (preOnRight) { N_VScale(ONE, vtemp, V[l+1]); ier = psolve(P_data, V[l+1], vtemp, delta, PREC_RIGHT); (*nps)++; if (ier != 0) return((ier < 0) ? SPFGMR_PSOLVE_FAIL_UNREC : SPFGMR_PSOLVE_FAIL_REC); } N_VScale(ONE, vtemp, Z[l]); /* Apply A: V[l+1] = A P_inv s2_inv V[l]. */ ier = atimes(A_data, vtemp, V[l+1]); if (ier != 0) return((ier < 0) ? SPFGMR_ATIMES_FAIL_UNREC : SPFGMR_ATIMES_FAIL_REC); /* Apply left scaling: V[l+1] = s1 A P_inv s2_inv V[l]. */ if (scale1) N_VProd(s1, V[l+1], V[l+1]); /* Orthogonalize V[l+1] against previous V[i]: V[l+1] = w_tilde. */ if (gstype == CLASSICAL_GS) { if (ClassicalGS(V, Hes, l+1, l_max, &(Hes[l+1][l]), vtemp, yg) != 0) return(SPFGMR_GS_FAIL); } else { if (ModifiedGS(V, Hes, l+1, l_max, &(Hes[l+1][l])) != 0) return(SPFGMR_GS_FAIL); } /* Update the QR factorization of Hes. 
*/ if(QRfact(krydim, Hes, givens, l) != 0 ) return(SPFGMR_QRFACT_FAIL); /* Update residual norm estimate; break if convergence test passes. */ rotation_product *= givens[2*l+1]; *res_norm = rho = SUNRabs(rotation_product*r_norm); if (rho <= delta) { converged = SUNTRUE; break; } /* Normalize V[l+1] with norm value from the Gram-Schmidt routine. */ N_VScale(ONE/Hes[l+1][l], V[l+1], V[l+1]); } /* Inner loop is done. Compute the new correction vector xcor. */ /* Construct g, then solve for y. */ yg[0] = r_norm; for (i=1; i<=krydim; i++) yg[i]=ZERO; if (QRsol(krydim, Hes, givens, yg) != 0) return(SPFGMR_QRSOL_FAIL); /* Add correction vector Z_l y to xcor. */ for (k=0; k<krydim; k++) N_VLinearSum(yg[k], Z[k], ONE, xcor, xcor); /* If converged, construct the final solution vector x and return. */ if (converged) { N_VLinearSum(ONE, x, ONE, xcor, x); return(SPFGMR_SUCCESS); } /* Not yet converged; if allowed, prepare for restart. */ if (ntries == max_restarts) break; /* Construct last column of Q in yg. */ s_product = ONE; for (i=krydim; i>0; i--) { yg[i] = s_product*givens[2*i-2]; s_product *= givens[2*i-1]; } yg[0] = s_product; /* Scale r_norm and yg. */ r_norm *= s_product; for (i=0; i<=krydim; i++) yg[i] *= r_norm; r_norm = SUNRabs(r_norm); /* Multiply yg by V_(krydim+1) to get last residual vector; restart. */ N_VScale(yg[0], V[0], V[0]); for (k=1; k<=krydim; k++) N_VLinearSum(yg[k], V[k], ONE, V[0], V[0]); } /* Failed to converge, even after allowed restarts. If the residual norm was reduced below its initial value, compute and return x anyway. Otherwise return failure flag. */ if (rho < beta) { N_VLinearSum(ONE, x, ONE, xcor, x); return(SPFGMR_RES_REDUCED); } return(SPFGMR_CONV_FAIL); }
/* Main Program */ int main() { /* general problem parameters */ realtype T0 = RCONST(0.0); /* initial time */ realtype Tf = RCONST(1.0); /* final time */ realtype rtol = 1.e-3; /* relative tolerance */ realtype atol = 1.e-10; /* absolute tolerance */ realtype hscale = 1.0; /* time step change factor on resizes */ UserData udata = NULL; realtype *data; long int N = 21; /* initial spatial mesh size */ realtype refine = 3.e-3; /* adaptivity refinement tolerance */ realtype k = 0.5; /* heat conductivity */ long int i, nni, nni_cur=0, nni_tot=0, nli, nli_tot=0; int iout=0; /* general problem variables */ int flag; /* reusable error-checking flag */ N_Vector y = NULL; /* empty vector for storing solution */ N_Vector y2 = NULL; /* empty vector for storing solution */ N_Vector yt = NULL; /* empty vector for swapping */ void *arkode_mem = NULL; /* empty ARKode memory structure */ FILE *XFID, *UFID; realtype t, olddt, newdt; realtype *xnew = NULL; long int Nnew; /* allocate and fill initial udata structure */ udata = (UserData) malloc(sizeof(*udata)); udata->N = N; udata->k = k; udata->refine_tol = refine; udata->x = malloc(N * sizeof(realtype)); for (i=0; i<N; i++) udata->x[i] = 1.0*i/(N-1); /* Initial problem output */ printf("\n1D adaptive Heat PDE test problem:\n"); printf(" diffusion coefficient: k = %g\n", udata->k); printf(" initial N = %li\n", udata->N); /* Initialize data structures */ y = N_VNew_Serial(N); /* Create initial serial vector for solution */ if (check_flag((void *) y, "N_VNew_Serial", 0)) return 1; N_VConst(0.0, y); /* Set initial conditions */ /* output mesh to disk */ XFID=fopen("heat_mesh.txt","w"); /* output initial mesh to disk */ for (i=0; i<udata->N; i++) fprintf(XFID," %.16e", udata->x[i]); fprintf(XFID,"\n"); /* Open output stream for results, access data array */ UFID=fopen("heat1D.txt","w"); /* output initial condition to disk */ data = N_VGetArrayPointer(y); for (i=0; i<udata->N; i++) fprintf(UFID," %.16e", data[i]); fprintf(UFID,"\n"); /* 
Create the solver memory */ arkode_mem = ARKodeCreate(); if (check_flag((void *) arkode_mem, "ARKodeCreate", 0)) return 1; /* Initialize the integrator memory */ flag = ARKodeInit(arkode_mem, NULL, f, T0, y); if (check_flag(&flag, "ARKodeInit", 1)) return 1; /* Set routines */ flag = ARKodeSetUserData(arkode_mem, (void *) udata); /* Pass udata to user functions */ if (check_flag(&flag, "ARKodeSetUserData", 1)) return 1; flag = ARKodeSetMaxNumSteps(arkode_mem, 10000); /* Increase max num steps */ if (check_flag(&flag, "ARKodeSetMaxNumSteps", 1)) return 1; flag = ARKodeSStolerances(arkode_mem, rtol, atol); /* Specify tolerances */ if (check_flag(&flag, "ARKodeSStolerances", 1)) return 1; flag = ARKodeSetAdaptivityMethod(arkode_mem, 2, 1, 0, NULL); /* Set adaptivity method */ if (check_flag(&flag, "ARKodeSetAdaptivityMethod", 1)) return 1; flag = ARKodeSetPredictorMethod(arkode_mem, 0); /* Set predictor method */ if (check_flag(&flag, "ARKodeSetPredictorMethod", 1)) return 1; /* Linear solver specification */ flag = ARKPcg(arkode_mem, 0, N); if (check_flag(&flag, "ARKPcg", 1)) return 1; flag = ARKSpilsSetJacTimesVecFn(arkode_mem, Jac); if (check_flag(&flag, "ARKSpilsSetJacTimesVecFn", 1)) return 1; /* Main time-stepping loop: calls ARKode to perform the integration, then prints results. 
Stops when the final time has been reached */ t = T0; olddt = 0.0; newdt = 0.0; printf(" iout dt_old dt_new ||u||_rms N NNI NLI\n"); printf(" ----------------------------------------------------------------------------------------\n"); printf(" %4i %19.15e %19.15e %19.15e %li %2i %3i\n", iout, olddt, newdt, sqrt(N_VDotProd(y,y)/udata->N), udata->N, 0, 0); while (t < Tf) { /* "set" routines */ flag = ARKodeSetStopTime(arkode_mem, Tf); if (check_flag(&flag, "ARKodeSetStopTime", 1)) return 1; flag = ARKodeSetInitStep(arkode_mem, newdt); if (check_flag(&flag, "ARKodeSetInitStep", 1)) return 1; /* call integrator */ flag = ARKode(arkode_mem, Tf, y, &t, ARK_ONE_STEP); if (check_flag(&flag, "ARKode", 1)) return 1; /* "get" routines */ flag = ARKodeGetLastStep(arkode_mem, &olddt); if (check_flag(&flag, "ARKodeGetLastStep", 1)) return 1; flag = ARKodeGetCurrentStep(arkode_mem, &newdt); if (check_flag(&flag, "ARKodeGetCurrentStep", 1)) return 1; flag = ARKodeGetNumNonlinSolvIters(arkode_mem, &nni); if (check_flag(&flag, "ARKodeGetNumNonlinSolvIters", 1)) return 1; flag = ARKSpilsGetNumLinIters(arkode_mem, &nli); if (check_flag(&flag, "ARKSpilsGetNumLinIters", 1)) return 1; /* print current solution stats */ iout++; printf(" %4i %19.15e %19.15e %19.15e %li %2li %3li\n", iout, olddt, newdt, sqrt(N_VDotProd(y,y)/udata->N), udata->N, nni-nni_cur, nli); nni_cur = nni; nni_tot = nni; nli_tot += nli; /* output results and current mesh to disk */ data = N_VGetArrayPointer(y); for (i=0; i<udata->N; i++) fprintf(UFID," %.16e", data[i]); fprintf(UFID,"\n"); for (i=0; i<udata->N; i++) fprintf(XFID," %.16e", udata->x[i]); fprintf(XFID,"\n"); /* adapt the spatial mesh */ xnew = adapt_mesh(y, &Nnew, udata); if (check_flag(xnew, "ark_adapt", 0)) return 1; /* create N_Vector of new length */ y2 = N_VNew_Serial(Nnew); if (check_flag((void *) y2, "N_VNew_Serial", 0)) return 1; /* project solution onto new mesh */ flag = project(udata->N, udata->x, y, Nnew, xnew, y2); if (check_flag(&flag, 
"project", 1)) return 1; /* delete old vector, old mesh */ N_VDestroy_Serial(y); free(udata->x); /* swap x and xnew so that new mesh is stored in udata structure */ udata->x = xnew; xnew = NULL; udata->N = Nnew; /* store size of new mesh */ /* swap y and y2 so that y holds new solution */ yt = y; y = y2; y2 = yt; /* call ARKodeResize to notify integrator of change in mesh */ flag = ARKodeResize(arkode_mem, y, hscale, t, NULL, NULL); if (check_flag(&flag, "ARKodeResize", 1)) return 1; /* destroy and re-allocate linear solver memory */ flag = ARKPcg(arkode_mem, 0, udata->N); if (check_flag(&flag, "ARKPcg", 1)) return 1; flag = ARKSpilsSetJacTimesVecFn(arkode_mem, Jac); if (check_flag(&flag, "ARKSpilsSetJacTimesVecFn", 1)) return 1; } printf(" ----------------------------------------------------------------------------------------\n"); /* Free integrator memory */ ARKodeFree(&arkode_mem); /* print some final statistics */ printf(" Final solver statistics:\n"); printf(" Total number of time steps = %i\n", iout); printf(" Total nonlinear iterations = %li\n", nni_tot); printf(" Total linear iterations = %li\n\n", nli_tot); /* Clean up and return with successful completion */ fclose(UFID); fclose(XFID); N_VDestroy_Serial(y); /* Free vectors */ free(udata->x); /* Free user data */ free(udata); return 0; }