HYPRE_Int hypre_GMRESDestroy( void *gmres_vdata ) { hypre_GMRESData *gmres_data = gmres_vdata; hypre_GMRESFunctions *gmres_functions = gmres_data->functions; HYPRE_Int i; if (gmres_data) { if ( (gmres_data->logging>0) || (gmres_data->print_level) > 0 ) { if ( (gmres_data -> norms) != NULL ) hypre_TFreeF( gmres_data -> norms, gmres_functions ); } if ( (gmres_data -> matvec_data) != NULL ) (*(gmres_functions->MatvecDestroy))(gmres_data -> matvec_data); if ( (gmres_data -> r) != NULL ) (*(gmres_functions->DestroyVector))(gmres_data -> r); if ( (gmres_data -> w) != NULL ) (*(gmres_functions->DestroyVector))(gmres_data -> w); if ( (gmres_data -> w_2) != NULL ) (*(gmres_functions->DestroyVector))(gmres_data -> w_2); if ( (gmres_data -> p) != NULL ) { for (i = 0; i < (gmres_data -> k_dim+1); i++) { if ( (gmres_data -> p)[i] != NULL ) (*(gmres_functions->DestroyVector))( (gmres_data -> p) [i]); } hypre_TFreeF( gmres_data->p, gmres_functions ); } hypre_TFreeF( gmres_data, gmres_functions ); hypre_TFreeF( gmres_functions, gmres_functions ); } return hypre_error_flag; }
int hypre_GMRESSolve(void *gmres_vdata, void *A, void *b, void *x) { hypre_GMRESData *gmres_data = gmres_vdata; hypre_GMRESFunctions *gmres_functions = gmres_data->functions; int k_dim = (gmres_data -> k_dim); int min_iter = (gmres_data -> min_iter); int max_iter = (gmres_data -> max_iter); int rel_change = (gmres_data -> rel_change); int stop_crit = (gmres_data -> stop_crit); double accuracy = (gmres_data -> tol); double cf_tol = (gmres_data -> cf_tol); void *matvec_data = (gmres_data -> matvec_data); void *r = (gmres_data -> r); void *w = (gmres_data -> w); void **p = (gmres_data -> p); int (*precond)(void*, void*, void*, void*) = (gmres_functions -> precond); int *precond_data = (gmres_data -> precond_data); int print_level = (gmres_data -> print_level); int logging = (gmres_data -> logging); double *norms = (gmres_data -> norms); /* not used yet char *log_file_name = (gmres_data -> log_file_name);*/ /* FILE *fp; */ int ierr = 0; int break_value = 0; int i, j, k; double *rs, **hh, *c, *s; int iter; int my_id, num_procs; double epsilon, gamma, t, r_norm, b_norm, den_norm, x_norm; double epsmac = 1.e-16; double ieee_check = 0.; double guard_zero_residual; double cf_ave_0 = 0.0; double cf_ave_1 = 0.0; double weight; double r_norm_0; double relative_error; (gmres_data -> converged) = 0; /*----------------------------------------------------------------------- * With relative change convergence test on, it is possible to attempt * another iteration with a zero residual. This causes the parameter * alpha to go NaN. The guard_zero_residual parameter is to circumvent * this. Perhaps it should be set to something non-zero (but small). *-----------------------------------------------------------------------*/ guard_zero_residual = 0.0; (*(gmres_functions->CommInfo))(A,&my_id,&num_procs); if ( logging>0 || print_level>0 ) { norms = (gmres_data -> norms); /* not used yet log_file_name = (gmres_data -> log_file_name);*/ /* fp = fopen(log_file_name,"w"); */ } /* initialize work arrays */ rs = hypre_CTAllocF(double,k_dim+1,gmres_functions); c = hypre_CTAllocF(double,k_dim,gmres_functions); s = hypre_CTAllocF(double,k_dim,gmres_functions); hh = hypre_CTAllocF(double*,k_dim+1,gmres_functions); for (i=0; i < k_dim+1; i++) { hh[i] = hypre_CTAllocF(double,k_dim,gmres_functions); } (*(gmres_functions->CopyVector))(b,p[0]); /* compute initial residual */ (*(gmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]); b_norm = sqrt((*(gmres_functions->InnerProd))(b,b)); /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { printf("\n\nERROR detected by Hypre ... BEGIN\n"); printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n"); printf("User probably placed non-numerics in supplied b.\n"); printf("Returning error flag += 101. Program not terminated.\n"); printf("ERROR detected by Hypre ... END\n\n\n"); } ierr += 101; return ierr; } r_norm = sqrt((*(gmres_functions->InnerProd))(p[0],p[0])); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { printf("\n\nERROR detected by Hypre ... BEGIN\n"); printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n"); printf("User probably placed non-numerics in supplied A or x_0.\n"); printf("Returning error flag += 101. Program not terminated.\n"); printf("ERROR detected by Hypre ... END\n\n\n"); } ierr += 101; return ierr; } if ( logging>0 || print_level > 0) { norms[0] = r_norm; if ( print_level>1 && my_id == 0 ) { printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) printf("Rel_resid_norm actually contains the residual norm\n"); printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */ den_norm= b_norm; } else { /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */ den_norm= r_norm; }; epsilon= accuracy; /* convergence criterion |r_i| <= accuracy , absolute residual norm*/ if ( stop_crit && !rel_change ) epsilon = accuracy; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) {printf("=============================================\n\n"); printf("Iters resid.norm conv.rate rel.res.norm\n"); printf("----- ------------ ---------- ------------\n"); } else {printf("=============================================\n\n"); printf("Iters resid.norm conv.rate\n"); printf("----- ------------ ----------\n"); }; } /* set the relative_error to initially bypass the stopping criterion */ if (rel_change) { relative_error= epsilon + 1.; } while (iter < max_iter) { /* initialize first term of hessenberg system */ rs[0] = r_norm; if (r_norm == 0.0) { hypre_TFreeF(c,gmres_functions); hypre_TFreeF(s,gmres_functions); hypre_TFreeF(rs,gmres_functions); for (i=0; i < k_dim+1; i++) hypre_TFreeF(hh[i],gmres_functions); hypre_TFreeF(hh,gmres_functions); ierr = 0; return ierr; } if (r_norm/den_norm <= epsilon && iter >= min_iter) { if (rel_change) { if (relative_error <= epsilon) { (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(gmres_functions->InnerProd))(r,r)); if (r_norm/den_norm <= epsilon) { if ( print_level>1 && my_id == 0) { printf("\n\n"); printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) printf("false convergence 1\n"); } } else { (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(gmres_functions->InnerProd))(r,r)); if (r_norm/den_norm <= epsilon) { if ( print_level>1 && my_id == 0) { printf("\n\n"); printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) printf("false convergence 1\n"); } } t = 1.0 / r_norm; (*(gmres_functions->ScaleVector))(t,p[0]); i = 0; while (i < k_dim && ( (r_norm/den_norm > epsilon || iter < min_iter) || ((rel_change) && relative_error > epsilon) ) && iter < max_iter) { i++; iter++; (*(gmres_functions->ClearVector))(r); precond(precond_data, A, p[i-1], r); (*(gmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]); /* modified Gram_Schmidt */ for (j=0; j < i; j++) { hh[j][i-1] = (*(gmres_functions->InnerProd))(p[j],p[i]); (*(gmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]); } t = sqrt((*(gmres_functions->InnerProd))(p[i],p[i])); hh[i][i-1] = t; if (t != 0.0) { t = 1.0/t; (*(gmres_functions->ScaleVector))(t,p[i]); } /* done with modified Gram_schmidt and Arnoldi step. update factorization of hh */ for (j = 1; j < i; j++) { t = hh[j-1][i-1]; hh[j-1][i-1] = c[j-1]*t + s[j-1]*hh[j][i-1]; hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1]; } gamma = sqrt(hh[i-1][i-1]*hh[i-1][i-1] + hh[i][i-1]*hh[i][i-1]); if (gamma == 0.0) gamma = epsmac; c[i-1] = hh[i-1][i-1]/gamma; s[i-1] = hh[i][i-1]/gamma; rs[i] = -s[i-1]*rs[i-1]; rs[i-1] = c[i-1]*rs[i-1]; /* determine residual norm */ hh[i-1][i-1] = c[i-1]*hh[i-1][i-1] + s[i-1]*hh[i][i-1]; r_norm = fabs(rs[i]); if ( print_level>0 ) { norms[iter] = r_norm; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) printf("% 5d %e %f %e\n", iter, norms[iter],norms[iter]/norms[iter-1], norms[iter]/b_norm); else printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } } if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; #if 0 printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n", i, cf_ave_1, cf_ave_0, weight ); #endif if (weight * cf_ave_1 > cf_tol) { break_value = 1; break; } } } /* now compute solution, first solve upper triangular system */ if (break_value) break; rs[i-1] = rs[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = rs[k]; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs[j]; } rs[k] = t/hh[k][k]; } (*(gmres_functions->CopyVector))(p[0],w); (*(gmres_functions->ScaleVector))(rs[0],w); for (j = 1; j < i; j++) (*(gmres_functions->Axpy))(rs[j], p[j], w); (*(gmres_functions->ClearVector))(r); precond(precond_data, A, w, r); (*(gmres_functions->Axpy))(1.0,r,x); /* check for convergence, evaluate actual residual */ if (r_norm/den_norm <= epsilon && iter >= min_iter) { if (rel_change) { x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) ); if ( x_norm<=guard_zero_residual ) break; /* don't divide by 0 */ r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); relative_error= r_norm/x_norm; } (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); if (r_norm/den_norm <= epsilon) { if ( print_level>1 && my_id == 0 ) { printf("\n\n"); printf("Final L2 norm of residual: %e\n\n", r_norm); } if (rel_change && r_norm > guard_zero_residual) /* Also test on relative change of iterates, x_i - x_(i-1) */ { /* At this point r = x_i - x_(i-1) */ x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) ); if ( x_norm<=guard_zero_residual ) break; /* don't divide by 0 */ if ( relative_error < epsilon ) { (gmres_data -> converged) = 1; break; } } else { (gmres_data -> converged) = 1; break; } } else { if ( print_level>0 && my_id == 0) printf("false convergence 2\n"); (*(gmres_functions->CopyVector))(r,p[0]); i = 0; } } /* compute residual vector and continue loop */ for (j=i ; j > 0; j--) { rs[j-1] = -s[j-1]*rs[j]; rs[j] = c[j-1]*rs[j]; } if (i) (*(gmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]); for (j=1; j < i+1; j++) (*(gmres_functions->Axpy))(rs[j],p[j],p[0]); } if ( print_level>1 && my_id == 0 ) printf("\n\n"); (gmres_data -> num_iterations) = iter; if (b_norm > 0.0) (gmres_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (gmres_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm/den_norm > epsilon) ierr = 1; hypre_TFreeF(c,gmres_functions); hypre_TFreeF(s,gmres_functions); hypre_TFreeF(rs,gmres_functions); for (i=0; i < k_dim+1; i++) { hypre_TFreeF(hh[i],gmres_functions); } hypre_TFreeF(hh,gmres_functions); return ierr; }
HYPRE_Int hypre_LGMRESSolve(void *lgmres_vdata, void *A, void *b, void *x) { hypre_LGMRESData *lgmres_data = (hypre_LGMRESData *)lgmres_vdata; hypre_LGMRESFunctions *lgmres_functions = lgmres_data->functions; HYPRE_Int k_dim = (lgmres_data -> k_dim); HYPRE_Int min_iter = (lgmres_data -> min_iter); HYPRE_Int max_iter = (lgmres_data -> max_iter); HYPRE_Real r_tol = (lgmres_data -> tol); HYPRE_Real cf_tol = (lgmres_data -> cf_tol); HYPRE_Real a_tol = (lgmres_data -> a_tol); void *matvec_data = (lgmres_data -> matvec_data); void *r = (lgmres_data -> r); void *w = (lgmres_data -> w); void **p = (lgmres_data -> p); /* lgmres mod*/ void **aug_vecs = (lgmres_data ->aug_vecs); void **a_aug_vecs = (lgmres_data ->a_aug_vecs); HYPRE_Int *aug_order = (lgmres_data->aug_order); HYPRE_Int aug_dim = (lgmres_data -> aug_dim); HYPRE_Int approx_constant= (lgmres_data ->approx_constant); HYPRE_Int it_arnoldi, aug_ct, it_total, ii, order, it_aug; HYPRE_Int spot = 0; HYPRE_Real tmp_norm, r_norm_last; /*---*/ HYPRE_Int (*precond)(void*,void*,void*,void*) = (lgmres_functions -> precond); HYPRE_Int *precond_data = (HYPRE_Int*)(lgmres_data -> precond_data); HYPRE_Int print_level = (lgmres_data -> print_level); HYPRE_Int logging = (lgmres_data -> logging); HYPRE_Real *norms = (lgmres_data -> norms); HYPRE_Int break_value = 0; HYPRE_Int i, j, k; HYPRE_Real *rs, **hh, *c, *s; HYPRE_Int iter; HYPRE_Int my_id, num_procs; HYPRE_Real epsilon, gamma, t, r_norm, b_norm, den_norm; HYPRE_Real epsmac = 1.e-16; HYPRE_Real ieee_check = 0.; HYPRE_Real cf_ave_0 = 0.0; HYPRE_Real cf_ave_1 = 0.0; HYPRE_Real weight; HYPRE_Real r_norm_0; /* We are not checking rel. change for now... */ (lgmres_data -> converged) = 0; /*----------------------------------------------------------------------- * With relative change convergence test on, it is possible to attempt * another iteration with a zero residual. This causes the parameter * alpha to go NaN. The guard_zero_residual parameter is to circumvent * this. Perhaps it should be set to something non-zero (but small). *-----------------------------------------------------------------------*/ (*(lgmres_functions->CommInfo))(A,&my_id,&num_procs); if ( logging>0 || print_level>0 ) { norms = (lgmres_data -> norms); /* not used yet log_file_name = (lgmres_data -> log_file_name);*/ /* fp = fopen(log_file_name,"w"); */ } /* initialize work arrays - lgmres includes aug_dim*/ rs = hypre_CTAllocF(HYPRE_Real,k_dim+1+aug_dim,lgmres_functions); c = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); s = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); /* lgmres mod. - need non-modified hessenberg to avoid aug_dim matvecs */ hh = hypre_CTAllocF(HYPRE_Real*,k_dim+aug_dim+1,lgmres_functions); for (i=0; i < k_dim+aug_dim+1; i++) { hh[i] = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); } (*(lgmres_functions->CopyVector))(b,p[0]); /* compute initial residual */ (*(lgmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]); b_norm = sqrt((*(lgmres_functions->InnerProd))(b,b)); /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied b.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } r_norm = sqrt((*(lgmres_functions->InnerProd))(p[0],p[0])); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied A or x_0.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } if ( logging>0 || print_level > 0) { norms[0] = r_norm; if ( print_level>1 && my_id == 0 ) { hypre_printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) hypre_printf("Rel_resid_norm actually contains the residual norm\n"); hypre_printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */ den_norm= b_norm; } else { /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */ den_norm= r_norm; }; /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm) den_norm = |r_0| or |b| note: default for a_tol is 0.0, so relative residual criteria is used unless user specifies a_tol, or sets r_tol = 0.0, which means absolute tol only is checked */ epsilon = hypre_max(a_tol,r_tol*den_norm); /* so now our stop criteria is |r_i| <= epsilon */ if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate rel.res.norm\n"); hypre_printf("----- ------------ ---------- ------------\n"); } else {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate\n"); hypre_printf("----- ------------ ----------\n"); }; } /*lgmres initialization */ for (ii=0; ii<aug_dim; ii++) { aug_order[ii] = 0; } aug_ct = 0; /* number of aug. vectors available */ /* outer iteration cycle */ while (iter < max_iter) { /* initialize first term of hessenberg system */ rs[0] = r_norm; if (r_norm == 0.0) { hypre_TFreeF(c,lgmres_functions); hypre_TFreeF(s,lgmres_functions); hypre_TFreeF(rs,lgmres_functions); for (i=0; i < k_dim+aug_dim+1; i++) { hypre_TFreeF(hh[i],lgmres_functions); } hypre_TFreeF(hh,lgmres_functions); return hypre_error_flag; } /* see if we are already converged and should print the final norm and exit */ if (r_norm <= epsilon && iter >= min_iter) { (*(lgmres_functions->CopyVector))(b,r); (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(lgmres_functions->InnerProd))(r,r)); if (r_norm <= epsilon) { if ( print_level>1 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) hypre_printf("false convergence 1\n"); } t = 1.0 / r_norm; r_norm_last = r_norm; (*(lgmres_functions->ScaleVector))(t,p[0]); i = 0; /* lgmres mod: determine number of arnoldi steps to take */ /* if approx_constant then we keep the space the same size even if we don't have the full number of aug vectors yet*/ if (approx_constant) { it_arnoldi = k_dim - aug_ct; } else { it_arnoldi = k_dim - aug_dim; } it_total = it_arnoldi + aug_ct; it_aug = 0; /* keep track of augmented iterations */ /***RESTART CYCLE (right-preconditioning) ***/ while (i < it_total && iter < max_iter) { i++; iter++; (*(lgmres_functions->ClearVector))(r); /*LGMRES_MOD: decide whether this is an arnoldi step or an aug step */ if ( i <= it_arnoldi) { /* Arnoldi */ precond(precond_data, A, p[i-1], r); (*(lgmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]); } else { /*lgmres aug step */ it_aug ++; order = i - it_arnoldi - 1; /* which aug step (note i starts at 1) - aug order number at 0*/ for (ii=0; ii<aug_dim; ii++) { if (aug_order[ii] == order) { spot = ii; break; /* must have this because there will be duplicates before aug_ct = aug_dim */ } } /* copy a_aug_vecs[spot] to p[i] */ (*(lgmres_functions->CopyVector))(a_aug_vecs[spot],p[i]); /*note: an alternate implementation choice would be to only save the AUGVECS and not A_AUGVEC and then apply the PC here to the augvec */ } /*---*/ /* modified Gram_Schmidt */ for (j=0; j < i; j++) { hh[j][i-1] = (*(lgmres_functions->InnerProd))(p[j],p[i]); (*(lgmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]); } t = sqrt((*(lgmres_functions->InnerProd))(p[i],p[i])); hh[i][i-1] = t; if (t != 0.0) { t = 1.0/t; (*(lgmres_functions->ScaleVector))(t,p[i]); } /* done with modified Gram_schmidt and Arnoldi step. update factorization of hh */ for (j = 1; j < i; j++) { t = hh[j-1][i-1]; hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t; hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1]; } t= hh[i][i-1]*hh[i][i-1]; t+= hh[i-1][i-1]*hh[i-1][i-1]; gamma = sqrt(t); if (gamma == 0.0) gamma = epsmac; c[i-1] = hh[i-1][i-1]/gamma; s[i-1] = hh[i][i-1]/gamma; rs[i] = -hh[i][i-1]*rs[i-1]; rs[i]/= gamma; rs[i-1] = c[i-1]*rs[i-1]; /* determine residual norm */ hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1]; r_norm = fabs(rs[i]); /* print ? */ if ( print_level>0 ) { norms[iter] = r_norm; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) hypre_printf("% 5d %e %f %e\n", iter, norms[iter],norms[iter]/norms[iter-1], norms[iter]/b_norm); else hypre_printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } } /*convergence factor tolerance */ if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; #if 0 hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n", i, cf_ave_1, cf_ave_0, weight ); #endif if (weight * cf_ave_1 > cf_tol) { break_value = 1; break; } } /* should we exit the restart cycle? (conv. check) */ if (r_norm <= epsilon && iter >= min_iter) { break; } } /*** end of restart cycle ***/ /* now compute solution, first solve upper triangular system */ if (break_value) break; rs[i-1] = rs[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = 0.0; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs[j]; } t+= rs[k]; rs[k] = t/hh[k][k]; } /* form linear combination of p's to get solution */ /* put the new aug_vector in aug_vecs[aug_dim] - a temp position*/ /* i = number of iterations */ /* it_aug = number of augmented iterations */ /* it_arnoldi = number of arnoldi iterations */ /*check if exited early before all arnoldi its */ if (it_arnoldi > i) it_arnoldi = i; if (!it_aug) { (*(lgmres_functions->CopyVector))(p[i-1],w); (*(lgmres_functions->ScaleVector))(rs[i-1],w); for (j = i-2; j >=0; j--) (*(lgmres_functions->Axpy))(rs[j], p[j], w); } else /* need some of the augvecs */ { (*(lgmres_functions->CopyVector))(p[0],w); (*(lgmres_functions->ScaleVector))(rs[0],w); /* reg. arnoldi directions */ for (j = 1; j < it_arnoldi; j++) /*first one already done */ { (*(lgmres_functions->Axpy))(rs[j], p[j], w); } /* augment directions */ for (ii=0; ii<it_aug; ii++) { for (j=0; j<aug_dim; j++) { if (aug_order[j] == ii) { spot = j; break; /* must have this because there will be * duplicates before aug_ct = aug_dim */ } } (*(lgmres_functions->Axpy))(rs[it_arnoldi+ii], aug_vecs[spot], w); } } /* grab the new aug vector before the prec*/ (*(lgmres_functions->CopyVector))(w,aug_vecs[aug_dim]); (*(lgmres_functions->ClearVector))(r); /* find correction (in r) (un-wind precond.)*/ precond(precond_data, A, w, r); /* update current solution x (in x) */ (*(lgmres_functions->Axpy))(1.0,r,x); /* check for convergence by evaluating the actual residual */ if (r_norm <= epsilon && iter >= min_iter) { /* calculate actual residual norm*/ (*(lgmres_functions->CopyVector))(b,r); (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt( (*(lgmres_functions->InnerProd))(r,r) ); if (r_norm <= epsilon) { if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (lgmres_data -> converged) = 1; break; } else /* conv. has not occurred, according to true residual */ { if ( print_level>0 && my_id == 0) hypre_printf("false convergence 2\n"); (*(lgmres_functions->CopyVector))(r,p[0]); i = 0; } } /* end of convergence check */ /* compute residual vector and continue loop */ /* copy r0 (not scaled) to w*/ (*(lgmres_functions->CopyVector))(p[0],w); (*(lgmres_functions->ScaleVector))(r_norm_last,w); for (j=i ; j > 0; j--) { rs[j-1] = -s[j-1]*rs[j]; rs[j] = c[j-1]*rs[j]; } if (i) (*(lgmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]); for (j=i-1 ; j > 0; j--) (*(lgmres_functions->Axpy))(rs[j],p[j],p[i]); if (i) { (*(lgmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]); (*(lgmres_functions->Axpy))(1.0,p[i],p[0]); } /* lgmres mod */ /* collect aug vector and A*augvector for future restarts - only if we will be restarting (i.e. this cycle performed it_total iterations). ordering starts at 0.*/ if (aug_dim > 0) { if (!aug_ct) { spot = 0; aug_ct++; } else if (aug_ct < aug_dim) { spot = aug_ct; aug_ct++; } else { /* truncate - already have aug_dim number of vectors*/ for (ii=0; ii<aug_dim; ii++) { if (aug_order[ii] == (aug_dim-1)) { spot = ii; } } } /* aug_vecs[aug_dim] contains new aug vector */ (*(lgmres_functions->CopyVector))(aug_vecs[aug_dim], aug_vecs[spot]); /*need to normalize */ tmp_norm = sqrt((*(lgmres_functions->InnerProd))(aug_vecs[spot], aug_vecs[spot])); tmp_norm = 1.0/tmp_norm; (*(lgmres_functions->ScaleVector))(tmp_norm ,aug_vecs[spot]); /*set new aug vector to order 0 - move all others back one */ for (ii=0; ii < aug_dim; ii++) { aug_order[ii]++; } aug_order[spot] = 0; /*now add the A*aug vector to A_AUGVEC(spot) - this is * independ. of preconditioning type*/ /* A*augvec = V*H*y = r0-rm (r0 is in w and rm is in p[0])*/ (*(lgmres_functions->CopyVector))( w, a_aug_vecs[spot]); (*(lgmres_functions->ScaleVector))(- 1.0, a_aug_vecs[spot]); /* -r0*/ (*(lgmres_functions->Axpy))(1.0, p[0],a_aug_vecs[spot]); /* rm - r0 */ (*(lgmres_functions->ScaleVector))(-tmp_norm, a_aug_vecs[spot]); /* r0-rm /norm */ } } /* END of iteration while loop */ if ( print_level>1 && my_id == 0 ) hypre_printf("\n\n"); (lgmres_data -> num_iterations) = iter; if (b_norm > 0.0) (lgmres_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (lgmres_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV); hypre_TFreeF(c,lgmres_functions); hypre_TFreeF(s,lgmres_functions); hypre_TFreeF(rs,lgmres_functions); for (i=0; i < k_dim+1+aug_dim; i++) { hypre_TFreeF(hh[i],lgmres_functions); } hypre_TFreeF(hh,lgmres_functions); return hypre_error_flag; }
HYPRE_Int hypre_LGMRESDestroy( void *lgmres_vdata ) { hypre_LGMRESData *lgmres_data = (hypre_LGMRESData *)lgmres_vdata; HYPRE_Int i; if (lgmres_data) { hypre_LGMRESFunctions *lgmres_functions = lgmres_data->functions; if ( (lgmres_data->logging>0) || (lgmres_data->print_level) > 0 ) { if ( (lgmres_data -> norms) != NULL ) hypre_TFreeF( lgmres_data -> norms, lgmres_functions ); } if ( (lgmres_data -> matvec_data) != NULL ) (*(lgmres_functions->MatvecDestroy))(lgmres_data -> matvec_data); if ( (lgmres_data -> r) != NULL ) (*(lgmres_functions->DestroyVector))(lgmres_data -> r); if ( (lgmres_data -> w) != NULL ) (*(lgmres_functions->DestroyVector))(lgmres_data -> w); if ( (lgmres_data -> w_2) != NULL ) (*(lgmres_functions->DestroyVector))(lgmres_data -> w_2); if ( (lgmres_data -> p) != NULL ) { for (i = 0; i < (lgmres_data -> k_dim+1); i++) { if ( (lgmres_data -> p)[i] != NULL ) (*(lgmres_functions->DestroyVector))( (lgmres_data -> p) [i]); } hypre_TFreeF( lgmres_data->p, lgmres_functions ); } /* lgmres mod */ if ( (lgmres_data -> aug_vecs) != NULL ) { for (i = 0; i < (lgmres_data -> aug_dim + 1); i++) { if ( (lgmres_data -> aug_vecs)[i] != NULL ) (*(lgmres_functions->DestroyVector))( (lgmres_data -> aug_vecs) [i]); } hypre_TFreeF( lgmres_data->aug_vecs, lgmres_functions ); } if ( (lgmres_data -> a_aug_vecs) != NULL ) { for (i = 0; i < (lgmres_data -> aug_dim); i++) { if ( (lgmres_data -> a_aug_vecs)[i] != NULL ) (*(lgmres_functions->DestroyVector))( (lgmres_data -> a_aug_vecs) [i]); } hypre_TFreeF( lgmres_data->a_aug_vecs, lgmres_functions ); } /*---*/ hypre_TFreeF(lgmres_data->aug_order, lgmres_functions); hypre_TFreeF( lgmres_data, lgmres_functions ); hypre_TFreeF( lgmres_functions, lgmres_functions ); } return hypre_error_flag; }
HYPRE_Int hypre_GMRESSolve(void *gmres_vdata, void *A, void *b, void *x) { hypre_GMRESData *gmres_data = gmres_vdata; hypre_GMRESFunctions *gmres_functions = gmres_data->functions; HYPRE_Int k_dim = (gmres_data -> k_dim); HYPRE_Int min_iter = (gmres_data -> min_iter); HYPRE_Int max_iter = (gmres_data -> max_iter); HYPRE_Int rel_change = (gmres_data -> rel_change); HYPRE_Int skip_real_r_check = (gmres_data -> skip_real_r_check); double r_tol = (gmres_data -> tol); double cf_tol = (gmres_data -> cf_tol); double a_tol = (gmres_data -> a_tol); void *matvec_data = (gmres_data -> matvec_data); void *r = (gmres_data -> r); void *w = (gmres_data -> w); /* note: w_2 is only allocated if rel_change = 1 */ void *w_2 = (gmres_data -> w_2); void **p = (gmres_data -> p); HYPRE_Int (*precond)() = (gmres_functions -> precond); HYPRE_Int *precond_data = (gmres_data -> precond_data); HYPRE_Int print_level = (gmres_data -> print_level); HYPRE_Int logging = (gmres_data -> logging); double *norms = (gmres_data -> norms); /* not used yet char *log_file_name = (gmres_data -> log_file_name);*/ /* FILE *fp; */ HYPRE_Int break_value = 0; HYPRE_Int i, j, k; double *rs, **hh, *c, *s, *rs_2; HYPRE_Int iter; HYPRE_Int my_id, num_procs; double epsilon, gamma, t, r_norm, b_norm, den_norm, x_norm; double w_norm; double epsmac = 1.e-16; double ieee_check = 0.; double guard_zero_residual; double cf_ave_0 = 0.0; double cf_ave_1 = 0.0; double weight; double r_norm_0; double relative_error = 1.0; HYPRE_Int rel_change_passed = 0, num_rel_change_check = 0; double real_r_norm_old, real_r_norm_new; (gmres_data -> converged) = 0; /*----------------------------------------------------------------------- * With relative change convergence test on, it is possible to attempt * another iteration with a zero residual. This causes the parameter * alpha to go NaN. The guard_zero_residual parameter is to circumvent * this. Perhaps it should be set to something non-zero (but small). *-----------------------------------------------------------------------*/ guard_zero_residual = 0.0; (*(gmres_functions->CommInfo))(A,&my_id,&num_procs); if ( logging>0 || print_level>0 ) { norms = (gmres_data -> norms); } /* initialize work arrays */ rs = hypre_CTAllocF(double,k_dim+1,gmres_functions); c = hypre_CTAllocF(double,k_dim,gmres_functions); s = hypre_CTAllocF(double,k_dim,gmres_functions); if (rel_change) rs_2 = hypre_CTAllocF(double,k_dim+1,gmres_functions); hh = hypre_CTAllocF(double*,k_dim+1,gmres_functions); for (i=0; i < k_dim+1; i++) { hh[i] = hypre_CTAllocF(double,k_dim,gmres_functions); } (*(gmres_functions->CopyVector))(b,p[0]); /* compute initial residual */ (*(gmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]); b_norm = sqrt((*(gmres_functions->InnerProd))(b,b)); real_r_norm_old = b_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied b.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } r_norm = sqrt((*(gmres_functions->InnerProd))(p[0],p[0])); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied A or x_0.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } if ( logging>0 || print_level > 0) { norms[0] = r_norm; if ( print_level>1 && my_id == 0 ) { hypre_printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) hypre_printf("Rel_resid_norm actually contains the residual norm\n"); hypre_printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */ den_norm= b_norm; } else { /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */ den_norm= r_norm; }; /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm) den_norm = |r_0| or |b| note: default for a_tol is 0.0, so relative residual criteria is used unless user specifies a_tol, or sets r_tol = 0.0, which means absolute tol only is checked */ epsilon = hypre_max(a_tol,r_tol*den_norm); /* so now our stop criteria is |r_i| <= epsilon */ if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate rel.res.norm\n"); hypre_printf("----- ------------ ---------- ------------\n"); } else {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate\n"); hypre_printf("----- ------------ ----------\n"); }; } /* once the rel. change check has passed, we do not want to check it again */ rel_change_passed = 0; /* outer iteration cycle */ while (iter < max_iter) { /* initialize first term of hessenberg system */ rs[0] = r_norm; if (r_norm == 0.0) { hypre_TFreeF(c,gmres_functions); hypre_TFreeF(s,gmres_functions); hypre_TFreeF(rs,gmres_functions); if (rel_change) hypre_TFreeF(rs_2,gmres_functions); for (i=0; i < k_dim+1; i++) hypre_TFreeF(hh[i],gmres_functions); hypre_TFreeF(hh,gmres_functions); return hypre_error_flag; } /* see if we are already converged and should print the final norm and exit */ if (r_norm <= epsilon && iter >= min_iter) { if (!rel_change) /* shouldn't exit after no iterations if * relative change is on*/ { (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(gmres_functions->InnerProd))(r,r)); if (r_norm <= epsilon) { if ( print_level>1 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) hypre_printf("false convergence 1\n"); } } t = 1.0 / r_norm; (*(gmres_functions->ScaleVector))(t,p[0]); i = 0; /***RESTART CYCLE (right-preconditioning) ***/ while (i < k_dim && iter < max_iter) { i++; iter++; (*(gmres_functions->ClearVector))(r); precond(precond_data, A, p[i-1], r); (*(gmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]); /* modified Gram_Schmidt */ for (j=0; j < i; j++) { hh[j][i-1] = (*(gmres_functions->InnerProd))(p[j],p[i]); (*(gmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]); } t = sqrt((*(gmres_functions->InnerProd))(p[i],p[i])); hh[i][i-1] = t; if (t != 0.0) { t = 1.0/t; (*(gmres_functions->ScaleVector))(t,p[i]); } /* done with modified Gram_schmidt and Arnoldi step. update factorization of hh */ for (j = 1; j < i; j++) { t = hh[j-1][i-1]; hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t; hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1]; } t= hh[i][i-1]*hh[i][i-1]; t+= hh[i-1][i-1]*hh[i-1][i-1]; gamma = sqrt(t); if (gamma == 0.0) gamma = epsmac; c[i-1] = hh[i-1][i-1]/gamma; s[i-1] = hh[i][i-1]/gamma; rs[i] = -hh[i][i-1]*rs[i-1]; rs[i]/= gamma; rs[i-1] = c[i-1]*rs[i-1]; /* determine residual norm */ hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1]; r_norm = fabs(rs[i]); /* print ? */ if ( print_level>0 ) { norms[iter] = r_norm; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) hypre_printf("% 5d %e %f %e\n", iter, norms[iter],norms[iter]/norms[iter-1], norms[iter]/b_norm); else hypre_printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } } /*convergence factor tolerance */ if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; #if 0 hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n", i, cf_ave_1, cf_ave_0, weight ); #endif if (weight * cf_ave_1 > cf_tol) { break_value = 1; break; } } /* should we exit the restart cycle? (conv. check) */ if (r_norm <= epsilon && iter >= min_iter) { if (rel_change && !rel_change_passed) { /* To decide whether to break here: to actually determine the relative change requires the approx solution (so a triangular solve) and a precond. solve - so if we have to do this many times, it will be expensive...(unlike cg where is is relatively straightforward) previously, the intent (there was a bug), was to exit the restart cycle based on the residual norm and check the relative change outside the cycle. Here we will check the relative here as we don't want to exit the restart cycle prematurely */ for (k=0; k<i; k++) /* extra copy of rs so we don't need to change the later solve */ rs_2[k] = rs[k]; /* solve tri. system*/ rs_2[i-1] = rs_2[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = 0.0; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs_2[j]; } t+= rs_2[k]; rs_2[k] = t/hh[k][k]; } (*(gmres_functions->CopyVector))(p[i-1],w); (*(gmres_functions->ScaleVector))(rs_2[i-1],w); for (j = i-2; j >=0; j--) (*(gmres_functions->Axpy))(rs_2[j], p[j], w); (*(gmres_functions->ClearVector))(r); /* find correction (in r) */ precond(precond_data, A, w, r); /* copy current solution (x) to w (don't want to over-write x)*/ (*(gmres_functions->CopyVector))(x,w); /* add the correction */ (*(gmres_functions->Axpy))(1.0,r,w); /* now w is the approx solution - get the norm*/ x_norm = sqrt( (*(gmres_functions->InnerProd))(w,w) ); if ( !(x_norm <= guard_zero_residual )) /* don't divide by zero */ { /* now get x_i - x_i-1 */ if (num_rel_change_check) { /* have already checked once so we can avoid another precond. solve */ (*(gmres_functions->CopyVector))(w, r); (*(gmres_functions->Axpy))(-1.0, w_2, r); /* now r contains x_i - x_i-1*/ /* save current soln w in w_2 for next time */ (*(gmres_functions->CopyVector))(w, w_2); } else { /* first time to check rel change*/ /* first save current soln w in w_2 for next time */ (*(gmres_functions->CopyVector))(w, w_2); /* for relative change take x_(i-1) to be x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. Now x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]} - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]} = M^{-1} rs_{i-1}{p_{i-1}} */ (*(gmres_functions->ClearVector))(w); (*(gmres_functions->Axpy))(rs_2[i-1], p[i-1], w); (*(gmres_functions->ClearVector))(r); /* apply the preconditioner */ precond(precond_data, A, w, r); /* now r contains x_i - x_i-1 */ } /* find the norm of x_i - x_i-1 */ w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); relative_error = w_norm/x_norm; if (relative_error <= r_tol) { rel_change_passed = 1; break; } } else { rel_change_passed = 1; break; } num_rel_change_check++; } else /* no relative change */ { break; } } } /*** end of restart cycle ***/ /* now compute solution, first solve upper triangular system */ if (break_value) break; rs[i-1] = rs[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = 0.0; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs[j]; } t+= rs[k]; rs[k] = t/hh[k][k]; } (*(gmres_functions->CopyVector))(p[i-1],w); (*(gmres_functions->ScaleVector))(rs[i-1],w); for (j = i-2; j >=0; j--) (*(gmres_functions->Axpy))(rs[j], p[j], w); (*(gmres_functions->ClearVector))(r); /* find correction (in r) */ precond(precond_data, A, w, r); /* update current solution x (in x) */ (*(gmres_functions->Axpy))(1.0,r,x); /* check for convergence by evaluating the actual residual */ if (r_norm <= epsilon && iter >= min_iter) { if (skip_real_r_check) { (gmres_data -> converged) = 1; break; } /* calculate actual residual norm*/ (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); real_r_norm_new = r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); if (r_norm <= epsilon) { if (rel_change && !rel_change_passed) /* calculate the relative change */ { /* calculate the norm of the solution */ x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) ); if ( !(x_norm <= guard_zero_residual )) /* don't divide by zero */ { /* for relative change take x_(i-1) to be x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. Now x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]} - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]} = M^{-1} rs_{i-1}{p_{i-1}} */ (*(gmres_functions->ClearVector))(w); (*(gmres_functions->Axpy))(rs[i-1], p[i-1], w); (*(gmres_functions->ClearVector))(r); /* apply the preconditioner */ precond(precond_data, A, w, r); /* find the norm of x_i - x_i-1 */ w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); relative_error= w_norm/x_norm; if ( relative_error < r_tol ) { (gmres_data -> converged) = 1; if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } } else { (gmres_data -> converged) = 1; if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } } else /* don't need to check rel. change */ { if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (gmres_data -> converged) = 1; break; } } else /* conv. has not occurred, according to true residual */ { /* exit if the real residual norm has not decreased */ if (real_r_norm_new >= real_r_norm_old) { if (print_level > 1 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (gmres_data -> converged) = 1; break; } /* report discrepancy between real/GMRES residuals and restart */ if ( print_level>0 && my_id == 0) hypre_printf("false convergence 2, L2 norm of residual: %e\n", r_norm); (*(gmres_functions->CopyVector))(r,p[0]); i = 0; real_r_norm_old = real_r_norm_new; } } /* end of convergence check */ /* compute residual vector and continue loop */ for (j=i ; j > 0; j--) { rs[j-1] = -s[j-1]*rs[j]; rs[j] = c[j-1]*rs[j]; } if (i) (*(gmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]); for (j=i-1 ; j > 0; j--) (*(gmres_functions->Axpy))(rs[j],p[j],p[i]); if (i) { (*(gmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]); (*(gmres_functions->Axpy))(1.0,p[i],p[0]); } } /* END of iteration while loop */ if ( print_level>1 && my_id == 0 ) hypre_printf("\n\n"); (gmres_data -> num_iterations) = iter; if (b_norm > 0.0) (gmres_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (gmres_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV); hypre_TFreeF(c,gmres_functions); hypre_TFreeF(s,gmres_functions); hypre_TFreeF(rs,gmres_functions); if (rel_change) hypre_TFreeF(rs_2,gmres_functions); for (i=0; i < k_dim+1; i++) { hypre_TFreeF(hh[i],gmres_functions); } hypre_TFreeF(hh,gmres_functions); return hypre_error_flag; }