void * hypre_GMRESCreate( hypre_GMRESFunctions *gmres_functions ) { hypre_GMRESData *gmres_data; gmres_data = hypre_CTAllocF(hypre_GMRESData, 1, gmres_functions); gmres_data->functions = gmres_functions; /* set defaults */ (gmres_data -> k_dim) = 5; (gmres_data -> tol) = 1.0e-06; (gmres_data -> cf_tol) = 0.0; (gmres_data -> min_iter) = 0; (gmres_data -> max_iter) = 1000; (gmres_data -> rel_change) = 0; (gmres_data -> stop_crit) = 0; /* rel. residual norm */ (gmres_data -> converged) = 0; (gmres_data -> precond_data) = NULL; (gmres_data -> print_level) = 0; (gmres_data -> logging) = 0; (gmres_data -> p) = NULL; (gmres_data -> r) = NULL; (gmres_data -> w) = NULL; (gmres_data -> matvec_data) = NULL; (gmres_data -> norms) = NULL; (gmres_data -> log_file_name) = NULL; return (void *) gmres_data; }
void * hypre_LGMRESCreate( hypre_LGMRESFunctions *lgmres_functions ) { hypre_LGMRESData *lgmres_data; lgmres_data = hypre_CTAllocF(hypre_LGMRESData, 1, lgmres_functions); lgmres_data->functions = lgmres_functions; /* set defaults */ (lgmres_data -> k_dim) = 20; (lgmres_data -> tol) = 1.0e-06; (lgmres_data -> cf_tol) = 0.0; (lgmres_data -> a_tol) = 0.0; /* abs. residual tol */ (lgmres_data -> min_iter) = 0; (lgmres_data -> max_iter) = 1000; (lgmres_data -> rel_change) = 0; (lgmres_data -> stop_crit) = 0; /* rel. residual norm */ (lgmres_data -> converged) = 0; (lgmres_data -> precond_data) = NULL; (lgmres_data -> print_level) = 0; (lgmres_data -> logging) = 0; (lgmres_data -> p) = NULL; (lgmres_data -> r) = NULL; (lgmres_data -> w) = NULL; (lgmres_data -> w_2) = NULL; (lgmres_data -> matvec_data) = NULL; (lgmres_data -> norms) = NULL; (lgmres_data -> log_file_name) = NULL; /* lgmres specific */ (lgmres_data -> aug_dim) = 2; (lgmres_data -> approx_constant) = 1; return (void *) lgmres_data; }
HYPRE_Int hypre_LGMRESSolve(void *lgmres_vdata, void *A, void *b, void *x) { hypre_LGMRESData *lgmres_data = (hypre_LGMRESData *)lgmres_vdata; hypre_LGMRESFunctions *lgmres_functions = lgmres_data->functions; HYPRE_Int k_dim = (lgmres_data -> k_dim); HYPRE_Int min_iter = (lgmres_data -> min_iter); HYPRE_Int max_iter = (lgmres_data -> max_iter); HYPRE_Real r_tol = (lgmres_data -> tol); HYPRE_Real cf_tol = (lgmres_data -> cf_tol); HYPRE_Real a_tol = (lgmres_data -> a_tol); void *matvec_data = (lgmres_data -> matvec_data); void *r = (lgmres_data -> r); void *w = (lgmres_data -> w); void **p = (lgmres_data -> p); /* lgmres mod*/ void **aug_vecs = (lgmres_data ->aug_vecs); void **a_aug_vecs = (lgmres_data ->a_aug_vecs); HYPRE_Int *aug_order = (lgmres_data->aug_order); HYPRE_Int aug_dim = (lgmres_data -> aug_dim); HYPRE_Int approx_constant= (lgmres_data ->approx_constant); HYPRE_Int it_arnoldi, aug_ct, it_total, ii, order, it_aug; HYPRE_Int spot = 0; HYPRE_Real tmp_norm, r_norm_last; /*---*/ HYPRE_Int (*precond)(void*,void*,void*,void*) = (lgmres_functions -> precond); HYPRE_Int *precond_data = (HYPRE_Int*)(lgmres_data -> precond_data); HYPRE_Int print_level = (lgmres_data -> print_level); HYPRE_Int logging = (lgmres_data -> logging); HYPRE_Real *norms = (lgmres_data -> norms); HYPRE_Int break_value = 0; HYPRE_Int i, j, k; HYPRE_Real *rs, **hh, *c, *s; HYPRE_Int iter; HYPRE_Int my_id, num_procs; HYPRE_Real epsilon, gamma, t, r_norm, b_norm, den_norm; HYPRE_Real epsmac = 1.e-16; HYPRE_Real ieee_check = 0.; HYPRE_Real cf_ave_0 = 0.0; HYPRE_Real cf_ave_1 = 0.0; HYPRE_Real weight; HYPRE_Real r_norm_0; /* We are not checking rel. change for now... */ (lgmres_data -> converged) = 0; /*----------------------------------------------------------------------- * With relative change convergence test on, it is possible to attempt * another iteration with a zero residual. This causes the parameter * alpha to go NaN. The guard_zero_residual parameter is to circumvent * this. Perhaps it should be set to something non-zero (but small). *-----------------------------------------------------------------------*/ (*(lgmres_functions->CommInfo))(A,&my_id,&num_procs); if ( logging>0 || print_level>0 ) { norms = (lgmres_data -> norms); /* not used yet log_file_name = (lgmres_data -> log_file_name);*/ /* fp = fopen(log_file_name,"w"); */ } /* initialize work arrays - lgmres includes aug_dim*/ rs = hypre_CTAllocF(HYPRE_Real,k_dim+1+aug_dim,lgmres_functions); c = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); s = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); /* lgmres mod. - need non-modified hessenberg to avoid aug_dim matvecs */ hh = hypre_CTAllocF(HYPRE_Real*,k_dim+aug_dim+1,lgmres_functions); for (i=0; i < k_dim+aug_dim+1; i++) { hh[i] = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); } (*(lgmres_functions->CopyVector))(b,p[0]); /* compute initial residual */ (*(lgmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]); b_norm = sqrt((*(lgmres_functions->InnerProd))(b,b)); /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied b.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } r_norm = sqrt((*(lgmres_functions->InnerProd))(p[0],p[0])); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied A or x_0.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } if ( logging>0 || print_level > 0) { norms[0] = r_norm; if ( print_level>1 && my_id == 0 ) { hypre_printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) hypre_printf("Rel_resid_norm actually contains the residual norm\n"); hypre_printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */ den_norm= b_norm; } else { /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */ den_norm= r_norm; }; /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm) den_norm = |r_0| or |b| note: default for a_tol is 0.0, so relative residual criteria is used unless user specifies a_tol, or sets r_tol = 0.0, which means absolute tol only is checked */ epsilon = hypre_max(a_tol,r_tol*den_norm); /* so now our stop criteria is |r_i| <= epsilon */ if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate rel.res.norm\n"); hypre_printf("----- ------------ ---------- ------------\n"); } else {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate\n"); hypre_printf("----- ------------ ----------\n"); }; } /*lgmres initialization */ for (ii=0; ii<aug_dim; ii++) { aug_order[ii] = 0; } aug_ct = 0; /* number of aug. vectors available */ /* outer iteration cycle */ while (iter < max_iter) { /* initialize first term of hessenberg system */ rs[0] = r_norm; if (r_norm == 0.0) { hypre_TFreeF(c,lgmres_functions); hypre_TFreeF(s,lgmres_functions); hypre_TFreeF(rs,lgmres_functions); for (i=0; i < k_dim+aug_dim+1; i++) { hypre_TFreeF(hh[i],lgmres_functions); } hypre_TFreeF(hh,lgmres_functions); return hypre_error_flag; } /* see if we are already converged and should print the final norm and exit */ if (r_norm <= epsilon && iter >= min_iter) { (*(lgmres_functions->CopyVector))(b,r); (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(lgmres_functions->InnerProd))(r,r)); if (r_norm <= epsilon) { if ( print_level>1 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) hypre_printf("false convergence 1\n"); } t = 1.0 / r_norm; r_norm_last = r_norm; (*(lgmres_functions->ScaleVector))(t,p[0]); i = 0; /* lgmres mod: determine number of arnoldi steps to take */ /* if approx_constant then we keep the space the same size even if we don't have the full number of aug vectors yet*/ if (approx_constant) { it_arnoldi = k_dim - aug_ct; } else { it_arnoldi = k_dim - aug_dim; } it_total = it_arnoldi + aug_ct; it_aug = 0; /* keep track of augmented iterations */ /***RESTART CYCLE (right-preconditioning) ***/ while (i < it_total && iter < max_iter) { i++; iter++; (*(lgmres_functions->ClearVector))(r); /*LGMRES_MOD: decide whether this is an arnoldi step or an aug step */ if ( i <= it_arnoldi) { /* Arnoldi */ precond(precond_data, A, p[i-1], r); (*(lgmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]); } else { /*lgmres aug step */ it_aug ++; order = i - it_arnoldi - 1; /* which aug step (note i starts at 1) - aug order number at 0*/ for (ii=0; ii<aug_dim; ii++) { if (aug_order[ii] == order) { spot = ii; break; /* must have this because there will be duplicates before aug_ct = aug_dim */ } } /* copy a_aug_vecs[spot] to p[i] */ (*(lgmres_functions->CopyVector))(a_aug_vecs[spot],p[i]); /*note: an alternate implementation choice would be to only save the AUGVECS and not A_AUGVEC and then apply the PC here to the augvec */ } /*---*/ /* modified Gram_Schmidt */ for (j=0; j < i; j++) { hh[j][i-1] = (*(lgmres_functions->InnerProd))(p[j],p[i]); (*(lgmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]); } t = sqrt((*(lgmres_functions->InnerProd))(p[i],p[i])); hh[i][i-1] = t; if (t != 0.0) { t = 1.0/t; (*(lgmres_functions->ScaleVector))(t,p[i]); } /* done with modified Gram_schmidt and Arnoldi step. update factorization of hh */ for (j = 1; j < i; j++) { t = hh[j-1][i-1]; hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t; hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1]; } t= hh[i][i-1]*hh[i][i-1]; t+= hh[i-1][i-1]*hh[i-1][i-1]; gamma = sqrt(t); if (gamma == 0.0) gamma = epsmac; c[i-1] = hh[i-1][i-1]/gamma; s[i-1] = hh[i][i-1]/gamma; rs[i] = -hh[i][i-1]*rs[i-1]; rs[i]/= gamma; rs[i-1] = c[i-1]*rs[i-1]; /* determine residual norm */ hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1]; r_norm = fabs(rs[i]); /* print ? */ if ( print_level>0 ) { norms[iter] = r_norm; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) hypre_printf("% 5d %e %f %e\n", iter, norms[iter],norms[iter]/norms[iter-1], norms[iter]/b_norm); else hypre_printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } } /*convergence factor tolerance */ if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; #if 0 hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n", i, cf_ave_1, cf_ave_0, weight ); #endif if (weight * cf_ave_1 > cf_tol) { break_value = 1; break; } } /* should we exit the restart cycle? (conv. check) */ if (r_norm <= epsilon && iter >= min_iter) { break; } } /*** end of restart cycle ***/ /* now compute solution, first solve upper triangular system */ if (break_value) break; rs[i-1] = rs[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = 0.0; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs[j]; } t+= rs[k]; rs[k] = t/hh[k][k]; } /* form linear combination of p's to get solution */ /* put the new aug_vector in aug_vecs[aug_dim] - a temp position*/ /* i = number of iterations */ /* it_aug = number of augmented iterations */ /* it_arnoldi = number of arnoldi iterations */ /*check if exited early before all arnoldi its */ if (it_arnoldi > i) it_arnoldi = i; if (!it_aug) { (*(lgmres_functions->CopyVector))(p[i-1],w); (*(lgmres_functions->ScaleVector))(rs[i-1],w); for (j = i-2; j >=0; j--) (*(lgmres_functions->Axpy))(rs[j], p[j], w); } else /* need some of the augvecs */ { (*(lgmres_functions->CopyVector))(p[0],w); (*(lgmres_functions->ScaleVector))(rs[0],w); /* reg. arnoldi directions */ for (j = 1; j < it_arnoldi; j++) /*first one already done */ { (*(lgmres_functions->Axpy))(rs[j], p[j], w); } /* augment directions */ for (ii=0; ii<it_aug; ii++) { for (j=0; j<aug_dim; j++) { if (aug_order[j] == ii) { spot = j; break; /* must have this because there will be * duplicates before aug_ct = aug_dim */ } } (*(lgmres_functions->Axpy))(rs[it_arnoldi+ii], aug_vecs[spot], w); } } /* grab the new aug vector before the prec*/ (*(lgmres_functions->CopyVector))(w,aug_vecs[aug_dim]); (*(lgmres_functions->ClearVector))(r); /* find correction (in r) (un-wind precond.)*/ precond(precond_data, A, w, r); /* update current solution x (in x) */ (*(lgmres_functions->Axpy))(1.0,r,x); /* check for convergence by evaluating the actual residual */ if (r_norm <= epsilon && iter >= min_iter) { /* calculate actual residual norm*/ (*(lgmres_functions->CopyVector))(b,r); (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt( (*(lgmres_functions->InnerProd))(r,r) ); if (r_norm <= epsilon) { if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (lgmres_data -> converged) = 1; break; } else /* conv. has not occurred, according to true residual */ { if ( print_level>0 && my_id == 0) hypre_printf("false convergence 2\n"); (*(lgmres_functions->CopyVector))(r,p[0]); i = 0; } } /* end of convergence check */ /* compute residual vector and continue loop */ /* copy r0 (not scaled) to w*/ (*(lgmres_functions->CopyVector))(p[0],w); (*(lgmres_functions->ScaleVector))(r_norm_last,w); for (j=i ; j > 0; j--) { rs[j-1] = -s[j-1]*rs[j]; rs[j] = c[j-1]*rs[j]; } if (i) (*(lgmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]); for (j=i-1 ; j > 0; j--) (*(lgmres_functions->Axpy))(rs[j],p[j],p[i]); if (i) { (*(lgmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]); (*(lgmres_functions->Axpy))(1.0,p[i],p[0]); } /* lgmres mod */ /* collect aug vector and A*augvector for future restarts - only if we will be restarting (i.e. this cycle performed it_total iterations). ordering starts at 0.*/ if (aug_dim > 0) { if (!aug_ct) { spot = 0; aug_ct++; } else if (aug_ct < aug_dim) { spot = aug_ct; aug_ct++; } else { /* truncate - already have aug_dim number of vectors*/ for (ii=0; ii<aug_dim; ii++) { if (aug_order[ii] == (aug_dim-1)) { spot = ii; } } } /* aug_vecs[aug_dim] contains new aug vector */ (*(lgmres_functions->CopyVector))(aug_vecs[aug_dim], aug_vecs[spot]); /*need to normalize */ tmp_norm = sqrt((*(lgmres_functions->InnerProd))(aug_vecs[spot], aug_vecs[spot])); tmp_norm = 1.0/tmp_norm; (*(lgmres_functions->ScaleVector))(tmp_norm ,aug_vecs[spot]); /*set new aug vector to order 0 - move all others back one */ for (ii=0; ii < aug_dim; ii++) { aug_order[ii]++; } aug_order[spot] = 0; /*now add the A*aug vector to A_AUGVEC(spot) - this is * independ. of preconditioning type*/ /* A*augvec = V*H*y = r0-rm (r0 is in w and rm is in p[0])*/ (*(lgmres_functions->CopyVector))( w, a_aug_vecs[spot]); (*(lgmres_functions->ScaleVector))(- 1.0, a_aug_vecs[spot]); /* -r0*/ (*(lgmres_functions->Axpy))(1.0, p[0],a_aug_vecs[spot]); /* rm - r0 */ (*(lgmres_functions->ScaleVector))(-tmp_norm, a_aug_vecs[spot]); /* r0-rm /norm */ } } /* END of iteration while loop */ if ( print_level>1 && my_id == 0 ) hypre_printf("\n\n"); (lgmres_data -> num_iterations) = iter; if (b_norm > 0.0) (lgmres_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (lgmres_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV); hypre_TFreeF(c,lgmres_functions); hypre_TFreeF(s,lgmres_functions); hypre_TFreeF(rs,lgmres_functions); for (i=0; i < k_dim+1+aug_dim; i++) { hypre_TFreeF(hh[i],lgmres_functions); } hypre_TFreeF(hh,lgmres_functions); return hypre_error_flag; }
HYPRE_Int hypre_LGMRESSetup( void *lgmres_vdata, void *A, void *b, void *x ) { hypre_LGMRESData *lgmres_data = (hypre_LGMRESData *)lgmres_vdata; hypre_LGMRESFunctions *lgmres_functions = lgmres_data->functions; HYPRE_Int k_dim = (lgmres_data -> k_dim); HYPRE_Int max_iter = (lgmres_data -> max_iter); HYPRE_Int (*precond_setup)(void*,void*,void*,void*) = (lgmres_functions->precond_setup); void *precond_data = (lgmres_data -> precond_data); HYPRE_Int rel_change = (lgmres_data -> rel_change); /* lgmres mod */ HYPRE_Int aug_dim = (lgmres_data -> aug_dim); (lgmres_data -> A) = A; /*-------------------------------------------------- * The arguments for NewVector are important to * maintain consistency between the setup and * compute phases of matvec and the preconditioner. *--------------------------------------------------*/ if ((lgmres_data -> p) == NULL) (lgmres_data -> p) = (void**)(*(lgmres_functions->CreateVectorArray))(k_dim+1,x); if ((lgmres_data -> r) == NULL) (lgmres_data -> r) = (*(lgmres_functions->CreateVector))(b); if ((lgmres_data -> w) == NULL) (lgmres_data -> w) = (*(lgmres_functions->CreateVector))(b); if (rel_change) { if ((lgmres_data -> w_2) == NULL) (lgmres_data -> w_2) = (*(lgmres_functions->CreateVector))(b); } /* lgmres mod */ (lgmres_data -> aug_vecs) = (void**)(*(lgmres_functions->CreateVectorArray))(aug_dim+1,x); /* one extra */ (lgmres_data -> a_aug_vecs) = (void**)(*(lgmres_functions->CreateVectorArray))(aug_dim,x); (lgmres_data -> aug_order) = hypre_CTAllocF(HYPRE_Int,aug_dim,lgmres_functions); /*---*/ if ((lgmres_data -> matvec_data) == NULL) (lgmres_data -> matvec_data) = (*(lgmres_functions->MatvecCreate))(A, x); precond_setup(precond_data, A, b, x); /*----------------------------------------------------- * Allocate space for log info *-----------------------------------------------------*/ if ( (lgmres_data->logging)>0 || (lgmres_data->print_level) > 0 ) { if ((lgmres_data -> norms) == NULL) (lgmres_data -> norms) = hypre_CTAllocF(HYPRE_Real, max_iter + 1,lgmres_functions); } if ( (lgmres_data->print_level) > 0 ) { if ((lgmres_data -> log_file_name) == NULL) (lgmres_data -> log_file_name) = (char*)"gmres.out.log"; } return hypre_error_flag; }