/*--------------------------------------------------------------------------
 * HYPRE_ParCSRParaSailsCreate
 *
 * Allocates a Secret wrapper, fills in its default parameter values, creates
 * the wrapped ParaSails object through hypre_ParaSailsCreate(), and hands the
 * wrapper back through *solver.
 *
 * comm   - communicator stored in the wrapper and passed to
 *          hypre_ParaSailsCreate()
 * solver - output; receives the new wrapper cast to HYPRE_Solver
 *
 * Returns hypre_error_flag.  If the allocation fails, HYPRE_ERROR_MEMORY is
 * raised and *solver is left untouched.
 *--------------------------------------------------------------------------*/
HYPRE_Int
HYPRE_ParCSRParaSailsCreate( MPI_Comm      comm,
                             HYPRE_Solver *solver )
{
   Secret *handle = (Secret *) malloc(sizeof *handle);

   if (!handle)
   {
      hypre_error(HYPRE_ERROR_MEMORY);
      return hypre_error_flag;
   }

   /* bookkeeping fields */
   handle->comm    = comm;
   handle->logging = 0;
   handle->reuse   = 0;

   /* default preconditioner parameters */
   handle->sym     = 1;
   handle->nlevels = 1;
   handle->thresh  = 0.1;
   handle->filter  = 0.1;
   handle->loadbal = 0.0;

   /* create the wrapped object, then publish the wrapper to the caller */
   hypre_ParaSailsCreate(comm, &handle->obj);
   *solver = (HYPRE_Solver) handle;

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_ParaSailsSetupValues
 *
 * Recomputes the values of an existing ParaSails object for the matrix in
 * distmat: the matrix is converted with convert_matrix(), the load-balance
 * parameter is stored, the pattern setup time is reset to zero and
 * ParaSailsSetupValues() is run.  The converted matrix is destroyed before
 * returning.
 *
 * obj     - ParaSails handle (a hypre_ParaSails_struct)
 * distmat - distributed matrix to convert
 * filter  - forwarded to ParaSailsSetupValues()
 * loadbal - stored in ps->loadbal_beta
 * logging - when nonzero, ParaSailsStatsValues() is called
 *
 * Returns hypre_error_flag; HYPRE_ERROR_GENERIC is raised when
 * ParaSailsSetupValues() reports a nonzero status.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParaSailsSetupValues(hypre_ParaSails          obj,
                           HYPRE_DistributedMatrix *distmat,
                           HYPRE_Real               filter,
                           HYPRE_Real               loadbal,
                           HYPRE_Int                logging)
{
   hypre_ParaSails_struct *ctx       = (hypre_ParaSails_struct *) obj;
   Matrix                 *local_mat = convert_matrix(ctx->comm, distmat);
   HYPRE_Int               status;

   ctx->ps->loadbal_beta       = loadbal;
   ctx->ps->setup_pattern_time = 0.0;

   status = ParaSailsSetupValues(ctx->ps, local_mat, filter);

   if (logging)
   {
      ParaSailsStatsValues(ctx->ps, local_mat);
   }

   MatrixDestroy(local_mat);

   if (status)
   {
      hypre_error(HYPRE_ERROR_GENERIC);
   }

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_ParaSailsSetup
 *
 * Full setup: converts distmat with convert_matrix(), destroys the ParaSails
 * object currently held by the handle, creates a new one for the converted
 * matrix's row range, then runs the pattern setup followed by the values
 * setup.  The converted matrix is destroyed before returning.
 *
 * obj     - ParaSails handle (a hypre_ParaSails_struct)
 * distmat - distributed matrix to convert
 * sym     - forwarded to ParaSailsCreate()
 * thresh  - forwarded to ParaSailsSetupPattern()
 * nlevels - forwarded to ParaSailsSetupPattern()
 * filter  - forwarded to ParaSailsSetupValues()
 * loadbal - stored in ps->loadbal_beta before the values setup
 * logging - when nonzero, pattern and values statistics routines are called
 *
 * Returns hypre_error_flag; HYPRE_ERROR_GENERIC is raised when
 * ParaSailsSetupValues() reports a nonzero status.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParaSailsSetup(hypre_ParaSails          obj,
                     HYPRE_DistributedMatrix *distmat,
                     HYPRE_Int                sym,
                     HYPRE_Real               thresh,
                     HYPRE_Int                nlevels,
                     HYPRE_Real               filter,
                     HYPRE_Real               loadbal,
                     HYPRE_Int                logging)
{
   hypre_ParaSails_struct *ctx       = (hypre_ParaSails_struct *) obj;
   Matrix                 *local_mat = convert_matrix(ctx->comm, distmat);
   HYPRE_Int               status;

   /* replace whatever object the handle held with a fresh one */
   ParaSailsDestroy(ctx->ps);
   ctx->ps = ParaSailsCreate(ctx->comm, local_mat->beg_row,
                             local_mat->end_row, sym);

   /* pattern phase */
   ParaSailsSetupPattern(ctx->ps, local_mat, thresh, nlevels);
   if (logging)
   {
      ParaSailsStatsPattern(ctx->ps, local_mat);
   }

   /* values phase */
   ctx->ps->loadbal_beta = loadbal;
   status = ParaSailsSetupValues(ctx->ps, local_mat, filter);
   if (logging)
   {
      ParaSailsStatsValues(ctx->ps, local_mat);
   }

   MatrixDestroy(local_mat);

   if (status)
   {
      hypre_error(HYPRE_ERROR_GENERIC);
   }

   return hypre_error_flag;
}
HYPRE_Int hypre_LGMRESSolve(void *lgmres_vdata, void *A, void *b, void *x) { hypre_LGMRESData *lgmres_data = (hypre_LGMRESData *)lgmres_vdata; hypre_LGMRESFunctions *lgmres_functions = lgmres_data->functions; HYPRE_Int k_dim = (lgmres_data -> k_dim); HYPRE_Int min_iter = (lgmres_data -> min_iter); HYPRE_Int max_iter = (lgmres_data -> max_iter); HYPRE_Real r_tol = (lgmres_data -> tol); HYPRE_Real cf_tol = (lgmres_data -> cf_tol); HYPRE_Real a_tol = (lgmres_data -> a_tol); void *matvec_data = (lgmres_data -> matvec_data); void *r = (lgmres_data -> r); void *w = (lgmres_data -> w); void **p = (lgmres_data -> p); /* lgmres mod*/ void **aug_vecs = (lgmres_data ->aug_vecs); void **a_aug_vecs = (lgmres_data ->a_aug_vecs); HYPRE_Int *aug_order = (lgmres_data->aug_order); HYPRE_Int aug_dim = (lgmres_data -> aug_dim); HYPRE_Int approx_constant= (lgmres_data ->approx_constant); HYPRE_Int it_arnoldi, aug_ct, it_total, ii, order, it_aug; HYPRE_Int spot = 0; HYPRE_Real tmp_norm, r_norm_last; /*---*/ HYPRE_Int (*precond)(void*,void*,void*,void*) = (lgmres_functions -> precond); HYPRE_Int *precond_data = (HYPRE_Int*)(lgmres_data -> precond_data); HYPRE_Int print_level = (lgmres_data -> print_level); HYPRE_Int logging = (lgmres_data -> logging); HYPRE_Real *norms = (lgmres_data -> norms); HYPRE_Int break_value = 0; HYPRE_Int i, j, k; HYPRE_Real *rs, **hh, *c, *s; HYPRE_Int iter; HYPRE_Int my_id, num_procs; HYPRE_Real epsilon, gamma, t, r_norm, b_norm, den_norm; HYPRE_Real epsmac = 1.e-16; HYPRE_Real ieee_check = 0.; HYPRE_Real cf_ave_0 = 0.0; HYPRE_Real cf_ave_1 = 0.0; HYPRE_Real weight; HYPRE_Real r_norm_0; /* We are not checking rel. change for now... */ (lgmres_data -> converged) = 0; /*----------------------------------------------------------------------- * With relative change convergence test on, it is possible to attempt * another iteration with a zero residual. This causes the parameter * alpha to go NaN. The guard_zero_residual parameter is to circumvent * this. 
Perhaps it should be set to something non-zero (but small). *-----------------------------------------------------------------------*/ (*(lgmres_functions->CommInfo))(A,&my_id,&num_procs); if ( logging>0 || print_level>0 ) { norms = (lgmres_data -> norms); /* not used yet log_file_name = (lgmres_data -> log_file_name);*/ /* fp = fopen(log_file_name,"w"); */ } /* initialize work arrays - lgmres includes aug_dim*/ rs = hypre_CTAllocF(HYPRE_Real,k_dim+1+aug_dim,lgmres_functions); c = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); s = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); /* lgmres mod. - need non-modified hessenberg to avoid aug_dim matvecs */ hh = hypre_CTAllocF(HYPRE_Real*,k_dim+aug_dim+1,lgmres_functions); for (i=0; i < k_dim+aug_dim+1; i++) { hh[i] = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); } (*(lgmres_functions->CopyVector))(b,p[0]); /* compute initial residual */ (*(lgmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]); b_norm = sqrt((*(lgmres_functions->InnerProd))(b,b)); /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied b.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... 
END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } r_norm = sqrt((*(lgmres_functions->InnerProd))(p[0],p[0])); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied A or x_0.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... 
END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } if ( logging>0 || print_level > 0) { norms[0] = r_norm; if ( print_level>1 && my_id == 0 ) { hypre_printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) hypre_printf("Rel_resid_norm actually contains the residual norm\n"); hypre_printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */ den_norm= b_norm; } else { /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */ den_norm= r_norm; }; /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm) den_norm = |r_0| or |b| note: default for a_tol is 0.0, so relative residual criteria is used unless user specifies a_tol, or sets r_tol = 0.0, which means absolute tol only is checked */ epsilon = hypre_max(a_tol,r_tol*den_norm); /* so now our stop criteria is |r_i| <= epsilon */ if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate rel.res.norm\n"); hypre_printf("----- ------------ ---------- ------------\n"); } else {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate\n"); hypre_printf("----- ------------ ----------\n"); }; } /*lgmres initialization */ for (ii=0; ii<aug_dim; ii++) { aug_order[ii] = 0; } aug_ct = 0; /* number of aug. 
vectors available */ /* outer iteration cycle */ while (iter < max_iter) { /* initialize first term of hessenberg system */ rs[0] = r_norm; if (r_norm == 0.0) { hypre_TFreeF(c,lgmres_functions); hypre_TFreeF(s,lgmres_functions); hypre_TFreeF(rs,lgmres_functions); for (i=0; i < k_dim+aug_dim+1; i++) { hypre_TFreeF(hh[i],lgmres_functions); } hypre_TFreeF(hh,lgmres_functions); return hypre_error_flag; } /* see if we are already converged and should print the final norm and exit */ if (r_norm <= epsilon && iter >= min_iter) { (*(lgmres_functions->CopyVector))(b,r); (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(lgmres_functions->InnerProd))(r,r)); if (r_norm <= epsilon) { if ( print_level>1 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) hypre_printf("false convergence 1\n"); } t = 1.0 / r_norm; r_norm_last = r_norm; (*(lgmres_functions->ScaleVector))(t,p[0]); i = 0; /* lgmres mod: determine number of arnoldi steps to take */ /* if approx_constant then we keep the space the same size even if we don't have the full number of aug vectors yet*/ if (approx_constant) { it_arnoldi = k_dim - aug_ct; } else { it_arnoldi = k_dim - aug_dim; } it_total = it_arnoldi + aug_ct; it_aug = 0; /* keep track of augmented iterations */ /***RESTART CYCLE (right-preconditioning) ***/ while (i < it_total && iter < max_iter) { i++; iter++; (*(lgmres_functions->ClearVector))(r); /*LGMRES_MOD: decide whether this is an arnoldi step or an aug step */ if ( i <= it_arnoldi) { /* Arnoldi */ precond(precond_data, A, p[i-1], r); (*(lgmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]); } else { /*lgmres aug step */ it_aug ++; order = i - it_arnoldi - 1; /* which aug step (note i starts at 1) - aug order number at 0*/ for (ii=0; ii<aug_dim; ii++) { if (aug_order[ii] == order) { spot = ii; break; /* must have this because there will be duplicates before aug_ct = 
aug_dim */ } } /* copy a_aug_vecs[spot] to p[i] */ (*(lgmres_functions->CopyVector))(a_aug_vecs[spot],p[i]); /*note: an alternate implementation choice would be to only save the AUGVECS and not A_AUGVEC and then apply the PC here to the augvec */ } /*---*/ /* modified Gram_Schmidt */ for (j=0; j < i; j++) { hh[j][i-1] = (*(lgmres_functions->InnerProd))(p[j],p[i]); (*(lgmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]); } t = sqrt((*(lgmres_functions->InnerProd))(p[i],p[i])); hh[i][i-1] = t; if (t != 0.0) { t = 1.0/t; (*(lgmres_functions->ScaleVector))(t,p[i]); } /* done with modified Gram_schmidt and Arnoldi step. update factorization of hh */ for (j = 1; j < i; j++) { t = hh[j-1][i-1]; hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t; hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1]; } t= hh[i][i-1]*hh[i][i-1]; t+= hh[i-1][i-1]*hh[i-1][i-1]; gamma = sqrt(t); if (gamma == 0.0) gamma = epsmac; c[i-1] = hh[i-1][i-1]/gamma; s[i-1] = hh[i][i-1]/gamma; rs[i] = -hh[i][i-1]*rs[i-1]; rs[i]/= gamma; rs[i-1] = c[i-1]*rs[i-1]; /* determine residual norm */ hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1]; r_norm = fabs(rs[i]); /* print ? */ if ( print_level>0 ) { norms[iter] = r_norm; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) hypre_printf("% 5d %e %f %e\n", iter, norms[iter],norms[iter]/norms[iter-1], norms[iter]/b_norm); else hypre_printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } } /*convergence factor tolerance */ if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; #if 0 hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n", i, cf_ave_1, cf_ave_0, weight ); #endif if (weight * cf_ave_1 > cf_tol) { break_value = 1; break; } } /* should we exit the restart cycle? (conv. 
check) */ if (r_norm <= epsilon && iter >= min_iter) { break; } } /*** end of restart cycle ***/ /* now compute solution, first solve upper triangular system */ if (break_value) break; rs[i-1] = rs[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = 0.0; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs[j]; } t+= rs[k]; rs[k] = t/hh[k][k]; } /* form linear combination of p's to get solution */ /* put the new aug_vector in aug_vecs[aug_dim] - a temp position*/ /* i = number of iterations */ /* it_aug = number of augmented iterations */ /* it_arnoldi = number of arnoldi iterations */ /*check if exited early before all arnoldi its */ if (it_arnoldi > i) it_arnoldi = i; if (!it_aug) { (*(lgmres_functions->CopyVector))(p[i-1],w); (*(lgmres_functions->ScaleVector))(rs[i-1],w); for (j = i-2; j >=0; j--) (*(lgmres_functions->Axpy))(rs[j], p[j], w); } else /* need some of the augvecs */ { (*(lgmres_functions->CopyVector))(p[0],w); (*(lgmres_functions->ScaleVector))(rs[0],w); /* reg. arnoldi directions */ for (j = 1; j < it_arnoldi; j++) /*first one already done */ { (*(lgmres_functions->Axpy))(rs[j], p[j], w); } /* augment directions */ for (ii=0; ii<it_aug; ii++) { for (j=0; j<aug_dim; j++) { if (aug_order[j] == ii) { spot = j; break; /* must have this because there will be * duplicates before aug_ct = aug_dim */ } } (*(lgmres_functions->Axpy))(rs[it_arnoldi+ii], aug_vecs[spot], w); } } /* grab the new aug vector before the prec*/ (*(lgmres_functions->CopyVector))(w,aug_vecs[aug_dim]); (*(lgmres_functions->ClearVector))(r); /* find correction (in r) (un-wind precond.)*/ precond(precond_data, A, w, r); /* update current solution x (in x) */ (*(lgmres_functions->Axpy))(1.0,r,x); /* check for convergence by evaluating the actual residual */ if (r_norm <= epsilon && iter >= min_iter) { /* calculate actual residual norm*/ (*(lgmres_functions->CopyVector))(b,r); (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt( (*(lgmres_functions->InnerProd))(r,r) ); if 
(r_norm <= epsilon) { if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (lgmres_data -> converged) = 1; break; } else /* conv. has not occurred, according to true residual */ { if ( print_level>0 && my_id == 0) hypre_printf("false convergence 2\n"); (*(lgmres_functions->CopyVector))(r,p[0]); i = 0; } } /* end of convergence check */ /* compute residual vector and continue loop */ /* copy r0 (not scaled) to w*/ (*(lgmres_functions->CopyVector))(p[0],w); (*(lgmres_functions->ScaleVector))(r_norm_last,w); for (j=i ; j > 0; j--) { rs[j-1] = -s[j-1]*rs[j]; rs[j] = c[j-1]*rs[j]; } if (i) (*(lgmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]); for (j=i-1 ; j > 0; j--) (*(lgmres_functions->Axpy))(rs[j],p[j],p[i]); if (i) { (*(lgmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]); (*(lgmres_functions->Axpy))(1.0,p[i],p[0]); } /* lgmres mod */ /* collect aug vector and A*augvector for future restarts - only if we will be restarting (i.e. this cycle performed it_total iterations). ordering starts at 0.*/ if (aug_dim > 0) { if (!aug_ct) { spot = 0; aug_ct++; } else if (aug_ct < aug_dim) { spot = aug_ct; aug_ct++; } else { /* truncate - already have aug_dim number of vectors*/ for (ii=0; ii<aug_dim; ii++) { if (aug_order[ii] == (aug_dim-1)) { spot = ii; } } } /* aug_vecs[aug_dim] contains new aug vector */ (*(lgmres_functions->CopyVector))(aug_vecs[aug_dim], aug_vecs[spot]); /*need to normalize */ tmp_norm = sqrt((*(lgmres_functions->InnerProd))(aug_vecs[spot], aug_vecs[spot])); tmp_norm = 1.0/tmp_norm; (*(lgmres_functions->ScaleVector))(tmp_norm ,aug_vecs[spot]); /*set new aug vector to order 0 - move all others back one */ for (ii=0; ii < aug_dim; ii++) { aug_order[ii]++; } aug_order[spot] = 0; /*now add the A*aug vector to A_AUGVEC(spot) - this is * independ. 
of preconditioning type*/ /* A*augvec = V*H*y = r0-rm (r0 is in w and rm is in p[0])*/ (*(lgmres_functions->CopyVector))( w, a_aug_vecs[spot]); (*(lgmres_functions->ScaleVector))(- 1.0, a_aug_vecs[spot]); /* -r0*/ (*(lgmres_functions->Axpy))(1.0, p[0],a_aug_vecs[spot]); /* rm - r0 */ (*(lgmres_functions->ScaleVector))(-tmp_norm, a_aug_vecs[spot]); /* r0-rm /norm */ } } /* END of iteration while loop */ if ( print_level>1 && my_id == 0 ) hypre_printf("\n\n"); (lgmres_data -> num_iterations) = iter; if (b_norm > 0.0) (lgmres_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (lgmres_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV); hypre_TFreeF(c,lgmres_functions); hypre_TFreeF(s,lgmres_functions); hypre_TFreeF(rs,lgmres_functions); for (i=0; i < k_dim+1+aug_dim; i++) { hypre_TFreeF(hh[i],lgmres_functions); } hypre_TFreeF(hh,lgmres_functions); return hypre_error_flag; }
/*--------------------------------------------------------------------------
 * hypre_BoomerAMGSolve
 *
 * Runs AMG cycles on A u = f until the relative residual falls below tol
 * (after at least min_iter cycles) or max_iter cycles have been performed.
 * Each cycle is hypre_BoomerAMGCycle() unless one of the additive / mult-
 * additive / simple level settings lies inside [0, num_levels), in which
 * case hypre_BoomerAMGAdditiveCycle() is used.
 *
 * amg_vdata - solver state (hypre_ParAMGData *)
 * A, f, u   - installed as level 0 of the A/F/U arrays; u is updated by the
 *             cycles
 *
 * The relative residual is |A u - f| / |f| when |f| is nonzero and the plain
 * residual norm otherwise.  Note that |f| is only computed when
 * amg_print_level > 1 or amg_logging > 1; otherwise rhs_norm stays 0 and the
 * plain residual norm is used.
 *
 * Returns hypre_error_flag.  HYPRE_ERROR_GENERIC is raised when the initial
 * residual norm is Inf/NaN (checked only when print level or logging > 1).
 * HYPRE_ERROR_CONV is raised when tol > 0, max_iter cycles were used and the
 * tolerance is still not met.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGSolve( void               *amg_vdata,
                      hypre_ParCSRMatrix *A,
                      hypre_ParVector    *f,
                      hypre_ParVector    *u )
{
   MPI_Comm          comm = hypre_ParCSRMatrixComm(A);

   hypre_ParAMGData *amg_data = amg_vdata;

   /* Data Structure variables */

   HYPRE_Int    amg_print_level;
   HYPRE_Int    amg_logging;
   HYPRE_Int    cycle_count;
   HYPRE_Int    num_levels;
   HYPRE_Real   tol;

   HYPRE_Int    block_mode;

   hypre_ParCSRMatrix      **A_array;
   hypre_ParVector         **F_array;
   hypre_ParVector         **U_array;

   hypre_ParCSRBlockMatrix **A_block_array;

   /* Local variables */

   HYPRE_Int    j;
   HYPRE_Int    Solve_err_flag;
   HYPRE_Int    min_iter;
   HYPRE_Int    max_iter;
   HYPRE_Int    num_procs, my_id;
   HYPRE_Int    additive;
   HYPRE_Int    mult_additive;
   HYPRE_Int    simple;

   HYPRE_Real   alpha = 1.0;
   HYPRE_Real   beta = -1.0;
   HYPRE_Real   cycle_op_count;
   HYPRE_Real   total_coeffs;
   HYPRE_Real   total_variables;
   HYPRE_Real  *num_coeffs;
   HYPRE_Real  *num_variables;
   HYPRE_Real   cycle_cmplxty = 0.0;
   HYPRE_Real   operat_cmplxty;
   HYPRE_Real   grid_cmplxty;
   HYPRE_Real   conv_factor = 0.0;
   HYPRE_Real   resid_nrm = 1.0;
   HYPRE_Real   resid_nrm_init = 0.0;
   HYPRE_Real   relative_resid;
   HYPRE_Real   rhs_norm = 0.0;
   HYPRE_Real   old_resid;
   HYPRE_Real   ieee_check = 0.;

   hypre_ParVector *Vtemp;
   hypre_ParVector *Residual = NULL; /* only fetched and used when amg_logging > 1 */

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   amg_print_level = hypre_ParAMGDataPrintLevel(amg_data);
   amg_logging = hypre_ParAMGDataLogging(amg_data);
   if ( amg_logging > 1 )
      Residual = hypre_ParAMGDataResidual(amg_data);
   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   A_array = hypre_ParAMGDataAArray(amg_data);
   F_array = hypre_ParAMGDataFArray(amg_data);
   U_array = hypre_ParAMGDataUArray(amg_data);

   tol = hypre_ParAMGDataTol(amg_data);
   min_iter = hypre_ParAMGDataMinIter(amg_data);
   max_iter = hypre_ParAMGDataMaxIter(amg_data);
   additive = hypre_ParAMGDataAdditive(amg_data);
   simple = hypre_ParAMGDataSimple(amg_data);
   mult_additive = hypre_ParAMGDataMultAdditive(amg_data);

   /* install the caller's system as the finest level */
   A_array[0] = A;
   F_array[0] = f;
   U_array[0] = u;

   block_mode = hypre_ParAMGDataBlockMode(amg_data);

   A_block_array = hypre_ParAMGDataABlockArray(amg_data);

   Vtemp = hypre_ParAMGDataVtemp(amg_data);

   /*-----------------------------------------------------------------------
    * Write the solver parameters
    *-----------------------------------------------------------------------*/

   if (my_id == 0 && amg_print_level > 1)
      hypre_BoomerAMGWriteSolverParams(amg_data);

   /*-----------------------------------------------------------------------
    * Initialize the solver error flag and assorted bookkeeping variables
    *-----------------------------------------------------------------------*/

   Solve_err_flag = 0;

   total_coeffs = 0;
   total_variables = 0;
   cycle_count = 0;
   operat_cmplxty = 0;
   grid_cmplxty = 0;

   /*-----------------------------------------------------------------------
    * write some initial info
    *-----------------------------------------------------------------------*/

   if (my_id == 0 && amg_print_level > 1 && tol > 0.)
      hypre_printf("\n\nAMG SOLUTION INFO:\n");

   /*-----------------------------------------------------------------------
    * Compute initial fine-grid residual and print
    *-----------------------------------------------------------------------*/

   if (amg_print_level > 1 || amg_logging > 1)
   {
      if ( amg_logging > 1 )
      {
         hypre_ParVectorCopy(F_array[0], Residual );
         if (tol > 0)
            hypre_ParCSRMatrixMatvec(alpha, A_array[0], U_array[0], beta, Residual );
         resid_nrm = sqrt(hypre_ParVectorInnerProd( Residual, Residual ));
      }
      else
      {
         hypre_ParVectorCopy(F_array[0], Vtemp);
         if (tol > 0)
            hypre_ParCSRMatrixMatvec(alpha, A_array[0], U_array[0], beta, Vtemp);
         resid_nrm = sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
      }

      /* Since it does not diminish performance, attempt to return an error flag
         and notify users when they supply bad input. */
      if (resid_nrm != 0.) ieee_check = resid_nrm/resid_nrm; /* INF -> NaN conversion */
      if (ieee_check != ieee_check)
      {
         /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
            for ieee_check self-equality works on all IEEE-compliant compilers/
            machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
            by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
            found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
         if (amg_print_level > 0)
         {
            hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
            hypre_printf("ERROR -- hypre_BoomerAMGSolve: INFs and/or NaNs detected in input.\n");
            hypre_printf("User probably placed non-numerics in supplied A, x_0, or b.\n");
            hypre_printf("ERROR detected by Hypre ... END\n\n\n");
         }
         hypre_error(HYPRE_ERROR_GENERIC);
         return hypre_error_flag;
      }

      resid_nrm_init = resid_nrm;
      rhs_norm = sqrt(hypre_ParVectorInnerProd(f, f));
      if (rhs_norm)
      {
         relative_resid = resid_nrm_init / rhs_norm;
      }
      else
      {
         relative_resid = resid_nrm_init;
      }
   }
   else
   {
      /* no initial residual computed; start from a placeholder value */
      relative_resid = 1.;
   }

   if (my_id == 0 && amg_print_level > 1)
   {
      hypre_printf(" relative\n");
      hypre_printf(" residual factor residual\n");
      hypre_printf(" -------- ------ --------\n");
      hypre_printf(" Initial %e %e\n",resid_nrm_init,
                   relative_resid);
   }

   /*-----------------------------------------------------------------------
    * Main V-cycle loop
    *-----------------------------------------------------------------------*/

   while ((relative_resid >= tol || cycle_count < min_iter)
          && cycle_count < max_iter)
   {
      hypre_ParAMGDataCycleOpCount(amg_data) = 0;
      /* Op count only needed for one cycle */

      if ((additive < 0 || additive >= num_levels)
          && (mult_additive < 0 || mult_additive >= num_levels)
          && (simple < 0 || simple >= num_levels) )
         hypre_BoomerAMGCycle(amg_data, F_array, U_array);
      else
         hypre_BoomerAMGAdditiveCycle(amg_data);

      /*---------------------------------------------------------------
       * Compute fine-grid residual and residual norm
       *----------------------------------------------------------------*/

      if (amg_print_level > 1 || amg_logging > 1 || tol > 0.)
      {
         old_resid = resid_nrm;

         if ( amg_logging > 1 )
         {
            hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[0], U_array[0], beta, F_array[0], Residual );
            resid_nrm = sqrt(hypre_ParVectorInnerProd( Residual, Residual ));
         }
         else
         {
            hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[0], U_array[0], beta, F_array[0], Vtemp);
            resid_nrm = sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
         }

         if (old_resid) conv_factor = resid_nrm / old_resid;
         else conv_factor = resid_nrm;
         if (rhs_norm)
         {
            relative_resid = resid_nrm / rhs_norm;
         }
         else
         {
            relative_resid = resid_nrm;
         }

         hypre_ParAMGDataRelativeResidualNorm(amg_data) = relative_resid;
      }

      ++cycle_count;

      hypre_ParAMGDataNumIterations(amg_data) = cycle_count;
#ifdef CUMNUMIT
      ++hypre_ParAMGDataCumNumIterations(amg_data);
#endif

      if (my_id == 0 && amg_print_level > 1)
      {
         hypre_printf(" Cycle %2d %e %f %e \n", cycle_count,
                      resid_nrm, conv_factor, relative_resid);
      }
   }

   /* Report non-convergence only when the cycle limit was hit AND the
      tolerance is still unmet.  Reaching the tolerance exactly on the last
      permitted cycle is a success.  The test is written as !(x < tol) so a
      NaN residual at the limit is still reported as a failure. */
   if (cycle_count == max_iter && tol > 0. && !(relative_resid < tol))
   {
      Solve_err_flag = 1;
      hypre_error(HYPRE_ERROR_CONV);
   }

   /*-----------------------------------------------------------------------
    * Compute closing statistics
    *-----------------------------------------------------------------------*/

   if (cycle_count > 0 && resid_nrm_init)
      conv_factor = pow((resid_nrm/resid_nrm_init),(1.0/(HYPRE_Real) cycle_count));
   else
      conv_factor = 1.;

   if (amg_print_level > 1)
   {
      num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels);
      num_variables = hypre_CTAlloc(HYPRE_Real, num_levels);
      num_coeffs[0] = hypre_ParCSRMatrixDNumNonzeros(A);
      num_variables[0] = hypre_ParCSRMatrixGlobalNumRows(A);

      if (block_mode)
      {
         for (j = 1; j < num_levels; j++)
         {
            num_coeffs[j] = (HYPRE_Real) hypre_ParCSRBlockMatrixNumNonzeros(A_block_array[j]);
            num_variables[j] = (HYPRE_Real) hypre_ParCSRBlockMatrixGlobalNumRows(A_block_array[j]);
         }
         /* in block mode, level 0 is taken from the block matrix as well */
         num_coeffs[0] = hypre_ParCSRBlockMatrixDNumNonzeros(A_block_array[0]);
         num_variables[0] = hypre_ParCSRBlockMatrixGlobalNumRows(A_block_array[0]);
      }
      else
      {
         for (j = 1; j < num_levels; j++)
         {
            num_coeffs[j] = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(A_array[j]);
            num_variables[j] = (HYPRE_Real) hypre_ParCSRMatrixGlobalNumRows(A_array[j]);
         }
      }

      for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++)
      {
         total_coeffs += num_coeffs[j];
         total_variables += num_variables[j];
      }

      cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data);

      if (num_variables[0])
         grid_cmplxty = total_variables / num_variables[0];
      if (num_coeffs[0])
      {
         operat_cmplxty = total_coeffs / num_coeffs[0];
         cycle_cmplxty = cycle_op_count / num_coeffs[0];
      }

      if (my_id == 0)
      {
         if (Solve_err_flag == 1)
         {
            hypre_printf("\n\n==============================================");
            hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
            hypre_printf(" within the allowed %d V-cycles\n",max_iter);
            hypre_printf("==============================================");
         }
         hypre_printf("\n\n Average Convergence Factor = %f",conv_factor);
         hypre_printf("\n\n Complexity: grid = %f\n",grid_cmplxty);
         hypre_printf(" operator = %f\n",operat_cmplxty);
         hypre_printf(" cycle = %f\n\n\n\n",cycle_cmplxty);
      }

      hypre_TFree(num_coeffs);
      hypre_TFree(num_variables);
   }

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_ParVectorReadIJ
 *
 * Reads this rank's piece of a parallel vector from the text file
 * "<filename>.<myid as 5 digits>".  The file is parsed as: the global size,
 * the partitioning entries, then one "index value" pair per local entry
 * (the index is read and discarded).
 *
 * comm       - communicator used for rank/size and for the new vector
 * filename   - base file name; the rank suffix is appended here
 * base_j_ptr - output; index base recovered from the partitioning.  Without
 *              HYPRE_NO_GLOBAL_PARTITION this is the first partitioning
 *              entry in the file; with it, the base is not derived from the
 *              file and 0 is reported.
 * vector_ptr - output; the newly created and filled vector
 *
 * Returns hypre_error_flag.  HYPRE_ERROR_GENERIC is raised if the file
 * cannot be opened (outputs are then left untouched) or if the created
 * vector is a multivector.
 *
 * NOTE(review): new_filename is a fixed 255-byte buffer written with an
 * unbounded hypre_sprintf; a very long `filename` would overflow it.
 * NOTE(review): hypre_fscanf results are not checked, so a short or
 * malformed file is not detected here.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParVectorReadIJ( MPI_Comm          comm,
                       const char       *filename,
                       HYPRE_Int        *base_j_ptr,
                       hypre_ParVector **vector_ptr)
{
   HYPRE_Int        global_size;
   hypre_ParVector *vector;
   hypre_Vector    *local_vector;
   double          *local_data;
   HYPRE_Int       *partitioning;
   /* Initialized so the HYPRE_NO_GLOBAL_PARTITION build, which never derives
      a base from the file, does not hand an indeterminate value back. */
   HYPRE_Int        base_j = 0;

   HYPRE_Int        myid, num_procs, i, j, J;
   char             new_filename[255];
   FILE            *file;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &myid);

   hypre_sprintf(new_filename,"%s.%05d", filename, myid);

   if ((file = fopen(new_filename, "r")) == NULL)
   {
      /* the file is opened for reading, so report it as an input file */
      hypre_printf("Error: can't open input file %s\n", new_filename);
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   hypre_fscanf(file, "%d", &global_size);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   /* this may need to be changed so that the base is available in the file! */
   partitioning = hypre_CTAlloc(HYPRE_Int,2);

   hypre_fscanf(file, "%d", partitioning);
   for (i = 0; i < 2; i++)
   {
      hypre_fscanf(file, "%d", partitioning+i);
   }
#else
   partitioning = hypre_CTAlloc(HYPRE_Int,num_procs+1);

   hypre_fscanf(file, "%d", partitioning);
   for (i = 1; i <= num_procs; i++)
   {
      /* shift every entry so the partitioning becomes zero-based */
      hypre_fscanf(file, "%d", partitioning+i);
      partitioning[i] -= partitioning[0];
   }
   base_j = partitioning[0];
   partitioning[0] = 0;
#endif
   vector = hypre_ParVectorCreate(comm, global_size,
                                  partitioning);

   hypre_ParVectorInitialize(vector);

   local_vector = hypre_ParVectorLocalVector(vector);
   local_data = hypre_VectorData(local_vector);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   for (j = 0; j < partitioning[1] - partitioning[0]; j++)
#else
   for (j = 0; j < partitioning[myid+1] - partitioning[myid]; j++)
#endif
   {
      /* J (the stored index) is read only to advance past it */
      hypre_fscanf(file, "%d %le", &J, local_data + j);
   }

   fclose(file);

   *base_j_ptr = base_j;
   *vector_ptr = vector;

   /* multivector code not written yet >>> */
   hypre_assert( hypre_ParVectorNumVectors(vector) == 1 );
   if ( hypre_ParVectorNumVectors(vector) != 1 ) hypre_error(HYPRE_ERROR_GENERIC);

   return hypre_error_flag;
}
HYPRE_Int hypre_BiCGSTABSolve(void *bicgstab_vdata, void *A, void *b, void *x) { hypre_BiCGSTABData *bicgstab_data = (hypre_BiCGSTABData*)bicgstab_vdata; hypre_BiCGSTABFunctions *bicgstab_functions = bicgstab_data->functions; HYPRE_Int min_iter = (bicgstab_data -> min_iter); HYPRE_Int max_iter = (bicgstab_data -> max_iter); HYPRE_Int stop_crit = (bicgstab_data -> stop_crit); HYPRE_Real r_tol = (bicgstab_data -> tol); HYPRE_Real cf_tol = (bicgstab_data -> cf_tol); void *matvec_data = (bicgstab_data -> matvec_data); HYPRE_Real a_tol = (bicgstab_data -> a_tol); void *r = (bicgstab_data -> r); void *r0 = (bicgstab_data -> r0); void *s = (bicgstab_data -> s); void *v = (bicgstab_data -> v); void *p = (bicgstab_data -> p); void *q = (bicgstab_data -> q); HYPRE_Int (*precond)(void*,void*,void*,void*) = (bicgstab_functions -> precond); HYPRE_Int *precond_data = (HYPRE_Int*)(bicgstab_data -> precond_data); /* logging variables */ HYPRE_Int logging = (bicgstab_data -> logging); HYPRE_Int print_level = (bicgstab_data -> print_level); HYPRE_Real *norms = (bicgstab_data -> norms); /* char *log_file_name = (bicgstab_data -> log_file_name); FILE *fp; */ HYPRE_Int iter; HYPRE_Int my_id, num_procs; HYPRE_Real alpha, beta, gamma, epsilon, temp, res, r_norm, b_norm; HYPRE_Real epsmac = 1.e-128; HYPRE_Real ieee_check = 0.; HYPRE_Real cf_ave_0 = 0.0; HYPRE_Real cf_ave_1 = 0.0; HYPRE_Real weight; HYPRE_Real r_norm_0; HYPRE_Real den_norm; HYPRE_Real gamma_numer; HYPRE_Real gamma_denom; (bicgstab_data -> converged) = 0; (*(bicgstab_functions->CommInfo))(A,&my_id,&num_procs); if (logging > 0 || print_level > 0) { norms = (bicgstab_data -> norms); /* log_file_name = (bicgstab_data -> log_file_name); fp = fopen(log_file_name,"w"); */ } /* initialize work arrays */ (*(bicgstab_functions->CopyVector))(b,r0); /* compute initial residual */ (*(bicgstab_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, r0); (*(bicgstab_functions->CopyVector))(r0,r); (*(bicgstab_functions->CopyVector))(r0,p); 
b_norm = sqrt((*(bicgstab_functions->InnerProd))(b,b)); /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_BiCGSTABSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied b.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } res = (*(bicgstab_functions->InnerProd))(r0,r0); r_norm = sqrt(res); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... 
BEGIN\n"); hypre_printf("ERROR -- hypre_BiCGSTABSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied A or x_0.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } if (logging > 0 || print_level > 0) { norms[0] = r_norm; if (print_level > 0 && my_id == 0) { hypre_printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) hypre_printf("Rel_resid_norm actually contains the residual norm\n"); hypre_printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i| <= r_tol*|b| if |b| > 0 */ den_norm = b_norm; } else { /* convergence criterion |r_i| <= r_tol*|r0| if |b| = 0 */ den_norm = r_norm; }; /* convergence criterion |r_i| <= r_tol/a_tol , absolute residual norm*/ if (stop_crit) { if (a_tol == 0.0) /* this is for backwards compatibility (accomodating setting stop_crit to 1, but not setting a_tol) - eventually we will get rid of the stop_crit flag as with GMRES */ epsilon = r_tol; else epsilon = a_tol; /* this means new interface fcn called */ } else /* default convergence test (stop_crit = 0)*/ { /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm) den_norm = |r_0| or |b| note: default for a_tol is 0.0, so relative residual criteria is used unless user also specifies a_tol or sets r_tol = 0.0, which means absolute tol only is checked */ epsilon = hypre_max(a_tol, r_tol*den_norm); } if (print_level > 0 && my_id == 0) { if (b_norm > 0.0) {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate rel.res.norm\n"); hypre_printf("----- ------------ ---------- ------------\n"); } else {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate\n"); hypre_printf("----- ------------ ----------\n"); } } (bicgstab_data -> 
num_iterations) = iter; if (b_norm > 0.0) (bicgstab_data -> rel_residual_norm) = r_norm/b_norm; /* check for convergence before starting */ if (r_norm == 0.0) { return hypre_error_flag; } else if (r_norm <= epsilon && iter >= min_iter) { if (print_level > 0 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Tolerance and min_iter requirements satisfied by initial data.\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (bicgstab_data -> converged) = 1; return hypre_error_flag; } /* Start BiCGStab iterations */ while (iter < max_iter) { iter++; (*(bicgstab_functions->ClearVector))(v); precond(precond_data, A, p, v); (*(bicgstab_functions->Matvec))(matvec_data,1.0,A,v,0.0,q); temp = (*(bicgstab_functions->InnerProd))(r0,q); if (fabs(temp) >= epsmac) alpha = res/temp; else { hypre_printf("BiCGSTAB broke down!! divide by near zero\n"); return(1); } (*(bicgstab_functions->Axpy))(alpha,v,x); (*(bicgstab_functions->Axpy))(-alpha,q,r); (*(bicgstab_functions->ClearVector))(v); precond(precond_data, A, r, v); (*(bicgstab_functions->Matvec))(matvec_data,1.0,A,v,0.0,s); /* Handle case when gamma = 0.0/0.0 as 0.0 and not NAN */ gamma_numer = (*(bicgstab_functions->InnerProd))(r,s); gamma_denom = (*(bicgstab_functions->InnerProd))(s,s); if ((gamma_numer == 0.0) && (gamma_denom == 0.0)) gamma = 0.0; else gamma= gamma_numer/gamma_denom; (*(bicgstab_functions->Axpy))(gamma,v,x); (*(bicgstab_functions->Axpy))(-gamma,s,r); /* residual is now updated, must immediately check for convergence */ r_norm = sqrt((*(bicgstab_functions->InnerProd))(r,r)); if (logging > 0 || print_level > 0) { norms[iter] = r_norm; } if (print_level > 0 && my_id == 0) { if (b_norm > 0.0) hypre_printf("% 5d %e %f %e\n", iter, norms[iter], norms[iter]/norms[iter-1], norms[iter]/b_norm); else hypre_printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } /* check for convergence, evaluate actual residual */ if (r_norm <= epsilon && iter >= min_iter) { 
(*(bicgstab_functions->CopyVector))(b,r); (*(bicgstab_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(bicgstab_functions->InnerProd))(r,r)); if (r_norm <= epsilon) { if (print_level > 0 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (bicgstab_data -> converged) = 1; break; } } /*-------------------------------------------------------------------- * Optional test to see if adequate progress is being made. * The average convergence factor is recorded and compared * against the tolerance 'cf_tol'. The weighting factor is * intended to pay more attention to the test when an accurate * estimate for average convergence factor is available. *--------------------------------------------------------------------*/ if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; if (weight * cf_ave_1 > cf_tol) break; } if (fabs(res) >= epsmac) beta = 1.0/res; else { hypre_printf("BiCGSTAB broke down!! res=0 \n"); return(2); } res = (*(bicgstab_functions->InnerProd))(r0,r); beta *= res; (*(bicgstab_functions->Axpy))(-gamma,q,p); if (fabs(gamma) >= epsmac) (*(bicgstab_functions->ScaleVector))((beta*alpha/gamma),p); else { hypre_printf("BiCGSTAB broke down!! gamma=0 \n"); return(3); } (*(bicgstab_functions->Axpy))(1.0,r,p); } /* end while loop */ (bicgstab_data -> num_iterations) = iter; if (b_norm > 0.0) (bicgstab_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (bicgstab_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV); return hypre_error_flag; }
/*--------------------------------------------------------------------------
 * HYPRE_SStructSplitSetup
 *
 * Prepares the split solver for the system (A, b, x):
 *   - creates, initializes and assembles a work vector y on b's grid;
 *   - for every part and every (vi, vj) variable pair with a non-NULL
 *     struct matrix, creates and sets up a struct matvec object;
 *   - for every part and variable, creates and sets up one struct solver
 *     (Jacobi, SMG or PFMG as selected by solver->ssolver) on the diagonal
 *     block, and records its solve / destroy routines and data.
 * An unrecognized solver id raises HYPRE_ERROR_GENERIC and Jacobi is used.
 * When solver->tol > 0 a semi-struct matvec is additionally set up.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
HYPRE_Int
HYPRE_SStructSplitSetup( HYPRE_SStructSolver solver,
                         HYPRE_SStructMatrix A,
                         HYPRE_SStructVector b,
                         HYPRE_SStructVector x )
{
   HYPRE_Int               kind = (solver -> ssolver);

   hypre_SStructVector    *work_vec;
   HYPRE_Int               part_count;
   HYPRE_Int              *var_counts;
   void                ****matvec_tab;
   HYPRE_Int            (***solve_tab)();
   HYPRE_Int            (***destroy_tab)();
   void                 ***data_tab;

   MPI_Comm                comm;
   hypre_SStructGrid      *grid;
   hypre_SStructPMatrix   *part_mat;
   hypre_SStructPVector   *part_x;
   hypre_SStructPVector   *part_y;
   hypre_StructMatrix     *blk_mat;
   hypre_StructVector     *blk_x;
   hypre_StructVector     *blk_y;
   HYPRE_StructMatrix      diag_mat;
   HYPRE_StructVector      diag_x;
   HYPRE_StructVector      diag_y;
   HYPRE_Int             (*solve_fn)();
   HYPRE_Int             (*destroy_fn)();
   void                   *blk_solver;

   HYPRE_Int               ip, iv, jv;

   /* work vector living on the same communicator / grid as b */
   comm = hypre_SStructVectorComm(b);
   grid = hypre_SStructVectorGrid(b);
   HYPRE_SStructVectorCreate(comm, grid, &work_vec);
   HYPRE_SStructVectorInitialize(work_vec);
   HYPRE_SStructVectorAssemble(work_vec);

   /* per-part tables */
   part_count  = hypre_SStructMatrixNParts(A);
   var_counts  = hypre_TAlloc(HYPRE_Int, part_count);
   matvec_tab  = hypre_TAlloc(void ***, part_count);
   solve_tab   = (HYPRE_Int (***)()) hypre_MAlloc((sizeof(HYPRE_Int (**)()) * part_count));
   destroy_tab = (HYPRE_Int (***)()) hypre_MAlloc((sizeof(HYPRE_Int (**)()) * part_count));
   data_tab    = hypre_TAlloc(void **, part_count);

   for (ip = 0; ip < part_count; ip++)
   {
      part_mat = hypre_SStructMatrixPMatrix(A, ip);
      part_x   = hypre_SStructVectorPVector(x, ip);
      part_y   = hypre_SStructVectorPVector(work_vec, ip);

      /* per-variable tables for this part */
      var_counts[ip]  = hypre_SStructPMatrixNVars(part_mat);
      matvec_tab[ip]  = hypre_TAlloc(void **, var_counts[ip]);
      solve_tab[ip]   = (HYPRE_Int (**)()) hypre_MAlloc((sizeof(HYPRE_Int (*)()) * var_counts[ip]));
      destroy_tab[ip] = (HYPRE_Int (**)()) hypre_MAlloc((sizeof(HYPRE_Int (*)()) * var_counts[ip]));
      data_tab[ip]    = hypre_TAlloc(void *, var_counts[ip]);

      for (iv = 0; iv < var_counts[ip]; iv++)
      {
         /* one matvec object per existing (iv, jv) coupling block */
         matvec_tab[ip][iv] = hypre_TAlloc(void *, var_counts[ip]);
         for (jv = 0; jv < var_counts[ip]; jv++)
         {
            blk_mat = hypre_SStructPMatrixSMatrix(part_mat, iv, jv);
            blk_x   = hypre_SStructPVectorSVector(part_x, jv);
            if (blk_mat != NULL)
            {
               matvec_tab[ip][iv][jv] = hypre_StructMatvecCreate();
               hypre_StructMatvecSetup(matvec_tab[ip][iv][jv], blk_mat, blk_x);
            }
            else
            {
               matvec_tab[ip][iv][jv] = NULL;
            }
         }

         /* diagonal block and the matching pieces of x and y */
         blk_mat  = hypre_SStructPMatrixSMatrix(part_mat, iv, iv);
         blk_x    = hypre_SStructPVectorSVector(part_x, iv);
         blk_y    = hypre_SStructPVectorSVector(part_y, iv);
         diag_mat = (HYPRE_StructMatrix) blk_mat;
         diag_x   = (HYPRE_StructVector) blk_x;
         diag_y   = (HYPRE_StructVector) blk_y;

         if (kind == HYPRE_SMG)
         {
            HYPRE_StructSMGCreate(comm, (HYPRE_StructSolver *)&blk_solver);
            HYPRE_StructSMGSetMemoryUse(blk_solver, 0);
            HYPRE_StructSMGSetMaxIter(blk_solver, 1);
            HYPRE_StructSMGSetTol(blk_solver, 0.0);
            if (solver -> zero_guess)
            {
               HYPRE_StructSMGSetZeroGuess(blk_solver);
            }
            HYPRE_StructSMGSetNumPreRelax(blk_solver, 1);
            HYPRE_StructSMGSetNumPostRelax(blk_solver, 1);
            HYPRE_StructSMGSetLogging(blk_solver, 0);
            HYPRE_StructSMGSetPrintLevel(blk_solver, 0);
            HYPRE_StructSMGSetup(blk_solver, diag_mat, diag_y, diag_x);
            solve_fn   = HYPRE_StructSMGSolve;
            destroy_fn = HYPRE_StructSMGDestroy;
         }
         else if (kind == HYPRE_PFMG)
         {
            HYPRE_StructPFMGCreate(comm, (HYPRE_StructSolver *)&blk_solver);
            HYPRE_StructPFMGSetMaxIter(blk_solver, 1);
            HYPRE_StructPFMGSetTol(blk_solver, 0.0);
            if (solver -> zero_guess)
            {
               HYPRE_StructPFMGSetZeroGuess(blk_solver);
            }
            HYPRE_StructPFMGSetRelaxType(blk_solver, 1);
            HYPRE_StructPFMGSetNumPreRelax(blk_solver, 1);
            HYPRE_StructPFMGSetNumPostRelax(blk_solver, 1);
            HYPRE_StructPFMGSetLogging(blk_solver, 0);
            HYPRE_StructPFMGSetPrintLevel(blk_solver, 0);
            HYPRE_StructPFMGSetup(blk_solver, diag_mat, diag_y, diag_x);
            solve_fn   = HYPRE_StructPFMGSolve;
            destroy_fn = HYPRE_StructPFMGDestroy;
         }
         else
         {
            /* Jacobi is both an explicit choice and the fallback; an
               unrecognized id is flagged as an error but still handled */
            if (kind != HYPRE_Jacobi)
            {
               hypre_error(HYPRE_ERROR_GENERIC);
            }
            HYPRE_StructJacobiCreate(comm, (HYPRE_StructSolver *)&blk_solver);
            HYPRE_StructJacobiSetMaxIter(blk_solver, 1);
            HYPRE_StructJacobiSetTol(blk_solver, 0.0);
            if (solver -> zero_guess)
            {
               HYPRE_StructJacobiSetZeroGuess(blk_solver);
            }
            HYPRE_StructJacobiSetup(blk_solver, diag_mat, diag_y, diag_x);
            solve_fn   = HYPRE_StructJacobiSolve;
            destroy_fn = HYPRE_StructJacobiDestroy;
         }

         solve_tab[ip][iv]   = solve_fn;
         destroy_tab[ip][iv] = destroy_fn;
         data_tab[ip][iv]    = blk_solver;
      }
   }

   /* hand everything over to the solver object */
   (solver -> y)               = work_vec;
   (solver -> nparts)          = part_count;
   (solver -> nvars)           = var_counts;
   (solver -> smatvec_data)    = matvec_tab;
   (solver -> ssolver_solve)   = solve_tab;
   (solver -> ssolver_destroy) = destroy_tab;
   (solver -> ssolver_data)    = data_tab;

   /* a full matvec is set up only when a positive tolerance is requested */
   if ((solver -> tol) > 0.0)
   {
      hypre_SStructMatvecCreate(&(solver -> matvec_data));
      hypre_SStructMatvecSetup((solver -> matvec_data), A, x);
   }

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * HYPRE_IJVectorCreate
 *
 * Allocates an IJ vector covering local rows jlower..jupper (inclusive) on
 * communicator comm and stores it in *vector.
 *
 * With HYPRE_NO_GLOBAL_PARTITION only the local range [jlower, jupper+1] is
 * kept, and the global first row / row count are obtained by broadcasts from
 * the first and last ranks.  Otherwise every rank's range is gathered, the
 * ranges are checked to be contiguous, and a (num_procs+1)-entry
 * partitioning is built.
 *
 * Returns hypre_error_flag.  Errors raised: HYPRE_ERROR_MEMORY when the
 * vector cannot be allocated, an argument error for jlower (2) or
 * jupper (3), and HYPRE_ERROR_GENERIC for an inconsistent partitioning.
 * On every error path all memory allocated here is released and *vector is
 * left untouched.
 *--------------------------------------------------------------------------*/
int
HYPRE_IJVectorCreate( MPI_Comm        comm,
                      HYPRE_BigInt    jlower,
                      HYPRE_BigInt    jupper,
                      HYPRE_IJVector *vector )
{
   hypre_IJVector *vec;
   int             num_procs, my_id;
   HYPRE_BigInt   *partitioning;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_BigInt    row0, rowN;
#else
   HYPRE_BigInt   *recv_buf;
   HYPRE_BigInt   *info;
   int             i, i2;
#endif

   vec = hypre_CTAlloc(hypre_IJVector, 1);

   if (!vec)
   {
      printf("Out of memory -- HYPRE_IJVectorCreate\n");
      hypre_error(HYPRE_ERROR_MEMORY);
      return hypre_error_flag;
   }

   MPI_Comm_size(comm, &num_procs);
   MPI_Comm_rank(comm, &my_id);

   /* argument validation; release the vector so a bad call does not leak */
   if (jlower > jupper+1 || jlower < 0)
   {
      hypre_error_in_arg(2);
      hypre_TFree(vec);
      return hypre_error_flag;
   }
   if (jupper < -1)
   {
      hypre_error_in_arg(3);
      hypre_TFree(vec);
      return hypre_error_flag;
   }

#ifdef HYPRE_NO_GLOBAL_PARTITION

   partitioning = hypre_CTAlloc(HYPRE_BigInt, 2);

   partitioning[0] = jlower;
   partitioning[1] = jupper+1;

   /* now we need the global number of rows as well
      as the global first row index */

   /* proc 0 has the first row */
   if (my_id == 0)
   {
      row0 = jlower;
   }
   MPI_Bcast(&row0, 1, MPI_HYPRE_BIG_INT, 0, comm);

   /* proc (num_procs-1) has the last row */
   if (my_id == (num_procs-1))
   {
      rowN = jupper;
   }
   MPI_Bcast(&rowN, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);

   hypre_IJVectorGlobalFirstRow(vec) = row0;
   hypre_IJVectorGlobalNumRows(vec)  = rowN - row0 + 1;

#else

   info         = hypre_CTAlloc(HYPRE_BigInt, 2);
   recv_buf     = hypre_CTAlloc(HYPRE_BigInt, 2*num_procs);
   partitioning = hypre_CTAlloc(HYPRE_BigInt, num_procs+1);

   info[0] = jlower;
   info[1] = jupper;

   /* recv_buf[2*k], recv_buf[2*k+1] = (jlower, jupper) of rank k */
   MPI_Allgather(info, 2, MPI_HYPRE_BIG_INT, recv_buf, 2, MPI_HYPRE_BIG_INT, comm);

   partitioning[0] = recv_buf[0];
   for (i = 0; i < num_procs-1; i++)
   {
      i2 = i+i;
      /* rank i must end exactly one row before rank i+1 starts */
      if (recv_buf[i2+1] != (recv_buf[i2+2]-1))
      {
         printf("Inconsistent partitioning -- HYPRE_IJVectorCreate\n");
         hypre_error(HYPRE_ERROR_GENERIC);
         hypre_TFree(info);
         hypre_TFree(recv_buf);
         hypre_TFree(partitioning);
         hypre_TFree(vec);
         return hypre_error_flag;
      }
      else
      {
         partitioning[i+1] = recv_buf[i2+2];
      }
   }
   i2 = (num_procs-1)*2;
   partitioning[num_procs] = recv_buf[i2+1]+1;

   hypre_TFree(info);
   hypre_TFree(recv_buf);

   hypre_IJVectorGlobalFirstRow(vec) = partitioning[0];
   hypre_IJVectorGlobalNumRows(vec)  = partitioning[num_procs]-partitioning[0];

#endif

   hypre_IJVectorComm(vec)         = comm;
   hypre_IJVectorPartitioning(vec) = partitioning;
   hypre_IJVectorObjectType(vec)   = HYPRE_UNITIALIZED;
   hypre_IJVectorObject(vec)       = NULL;
   hypre_IJVectorTranslator(vec)   = NULL;

   *vector = (HYPRE_IJVector) vec;

   return hypre_error_flag;
}
HYPRE_Int hypre_GMRESSolve(void *gmres_vdata, void *A, void *b, void *x) { hypre_GMRESData *gmres_data = gmres_vdata; hypre_GMRESFunctions *gmres_functions = gmres_data->functions; HYPRE_Int k_dim = (gmres_data -> k_dim); HYPRE_Int min_iter = (gmres_data -> min_iter); HYPRE_Int max_iter = (gmres_data -> max_iter); HYPRE_Int rel_change = (gmres_data -> rel_change); HYPRE_Int skip_real_r_check = (gmres_data -> skip_real_r_check); double r_tol = (gmres_data -> tol); double cf_tol = (gmres_data -> cf_tol); double a_tol = (gmres_data -> a_tol); void *matvec_data = (gmres_data -> matvec_data); void *r = (gmres_data -> r); void *w = (gmres_data -> w); /* note: w_2 is only allocated if rel_change = 1 */ void *w_2 = (gmres_data -> w_2); void **p = (gmres_data -> p); HYPRE_Int (*precond)() = (gmres_functions -> precond); HYPRE_Int *precond_data = (gmres_data -> precond_data); HYPRE_Int print_level = (gmres_data -> print_level); HYPRE_Int logging = (gmres_data -> logging); double *norms = (gmres_data -> norms); /* not used yet char *log_file_name = (gmres_data -> log_file_name);*/ /* FILE *fp; */ HYPRE_Int break_value = 0; HYPRE_Int i, j, k; double *rs, **hh, *c, *s, *rs_2; HYPRE_Int iter; HYPRE_Int my_id, num_procs; double epsilon, gamma, t, r_norm, b_norm, den_norm, x_norm; double w_norm; double epsmac = 1.e-16; double ieee_check = 0.; double guard_zero_residual; double cf_ave_0 = 0.0; double cf_ave_1 = 0.0; double weight; double r_norm_0; double relative_error = 1.0; HYPRE_Int rel_change_passed = 0, num_rel_change_check = 0; double real_r_norm_old, real_r_norm_new; (gmres_data -> converged) = 0; /*----------------------------------------------------------------------- * With relative change convergence test on, it is possible to attempt * another iteration with a zero residual. This causes the parameter * alpha to go NaN. The guard_zero_residual parameter is to circumvent * this. Perhaps it should be set to something non-zero (but small). 
*-----------------------------------------------------------------------*/ guard_zero_residual = 0.0; (*(gmres_functions->CommInfo))(A,&my_id,&num_procs); if ( logging>0 || print_level>0 ) { norms = (gmres_data -> norms); } /* initialize work arrays */ rs = hypre_CTAllocF(double,k_dim+1,gmres_functions); c = hypre_CTAllocF(double,k_dim,gmres_functions); s = hypre_CTAllocF(double,k_dim,gmres_functions); if (rel_change) rs_2 = hypre_CTAllocF(double,k_dim+1,gmres_functions); hh = hypre_CTAllocF(double*,k_dim+1,gmres_functions); for (i=0; i < k_dim+1; i++) { hh[i] = hypre_CTAllocF(double,k_dim,gmres_functions); } (*(gmres_functions->CopyVector))(b,p[0]); /* compute initial residual */ (*(gmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]); b_norm = sqrt((*(gmres_functions->InnerProd))(b,b)); real_r_norm_old = b_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied b.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... 
END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } r_norm = sqrt((*(gmres_functions->InnerProd))(p[0],p[0])); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied A or x_0.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... 
END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } if ( logging>0 || print_level > 0) { norms[0] = r_norm; if ( print_level>1 && my_id == 0 ) { hypre_printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) hypre_printf("Rel_resid_norm actually contains the residual norm\n"); hypre_printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */ den_norm= b_norm; } else { /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */ den_norm= r_norm; }; /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm) den_norm = |r_0| or |b| note: default for a_tol is 0.0, so relative residual criteria is used unless user specifies a_tol, or sets r_tol = 0.0, which means absolute tol only is checked */ epsilon = hypre_max(a_tol,r_tol*den_norm); /* so now our stop criteria is |r_i| <= epsilon */ if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate rel.res.norm\n"); hypre_printf("----- ------------ ---------- ------------\n"); } else {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate\n"); hypre_printf("----- ------------ ----------\n"); }; } /* once the rel. 
change check has passed, we do not want to check it again */ rel_change_passed = 0; /* outer iteration cycle */ while (iter < max_iter) { /* initialize first term of hessenberg system */ rs[0] = r_norm; if (r_norm == 0.0) { hypre_TFreeF(c,gmres_functions); hypre_TFreeF(s,gmres_functions); hypre_TFreeF(rs,gmres_functions); if (rel_change) hypre_TFreeF(rs_2,gmres_functions); for (i=0; i < k_dim+1; i++) hypre_TFreeF(hh[i],gmres_functions); hypre_TFreeF(hh,gmres_functions); return hypre_error_flag; } /* see if we are already converged and should print the final norm and exit */ if (r_norm <= epsilon && iter >= min_iter) { if (!rel_change) /* shouldn't exit after no iterations if * relative change is on*/ { (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(gmres_functions->InnerProd))(r,r)); if (r_norm <= epsilon) { if ( print_level>1 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) hypre_printf("false convergence 1\n"); } } t = 1.0 / r_norm; (*(gmres_functions->ScaleVector))(t,p[0]); i = 0; /***RESTART CYCLE (right-preconditioning) ***/ while (i < k_dim && iter < max_iter) { i++; iter++; (*(gmres_functions->ClearVector))(r); precond(precond_data, A, p[i-1], r); (*(gmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]); /* modified Gram_Schmidt */ for (j=0; j < i; j++) { hh[j][i-1] = (*(gmres_functions->InnerProd))(p[j],p[i]); (*(gmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]); } t = sqrt((*(gmres_functions->InnerProd))(p[i],p[i])); hh[i][i-1] = t; if (t != 0.0) { t = 1.0/t; (*(gmres_functions->ScaleVector))(t,p[i]); } /* done with modified Gram_schmidt and Arnoldi step. 
update factorization of hh */ for (j = 1; j < i; j++) { t = hh[j-1][i-1]; hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t; hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1]; } t= hh[i][i-1]*hh[i][i-1]; t+= hh[i-1][i-1]*hh[i-1][i-1]; gamma = sqrt(t); if (gamma == 0.0) gamma = epsmac; c[i-1] = hh[i-1][i-1]/gamma; s[i-1] = hh[i][i-1]/gamma; rs[i] = -hh[i][i-1]*rs[i-1]; rs[i]/= gamma; rs[i-1] = c[i-1]*rs[i-1]; /* determine residual norm */ hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1]; r_norm = fabs(rs[i]); /* print ? */ if ( print_level>0 ) { norms[iter] = r_norm; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) hypre_printf("% 5d %e %f %e\n", iter, norms[iter],norms[iter]/norms[iter-1], norms[iter]/b_norm); else hypre_printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } } /*convergence factor tolerance */ if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; #if 0 hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n", i, cf_ave_1, cf_ave_0, weight ); #endif if (weight * cf_ave_1 > cf_tol) { break_value = 1; break; } } /* should we exit the restart cycle? (conv. check) */ if (r_norm <= epsilon && iter >= min_iter) { if (rel_change && !rel_change_passed) { /* To decide whether to break here: to actually determine the relative change requires the approx solution (so a triangular solve) and a precond. solve - so if we have to do this many times, it will be expensive...(unlike cg where is is relatively straightforward) previously, the intent (there was a bug), was to exit the restart cycle based on the residual norm and check the relative change outside the cycle. Here we will check the relative here as we don't want to exit the restart cycle prematurely */ for (k=0; k<i; k++) /* extra copy of rs so we don't need to change the later solve */ rs_2[k] = rs[k]; /* solve tri. 
system*/ rs_2[i-1] = rs_2[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = 0.0; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs_2[j]; } t+= rs_2[k]; rs_2[k] = t/hh[k][k]; } (*(gmres_functions->CopyVector))(p[i-1],w); (*(gmres_functions->ScaleVector))(rs_2[i-1],w); for (j = i-2; j >=0; j--) (*(gmres_functions->Axpy))(rs_2[j], p[j], w); (*(gmres_functions->ClearVector))(r); /* find correction (in r) */ precond(precond_data, A, w, r); /* copy current solution (x) to w (don't want to over-write x)*/ (*(gmres_functions->CopyVector))(x,w); /* add the correction */ (*(gmres_functions->Axpy))(1.0,r,w); /* now w is the approx solution - get the norm*/ x_norm = sqrt( (*(gmres_functions->InnerProd))(w,w) ); if ( !(x_norm <= guard_zero_residual )) /* don't divide by zero */ { /* now get x_i - x_i-1 */ if (num_rel_change_check) { /* have already checked once so we can avoid another precond. solve */ (*(gmres_functions->CopyVector))(w, r); (*(gmres_functions->Axpy))(-1.0, w_2, r); /* now r contains x_i - x_i-1*/ /* save current soln w in w_2 for next time */ (*(gmres_functions->CopyVector))(w, w_2); } else { /* first time to check rel change*/ /* first save current soln w in w_2 for next time */ (*(gmres_functions->CopyVector))(w, w_2); /* for relative change take x_(i-1) to be x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. 
Now x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]} - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]} = M^{-1} rs_{i-1}{p_{i-1}} */ (*(gmres_functions->ClearVector))(w); (*(gmres_functions->Axpy))(rs_2[i-1], p[i-1], w); (*(gmres_functions->ClearVector))(r); /* apply the preconditioner */ precond(precond_data, A, w, r); /* now r contains x_i - x_i-1 */ } /* find the norm of x_i - x_i-1 */ w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); relative_error = w_norm/x_norm; if (relative_error <= r_tol) { rel_change_passed = 1; break; } } else { rel_change_passed = 1; break; } num_rel_change_check++; } else /* no relative change */ { break; } } } /*** end of restart cycle ***/ /* now compute solution, first solve upper triangular system */ if (break_value) break; rs[i-1] = rs[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = 0.0; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs[j]; } t+= rs[k]; rs[k] = t/hh[k][k]; } (*(gmres_functions->CopyVector))(p[i-1],w); (*(gmres_functions->ScaleVector))(rs[i-1],w); for (j = i-2; j >=0; j--) (*(gmres_functions->Axpy))(rs[j], p[j], w); (*(gmres_functions->ClearVector))(r); /* find correction (in r) */ precond(precond_data, A, w, r); /* update current solution x (in x) */ (*(gmres_functions->Axpy))(1.0,r,x); /* check for convergence by evaluating the actual residual */ if (r_norm <= epsilon && iter >= min_iter) { if (skip_real_r_check) { (gmres_data -> converged) = 1; break; } /* calculate actual residual norm*/ (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); real_r_norm_new = r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); if (r_norm <= epsilon) { if (rel_change && !rel_change_passed) /* calculate the relative change */ { /* calculate the norm of the solution */ x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) ); if ( !(x_norm <= guard_zero_residual )) /* don't divide by zero */ { /* for relative change take x_(i-1) to be x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. 
Now x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]} - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]} = M^{-1} rs_{i-1}{p_{i-1}} */ (*(gmres_functions->ClearVector))(w); (*(gmres_functions->Axpy))(rs[i-1], p[i-1], w); (*(gmres_functions->ClearVector))(r); /* apply the preconditioner */ precond(precond_data, A, w, r); /* find the norm of x_i - x_i-1 */ w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); relative_error= w_norm/x_norm; if ( relative_error < r_tol ) { (gmres_data -> converged) = 1; if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } } else { (gmres_data -> converged) = 1; if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } } else /* don't need to check rel. change */ { if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (gmres_data -> converged) = 1; break; } } else /* conv. 
has not occurred, according to true residual */ { /* exit if the real residual norm has not decreased */ if (real_r_norm_new >= real_r_norm_old) { if (print_level > 1 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (gmres_data -> converged) = 1; break; } /* report discrepancy between real/GMRES residuals and restart */ if ( print_level>0 && my_id == 0) hypre_printf("false convergence 2, L2 norm of residual: %e\n", r_norm); (*(gmres_functions->CopyVector))(r,p[0]); i = 0; real_r_norm_old = real_r_norm_new; } } /* end of convergence check */ /* compute residual vector and continue loop */ for (j=i ; j > 0; j--) { rs[j-1] = -s[j-1]*rs[j]; rs[j] = c[j-1]*rs[j]; } if (i) (*(gmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]); for (j=i-1 ; j > 0; j--) (*(gmres_functions->Axpy))(rs[j],p[j],p[i]); if (i) { (*(gmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]); (*(gmres_functions->Axpy))(1.0,p[i],p[0]); } } /* END of iteration while loop */ if ( print_level>1 && my_id == 0 ) hypre_printf("\n\n"); (gmres_data -> num_iterations) = iter; if (b_norm > 0.0) (gmres_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (gmres_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV); hypre_TFreeF(c,gmres_functions); hypre_TFreeF(s,gmres_functions); hypre_TFreeF(rs,gmres_functions); if (rel_change) hypre_TFreeF(rs_2,gmres_functions); for (i=0; i < k_dim+1; i++) { hypre_TFreeF(hh[i],gmres_functions); } hypre_TFreeF(hh,gmres_functions); return hypre_error_flag; }
HYPRE_Int HYPRE_IJMatrixCreate( MPI_Comm comm, HYPRE_Int ilower, HYPRE_Int iupper, HYPRE_Int jlower, HYPRE_Int jupper, HYPRE_IJMatrix *matrix ) { HYPRE_Int *row_partitioning; HYPRE_Int *col_partitioning; HYPRE_Int *info; HYPRE_Int num_procs; HYPRE_Int myid; hypre_IJMatrix *ijmatrix; #ifdef HYPRE_NO_GLOBAL_PARTITION HYPRE_Int row0, col0, rowN, colN; #else HYPRE_Int *recv_buf; HYPRE_Int i, i4; HYPRE_Int square; #endif ijmatrix = hypre_CTAlloc(hypre_IJMatrix, 1); hypre_IJMatrixComm(ijmatrix) = comm; hypre_IJMatrixObject(ijmatrix) = NULL; hypre_IJMatrixTranslator(ijmatrix) = NULL; hypre_IJMatrixObjectType(ijmatrix) = HYPRE_UNITIALIZED; hypre_IJMatrixAssembleFlag(ijmatrix) = 0; hypre_IJMatrixPrintLevel(ijmatrix) = 0; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm, &myid); if (ilower > iupper+1 || ilower < 0) { hypre_error_in_arg(2); hypre_TFree(ijmatrix); return hypre_error_flag; } if (iupper < -1) { hypre_error_in_arg(3); hypre_TFree(ijmatrix); return hypre_error_flag; } if (jlower > jupper+1 || jlower < 0) { hypre_error_in_arg(4); hypre_TFree(ijmatrix); return hypre_error_flag; } if (jupper < -1) { hypre_error_in_arg(5); hypre_TFree(ijmatrix); return hypre_error_flag; } #ifdef HYPRE_NO_GLOBAL_PARTITION info = hypre_CTAlloc(HYPRE_Int,2); row_partitioning = hypre_CTAlloc(HYPRE_Int, 2); col_partitioning = hypre_CTAlloc(HYPRE_Int, 2); row_partitioning[0] = ilower; row_partitioning[1] = iupper+1; col_partitioning[0] = jlower; col_partitioning[1] = jupper+1; /* now we need the global number of rows and columns as well as the global first row and column index */ /* proc 0 has the first row and col */ if (myid==0) { info[0] = ilower; info[1] = jlower; } hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, 0, comm); row0 = info[0]; col0 = info[1]; /* proc (num_procs-1) has the last row and col */ if (myid == (num_procs-1)) { info[0] = iupper; info[1] = jupper; } hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, num_procs-1, comm); rowN = info[0]; colN = info[1]; 
hypre_IJMatrixGlobalFirstRow(ijmatrix) = row0; hypre_IJMatrixGlobalFirstCol(ijmatrix) = col0; hypre_IJMatrixGlobalNumRows(ijmatrix) = rowN - row0 + 1; hypre_IJMatrixGlobalNumCols(ijmatrix) = colN - col0 + 1; hypre_TFree(info); #else info = hypre_CTAlloc(HYPRE_Int,4); recv_buf = hypre_CTAlloc(HYPRE_Int,4*num_procs); row_partitioning = hypre_CTAlloc(HYPRE_Int, num_procs+1); info[0] = ilower; info[1] = iupper; info[2] = jlower; info[3] = jupper; /* Generate row- and column-partitioning through information exchange across all processors, check whether the matrix is square, and if the partitionings match. i.e. no overlaps or gaps, if there are overlaps or gaps in the row partitioning or column partitioning , ierr will be set to -9 or -10, respectively */ hypre_MPI_Allgather(info,4,HYPRE_MPI_INT,recv_buf,4,HYPRE_MPI_INT,comm); row_partitioning[0] = recv_buf[0]; square = 1; for (i=0; i < num_procs-1; i++) { i4 = 4*i; if ( recv_buf[i4+1] != (recv_buf[i4+4]-1) ) { hypre_error(HYPRE_ERROR_GENERIC); hypre_TFree(ijmatrix); hypre_TFree(info); hypre_TFree(recv_buf); hypre_TFree(row_partitioning); return hypre_error_flag; } else row_partitioning[i+1] = recv_buf[i4+4]; if ((square && (recv_buf[i4] != recv_buf[i4+2])) || (recv_buf[i4+1] != recv_buf[i4+3]) ) { square = 0; } } i4 = (num_procs-1)*4; row_partitioning[num_procs] = recv_buf[i4+1]+1; if ((recv_buf[i4] != recv_buf[i4+2]) || (recv_buf[i4+1] != recv_buf[i4+3])) square = 0; if (square) col_partitioning = row_partitioning; else { col_partitioning = hypre_CTAlloc(HYPRE_Int,num_procs+1); col_partitioning[0] = recv_buf[2]; for (i=0; i < num_procs-1; i++) { i4 = 4*i; if (recv_buf[i4+3] != recv_buf[i4+6]-1) { hypre_error(HYPRE_ERROR_GENERIC); hypre_TFree(ijmatrix); hypre_TFree(info); hypre_TFree(recv_buf); hypre_TFree(row_partitioning); hypre_TFree(col_partitioning); return hypre_error_flag; } else col_partitioning[i+1] = recv_buf[i4+6]; } col_partitioning[num_procs] = recv_buf[num_procs*4-1]+1; } 
hypre_IJMatrixGlobalFirstRow(ijmatrix) = row_partitioning[0]; hypre_IJMatrixGlobalFirstCol(ijmatrix) = col_partitioning[0]; hypre_IJMatrixGlobalNumRows(ijmatrix) = row_partitioning[num_procs] - row_partitioning[0]; hypre_IJMatrixGlobalNumCols(ijmatrix) = col_partitioning[num_procs] - col_partitioning[0]; hypre_TFree(info); hypre_TFree(recv_buf); #endif hypre_IJMatrixRowPartitioning(ijmatrix) = row_partitioning; hypre_IJMatrixColPartitioning(ijmatrix) = col_partitioning; *matrix = (HYPRE_IJMatrix) ijmatrix; return hypre_error_flag; }