HYPRE_Int hypre_FillResponseParToVectorAll( void *p_recv_contact_buf, HYPRE_Int contact_size, HYPRE_Int contact_proc, void *ro, MPI_Comm comm, void **p_send_response_buf, HYPRE_Int *response_message_size ) { HYPRE_Int myid; HYPRE_Int i, index, count, elength; HYPRE_Int *recv_contact_buf = (HYPRE_Int * ) p_recv_contact_buf; hypre_DataExchangeResponse *response_obj = ro; hypre_ProcListElements *send_proc_obj = response_obj->data2; hypre_MPI_Comm_rank(comm, &myid ); /*check to see if we need to allocate more space in send_proc_obj for ids*/ if (send_proc_obj->length == send_proc_obj->storage_length) { send_proc_obj->storage_length +=10; /*add space for 10 more processors*/ send_proc_obj->id = hypre_TReAlloc(send_proc_obj->id,HYPRE_Int, send_proc_obj->storage_length); send_proc_obj->vec_starts = hypre_TReAlloc(send_proc_obj->vec_starts,HYPRE_Int, send_proc_obj->storage_length + 1); } /*initialize*/ count = send_proc_obj->length; index = send_proc_obj->vec_starts[count]; /*this is the number of elements*/ /*send proc*/ send_proc_obj->id[count] = contact_proc; /*do we need more storage for the elements?*/ if (send_proc_obj->element_storage_length < index + contact_size) { elength = hypre_max(contact_size, 10); elength += index; send_proc_obj->elements = hypre_TReAlloc(send_proc_obj->elements, HYPRE_Int, elength); send_proc_obj->element_storage_length = elength; } /*populate send_proc_obj*/ for (i=0; i< contact_size; i++) { send_proc_obj->elements[index++] = recv_contact_buf[i]; } send_proc_obj->vec_starts[count+1] = index; send_proc_obj->length++; /*output - no message to return (confirmation) */ *response_message_size = 0; return hypre_error_flag; }
int hypre_SMGSetNumPreRelax( void *smg_vdata, int num_pre_relax ) { hypre_SMGData *smg_data = smg_vdata; int ierr = 0; (smg_data -> num_pre_relax) = hypre_max(num_pre_relax,1); return ierr; }
int hypre_SMGRelaxSetNumPreRelax( void *relax_vdata, int num_pre_relax ) { hypre_SMGRelaxData *relax_data = relax_vdata; int ierr = 0; (relax_data -> num_pre_relax) = hypre_max(num_pre_relax,1); return ierr; }
HYPRE_Int hypre_IntersectBoxes( hypre_Box *box1, hypre_Box *box2, hypre_Box *ibox ) { HYPRE_Int d; /* find x, y, and z bounds */ for (d = 0; d < 3; d++) { hypre_BoxIMinD(ibox, d) = hypre_max(hypre_BoxIMinD(box1, d), hypre_BoxIMinD(box2, d)); hypre_BoxIMaxD(ibox, d) = hypre_min(hypre_BoxIMaxD(box1, d), hypre_BoxIMaxD(box2, d)); } return hypre_error_flag; }
int hypre_IntersectBoxes( hypre_Box *box1, hypre_Box *box2, hypre_Box *ibox ) { int ierr = 0; int d; /* find x, y, and z bounds */ for (d = 0; d < 3; d++) { hypre_BoxIMinD(ibox, d) = hypre_max(hypre_BoxIMinD(box1, d), hypre_BoxIMinD(box2, d)); hypre_BoxIMaxD(ibox, d) = hypre_min(hypre_BoxIMaxD(box1, d), hypre_BoxIMaxD(box2, d)); } return ierr; }
int hypre_BoxNeighborsAssemble( hypre_BoxNeighbors *neighbors, int max_distance, int prune ) { hypre_BoxArray *boxes; int *procs; int *ids; int first_local; int num_local; int num_periodic; int keep_box; int num_boxes; hypre_RankLink *rank_link; hypre_Box *local_box; hypre_Box *neighbor_box; int distance; int distance_index[3]; int diff; int i, j, d, ilocal, inew; int ierr = 0; /*--------------------------------------------- * Find neighboring boxes *---------------------------------------------*/ boxes = hypre_BoxNeighborsBoxes(neighbors); procs = hypre_BoxNeighborsProcs(neighbors); ids = hypre_BoxNeighborsIDs(neighbors); first_local = hypre_BoxNeighborsFirstLocal(neighbors); num_local = hypre_BoxNeighborsNumLocal(neighbors); num_periodic = hypre_BoxNeighborsNumPeriodic(neighbors); /*--------------------------------------------- * Find neighboring boxes *---------------------------------------------*/ inew = 0; num_boxes = 0; hypre_ForBoxI(i, boxes) { keep_box = 0; for (j = 0; j < num_local + num_periodic; j++) { ilocal = first_local + j; if (i != ilocal) { local_box = hypre_BoxArrayBox(boxes, ilocal); neighbor_box = hypre_BoxArrayBox(boxes, i); /* compute distance info */ distance = 0; for (d = 0; d < 3; d++) { distance_index[d] = 0; diff = hypre_BoxIMinD(neighbor_box, d) - hypre_BoxIMaxD(local_box, d); if (diff > 0) { distance_index[d] = 1; distance = hypre_max(distance, diff); } diff = hypre_BoxIMinD(local_box, d) - hypre_BoxIMaxD(neighbor_box, d); if (diff > 0) { distance_index[d] = -1; distance = hypre_max(distance, diff); } } /* create new rank_link */ if (distance <= max_distance) { keep_box = 1; if (j < num_local) { hypre_RankLinkCreate(num_boxes, &rank_link); hypre_RankLinkNext(rank_link) = hypre_BoxNeighborsRankLink(neighbors, j, distance_index[0], distance_index[1], distance_index[2]); hypre_BoxNeighborsRankLink(neighbors, j, distance_index[0], distance_index[1], distance_index[2]) = rank_link; } } } else { keep_box = 1; } } if (prune) { /* use procs array to store which boxes to keep */ if (keep_box) { procs[i] = -procs[i]; if (inew < i) { procs[inew] = i; } inew = i + 1; num_boxes++; } } else { /* keep all of the boxes */ num_boxes++; } }
int hypre_GMRESSolve(void *gmres_vdata, void *A, void *b, void *x) { hypre_GMRESData *gmres_data = gmres_vdata; hypre_GMRESFunctions *gmres_functions = gmres_data->functions; int k_dim = (gmres_data -> k_dim); int min_iter = (gmres_data -> min_iter); int max_iter = (gmres_data -> max_iter); int rel_change = (gmres_data -> rel_change); int stop_crit = (gmres_data -> stop_crit); double accuracy = (gmres_data -> tol); double cf_tol = (gmres_data -> cf_tol); void *matvec_data = (gmres_data -> matvec_data); void *r = (gmres_data -> r); void *w = (gmres_data -> w); void **p = (gmres_data -> p); int (*precond)(void*, void*, void*, void*) = (gmres_functions -> precond); int *precond_data = (gmres_data -> precond_data); int print_level = (gmres_data -> print_level); int logging = (gmres_data -> logging); double *norms = (gmres_data -> norms); /* not used yet char *log_file_name = (gmres_data -> log_file_name);*/ /* FILE *fp; */ int ierr = 0; int break_value = 0; int i, j, k; double *rs, **hh, *c, *s; int iter; int my_id, num_procs; double epsilon, gamma, t, r_norm, b_norm, den_norm, x_norm; double epsmac = 1.e-16; double ieee_check = 0.; double guard_zero_residual; double cf_ave_0 = 0.0; double cf_ave_1 = 0.0; double weight; double r_norm_0; double relative_error; (gmres_data -> converged) = 0; /*----------------------------------------------------------------------- * With relative change convergence test on, it is possible to attempt * another iteration with a zero residual. This causes the parameter * alpha to go NaN. The guard_zero_residual parameter is to circumvent * this. Perhaps it should be set to something non-zero (but small). *-----------------------------------------------------------------------*/ guard_zero_residual = 0.0; (*(gmres_functions->CommInfo))(A,&my_id,&num_procs); if ( logging>0 || print_level>0 ) { norms = (gmres_data -> norms); /* not used yet log_file_name = (gmres_data -> log_file_name);*/ /* fp = fopen(log_file_name,"w"); */ } /* initialize work arrays */ rs = hypre_CTAllocF(double,k_dim+1,gmres_functions); c = hypre_CTAllocF(double,k_dim,gmres_functions); s = hypre_CTAllocF(double,k_dim,gmres_functions); hh = hypre_CTAllocF(double*,k_dim+1,gmres_functions); for (i=0; i < k_dim+1; i++) { hh[i] = hypre_CTAllocF(double,k_dim,gmres_functions); } (*(gmres_functions->CopyVector))(b,p[0]); /* compute initial residual */ (*(gmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]); b_norm = sqrt((*(gmres_functions->InnerProd))(b,b)); /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { printf("\n\nERROR detected by Hypre ... BEGIN\n"); printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n"); printf("User probably placed non-numerics in supplied b.\n"); printf("Returning error flag += 101. Program not terminated.\n"); printf("ERROR detected by Hypre ... END\n\n\n"); } ierr += 101; return ierr; } r_norm = sqrt((*(gmres_functions->InnerProd))(p[0],p[0])); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { printf("\n\nERROR detected by Hypre ... BEGIN\n"); printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n"); printf("User probably placed non-numerics in supplied A or x_0.\n"); printf("Returning error flag += 101. Program not terminated.\n"); printf("ERROR detected by Hypre ... END\n\n\n"); } ierr += 101; return ierr; } if ( logging>0 || print_level > 0) { norms[0] = r_norm; if ( print_level>1 && my_id == 0 ) { printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) printf("Rel_resid_norm actually contains the residual norm\n"); printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */ den_norm= b_norm; } else { /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */ den_norm= r_norm; }; epsilon= accuracy; /* convergence criterion |r_i| <= accuracy , absolute residual norm*/ if ( stop_crit && !rel_change ) epsilon = accuracy; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) {printf("=============================================\n\n"); printf("Iters resid.norm conv.rate rel.res.norm\n"); printf("----- ------------ ---------- ------------\n"); } else {printf("=============================================\n\n"); printf("Iters resid.norm conv.rate\n"); printf("----- ------------ ----------\n"); }; } /* set the relative_error to initially bypass the stopping criterion */ if (rel_change) { relative_error= epsilon + 1.; } while (iter < max_iter) { /* initialize first term of hessenberg system */ rs[0] = r_norm; if (r_norm == 0.0) { hypre_TFreeF(c,gmres_functions); hypre_TFreeF(s,gmres_functions); hypre_TFreeF(rs,gmres_functions); for (i=0; i < k_dim+1; i++) hypre_TFreeF(hh[i],gmres_functions); hypre_TFreeF(hh,gmres_functions); ierr = 0; return ierr; } if (r_norm/den_norm <= epsilon && iter >= min_iter) { if (rel_change) { if (relative_error <= epsilon) { (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(gmres_functions->InnerProd))(r,r)); if (r_norm/den_norm <= epsilon) { if ( print_level>1 && my_id == 0) { printf("\n\n"); printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) printf("false convergence 1\n"); } } else { (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(gmres_functions->InnerProd))(r,r)); if (r_norm/den_norm <= epsilon) { if ( print_level>1 && my_id == 0) { printf("\n\n"); printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) printf("false convergence 1\n"); } } t = 1.0 / r_norm; (*(gmres_functions->ScaleVector))(t,p[0]); i = 0; while (i < k_dim && ( (r_norm/den_norm > epsilon || iter < min_iter) || ((rel_change) && relative_error > epsilon) ) && iter < max_iter) { i++; iter++; (*(gmres_functions->ClearVector))(r); precond(precond_data, A, p[i-1], r); (*(gmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]); /* modified Gram_Schmidt */ for (j=0; j < i; j++) { hh[j][i-1] = (*(gmres_functions->InnerProd))(p[j],p[i]); (*(gmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]); } t = sqrt((*(gmres_functions->InnerProd))(p[i],p[i])); hh[i][i-1] = t; if (t != 0.0) { t = 1.0/t; (*(gmres_functions->ScaleVector))(t,p[i]); } /* done with modified Gram_schmidt and Arnoldi step. update factorization of hh */ for (j = 1; j < i; j++) { t = hh[j-1][i-1]; hh[j-1][i-1] = c[j-1]*t + s[j-1]*hh[j][i-1]; hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1]; } gamma = sqrt(hh[i-1][i-1]*hh[i-1][i-1] + hh[i][i-1]*hh[i][i-1]); if (gamma == 0.0) gamma = epsmac; c[i-1] = hh[i-1][i-1]/gamma; s[i-1] = hh[i][i-1]/gamma; rs[i] = -s[i-1]*rs[i-1]; rs[i-1] = c[i-1]*rs[i-1]; /* determine residual norm */ hh[i-1][i-1] = c[i-1]*hh[i-1][i-1] + s[i-1]*hh[i][i-1]; r_norm = fabs(rs[i]); if ( print_level>0 ) { norms[iter] = r_norm; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) printf("% 5d %e %f %e\n", iter, norms[iter],norms[iter]/norms[iter-1], norms[iter]/b_norm); else printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } } if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; #if 0 printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n", i, cf_ave_1, cf_ave_0, weight ); #endif if (weight * cf_ave_1 > cf_tol) { break_value = 1; break; } } } /* now compute solution, first solve upper triangular system */ if (break_value) break; rs[i-1] = rs[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = rs[k]; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs[j]; } rs[k] = t/hh[k][k]; } (*(gmres_functions->CopyVector))(p[0],w); (*(gmres_functions->ScaleVector))(rs[0],w); for (j = 1; j < i; j++) (*(gmres_functions->Axpy))(rs[j], p[j], w); (*(gmres_functions->ClearVector))(r); precond(precond_data, A, w, r); (*(gmres_functions->Axpy))(1.0,r,x); /* check for convergence, evaluate actual residual */ if (r_norm/den_norm <= epsilon && iter >= min_iter) { if (rel_change) { x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) ); if ( x_norm<=guard_zero_residual ) break; /* don't divide by 0 */ r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); relative_error= r_norm/x_norm; } (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); if (r_norm/den_norm <= epsilon) { if ( print_level>1 && my_id == 0 ) { printf("\n\n"); printf("Final L2 norm of residual: %e\n\n", r_norm); } if (rel_change && r_norm > guard_zero_residual) /* Also test on relative change of iterates, x_i - x_(i-1) */ { /* At this point r = x_i - x_(i-1) */ x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) ); if ( x_norm<=guard_zero_residual ) break; /* don't divide by 0 */ if ( relative_error < epsilon ) { (gmres_data -> converged) = 1; break; } } else { (gmres_data -> converged) = 1; break; } } else { if ( print_level>0 && my_id == 0) printf("false convergence 2\n"); (*(gmres_functions->CopyVector))(r,p[0]); i = 0; } } /* compute residual vector and continue loop */ for (j=i ; j > 0; j--) { rs[j-1] = -s[j-1]*rs[j]; rs[j] = c[j-1]*rs[j]; } if (i) (*(gmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]); for (j=1; j < i+1; j++) (*(gmres_functions->Axpy))(rs[j],p[j],p[0]); } if ( print_level>1 && my_id == 0 ) printf("\n\n"); (gmres_data -> num_iterations) = iter; if (b_norm > 0.0) (gmres_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (gmres_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm/den_norm > epsilon) ierr = 1; hypre_TFreeF(c,gmres_functions); hypre_TFreeF(s,gmres_functions); hypre_TFreeF(rs,gmres_functions); for (i=0; i < k_dim+1; i++) { hypre_TFreeF(hh[i],gmres_functions); } hypre_TFreeF(hh,gmres_functions); return ierr; }
HYPRE_Int hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix *A, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int option, HYPRE_Int diag_option, hypre_ParCSRMatrix **AN_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); double *A_diag_data = hypre_CSRMatrixData(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int num_variables = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_nonzeros_offd = 0; HYPRE_Int num_cols_offd = 0; hypre_ParCSRMatrix *AN; hypre_CSRMatrix *AN_diag; HYPRE_Int *AN_diag_i; HYPRE_Int *AN_diag_j; double *AN_diag_data; hypre_CSRMatrix *AN_offd; HYPRE_Int *AN_offd_i; HYPRE_Int *AN_offd_j; double *AN_offd_data; HYPRE_Int *col_map_offd_AN; HYPRE_Int *new_col_map_offd; HYPRE_Int *row_starts_AN; HYPRE_Int AN_num_nonzeros_diag = 0; HYPRE_Int AN_num_nonzeros_offd = 0; HYPRE_Int num_cols_offd_AN; HYPRE_Int new_num_cols_offd; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); HYPRE_Int num_sends; HYPRE_Int num_recvs; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int *new_send_map_elmts; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; hypre_ParCSRCommPkg *comm_pkg_AN; HYPRE_Int *send_procs_AN; HYPRE_Int *send_map_starts_AN; HYPRE_Int *send_map_elmts_AN; HYPRE_Int *recv_procs_AN; HYPRE_Int *recv_vec_starts_AN; HYPRE_Int i, j, k, k_map; HYPRE_Int ierr = 0; HYPRE_Int index, row; HYPRE_Int start_index; HYPRE_Int num_procs; HYPRE_Int node, cnt; HYPRE_Int mode; HYPRE_Int new_send_elmts_size; HYPRE_Int global_num_nodes; HYPRE_Int num_nodes; HYPRE_Int num_fun2; HYPRE_Int *map_to_node; HYPRE_Int *map_to_map; HYPRE_Int *counter; double sum; double *data; hypre_MPI_Comm_size(comm,&num_procs); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } mode = fabs(option); comm_pkg_AN = NULL; col_map_offd_AN = NULL; #ifdef HYPRE_NO_GLOBAL_PARTITION row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2); for (i=0; i < 2; i++) { row_starts_AN[i] = row_starts[i]/num_functions; if (row_starts_AN[i]*num_functions < row_starts[i]) { hypre_printf("nodes not properly aligned or incomplete info!\n"); return (87); } } global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions; #else row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1); for (i=0; i < num_procs+1; i++) { row_starts_AN[i] = row_starts[i]/num_functions; if (row_starts_AN[i]*num_functions < row_starts[i]) { hypre_printf("nodes not properly aligned or incomplete info!\n"); return (87); } } global_num_nodes = row_starts_AN[num_procs]; #endif num_nodes = num_variables/num_functions; num_fun2 = num_functions*num_functions; map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables); AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); counter = hypre_CTAlloc(HYPRE_Int, num_nodes); for (i=0; i < num_variables; i++) map_to_node[i] = i/num_functions; for (i=0; i < num_nodes; i++) counter[i] = -1; AN_num_nonzeros_diag = 0; row = 0; for (i=0; i < num_nodes; i++) { AN_diag_i[i] = AN_num_nonzeros_diag; for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < i) { counter[k_map] = i; AN_num_nonzeros_diag++; } } row++; } } AN_diag_i[num_nodes] = AN_num_nonzeros_diag; AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag); AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag); AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag); hypre_CSRMatrixI(AN_diag) = AN_diag_i; hypre_CSRMatrixJ(AN_diag) = AN_diag_j; hypre_CSRMatrixData(AN_diag) = AN_diag_data; for (i=0; i < num_nodes; i++) counter[i] = -1; index = 0; start_index = 0; row = 0; switch (mode) { case 1: /* frobenius norm */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = A_diag_data[k]*A_diag_data[k]; index++; } else { AN_diag_data[counter[k_map]] += A_diag_data[k]*A_diag_data[k]; } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) AN_diag_data[i] = sqrt(AN_diag_data[i]); } break; case 2: /* sum of abs. value of all elements in each block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = fabs(A_diag_data[k]); index++; } else { AN_diag_data[counter[k_map]] += fabs(A_diag_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) AN_diag_data[i] /= num_fun2; } break; case 3: /* largest element of each block (sets true value - not abs. value) */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = A_diag_data[k]; index++; } else { if (fabs(A_diag_data[k]) > fabs(AN_diag_data[counter[k_map]])) AN_diag_data[counter[k_map]] = A_diag_data[k]; } } row++; } start_index = index; } } break; case 4: /* inf. norm (row-sum) */ { data = hypre_CTAlloc(double, AN_num_nonzeros_diag*num_functions); for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; data[index*num_functions + j] = fabs(A_diag_data[k]); index++; } else { data[(counter[k_map])*num_functions + j] += fabs(A_diag_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) { AN_diag_data[i] = data[i*num_functions]; for (j=1; j< num_functions; j++) { AN_diag_data[i] = hypre_max( AN_diag_data[i],data[i*num_functions+j]); } } hypre_TFree(data); } break; case 6: /* sum of all elements in each block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = (A_diag_data[k]); index++; } else { AN_diag_data[counter[k_map]] += (A_diag_data[k]); } } row++; } start_index = index; } } break; } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (DO MORE BELOW) */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; sum = 0.0; for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++) { sum += AN_diag_data[k]; } AN_diag_data[index] = -sum; } } else if (diag_option == 2) { /* make all diagonal entries negative */ /* the diagonal is the first element listed in each row - */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; AN_diag_data[index] = - AN_diag_data[index]; } } num_nonzeros_offd = A_offd_i[num_variables]; AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); num_cols_offd_AN = 0; if (comm_pkg) { comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm; num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends; num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs; send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); send_procs_AN = NULL; send_map_elmts_AN = NULL; if (num_sends) { send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends); send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]); } send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1); recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1); recv_procs_AN = NULL; if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs); for (i=0; i < num_sends; i++) send_procs_AN[i] = send_procs[i]; for (i=0; i < num_recvs; i++) recv_procs_AN[i] = recv_procs[i]; send_map_starts_AN[0] = 0; cnt = 0; for (i=0; i < num_sends; i++) { k_map = send_map_starts[i]; if (send_map_starts[i+1]-k_map) send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions; for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++) { node = send_map_elmts[j]/num_functions; if (node > send_map_elmts_AN[cnt-1]) send_map_elmts_AN[cnt++] = node; } send_map_starts_AN[i+1] = cnt; } hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN; hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN; hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN; hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN; } num_cols_offd = hypre_CSRMatrixNumCols(A_offd); if (num_cols_offd) { if (num_cols_offd > num_variables) { hypre_TFree(map_to_node); map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd); } num_cols_offd_AN = 1; map_to_node[0] = col_map_offd[0]/num_functions; for (i=1; i < num_cols_offd; i++) { map_to_node[i] = col_map_offd[i]/num_functions; if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++; } if (num_cols_offd_AN > num_nodes) { hypre_TFree(counter); counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN); } map_to_map = NULL; col_map_offd_AN = NULL; map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd); col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN); col_map_offd_AN[0] = map_to_node[0]; recv_vec_starts_AN[0] = 0; cnt = 1; for (i=0; i < num_recvs; i++) { for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++) { node = map_to_node[j]; if (node > col_map_offd_AN[cnt-1]) { col_map_offd_AN[cnt++] = node; } map_to_map[j] = cnt-1; } recv_vec_starts_AN[i+1] = cnt; } for (i=0; i < num_cols_offd_AN; i++) counter[i] = -1; AN_num_nonzeros_offd = 0; row = 0; for (i=0; i < num_nodes; i++) { AN_offd_i[i] = AN_num_nonzeros_offd; for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < i) { counter[k_map] = i; AN_num_nonzeros_offd++; } } row++; } } AN_offd_i[num_nodes] = AN_num_nonzeros_offd; } AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN, AN_num_nonzeros_offd); hypre_CSRMatrixI(AN_offd) = AN_offd_i; if (AN_num_nonzeros_offd) { AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd); AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd); hypre_CSRMatrixJ(AN_offd) = AN_offd_j; hypre_CSRMatrixData(AN_offd) = AN_offd_data; for (i=0; i < num_cols_offd_AN; i++) counter[i] = -1; index = 0; row = 0; AN_offd_i[0] = 0; start_index = 0; switch (mode) { case 1: /* frobenius norm */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = A_offd_data[k]*A_offd_data[k]; index++; } else { AN_offd_data[counter[k_map]] += A_offd_data[k]*A_offd_data[k]; } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) AN_offd_data[i] = sqrt(AN_offd_data[i]); } break; case 2: /* sum of abs. value of all elements in block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = fabs(A_offd_data[k]); index++; } else { AN_offd_data[counter[k_map]] += fabs(A_offd_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) AN_offd_data[i] /= num_fun2; } break; case 3: /* largest element in each block (not abs. value ) */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = A_offd_data[k]; index++; } else { if (fabs(A_offd_data[k]) > fabs(AN_offd_data[counter[k_map]])) AN_offd_data[counter[k_map]] = A_offd_data[k]; } } row++; } start_index = index; } } break; case 4: /* inf. norm (row-sum) */ { data = hypre_CTAlloc(double, AN_num_nonzeros_offd*num_functions); for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; data[index*num_functions + j] = fabs(A_offd_data[k]); index++; } else { data[(counter[k_map])*num_functions + j] += fabs(A_offd_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) { AN_offd_data[i] = data[i*num_functions]; for (j=1; j< num_functions; j++) { AN_offd_data[i] = hypre_max( AN_offd_data[i],data[i*num_functions+j]); } } hypre_TFree(data); } break; case 6: /* sum of value of all elements in block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = (A_offd_data[k]); index++; } else { AN_offd_data[counter[k_map]] += (A_offd_data[k]); } } row++; } start_index = index; } } break; } hypre_TFree(map_to_map); } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (here we are adding the off_diag contribution)*/ /* the diagonal is the first element listed in each row of AN_diag_data - */ for (i=0; i < num_nodes; i++) { sum = 0.0; for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++) { sum += AN_offd_data[k]; } index = AN_diag_i[i];/* location of diag entry in data */ AN_diag_data[index] -= sum; /* subtract from current value */ } } AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes, row_starts_AN, row_starts_AN, num_cols_offd_AN, AN_num_nonzeros_diag, AN_num_nonzeros_offd); /* we already created the diag and offd matrices - so we don't need the ones created above */ hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN)); hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN)); hypre_ParCSRMatrixDiag(AN) = AN_diag; hypre_ParCSRMatrixOffd(AN) = AN_offd; hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN; hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN; new_num_cols_offd = num_functions*num_cols_offd_AN; if (new_num_cols_offd > num_cols_offd) { new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd); cnt = 0; for (i=0; i < num_cols_offd_AN; i++) { for (j=0; j < num_functions; j++) { new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j; } } cnt = 0; for (i=0; i < num_cols_offd; i++) { while (col_map_offd[i] > new_col_map_offd[cnt]) cnt++; col_map_offd[i] = cnt++; } for (i=0; i < num_recvs+1; i++) { recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i]; } for (i=0; i < num_nonzeros_offd; i++) { j = A_offd_j[i]; A_offd_j[i] = col_map_offd[j]; } hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd; hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd; hypre_TFree(col_map_offd); } hypre_TFree(map_to_node); new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions; if (new_send_elmts_size > send_map_starts[num_sends]) { new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size); cnt = 0; send_map_starts[0] = 0; for (i=0; i < num_sends; i++) { send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions; for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++) { for (k=0; k < num_functions; k++) new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k; } } hypre_TFree(send_map_elmts); hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts; } *AN_ptr = AN; hypre_TFree(counter); return (ierr); }
hypre_ParCSRMatrix * hypre_ParMatMinus_F( hypre_ParCSRMatrix * P, hypre_ParCSRMatrix * C, HYPRE_Int * CF_marker ) /* hypre_ParMatMinus_F subtracts selected rows of its second argument from selected rows of its first argument. The marker array determines which rows are affected - those for which CF_marker<0. The result is returned as a new matrix. */ { /* If P=(Pik),C=(Cik), i in Fine+Coarse, k in Coarse, we want new Pik = Pik - Cik, for Fine i only, all k. This computation is purely local. */ /* This is _not_ a general-purpose matrix subtraction function. This is written for an interpolation problem where it is known that C(i,k) exists whenever P(i,k) does (because C=A*P where A has nonzero diagonal elements). */ hypre_ParCSRMatrix *Pnew; hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P); hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P); hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C); hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C); hypre_CSRMatrix *Pnew_diag; hypre_CSRMatrix *Pnew_offd; double *P_diag_data = hypre_CSRMatrixData(P_diag); HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag); HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag); double *P_offd_data = hypre_CSRMatrixData(P_offd); HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd); HYPRE_Int *P_offd_j = hypre_CSRMatrixJ(P_offd); HYPRE_Int *P_col_map_offd = hypre_ParCSRMatrixColMapOffd( P ); double *C_diag_data = hypre_CSRMatrixData(C_diag); HYPRE_Int *C_diag_i = hypre_CSRMatrixI(C_diag); HYPRE_Int *C_diag_j = hypre_CSRMatrixJ(C_diag); double *C_offd_data = hypre_CSRMatrixData(C_offd); HYPRE_Int *C_offd_i = hypre_CSRMatrixI(C_offd); HYPRE_Int *C_offd_j = hypre_CSRMatrixJ(C_offd); HYPRE_Int *C_col_map_offd = hypre_ParCSRMatrixColMapOffd( C ); HYPRE_Int *Pnew_diag_i; HYPRE_Int *Pnew_diag_j; double *Pnew_diag_data; HYPRE_Int *Pnew_offd_i; HYPRE_Int *Pnew_offd_j; double *Pnew_offd_data; HYPRE_Int *Pnew_j2m; HYPRE_Int *Pnew_col_map_offd; HYPRE_Int num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag); /* HYPRE_Int num_rows_offd_C = hypre_CSRMatrixNumRows(C_offd); */ HYPRE_Int num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd); HYPRE_Int num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd); HYPRE_Int num_cols_offd_Pnew, num_rows_offd_Pnew; HYPRE_Int i1, jmin, jmax, jrange, jrangem1; HYPRE_Int j, m, mc, mp, jc, jp, jP, jC, jg, jCg, jPg; double dc, dp; /* Pnew = hypre_ParCSRMatrixCompleteClone( C );*/ Pnew = hypre_ParCSRMatrixUnion( C, P ); ; hypre_ParCSRMatrixZero_F( Pnew, CF_marker ); /* fine rows of Pnew set to 0 */ hypre_ParCSRMatrixCopy_C( Pnew, C, CF_marker ); /* coarse rows of Pnew copied from C (or P) */ /* ...Zero_F may not be needed depending on how Pnew is made */ Pnew_diag = hypre_ParCSRMatrixDiag(Pnew); Pnew_offd = hypre_ParCSRMatrixOffd(Pnew); Pnew_diag_i = hypre_CSRMatrixI(Pnew_diag); Pnew_diag_j = hypre_CSRMatrixJ(Pnew_diag); Pnew_offd_i = hypre_CSRMatrixI(Pnew_offd); Pnew_offd_j = hypre_CSRMatrixJ(Pnew_offd); Pnew_diag_data = hypre_CSRMatrixData(Pnew_diag); Pnew_offd_data = hypre_CSRMatrixData(Pnew_offd); Pnew_col_map_offd = hypre_ParCSRMatrixColMapOffd( Pnew ); num_rows_offd_Pnew = hypre_CSRMatrixNumRows(Pnew_offd); num_cols_offd_Pnew = hypre_CSRMatrixNumCols(Pnew_offd); /* Find the j-ranges, needed to allocate a "reverse lookup" array. */ /* This is the max j - min j over P and Pnew (which here is a copy of C). Only the diag block is considered. */ /* For scalability reasons (jrange can get big) this won't work for the offd block. Also, indexing is more complicated in the offd block (c.f. col_map_offd). It's not clear, though whether the "quadratic" algorithm I'm using for the offd block is really any slower than the more complicated "linear" algorithm here. */ jrange = 0; jrangem1=-1; for ( i1 = 0; i1 < num_rows_diag_C; i1++ ) { if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_diag)>0 ) /* only Fine rows matter */ { jmin = Pnew_diag_j[ Pnew_diag_i[i1] ]; jmax = Pnew_diag_j[ Pnew_diag_i[i1+1]-1 ]; jrangem1 = jmax-jmin; jrange = hypre_max(jrange,jrangem1+1); /* If columns (of a given row) were in increasing order, the above would be sufficient. If not, the following would be necessary (and sufficient) */ jmin = Pnew_diag_j[ Pnew_diag_i[i1] ]; jmax = Pnew_diag_j[ Pnew_diag_i[i1] ]; for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m ) { j = Pnew_diag_j[m]; jmin = hypre_min( jmin, j ); jmax = hypre_max( jmax, j ); } for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m ) { j = P_diag_j[m]; jmin = hypre_min( jmin, j ); jmax = hypre_max( jmax, j ); } jrangem1 = jmax-jmin; jrange = hypre_max(jrange,jrangem1+1); } } /*----------------------------------------------------------------------- * Loop over Pnew_diag rows. Construct a temporary reverse array: * If j is a column number, Pnew_j2m[j] is the array index for j, i.e. * Pnew_diag_j[ Pnew_j2m[j] ] = j *-----------------------------------------------------------------------*/ Pnew_j2m = hypre_CTAlloc( HYPRE_Int, jrange ); for ( i1 = 0; i1 < num_rows_diag_C; i1++ ) { if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_diag)>0 ) /* Fine data only */ { /* just needed for an assertion below... */ for ( j=0; j<jrange; ++j ) Pnew_j2m[j] = -1; jmin = Pnew_diag_j[ Pnew_diag_i[i1] ]; /* If columns (of a given row) were in increasing order, the above line would be sufficient. If not, the following loop would have to be added (or store the jmin computed above )*/ for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m ) { j = Pnew_diag_j[m]; jmin = hypre_min( jmin, j ); } for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m ) { j = P_diag_j[m]; jmin = hypre_min( jmin, j ); } for ( m = Pnew_diag_i[i1]; m<Pnew_diag_i[i1+1]; ++m ) { j = Pnew_diag_j[m]; hypre_assert( j-jmin>=0 ); hypre_assert( j-jmin<jrange ); Pnew_j2m[ j-jmin ] = m; } /*----------------------------------------------------------------------- * Loop over C_diag data for the current row. * Subtract each C data entry from the corresponding Pnew entry. *-----------------------------------------------------------------------*/ for ( mc=C_diag_i[i1]; mc<C_diag_i[i1+1]; ++mc ) { jc = C_diag_j[mc]; dc = C_diag_data[mc]; m = Pnew_j2m[jc-jmin]; hypre_assert( m>=0 ); Pnew_diag_data[m] -= dc; } /*----------------------------------------------------------------------- * Loop over P_diag data for the current row. * Add each P data entry from the corresponding Pnew entry. *-----------------------------------------------------------------------*/ for ( mp=P_diag_i[i1]; mp<P_diag_i[i1+1]; ++mp ) { jp = P_diag_j[mp]; dp = P_diag_data[mp]; m = Pnew_j2m[jp-jmin]; hypre_assert( m>=0 ); Pnew_diag_data[m] += dp; } } } /*----------------------------------------------------------------------- * Repeat for the offd block. *-----------------------------------------------------------------------*/ for ( i1 = 0; i1 < num_rows_offd_Pnew; i1++ ) { if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_offd)>0 ) /* Fine data only */ { if ( num_cols_offd_Pnew ) { /* This is a simple quadratic algorithm. If necessary I may try to implement the ideas used on the diag block later. */ for ( m = Pnew_offd_i[i1]; m<Pnew_offd_i[i1+1]; ++m ) { j = Pnew_offd_j[m]; jg = Pnew_col_map_offd[j]; Pnew_offd_data[m] = 0; if ( num_cols_offd_C ) for ( mc=C_offd_i[i1]; mc<C_offd_i[i1+1]; ++mc ) { jC = C_offd_j[mc]; jCg = C_col_map_offd[jC]; if ( jCg==jg ) Pnew_offd_data[m] -= C_offd_data[mc]; } if ( num_cols_offd_P ) for ( mp=P_offd_i[i1]; mp<P_offd_i[i1+1]; ++mp ) { jP = P_offd_j[mp]; jPg = P_col_map_offd[jP]; if ( jPg==jg ) Pnew_offd_data[m] += P_offd_data[mp]; } } } } } hypre_TFree(Pnew_j2m); return Pnew; }
/** * Partition the input so that * a1[0:*out1) and a2[0:*out2) contain the smallest k elements */ static void kth_element( HYPRE_Int *out1, HYPRE_Int *out2, HYPRE_Int *a1, HYPRE_Int *a2, HYPRE_Int n1, HYPRE_Int n2, HYPRE_Int k) { // either of the inputs is empty if (n1 == 0) { *out1 = 0; *out2 = k; return; } if (n2 == 0) { *out1 = k; *out2 = 0; return; } if (k >= n1 + n2) { *out1 = n1; *out2 = n2; return; } // one is greater than the other if (k < n1 && a1[k] <= a2[0]) { *out1 = k; *out2 = 0; return; } if (k - n1 >= 0 && a2[k - n1] >= a1[n1 - 1]) { *out1 = n1; *out2 = k - n1; return; } if (k < n2 && a2[k] <= a1[0]) { *out1 = 0; *out2 = k; return; } if (k - n2 >= 0 && a1[k - n2] >= a2[n2 - 1]) { *out1 = k - n2; *out2 = n2; return; } // now k > 0 // faster to do binary search on the shorter sequence if (n1 > n2) { SWAP(HYPRE_Int, n1, n2); SWAP(HYPRE_Int *, a1, a2); SWAP(HYPRE_Int *, out1, out2); } if (k < (n1 + n2)/2) { kth_element_(out1, out2, a1, a2, 0, hypre_min(n1 - 1, k), n1, n2, k); } else { // when k is big, faster to find (n1 + n2 - k)th biggest element HYPRE_Int offset1 = hypre_max(k - n2, 0), offset2 = hypre_max(k - n1, 0); HYPRE_Int new_k = k - offset1 - offset2; HYPRE_Int new_n1 = hypre_min(n1 - offset1, new_k + 1); HYPRE_Int new_n2 = hypre_min(n2 - offset2, new_k + 1); kth_element_(out1, out2, a1 + offset1, a2 + offset2, 0, new_n1 - 1, new_n1, new_n2, new_k); *out1 += offset1; *out2 += offset2; } #ifdef DBG_MERGE_SORT assert(*out1 + *out2 == k); #endif }
int hypre_StructGridAssembleWithAP( hypre_StructGrid *grid ) { int ierr = 0; int tmp_i; int size, global_num_boxes, num_local_boxes; int i, j, d, k, index; int num_procs, myid; int *sendbuf8, *recvbuf8, *sendbuf2, *recvbuf2; int min_box_size, max_box_size; int global_min_box_size, global_max_box_size; int *ids; int max_regions, max_refinements, ologp; double gamma; hypre_Index min_index, max_index; int prune; hypre_Box *box; MPI_Comm comm = hypre_StructGridComm(grid); hypre_Box *bounding_box = hypre_StructGridBoundingBox(grid); hypre_BoxArray *local_boxes = hypre_StructGridBoxes(grid); int dim = hypre_StructGridDim(grid); hypre_BoxNeighbors *neighbors = hypre_StructGridNeighbors(grid); int max_distance = hypre_StructGridMaxDistance(grid); hypre_IndexRef periodic = hypre_StructGridPeriodic(grid); int *local_boxnums; double dbl_global_size, tmp_dbl; hypre_BoxArray *my_partition; int *part_ids, *part_boxnums; int *proc_array, proc_count, proc_alloc, count; int *tmp_proc_ids = NULL; int max_response_size; int *ap_proc_ids, *send_buf, *send_buf_starts; int *response_buf, *response_buf_starts; hypre_BoxArray *neighbor_boxes, *n_boxes_copy; int *neighbor_proc_ids, *neighbor_boxnums; int *order_index, *delete_array; int tmp_id, start, first_local; int grow, grow_array[6]; hypre_Box *grow_box; int *numghost; int ghostsize; hypre_Box *ghostbox; hypre_StructAssumedPart *assumed_part; hypre_DataExchangeResponse response_obj; int px = hypre_IndexX(periodic); int py = hypre_IndexY(periodic); int pz = hypre_IndexZ(periodic); int i_periodic = px ? 1 : 0; int j_periodic = py ? 1 : 0; int k_periodic = pz ? 1 : 0; int num_periods, multiple_ap, p; hypre_Box *result_box, *period_box; hypre_Index *pshifts; hypre_IndexRef pshift; #if NEIGH_PRINT double start_time, end_time; #endif /*--------------------------------------------- Step 1: Initializations -----------------------------------------------*/ prune = 1; /* default is to prune */ num_local_boxes = hypre_BoxArraySize(local_boxes); num_periods = (1+2*i_periodic) * (1+2*j_periodic) * (1+2*k_periodic); MPI_Comm_size(comm, &num_procs); MPI_Comm_rank(comm, &myid); /*--------------------------------------------- Step 2: Determine the global size, total number of boxes, and global bounding box. Also get the min and max box sizes since it is convenient to do so. -----------------------------------------------*/ if (neighbors == NULL) { /*these may not be needed - check later */ ids = hypre_TAlloc(int, num_local_boxes); /* for the vol and number of boxes */ sendbuf2 = hypre_CTAlloc(int, 2); recvbuf2 = hypre_CTAlloc(int, 2); size = 0; bounding_box = hypre_BoxCreate(); grow_box = hypre_BoxCreate(); if (num_local_boxes) { min_box_size = hypre_BoxVolume( hypre_BoxArrayBox(local_boxes, 0)); max_box_size = hypre_BoxVolume( hypre_BoxArrayBox(local_boxes, 0)); /* initialize min and max */ for (d=0; d<3; d++) { hypre_IndexD(min_index, d) = pow(2,30); hypre_IndexD(max_index, d) = -pow(2,30); } hypre_ForBoxI(i, local_boxes) { box = hypre_BoxArrayBox(local_boxes, i); /* get global size and number of boxes */ tmp_i = hypre_BoxVolume(box); size += tmp_i; min_box_size = hypre_min(min_box_size, tmp_i); max_box_size = hypre_max(max_box_size, tmp_i); /* set id */ ids[i] = i; /* 1/3/05 we need this for the case of holes in the domain. (I had commented it out on 12/04 - as I thought this was not necessary. */ /* zero volume boxes - still look at for getting the bounding box */ if (hypre_BoxVolume(box) == 0) /* zero volume boxes - still count */ { hypre_CopyBox(box, grow_box); for (d = 0; d < 3; d++) { if(!hypre_BoxSizeD(box, d)) { grow = (hypre_BoxIMinD(box, d) - hypre_BoxIMaxD(box, d) + 1)/2; grow_array[2*d] = grow; grow_array[2*d+1] = grow; } else { grow_array[2*d] = 0; grow_array[2*d+1] = 0; } } /* expand the box */ hypre_BoxExpand(grow_box, grow_array); box = grow_box; /*pointer copy*/ } /*now we have a vol > 0 box */ for (d = 0; d < dim; d++) /* for each dimension */ { hypre_IndexD(min_index, d) = hypre_min( hypre_IndexD(min_index, d), hypre_BoxIMinD(box, d)); hypre_IndexD(max_index, d) = hypre_max( hypre_IndexD(max_index, d), hypre_BoxIMaxD(box, d)); } }/*end for each box loop */ /* bounding box extents */ hypre_BoxSetExtents(bounding_box, min_index, max_index); }
HYPRE_Int hypre_BoomerAMGCycle( void *amg_vdata, hypre_ParVector **F_array, hypre_ParVector **U_array ) { hypre_ParAMGData *amg_data = amg_vdata; HYPRE_Solver *smoother; /* Data Structure variables */ hypre_ParCSRMatrix **A_array; hypre_ParCSRMatrix **P_array; hypre_ParCSRMatrix **R_array; hypre_ParVector *Utemp; hypre_ParVector *Vtemp; hypre_ParVector *Rtemp; hypre_ParVector *Ptemp; hypre_ParVector *Ztemp; hypre_ParVector *Aux_U; hypre_ParVector *Aux_F; hypre_ParCSRBlockMatrix **A_block_array; hypre_ParCSRBlockMatrix **P_block_array; hypre_ParCSRBlockMatrix **R_block_array; HYPRE_Real *Ztemp_data; HYPRE_Real *Ptemp_data; HYPRE_Int **CF_marker_array; /* HYPRE_Int **unknown_map_array; HYPRE_Int **point_map_array; HYPRE_Int **v_at_point_array; */ HYPRE_Real cycle_op_count; HYPRE_Int cycle_type; HYPRE_Int num_levels; HYPRE_Int max_levels; HYPRE_Real *num_coeffs; HYPRE_Int *num_grid_sweeps; HYPRE_Int *grid_relax_type; HYPRE_Int **grid_relax_points; HYPRE_Int block_mode; HYPRE_Real *max_eig_est; HYPRE_Real *min_eig_est; HYPRE_Int cheby_order; HYPRE_Real cheby_fraction; /* Local variables */ HYPRE_Int *lev_counter; HYPRE_Int Solve_err_flag; HYPRE_Int k; HYPRE_Int i, j, jj; HYPRE_Int level; HYPRE_Int cycle_param; HYPRE_Int coarse_grid; HYPRE_Int fine_grid; HYPRE_Int Not_Finished; HYPRE_Int num_sweep; HYPRE_Int cg_num_sweep = 1; HYPRE_Int relax_type; HYPRE_Int relax_points; HYPRE_Int relax_order; HYPRE_Int relax_local; HYPRE_Int old_version = 0; HYPRE_Real *relax_weight; HYPRE_Real *omega; HYPRE_Real alfa, beta, gammaold; HYPRE_Real gamma = 1.0; HYPRE_Int local_size; /* HYPRE_Int *smooth_option; */ HYPRE_Int smooth_type; HYPRE_Int smooth_num_levels; HYPRE_Int num_threads, my_id; HYPRE_Real alpha; HYPRE_Real **l1_norms = NULL; HYPRE_Real *l1_norms_level; HYPRE_Int seq_cg = 0; MPI_Comm comm; #if 0 HYPRE_Real *D_mat; HYPRE_Real *S_vec; #endif /* Acquire data and allocate storage */ num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); P_array = hypre_ParAMGDataPArray(amg_data); R_array = hypre_ParAMGDataRArray(amg_data); CF_marker_array = hypre_ParAMGDataCFMarkerArray(amg_data); Vtemp = hypre_ParAMGDataVtemp(amg_data); Rtemp = hypre_ParAMGDataRtemp(amg_data); Ptemp = hypre_ParAMGDataPtemp(amg_data); Ztemp = hypre_ParAMGDataZtemp(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); max_levels = hypre_ParAMGDataMaxLevels(amg_data); cycle_type = hypre_ParAMGDataCycleType(amg_data); A_block_array = hypre_ParAMGDataABlockArray(amg_data); P_block_array = hypre_ParAMGDataPBlockArray(amg_data); R_block_array = hypre_ParAMGDataRBlockArray(amg_data); block_mode = hypre_ParAMGDataBlockMode(amg_data); num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data); relax_order = hypre_ParAMGDataRelaxOrder(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); omega = hypre_ParAMGDataOmega(amg_data); smooth_type = hypre_ParAMGDataSmoothType(amg_data); smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data); l1_norms = hypre_ParAMGDataL1Norms(amg_data); /* smooth_option = hypre_ParAMGDataSmoothOption(amg_data); */ max_eig_est = hypre_ParAMGDataMaxEigEst(amg_data); min_eig_est = hypre_ParAMGDataMinEigEst(amg_data); cheby_order = hypre_ParAMGDataChebyOrder(amg_data); cheby_fraction = hypre_ParAMGDataChebyFraction(amg_data); cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data); lev_counter = hypre_CTAlloc(HYPRE_Int, num_levels); if (hypre_ParAMGDataParticipate(amg_data)) seq_cg = 1; /* Initialize */ Solve_err_flag = 0; if (grid_relax_points) old_version = 1; num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels); num_coeffs[0] = hypre_ParCSRMatrixDNumNonzeros(A_array[0]); comm = hypre_ParCSRMatrixComm(A_array[0]); hypre_MPI_Comm_rank(comm,&my_id); if (block_mode) { for (j = 1; j < num_levels; j++) num_coeffs[j] = hypre_ParCSRBlockMatrixNumNonzeros(A_block_array[j]); } else { for (j = 1; j < num_levels; j++) num_coeffs[j] = hypre_ParCSRMatrixDNumNonzeros(A_array[j]); } /*--------------------------------------------------------------------- * Initialize cycling control counter * * Cycling is controlled using a level counter: lev_counter[k] * * Each time relaxation is performed on level k, the * counter is decremented by 1. If the counter is then * negative, we go to the next finer level. If non- * negative, we go to the next coarser level. The * following actions control cycling: * * a. lev_counter[0] is initialized to 1. * b. lev_counter[k] is initialized to cycle_type for k>0. * * c. During cycling, when going down to level k, lev_counter[k] * is set to the max of (lev_counter[k],cycle_type) *---------------------------------------------------------------------*/ Not_Finished = 1; lev_counter[0] = 1; for (k = 1; k < num_levels; ++k) { lev_counter[k] = cycle_type; } level = 0; cycle_param = 1; smoother = hypre_ParAMGDataSmoother(amg_data); if (smooth_num_levels > 0) { if (smooth_type == 7 || smooth_type == 8 || smooth_type == 17 || smooth_type == 18 || smooth_type == 9 || smooth_type == 19) { HYPRE_Int actual_local_size = hypre_ParVectorActualLocalSize(Vtemp); Utemp = hypre_ParVectorCreate(comm,hypre_ParVectorGlobalSize(Vtemp), hypre_ParVectorPartitioning(Vtemp)); hypre_ParVectorOwnsPartitioning(Utemp) = 0; local_size = hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)); if (local_size < actual_local_size) { hypre_VectorData(hypre_ParVectorLocalVector(Utemp)) = hypre_CTAlloc(HYPRE_Complex, actual_local_size); hypre_ParVectorActualLocalSize(Utemp) = actual_local_size; } else hypre_ParVectorInitialize(Utemp); } } /*--------------------------------------------------------------------- * Main loop of cycling *--------------------------------------------------------------------*/ while (Not_Finished) { if (num_levels > 1) { local_size = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[level])); hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)) = local_size; if (smooth_num_levels <= level) { cg_num_sweep = 1; num_sweep = num_grid_sweeps[cycle_param]; Aux_U = U_array[level]; Aux_F = F_array[level]; } else if (smooth_type > 9) { hypre_VectorSize(hypre_ParVectorLocalVector(Ztemp)) = local_size; hypre_VectorSize(hypre_ParVectorLocalVector(Rtemp)) = local_size; hypre_VectorSize(hypre_ParVectorLocalVector(Ptemp)) = local_size; Ztemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Ztemp)); Ptemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Ptemp)); hypre_ParVectorSetConstantValues(Ztemp,0); alpha = -1.0; beta = 1.0; hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[level], U_array[level], beta, F_array[level], Rtemp); cg_num_sweep = hypre_ParAMGDataSmoothNumSweeps(amg_data); num_sweep = num_grid_sweeps[cycle_param]; Aux_U = Ztemp; Aux_F = Rtemp; } else { cg_num_sweep = 1; num_sweep = hypre_ParAMGDataSmoothNumSweeps(amg_data); Aux_U = U_array[level]; Aux_F = F_array[level]; } relax_type = grid_relax_type[cycle_param]; } else /* AB: 4/08: removed the max_levels > 1 check - should do this when max-levels = 1 also */ { /* If no coarsening occurred, apply a simple smoother once */ Aux_U = U_array[level]; Aux_F = F_array[level]; num_sweep = 1; /* TK: Use the user relax type (instead of 0) to allow for setting a convergent smoother (e.g. in the solution of singular problems). */ relax_type = hypre_ParAMGDataUserRelaxType(amg_data); } if (l1_norms != NULL) l1_norms_level = l1_norms[level]; else l1_norms_level = NULL; if (cycle_param == 3 && seq_cg) { hypre_seqAMGCycle(amg_data, level, F_array, U_array); } else { /*------------------------------------------------------------------ * Do the relaxation num_sweep times *-----------------------------------------------------------------*/ for (jj = 0; jj < cg_num_sweep; jj++) { if (smooth_num_levels > level && smooth_type > 9) hypre_ParVectorSetConstantValues(Aux_U,0); for (j = 0; j < num_sweep; j++) { if (num_levels == 1 && max_levels > 1) { relax_points = 0; relax_local = 0; } else { if (old_version) relax_points = grid_relax_points[cycle_param][j]; relax_local = relax_order; } /*----------------------------------------------- * VERY sloppy approximation to cycle complexity *-----------------------------------------------*/ if (old_version && level < num_levels -1) { switch (relax_points) { case 1: cycle_op_count += num_coeffs[level+1]; break; case -1: cycle_op_count += (num_coeffs[level]-num_coeffs[level+1]); break; } } else { cycle_op_count += num_coeffs[level]; } /*----------------------------------------------- Choose Smoother -----------------------------------------------*/ if (smooth_num_levels > level && (smooth_type == 7 || smooth_type == 8 || smooth_type == 9 || smooth_type == 19 || smooth_type == 17 || smooth_type == 18)) { hypre_VectorSize(hypre_ParVectorLocalVector(Utemp)) = local_size; alpha = -1.0; beta = 1.0; hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[level], U_array[level], beta, Aux_F, Vtemp); if (smooth_type == 8 || smooth_type == 18) HYPRE_ParCSRParaSailsSolve(smoother[level], (HYPRE_ParCSRMatrix) A_array[level], (HYPRE_ParVector) Vtemp, (HYPRE_ParVector) Utemp); else if (smooth_type == 7 || smooth_type == 17) HYPRE_ParCSRPilutSolve(smoother[level], (HYPRE_ParCSRMatrix) A_array[level], (HYPRE_ParVector) Vtemp, (HYPRE_ParVector) Utemp); else if (smooth_type == 9 || smooth_type == 19) HYPRE_EuclidSolve(smoother[level], (HYPRE_ParCSRMatrix) A_array[level], (HYPRE_ParVector) Vtemp, (HYPRE_ParVector) Utemp); hypre_ParVectorAxpy(relax_weight[level],Utemp,Aux_U); } else if (smooth_num_levels > level && (smooth_type == 6 || smooth_type == 16)) { HYPRE_SchwarzSolve(smoother[level], (HYPRE_ParCSRMatrix) A_array[level], (HYPRE_ParVector) Aux_F, (HYPRE_ParVector) Aux_U); } /*else if (relax_type == 99)*/ else if (relax_type == 9 || relax_type == 99) { /* Gaussian elimination */ hypre_GaussElimSolve(amg_data, level, relax_type); } else if (relax_type == 18) { /* L1 - Jacobi*/ if (relax_order == 1 && cycle_param < 3) { /* need to do CF - so can't use the AMS one */ HYPRE_Int i; HYPRE_Int loc_relax_points[2]; if (cycle_type < 2) { loc_relax_points[0] = 1; loc_relax_points[1] = -1; } else { loc_relax_points[0] = -1; loc_relax_points[1] = 1; } for (i=0; i < 2; i++) hypre_ParCSRRelax_L1_Jacobi(A_array[level], Aux_F, CF_marker_array[level], loc_relax_points[i], relax_weight[level], l1_norms[level], Aux_U, Vtemp); } else /* not CF - so use through AMS */ { if (num_threads == 1) hypre_ParCSRRelax(A_array[level], Aux_F, 1, 1, l1_norms_level, relax_weight[level], omega[level],0,0,0,0, Aux_U, Vtemp, Ztemp); else hypre_ParCSRRelaxThreads(A_array[level], Aux_F, 1, 1, l1_norms_level, relax_weight[level], omega[level], Aux_U, Vtemp, Ztemp); } } else if (relax_type == 15) { /* CG */ if (j ==0) /* do num sweep iterations of CG */ hypre_ParCSRRelax_CG( smoother[level], A_array[level], Aux_F, Aux_U, num_sweep); } else if (relax_type == 16) { /* scaled Chebyshev */ HYPRE_Int scale = 1; HYPRE_Int variant = 0; hypre_ParCSRRelax_Cheby(A_array[level], Aux_F, max_eig_est[level], min_eig_est[level], cheby_fraction, cheby_order, scale, variant, Aux_U, Vtemp, Ztemp ); } else if (relax_type ==17) { hypre_BoomerAMGRelax_FCFJacobi(A_array[level], Aux_F, CF_marker_array[level], relax_weight[level], Aux_U, Vtemp); } else if (old_version) { Solve_err_flag = hypre_BoomerAMGRelax(A_array[level], Aux_F, CF_marker_array[level], relax_type, relax_points, relax_weight[level], omega[level], l1_norms_level, Aux_U, Vtemp, Ztemp); } else { /* smoother than can have CF ordering */ if (block_mode) { Solve_err_flag = hypre_BoomerAMGBlockRelaxIF(A_block_array[level], Aux_F, CF_marker_array[level], relax_type, relax_local, cycle_param, relax_weight[level], omega[level], Aux_U, Vtemp); } else { Solve_err_flag = hypre_BoomerAMGRelaxIF(A_array[level], Aux_F, CF_marker_array[level], relax_type, relax_local, cycle_param, relax_weight[level], omega[level], l1_norms_level, Aux_U, Vtemp, Ztemp); } } if (Solve_err_flag != 0) return(Solve_err_flag); } if (smooth_num_levels > level && smooth_type > 9) { gammaold = gamma; gamma = hypre_ParVectorInnerProd(Rtemp,Ztemp); if (jj == 0) hypre_ParVectorCopy(Ztemp,Ptemp); else { beta = gamma/gammaold; for (i=0; i < local_size; i++) Ptemp_data[i] = Ztemp_data[i] + beta*Ptemp_data[i]; } hypre_ParCSRMatrixMatvec(1.0,A_array[level],Ptemp,0.0,Vtemp); alfa = gamma /hypre_ParVectorInnerProd(Ptemp,Vtemp); hypre_ParVectorAxpy(alfa,Ptemp,U_array[level]); hypre_ParVectorAxpy(-alfa,Vtemp,Rtemp); } } } /*------------------------------------------------------------------ * Decrement the control counter and determine which grid to visit next *-----------------------------------------------------------------*/ --lev_counter[level]; if (lev_counter[level] >= 0 && level != num_levels-1) { /*--------------------------------------------------------------- * Visit coarser level next. * Compute residual using hypre_ParCSRMatrixMatvec. * Perform restriction using hypre_ParCSRMatrixMatvecT. * Reset counters and cycling parameters for coarse level *--------------------------------------------------------------*/ fine_grid = level; coarse_grid = level + 1; hypre_ParVectorSetConstantValues(U_array[coarse_grid], 0.0); alpha = -1.0; beta = 1.0; if (block_mode) { hypre_ParVectorCopy(F_array[fine_grid],Vtemp); hypre_ParCSRBlockMatrixMatvec(alpha, A_block_array[fine_grid], U_array[fine_grid], beta, Vtemp); } else { // JSP: avoid unnecessary copy using out-of-place version of SpMV hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[fine_grid], U_array[fine_grid], beta, F_array[fine_grid], Vtemp); } alpha = 1.0; beta = 0.0; if (block_mode) { hypre_ParCSRBlockMatrixMatvecT(alpha,R_block_array[fine_grid],Vtemp, beta,F_array[coarse_grid]); } else { hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp, beta,F_array[coarse_grid]); } ++level; lev_counter[level] = hypre_max(lev_counter[level],cycle_type); cycle_param = 1; if (level == num_levels-1) cycle_param = 3; } else if (level != 0) { /*--------------------------------------------------------------- * Visit finer level next. * Interpolate and add correction using hypre_ParCSRMatrixMatvec. * Reset counters and cycling parameters for finer level. *--------------------------------------------------------------*/ fine_grid = level - 1; coarse_grid = level; alpha = 1.0; beta = 1.0; if (block_mode) { hypre_ParCSRBlockMatrixMatvec(alpha, P_block_array[fine_grid], U_array[coarse_grid], beta, U_array[fine_grid]); } else { hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid], U_array[coarse_grid], beta, U_array[fine_grid]); } --level; cycle_param = 2; } else { Not_Finished = 0; } } hypre_ParAMGDataCycleOpCount(amg_data) = cycle_op_count; hypre_TFree(lev_counter); hypre_TFree(num_coeffs); if (smooth_num_levels > 0) { if (smooth_type == 7 || smooth_type == 8 || smooth_type == 9 || smooth_type == 17 || smooth_type == 18 || smooth_type == 19 ) hypre_ParVectorDestroy(Utemp); } return(Solve_err_flag); }
HYPRE_Int hypre_BoomerAMGAdditiveCycle( void *amg_vdata) { hypre_ParAMGData *amg_data = amg_vdata; /* Data Structure variables */ hypre_ParCSRMatrix **A_array; hypre_ParCSRMatrix **P_array; hypre_ParCSRMatrix **R_array; hypre_ParCSRMatrix *Lambda; hypre_ParVector **F_array; hypre_ParVector **U_array; hypre_ParVector *Vtemp; hypre_ParVector *Ztemp; hypre_ParVector *Xtilde, *Rtilde; HYPRE_Int **CF_marker_array; HYPRE_Int num_levels; HYPRE_Int addlvl; HYPRE_Int additive; HYPRE_Int mult_additive; HYPRE_Int simple; HYPRE_Int i, num_rows; HYPRE_Int n_global; HYPRE_Int rlx_order; /* Local variables */ HYPRE_Int Solve_err_flag = 0; HYPRE_Int level; HYPRE_Int coarse_grid; HYPRE_Int fine_grid; HYPRE_Int relax_type; HYPRE_Int rlx_down; HYPRE_Int rlx_up; HYPRE_Int *grid_relax_type; HYPRE_Real **l1_norms; HYPRE_Real alpha, beta; HYPRE_Int num_threads; HYPRE_Real *u_data; HYPRE_Real *f_data; HYPRE_Real *v_data; HYPRE_Real *l1_norms_lvl; HYPRE_Real *D_inv; HYPRE_Real *x_global; HYPRE_Real *r_global; HYPRE_Real *relax_weight; HYPRE_Real *omega; #if 0 HYPRE_Real *D_mat; HYPRE_Real *S_vec; #endif /* Acquire data and allocate storage */ num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); F_array = hypre_ParAMGDataFArray(amg_data); U_array = hypre_ParAMGDataUArray(amg_data); P_array = hypre_ParAMGDataPArray(amg_data); R_array = hypre_ParAMGDataRArray(amg_data); CF_marker_array = hypre_ParAMGDataCFMarkerArray(amg_data); Vtemp = hypre_ParAMGDataVtemp(amg_data); Ztemp = hypre_ParAMGDataZtemp(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); additive = hypre_ParAMGDataAdditive(amg_data); mult_additive = hypre_ParAMGDataMultAdditive(amg_data); simple = hypre_ParAMGDataSimple(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); Lambda = hypre_ParAMGDataLambda(amg_data); Xtilde = hypre_ParAMGDataXtilde(amg_data); Rtilde = hypre_ParAMGDataRtilde(amg_data); l1_norms = hypre_ParAMGDataL1Norms(amg_data); D_inv = hypre_ParAMGDataDinv(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); omega = hypre_ParAMGDataOmega(amg_data); rlx_order = hypre_ParAMGDataRelaxOrder(amg_data); /* Initialize */ addlvl = hypre_max(additive, mult_additive); addlvl = hypre_max(addlvl, simple); Solve_err_flag = 0; /*--------------------------------------------------------------------- * Main loop of cycling --- multiplicative version --- V-cycle *--------------------------------------------------------------------*/ /* down cycle */ relax_type = grid_relax_type[1]; rlx_down = grid_relax_type[1]; rlx_up = grid_relax_type[2]; for (level = 0; level < num_levels-1; level++) { fine_grid = level; coarse_grid = level + 1; u_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[fine_grid])); f_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[fine_grid])); v_data = hypre_VectorData(hypre_ParVectorLocalVector(Vtemp)); l1_norms_lvl = l1_norms[level]; hypre_ParVectorSetConstantValues(U_array[coarse_grid], 0.0); if (level < addlvl) /* multiplicative version */ { /* smoothing step */ if (rlx_down == 0) { HYPRE_Real *A_data = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(A_array[fine_grid])); HYPRE_Int *A_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(A_array[fine_grid])); hypre_ParVectorCopy(F_array[fine_grid],Vtemp); num_rows = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_array[fine_grid])); #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rows; i++) u_data[i] = relax_weight[level]*v_data[i] / A_data[A_i[i]]; } else if (rlx_down != 18) { /*hypre_BoomerAMGRelax(A_array[fine_grid],F_array[fine_grid],NULL,rlx_down,0,*/ hypre_BoomerAMGRelaxIF(A_array[fine_grid],F_array[fine_grid], CF_marker_array[fine_grid], rlx_down,rlx_order,1, relax_weight[fine_grid], omega[fine_grid], l1_norms_lvl, U_array[fine_grid], Vtemp, Ztemp); hypre_ParVectorCopy(F_array[fine_grid],Vtemp); } else { hypre_ParVectorCopy(F_array[fine_grid],Vtemp); num_rows = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_array[fine_grid])); #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rows; i++) u_data[i] += v_data[i] / l1_norms_lvl[i]; } alpha = -1.0; beta = 1.0; hypre_ParCSRMatrixMatvec(alpha, A_array[fine_grid], U_array[fine_grid], beta, Vtemp); alpha = 1.0; beta = 0.0; hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp, beta,F_array[coarse_grid]); } else /* additive version */ { hypre_ParVectorCopy(F_array[fine_grid],Vtemp); if (level == 0) /* compute residual */ { hypre_ParVectorCopy(Vtemp, Rtilde); hypre_ParVectorCopy(U_array[fine_grid],Xtilde); } alpha = 1.0; beta = 0.0; hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp, beta,F_array[coarse_grid]); } } /* solve coarse grid */ if (addlvl < num_levels) { if (simple > -1) { x_global = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde)); r_global = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde)); n_global = hypre_VectorSize(hypre_ParVectorLocalVector(Xtilde)); #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < n_global; i++) x_global[i] += D_inv[i]*r_global[i]; } else hypre_ParCSRMatrixMatvec(1.0, Lambda, Rtilde, 1.0, Xtilde); if (addlvl == 0) hypre_ParVectorCopy(Xtilde, U_array[0]); } else { fine_grid = num_levels -1; hypre_ParCSRRelax(A_array[fine_grid], F_array[fine_grid], 1, 1, l1_norms[fine_grid], 1.0, 1.0 ,0,0,0,0, U_array[fine_grid], Vtemp, Ztemp); } /* up cycle */ relax_type = grid_relax_type[2]; for (level = num_levels-1; level > 0; level--) { fine_grid = level - 1; coarse_grid = level; if (level <= addlvl) /* multiplicative version */ { alpha = 1.0; beta = 1.0; hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid], U_array[coarse_grid], beta, U_array[fine_grid]); if (rlx_up != 18) /*hypre_BoomerAMGRelax(A_array[fine_grid],F_array[fine_grid],NULL,rlx_up,0,*/ hypre_BoomerAMGRelaxIF(A_array[fine_grid],F_array[fine_grid], CF_marker_array[fine_grid], rlx_up,rlx_order,2, relax_weight[fine_grid], omega[fine_grid], l1_norms[fine_grid], U_array[fine_grid], Vtemp, Ztemp); else if (rlx_order) { HYPRE_Int loc_relax_points[2]; loc_relax_points[0] = -1; loc_relax_points[1] = 1; for (i=0; i < 2; i++) hypre_ParCSRRelax_L1_Jacobi(A_array[fine_grid],F_array[fine_grid], CF_marker_array[fine_grid], loc_relax_points[i], 1.0, l1_norms[fine_grid], U_array[fine_grid], Vtemp); } else hypre_ParCSRRelax(A_array[fine_grid], F_array[fine_grid], 1, 1, l1_norms[fine_grid], 1.0, 1.0 ,0,0,0,0, U_array[fine_grid], Vtemp, Ztemp); } else /* additive version */ { alpha = 1.0; beta = 1.0; hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid], U_array[coarse_grid], beta, U_array[fine_grid]); } } return(Solve_err_flag); }
HYPRE_Int hypre_CreateLambda(void *amg_vdata) { hypre_ParAMGData *amg_data = amg_vdata; /* Data Structure variables */ MPI_Comm comm; hypre_ParCSRMatrix **A_array; hypre_ParVector **F_array; hypre_ParVector **U_array; hypre_ParCSRMatrix *A_tmp; hypre_ParCSRMatrix *Lambda; hypre_CSRMatrix *L_diag; hypre_CSRMatrix *L_offd; hypre_CSRMatrix *A_tmp_diag; hypre_CSRMatrix *A_tmp_offd; hypre_ParVector *Xtilde; hypre_ParVector *Rtilde; hypre_Vector *Xtilde_local; hypre_Vector *Rtilde_local; hypre_ParCSRCommPkg *comm_pkg; hypre_ParCSRCommPkg *L_comm_pkg = NULL; hypre_ParCSRCommHandle *comm_handle; HYPRE_Real *L_diag_data; HYPRE_Real *L_offd_data; HYPRE_Real *buf_data = NULL; HYPRE_Real *tmp_data; HYPRE_Real *x_data; HYPRE_Real *r_data; HYPRE_Real *l1_norms; HYPRE_Real *A_tmp_diag_data; HYPRE_Real *A_tmp_offd_data; HYPRE_Real *D_data = NULL; HYPRE_Real *D_data_offd = NULL; HYPRE_Int *L_diag_i; HYPRE_Int *L_diag_j; HYPRE_Int *L_offd_i; HYPRE_Int *L_offd_j; HYPRE_Int *A_tmp_diag_i; HYPRE_Int *A_tmp_offd_i; HYPRE_Int *A_tmp_diag_j; HYPRE_Int *A_tmp_offd_j; HYPRE_Int *L_recv_ptr = NULL; HYPRE_Int *L_send_ptr = NULL; HYPRE_Int *L_recv_procs = NULL; HYPRE_Int *L_send_procs = NULL; HYPRE_Int *L_send_map_elmts = NULL; HYPRE_Int *recv_procs; HYPRE_Int *send_procs; HYPRE_Int *send_map_elmts; HYPRE_Int *send_map_starts; HYPRE_Int *recv_vec_starts; HYPRE_Int *all_send_procs = NULL; HYPRE_Int *all_recv_procs = NULL; HYPRE_Int *remap = NULL; HYPRE_Int *level_start; HYPRE_Int addlvl; HYPRE_Int additive; HYPRE_Int mult_additive; HYPRE_Int num_levels; HYPRE_Int num_add_lvls; HYPRE_Int num_procs; HYPRE_Int num_sends, num_recvs; HYPRE_Int num_sends_L = 0; HYPRE_Int num_recvs_L = 0; HYPRE_Int send_data_L = 0; HYPRE_Int num_rows_L = 0; HYPRE_Int num_rows_tmp = 0; HYPRE_Int num_cols_offd_L = 0; HYPRE_Int num_cols_offd = 0; HYPRE_Int level, i, j, k; HYPRE_Int this_proc, cnt, cnt_diag, cnt_offd; HYPRE_Int cnt_recv, cnt_send, cnt_row, row_start; HYPRE_Int start_diag, start_offd, indx, cnt_map; HYPRE_Int start, j_indx, index, cnt_level; HYPRE_Int max_sends, max_recvs; /* Local variables */ HYPRE_Int Solve_err_flag = 0; HYPRE_Int num_threads; HYPRE_Int num_nonzeros_diag; HYPRE_Int num_nonzeros_offd; HYPRE_Real **l1_norms_ptr = NULL; HYPRE_Real *relax_weight = NULL; HYPRE_Real relax_type; /* Acquire data and allocate storage */ num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); F_array = hypre_ParAMGDataFArray(amg_data); U_array = hypre_ParAMGDataUArray(amg_data); additive = hypre_ParAMGDataAdditive(amg_data); mult_additive = hypre_ParAMGDataMultAdditive(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); relax_type = hypre_ParAMGDataGridRelaxType(amg_data)[1]; comm = hypre_ParCSRMatrixComm(A_array[0]); hypre_MPI_Comm_size(comm,&num_procs); l1_norms_ptr = hypre_ParAMGDataL1Norms(amg_data); addlvl = hypre_max(additive, mult_additive); num_add_lvls = num_levels+1-addlvl; level_start = hypre_CTAlloc(HYPRE_Int, num_add_lvls+1); send_data_L = 0; num_rows_L = 0; num_cols_offd_L = 0; num_nonzeros_diag = 0; num_nonzeros_offd = 0; level_start[0] = 0; cnt = 1; max_sends = 0; max_recvs = 0; for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp); A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp); A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag); A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd); num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag); num_cols_offd = hypre_CSRMatrixNumCols(A_tmp_offd); num_rows_L += num_rows_tmp; level_start[cnt] = level_start[cnt-1] + num_rows_tmp; cnt++; num_cols_offd_L += num_cols_offd; num_nonzeros_diag += A_tmp_diag_i[num_rows_tmp]; num_nonzeros_offd += A_tmp_offd_i[num_rows_tmp]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); max_sends += num_sends; if (num_sends) send_data_L += hypre_ParCSRCommPkgSendMapStart(comm_pkg,num_sends); max_recvs += hypre_ParCSRCommPkgNumRecvs(comm_pkg); } } if (max_sends >= num_procs ||max_recvs >= num_procs) { max_sends = num_procs; max_recvs = num_procs; } if (max_sends) all_send_procs = hypre_CTAlloc(HYPRE_Int, max_sends); if (max_recvs) all_recv_procs = hypre_CTAlloc(HYPRE_Int, max_recvs); cnt_send = 0; cnt_recv = 0; if (max_sends || max_recvs) { if (max_sends < num_procs && max_recvs < num_procs) { for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); for (j = 0; j < num_sends; j++) all_send_procs[cnt_send++] = send_procs[j]; for (j = 0; j < num_recvs; j++) all_recv_procs[cnt_recv++] = recv_procs[j]; } } if (max_sends) { qsort0(all_send_procs, 0, max_sends-1); num_sends_L = 1; this_proc = all_send_procs[0]; for (i=1; i < max_sends; i++) { if (all_send_procs[i] > this_proc) { this_proc = all_send_procs[i]; all_send_procs[num_sends_L++] = this_proc; } } L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L); for (j=0; j < num_sends_L; j++) L_send_procs[j] = all_send_procs[j]; hypre_TFree(all_send_procs); } if (max_recvs) { qsort0(all_recv_procs, 0, max_recvs-1); num_recvs_L = 1; this_proc = all_recv_procs[0]; for (i=1; i < max_recvs; i++) { if (all_recv_procs[i] > this_proc) { this_proc = all_recv_procs[i]; all_recv_procs[num_recvs_L++] = this_proc; } } L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L); for (j=0; j < num_recvs_L; j++) L_recv_procs[j] = all_recv_procs[j]; hypre_TFree(all_recv_procs); } L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1); L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1); for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); } else { num_sends = 0; num_recvs = 0; } for (k = 0; k < num_sends; k++) { this_proc = hypre_BinarySearch(L_send_procs,send_procs[k],num_sends_L); L_send_ptr[this_proc+1] += send_map_starts[k+1]-send_map_starts[k]; } for (k = 0; k < num_recvs; k++) { this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[k],num_recvs_L); L_recv_ptr[this_proc+1] += recv_vec_starts[k+1]-recv_vec_starts[k]; } } L_recv_ptr[0] = 0; for (i=1; i < num_recvs_L; i++) L_recv_ptr[i+1] += L_recv_ptr[i]; L_send_ptr[0] = 0; for (i=1; i < num_sends_L; i++) L_send_ptr[i+1] += L_send_ptr[i]; } else { num_recvs_L = 0; num_sends_L = 0; for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); for (j = 0; j < num_sends; j++) { this_proc = send_procs[j]; if (all_send_procs[this_proc] == 0) num_sends_L++; all_send_procs[this_proc] += send_map_starts[j+1]-send_map_starts[j]; } for (j = 0; j < num_recvs; j++) { this_proc = recv_procs[j]; if (all_recv_procs[this_proc] == 0) num_recvs_L++; all_recv_procs[this_proc] += recv_vec_starts[j+1]-recv_vec_starts[j]; } } } if (max_sends) { L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L); L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1); num_sends_L = 0; for (j=0; j < num_procs; j++) { this_proc = all_send_procs[j]; if (this_proc) { L_send_procs[num_sends_L++] = j; L_send_ptr[num_sends_L] = this_proc + L_send_ptr[num_sends_L-1]; } } } if (max_recvs) { L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L); L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1); num_recvs_L = 0; for (j=0; j < num_procs; j++) { this_proc = all_recv_procs[j]; if (this_proc) { L_recv_procs[num_recvs_L++] = j; L_recv_ptr[num_recvs_L] = this_proc + L_recv_ptr[num_recvs_L-1]; } } } } } if (max_sends) hypre_TFree(all_send_procs); if (max_recvs) hypre_TFree(all_recv_procs); L_diag = hypre_CSRMatrixCreate(num_rows_L, num_rows_L, num_nonzeros_diag); L_offd = hypre_CSRMatrixCreate(num_rows_L, num_cols_offd_L, num_nonzeros_offd); hypre_CSRMatrixInitialize(L_diag); hypre_CSRMatrixInitialize(L_offd); if (num_nonzeros_diag) { L_diag_data = hypre_CSRMatrixData(L_diag); L_diag_j = hypre_CSRMatrixJ(L_diag); } L_diag_i = hypre_CSRMatrixI(L_diag); if (num_nonzeros_offd) { L_offd_data = hypre_CSRMatrixData(L_offd); L_offd_j = hypre_CSRMatrixJ(L_offd); } L_offd_i = hypre_CSRMatrixI(L_offd); if (num_rows_L) D_data = hypre_CTAlloc(HYPRE_Real,num_rows_L); if (send_data_L) { L_send_map_elmts = hypre_CTAlloc(HYPRE_Int, send_data_L); buf_data = hypre_CTAlloc(HYPRE_Real,send_data_L); } if (num_cols_offd_L) { D_data_offd = hypre_CTAlloc(HYPRE_Real,num_cols_offd_L); /*L_col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);*/ remap = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L); } Rtilde = hypre_CTAlloc(hypre_ParVector, 1); Rtilde_local = hypre_SeqVectorCreate(num_rows_L); hypre_SeqVectorInitialize(Rtilde_local); hypre_ParVectorLocalVector(Rtilde) = Rtilde_local; hypre_ParVectorOwnsData(Rtilde) = 1; Xtilde = hypre_CTAlloc(hypre_ParVector, 1); Xtilde_local = hypre_SeqVectorCreate(num_rows_L); hypre_SeqVectorInitialize(Xtilde_local); hypre_ParVectorLocalVector(Xtilde) = Xtilde_local; hypre_ParVectorOwnsData(Xtilde) = 1; x_data = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde)); r_data = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde)); cnt = 0; cnt_level = 0; cnt_diag = 0; cnt_offd = 0; cnt_row = 1; L_diag_i[0] = 0; L_offd_i[0] = 0; for (level=addlvl; level < num_levels; level++) { row_start = level_start[cnt_level]; if (level != 0) { tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])); if (tmp_data) hypre_TFree(tmp_data); hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])) = &r_data[row_start]; hypre_VectorOwnsData(hypre_ParVectorLocalVector(F_array[level])) = 0; tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])); if (tmp_data) hypre_TFree(tmp_data); hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])) = &x_data[row_start]; hypre_VectorOwnsData(hypre_ParVectorLocalVector(U_array[level])) = 0; } cnt_level++; start_diag = L_diag_i[cnt_row-1]; start_offd = L_offd_i[cnt_row-1]; A_tmp = A_array[level]; A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp); A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp); comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag); A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd); A_tmp_diag_j = hypre_CSRMatrixJ(A_tmp_diag); A_tmp_offd_j = hypre_CSRMatrixJ(A_tmp_offd); A_tmp_diag_data = hypre_CSRMatrixData(A_tmp_diag); A_tmp_offd_data = hypre_CSRMatrixData(A_tmp_offd); num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); } else { num_sends = 0; num_recvs = 0; } /* Compute new combined communication package */ for (i=0; i < num_sends; i++) { this_proc = hypre_BinarySearch(L_send_procs,send_procs[i],num_sends_L); indx = L_send_ptr[this_proc]; for (j=send_map_starts[i]; j < send_map_starts[i+1]; j++) { L_send_map_elmts[indx++] = row_start + send_map_elmts[j]; } L_send_ptr[this_proc] = indx; } cnt_map = 0; for (i = 0; i < num_recvs; i++) { this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[i],num_recvs_L); indx = L_recv_ptr[this_proc]; for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++) { remap[cnt_map++] = indx++; } L_recv_ptr[this_proc] = indx; } /* Compute Lambda */ if (relax_type == 0) { HYPRE_Real rlx_wt = relax_weight[level]; #ifdef HYPRE_USING_OPENMP #pragma omp for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < num_rows_tmp; i++) { D_data[i] = rlx_wt/A_tmp_diag_data[A_tmp_diag_i[i]]; L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1]; L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1]; } } else { l1_norms = l1_norms_ptr[level]; #ifdef HYPRE_USING_OPENMP #pragma omp for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < num_rows_tmp; i++) { D_data[i] = 1.0/l1_norms[i]; L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1]; L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1]; } } if (num_procs > 1) { index = 0; for (i=0; i < num_sends; i++) { start = send_map_starts[i]; for (j=start; j < send_map_starts[i+1]; j++) buf_data[index++] = D_data[send_map_elmts[j]]; } comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, buf_data, D_data_offd); hypre_ParCSRCommHandleDestroy(comm_handle); } for (i = 0; i < num_rows_tmp; i++) { j_indx = A_tmp_diag_i[i]; L_diag_data[cnt_diag] = (2.0 - A_tmp_diag_data[j_indx]*D_data[i])*D_data[i]; L_diag_j[cnt_diag++] = i+row_start; for (j=A_tmp_diag_i[i]+1; j < A_tmp_diag_i[i+1]; j++) { j_indx = A_tmp_diag_j[j]; L_diag_data[cnt_diag] = (- A_tmp_diag_data[j]*D_data[j_indx])*D_data[i]; L_diag_j[cnt_diag++] = j_indx+row_start; } for (j=A_tmp_offd_i[i]; j < A_tmp_offd_i[i+1]; j++) { j_indx = A_tmp_offd_j[j]; L_offd_data[cnt_offd] = (- A_tmp_offd_data[j]*D_data_offd[j_indx])*D_data[i]; L_offd_j[cnt_offd++] = remap[j_indx]; } } cnt_row += num_rows_tmp; } if (L_send_ptr) { for (i=num_sends_L-1; i > 0; i--) L_send_ptr[i] = L_send_ptr[i-1]; L_send_ptr[0] = 0; } else L_send_ptr = hypre_CTAlloc(HYPRE_Int,1); if (L_recv_ptr) { for (i=num_recvs_L-1; i > 0; i--) L_recv_ptr[i] = L_recv_ptr[i-1]; L_recv_ptr[0] = 0; } else L_recv_ptr = hypre_CTAlloc(HYPRE_Int,1); L_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgNumRecvs(L_comm_pkg) = num_recvs_L; hypre_ParCSRCommPkgNumSends(L_comm_pkg) = num_sends_L; hypre_ParCSRCommPkgRecvProcs(L_comm_pkg) = L_recv_procs; hypre_ParCSRCommPkgSendProcs(L_comm_pkg) = L_send_procs; hypre_ParCSRCommPkgRecvVecStarts(L_comm_pkg) = L_recv_ptr; hypre_ParCSRCommPkgSendMapStarts(L_comm_pkg) = L_send_ptr; hypre_ParCSRCommPkgSendMapElmts(L_comm_pkg) = L_send_map_elmts; hypre_ParCSRCommPkgComm(L_comm_pkg) = comm; Lambda = hypre_CTAlloc(hypre_ParCSRMatrix, 1); hypre_ParCSRMatrixDiag(Lambda) = L_diag; hypre_ParCSRMatrixOffd(Lambda) = L_offd; hypre_ParCSRMatrixCommPkg(Lambda) = L_comm_pkg; hypre_ParCSRMatrixComm(Lambda) = comm; hypre_ParCSRMatrixOwnsData(Lambda) = 1; hypre_ParAMGDataLambda(amg_data) = Lambda; hypre_ParAMGDataRtilde(amg_data) = Rtilde; hypre_ParAMGDataXtilde(amg_data) = Xtilde; hypre_TFree(D_data_offd); hypre_TFree(D_data); if (num_procs > 1) hypre_TFree(buf_data); hypre_TFree(remap); hypre_TFree(buf_data); hypre_TFree(level_start); return Solve_err_flag; }
HYPRE_Int hypre_GMRESSolve(void *gmres_vdata, void *A, void *b, void *x) { hypre_GMRESData *gmres_data = gmres_vdata; hypre_GMRESFunctions *gmres_functions = gmres_data->functions; HYPRE_Int k_dim = (gmres_data -> k_dim); HYPRE_Int min_iter = (gmres_data -> min_iter); HYPRE_Int max_iter = (gmres_data -> max_iter); HYPRE_Int rel_change = (gmres_data -> rel_change); HYPRE_Int skip_real_r_check = (gmres_data -> skip_real_r_check); double r_tol = (gmres_data -> tol); double cf_tol = (gmres_data -> cf_tol); double a_tol = (gmres_data -> a_tol); void *matvec_data = (gmres_data -> matvec_data); void *r = (gmres_data -> r); void *w = (gmres_data -> w); /* note: w_2 is only allocated if rel_change = 1 */ void *w_2 = (gmres_data -> w_2); void **p = (gmres_data -> p); HYPRE_Int (*precond)() = (gmres_functions -> precond); HYPRE_Int *precond_data = (gmres_data -> precond_data); HYPRE_Int print_level = (gmres_data -> print_level); HYPRE_Int logging = (gmres_data -> logging); double *norms = (gmres_data -> norms); /* not used yet char *log_file_name = (gmres_data -> log_file_name);*/ /* FILE *fp; */ HYPRE_Int break_value = 0; HYPRE_Int i, j, k; double *rs, **hh, *c, *s, *rs_2; HYPRE_Int iter; HYPRE_Int my_id, num_procs; double epsilon, gamma, t, r_norm, b_norm, den_norm, x_norm; double w_norm; double epsmac = 1.e-16; double ieee_check = 0.; double guard_zero_residual; double cf_ave_0 = 0.0; double cf_ave_1 = 0.0; double weight; double r_norm_0; double relative_error = 1.0; HYPRE_Int rel_change_passed = 0, num_rel_change_check = 0; double real_r_norm_old, real_r_norm_new; (gmres_data -> converged) = 0; /*----------------------------------------------------------------------- * With relative change convergence test on, it is possible to attempt * another iteration with a zero residual. This causes the parameter * alpha to go NaN. The guard_zero_residual parameter is to circumvent * this. Perhaps it should be set to something non-zero (but small). *-----------------------------------------------------------------------*/ guard_zero_residual = 0.0; (*(gmres_functions->CommInfo))(A,&my_id,&num_procs); if ( logging>0 || print_level>0 ) { norms = (gmres_data -> norms); } /* initialize work arrays */ rs = hypre_CTAllocF(double,k_dim+1,gmres_functions); c = hypre_CTAllocF(double,k_dim,gmres_functions); s = hypre_CTAllocF(double,k_dim,gmres_functions); if (rel_change) rs_2 = hypre_CTAllocF(double,k_dim+1,gmres_functions); hh = hypre_CTAllocF(double*,k_dim+1,gmres_functions); for (i=0; i < k_dim+1; i++) { hh[i] = hypre_CTAllocF(double,k_dim,gmres_functions); } (*(gmres_functions->CopyVector))(b,p[0]); /* compute initial residual */ (*(gmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]); b_norm = sqrt((*(gmres_functions->InnerProd))(b,b)); real_r_norm_old = b_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied b.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } r_norm = sqrt((*(gmres_functions->InnerProd))(p[0],p[0])); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied A or x_0.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } if ( logging>0 || print_level > 0) { norms[0] = r_norm; if ( print_level>1 && my_id == 0 ) { hypre_printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) hypre_printf("Rel_resid_norm actually contains the residual norm\n"); hypre_printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */ den_norm= b_norm; } else { /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */ den_norm= r_norm; }; /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm) den_norm = |r_0| or |b| note: default for a_tol is 0.0, so relative residual criteria is used unless user specifies a_tol, or sets r_tol = 0.0, which means absolute tol only is checked */ epsilon = hypre_max(a_tol,r_tol*den_norm); /* so now our stop criteria is |r_i| <= epsilon */ if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate rel.res.norm\n"); hypre_printf("----- ------------ ---------- ------------\n"); } else {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate\n"); hypre_printf("----- ------------ ----------\n"); }; } /* once the rel. change check has passed, we do not want to check it again */ rel_change_passed = 0; /* outer iteration cycle */ while (iter < max_iter) { /* initialize first term of hessenberg system */ rs[0] = r_norm; if (r_norm == 0.0) { hypre_TFreeF(c,gmres_functions); hypre_TFreeF(s,gmres_functions); hypre_TFreeF(rs,gmres_functions); if (rel_change) hypre_TFreeF(rs_2,gmres_functions); for (i=0; i < k_dim+1; i++) hypre_TFreeF(hh[i],gmres_functions); hypre_TFreeF(hh,gmres_functions); return hypre_error_flag; } /* see if we are already converged and should print the final norm and exit */ if (r_norm <= epsilon && iter >= min_iter) { if (!rel_change) /* shouldn't exit after no iterations if * relative change is on*/ { (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(gmres_functions->InnerProd))(r,r)); if (r_norm <= epsilon) { if ( print_level>1 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) hypre_printf("false convergence 1\n"); } } t = 1.0 / r_norm; (*(gmres_functions->ScaleVector))(t,p[0]); i = 0; /***RESTART CYCLE (right-preconditioning) ***/ while (i < k_dim && iter < max_iter) { i++; iter++; (*(gmres_functions->ClearVector))(r); precond(precond_data, A, p[i-1], r); (*(gmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]); /* modified Gram_Schmidt */ for (j=0; j < i; j++) { hh[j][i-1] = (*(gmres_functions->InnerProd))(p[j],p[i]); (*(gmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]); } t = sqrt((*(gmres_functions->InnerProd))(p[i],p[i])); hh[i][i-1] = t; if (t != 0.0) { t = 1.0/t; (*(gmres_functions->ScaleVector))(t,p[i]); } /* done with modified Gram_schmidt and Arnoldi step. update factorization of hh */ for (j = 1; j < i; j++) { t = hh[j-1][i-1]; hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t; hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1]; } t= hh[i][i-1]*hh[i][i-1]; t+= hh[i-1][i-1]*hh[i-1][i-1]; gamma = sqrt(t); if (gamma == 0.0) gamma = epsmac; c[i-1] = hh[i-1][i-1]/gamma; s[i-1] = hh[i][i-1]/gamma; rs[i] = -hh[i][i-1]*rs[i-1]; rs[i]/= gamma; rs[i-1] = c[i-1]*rs[i-1]; /* determine residual norm */ hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1]; r_norm = fabs(rs[i]); /* print ? */ if ( print_level>0 ) { norms[iter] = r_norm; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) hypre_printf("% 5d %e %f %e\n", iter, norms[iter],norms[iter]/norms[iter-1], norms[iter]/b_norm); else hypre_printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } } /*convergence factor tolerance */ if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; #if 0 hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n", i, cf_ave_1, cf_ave_0, weight ); #endif if (weight * cf_ave_1 > cf_tol) { break_value = 1; break; } } /* should we exit the restart cycle? (conv. check) */ if (r_norm <= epsilon && iter >= min_iter) { if (rel_change && !rel_change_passed) { /* To decide whether to break here: to actually determine the relative change requires the approx solution (so a triangular solve) and a precond. solve - so if we have to do this many times, it will be expensive...(unlike cg where is is relatively straightforward) previously, the intent (there was a bug), was to exit the restart cycle based on the residual norm and check the relative change outside the cycle. Here we will check the relative here as we don't want to exit the restart cycle prematurely */ for (k=0; k<i; k++) /* extra copy of rs so we don't need to change the later solve */ rs_2[k] = rs[k]; /* solve tri. system*/ rs_2[i-1] = rs_2[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = 0.0; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs_2[j]; } t+= rs_2[k]; rs_2[k] = t/hh[k][k]; } (*(gmres_functions->CopyVector))(p[i-1],w); (*(gmres_functions->ScaleVector))(rs_2[i-1],w); for (j = i-2; j >=0; j--) (*(gmres_functions->Axpy))(rs_2[j], p[j], w); (*(gmres_functions->ClearVector))(r); /* find correction (in r) */ precond(precond_data, A, w, r); /* copy current solution (x) to w (don't want to over-write x)*/ (*(gmres_functions->CopyVector))(x,w); /* add the correction */ (*(gmres_functions->Axpy))(1.0,r,w); /* now w is the approx solution - get the norm*/ x_norm = sqrt( (*(gmres_functions->InnerProd))(w,w) ); if ( !(x_norm <= guard_zero_residual )) /* don't divide by zero */ { /* now get x_i - x_i-1 */ if (num_rel_change_check) { /* have already checked once so we can avoid another precond. solve */ (*(gmres_functions->CopyVector))(w, r); (*(gmres_functions->Axpy))(-1.0, w_2, r); /* now r contains x_i - x_i-1*/ /* save current soln w in w_2 for next time */ (*(gmres_functions->CopyVector))(w, w_2); } else { /* first time to check rel change*/ /* first save current soln w in w_2 for next time */ (*(gmres_functions->CopyVector))(w, w_2); /* for relative change take x_(i-1) to be x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. Now x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]} - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]} = M^{-1} rs_{i-1}{p_{i-1}} */ (*(gmres_functions->ClearVector))(w); (*(gmres_functions->Axpy))(rs_2[i-1], p[i-1], w); (*(gmres_functions->ClearVector))(r); /* apply the preconditioner */ precond(precond_data, A, w, r); /* now r contains x_i - x_i-1 */ } /* find the norm of x_i - x_i-1 */ w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); relative_error = w_norm/x_norm; if (relative_error <= r_tol) { rel_change_passed = 1; break; } } else { rel_change_passed = 1; break; } num_rel_change_check++; } else /* no relative change */ { break; } } } /*** end of restart cycle ***/ /* now compute solution, first solve upper triangular system */ if (break_value) break; rs[i-1] = rs[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = 0.0; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs[j]; } t+= rs[k]; rs[k] = t/hh[k][k]; } (*(gmres_functions->CopyVector))(p[i-1],w); (*(gmres_functions->ScaleVector))(rs[i-1],w); for (j = i-2; j >=0; j--) (*(gmres_functions->Axpy))(rs[j], p[j], w); (*(gmres_functions->ClearVector))(r); /* find correction (in r) */ precond(precond_data, A, w, r); /* update current solution x (in x) */ (*(gmres_functions->Axpy))(1.0,r,x); /* check for convergence by evaluating the actual residual */ if (r_norm <= epsilon && iter >= min_iter) { if (skip_real_r_check) { (gmres_data -> converged) = 1; break; } /* calculate actual residual norm*/ (*(gmres_functions->CopyVector))(b,r); (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); real_r_norm_new = r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); if (r_norm <= epsilon) { if (rel_change && !rel_change_passed) /* calculate the relative change */ { /* calculate the norm of the solution */ x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) ); if ( !(x_norm <= guard_zero_residual )) /* don't divide by zero */ { /* for relative change take x_(i-1) to be x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. Now x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]} - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]} = M^{-1} rs_{i-1}{p_{i-1}} */ (*(gmres_functions->ClearVector))(w); (*(gmres_functions->Axpy))(rs[i-1], p[i-1], w); (*(gmres_functions->ClearVector))(r); /* apply the preconditioner */ precond(precond_data, A, w, r); /* find the norm of x_i - x_i-1 */ w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) ); relative_error= w_norm/x_norm; if ( relative_error < r_tol ) { (gmres_data -> converged) = 1; if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } } else { (gmres_data -> converged) = 1; if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } } else /* don't need to check rel. change */ { if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (gmres_data -> converged) = 1; break; } } else /* conv. has not occurred, according to true residual */ { /* exit if the real residual norm has not decreased */ if (real_r_norm_new >= real_r_norm_old) { if (print_level > 1 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (gmres_data -> converged) = 1; break; } /* report discrepancy between real/GMRES residuals and restart */ if ( print_level>0 && my_id == 0) hypre_printf("false convergence 2, L2 norm of residual: %e\n", r_norm); (*(gmres_functions->CopyVector))(r,p[0]); i = 0; real_r_norm_old = real_r_norm_new; } } /* end of convergence check */ /* compute residual vector and continue loop */ for (j=i ; j > 0; j--) { rs[j-1] = -s[j-1]*rs[j]; rs[j] = c[j-1]*rs[j]; } if (i) (*(gmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]); for (j=i-1 ; j > 0; j--) (*(gmres_functions->Axpy))(rs[j],p[j],p[i]); if (i) { (*(gmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]); (*(gmres_functions->Axpy))(1.0,p[i],p[0]); } } /* END of iteration while loop */ if ( print_level>1 && my_id == 0 ) hypre_printf("\n\n"); (gmres_data -> num_iterations) = iter; if (b_norm > 0.0) (gmres_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (gmres_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV); hypre_TFreeF(c,gmres_functions); hypre_TFreeF(s,gmres_functions); hypre_TFreeF(rs,gmres_functions); if (rel_change) hypre_TFreeF(rs_2,gmres_functions); for (i=0; i < k_dim+1; i++) { hypre_TFreeF(hh[i],gmres_functions); } hypre_TFreeF(hh,gmres_functions); return hypre_error_flag; }
int hypre_BoomerAMGSetupStats( void *amg_vdata, hypre_ParCSRMatrix *A ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata; /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/ /* Data Structure variables */ hypre_ParCSRMatrix **A_array; hypre_ParCSRMatrix **P_array; hypre_CSRMatrix *A_diag; double *A_diag_data; int *A_diag_i; hypre_CSRMatrix *A_offd; double *A_offd_data; int *A_offd_i; hypre_CSRMatrix *P_diag; double *P_diag_data; int *P_diag_i; hypre_CSRMatrix *P_offd; double *P_offd_data; int *P_offd_i; int numrows; HYPRE_BigInt *row_starts; int num_levels; int coarsen_type; int interp_type; int measure_type; double global_nonzeros; double *send_buff; double *gather_buff; /* Local variables */ int level; int j; HYPRE_BigInt fine_size; int min_entries; int max_entries; int num_procs,my_id, num_threads; double min_rowsum; double max_rowsum; double sparse; int i; HYPRE_BigInt coarse_size; int entries; double avg_entries; double rowsum; double min_weight; double max_weight; int global_min_e; int global_max_e; double global_min_rsum; double global_max_rsum; double global_min_wt; double global_max_wt; double *num_coeffs; double *num_variables; double total_variables; double operat_cmplxty; double grid_cmplxty; /* amg solve params */ int max_iter; int cycle_type; int *num_grid_sweeps; int *grid_relax_type; int relax_order; int **grid_relax_points; double *relax_weight; double *omega; double tol; int one = 1; int minus_one = -1; int zero = 0; int smooth_type; int smooth_num_levels; int agg_num_levels; /*int seq_cg = 0;*/ /*if (seq_data) seq_cg = 1;*/ MPI_Comm_size(comm, &num_procs); MPI_Comm_rank(comm,&my_id); num_threads = hypre_NumThreads(); if (my_id == 0) printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads); A_array = hypre_ParAMGDataAArray(amg_data); P_array = hypre_ParAMGDataPArray(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); coarsen_type = hypre_ParAMGDataCoarsenType(amg_data); interp_type = hypre_ParAMGDataInterpType(amg_data); measure_type = hypre_ParAMGDataMeasureType(amg_data); smooth_type = hypre_ParAMGDataSmoothType(amg_data); smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data); agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data); /*---------------------------------------------------------- * Get the amg_data data *----------------------------------------------------------*/ num_levels = hypre_ParAMGDataNumLevels(amg_data); max_iter = hypre_ParAMGDataMaxIter(amg_data); cycle_type = hypre_ParAMGDataCycleType(amg_data); num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); relax_order = hypre_ParAMGDataRelaxOrder(amg_data); omega = hypre_ParAMGDataOmega(amg_data); tol = hypre_ParAMGDataTol(amg_data); /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/ send_buff = hypre_CTAlloc(double, 6); #ifdef HYPRE_NO_GLOBAL_PARTITION gather_buff = hypre_CTAlloc(double,6); #else gather_buff = hypre_CTAlloc(double,6*num_procs); #endif if (my_id==0) { printf("\nBoomerAMG SETUP PARAMETERS:\n\n"); printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data)); printf(" Num levels = %d\n\n",num_levels); printf(" Strength Threshold = %f\n", hypre_ParAMGDataStrongThreshold(amg_data)); printf(" Interpolation Truncation Factor = %f\n", hypre_ParAMGDataTruncFactor(amg_data)); printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n", hypre_ParAMGDataMaxRowSum(amg_data)); if (coarsen_type == 0) { printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n"); } else if (abs(coarsen_type) == 1) { printf(" Coarsening Type = Ruge\n"); } else if (abs(coarsen_type) == 2) { printf(" Coarsening Type = Ruge2B\n"); } else if (abs(coarsen_type) == 3) { printf(" Coarsening Type = Ruge3\n"); } else if (abs(coarsen_type) == 4) { printf(" Coarsening Type = Ruge 3c \n"); } else if (abs(coarsen_type) == 5) { printf(" Coarsening Type = Ruge relax special points \n"); } else if (abs(coarsen_type) == 6) { printf(" Coarsening Type = Falgout-CLJP \n"); } else if (abs(coarsen_type) == 8) { printf(" Coarsening Type = PMIS \n"); } else if (abs(coarsen_type) == 10) { printf(" Coarsening Type = HMIS \n"); } else if (abs(coarsen_type) == 11) { printf(" Coarsening Type = Ruge 1st pass only \n"); } else if (abs(coarsen_type) == 9) { printf(" Coarsening Type = PMIS fixed random \n"); } else if (abs(coarsen_type) == 7) { printf(" Coarsening Type = CLJP, fixed random \n"); } if (coarsen_type > 0) { printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n"); } if (coarsen_type) printf(" measures are determined %s\n\n", (measure_type ? "globally" : "locally")); if (agg_num_levels) printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels); #ifdef HYPRE_NO_GLOBAL_PARTITION printf( "\n No global partition option chosen.\n\n"); #endif if (interp_type == 0) { printf(" Interpolation = modified classical interpolation\n"); } else if (interp_type == 1) { printf(" Interpolation = LS interpolation \n"); } else if (interp_type == 2) { printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n"); } else if (interp_type == 3) { printf(" Interpolation = direct interpolation with separation of weights\n"); } else if (interp_type == 4) { printf(" Interpolation = multipass interpolation\n"); } else if (interp_type == 5) { printf(" Interpolation = multipass interpolation with separation of weights\n"); } else if (interp_type == 6) { printf(" Interpolation = extended+i interpolation\n"); } else if (interp_type == 7) { printf(" Interpolation = extended+i interpolation (only when needed)\n"); } else if (interp_type == 8) { printf(" Interpolation = standard interpolation\n"); } else if (interp_type == 9) { printf(" Interpolation = standard interpolation with separation of weights\n"); } else if (interp_type == 12) { printf(" FF interpolation \n"); } else if (interp_type == 13) { printf(" FF1 interpolation \n"); } { printf( "\nOperator Matrix Information:\n\n"); } #if HYPRE_LONG_LONG printf(" nonzero entries p"); printf("er row row sums\n"); printf("lev rows entries sparse min max "); printf("avg min max\n"); printf("======================================="); printf("==================================\n"); #else printf(" nonzero entries p"); printf("er row row sums\n"); printf("lev rows entries sparse min max "); printf("avg min max\n"); printf("======================================="); printf("============================\n"); #endif } /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ num_coeffs = hypre_CTAlloc(double,num_levels); num_variables = hypre_CTAlloc(double,num_levels); for (level = 0; level < num_levels; level++) { { A_diag = hypre_ParCSRMatrixDiag(A_array[level]); A_diag_data = hypre_CSRMatrixData(A_diag); A_diag_i = hypre_CSRMatrixI(A_diag); A_offd = hypre_ParCSRMatrixOffd(A_array[level]); A_offd_data = hypre_CSRMatrixData(A_offd); A_offd_i = hypre_CSRMatrixI(A_offd); row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]); fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]); global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]); num_coeffs[level] = global_nonzeros; num_variables[level] = (double) fine_size; sparse = global_nonzeros /((double) fine_size * (double) fine_size); min_entries = 0; max_entries = 0; min_rowsum = 0.0; max_rowsum = 0.0; if (hypre_CSRMatrixNumRows(A_diag)) { min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]); for (j = A_diag_i[0]; j < A_diag_i[1]; j++) min_rowsum += A_diag_data[j]; for (j = A_offd_i[0]; j < A_offd_i[1]; j++) min_rowsum += A_offd_data[j]; max_rowsum = min_rowsum; for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++) { entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]); min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); rowsum = 0.0; for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++) rowsum += A_diag_data[i]; for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++) rowsum += A_offd_data[i]; min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } } avg_entries = global_nonzeros / ((double) fine_size); } #ifdef HYPRE_NO_GLOBAL_PARTITION numrows = (int)(row_starts[1]-row_starts[0]); if (!numrows) /* if we don't have any rows, then don't have this count toward min row sum or min num entries */ { min_entries = 1000000; min_rowsum = 1.0e7; } send_buff[0] = - (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = - min_rowsum; send_buff[3] = max_rowsum; MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm); if (my_id ==0) { global_min_e = - gather_buff[0]; global_max_e = gather_buff[1]; global_min_rsum = - gather_buff[2]; global_max_rsum = gather_buff[3]; #ifdef HYPRE_LONG_LONG printf( "%2d %12lld %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #else printf( "%2d %7d %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #endif printf(" %4.1f %10.3e %10.3e\n", avg_entries, global_min_rsum, global_max_rsum); } #else send_buff[0] = (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = min_rowsum; send_buff[3] = max_rowsum; MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm); if (my_id == 0) { global_min_e = 1000000; global_max_e = 0; global_min_rsum = 1.0e7; global_max_rsum = 0.0; for (j = 0; j < num_procs; j++) { numrows = row_starts[j+1]-row_starts[j]; if (numrows) { global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]); global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]); } global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]); global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]); } #ifdef HYPRE_LONG_LONG printf( "%2d %12lld %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #else printf( "%2d %7d %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #endif printf(" %4.1f %10.3e %10.3e\n", avg_entries, global_min_rsum, global_max_rsum); } #endif } if (my_id == 0) { { printf( "\n\nInterpolation Matrix Information:\n\n"); } #if HYPRE_LONG_LONG printf(" entries/row min max"); printf(" row sums\n"); printf("lev rows x cols min max "); printf(" weight weight min max \n"); printf("======================================="); printf("======================================\n"); #else printf(" entries/row min max"); printf(" row sums\n"); printf("lev rows cols min max "); printf(" weight weight min max \n"); printf("======================================="); printf("==========================\n"); #endif } /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ for (level = 0; level < num_levels-1; level++) { { P_diag = hypre_ParCSRMatrixDiag(P_array[level]); P_diag_data = hypre_CSRMatrixData(P_diag); P_diag_i = hypre_CSRMatrixI(P_diag); P_offd = hypre_ParCSRMatrixOffd(P_array[level]); P_offd_data = hypre_CSRMatrixData(P_offd); P_offd_i = hypre_CSRMatrixI(P_offd); row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]); fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]); coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]); global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]); min_weight = 1.0; max_weight = 0.0; max_rowsum = 0.0; min_rowsum = 0.0; min_entries = 0; max_entries = 0; if (hypre_CSRMatrixNumRows(P_diag)) { if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0]; for (j = P_diag_i[0]; j < P_diag_i[1]; j++) { min_weight = hypre_min(min_weight, P_diag_data[j]); if (P_diag_data[j] != 1.0) max_weight = hypre_max(max_weight, P_diag_data[j]); min_rowsum += P_diag_data[j]; } for (j = P_offd_i[0]; j < P_offd_i[1]; j++) { min_weight = hypre_min(min_weight, P_offd_data[j]); if (P_offd_data[j] != 1.0) max_weight = hypre_max(max_weight, P_offd_data[j]); min_rowsum += P_offd_data[j]; } max_rowsum = min_rowsum; min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]); max_entries = 0; for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++) { entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]); min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); rowsum = 0.0; for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++) { min_weight = hypre_min(min_weight, P_diag_data[i]); if (P_diag_data[i] != 1.0) max_weight = hypre_max(max_weight, P_diag_data[i]); rowsum += P_diag_data[i]; } for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++) { min_weight = hypre_min(min_weight, P_offd_data[i]); if (P_offd_data[i] != 1.0) max_weight = hypre_max(max_weight, P_offd_data[i]); rowsum += P_offd_data[i]; } min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } } avg_entries = ((double) global_nonzeros) / ((double) fine_size); } #ifdef HYPRE_NO_GLOBAL_PARTITION numrows = (int)(row_starts[1]-row_starts[0]); if (!numrows) /* if we don't have any rows, then don't have this count toward min row sum or min num entries */ { min_entries = 1000000; min_rowsum = 1.0e7; min_weight = 1.0e7; } send_buff[0] = - (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = - min_rowsum; send_buff[3] = max_rowsum; send_buff[4] = - min_weight; send_buff[5] = max_weight; MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm); if (my_id == 0) { global_min_e = - gather_buff[0]; global_max_e = gather_buff[1]; global_min_rsum = -gather_buff[2]; global_max_rsum = gather_buff[3]; global_min_wt = -gather_buff[4]; global_max_wt = gather_buff[5]; #ifdef HYPRE_LONG_LONG printf( "%2d %12lld x %-12lld %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #else printf( "%2d %5d x %-5d %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #endif printf(" %10.3e %9.3e %9.3e %9.3e\n", global_min_wt, global_max_wt, global_min_rsum, global_max_rsum); } #else send_buff[0] = (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = min_rowsum; send_buff[3] = max_rowsum; send_buff[4] = min_weight; send_buff[5] = max_weight; MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm); if (my_id == 0) { global_min_e = 1000000; global_max_e = 0; global_min_rsum = 1.0e7; global_max_rsum = 0.0; global_min_wt = 1.0e7; global_max_wt = 0.0; for (j = 0; j < num_procs; j++) { numrows = row_starts[j+1] - row_starts[j]; if (numrows) { global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]); global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]); global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]); } global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]); global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]); global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]); } #ifdef HYPRE_LONG_LONG printf( "%2d %12lld x %-12lld %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #else printf( "%2d %5d x %-5d %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #endif printf(" %10.3e %9.3e %9.3e %9.3e\n", global_min_wt, global_max_wt, global_min_rsum, global_max_rsum); } #endif } total_variables = 0; operat_cmplxty = 0; for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++) { operat_cmplxty += num_coeffs[j] / num_coeffs[0]; total_variables += num_variables[j]; } if (num_variables[0] != 0) grid_cmplxty = total_variables / num_variables[0]; if (my_id == 0 ) { printf("\n\n Complexity: grid = %f\n",grid_cmplxty); printf(" operator = %f\n",operat_cmplxty); } if (my_id == 0) printf("\n\n"); if (my_id == 0) { printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n"); printf( " Maximum number of cycles: %d \n",max_iter); printf( " Stopping Tolerance: %e \n",tol); printf( " Cycle type (1 = V, 2 = W, etc.): %d\n\n", cycle_type); printf( " Relaxation Parameters:\n"); printf( " Visiting Grid: down up coarse\n"); printf( " Number of partial sweeps: %4d %2d %4d \n", num_grid_sweeps[1], num_grid_sweeps[2],num_grid_sweeps[3]); printf( " Type 0=Jac, 3=hGS, 6=hSGS, 9=GE: %4d %2d %4d \n", grid_relax_type[1], grid_relax_type[2],grid_relax_type[3]); #if 1 /* TO DO: may not want this to print if CG in the coarse grid */ printf( " Point types, partial sweeps (1=C, -1=F):\n"); if (grid_relax_points) { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d", grid_relax_points[1][j]); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d", grid_relax_points[2][j]); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", grid_relax_points[3][j]); printf( "\n\n"); } else if (relax_order == 1) { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d %2d", one, minus_one); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d %2d", minus_one, one); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", zero); printf( "\n\n"); } else { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d", zero); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d", zero); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", zero); printf( "\n\n"); } #endif if (smooth_type == 6) for (j=0; j < smooth_num_levels; j++) printf( " Schwarz Relaxation Weight %f level %d\n", hypre_ParAMGDataSchwarzRlxWeight(amg_data),j); for (j=0; j < num_levels; j++) if (relax_weight[j] != 1) printf( " Relaxation Weight %f level %d\n",relax_weight[j],j); for (j=0; j < num_levels; j++) if (omega[j] != 1) printf( " Outer relaxation weight %f level %d\n",omega[j],j); } /*if (seq_cg) { hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0], operat_cmplxty, grid_cmplxty ); }*/ hypre_TFree(num_coeffs); hypre_TFree(num_variables); hypre_TFree(send_buff); hypre_TFree(gather_buff); return(0); }
HYPRE_Int hypre_LGMRESSolve(void *lgmres_vdata, void *A, void *b, void *x) { hypre_LGMRESData *lgmres_data = (hypre_LGMRESData *)lgmres_vdata; hypre_LGMRESFunctions *lgmres_functions = lgmres_data->functions; HYPRE_Int k_dim = (lgmres_data -> k_dim); HYPRE_Int min_iter = (lgmres_data -> min_iter); HYPRE_Int max_iter = (lgmres_data -> max_iter); HYPRE_Real r_tol = (lgmres_data -> tol); HYPRE_Real cf_tol = (lgmres_data -> cf_tol); HYPRE_Real a_tol = (lgmres_data -> a_tol); void *matvec_data = (lgmres_data -> matvec_data); void *r = (lgmres_data -> r); void *w = (lgmres_data -> w); void **p = (lgmres_data -> p); /* lgmres mod*/ void **aug_vecs = (lgmres_data ->aug_vecs); void **a_aug_vecs = (lgmres_data ->a_aug_vecs); HYPRE_Int *aug_order = (lgmres_data->aug_order); HYPRE_Int aug_dim = (lgmres_data -> aug_dim); HYPRE_Int approx_constant= (lgmres_data ->approx_constant); HYPRE_Int it_arnoldi, aug_ct, it_total, ii, order, it_aug; HYPRE_Int spot = 0; HYPRE_Real tmp_norm, r_norm_last; /*---*/ HYPRE_Int (*precond)(void*,void*,void*,void*) = (lgmres_functions -> precond); HYPRE_Int *precond_data = (HYPRE_Int*)(lgmres_data -> precond_data); HYPRE_Int print_level = (lgmres_data -> print_level); HYPRE_Int logging = (lgmres_data -> logging); HYPRE_Real *norms = (lgmres_data -> norms); HYPRE_Int break_value = 0; HYPRE_Int i, j, k; HYPRE_Real *rs, **hh, *c, *s; HYPRE_Int iter; HYPRE_Int my_id, num_procs; HYPRE_Real epsilon, gamma, t, r_norm, b_norm, den_norm; HYPRE_Real epsmac = 1.e-16; HYPRE_Real ieee_check = 0.; HYPRE_Real cf_ave_0 = 0.0; HYPRE_Real cf_ave_1 = 0.0; HYPRE_Real weight; HYPRE_Real r_norm_0; /* We are not checking rel. change for now... */ (lgmres_data -> converged) = 0; /*----------------------------------------------------------------------- * With relative change convergence test on, it is possible to attempt * another iteration with a zero residual. This causes the parameter * alpha to go NaN. The guard_zero_residual parameter is to circumvent * this. Perhaps it should be set to something non-zero (but small). *-----------------------------------------------------------------------*/ (*(lgmres_functions->CommInfo))(A,&my_id,&num_procs); if ( logging>0 || print_level>0 ) { norms = (lgmres_data -> norms); /* not used yet log_file_name = (lgmres_data -> log_file_name);*/ /* fp = fopen(log_file_name,"w"); */ } /* initialize work arrays - lgmres includes aug_dim*/ rs = hypre_CTAllocF(HYPRE_Real,k_dim+1+aug_dim,lgmres_functions); c = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); s = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); /* lgmres mod. - need non-modified hessenberg to avoid aug_dim matvecs */ hh = hypre_CTAllocF(HYPRE_Real*,k_dim+aug_dim+1,lgmres_functions); for (i=0; i < k_dim+aug_dim+1; i++) { hh[i] = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); } (*(lgmres_functions->CopyVector))(b,p[0]); /* compute initial residual */ (*(lgmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]); b_norm = sqrt((*(lgmres_functions->InnerProd))(b,b)); /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied b.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } r_norm = sqrt((*(lgmres_functions->InnerProd))(p[0],p[0])); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied A or x_0.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } if ( logging>0 || print_level > 0) { norms[0] = r_norm; if ( print_level>1 && my_id == 0 ) { hypre_printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) hypre_printf("Rel_resid_norm actually contains the residual norm\n"); hypre_printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */ den_norm= b_norm; } else { /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */ den_norm= r_norm; }; /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm) den_norm = |r_0| or |b| note: default for a_tol is 0.0, so relative residual criteria is used unless user specifies a_tol, or sets r_tol = 0.0, which means absolute tol only is checked */ epsilon = hypre_max(a_tol,r_tol*den_norm); /* so now our stop criteria is |r_i| <= epsilon */ if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate rel.res.norm\n"); hypre_printf("----- ------------ ---------- ------------\n"); } else {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate\n"); hypre_printf("----- ------------ ----------\n"); }; } /*lgmres initialization */ for (ii=0; ii<aug_dim; ii++) { aug_order[ii] = 0; } aug_ct = 0; /* number of aug. vectors available */ /* outer iteration cycle */ while (iter < max_iter) { /* initialize first term of hessenberg system */ rs[0] = r_norm; if (r_norm == 0.0) { hypre_TFreeF(c,lgmres_functions); hypre_TFreeF(s,lgmres_functions); hypre_TFreeF(rs,lgmres_functions); for (i=0; i < k_dim+aug_dim+1; i++) { hypre_TFreeF(hh[i],lgmres_functions); } hypre_TFreeF(hh,lgmres_functions); return hypre_error_flag; } /* see if we are already converged and should print the final norm and exit */ if (r_norm <= epsilon && iter >= min_iter) { (*(lgmres_functions->CopyVector))(b,r); (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(lgmres_functions->InnerProd))(r,r)); if (r_norm <= epsilon) { if ( print_level>1 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } break; } else if ( print_level>0 && my_id == 0) hypre_printf("false convergence 1\n"); } t = 1.0 / r_norm; r_norm_last = r_norm; (*(lgmres_functions->ScaleVector))(t,p[0]); i = 0; /* lgmres mod: determine number of arnoldi steps to take */ /* if approx_constant then we keep the space the same size even if we don't have the full number of aug vectors yet*/ if (approx_constant) { it_arnoldi = k_dim - aug_ct; } else { it_arnoldi = k_dim - aug_dim; } it_total = it_arnoldi + aug_ct; it_aug = 0; /* keep track of augmented iterations */ /***RESTART CYCLE (right-preconditioning) ***/ while (i < it_total && iter < max_iter) { i++; iter++; (*(lgmres_functions->ClearVector))(r); /*LGMRES_MOD: decide whether this is an arnoldi step or an aug step */ if ( i <= it_arnoldi) { /* Arnoldi */ precond(precond_data, A, p[i-1], r); (*(lgmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]); } else { /*lgmres aug step */ it_aug ++; order = i - it_arnoldi - 1; /* which aug step (note i starts at 1) - aug order number at 0*/ for (ii=0; ii<aug_dim; ii++) { if (aug_order[ii] == order) { spot = ii; break; /* must have this because there will be duplicates before aug_ct = aug_dim */ } } /* copy a_aug_vecs[spot] to p[i] */ (*(lgmres_functions->CopyVector))(a_aug_vecs[spot],p[i]); /*note: an alternate implementation choice would be to only save the AUGVECS and not A_AUGVEC and then apply the PC here to the augvec */ } /*---*/ /* modified Gram_Schmidt */ for (j=0; j < i; j++) { hh[j][i-1] = (*(lgmres_functions->InnerProd))(p[j],p[i]); (*(lgmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]); } t = sqrt((*(lgmres_functions->InnerProd))(p[i],p[i])); hh[i][i-1] = t; if (t != 0.0) { t = 1.0/t; (*(lgmres_functions->ScaleVector))(t,p[i]); } /* done with modified Gram_schmidt and Arnoldi step. update factorization of hh */ for (j = 1; j < i; j++) { t = hh[j-1][i-1]; hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t; hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1]; } t= hh[i][i-1]*hh[i][i-1]; t+= hh[i-1][i-1]*hh[i-1][i-1]; gamma = sqrt(t); if (gamma == 0.0) gamma = epsmac; c[i-1] = hh[i-1][i-1]/gamma; s[i-1] = hh[i][i-1]/gamma; rs[i] = -hh[i][i-1]*rs[i-1]; rs[i]/= gamma; rs[i-1] = c[i-1]*rs[i-1]; /* determine residual norm */ hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1]; r_norm = fabs(rs[i]); /* print ? */ if ( print_level>0 ) { norms[iter] = r_norm; if ( print_level>1 && my_id == 0 ) { if (b_norm > 0.0) hypre_printf("% 5d %e %f %e\n", iter, norms[iter],norms[iter]/norms[iter-1], norms[iter]/b_norm); else hypre_printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } } /*convergence factor tolerance */ if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; #if 0 hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n", i, cf_ave_1, cf_ave_0, weight ); #endif if (weight * cf_ave_1 > cf_tol) { break_value = 1; break; } } /* should we exit the restart cycle? (conv. check) */ if (r_norm <= epsilon && iter >= min_iter) { break; } } /*** end of restart cycle ***/ /* now compute solution, first solve upper triangular system */ if (break_value) break; rs[i-1] = rs[i-1]/hh[i-1][i-1]; for (k = i-2; k >= 0; k--) { t = 0.0; for (j = k+1; j < i; j++) { t -= hh[k][j]*rs[j]; } t+= rs[k]; rs[k] = t/hh[k][k]; } /* form linear combination of p's to get solution */ /* put the new aug_vector in aug_vecs[aug_dim] - a temp position*/ /* i = number of iterations */ /* it_aug = number of augmented iterations */ /* it_arnoldi = number of arnoldi iterations */ /*check if exited early before all arnoldi its */ if (it_arnoldi > i) it_arnoldi = i; if (!it_aug) { (*(lgmres_functions->CopyVector))(p[i-1],w); (*(lgmres_functions->ScaleVector))(rs[i-1],w); for (j = i-2; j >=0; j--) (*(lgmres_functions->Axpy))(rs[j], p[j], w); } else /* need some of the augvecs */ { (*(lgmres_functions->CopyVector))(p[0],w); (*(lgmres_functions->ScaleVector))(rs[0],w); /* reg. arnoldi directions */ for (j = 1; j < it_arnoldi; j++) /*first one already done */ { (*(lgmres_functions->Axpy))(rs[j], p[j], w); } /* augment directions */ for (ii=0; ii<it_aug; ii++) { for (j=0; j<aug_dim; j++) { if (aug_order[j] == ii) { spot = j; break; /* must have this because there will be * duplicates before aug_ct = aug_dim */ } } (*(lgmres_functions->Axpy))(rs[it_arnoldi+ii], aug_vecs[spot], w); } } /* grab the new aug vector before the prec*/ (*(lgmres_functions->CopyVector))(w,aug_vecs[aug_dim]); (*(lgmres_functions->ClearVector))(r); /* find correction (in r) (un-wind precond.)*/ precond(precond_data, A, w, r); /* update current solution x (in x) */ (*(lgmres_functions->Axpy))(1.0,r,x); /* check for convergence by evaluating the actual residual */ if (r_norm <= epsilon && iter >= min_iter) { /* calculate actual residual norm*/ (*(lgmres_functions->CopyVector))(b,r); (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt( (*(lgmres_functions->InnerProd))(r,r) ); if (r_norm <= epsilon) { if ( print_level>1 && my_id == 0 ) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (lgmres_data -> converged) = 1; break; } else /* conv. has not occurred, according to true residual */ { if ( print_level>0 && my_id == 0) hypre_printf("false convergence 2\n"); (*(lgmres_functions->CopyVector))(r,p[0]); i = 0; } } /* end of convergence check */ /* compute residual vector and continue loop */ /* copy r0 (not scaled) to w*/ (*(lgmres_functions->CopyVector))(p[0],w); (*(lgmres_functions->ScaleVector))(r_norm_last,w); for (j=i ; j > 0; j--) { rs[j-1] = -s[j-1]*rs[j]; rs[j] = c[j-1]*rs[j]; } if (i) (*(lgmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]); for (j=i-1 ; j > 0; j--) (*(lgmres_functions->Axpy))(rs[j],p[j],p[i]); if (i) { (*(lgmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]); (*(lgmres_functions->Axpy))(1.0,p[i],p[0]); } /* lgmres mod */ /* collect aug vector and A*augvector for future restarts - only if we will be restarting (i.e. this cycle performed it_total iterations). ordering starts at 0.*/ if (aug_dim > 0) { if (!aug_ct) { spot = 0; aug_ct++; } else if (aug_ct < aug_dim) { spot = aug_ct; aug_ct++; } else { /* truncate - already have aug_dim number of vectors*/ for (ii=0; ii<aug_dim; ii++) { if (aug_order[ii] == (aug_dim-1)) { spot = ii; } } } /* aug_vecs[aug_dim] contains new aug vector */ (*(lgmres_functions->CopyVector))(aug_vecs[aug_dim], aug_vecs[spot]); /*need to normalize */ tmp_norm = sqrt((*(lgmres_functions->InnerProd))(aug_vecs[spot], aug_vecs[spot])); tmp_norm = 1.0/tmp_norm; (*(lgmres_functions->ScaleVector))(tmp_norm ,aug_vecs[spot]); /*set new aug vector to order 0 - move all others back one */ for (ii=0; ii < aug_dim; ii++) { aug_order[ii]++; } aug_order[spot] = 0; /*now add the A*aug vector to A_AUGVEC(spot) - this is * independ. of preconditioning type*/ /* A*augvec = V*H*y = r0-rm (r0 is in w and rm is in p[0])*/ (*(lgmres_functions->CopyVector))( w, a_aug_vecs[spot]); (*(lgmres_functions->ScaleVector))(- 1.0, a_aug_vecs[spot]); /* -r0*/ (*(lgmres_functions->Axpy))(1.0, p[0],a_aug_vecs[spot]); /* rm - r0 */ (*(lgmres_functions->ScaleVector))(-tmp_norm, a_aug_vecs[spot]); /* r0-rm /norm */ } } /* END of iteration while loop */ if ( print_level>1 && my_id == 0 ) hypre_printf("\n\n"); (lgmres_data -> num_iterations) = iter; if (b_norm > 0.0) (lgmres_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (lgmres_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV); hypre_TFreeF(c,lgmres_functions); hypre_TFreeF(s,lgmres_functions); hypre_TFreeF(rs,lgmres_functions); for (i=0; i < k_dim+1+aug_dim; i++) { hypre_TFreeF(hh[i],lgmres_functions); } hypre_TFreeF(hh,lgmres_functions); return hypre_error_flag; }
HYPRE_Int hypre_CreateCommInfoFromStencil( hypre_StructGrid *grid, hypre_StructStencil *stencil, hypre_CommInfo **comm_info_ptr ) { HYPRE_Int i,j,k, d, m, s; hypre_BoxArrayArray *send_boxes; hypre_BoxArrayArray *recv_boxes; HYPRE_Int **send_procs; HYPRE_Int **recv_procs; HYPRE_Int **send_rboxnums; HYPRE_Int **recv_rboxnums; hypre_BoxArrayArray *send_rboxes; hypre_BoxArrayArray *recv_rboxes; hypre_BoxArray *local_boxes; HYPRE_Int num_boxes; HYPRE_Int *local_ids; hypre_BoxManager *boxman; hypre_Index *stencil_shape; hypre_IndexRef stencil_offset; hypre_IndexRef pshift; hypre_Box *box; hypre_Box *hood_box; hypre_Box *grow_box; hypre_Box *extend_box; hypre_Box *int_box; hypre_Box *periodic_box; HYPRE_Int stencil_grid[3][3][3]; HYPRE_Int grow[3][2]; hypre_BoxManEntry **entries; hypre_BoxManEntry *entry; HYPRE_Int num_entries; hypre_BoxArray *neighbor_boxes = NULL; HYPRE_Int *neighbor_procs = NULL; HYPRE_Int *neighbor_ids = NULL; HYPRE_Int *neighbor_shifts = NULL; HYPRE_Int neighbor_count; HYPRE_Int neighbor_alloc; hypre_Index ilower, iupper; hypre_BoxArray *send_box_array; hypre_BoxArray *recv_box_array; hypre_BoxArray *send_rbox_array; hypre_BoxArray *recv_rbox_array; hypre_Box **cboxes; hypre_Box *cboxes_mem; HYPRE_Int *cboxes_neighbor_location; HYPRE_Int num_cboxes, cbox_alloc; HYPRE_Int istart[3], istop[3]; HYPRE_Int sgindex[3]; HYPRE_Int num_periods, loc, box_id, id, proc_id; HYPRE_Int myid; MPI_Comm comm; /*------------------------------------------------------ * Initializations *------------------------------------------------------*/ local_boxes = hypre_StructGridBoxes(grid); local_ids = hypre_StructGridIDs(grid); num_boxes = hypre_BoxArraySize(local_boxes); num_periods = hypre_StructGridNumPeriods(grid); boxman = hypre_StructGridBoxMan(grid); comm = hypre_StructGridComm(grid); hypre_MPI_Comm_rank(comm, &myid); for (k = 0; k < 3; k++) { for (j = 0; j < 3; j++) { for (i = 0; i < 3; i++) { stencil_grid[i][j][k] = 0; } } } /*------------------------------------------------------ * Compute the "grow" information from the stencil *------------------------------------------------------*/ stencil_shape = hypre_StructStencilShape(stencil); for (d = 0; d < 3; d++) { grow[d][0] = 0; grow[d][1] = 0; } for (s = 0; s < hypre_StructStencilSize(stencil); s++) { stencil_offset = stencil_shape[s]; for (d = 0; d < 3; d++) { m = stencil_offset[d]; istart[d] = 1; istop[d] = 1; if (m < 0) { istart[d] = 0; grow[d][0] = hypre_max(grow[d][0], -m); } else if (m > 0) { istop[d] = 2; grow[d][1] = hypre_max(grow[d][1], m); } } /* update stencil grid from the grow_stencil */ for (k = istart[2]; k <= istop[2]; k++) { for (j = istart[1]; j <= istop[1]; j++) { for (i = istart[0]; i <= istop[0]; i++) { stencil_grid[i][j][k] = 1; } } } } /*------------------------------------------------------ * Compute send/recv boxes and procs for each local box *------------------------------------------------------*/ /* initialize: for each local box, we create an array of send/recv info */ send_boxes = hypre_BoxArrayArrayCreate(num_boxes); recv_boxes = hypre_BoxArrayArrayCreate(num_boxes); send_procs = hypre_CTAlloc(HYPRE_Int *, num_boxes); recv_procs = hypre_CTAlloc(HYPRE_Int *, num_boxes); /* Remote boxnums and boxes describe data on the opposing processor, so some shifting of boxes is needed below for periodic neighbor boxes. Remote box info is also needed for receives to allow for reverse communication. */ send_rboxnums = hypre_CTAlloc(HYPRE_Int *, num_boxes); send_rboxes = hypre_BoxArrayArrayCreate(num_boxes); recv_rboxnums = hypre_CTAlloc(HYPRE_Int *, num_boxes); recv_rboxes = hypre_BoxArrayArrayCreate(num_boxes); grow_box = hypre_BoxCreate(); extend_box = hypre_BoxCreate(); int_box = hypre_BoxCreate(); periodic_box = hypre_BoxCreate(); /* storage we will use and keep track of the neighbors */ neighbor_alloc = 30; /* initial guess at max size */ neighbor_boxes = hypre_BoxArrayCreate(neighbor_alloc); neighbor_procs = hypre_CTAlloc(HYPRE_Int, neighbor_alloc); neighbor_ids = hypre_CTAlloc(HYPRE_Int, neighbor_alloc); neighbor_shifts = hypre_CTAlloc(HYPRE_Int, neighbor_alloc); /* storage we will use to collect all of the intersected boxes (the send and recv regions for box i (this may not be enough in the case of periodic boxes, so we will have to check) */ cbox_alloc = hypre_BoxManNEntries(boxman); cboxes_neighbor_location = hypre_CTAlloc(HYPRE_Int, cbox_alloc); cboxes = hypre_CTAlloc(hypre_Box *, cbox_alloc); cboxes_mem = hypre_CTAlloc(hypre_Box, cbox_alloc); /******* loop through each local box **************/ for (i = 0; i < num_boxes; i++) { /* get the box */ box = hypre_BoxArrayBox(local_boxes, i); /* box_id = local_ids[i]; the box id in the Box Manager is the box number, * and we use this to find out if a box has intersected with itself */ box_id = i; /* grow box local i according to the stencil*/ hypre_CopyBox(box, grow_box); for (d = 0; d < 3; d++) { hypre_BoxIMinD(grow_box, d) -= grow[d][0]; hypre_BoxIMaxD(grow_box, d) += grow[d][1]; } /* extend_box - to find the list of potential neighbors, we need to grow the local box a bit differently in case, for example, the stencil grows in one dimension [0] and not the other [1] */ hypre_CopyBox(box, extend_box); for (d = 0; d < 3; d++) { hypre_BoxIMinD(extend_box, d) -= hypre_max(grow[d][0],grow[d][1]); hypre_BoxIMaxD(extend_box, d) += hypre_max(grow[d][0],grow[d][1]); } /*------------------------------------------------ * Determine the neighbors of box i *------------------------------------------------*/ /* Do this by intersecting the extend box with the BoxManager. We must also check for periodic neighbors. */ neighbor_count = 0; hypre_BoxArraySetSize(neighbor_boxes, 0); /* shift the box by each period (k=0 is original box) */ for (k = 0; k < num_periods; k++) { hypre_CopyBox(extend_box, periodic_box); pshift = hypre_StructGridPShift(grid, k); hypre_BoxShiftPos(periodic_box, pshift); /* get the intersections */ hypre_BoxManIntersect(boxman, hypre_BoxIMin(periodic_box) , hypre_BoxIMax(periodic_box) , &entries , &num_entries); /* note: do we need to remove the intersection with our original box? no if periodic, yes if non-periodic (k=0) */ /* unpack entries (first check storage) */ if (neighbor_count + num_entries > neighbor_alloc) { neighbor_alloc = neighbor_count + num_entries + 5; neighbor_procs = hypre_TReAlloc(neighbor_procs, HYPRE_Int, neighbor_alloc); neighbor_ids = hypre_TReAlloc(neighbor_ids, HYPRE_Int, neighbor_alloc); neighbor_shifts = hypre_TReAlloc(neighbor_shifts, HYPRE_Int, neighbor_alloc); } /* check storage for the array */ hypre_BoxArraySetSize(neighbor_boxes, neighbor_count + num_entries); /* now unpack */ for (j = 0; j < num_entries; j++) { entry = entries[j]; proc_id = hypre_BoxManEntryProc(entry); id = hypre_BoxManEntryId(entry); /* don't keep box i in the non-periodic case*/ if (!k) { if((myid == proc_id) && (box_id == id)) { continue; } } hypre_BoxManEntryGetExtents(entry, ilower, iupper); hypre_BoxSetExtents(hypre_BoxArrayBox(neighbor_boxes, neighbor_count), ilower, iupper); /* shift the periodic boxes (needs to be the opposite of above) */ if (k) { hypre_BoxShiftNeg( hypre_BoxArrayBox(neighbor_boxes, neighbor_count), pshift); } neighbor_procs[neighbor_count] = proc_id; neighbor_ids[neighbor_count] = id; neighbor_shifts[neighbor_count] = k; neighbor_count++; } hypre_BoxArraySetSize(neighbor_boxes, neighbor_count); hypre_TFree(entries); } /* end of loop through periods k */ /* Now we have a list of all of the neighbors for box i! */ /* note: we don't want/need to remove duplicates - they should have different intersections (TO DO: put more thought into if there are ever any exceptions to this? - the intersection routine already eliminates duplicates - so what i mean is eliminating duplicates from multiple intersection calls in periodic case) */ /*------------------------------------------------ * Compute recv_box_array for box i *------------------------------------------------*/ /* check size of storage for cboxes */ /* let's make sure that we have enough storage in case each neighbor produces a send/recv region */ if (neighbor_count > cbox_alloc) { cbox_alloc = neighbor_count; cboxes_neighbor_location = hypre_TReAlloc(cboxes_neighbor_location, HYPRE_Int, cbox_alloc); cboxes = hypre_TReAlloc(cboxes, hypre_Box *, cbox_alloc); cboxes_mem = hypre_TReAlloc(cboxes_mem, hypre_Box, cbox_alloc); } /* Loop through each neighbor box. If the neighbor box intersects the grown box i (grown according to our stencil), then the intersection is a recv region. If the neighbor box was shifted to handle periodicity, we need to (positive) shift it back. */ num_cboxes = 0; for (k = 0; k < neighbor_count; k++) { hood_box = hypre_BoxArrayBox(neighbor_boxes, k); /* check the stencil grid to see if it makes sense to intersect */ for (d = 0; d < 3; d++) { sgindex[d] = 1; s = hypre_BoxIMinD(hood_box, d) - hypre_BoxIMaxD(box, d); if (s > 0) { sgindex[d] = 2; } s = hypre_BoxIMinD(box, d) - hypre_BoxIMaxD(hood_box, d); if (s > 0) { sgindex[d] = 0; } } /* it makes sense only if we have at least one non-zero entry */ if (stencil_grid[sgindex[0]][sgindex[1]][sgindex[2]]) { /* intersect - result is int_box */ hypre_IntersectBoxes(grow_box, hood_box, int_box); /* if we have a positive volume box, this is a recv region */ if (hypre_BoxVolume(int_box)) { /* keep track of which neighbor: k... */ cboxes_neighbor_location[num_cboxes] = k; cboxes[num_cboxes] = &cboxes_mem[num_cboxes]; /* keep the intersected box */ hypre_CopyBox(int_box, cboxes[num_cboxes]); num_cboxes++; } } } /* end of loop through each neighbor */ /* create recv_box_array and recv_procs for box i */ recv_box_array = hypre_BoxArrayArrayBoxArray(recv_boxes, i); hypre_BoxArraySetSize(recv_box_array, num_cboxes); recv_procs[i] = hypre_CTAlloc(HYPRE_Int, num_cboxes); recv_rboxnums[i] = hypre_CTAlloc(HYPRE_Int, num_cboxes); recv_rbox_array = hypre_BoxArrayArrayBoxArray(recv_rboxes, i); hypre_BoxArraySetSize(recv_rbox_array, num_cboxes); for (m = 0; m < num_cboxes; m++) { loc = cboxes_neighbor_location[m]; recv_procs[i][m] = neighbor_procs[loc]; recv_rboxnums[i][m] = neighbor_ids[loc]; hypre_CopyBox(cboxes[m], hypre_BoxArrayBox(recv_box_array, m)); /* if periodic, positive shift before copying to the rbox_array */ if (neighbor_shifts[loc]) /* periodic if shift != 0 */ { pshift = hypre_StructGridPShift(grid, neighbor_shifts[loc]); hypre_BoxShiftPos(cboxes[m], pshift); } hypre_CopyBox(cboxes[m], hypre_BoxArrayBox(recv_rbox_array, m)); cboxes[m] = NULL; } /*------------------------------------------------ * Compute send_box_array for box i *------------------------------------------------*/ /* Loop through each neighbor box. If the grown neighbor box intersects box i, then the intersection is a send region. If the neighbor box was shifted to handle periodicity, we need to (positive) shift it back. */ num_cboxes = 0; for (k = 0; k < neighbor_count; k++) { hood_box = hypre_BoxArrayBox(neighbor_boxes, k); /* check the stencil grid to see if it makes sense to intersect */ for (d = 0; d < 3; d++) { sgindex[d] = 1; s = hypre_BoxIMinD(box, d) - hypre_BoxIMaxD(hood_box, d); if (s > 0) { sgindex[d] = 2; } s = hypre_BoxIMinD(hood_box, d) - hypre_BoxIMaxD(box, d); if (s > 0) { sgindex[d] = 0; } } /* it makes sense only if we have at least one non-zero entry */ if (stencil_grid[sgindex[0]][sgindex[1]][sgindex[2]]) { /* grow the neighbor box and intersect */ hypre_CopyBox(hood_box, grow_box); for (d = 0; d < 3; d++) { hypre_BoxIMinD(grow_box, d) -= grow[d][0]; hypre_BoxIMaxD(grow_box, d) += grow[d][1]; } hypre_IntersectBoxes(box, grow_box, int_box); /* if we have a positive volume box, this is a send region */ if (hypre_BoxVolume(int_box)) { /* keep track of which neighbor: k... */ cboxes_neighbor_location[num_cboxes] = k; cboxes[num_cboxes] = &cboxes_mem[num_cboxes]; /* keep the intersected box */ hypre_CopyBox(int_box, cboxes[num_cboxes]); num_cboxes++; } } }/* end of loop through neighbors */ /* create send_box_array and send_procs for box i */ send_box_array = hypre_BoxArrayArrayBoxArray(send_boxes, i); hypre_BoxArraySetSize(send_box_array, num_cboxes); send_procs[i] = hypre_CTAlloc(HYPRE_Int, num_cboxes); send_rboxnums[i] = hypre_CTAlloc(HYPRE_Int, num_cboxes); send_rbox_array = hypre_BoxArrayArrayBoxArray(send_rboxes, i); hypre_BoxArraySetSize(send_rbox_array, num_cboxes); for (m = 0; m < num_cboxes; m++) { loc = cboxes_neighbor_location[m]; send_procs[i][m] = neighbor_procs[loc]; send_rboxnums[i][m] = neighbor_ids[loc]; hypre_CopyBox(cboxes[m], hypre_BoxArrayBox(send_box_array, m)); /* if periodic, positive shift before copying to the rbox_array */ if (neighbor_shifts[loc]) /* periodic if shift != 0 */ { pshift = hypre_StructGridPShift(grid, neighbor_shifts[loc]); hypre_BoxShiftPos(cboxes[m], pshift); } hypre_CopyBox(cboxes[m], hypre_BoxArrayBox(send_rbox_array, m)); cboxes[m] = NULL; } } /* end of loop through each local box */
HYPRE_Int hypre_BoomerAMGCycleT( void *amg_vdata, hypre_ParVector **F_array, hypre_ParVector **U_array ) { hypre_ParAMGData *amg_data = amg_vdata; /* Data Structure variables */ hypre_ParCSRMatrix **A_array; hypre_ParCSRMatrix **P_array; hypre_ParCSRMatrix **R_array; hypre_ParVector *Vtemp; HYPRE_Int **CF_marker_array; /* HYPRE_Int **unknown_map_array; */ /* HYPRE_Int **point_map_array; */ /* HYPRE_Int **v_at_point_array; */ HYPRE_Real cycle_op_count; HYPRE_Int cycle_type; HYPRE_Int num_levels; HYPRE_Int max_levels; HYPRE_Real *num_coeffs; HYPRE_Int *num_grid_sweeps; HYPRE_Int *grid_relax_type; HYPRE_Int **grid_relax_points; /* Local variables */ HYPRE_Int *lev_counter; HYPRE_Int Solve_err_flag; HYPRE_Int k; HYPRE_Int j; HYPRE_Int level; HYPRE_Int cycle_param; HYPRE_Int coarse_grid; HYPRE_Int fine_grid; HYPRE_Int Not_Finished; HYPRE_Int num_sweep; HYPRE_Int relax_type; HYPRE_Int relax_points; HYPRE_Real *relax_weight; HYPRE_Int relax_local; HYPRE_Int relax_order; HYPRE_Int old_version = 0; HYPRE_Real alpha; HYPRE_Real beta; #if 0 HYPRE_Real *D_mat; HYPRE_Real *S_vec; #endif /* Acquire data and allocate storage */ A_array = hypre_ParAMGDataAArray(amg_data); P_array = hypre_ParAMGDataPArray(amg_data); R_array = hypre_ParAMGDataRArray(amg_data); CF_marker_array = hypre_ParAMGDataCFMarkerArray(amg_data); /* unknown_map_array = hypre_ParAMGDataUnknownMapArray(amg_data); */ /* point_map_array = hypre_ParAMGDataPointMapArray(amg_data); */ /* v_at_point_array = hypre_ParAMGDataVatPointArray(amg_data); */ Vtemp = hypre_ParAMGDataVtemp(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); max_levels = hypre_ParAMGDataMaxLevels(amg_data); cycle_type = hypre_ParAMGDataCycleType(amg_data); /* num_unknowns = hypre_ParCSRMatrixNumRows(A_array[0]); */ num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); relax_order = hypre_ParAMGDataRelaxOrder(amg_data); cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data); lev_counter = hypre_CTAlloc(HYPRE_Int, num_levels); /* Initialize */ Solve_err_flag = 0; if (grid_relax_points) old_version = 1; num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels); num_coeffs[0] = hypre_ParCSRMatrixDNumNonzeros(A_array[0]); for (j = 1; j < num_levels; j++) num_coeffs[j] = hypre_ParCSRMatrixDNumNonzeros(A_array[j]); /*--------------------------------------------------------------------- * Initialize cycling control counter * * Cycling is controlled using a level counter: lev_counter[k] * * Each time relaxation is performed on level k, the * counter is decremented by 1. If the counter is then * negative, we go to the next finer level. If non- * negative, we go to the next coarser level. The * following actions control cycling: * * a. lev_counter[0] is initialized to 1. * b. lev_counter[k] is initialized to cycle_type for k>0. * * c. During cycling, when going down to level k, lev_counter[k] * is set to the max of (lev_counter[k],cycle_type) *---------------------------------------------------------------------*/ Not_Finished = 1; lev_counter[0] = 1; for (k = 1; k < num_levels; ++k) { lev_counter[k] = cycle_type; } level = 0; cycle_param = 0; /*--------------------------------------------------------------------- * Main loop of cycling *--------------------------------------------------------------------*/ while (Not_Finished) { num_sweep = num_grid_sweeps[cycle_param]; relax_type = grid_relax_type[cycle_param]; if (relax_type != 7 && relax_type != 9) relax_type = 7; /*------------------------------------------------------------------ * Do the relaxation num_sweep times *-----------------------------------------------------------------*/ for (j = 0; j < num_sweep; j++) { if (num_levels == 1 && max_levels > 1) { relax_points = 0; relax_local = 0; } else { if (old_version) relax_points = grid_relax_points[cycle_param][j]; relax_local = relax_order; } /*----------------------------------------------- * VERY sloppy approximation to cycle complexity *-----------------------------------------------*/ if (old_version && level < num_levels -1) { switch (relax_points) { case 1: cycle_op_count += num_coeffs[level+1]; break; case -1: cycle_op_count += (num_coeffs[level]-num_coeffs[level+1]); break; } } else { cycle_op_count += num_coeffs[level]; } /* note: this does not use relax_points, so it doesn't matter if its the "old version" */ Solve_err_flag = hypre_BoomerAMGRelaxT(A_array[level], F_array[level], CF_marker_array[level], relax_type, relax_points, relax_weight[level], U_array[level], Vtemp); if (Solve_err_flag != 0) { hypre_TFree(lev_counter); hypre_TFree(num_coeffs); return(Solve_err_flag); } } /*------------------------------------------------------------------ * Decrement the control counter and determine which grid to visit next *-----------------------------------------------------------------*/ --lev_counter[level]; if (lev_counter[level] >= 0 && level != num_levels-1) { /*--------------------------------------------------------------- * Visit coarser level next. Compute residual using hypre_ParCSRMatrixMatvec. * Use interpolation (since transpose i.e. P^TATR instead of * RAP) using hypre_ParCSRMatrixMatvecT. * Reset counters and cycling parameters for coarse level *--------------------------------------------------------------*/ fine_grid = level; coarse_grid = level + 1; hypre_ParVectorSetConstantValues(U_array[coarse_grid], 0.0); hypre_ParVectorCopy(F_array[fine_grid],Vtemp); alpha = -1.0; beta = 1.0; hypre_ParCSRMatrixMatvecT(alpha, A_array[fine_grid], U_array[fine_grid], beta, Vtemp); alpha = 1.0; beta = 0.0; hypre_ParCSRMatrixMatvecT(alpha,P_array[fine_grid],Vtemp, beta,F_array[coarse_grid]); ++level; lev_counter[level] = hypre_max(lev_counter[level],cycle_type); cycle_param = 1; if (level == num_levels-1) cycle_param = 3; } else if (level != 0) { /*--------------------------------------------------------------- * Visit finer level next. * Use restriction (since transpose i.e. P^TA^TR instead of RAP) * and add correction using hypre_ParCSRMatrixMatvec. * Reset counters and cycling parameters for finer level. *--------------------------------------------------------------*/ fine_grid = level - 1; coarse_grid = level; alpha = 1.0; beta = 1.0; hypre_ParCSRMatrixMatvec(alpha, R_array[fine_grid], U_array[coarse_grid], beta, U_array[fine_grid]); --level; cycle_param = 2; if (level == 0) cycle_param = 0; } else { Not_Finished = 0; } } hypre_ParAMGDataCycleOpCount(amg_data) = cycle_op_count; hypre_TFree(lev_counter); hypre_TFree(num_coeffs); return(Solve_err_flag); }
void hypre_BoomerAMGTruncateInterp( hypre_ParCSRMatrix *P, HYPRE_Real eps, HYPRE_Real dlt, HYPRE_Int * CF_marker ) /* Truncate the interpolation matrix P, but only in rows for which the marker is <0. Truncation means that an element P(i,j) is set to 0 if P(i,j)>0 and P(i,j)<eps*max( P(i,j) ) or if P(i,j)>0 and P(i,j)<dlt*max( -P(i,j) ) or if P(i,j)<0 and P(i,j)>dlt*min( -P(i,j) ) or if P(i,j)<0 and P(i,j)>eps*min( P(i,j) ) ( 0<eps,dlt<1, typically 0.1=dlt<eps=0.2, ) The min and max are only computed locally, as I'm guessing that there isn't usually much to be gained (in the way of improved performance) by getting them perfectly right. */ /* The function hypre_BoomerAMGInterpTruncation in par_interp.c is very similar. It looks at fabs(value) rather than separately dealing with value<0 and value>0 as recommended by Klaus Stuben, thus as this function does. In this function, only "marked" rows are affected. Lastly, in hypre_BoomerAMGInterpTruncation, if any element gets discarded, it reallocates arrays to the new size. */ { hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P); hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P); HYPRE_Real *P_diag_data = hypre_CSRMatrixData(P_diag); HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag); HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag); HYPRE_Real *P_offd_data = hypre_CSRMatrixData(P_offd); HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd); HYPRE_Int *P_offd_j = hypre_CSRMatrixJ(P_offd); HYPRE_Int *new_P_diag_i; HYPRE_Int *new_P_offd_i; HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag); HYPRE_Int num_rows_offd_P = hypre_CSRMatrixNumRows(P_offd); HYPRE_Int num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(P_diag); HYPRE_Int num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(P_offd); #if 0 MPI_Comm comm = hypre_ParCSRMatrixComm( P ); HYPRE_Real vmax1, vmin1; #endif HYPRE_Real vmax = 0.0; HYPRE_Real vmin = 0.0; HYPRE_Real v, old_sum, new_sum, scale, wmax, wmin; HYPRE_Int i1, m, m1d, m1o; /* compute vmax = eps*max(P(i,j)), vmin = eps*min(P(i,j)) */ for ( i1 = 0; i1 < num_rows_diag_P; i1++ ) { for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m ) { v = P_diag_data[m]; vmax = hypre_max( v, vmax ); vmin = hypre_min( v, vmin ); } for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m ) { v = P_offd_data[m]; vmax = hypre_max( v, vmax ); vmin = hypre_min( v, vmin ); } } #if 0 /* This can make max,min global so results don't depend on no. processors We don't want this except for testing, or maybe this could be put someplace better. I don't like adding communication here, for a minor reason. */ vmax1 = vmax; vmin1 = vmin; hypre_MPI_Allreduce( &vmax1, &vmax, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm ); hypre_MPI_Allreduce( &vmin1, &vmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, comm ); #endif if ( vmax <= 0.0 ) vmax = 1.0; /* make sure no v is v>vmax if no v is v>0 */ if ( vmin >= 0.0 ) vmin = -1.0; /* make sure no v is v<vmin if no v is v<0 */ wmax = - dlt * vmin; wmin = - dlt * vmax; vmax *= eps; vmin *= eps; /* Repack the i,j,and data arrays so as to discard the small elements of P. Elements of Coarse rows (CF_marker>=0) are always kept. The arrays are not re-allocated, so there will generally be unused space at the ends of the arrays. */ new_P_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P+1 ); new_P_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_offd_P+1 ); m1d = P_diag_i[0]; m1o = P_offd_i[0]; for ( i1 = 0; i1 < num_rows_diag_P; i1++ ) { old_sum = 0; new_sum = 0; for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m ) { v = P_diag_data[m]; old_sum += v; if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) ) { /* keep v */ new_sum += v; P_diag_j[m1d] = P_diag_j[m]; P_diag_data[m1d] = P_diag_data[m]; ++m1d; } else { /* discard v */ --num_nonzeros_diag; } } for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m ) { v = P_offd_data[m]; old_sum += v; if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) ) { /* keep v */ new_sum += v; P_offd_j[m1o] = P_offd_j[m]; P_offd_data[m1o] = P_offd_data[m]; ++m1o; } else { /* discard v */ --num_nonzeros_offd; } } new_P_diag_i[i1+1] = m1d; if ( i1<num_rows_offd_P ) new_P_offd_i[i1+1] = m1o; /* rescale to keep row sum the same */ if (new_sum!=0) scale = old_sum/new_sum; else scale = 1.0; for ( m=new_P_diag_i[i1]; m<new_P_diag_i[i1+1]; ++m ) P_diag_data[m] *= scale; if ( i1<num_rows_offd_P ) /* this test fails when there is no offd block */ for ( m=new_P_offd_i[i1]; m<new_P_offd_i[i1+1]; ++m ) P_offd_data[m] *= scale; } for ( i1 = 1; i1 <= num_rows_diag_P; i1++ ) { P_diag_i[i1] = new_P_diag_i[i1]; if ( i1<=num_rows_offd_P && num_nonzeros_offd>0 ) P_offd_i[i1] = new_P_offd_i[i1]; } hypre_TFree( new_P_diag_i ); if ( num_rows_offd_P>0 ) hypre_TFree( new_P_offd_i ); hypre_CSRMatrixNumNonzeros(P_diag) = num_nonzeros_diag; hypre_CSRMatrixNumNonzeros(P_offd) = num_nonzeros_offd; hypre_ParCSRMatrixSetDNumNonzeros( P ); hypre_ParCSRMatrixSetNumNonzeros( P ); }
void hypre_BoomerAMGJacobiInterp_1( hypre_ParCSRMatrix * A, hypre_ParCSRMatrix ** P, hypre_ParCSRMatrix * S, HYPRE_Int * CF_marker, HYPRE_Int level, HYPRE_Real truncation_threshold, HYPRE_Real truncation_threshold_minus, HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd, HYPRE_Real weight_AF) /* One step of Jacobi interpolation: A is the linear system. P is an interpolation matrix, input and output CF_marker identifies coarse and fine points If we imagine P and A as split into coarse and fine submatrices, [ AFF AFC ] [ AF ] [ IFC ] A = [ ] = [ ] , P = [ ] [ ACF ACC ] [ AC ] [ ICC ] (note that ICC is an identity matrix, applied to coarse points only) then this function computes IFCnew = IFCold - DFF(-1) * ( AFF*IFCold + AFC ) = IFCold - DFF(-1) * AF * Pold) where DFF is the diagonal of AFF, (-1) represents the inverse, and where "old" denotes a value on entry to this function, "new" a returned value. */ { hypre_ParCSRMatrix * Pnew; hypre_ParCSRMatrix * C; hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(*P); hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(*P); HYPRE_Real *P_diag_data = hypre_CSRMatrixData(P_diag); HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag); HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag); HYPRE_Real *P_offd_data = hypre_CSRMatrixData(P_offd); HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd); hypre_CSRMatrix *C_diag; hypre_CSRMatrix *C_offd; hypre_CSRMatrix *Pnew_diag; hypre_CSRMatrix *Pnew_offd; HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag); HYPRE_Int i; HYPRE_Int Jnochanges=0, Jchanges, Pnew_num_nonzeros; HYPRE_Int CF_coarse=0; HYPRE_Int * J_marker = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P ); HYPRE_Int nc, ncmax, ncmin, nc1; HYPRE_Int num_procs, my_id; MPI_Comm comm = hypre_ParCSRMatrixComm( A ); #ifdef HYPRE_JACINT_PRINT_ROW_SUMS HYPRE_Int m, nmav, npav; HYPRE_Real PIi, PIimax, PIimin, PIimav, PIipav, randthresh; HYPRE_Real eps = 1.0e-17; #endif #ifdef HYPRE_JACINT_PRINT_MATRICES char filename[80]; HYPRE_Int i_dummy, j_dummy; HYPRE_Int *base_i_ptr = &i_dummy; HYPRE_Int *base_j_ptr = &j_dummy; #endif #ifdef HYPRE_JACINT_PRINT_SOME_ROWS HYPRE_Int sample_rows[50], n_sample_rows=0, isamp; #endif hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm,&my_id); for ( i=0; i<num_rows_diag_P; ++i ) { J_marker[i] = CF_marker[i]; if (CF_marker[i]>=0) ++CF_coarse; } #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1, P has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(P_diag), hypre_CSRMatrixNumNonzeros(P_offd), hypre_CSRMatrixNumNonzeros(P_diag)+hypre_CSRMatrixNumNonzeros(P_offd), hypre_ParCSRMatrixLocalSumElts(*P) ); #endif /* row sum computations, for output */ #ifdef HYPRE_JACINT_PRINT_ROW_SUMS PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0; nmav=0, npav=0; for ( i=0; i<num_rows_diag_P; ++i ) { PIi = 0; /* i-th value of P*1, i.e. sum of row i of P */ for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) PIi += P_diag_data[m]; for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m ) PIi += P_offd_data[m]; if (CF_marker[i]<0) { PIimax = hypre_max( PIimax, PIi ); PIimin = hypre_min( PIimin, PIi ); if (PIi<=1-eps) { PIimav+=PIi; ++nmav; }; if (PIi>=1+eps) { PIipav+=PIi; ++npav; }; } } if ( nmav>0 ) PIimav = PIimav/nmav; if ( npav>0 ) PIipav = PIipav/npav; hypre_printf("%i %i P in max,min row sums %e %e\n", my_id, level, PIimax, PIimin ); #endif ncmax=0; ncmin=num_rows_diag_P; nc1=0; for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc<=1) { ++nc1; } ncmax = hypre_max( nc, ncmax ); ncmin = hypre_min( nc, ncmin ); } #if 0 /* a very agressive reduction in how much the Jacobi step does: */ for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc>ncmin+1) /*if ( nc > ncmin + 0.5*(ncmax-ncmin) )*/ { J_marker[i] = 1; ++Jnochanges; } } #endif Jchanges = num_rows_diag_P - Jnochanges - CF_coarse; #ifdef HYPRE_JACINT_PRINT_SOME_ROWS hypre_printf("some rows to be changed: "); randthresh = 15/(HYPRE_Real)Jchanges; for ( i=0; i<num_rows_diag_P; ++i ) { if ( J_marker[i]<0 ) { if ( ((HYPRE_Real)rand())/RAND_MAX < randthresh ) { hypre_printf( "%i: ", i ); for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] ); hypre_printf("; "); sample_rows[n_sample_rows] = i; ++n_sample_rows; } } } hypre_printf("\n"); #endif #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i P has %i rows, %i changeable, %i don't change-good, %i coarse\n", my_id, level, num_rows_diag_P, Jchanges, Jnochanges, CF_coarse ); hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 ); #endif #ifdef HYPRE_JACINT_PRINT_MATRICES if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) { hypre_sprintf( filename, "Ain%i", level ); hypre_ParCSRMatrixPrintIJ( A,0,0,filename); hypre_sprintf( filename, "Sin%i", level ); hypre_ParCSRMatrixPrintIJ( S,0,0,filename); hypre_sprintf( filename, "Pin%i", level ); hypre_ParCSRMatrixPrintIJ( *P,0,0,filename); } #endif C = hypre_ParMatmul_FC( A, *P, J_marker, dof_func, dof_func_offd ); /* hypre_parMatmul_FC creates and returns C, a variation of the matrix product A*P in which only the "Fine"-designated rows have been computed. (all columns are Coarse because all columns of P are). "Fine" is defined solely by the marker array, and for example could be a proper subset of the fine points of a multigrid hierarchy. As a matrix, C is the size of A*P. But only the marked rows have been computed. */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "C%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename); #endif C_diag = hypre_ParCSRMatrixDiag(C); C_offd = hypre_ParCSRMatrixOffd(C); #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1 after matmul, C has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(C_diag), hypre_CSRMatrixNumNonzeros(C_offd), hypre_CSRMatrixNumNonzeros(C_diag)+hypre_CSRMatrixNumNonzeros(C_offd), hypre_ParCSRMatrixLocalSumElts(C) ); #endif hypre_ParMatScaleDiagInv_F( C, A, weight_AF, J_marker ); /* hypre_ParMatScaleDiagInv scales of its first argument by premultiplying with a submatrix of the inverse of the diagonal of its second argument. The marker array determines which diagonal elements are used. The marker array should select exactly the right number of diagonal elements (the number of rows of AP_FC). */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "Cout%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename); #endif Pnew = hypre_ParMatMinus_F( *P, C, J_marker ); /* hypre_ParMatMinus_F subtracts rows of its second argument from selected rows of its first argument. The marker array determines which rows of the first argument are affected, and they should exactly correspond to all the rows of the second argument. */ Pnew_diag = hypre_ParCSRMatrixDiag(Pnew); Pnew_offd = hypre_ParCSRMatrixOffd(Pnew); Pnew_num_nonzeros = hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd); #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1 after MatMinus, Pnew has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd), Pnew_num_nonzeros, hypre_ParCSRMatrixLocalSumElts(Pnew) ); #endif /* Transfer ownership of col_starts from P to Pnew ... */ if ( hypre_ParCSRMatrixColStarts(*P) && hypre_ParCSRMatrixColStarts(*P)==hypre_ParCSRMatrixColStarts(Pnew) ) { if ( hypre_ParCSRMatrixOwnsColStarts(*P) && !hypre_ParCSRMatrixOwnsColStarts(Pnew) ) { hypre_ParCSRMatrixSetColStartsOwner(*P,0); hypre_ParCSRMatrixSetColStartsOwner(Pnew,1); } } hypre_ParCSRMatrixDestroy( C ); hypre_ParCSRMatrixDestroy( *P ); /* Note that I'm truncating all the fine rows, not just the J-marked ones. */ #if 0 if ( Pnew_num_nonzeros < 10000 ) /* a fixed number like this makes it no.procs.-depdendent */ { /* ad-hoc attempt to reduce zero-matrix problems seen in testing..*/ truncation_threshold = 1.0e-6 * truncation_threshold; truncation_threshold_minus = 1.0e-6 * truncation_threshold_minus; } #endif hypre_BoomerAMGTruncateInterp( Pnew, truncation_threshold, truncation_threshold_minus, CF_marker ); hypre_MatvecCommPkgCreate ( Pnew ); *P = Pnew; P_diag = hypre_ParCSRMatrixDiag(*P); P_offd = hypre_ParCSRMatrixOffd(*P); P_diag_data = hypre_CSRMatrixData(P_diag); P_diag_i = hypre_CSRMatrixI(P_diag); P_diag_j = hypre_CSRMatrixJ(P_diag); P_offd_data = hypre_CSRMatrixData(P_offd); P_offd_i = hypre_CSRMatrixI(P_offd); /* row sum computations, for output */ #ifdef HYPRE_JACINT_PRINT_ROW_SUMS PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0; nmav=0, npav=0; for ( i=0; i<num_rows_diag_P; ++i ) { PIi = 0; /* i-th value of P*1, i.e. sum of row i of P */ for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) PIi += P_diag_data[m]; for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m ) PIi += P_offd_data[m]; if (CF_marker[i]<0) { PIimax = hypre_max( PIimax, PIi ); PIimin = hypre_min( PIimin, PIi ); if (PIi<=1-eps) { PIimav+=PIi; ++nmav; }; if (PIi>=1+eps) { PIipav+=PIi; ++npav; }; } } if ( nmav>0 ) PIimav = PIimav/nmav; if ( npav>0 ) PIipav = PIipav/npav; hypre_printf("%i %i P out max,min row sums %e %e\n", my_id, level, PIimax, PIimin ); #endif #ifdef HYPRE_JACINT_PRINT_SOME_ROWS hypre_printf("some changed rows: "); for ( isamp=0; isamp<n_sample_rows; ++isamp ) { i = sample_rows[isamp]; hypre_printf( "%i: ", i ); for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] ); hypre_printf("; "); } hypre_printf("\n"); #endif ncmax=0; ncmin=num_rows_diag_P; nc1=0; for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc<=1) ++nc1; ncmax = hypre_max( nc, ncmax ); ncmin = hypre_min( nc, ncmin ); } #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i P has %i rows, %i changeable, %i too good, %i coarse\n", my_id, level, num_rows_diag_P, num_rows_diag_P-Jnochanges-CF_coarse, Jnochanges, CF_coarse ); hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 ); hypre_printf("%i %i Jacobi_Interp_1 after truncation (%e), Pnew has %i+%i=%i nonzeros, local sum %e\n", my_id, level, truncation_threshold, hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd), hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd), hypre_ParCSRMatrixLocalSumElts(Pnew) ); #endif /* Programming Notes: 1. Judging by around line 299 of par_interp.c, they typical use of CF_marker is that CF_marker>=0 means Coarse, CF_marker<0 means Fine. */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "Pout%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( *P,0,0,filename); #endif hypre_TFree( J_marker ); }
HYPRE_Int hypre_BiCGSTABSolve(void *bicgstab_vdata, void *A, void *b, void *x) { hypre_BiCGSTABData *bicgstab_data = (hypre_BiCGSTABData*)bicgstab_vdata; hypre_BiCGSTABFunctions *bicgstab_functions = bicgstab_data->functions; HYPRE_Int min_iter = (bicgstab_data -> min_iter); HYPRE_Int max_iter = (bicgstab_data -> max_iter); HYPRE_Int stop_crit = (bicgstab_data -> stop_crit); HYPRE_Real r_tol = (bicgstab_data -> tol); HYPRE_Real cf_tol = (bicgstab_data -> cf_tol); void *matvec_data = (bicgstab_data -> matvec_data); HYPRE_Real a_tol = (bicgstab_data -> a_tol); void *r = (bicgstab_data -> r); void *r0 = (bicgstab_data -> r0); void *s = (bicgstab_data -> s); void *v = (bicgstab_data -> v); void *p = (bicgstab_data -> p); void *q = (bicgstab_data -> q); HYPRE_Int (*precond)(void*,void*,void*,void*) = (bicgstab_functions -> precond); HYPRE_Int *precond_data = (HYPRE_Int*)(bicgstab_data -> precond_data); /* logging variables */ HYPRE_Int logging = (bicgstab_data -> logging); HYPRE_Int print_level = (bicgstab_data -> print_level); HYPRE_Real *norms = (bicgstab_data -> norms); /* char *log_file_name = (bicgstab_data -> log_file_name); FILE *fp; */ HYPRE_Int iter; HYPRE_Int my_id, num_procs; HYPRE_Real alpha, beta, gamma, epsilon, temp, res, r_norm, b_norm; HYPRE_Real epsmac = 1.e-128; HYPRE_Real ieee_check = 0.; HYPRE_Real cf_ave_0 = 0.0; HYPRE_Real cf_ave_1 = 0.0; HYPRE_Real weight; HYPRE_Real r_norm_0; HYPRE_Real den_norm; HYPRE_Real gamma_numer; HYPRE_Real gamma_denom; (bicgstab_data -> converged) = 0; (*(bicgstab_functions->CommInfo))(A,&my_id,&num_procs); if (logging > 0 || print_level > 0) { norms = (bicgstab_data -> norms); /* log_file_name = (bicgstab_data -> log_file_name); fp = fopen(log_file_name,"w"); */ } /* initialize work arrays */ (*(bicgstab_functions->CopyVector))(b,r0); /* compute initial residual */ (*(bicgstab_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, r0); (*(bicgstab_functions->CopyVector))(r0,r); (*(bicgstab_functions->CopyVector))(r0,p); b_norm = sqrt((*(bicgstab_functions->InnerProd))(b,b)); /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_BiCGSTABSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied b.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } res = (*(bicgstab_functions->InnerProd))(r0,r0); r_norm = sqrt(res); r_norm_0 = r_norm; /* Since it is does not diminish performance, attempt to return an error flag and notify users when they supply bad input. */ if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */ if (ieee_check != ieee_check) { /* ...INFs or NaNs in input can make ieee_check a NaN. This test for ieee_check self-equality works on all IEEE-compliant compilers/ machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754" by W. Kahan, May 31, 1996. Currently (July 2002) this paper may be found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */ if (logging > 0 || print_level > 0) { hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n"); hypre_printf("ERROR -- hypre_BiCGSTABSolve: INFs and/or NaNs detected in input.\n"); hypre_printf("User probably placed non-numerics in supplied A or x_0.\n"); hypre_printf("Returning error flag += 101. Program not terminated.\n"); hypre_printf("ERROR detected by Hypre ... END\n\n\n"); } hypre_error(HYPRE_ERROR_GENERIC); return hypre_error_flag; } if (logging > 0 || print_level > 0) { norms[0] = r_norm; if (print_level > 0 && my_id == 0) { hypre_printf("L2 norm of b: %e\n", b_norm); if (b_norm == 0.0) hypre_printf("Rel_resid_norm actually contains the residual norm\n"); hypre_printf("Initial L2 norm of residual: %e\n", r_norm); } } iter = 0; if (b_norm > 0.0) { /* convergence criterion |r_i| <= r_tol*|b| if |b| > 0 */ den_norm = b_norm; } else { /* convergence criterion |r_i| <= r_tol*|r0| if |b| = 0 */ den_norm = r_norm; }; /* convergence criterion |r_i| <= r_tol/a_tol , absolute residual norm*/ if (stop_crit) { if (a_tol == 0.0) /* this is for backwards compatibility (accomodating setting stop_crit to 1, but not setting a_tol) - eventually we will get rid of the stop_crit flag as with GMRES */ epsilon = r_tol; else epsilon = a_tol; /* this means new interface fcn called */ } else /* default convergence test (stop_crit = 0)*/ { /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm) den_norm = |r_0| or |b| note: default for a_tol is 0.0, so relative residual criteria is used unless user also specifies a_tol or sets r_tol = 0.0, which means absolute tol only is checked */ epsilon = hypre_max(a_tol, r_tol*den_norm); } if (print_level > 0 && my_id == 0) { if (b_norm > 0.0) {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate rel.res.norm\n"); hypre_printf("----- ------------ ---------- ------------\n"); } else {hypre_printf("=============================================\n\n"); hypre_printf("Iters resid.norm conv.rate\n"); hypre_printf("----- ------------ ----------\n"); } } (bicgstab_data -> num_iterations) = iter; if (b_norm > 0.0) (bicgstab_data -> rel_residual_norm) = r_norm/b_norm; /* check for convergence before starting */ if (r_norm == 0.0) { return hypre_error_flag; } else if (r_norm <= epsilon && iter >= min_iter) { if (print_level > 0 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Tolerance and min_iter requirements satisfied by initial data.\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (bicgstab_data -> converged) = 1; return hypre_error_flag; } /* Start BiCGStab iterations */ while (iter < max_iter) { iter++; (*(bicgstab_functions->ClearVector))(v); precond(precond_data, A, p, v); (*(bicgstab_functions->Matvec))(matvec_data,1.0,A,v,0.0,q); temp = (*(bicgstab_functions->InnerProd))(r0,q); if (fabs(temp) >= epsmac) alpha = res/temp; else { hypre_printf("BiCGSTAB broke down!! divide by near zero\n"); return(1); } (*(bicgstab_functions->Axpy))(alpha,v,x); (*(bicgstab_functions->Axpy))(-alpha,q,r); (*(bicgstab_functions->ClearVector))(v); precond(precond_data, A, r, v); (*(bicgstab_functions->Matvec))(matvec_data,1.0,A,v,0.0,s); /* Handle case when gamma = 0.0/0.0 as 0.0 and not NAN */ gamma_numer = (*(bicgstab_functions->InnerProd))(r,s); gamma_denom = (*(bicgstab_functions->InnerProd))(s,s); if ((gamma_numer == 0.0) && (gamma_denom == 0.0)) gamma = 0.0; else gamma= gamma_numer/gamma_denom; (*(bicgstab_functions->Axpy))(gamma,v,x); (*(bicgstab_functions->Axpy))(-gamma,s,r); /* residual is now updated, must immediately check for convergence */ r_norm = sqrt((*(bicgstab_functions->InnerProd))(r,r)); if (logging > 0 || print_level > 0) { norms[iter] = r_norm; } if (print_level > 0 && my_id == 0) { if (b_norm > 0.0) hypre_printf("% 5d %e %f %e\n", iter, norms[iter], norms[iter]/norms[iter-1], norms[iter]/b_norm); else hypre_printf("% 5d %e %f\n", iter, norms[iter], norms[iter]/norms[iter-1]); } /* check for convergence, evaluate actual residual */ if (r_norm <= epsilon && iter >= min_iter) { (*(bicgstab_functions->CopyVector))(b,r); (*(bicgstab_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r); r_norm = sqrt((*(bicgstab_functions->InnerProd))(r,r)); if (r_norm <= epsilon) { if (print_level > 0 && my_id == 0) { hypre_printf("\n\n"); hypre_printf("Final L2 norm of residual: %e\n\n", r_norm); } (bicgstab_data -> converged) = 1; break; } } /*-------------------------------------------------------------------- * Optional test to see if adequate progress is being made. * The average convergence factor is recorded and compared * against the tolerance 'cf_tol'. The weighting factor is * intended to pay more attention to the test when an accurate * estimate for average convergence factor is available. *--------------------------------------------------------------------*/ if (cf_tol > 0.0) { cf_ave_0 = cf_ave_1; cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter)); weight = fabs(cf_ave_1 - cf_ave_0); weight = weight / hypre_max(cf_ave_1, cf_ave_0); weight = 1.0 - weight; if (weight * cf_ave_1 > cf_tol) break; } if (fabs(res) >= epsmac) beta = 1.0/res; else { hypre_printf("BiCGSTAB broke down!! res=0 \n"); return(2); } res = (*(bicgstab_functions->InnerProd))(r0,r); beta *= res; (*(bicgstab_functions->Axpy))(-gamma,q,p); if (fabs(gamma) >= epsmac) (*(bicgstab_functions->ScaleVector))((beta*alpha/gamma),p); else { hypre_printf("BiCGSTAB broke down!! gamma=0 \n"); return(3); } (*(bicgstab_functions->Axpy))(1.0,r,p); } /* end while loop */ (bicgstab_data -> num_iterations) = iter; if (b_norm > 0.0) (bicgstab_data -> rel_residual_norm) = r_norm/b_norm; if (b_norm == 0.0) (bicgstab_data -> rel_residual_norm) = r_norm; if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV); return hypre_error_flag; }
int hypre_StructMatrixInitializeShell( hypre_StructMatrix *matrix ) { int ierr = 0; hypre_StructGrid *grid; hypre_StructStencil *user_stencil; hypre_StructStencil *stencil; hypre_Index *stencil_shape; int stencil_size; int num_values; int *symm_elements; int *num_ghost; int extra_ghost[] = {0, 0, 0, 0, 0, 0}; hypre_BoxArray *data_space; hypre_BoxArray *boxes; hypre_Box *box; hypre_Box *data_box; int **data_indices; int data_size; int data_box_volume; int i, j, d; grid = hypre_StructMatrixGrid(matrix); /*----------------------------------------------------------------------- * Set up stencil and num_values: * The stencil is a "symmetrized" version of the user's stencil * as computed by hypre_StructStencilSymmetrize. * * The `symm_elements' array is used to determine what data is * explicitely stored (symm_elements[i] < 0) and what data does is * not explicitely stored (symm_elements[i] >= 0), but is instead * stored as the transpose coefficient at a neighboring grid point. *-----------------------------------------------------------------------*/ if (hypre_StructMatrixStencil(matrix) == NULL) { user_stencil = hypre_StructMatrixUserStencil(matrix); hypre_StructStencilSymmetrize(user_stencil, &stencil, &symm_elements); stencil_shape = hypre_StructStencilShape(stencil); stencil_size = hypre_StructStencilSize(stencil); if (!hypre_StructMatrixSymmetric(matrix)) { /* store all element data */ for (i = 0; i < stencil_size; i++) symm_elements[i] = -1; num_values = stencil_size; } else { num_values = (stencil_size + 1) / 2; } hypre_StructMatrixStencil(matrix) = stencil; hypre_StructMatrixSymmElements(matrix) = symm_elements; hypre_StructMatrixNumValues(matrix) = num_values; } /*----------------------------------------------------------------------- * Set ghost-layer size for symmetric storage * - All stencil coeffs are to be available at each point in the * grid, as well as in the user-specified ghost layer. *-----------------------------------------------------------------------*/ num_ghost = hypre_StructMatrixNumGhost(matrix); stencil = hypre_StructMatrixStencil(matrix); stencil_shape = hypre_StructStencilShape(stencil); stencil_size = hypre_StructStencilSize(stencil); symm_elements = hypre_StructMatrixSymmElements(matrix); for (i = 0; i < stencil_size; i++) { if (symm_elements[i] >= 0) { for (d = 0; d < 3; d++) { extra_ghost[2*d] = hypre_max(extra_ghost[2*d], -hypre_IndexD(stencil_shape[i], d)); extra_ghost[2*d + 1] = hypre_max(extra_ghost[2*d + 1], hypre_IndexD(stencil_shape[i], d)); } } } for (d = 0; d < 3; d++) { num_ghost[2*d] += extra_ghost[2*d]; num_ghost[2*d + 1] += extra_ghost[2*d + 1]; } /*----------------------------------------------------------------------- * Set up data_space *-----------------------------------------------------------------------*/ if (hypre_StructMatrixDataSpace(matrix) == NULL) { boxes = hypre_StructGridBoxes(grid); data_space = hypre_BoxArrayCreate(hypre_BoxArraySize(boxes)); hypre_ForBoxI(i, boxes) { box = hypre_BoxArrayBox(boxes, i); data_box = hypre_BoxArrayBox(data_space, i); hypre_CopyBox(box, data_box); for (d = 0; d < 3; d++) { hypre_BoxIMinD(data_box, d) -= num_ghost[2*d]; hypre_BoxIMaxD(data_box, d) += num_ghost[2*d + 1]; } }
int hypre_SStructPMatrixCreate( MPI_Comm comm, hypre_SStructPGrid *pgrid, hypre_SStructStencil **stencils, hypre_SStructPMatrix **pmatrix_ptr ) { hypre_SStructPMatrix *pmatrix; int nvars; int **smaps; hypre_StructStencil ***sstencils; hypre_StructMatrix ***smatrices; int **symmetric; hypre_StructStencil *sstencil; int *vars; hypre_Index *sstencil_shape; int sstencil_size; int new_dim; int *new_sizes; hypre_Index **new_shapes; int size; hypre_StructGrid *sgrid; int vi, vj; int i, j, k; pmatrix = hypre_TAlloc(hypre_SStructPMatrix, 1); hypre_SStructPMatrixComm(pmatrix) = comm; hypre_SStructPMatrixPGrid(pmatrix) = pgrid; hypre_SStructPMatrixStencils(pmatrix) = stencils; nvars = hypre_SStructPGridNVars(pgrid); hypre_SStructPMatrixNVars(pmatrix) = nvars; /* create sstencils */ smaps = hypre_TAlloc(int *, nvars); sstencils = hypre_TAlloc(hypre_StructStencil **, nvars); new_sizes = hypre_TAlloc(int, nvars); new_shapes = hypre_TAlloc(hypre_Index *, nvars); size = 0; for (vi = 0; vi < nvars; vi++) { sstencils[vi] = hypre_TAlloc(hypre_StructStencil *, nvars); for (vj = 0; vj < nvars; vj++) { sstencils[vi][vj] = NULL; new_sizes[vj] = 0; } sstencil = hypre_SStructStencilSStencil(stencils[vi]); vars = hypre_SStructStencilVars(stencils[vi]); sstencil_shape = hypre_StructStencilShape(sstencil); sstencil_size = hypre_StructStencilSize(sstencil); smaps[vi] = hypre_TAlloc(int, sstencil_size); for (i = 0; i < sstencil_size; i++) { j = vars[i]; new_sizes[j]++; } for (vj = 0; vj < nvars; vj++) { if (new_sizes[vj]) { new_shapes[vj] = hypre_TAlloc(hypre_Index, new_sizes[vj]); new_sizes[vj] = 0; } } for (i = 0; i < sstencil_size; i++) { j = vars[i]; k = new_sizes[j]; hypre_CopyIndex(sstencil_shape[i], new_shapes[j][k]); smaps[vi][i] = k; new_sizes[j]++; } new_dim = hypre_StructStencilDim(sstencil); for (vj = 0; vj < nvars; vj++) { if (new_sizes[vj]) { sstencils[vi][vj] = hypre_StructStencilCreate(new_dim, new_sizes[vj], new_shapes[vj]); } size = hypre_max(size, new_sizes[vj]); } } hypre_SStructPMatrixSMaps(pmatrix) = smaps; hypre_SStructPMatrixSStencils(pmatrix) = sstencils; hypre_TFree(new_sizes); hypre_TFree(new_shapes); /* create smatrices */ smatrices = hypre_TAlloc(hypre_StructMatrix **, nvars); for (vi = 0; vi < nvars; vi++) { smatrices[vi] = hypre_TAlloc(hypre_StructMatrix *, nvars); for (vj = 0; vj < nvars; vj++) { smatrices[vi][vj] = NULL; if (sstencils[vi][vj] != NULL) { sgrid = hypre_SStructPGridSGrid(pgrid, vi); smatrices[vi][vj] = hypre_StructMatrixCreate(comm, sgrid, sstencils[vi][vj]); } } } hypre_SStructPMatrixSMatrices(pmatrix) = smatrices; /* create symmetric */ symmetric = hypre_TAlloc(int *, nvars); for (vi = 0; vi < nvars; vi++) { symmetric[vi] = hypre_TAlloc(int, nvars); for (vj = 0; vj < nvars; vj++) { symmetric[vi][vj] = 0; } } hypre_SStructPMatrixSymmetric(pmatrix) = symmetric; hypre_SStructPMatrixSEntriesSize(pmatrix) = size; hypre_SStructPMatrixSEntries(pmatrix) = hypre_TAlloc(int, size); hypre_SStructPMatrixRefCount(pmatrix) = 1; *pmatrix_ptr = pmatrix; return hypre_error_flag; }
int hypre_CreateComputeInfo( hypre_StructGrid *grid, hypre_StructStencil *stencil, hypre_ComputeInfo **compute_info_ptr ) { int ierr = 0; hypre_CommInfo *comm_info; hypre_BoxArrayArray *indt_boxes; hypre_BoxArrayArray *dept_boxes; hypre_BoxArray *boxes; hypre_BoxArray *cbox_array; hypre_Box *cbox; int i; #ifdef HYPRE_OVERLAP_COMM_COMP hypre_Box *rembox; hypre_Index *stencil_shape; int border[3][2] = {{0, 0}, {0, 0}, {0, 0}}; int cbox_array_size; int s, d; #endif /*------------------------------------------------------ * Extract needed grid info *------------------------------------------------------*/ boxes = hypre_StructGridBoxes(grid); /*------------------------------------------------------ * Get communication info *------------------------------------------------------*/ hypre_CreateCommInfoFromStencil(grid, stencil, &comm_info); #ifdef HYPRE_OVERLAP_COMM_COMP /*------------------------------------------------------ * Compute border info *------------------------------------------------------*/ stencil_shape = hypre_StructStencilShape(stencil); for (s = 0; s < hypre_StructStencilSize(stencil); s++) { for (d = 0; d < 3; d++) { i = hypre_IndexD(stencil_shape[s], d); if (i < 0) { border[d][0] = hypre_max(border[d][0], -i); } else if (i > 0) { border[d][1] = hypre_max(border[d][1], i); } } } /*------------------------------------------------------ * Set up the dependent boxes *------------------------------------------------------*/ dept_boxes = hypre_BoxArrayArrayCreate(hypre_BoxArraySize(boxes)); rembox = hypre_BoxCreate(); hypre_ForBoxI(i, boxes) { cbox_array = hypre_BoxArrayArrayBoxArray(dept_boxes, i); hypre_BoxArraySetSize(cbox_array, 6); hypre_CopyBox(hypre_BoxArrayBox(boxes, i), rembox); cbox_array_size = 0; for (d = 0; d < 3; d++) { if ( (hypre_BoxVolume(rembox)) && (border[d][0]) ) { cbox = hypre_BoxArrayBox(cbox_array, cbox_array_size); hypre_CopyBox(rembox, cbox); hypre_BoxIMaxD(cbox, d) = hypre_BoxIMinD(cbox, d) + border[d][0] - 1; hypre_BoxIMinD(rembox, d) = hypre_BoxIMinD(cbox, d) + border[d][0]; cbox_array_size++; } if ( (hypre_BoxVolume(rembox)) && (border[d][1]) ) { cbox = hypre_BoxArrayBox(cbox_array, cbox_array_size); hypre_CopyBox(rembox, cbox); hypre_BoxIMinD(cbox, d) = hypre_BoxIMaxD(cbox, d) - border[d][1] + 1; hypre_BoxIMaxD(rembox, d) = hypre_BoxIMaxD(cbox, d) - border[d][1]; cbox_array_size++; } } hypre_BoxArraySetSize(cbox_array, cbox_array_size); }
int hypre_SStructUMatrixInitialize( hypre_SStructMatrix *matrix ) { HYPRE_IJMatrix ijmatrix = hypre_SStructMatrixIJMatrix(matrix); hypre_SStructGraph *graph = hypre_SStructMatrixGraph(matrix); hypre_SStructGrid *grid = hypre_SStructGraphGrid(graph); int nparts = hypre_SStructGraphNParts(graph); hypre_SStructPGrid **pgrids = hypre_SStructGraphPGrids(graph); hypre_SStructStencil ***stencils = hypre_SStructGraphStencils(graph); int nUventries = hypre_SStructGraphNUVEntries(graph); int *iUventries = hypre_SStructGraphIUVEntries(graph); hypre_SStructUVEntry **Uventries = hypre_SStructGraphUVEntries(graph); int **nvneighbors = hypre_SStructGridNVNeighbors(grid); hypre_StructGrid *sgrid; hypre_SStructStencil *stencil; int *split; int nvars; int nrows, nnzs ; int part, var, entry, i, j, k,m,b; int *row_sizes; int max_row_size; int matrix_type = hypre_SStructMatrixObjectType(matrix); hypre_Box *gridbox; hypre_Box *loopbox; hypre_Box *ghostbox; hypre_BoxArray *boxes; int *num_ghost; HYPRE_IJMatrixSetObjectType(ijmatrix, HYPRE_PARCSR); /* GEC1002 the ghlocalsize is used to set the number of rows */ if (matrix_type == HYPRE_PARCSR) { nrows = hypre_SStructGridLocalSize(grid); } if (matrix_type == HYPRE_SSTRUCT || matrix_type == HYPRE_STRUCT) { nrows = hypre_SStructGridGhlocalSize(grid) ; } /* set row sizes */ m = 0; row_sizes = hypre_CTAlloc(int, nrows); max_row_size = 0; for (part = 0; part < nparts; part++) { nvars = hypre_SStructPGridNVars(pgrids[part]); for (var = 0; var < nvars; var++) { sgrid = hypre_SStructPGridSGrid(pgrids[part], var); stencil = stencils[part][var]; split = hypre_SStructMatrixSplit(matrix, part, var); nnzs = 0; for (entry = 0; entry < hypre_SStructStencilSize(stencil); entry++) { if (split[entry] == -1) { nnzs++; } } #if 0 /* TODO: For now, assume stencil is full/complete */ if (hypre_SStructMatrixSymmetric(matrix)) { nnzs = 2*nnzs - 1; } #endif /**************/ boxes = hypre_StructGridBoxes(sgrid) ; num_ghost = hypre_StructGridNumGhost(sgrid); for (b = 0; b < hypre_BoxArraySize(boxes); b++) { gridbox = hypre_BoxArrayBox(boxes, b); ghostbox = hypre_BoxCreate(); loopbox = hypre_BoxCreate(); hypre_CopyBox(gridbox,ghostbox); hypre_BoxExpand(ghostbox,num_ghost); if (matrix_type == HYPRE_SSTRUCT || matrix_type == HYPRE_STRUCT) { hypre_CopyBox(ghostbox,loopbox); } if (matrix_type == HYPRE_PARCSR) { hypre_CopyBox(gridbox,loopbox); } for (k = hypre_BoxIMinZ(loopbox); k <= hypre_BoxIMaxZ(loopbox); k++) { for (j = hypre_BoxIMinY(loopbox); j <= hypre_BoxIMaxY(loopbox); j++) { for (i = hypre_BoxIMinX(loopbox); i <= hypre_BoxIMaxX(loopbox); i++) { if ( ( ( i>=hypre_BoxIMinX(gridbox) ) && ( j>=hypre_BoxIMinY(gridbox) ) ) && ( k>=hypre_BoxIMinZ(gridbox) ) ) { if ( ( ( i<=hypre_BoxIMaxX(gridbox) ) && ( j<=hypre_BoxIMaxY(gridbox) ) ) && ( k<=hypre_BoxIMaxZ(gridbox) ) ) { row_sizes[m] = nnzs; max_row_size = hypre_max(max_row_size, row_sizes[m]); } } m++; } } } hypre_BoxDestroy(ghostbox); hypre_BoxDestroy(loopbox); } if (nvneighbors[part][var]) { max_row_size = hypre_max(max_row_size, hypre_SStructStencilSize(stencil)); } /*********************/ } } /* GEC0902 essentially for each UVentry we figure out how many extra columns * we need to add to the rowsizes */ for (entry = 0; entry < nUventries; entry++) { i = iUventries[entry]; row_sizes[i] += hypre_SStructUVEntryNUEntries(Uventries[i]); max_row_size = hypre_max(max_row_size, row_sizes[i]); } /* ZTODO: Update row_sizes based on neighbor off-part couplings */ HYPRE_IJMatrixSetRowSizes (ijmatrix, (const int *) row_sizes); hypre_TFree(row_sizes); hypre_SStructMatrixTmpColCoords(matrix) = hypre_CTAlloc(HYPRE_BigInt, max_row_size); hypre_SStructMatrixTmpCoeffs(matrix) = hypre_CTAlloc(double, max_row_size); /* GEC1002 at this point the processor has the partitioning (creation of ij) */ HYPRE_IJMatrixInitialize(ijmatrix); return hypre_error_flag; }
HYPRE_Int hypre_AMGSetupStats( void *amg_vdata ) { hypre_AMGData *amg_data = amg_vdata; /* Data Structure variables */ hypre_CSRMatrix **A_array; hypre_CSRMatrix **P_array; HYPRE_Int num_levels; HYPRE_Int num_nonzeros; /* HYPRE_Int amg_ioutdat; char *log_file_name; */ /* Local variables */ HYPRE_Int *A_i; double *A_data; HYPRE_Int *P_i; double *P_data; HYPRE_Int level; HYPRE_Int i,j; HYPRE_Int fine_size; HYPRE_Int coarse_size; HYPRE_Int entries; HYPRE_Int total_entries; HYPRE_Int min_entries; HYPRE_Int max_entries; double avg_entries; double rowsum; double min_rowsum; double max_rowsum; double sparse; double min_weight; double max_weight; double op_complxty=0; double grid_complxty=0; double num_nz0; double num_var0; A_array = hypre_AMGDataAArray(amg_data); P_array = hypre_AMGDataPArray(amg_data); num_levels = hypre_AMGDataNumLevels(amg_data); /* amg_ioutdat = hypre_AMGDataIOutDat(amg_data); log_file_name = hypre_AMGDataLogFileName(amg_data); */ hypre_printf("\n AMG SETUP PARAMETERS:\n\n"); hypre_printf(" Strength threshold = %f\n",hypre_AMGDataStrongThreshold(amg_data)); hypre_printf(" Max levels = %d\n",hypre_AMGDataMaxLevels(amg_data)); hypre_printf(" Num levels = %d\n\n",num_levels); hypre_printf( "\nOperator Matrix Information:\n\n"); hypre_printf(" nonzero entries p"); hypre_printf("er row row sums\n"); hypre_printf("lev rows entries sparse min max "); hypre_printf("avg min max\n"); hypre_printf("======================================="); hypre_printf("==========================\n"); /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ num_var0 = (double) hypre_CSRMatrixNumRows(A_array[0]); num_nz0 = (double) hypre_CSRMatrixNumNonzeros(A_array[0]); for (level = 0; level < num_levels; level++) { A_i = hypre_CSRMatrixI(A_array[level]); A_data = hypre_CSRMatrixData(A_array[level]); fine_size = hypre_CSRMatrixNumRows(A_array[level]); num_nonzeros = hypre_CSRMatrixNumNonzeros(A_array[level]); sparse = num_nonzeros /((double) fine_size * (double) fine_size); op_complxty += ((double)num_nonzeros/num_nz0); grid_complxty += ((double)fine_size/num_var0); min_entries = A_i[1]-A_i[0]; max_entries = 0; total_entries = 0; min_rowsum = 0.0; max_rowsum = 0.0; for (j = A_i[0]; j < A_i[1]; j++) min_rowsum += A_data[j]; max_rowsum = min_rowsum; for (j = 0; j < fine_size; j++) { entries = A_i[j+1] - A_i[j]; min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); total_entries += entries; rowsum = 0.0; for (i = A_i[j]; i < A_i[j+1]; i++) rowsum += A_data[i]; min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } avg_entries = ((double) total_entries) / ((double) fine_size); hypre_printf( "%2d %5d %7d %0.3f %3d %3d", level, fine_size, num_nonzeros, sparse, min_entries, max_entries); hypre_printf(" %4.1f %10.3e %10.3e\n", avg_entries, min_rowsum, max_rowsum); } hypre_printf( "\n\nInterpolation Matrix Information:\n\n"); hypre_printf(" entries/row min max"); hypre_printf(" row sums\n"); hypre_printf("lev rows cols min max "); hypre_printf(" weight weight min max \n"); hypre_printf("======================================="); hypre_printf("==========================\n"); /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ for (level = 0; level < num_levels-1; level++) { P_i = hypre_CSRMatrixI(P_array[level]); P_data = hypre_CSRMatrixData(P_array[level]); fine_size = hypre_CSRMatrixNumRows(P_array[level]); coarse_size = hypre_CSRMatrixNumCols(P_array[level]); num_nonzeros = hypre_CSRMatrixNumNonzeros(P_array[level]); min_entries = P_i[1]-P_i[0]; max_entries = 0; total_entries = 0; min_rowsum = 0.0; max_rowsum = 0.0; min_weight = P_data[0]; max_weight = 0.0; for (j = P_i[0]; j < P_i[1]; j++) min_rowsum += P_data[j]; max_rowsum = min_rowsum; for (j = 0; j < num_nonzeros; j++) { if (P_data[j] != 1.0) { min_weight = hypre_min(min_weight,P_data[j]); max_weight = hypre_max(max_weight,P_data[j]); } } for (j = 0; j < fine_size; j++) { entries = P_i[j+1] - P_i[j]; min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); total_entries += entries; rowsum = 0.0; for (i = P_i[j]; i < P_i[j+1]; i++) rowsum += P_data[i]; min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } hypre_printf( "%2d %5d x %-5d %3d %3d", level, fine_size, coarse_size, min_entries, max_entries); hypre_printf(" %5.3e %5.3e %5.3e %5.3e\n", min_weight, max_weight, min_rowsum, max_rowsum); } hypre_printf("\n Operator Complexity: %8.3f\n", op_complxty); hypre_printf(" Grid Complexity: %8.3f\n", grid_complxty); hypre_WriteSolverParams(amg_data); return(0); }
HYPRE_Int hypre_SparseMSGSolve( void *smsg_vdata, hypre_StructMatrix *A, hypre_StructVector *b, hypre_StructVector *x ) { hypre_SparseMSGData *smsg_data = smsg_vdata; HYPRE_Real tol = (smsg_data -> tol); HYPRE_Int max_iter = (smsg_data -> max_iter); HYPRE_Int rel_change = (smsg_data -> rel_change); HYPRE_Int zero_guess = (smsg_data -> zero_guess); HYPRE_Int jump = (smsg_data -> jump); HYPRE_Int num_pre_relax = (smsg_data -> num_pre_relax); HYPRE_Int num_post_relax = (smsg_data -> num_post_relax); HYPRE_Int num_fine_relax = (smsg_data -> num_fine_relax); HYPRE_Int *num_grids = (smsg_data -> num_grids); HYPRE_Int num_all_grids = (smsg_data -> num_all_grids); HYPRE_Int num_levels = (smsg_data -> num_levels); hypre_StructMatrix **A_array = (smsg_data -> A_array); hypre_StructMatrix **Px_array = (smsg_data -> Px_array); hypre_StructMatrix **Py_array = (smsg_data -> Py_array); hypre_StructMatrix **Pz_array = (smsg_data -> Pz_array); hypre_StructMatrix **RTx_array = (smsg_data -> RTx_array); hypre_StructMatrix **RTy_array = (smsg_data -> RTy_array); hypre_StructMatrix **RTz_array = (smsg_data -> RTz_array); hypre_StructVector **b_array = (smsg_data -> b_array); hypre_StructVector **x_array = (smsg_data -> x_array); hypre_StructVector **t_array = (smsg_data -> t_array); hypre_StructVector **r_array = (smsg_data -> r_array); hypre_StructVector **e_array = (smsg_data -> e_array); hypre_StructVector **visitx_array = (smsg_data -> visitx_array); hypre_StructVector **visity_array = (smsg_data -> visity_array); hypre_StructVector **visitz_array = (smsg_data -> visitz_array); HYPRE_Int *grid_on = (smsg_data -> grid_on); void **relax_array = (smsg_data -> relax_array); void **matvec_array = (smsg_data -> matvec_array); void **restrictx_array = (smsg_data -> restrictx_array); void **restricty_array = (smsg_data -> restricty_array); void **restrictz_array = (smsg_data -> restrictz_array); void **interpx_array = (smsg_data -> interpx_array); void **interpy_array = (smsg_data -> interpy_array); void **interpz_array = (smsg_data -> interpz_array); HYPRE_Int logging = (smsg_data -> logging); HYPRE_Real *norms = (smsg_data -> norms); HYPRE_Real *rel_norms = (smsg_data -> rel_norms); HYPRE_Int *restrict_count; HYPRE_Real b_dot_b, r_dot_r, eps; HYPRE_Real e_dot_e, x_dot_x; HYPRE_Int i, l, lx, ly, lz; HYPRE_Int lymin, lymax, lzmin, lzmax; HYPRE_Int fi, ci; HYPRE_Int ierr = 0; #if DEBUG char filename[255]; #endif /*----------------------------------------------------- * Initialize some things and deal with special cases *-----------------------------------------------------*/ hypre_BeginTiming(smsg_data -> time_index); hypre_StructMatrixDestroy(A_array[0]); hypre_StructVectorDestroy(b_array[0]); hypre_StructVectorDestroy(x_array[0]); A_array[0] = hypre_StructMatrixRef(A); b_array[0] = hypre_StructVectorRef(b); x_array[0] = hypre_StructVectorRef(x); (smsg_data -> num_iterations) = 0; /* if max_iter is zero, return */ if (max_iter == 0) { /* if using a zero initial guess, return zero */ if (zero_guess) { hypre_StructVectorSetConstantValues(x, 0.0); } hypre_EndTiming(smsg_data -> time_index); return ierr; } /* part of convergence check */ if (tol > 0.0) { /* eps = (tol^2) */ b_dot_b = hypre_StructInnerProd(b_array[0], b_array[0]); eps = tol*tol; /* if rhs is zero, return a zero solution */ if (b_dot_b == 0.0) { hypre_StructVectorSetConstantValues(x, 0.0); if (logging > 0) { norms[0] = 0.0; rel_norms[0] = 0.0; } hypre_EndTiming(smsg_data -> time_index); return ierr; } } restrict_count = hypre_TAlloc(HYPRE_Int, num_all_grids); /*----------------------------------------------------- * Do V-cycles: * For each index l, "fine" = l, "coarse" = (l+1) *-----------------------------------------------------*/ for (i = 0; i < max_iter; i++) { /*-------------------------------------------------- * Down cycle: * Note that r = b = x through the jump region *--------------------------------------------------*/ /* fine grid pre-relaxation */ hypre_PFMGRelaxSetPreRelax(relax_array[0]); hypre_PFMGRelaxSetMaxIter(relax_array[0], num_fine_relax); hypre_PFMGRelaxSetZeroGuess(relax_array[0], zero_guess); hypre_PFMGRelax(relax_array[0], A_array[0], b_array[0], x_array[0]); zero_guess = 0; /* compute fine grid residual (b - Ax) */ hypre_StructCopy(b_array[0], r_array[0]); hypre_StructMatvecCompute(matvec_array[0], -1.0, A_array[0], x_array[0], 1.0, r_array[0]); /* convergence check */ if (tol > 0.0) { r_dot_r = hypre_StructInnerProd(r_array[0], r_array[0]); if (logging > 0) { norms[i] = sqrt(r_dot_r); if (b_dot_b > 0) rel_norms[i] = sqrt(r_dot_r/b_dot_b); else rel_norms[i] = 0.0; } /* RDF */ #if 0 hypre_printf("iter = %d, rel_norm = %e\n", i, rel_norms[i]); #endif /* always do at least 1 V-cycle */ if ((r_dot_r/b_dot_b < eps) && (i > 0)) { if (rel_change) { if ((e_dot_e/x_dot_x) < eps) break; } else { break; } } } if (num_levels > 1) { /* initialize restrict_count */ for (fi = 0; fi < num_all_grids; fi++) { restrict_count[fi] = 0; } for (l = 0; l <= (num_levels - 2); l++) { lzmin = hypre_max((l - num_grids[1] - num_grids[0] + 2), 0); lzmax = hypre_min((l), (num_grids[2] - 1)); for (lz = lzmin; lz <= lzmax; lz++) { lymin = hypre_max((l - lz - num_grids[0] + 1), 0); lymax = hypre_min((l - lz), (num_grids[1] - 1)); for (ly = lymin; ly <= lymax; ly++) { lx = l - lz - ly; hypre_SparseMSGMapIndex(lx, ly, lz, num_grids, fi); if (!grid_on[fi]) { break; } if (restrict_count[fi] > 1) { hypre_StructScale((1.0/restrict_count[fi]), b_array[fi]); } if (l > jump) { /* pre-relaxation */ hypre_PFMGRelaxSetPreRelax(relax_array[fi]); hypre_PFMGRelaxSetMaxIter(relax_array[fi], num_pre_relax); hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 1); hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi], x_array[fi]); /* compute residual (b - Ax) */ hypre_StructCopy(b_array[fi], r_array[fi]); hypre_StructMatvecCompute(matvec_array[fi], -1.0, A_array[fi], x_array[fi], 1.0, r_array[fi]); } if ((lx+1) < num_grids[0]) { /* restrict to ((lx+1), ly, lz) */ hypre_SparseMSGMapIndex((lx+1), ly, lz, num_grids, ci); if (grid_on[ci]) { if (restrict_count[ci]) { hypre_SparseMSGRestrict(restrictx_array[fi], RTx_array[lx], r_array[fi], t_array[ci]); hypre_StructAxpy(1.0, t_array[ci], b_array[ci]); } else { hypre_SparseMSGRestrict(restrictx_array[fi], RTx_array[lx], r_array[fi], b_array[ci]); } restrict_count[ci]++; } } if ((ly+1) < num_grids[1]) { /* restrict to (lx, (ly+1), lz) */ hypre_SparseMSGMapIndex(lx, (ly+1), lz, num_grids, ci); if (grid_on[ci]) { if (restrict_count[ci]) { hypre_SparseMSGRestrict(restricty_array[fi], RTy_array[ly], r_array[fi], t_array[ci]); hypre_StructAxpy(1.0, t_array[ci], b_array[ci]); } else { hypre_SparseMSGRestrict(restricty_array[fi], RTy_array[ly], r_array[fi], b_array[ci]); } restrict_count[ci]++; } } if ((lz+1) < num_grids[2]) { /* restrict to (lx, ly, (lz+1)) */ hypre_SparseMSGMapIndex(lx, ly, (lz+1), num_grids, ci); if (grid_on[ci]) { if (restrict_count[ci]) { hypre_SparseMSGRestrict(restrictz_array[fi], RTz_array[lz], r_array[fi], t_array[ci]); hypre_StructAxpy(1.0, t_array[ci], b_array[ci]); } else { hypre_SparseMSGRestrict(restrictz_array[fi], RTz_array[lz], r_array[fi], b_array[ci]); } restrict_count[ci]++; } } #if DEBUG hypre_sprintf(filename, "zoutSMSG_bdown.%d.%d.%d", lx, ly, lz); hypre_StructVectorPrint(filename, b_array[fi], 0); hypre_sprintf(filename, "zoutSMSG_xdown.%d.%d.%d", lx, ly, lz); hypre_StructVectorPrint(filename, x_array[fi], 0); hypre_sprintf(filename, "zoutSMSG_rdown.%d.%d.%d", lx, ly, lz); hypre_StructVectorPrint(filename, r_array[fi], 0); #endif } } } /*-------------------------------------------------- * Bottom *--------------------------------------------------*/ fi = num_all_grids - 1; if (restrict_count[fi] > 1) { hypre_StructScale((1.0/restrict_count[fi]), b_array[fi]); } hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 1); hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi], x_array[fi]); #if DEBUG hypre_sprintf(filename, "zoutSMSG_bbottom.%d.%d.%d", lx, ly, lz); hypre_StructVectorPrint(filename, b_array[fi], 0); hypre_sprintf(filename, "zoutSMSG_xbottom.%d.%d.%d", lx, ly, lz); hypre_StructVectorPrint(filename, x_array[fi], 0); #endif /*-------------------------------------------------- * Up cycle * Note that r = b = x through the jump region *--------------------------------------------------*/ for (l = (num_levels - 2); l >= 0; l--) { lzmin = hypre_max((l - num_grids[1] - num_grids[0] + 2), 0); lzmax = hypre_min((l), (num_grids[2] - 1)); for (lz = lzmax; lz >= lzmin; lz--) { lymin = hypre_max((l - lz - num_grids[0] + 1), 0); lymax = hypre_min((l - lz), (num_grids[1] - 1)); for (ly = lymax; ly >= lymin; ly--) { lx = l - lz - ly; hypre_SparseMSGMapIndex(lx, ly, lz, num_grids, fi); if (!grid_on[fi]) { break; } if ((l >= 1) && (l <= jump)) { hypre_StructVectorSetConstantValues(x_array[fi], 0.0); } if ((lx+1) < num_grids[0]) { /* interpolate from ((lx+1), ly, lz) */ hypre_SparseMSGMapIndex((lx+1), ly, lz, num_grids, ci); if (grid_on[ci]) { hypre_SparseMSGInterp(interpx_array[fi], Px_array[lx], x_array[ci], e_array[fi]); hypre_SparseMSGFilter(visitx_array[fi], e_array[fi], lx, ly, lz, jump); hypre_StructAxpy(1.0, e_array[fi], x_array[fi]); } } if ((ly+1) < num_grids[1]) { /* interpolate from (lx, (ly+1), lz) */ hypre_SparseMSGMapIndex(lx, (ly+1), lz, num_grids, ci); if (grid_on[ci]) { hypre_SparseMSGInterp(interpy_array[fi], Py_array[ly], x_array[ci], e_array[fi]); hypre_SparseMSGFilter(visity_array[fi], e_array[fi], lx, ly, lz, jump); hypre_StructAxpy(1.0, e_array[fi], x_array[fi]); } } if ((lz+1) < num_grids[2]) { /* interpolate from (lx, ly, (lz+1)) */ hypre_SparseMSGMapIndex(lx, ly, (lz+1), num_grids, ci); if (grid_on[ci]) { hypre_SparseMSGInterp(interpz_array[fi], Pz_array[lz], x_array[ci], e_array[fi]); hypre_SparseMSGFilter(visitz_array[fi], e_array[fi], lx, ly, lz, jump); hypre_StructAxpy(1.0, e_array[fi], x_array[fi]); } } #if DEBUG hypre_sprintf(filename, "zoutSMSG_xup.%d.%d.%d", lx, ly, lz); hypre_StructVectorPrint(filename, x_array[fi], 0); #endif if (l > jump) { /* post-relaxation */ hypre_PFMGRelaxSetPostRelax(relax_array[fi]); hypre_PFMGRelaxSetMaxIter(relax_array[fi], num_post_relax); hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 0); hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi], x_array[fi]); } } } } } /* part of convergence check */ if ((tol > 0.0) && (rel_change)) { if (num_levels > 1) { e_dot_e = hypre_StructInnerProd(e_array[0], e_array[0]); x_dot_x = hypre_StructInnerProd(x_array[0], x_array[0]); } else { e_dot_e = 0.0; x_dot_x = 1.0; } } /* fine grid post-relaxation */ hypre_PFMGRelaxSetPostRelax(relax_array[0]); hypre_PFMGRelaxSetMaxIter(relax_array[0], num_fine_relax); hypre_PFMGRelaxSetZeroGuess(relax_array[0], 0); hypre_PFMGRelax(relax_array[0], A_array[0], b_array[0], x_array[0]); (smsg_data -> num_iterations) = (i + 1); } hypre_EndTiming(smsg_data -> time_index); return ierr; }