/*
 * Compute the slice of a length-`length` index range assigned to process
 * `myid` when the range is split over `num_procs` processes.
 *
 * Every process receives length/num_procs entries; the first
 * (length mod num_procs) processes receive one extra entry each.
 *
 * On return *part_ptr points to a freshly allocated two-entry array:
 *    (*part_ptr)[0]  first index owned by myid
 *    (*part_ptr)[1]  one past the last index owned by myid ("starts" form)
 *
 * The array is obtained from hypre_CTAlloc and handed to the caller.
 * The return value is always 0.
 */
int
hypre_GenerateLocalPartitioning(HYPRE_BigInt   length,
                                int            num_procs,
                                int            myid,
                                HYPRE_BigInt **part_ptr)
{
   HYPRE_BigInt *range = hypre_CTAlloc(HYPRE_BigInt, 2);
   HYPRE_BigInt  base  = length / (HYPRE_BigInt) num_procs;
   int           extra = (int) (length - base * (HYPRE_BigInt) num_procs);

   /* first index owned by this process */
   range[0] = base * (HYPRE_BigInt) myid
              + (HYPRE_BigInt) hypre_min(myid, extra);

   /* start of the next process's slice, i.e. last owned index plus one */
   range[1] = base * (HYPRE_BigInt) (myid + 1)
              + (HYPRE_BigInt) hypre_min(myid + 1, extra);

   *part_ptr = range;

   return 0;
}
/**
 * Produce one thread's share of the merge of two sorted ranges.
 *
 * The len1 + len2 output positions are cut into num_threads consecutive
 * chunks of ceil((len1 + len2) / num_threads) positions (trailing chunks may
 * be shorter or empty).  This call writes chunk number my_thread_num into
 * out, at the same offset the chunk has in the fully merged sequence.
 *
 * @param first1 start of the first sorted input range
 * @param last1 one past the end of the first sorted input range
 * @param first2 start of the second sorted input range
 * @param last2 one past the end of the second sorted input range
 * @param out start of the output buffer for the whole merged sequence
 * @param num_threads number of threads that participate in this merge
 * @param my_thread_num thread id (zero-based) among the threads that participate in this merge
 */
static void
hypre_parallel_merge(HYPRE_Int *first1, HYPRE_Int *last1,
                     HYPRE_Int *first2, HYPRE_Int *last2,
                     HYPRE_Int *out,
                     HYPRE_Int  num_threads,
                     HYPRE_Int  my_thread_num)
{
   HYPRE_Int len1  = last1 - first1;
   HYPRE_Int len2  = last2 - first2;
   HYPRE_Int total = len1 + len2;

   /* output ranks [rank_lo, rank_hi) are this thread's responsibility */
   HYPRE_Int chunk   = (total + num_threads - 1)/num_threads;
   HYPRE_Int rank_lo = hypre_min(chunk*my_thread_num, total);
   HYPRE_Int rank_hi = hypre_min(rank_lo + chunk, total);

   HYPRE_Int lo1, lo2, hi1, hi2;

#ifdef DBG_MERGE_SORT
   assert(std::is_sorted(first1, last1));
   assert(std::is_sorted(first2, last2));
#endif

   /* kth_element (defined elsewhere) yields, for a given output rank, the
    * split position in each of the two inputs */
   kth_element(&lo1, &lo2, first1, first2, len1, len2, rank_lo);
   kth_element(&hi1, &hi2, first1, first2, len1, len2, rank_hi);

   /* When equal keys straddle the two inputs, the two splits can cross
    * each other; shift across the tie until they no longer do. */
   while (lo1 > hi1 && lo1 > 0 && lo2 < len2 &&
          first1[lo1 - 1] == first2[lo2])
   {
#ifdef DBG_MERGE_SORT
      printf("%s:%d\n", __FILE__, __LINE__);
#endif
      lo1--;
      lo2++;
   }
   while (lo2 > hi2 && hi1 > 0 && hi2 < len2 &&
          first1[hi1 - 1] == first2[hi2])
   {
#ifdef DBG_MERGE_SORT
      printf("%s:%d\n", __FILE__, __LINE__);
#endif
      hi1--;
      hi2++;
   }

#ifdef DBG_MERGE_SORT
   assert(lo1 <= hi1);
   assert(lo2 <= hi2);
#endif

   hypre_merge(first1 + lo1, first1 + hi1,
               first2 + lo2, first2 + hi2,
               out + lo1 + lo2);

#ifdef DBG_MERGE_SORT
   assert(std::is_sorted(out + lo1 + lo2, out + hi1 + hi2));
#endif
}
/*
 * Intersect box1 with box2 and store the result in ibox.
 *
 * In each of the three dimensions the lower bound of ibox becomes the larger
 * of the two lower bounds and the upper bound becomes the smaller of the two
 * upper bounds.  No emptiness test is performed here.
 *
 * Returns hypre_error_flag.
 */
HYPRE_Int
hypre_IntersectBoxes( hypre_Box *box1,
                      hypre_Box *box2,
                      hypre_Box *ibox )
{
   HYPRE_Int dim = 0;

   /* x, y and z bounds in turn */
   while (dim < 3)
   {
      hypre_BoxIMinD(ibox, dim) =
         hypre_max(hypre_BoxIMinD(box1, dim), hypre_BoxIMinD(box2, dim));
      hypre_BoxIMaxD(ibox, dim) =
         hypre_min(hypre_BoxIMaxD(box1, dim), hypre_BoxIMaxD(box2, dim));
      dim++;
   }

   return hypre_error_flag;
}
/*
 * Store the intersection of box1 and box2 in ibox.
 *
 * Per dimension (x, y, z): lower bound = max of the two lower bounds,
 * upper bound = min of the two upper bounds.  The result is not checked
 * for emptiness.
 *
 * Always returns 0.
 */
int
hypre_IntersectBoxes( hypre_Box *box1,
                      hypre_Box *box2,
                      hypre_Box *ibox )
{
   int axis;

   for (axis = 0; axis < 3; ++axis)
   {
      /* tightest lower bound */
      hypre_BoxIMinD(ibox, axis) =
         hypre_max(hypre_BoxIMinD(box1, axis), hypre_BoxIMinD(box2, axis));

      /* tightest upper bound */
      hypre_BoxIMaxD(ibox, axis) =
         hypre_min(hypre_BoxIMaxD(box1, axis), hypre_BoxIMaxD(box2, axis));
   }

   return 0;
}
/*
 * Sort `len` integers using every thread of the enclosing OpenMP team.
 *
 * Each thread first sorts its own contiguous chunk of `in` with
 * hypre_qsort0.  The sorted chunks are then merged pairwise in rounds
 * (group width 1, 2, 4, ...), ping-ponging between `in` and `temp`.
 *
 * in    data to sort; overwritten
 * temp  scratch buffer holding at least len entries
 * len   number of entries; when 0 the function returns at once and
 *       *out is left untouched
 * out   receives the buffer that holds the sorted data, which is either
 *       `in` or `temp` depending on how many merge rounds were run
 */
void hypre_merge_sort(HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **out)
{
   if (0 == len)
   {
      return;
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

#ifdef DBG_MERGE_SORT
   /* reference result used by the check at the end */
   HYPRE_Int *dbg_buf = new HYPRE_Int[len];
   std::copy(in, in + len, dbg_buf);
   std::sort(dbg_buf, dbg_buf + len);
#endif

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel
#endif
   {
      HYPRE_Int nthreads = hypre_NumActiveThreads();
      HYPRE_Int tid      = hypre_GetThreadNum();

      /* step 1: every thread sorts its own chunk in place */
      HYPRE_Int chunk    = (len + nthreads - 1)/nthreads;
      HYPRE_Int my_begin = hypre_min(chunk*tid, len);
      HYPRE_Int my_end   = hypre_min(my_begin + chunk, len);

      hypre_qsort0(in, my_begin, my_end - 1);

      /* step 2: merge neighbouring groups of sorted chunks, doubling the
       * group width every round */
      HYPRE_Int *src = in;
      HYPRE_Int *dst = temp;
      HYPRE_Int  width;

      for (width = 1; width < nthreads; width *= 2)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif
         /* two groups of `width` threads are fused into one merged group */
         HYPRE_Int merged_width  = width*2;
         HYPRE_Int leader        = tid/merged_width*merged_width;
         HYPRE_Int rank_in_group = tid%merged_width;
         HYPRE_Int group_threads =
            hypre_min(leader + merged_width, nthreads) - leader;

         /* [left_begin, left_end) and [right_begin, right_end) are the two
          * sorted runs this thread's group is merging */
         HYPRE_Int left_begin  = hypre_min(chunk*leader, len);
         HYPRE_Int left_end    = hypre_min(left_begin + chunk*width, len);
         HYPRE_Int right_begin = left_end;
         HYPRE_Int right_end   = hypre_min(right_begin + chunk*width, len);

         hypre_parallel_merge(
            src + left_begin, src + left_end,
            src + right_begin, src + right_end,
            dst + left_begin,
            group_threads,
            rank_in_group);

         /* output of this round is the input of the next one */
         HYPRE_Int *swap_buf = src;
         src = dst;
         dst = swap_buf;
      }

      /* every thread stores the same pointer */
      *out = src;
   } /* omp parallel */

#ifdef DBG_MERGE_SORT
   assert(std::equal(*out, *out + len, dbg_buf));

   delete[] dbg_buf;
#endif

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}
HYPRE_ParCSRMatrix GenerateLaplacian9pt( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int p, HYPRE_Int q, double *value ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; double *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; double *offd_data; HYPRE_Int *global_part; HYPRE_Int ix, iy; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int *work; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local; HYPRE_Int nx_size, ny_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1); global_part[0] = 0; cnt = 1; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size; } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; my_id = q*P + p; num_procs = P*Q; local_num_rows = nx_local*ny_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); num_cols_offd = 0; if (p) num_cols_offd += ny_local; if (p < P_busy-1) num_cols_offd += ny_local; if (q) num_cols_offd += nx_local; if (q < Q_busy-1) num_cols_offd += nx_local; if (p && q) num_cols_offd++; if (p && q < Q_busy-1 ) num_cols_offd++; if (p < P_busy-1 && q ) num_cols_offd++; if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 0; o_cnt = 0; diag_i[0] = 0; offd_i[0] = 0; for (iy = ny_part[q]; iy < 
ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { cnt++; o_cnt++; diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iy > ny_part[q]) { diag_i[cnt]++; if (ix > nx_part[p]) { diag_i[cnt]++; } else { if (ix) offd_i[o_cnt]++; } if (ix < nx_part[p+1]-1) { diag_i[cnt]++; } else { if (ix+1 < nx) offd_i[o_cnt]++; } } else { if (iy) { offd_i[o_cnt]++; if (ix > nx_part[p]) { offd_i[o_cnt]++; } else if (ix) { offd_i[o_cnt]++; } if (ix < nx_part[p+1]-1) { offd_i[o_cnt]++; } else if (ix < nx-1) { offd_i[o_cnt]++; } } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) { diag_i[cnt]++; if (ix > nx_part[p]) { diag_i[cnt]++; } else { if (ix) offd_i[o_cnt]++; } if (ix < nx_part[p+1]-1) { diag_i[cnt]++; } else { if (ix+1 < nx) offd_i[o_cnt]++; } } else { if (iy+1 < ny) { offd_i[o_cnt]++; if (ix > nx_part[p]) { offd_i[o_cnt]++; } else if (ix) { offd_i[o_cnt]++; } if (ix < nx_part[p+1]-1) { offd_i[o_cnt]++; } else if (ix < nx-1) { offd_i[o_cnt]++; } } } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(double, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(double, offd_i[local_num_rows]); }
/*--------------------------------------------------------------------------
 * hypre_SparseMSGSolve
 *
 * Run up to max_iter sparse multigrid V-cycles on A x = b.
 *
 * smsg_vdata  solver object (a hypre_SparseMSGData) holding the grid
 *             hierarchy, transfer operators, relaxation objects and options
 * A, b, x     fine-grid matrix, right-hand side and solution; references to
 *             them replace entry 0 of the solver's A/b/x arrays
 *
 * Grids are addressed by a coarsening triple (lx, ly, lz); level l consists
 * of the grids with lx + ly + lz == l.  hypre_SparseMSGMapIndex turns a
 * triple into the flat index used by all the *_array members.
 *
 * With tol > 0 the iteration stops (after at least one cycle) once
 * r.r / b.b < tol^2 and, when rel_change is set, additionally
 * e.e / x.x < tol^2.  The number of cycles performed is stored in
 * smsg_data->num_iterations.
 *
 * Returns ierr, which is always 0 in this routine.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_SparseMSGSolve( void               *smsg_vdata,
                      hypre_StructMatrix *A,
                      hypre_StructVector *b,
                      hypre_StructVector *x          )
{
   hypre_SparseMSGData  *smsg_data = smsg_vdata;

   HYPRE_Real            tol             = (smsg_data -> tol);
   HYPRE_Int             max_iter        = (smsg_data -> max_iter);
   HYPRE_Int             rel_change      = (smsg_data -> rel_change);
   HYPRE_Int             zero_guess      = (smsg_data -> zero_guess);
   HYPRE_Int             jump            = (smsg_data -> jump);
   HYPRE_Int             num_pre_relax   = (smsg_data -> num_pre_relax);
   HYPRE_Int             num_post_relax  = (smsg_data -> num_post_relax);
   HYPRE_Int             num_fine_relax  = (smsg_data -> num_fine_relax);
   HYPRE_Int            *num_grids       = (smsg_data -> num_grids);
   HYPRE_Int             num_all_grids   = (smsg_data -> num_all_grids);
   HYPRE_Int             num_levels      = (smsg_data -> num_levels);
   hypre_StructMatrix  **A_array         = (smsg_data -> A_array);
   hypre_StructMatrix  **Px_array        = (smsg_data -> Px_array);
   hypre_StructMatrix  **Py_array        = (smsg_data -> Py_array);
   hypre_StructMatrix  **Pz_array        = (smsg_data -> Pz_array);
   hypre_StructMatrix  **RTx_array       = (smsg_data -> RTx_array);
   hypre_StructMatrix  **RTy_array       = (smsg_data -> RTy_array);
   hypre_StructMatrix  **RTz_array       = (smsg_data -> RTz_array);
   hypre_StructVector  **b_array         = (smsg_data -> b_array);
   hypre_StructVector  **x_array         = (smsg_data -> x_array);
   hypre_StructVector  **t_array         = (smsg_data -> t_array);
   hypre_StructVector  **r_array         = (smsg_data -> r_array);
   hypre_StructVector  **e_array         = (smsg_data -> e_array);
   hypre_StructVector  **visitx_array    = (smsg_data -> visitx_array);
   hypre_StructVector  **visity_array    = (smsg_data -> visity_array);
   hypre_StructVector  **visitz_array    = (smsg_data -> visitz_array);
   HYPRE_Int            *grid_on         = (smsg_data -> grid_on);
   void                **relax_array     = (smsg_data -> relax_array);
   void                **matvec_array    = (smsg_data -> matvec_array);
   void                **restrictx_array = (smsg_data -> restrictx_array);
   void                **restricty_array = (smsg_data -> restricty_array);
   void                **restrictz_array = (smsg_data -> restrictz_array);
   void                **interpx_array   = (smsg_data -> interpx_array);
   void                **interpy_array   = (smsg_data -> interpy_array);
   void                **interpz_array   = (smsg_data -> interpz_array);
   HYPRE_Int             logging         = (smsg_data -> logging);
   HYPRE_Real           *norms           = (smsg_data -> norms);
   HYPRE_Real           *rel_norms       = (smsg_data -> rel_norms);

   /* per-grid count of how many finer grids have restricted into it
    * during the current down cycle */
   HYPRE_Int            *restrict_count;

   HYPRE_Real            b_dot_b, r_dot_r, eps;
   HYPRE_Real            e_dot_e, x_dot_x;

   HYPRE_Int             i, l, lx, ly, lz;
   HYPRE_Int             lymin, lymax, lzmin, lzmax;
   HYPRE_Int             fi, ci;   /* flat indices of the fine / coarse grid */

   HYPRE_Int             ierr = 0;

#if DEBUG
   char                  filename[255];
#endif

   /*-----------------------------------------------------
    * Initialize some things and deal with special cases
    *-----------------------------------------------------*/

   hypre_BeginTiming(smsg_data -> time_index);

   /* slot 0 of the hierarchy always refers to the caller's A, b and x */
   hypre_StructMatrixDestroy(A_array[0]);
   hypre_StructVectorDestroy(b_array[0]);
   hypre_StructVectorDestroy(x_array[0]);
   A_array[0] = hypre_StructMatrixRef(A);
   b_array[0] = hypre_StructVectorRef(b);
   x_array[0] = hypre_StructVectorRef(x);

   (smsg_data -> num_iterations) = 0;

   /* if max_iter is zero, return */
   if (max_iter == 0)
   {
      /* if using a zero initial guess, return zero */
      if (zero_guess)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
      }

      hypre_EndTiming(smsg_data -> time_index);
      return ierr;
   }

   /* part of convergence check */
   if (tol > 0.0)
   {
      /* eps = (tol^2) */
      b_dot_b = hypre_StructInnerProd(b_array[0], b_array[0]);
      eps = tol*tol;

      /* if rhs is zero, return a zero solution */
      if (b_dot_b == 0.0)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
         if (logging > 0)
         {
            norms[0]     = 0.0;
            rel_norms[0] = 0.0;
         }

         hypre_EndTiming(smsg_data -> time_index);
         return ierr;
      }
   }

   /* allocated only after the early returns above; released before the
    * single return at the bottom of the function */
   restrict_count = hypre_TAlloc(HYPRE_Int, num_all_grids);

   /*-----------------------------------------------------
    * Do V-cycles:
    *   For each index l, "fine" = l, "coarse" = (l+1)
    *-----------------------------------------------------*/

   for (i = 0; i < max_iter; i++)
   {
      /*--------------------------------------------------
       * Down cycle:
       *   Note that r = b = x through the jump region
       *--------------------------------------------------*/

      /* fine grid pre-relaxation */
      hypre_PFMGRelaxSetPreRelax(relax_array[0]);
      hypre_PFMGRelaxSetMaxIter(relax_array[0], num_fine_relax);
      hypre_PFMGRelaxSetZeroGuess(relax_array[0], zero_guess);
      hypre_PFMGRelax(relax_array[0], A_array[0], b_array[0], x_array[0]);
      zero_guess = 0;   /* x holds real data from now on */

      /* compute fine grid residual (b - Ax) */
      hypre_StructCopy(b_array[0], r_array[0]);
      hypre_StructMatvecCompute(matvec_array[0],
                                -1.0, A_array[0], x_array[0], 1.0, r_array[0]);

      /* convergence check */
      if (tol > 0.0)
      {
         r_dot_r = hypre_StructInnerProd(r_array[0], r_array[0]);

         if (logging > 0)
         {
            norms[i] = sqrt(r_dot_r);
            if (b_dot_b > 0)
               rel_norms[i] = sqrt(r_dot_r/b_dot_b);
            else
               rel_norms[i] = 0.0;
         }

         /* RDF */
#if 0
         hypre_printf("iter = %d, rel_norm = %e\n", i, rel_norms[i]);
#endif

         /* always do at least 1 V-cycle; e_dot_e and x_dot_x are only read
          * for i > 0, after the previous cycle has set them */
         if ((r_dot_r/b_dot_b < eps) && (i > 0))
         {
            if (rel_change)
            {
               if ((e_dot_e/x_dot_x) < eps)
                  break;
            }
            else
            {
               break;
            }
         }
      }

      if (num_levels > 1)
      {
         /* initialize restrict_count */
         for (fi = 0; fi < num_all_grids; fi++)
         {
            restrict_count[fi] = 0;
         }

         for (l = 0; l <= (num_levels - 2); l++)
         {
            lzmin = hypre_max((l - num_grids[1] - num_grids[0] + 2), 0);
            lzmax = hypre_min((l), (num_grids[2] - 1));
            for (lz = lzmin; lz <= lzmax; lz++)
            {
               lymin = hypre_max((l - lz - num_grids[0] + 1), 0);
               lymax = hypre_min((l - lz), (num_grids[1] - 1));
               for (ly = lymin; ly <= lymax; ly++)
               {
                  lx = l - lz - ly;

                  hypre_SparseMSGMapIndex(lx, ly, lz, num_grids, fi);

                  if (!grid_on[fi])
                  {
                     break;
                  }

                  /* b on this grid is a sum of several restrictions;
                   * turn it into their average */
                  if (restrict_count[fi] > 1)
                  {
                     hypre_StructScale((1.0/restrict_count[fi]), b_array[fi]);
                  }

                  if (l > jump)
                  {
                     /* pre-relaxation */
                     hypre_PFMGRelaxSetPreRelax(relax_array[fi]);
                     hypre_PFMGRelaxSetMaxIter(relax_array[fi], num_pre_relax);
                     hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 1);
                     hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                                     x_array[fi]);

                     /* compute residual (b - Ax) */
                     hypre_StructCopy(b_array[fi], r_array[fi]);
                     hypre_StructMatvecCompute(matvec_array[fi],
                                               -1.0, A_array[fi], x_array[fi],
                                               1.0, r_array[fi]);
                  }

                  if ((lx+1) < num_grids[0])
                  {
                     /* restrict to ((lx+1), ly, lz) */
                     hypre_SparseMSGMapIndex((lx+1), ly, lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           /* coarse b already has a contribution: go through
                            * the temporary and accumulate */
                           hypre_SparseMSGRestrict(restrictx_array[fi],
                                                   RTx_array[lx], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restrictx_array[fi],
                                                   RTx_array[lx], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
                  if ((ly+1) < num_grids[1])
                  {
                     /* restrict to (lx, (ly+1), lz) */
                     hypre_SparseMSGMapIndex(lx, (ly+1), lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           hypre_SparseMSGRestrict(restricty_array[fi],
                                                   RTy_array[ly], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restricty_array[fi],
                                                   RTy_array[ly], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
                  if ((lz+1) < num_grids[2])
                  {
                     /* restrict to (lx, ly, (lz+1)) */
                     hypre_SparseMSGMapIndex(lx, ly, (lz+1), num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           hypre_SparseMSGRestrict(restrictz_array[fi],
                                                   RTz_array[lz], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restrictz_array[fi],
                                                   RTz_array[lz], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
#if DEBUG
                  hypre_sprintf(filename, "zoutSMSG_bdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, b_array[fi], 0);
                  hypre_sprintf(filename, "zoutSMSG_xdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, x_array[fi], 0);
                  hypre_sprintf(filename, "zoutSMSG_rdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, r_array[fi], 0);
#endif
               }
            }
         }

         /*--------------------------------------------------
          * Bottom
          *--------------------------------------------------*/

         fi = num_all_grids - 1;

         if (restrict_count[fi] > 1)
         {
            hypre_StructScale((1.0/restrict_count[fi]), b_array[fi]);
         }

         hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 1);
         hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi], x_array[fi]);

#if DEBUG
         hypre_sprintf(filename, "zoutSMSG_bbottom.%d.%d.%d", lx, ly, lz);
         hypre_StructVectorPrint(filename, b_array[fi], 0);
         hypre_sprintf(filename, "zoutSMSG_xbottom.%d.%d.%d", lx, ly, lz);
         hypre_StructVectorPrint(filename, x_array[fi], 0);
#endif

         /*--------------------------------------------------
          * Up cycle
          *   Note that r = b = x through the jump region
          *--------------------------------------------------*/

         for (l = (num_levels - 2); l >= 0; l--)
         {
            lzmin = hypre_max((l - num_grids[1] - num_grids[0] + 2), 0);
            lzmax = hypre_min((l), (num_grids[2] - 1));
            for (lz = lzmax; lz >= lzmin; lz--)
            {
               lymin = hypre_max((l - lz - num_grids[0] + 1), 0);
               lymax = hypre_min((l - lz), (num_grids[1] - 1));
               for (ly = lymax; ly >= lymin; ly--)
               {
                  lx = l - lz - ly;

                  hypre_SparseMSGMapIndex(lx, ly, lz, num_grids, fi);

                  if (!grid_on[fi])
                  {
                     break;
                  }

                  /* no relaxation was done on these grids in the down
                   * cycle, so start the correction from zero */
                  if ((l >= 1) && (l <= jump))
                  {
                     hypre_StructVectorSetConstantValues(x_array[fi], 0.0);
                  }

                  if ((lx+1) < num_grids[0])
                  {
                     /* interpolate from ((lx+1), ly, lz) */
                     hypre_SparseMSGMapIndex((lx+1), ly, lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpx_array[fi],
                                              Px_array[lx], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visitx_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
                  if ((ly+1) < num_grids[1])
                  {
                     /* interpolate from (lx, (ly+1), lz) */
                     hypre_SparseMSGMapIndex(lx, (ly+1), lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpy_array[fi],
                                              Py_array[ly], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visity_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
                  if ((lz+1) < num_grids[2])
                  {
                     /* interpolate from (lx, ly, (lz+1)) */
                     hypre_SparseMSGMapIndex(lx, ly, (lz+1), num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpz_array[fi],
                                              Pz_array[lz], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visitz_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
#if DEBUG
                  hypre_sprintf(filename, "zoutSMSG_xup.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, x_array[fi], 0);
#endif
                  if (l > jump)
                  {
                     /* post-relaxation */
                     hypre_PFMGRelaxSetPostRelax(relax_array[fi]);
                     hypre_PFMGRelaxSetMaxIter(relax_array[fi], num_post_relax);
                     hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 0);
                     hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                                     x_array[fi]);
                  }
               }
            }
         }
      }

      /* part of convergence check */
      if ((tol > 0.0) && (rel_change))
      {
         if (num_levels > 1)
         {
            e_dot_e = hypre_StructInnerProd(e_array[0], e_array[0]);
            x_dot_x = hypre_StructInnerProd(x_array[0], x_array[0]);
         }
         else
         {
            /* no coarse-grid correction exists: make the test pass */
            e_dot_e = 0.0;
            x_dot_x = 1.0;
         }
      }

      /* fine grid post-relaxation */
      hypre_PFMGRelaxSetPostRelax(relax_array[0]);
      hypre_PFMGRelaxSetMaxIter(relax_array[0], num_fine_relax);
      hypre_PFMGRelaxSetZeroGuess(relax_array[0], 0);
      hypre_PFMGRelax(relax_array[0], A_array[0], b_array[0], x_array[0]);

      (smsg_data -> num_iterations) = (i + 1);
   }

   /* release the per-call work array (previously leaked on every solve) */
   hypre_TFree(restrict_count);

   hypre_EndTiming(smsg_data -> time_index);

   return ierr;
}
hypre_ParCSRMatrix *
hypre_ParMatMinus_F( hypre_ParCSRMatrix * P,
                     hypre_ParCSRMatrix * C,
                     HYPRE_Int * CF_marker )
/* hypre_ParMatMinus_F subtracts selected rows of its second argument
   from selected rows of its first argument.  The marker array
   determines which rows are affected - those for which CF_marker<0.
   The result is returned as a new matrix.

   P, C        input matrices; neither is modified here
   CF_marker   per-row marker; rows with CF_marker[i]<0 ("Fine" rows) get
               P(i,:) - C(i,:), the other ("Coarse") rows are filled by
               hypre_ParCSRMatrixCopy_C
   returns     a new matrix created by hypre_ParCSRMatrixUnion( C, P )
*/
{
   /*
     If P=(Pik),C=(Cik), i in Fine+Coarse, k in Coarse, we want
     new Pik = Pik - Cik, for Fine i only, all k.
     This computation is purely local.
   */
   /* This is _not_ a general-purpose matrix subtraction function.
      This is written for an interpolation problem where it is known that C(i,k)
      exists whenever P(i,k) does (because C=A*P where A has nonzero diagonal elements).
   */

   hypre_ParCSRMatrix *Pnew;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;

   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int       *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int       *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int       *P_col_map_offd = hypre_ParCSRMatrixColMapOffd( P );
   double          *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int       *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int       *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double          *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int       *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int       *C_offd_j = hypre_CSRMatrixJ(C_offd);
   HYPRE_Int       *C_col_map_offd = hypre_ParCSRMatrixColMapOffd( C );
   HYPRE_Int       *Pnew_diag_i;
   HYPRE_Int       *Pnew_diag_j;
   double          *Pnew_diag_data;
   HYPRE_Int       *Pnew_offd_i;
   HYPRE_Int       *Pnew_offd_j;
   double          *Pnew_offd_data;
   HYPRE_Int       *Pnew_j2m;
   HYPRE_Int       *Pnew_col_map_offd;

   HYPRE_Int        num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   /* HYPRE_Int     num_rows_offd_C = hypre_CSRMatrixNumRows(C_offd); */
   HYPRE_Int        num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd);
   HYPRE_Int        num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);
   HYPRE_Int        num_cols_offd_Pnew, num_rows_offd_Pnew;

   HYPRE_Int        i1, jmin, jmax, jrange;
   HYPRE_Int        j, m, mc, mp, jc, jp, jP, jC, jg, jCg, jPg;
   double           dc, dp;

   /*   Pnew = hypre_ParCSRMatrixCompleteClone( C );*/

   Pnew = hypre_ParCSRMatrixUnion( C, P );
   hypre_ParCSRMatrixZero_F( Pnew, CF_marker );  /* fine rows of Pnew set to 0 */
   hypre_ParCSRMatrixCopy_C( Pnew, C, CF_marker ); /* coarse rows of Pnew copied from C (or P) */
   /* ...Zero_F may not be needed depending on how Pnew is made */
   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_diag_i = hypre_CSRMatrixI(Pnew_diag);
   Pnew_diag_j = hypre_CSRMatrixJ(Pnew_diag);
   Pnew_offd_i = hypre_CSRMatrixI(Pnew_offd);
   Pnew_offd_j = hypre_CSRMatrixJ(Pnew_offd);
   Pnew_diag_data = hypre_CSRMatrixData(Pnew_diag);
   Pnew_offd_data = hypre_CSRMatrixData(Pnew_offd);
   Pnew_col_map_offd = hypre_ParCSRMatrixColMapOffd( Pnew );
   num_rows_offd_Pnew = hypre_CSRMatrixNumRows(Pnew_offd);
   num_cols_offd_Pnew = hypre_CSRMatrixNumCols(Pnew_offd);

   /* Find the j-ranges, needed to allocate a "reverse lookup" array. */
   /* This is the max j - min j over P and Pnew (which here is a copy of C).
      Only the diag block is considered. */
   /* For scalability reasons (jrange can get big) this won't work for the offd block.
      Also, indexing is more complicated in the offd block (c.f. col_map_offd).
      It's not clear, though whether the "quadratic" algorithm I'm using for the offd
      block is really any slower than the more complicated "linear" algorithm here. */
   jrange = 0;
   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_diag)>0 ) /* only Fine rows matter */
      {
         /* An empty row has no first column to read; indexing
            Pnew_diag_j[ Pnew_diag_i[i1] ] would touch the next row's data,
            or run off the end of the array for a trailing empty row. */
         if ( Pnew_diag_i[i1] == Pnew_diag_i[i1+1] ) continue;

         /* Columns of a row are not assumed to be sorted, so scan the whole
            row (of both Pnew and P) for its smallest and largest column. */
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         jmax = jmin;
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         jrange = hypre_max( jrange, jmax-jmin+1 );
      }
   }

   /*-----------------------------------------------------------------------
    *  Loop over Pnew_diag rows.  Construct a temporary reverse array:
    *  If j is a column number, Pnew_j2m[j] is the array index for j, i.e.
    *  Pnew_diag_j[ Pnew_j2m[j] ] = j
    *-----------------------------------------------------------------------*/

   Pnew_j2m = hypre_CTAlloc( HYPRE_Int, jrange );

   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_diag)>0 ) /* Fine data only */
      {
         if ( Pnew_diag_i[i1] == Pnew_diag_i[i1+1] )
         {
            /* Nothing to store into.  Every C and P entry of a fine row is
               looked up in Pnew below, so those rows must be empty too. */
            hypre_assert( C_diag_i[i1] == C_diag_i[i1+1] );
            hypre_assert( P_diag_i[i1] == P_diag_i[i1+1] );
            continue;
         }

         /* just needed for an assertion below... */
         for ( j=0; j<jrange; ++j ) Pnew_j2m[j] = -1;

         /* smallest column of this row; columns are stored relative to it */
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
         }
         for ( m = Pnew_diag_i[i1]; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            hypre_assert( j-jmin>=0 );
            hypre_assert( j-jmin<jrange );
            Pnew_j2m[ j-jmin ] = m;
         }

         /*-----------------------------------------------------------------------
          *  Loop over C_diag data for the current row.
          *  Subtract each C data entry from the corresponding Pnew entry.
          *-----------------------------------------------------------------------*/

         for ( mc=C_diag_i[i1]; mc<C_diag_i[i1+1]; ++mc )
         {
            jc = C_diag_j[mc];
            dc = C_diag_data[mc];
            m = Pnew_j2m[jc-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] -= dc;
         }

         /*-----------------------------------------------------------------------
          *  Loop over P_diag data for the current row.
          *  Add each P data entry to the corresponding Pnew entry.
          *-----------------------------------------------------------------------*/

         for ( mp=P_diag_i[i1]; mp<P_diag_i[i1+1]; ++mp )
         {
            jp = P_diag_j[mp];
            dp = P_diag_data[mp];
            m = Pnew_j2m[jp-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] += dp;
         }
      }
   }

   /*-----------------------------------------------------------------------
    * Repeat for the offd block.
    *-----------------------------------------------------------------------*/

   for ( i1 = 0; i1 < num_rows_offd_Pnew; i1++ )
   {
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_offd)>0 ) /* Fine data only */
      {
         if ( num_cols_offd_Pnew )
         {
            /*  This is a simple quadratic algorithm.  If necessary I may try
                to implement the ideas used on the diag block later. */
            for ( m = Pnew_offd_i[i1]; m<Pnew_offd_i[i1+1]; ++m )
            {
               j = Pnew_offd_j[m];
               jg = Pnew_col_map_offd[j];   /* global column of this entry */
               Pnew_offd_data[m] = 0;
               if ( num_cols_offd_C )
                  for ( mc=C_offd_i[i1]; mc<C_offd_i[i1+1]; ++mc )
                  {
                     jC = C_offd_j[mc];
                     jCg = C_col_map_offd[jC];
                     if ( jCg==jg ) Pnew_offd_data[m] -= C_offd_data[mc];
                  }
               if ( num_cols_offd_P )
                  for ( mp=P_offd_i[i1]; mp<P_offd_i[i1+1]; ++mp )
                  {
                     jP = P_offd_j[mp];
                     jPg = P_col_map_offd[jP];
                     if ( jPg==jg ) Pnew_offd_data[m] += P_offd_data[mp];
                  }
            }
         }
      }
   }

   hypre_TFree(Pnew_j2m);

   return Pnew;
}
void hypre_BoomerAMGTruncateInterp( hypre_ParCSRMatrix *P,
                                    HYPRE_Real eps, HYPRE_Real dlt,
                                    HYPRE_Int * CF_marker )
/* Truncate the interpolation matrix P, but only in rows for which the
   marker is <0.  Truncation means that an element P(i,j) is set to 0 if
   P(i,j)>0 and P(i,j)<eps*max( P(i,j) )  or if
   P(i,j)>0 and P(i,j)<dlt*max( -P(i,j) )  or if
   P(i,j)<0 and P(i,j)>dlt*min( -P(i,j) )  or if
   P(i,j)<0 and P(i,j)>eps*min( P(i,j) )
      ( 0<eps,dlt<1, typically 0.1=dlt<eps=0.2, )
   The min and max are only computed locally, as I'm guessing that there isn't
   usually much to be gained (in the way of improved performance) by getting
   them perfectly right.

   P is modified in place: the j and data arrays of both blocks are repacked,
   the row pointers are rewritten, kept entries of each row are rescaled so
   that the row sum is preserved, and the nonzero counts are updated.
*/

/* The function hypre_BoomerAMGInterpTruncation in par_interp.c is
   very similar.  It looks at fabs(value) rather than separately
   dealing with value<0 and value>0 as recommended by Klaus Stuben,
   thus as this function does.  In this function, only "marked" rows
   are affected.  Lastly, in hypre_BoomerAMGInterpTruncation, if any
   element gets discarded, it reallocates arrays to the new size.
*/
{
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Real      *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Real      *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int       *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int       *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int       *new_P_diag_i;
   HYPRE_Int       *new_P_offd_i;
   HYPRE_Int        num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int        num_rows_offd_P = hypre_CSRMatrixNumRows(P_offd);
   HYPRE_Int        num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(P_diag);
   HYPRE_Int        num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(P_offd);
#if 0
   MPI_Comm         comm = hypre_ParCSRMatrixComm( P );
   HYPRE_Real       vmax1, vmin1;
#endif
   HYPRE_Real       vmax = 0.0;
   HYPRE_Real       vmin = 0.0;
   HYPRE_Real       v, old_sum, new_sum, scale, wmax, wmin;
   HYPRE_Int        i1, m, m1d, m1o;   /* m1d, m1o: write cursors for diag, offd */

   /* compute vmax = eps*max(P(i,j)), vmin = eps*min(P(i,j)) */
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
      {
         v = P_diag_data[m];
         vmax = hypre_max( v, vmax );
         vmin = hypre_min( v, vmin );
      }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
      {
         v = P_offd_data[m];
         vmax = hypre_max( v, vmax );
         vmin = hypre_min( v, vmin );
      }
   }
#if 0
   /* This can make max,min global so results don't depend on no. processors
      We don't want this except for testing, or maybe this could be put
      someplace better.  I don't like adding communication here, for a minor reason.
   */
   vmax1 = vmax; vmin1 = vmin;
   hypre_MPI_Allreduce( &vmax1, &vmax, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm );
   hypre_MPI_Allreduce( &vmin1, &vmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, comm );
#endif
   if ( vmax <= 0.0 ) vmax =  1.0;  /* make sure no v is v>vmax if no v is v>0 */
   if ( vmin >= 0.0 ) vmin = -1.0;  /* make sure no v is v<vmin if no v is v<0 */
   wmax = - dlt * vmin;
   wmin = - dlt * vmax;
   vmax *= eps;
   vmin *= eps;

   /* Repack the i,j,and data arrays so as to discard the small elements of P.
      Elements of Coarse rows (CF_marker>=0) are always kept.
      The arrays are not re-allocated, so there will generally be unused space
      at the ends of the arrays. */
   new_P_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P+1 );
   new_P_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_offd_P+1 );
   m1d = P_diag_i[0];
   m1o = P_offd_i[0];
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      old_sum = 0;
      new_sum = 0;
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
      {
         v = P_diag_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_diag_j[m1d] = P_diag_j[m];
            P_diag_data[m1d] = P_diag_data[m];
            ++m1d;
         }
         else
         {  /* discard v */
            --num_nonzeros_diag;
         }
      }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
      {
         v = P_offd_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_offd_j[m1o] = P_offd_j[m];
            P_offd_data[m1o] = P_offd_data[m];
            ++m1o;
         }
         else
         {  /* discard v */
            --num_nonzeros_offd;
         }
      }

      new_P_diag_i[i1+1] = m1d;
      if ( i1<num_rows_offd_P ) new_P_offd_i[i1+1] = m1o;

      /* rescale to keep row sum the same */
      if (new_sum!=0) scale = old_sum/new_sum; else scale = 1.0;
      for ( m=new_P_diag_i[i1]; m<new_P_diag_i[i1+1]; ++m )
         P_diag_data[m] *= scale;
      if ( i1<num_rows_offd_P ) /* this test fails when there is no offd block */
         for ( m=new_P_offd_i[i1]; m<new_P_offd_i[i1+1]; ++m )
            P_offd_data[m] *= scale;

   }

   /* Install the new row pointers.  The offd pointers are written even when
      no offd entry survived: the j/data arrays were repacked above, and the
      old pointers would otherwise keep referring to discarded entries. */
   for ( i1 = 1; i1 <= num_rows_diag_P; i1++ )
   {
      P_diag_i[i1] = new_P_diag_i[i1];
      if ( i1<=num_rows_offd_P ) P_offd_i[i1] = new_P_offd_i[i1];
   }
   hypre_TFree( new_P_diag_i );
   /* allocated with num_rows_offd_P+1 entries, so there is always something
      to free, even when the offd block has no rows */
   hypre_TFree( new_P_offd_i );

   hypre_CSRMatrixNumNonzeros(P_diag) = num_nonzeros_diag;
   hypre_CSRMatrixNumNonzeros(P_offd) = num_nonzeros_offd;
   hypre_ParCSRMatrixSetDNumNonzeros( P );
   hypre_ParCSRMatrixSetNumNonzeros( P );

}
/*--------------------------------------------------------------------------
 * GenerateDifConv
 *
 * Sets up the row structure of a distributed matrix on an nx x ny x nz
 * grid that is split over a P x Q x R processor grid; (p,q,r) are the
 * coordinates of the calling processor in that grid.
 *
 * NOTE(review): only the first part of this definition is present here
 * (partitioning, per-row entry counts, allocation of the column/data
 * arrays).  The code that fills the arrays, builds A and returns it is not
 * visible, and `value` is not referenced in the visible part.
 *--------------------------------------------------------------------------*/
HYPRE_ParCSRMatrix GenerateDifConv( MPI_Comm comm,
                                    HYPRE_Int nx,
                                    HYPRE_Int ny,
                                    HYPRE_Int nz,
                                    HYPRE_Int P,
                                    HYPRE_Int Q,
                                    HYPRE_Int R,
                                    HYPRE_Int p,
                                    HYPRE_Int q,
                                    HYPRE_Int r,
                                    double *value )
{
   hypre_ParCSRMatrix *A;
   hypre_CSRMatrix *diag;
   hypre_CSRMatrix *offd;
   HYPRE_Int *diag_i;
   HYPRE_Int *diag_j;
   double *diag_data;
   HYPRE_Int *offd_i;
   HYPRE_Int *offd_j;
   double *offd_data;
   HYPRE_Int *global_part;
   HYPRE_Int ix, iy, iz;
   HYPRE_Int cnt, o_cnt;
   HYPRE_Int local_num_rows;
   HYPRE_Int *col_map_offd;
   HYPRE_Int row_index;
   HYPRE_Int i,j;
   HYPRE_Int nx_local, ny_local, nz_local;
   HYPRE_Int nx_size, ny_size, nz_size;
   HYPRE_Int num_cols_offd;
   HYPRE_Int grid_size;
   HYPRE_Int *nx_part;
   HYPRE_Int *ny_part;
   HYPRE_Int *nz_part;
   HYPRE_Int num_procs, my_id;
   HYPRE_Int P_busy, Q_busy, R_busy;

   /* Both values are overwritten below with the logical P x Q x R layout. */
   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   grid_size = nx*ny*nz;

   /* One partition array per direction; entry k..k+1 bounds slab k. */
   hypre_GeneratePartitioning(nx,P,&nx_part);
   hypre_GeneratePartitioning(ny,Q,&ny_part);
   hypre_GeneratePartitioning(nz,R,&nz_part);

   /* global_part[k] = running sum of the sub-grid sizes of all processors
      that precede processor k in (z, y, x) order. */
   global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1);
   global_part[0] = 0;
   cnt = 1;
   for (iz = 0; iz < R; iz++)
   {
      nz_size = nz_part[iz+1]-nz_part[iz];
      for (iy = 0; iy < Q; iy++)
      {
         ny_size = ny_part[iy+1]-ny_part[iy];
         for (ix = 0; ix < P; ix++)
         {
            nx_size = nx_part[ix+1] - nx_part[ix];
            global_part[cnt] = global_part[cnt-1];
            global_part[cnt++] += nx_size*ny_size*nz_size;
         }
      }
   }

   /* Extent of the sub-grid owned by processor (p,q,r). */
   nx_local = nx_part[p+1] - nx_part[p];
   ny_local = ny_part[q+1] - ny_part[q];
   nz_local = nz_part[r+1] - nz_part[r];

   /* Logical rank and processor count derived from the grid coordinates. */
   my_id = r*(P*Q) + q*P + p;
   num_procs = P*Q*R;

   local_num_rows = nx_local*ny_local*nz_local;
   diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

   /* Number of processors per direction that can own at least one point. */
   P_busy = hypre_min(nx,P);
   Q_busy = hypre_min(ny,Q);
   R_busy = hypre_min(nz,R);

   /* One off-processor column for every point on each face of the local
      sub-grid that has a neighbouring processor behind it. */
   num_cols_offd = 0;
   if (p) num_cols_offd += ny_local*nz_local;
   if (p < P_busy-1) num_cols_offd += ny_local*nz_local;
   if (q) num_cols_offd += nx_local*nz_local;
   if (q < Q_busy-1) num_cols_offd += nx_local*nz_local;
   if (r) num_cols_offd += 
nx_local*ny_local;
   if (r < R_busy-1) num_cols_offd += nx_local*ny_local;

   if (!local_num_rows) num_cols_offd = 0;

   col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

   /* Counting pass: build the row-pointer arrays.  Every local point gets
      one entry for itself plus one entry per existing axis neighbour; the
      neighbour is counted in diag_i when it lies inside the local sub-grid
      and in offd_i when it lies inside the global grid but outside the
      local sub-grid. */
   cnt = 1;
   o_cnt = 1;
   diag_i[0] = 0;
   offd_i[0] = 0;
   for (iz = nz_part[r]; iz < nz_part[r+1]; iz++)
   {
      for (iy = ny_part[q]; iy < ny_part[q+1]; iy++)
      {
         for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
         {
            diag_i[cnt] = diag_i[cnt-1];
            offd_i[o_cnt] = offd_i[o_cnt-1];
            diag_i[cnt]++;                 /* the point itself */
            if (iz > nz_part[r])           /* neighbour at iz-1 */
               diag_i[cnt]++;
            else
            {
               if (iz)
               {
                  offd_i[o_cnt]++;
               }
            }
            if (iy > ny_part[q])           /* neighbour at iy-1 */
               diag_i[cnt]++;
            else
            {
               if (iy)
               {
                  offd_i[o_cnt]++;
               }
            }
            if (ix > nx_part[p])           /* neighbour at ix-1 */
               diag_i[cnt]++;
            else
            {
               if (ix)
               {
                  offd_i[o_cnt]++;
               }
            }
            if (ix+1 < nx_part[p+1])       /* neighbour at ix+1 */
               diag_i[cnt]++;
            else
            {
               if (ix+1 < nx)
               {
                  offd_i[o_cnt]++;
               }
            }
            if (iy+1 < ny_part[q+1])       /* neighbour at iy+1 */
               diag_i[cnt]++;
            else
            {
               if (iy+1 < ny)
               {
                  offd_i[o_cnt]++;
               }
            }
            if (iz+1 < nz_part[r+1])       /* neighbour at iz+1 */
               diag_i[cnt]++;
            else
            {
               if (iz+1 < nz)
               {
                  offd_i[o_cnt]++;
               }
            }
            cnt++;
            o_cnt++;
         }
      }
   }

   /* Column-index and value storage sized from the final row pointers.
      offd_j / offd_data stay unassigned when there is a single processor. */
   diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
   diag_data = hypre_CTAlloc(double, diag_i[local_num_rows]);

   if (num_procs > 1)
   {
      offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
      offd_data = hypre_CTAlloc(double, offd_i[local_num_rows]);
   }
/*--------------------------------------------------------------------------
 * main
 *
 * Example driver: assembles the n*n x n*n matrix described below (4 on the
 * diagonal, -1 for the grid neighbours) through the IJ interface, sets the
 * right-hand side to h^2 and the initial guess to zero, and solves the
 * system with the solver selected on the command line.
 *
 * Command line options:
 *    -n <n>            problem size in each direction (default: 33)
 *    -solver <ID>      0 AMG (default), 1 AMG-PCG, 8 ParaSails-PCG, 50 PCG
 *    -print_solution   write the solution vector to "ij.out.x"
 *    -help             print the usage message and exit
 *
 * An option that requires a value but is given as the last argument makes
 * the program print the usage message and exit instead of reading past the
 * end of argv.
 *
 * Returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int main (HYPRE_Int argc, char *argv[])
{
   HYPRE_Int i;
   HYPRE_Int myid, num_procs;
   HYPRE_Int N, n;

   HYPRE_Int ilower, iupper;
   HYPRE_Int local_size, extra;

   HYPRE_Int solver_id;
   HYPRE_Int print_solution;

   double h, h2;

#ifdef HYPRE_FORTRAN
   hypre_F90_Obj A;
   hypre_F90_Obj parcsr_A;
   hypre_F90_Obj b;
   hypre_F90_Obj par_b;
   hypre_F90_Obj x;
   hypre_F90_Obj par_x;

   hypre_F90_Obj solver, precond;

   hypre_F90_Obj long_temp_COMM;
   HYPRE_Int temp_COMM;

   HYPRE_Int precond_id;

   /* The Fortran-style interface takes every argument by address, so the
      literal parameters need named storage. */
   HYPRE_Int one = 1;
   HYPRE_Int two = 2;
   HYPRE_Int three = 3;
   HYPRE_Int six = 6;
   HYPRE_Int twenty = 20;
   HYPRE_Int thousand = 1000;
   HYPRE_Int hypre_type = HYPRE_PARCSR;

   double oo1 = 1.e-3;
   double tol = 1.e-7;
#else
   HYPRE_IJMatrix A;
   HYPRE_ParCSRMatrix parcsr_A;
   HYPRE_IJVector b;
   HYPRE_ParVector par_b;
   HYPRE_IJVector x;
   HYPRE_ParVector par_x;

   HYPRE_Solver solver, precond;
#endif

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs);

   /* Default problem parameters */
   n = 33;
   solver_id = 0;
   print_solution = 0;

   /* Parse command line */
   {
      HYPRE_Int arg_index = 0;
      HYPRE_Int print_usage = 0;

      while (arg_index < argc)
      {
         if ( strcmp(argv[arg_index], "-n") == 0 )
         {
            arg_index++;
            if (arg_index >= argc)
            {
               /* "-n" without a value: argv[argc] is NULL, do not parse it */
               print_usage = 1;
               break;
            }
            n = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-solver") == 0 )
         {
            arg_index++;
            if (arg_index >= argc)
            {
               /* "-solver" without a value */
               print_usage = 1;
               break;
            }
            solver_id = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-print_solution") == 0 )
         {
            arg_index++;
            print_solution = 1;
         }
         else if ( strcmp(argv[arg_index], "-help") == 0 )
         {
            print_usage = 1;
            break;
         }
         else
         {
            arg_index++;
         }
      }

      if ((print_usage) && (myid == 0))
      {
         hypre_printf("\n");
         hypre_printf("Usage: %s [<options>]\n", argv[0]);
         hypre_printf("\n");
         hypre_printf(" -n <n> : problem size in each direction (default: 33)\n");
         hypre_printf(" -solver <ID> : solver ID\n");
         hypre_printf(" 0 - AMG (default) \n");
         hypre_printf(" 1 - AMG-PCG\n");
         hypre_printf(" 8 - ParaSails-PCG\n");
         hypre_printf(" 50 - PCG\n");
         hypre_printf(" -print_solution : print the solution vector\n");
         hypre_printf("\n");
      }

      if (print_usage)
      {
         hypre_MPI_Finalize();
         return (0);
      }
   }

   /* Preliminaries: want at least one processor per row */
   if (n*n < num_procs) n = sqrt(num_procs) + 1;
   N = n*n; /* global number of rows */
   h = 1.0/(n+1); /* mesh size*/
   h2 = h*h;

   /* Each processor knows only of its own rows - the range is denoted by
      ilower and upper.  Here we partition the rows. We account for the fact
      that N may not divide evenly by the number of processors. */
   local_size = N/num_procs;
   extra = N - local_size*num_procs;

   ilower = local_size*myid;
   ilower += hypre_min(myid, extra);

   iupper = local_size*(myid+1);
   iupper += hypre_min(myid+1, extra);
   iupper = iupper - 1;

   /* How many rows do I have? */
   local_size = iupper - ilower + 1;

   /* Create the matrix.
      Note that this is a square matrix, so we indicate the row partition
      size twice (since number of rows = number of cols) */
#ifdef HYPRE_FORTRAN
   long_temp_COMM = (hypre_F90_Obj) hypre_MPI_COMM_WORLD;
   temp_COMM = (HYPRE_Int) hypre_MPI_COMM_WORLD;
   HYPRE_IJMatrixCreate(&long_temp_COMM, &ilower, &iupper, &ilower, &iupper, &A);
#else
   HYPRE_IJMatrixCreate(hypre_MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A);
#endif

   /* Choose a parallel csr format storage (see the User's Manual) */
#ifdef HYPRE_FORTRAN
   HYPRE_IJMatrixSetObjectType(&A, &hypre_type);
#else
   HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR);
#endif

   /* Initialize before setting coefficients */
#ifdef HYPRE_FORTRAN
   HYPRE_IJMatrixInitialize(&A);
#else
   HYPRE_IJMatrixInitialize(A);
#endif

   /* Now go through my local rows and set the matrix entries.
      Each row has at most 5 entries. For example, if n=3:

      A = [M -I 0; -I M -I; 0 -I M]
      M = [4 -1 0; -1 4 -1; 0 -1 4]

      Note that here we are setting one row at a time, though
      one could set all the rows together (see the User's Manual).
   */
   {
      HYPRE_Int nnz;
      double values[5];
      HYPRE_Int cols[5];

      for (i = ilower; i <= iupper; i++)
      {
         nnz = 0;

         /* The left identity block:position i-n */
         if ((i-n)>=0)
         {
            cols[nnz] = i-n;
            values[nnz] = -1.0;
            nnz++;
         }

         /* The left -1: position i-1 */
         if (i%n)
         {
            cols[nnz] = i-1;
            values[nnz] = -1.0;
            nnz++;
         }

         /* Set the diagonal: position i */
         cols[nnz] = i;
         values[nnz] = 4.0;
         nnz++;

         /* The right -1: position i+1 */
         if ((i+1)%n)
         {
            cols[nnz] = i+1;
            values[nnz] = -1.0;
            nnz++;
         }

         /* The right identity block:position i+n */
         if ((i+n)< N)
         {
            cols[nnz] = i+n;
            values[nnz] = -1.0;
            nnz++;
         }

         /* Set the values for row i */
#ifdef HYPRE_FORTRAN
         HYPRE_IJMatrixSetValues(&A, &one, &nnz, &i, &cols[0], &values[0]);
#else
         HYPRE_IJMatrixSetValues(A, 1, &nnz, &i, cols, values);
#endif
      }
   }

   /* Assemble after setting the coefficients */
#ifdef HYPRE_FORTRAN
   HYPRE_IJMatrixAssemble(&A);
#else
   HYPRE_IJMatrixAssemble(A);
#endif

   /* Get the parcsr matrix object to use (a single call is sufficient) */
#ifdef HYPRE_FORTRAN
   HYPRE_IJMatrixGetObject(&A, &parcsr_A);
#else
   HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A);
#endif

   /* Create the rhs and solution */
#ifdef HYPRE_FORTRAN
   HYPRE_IJVectorCreate(&temp_COMM, &ilower, &iupper, &b);
   HYPRE_IJVectorSetObjectType(&b, &hypre_type);
   HYPRE_IJVectorInitialize(&b);
#else
   HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, ilower, iupper,&b);
   HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR);
   HYPRE_IJVectorInitialize(b);
#endif

#ifdef HYPRE_FORTRAN
   HYPRE_IJVectorCreate(&temp_COMM, &ilower, &iupper, &x);
   HYPRE_IJVectorSetObjectType(&x, &hypre_type);
   HYPRE_IJVectorInitialize(&x);
#else
   HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, ilower, iupper,&x);
   HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR);
   HYPRE_IJVectorInitialize(x);
#endif

   /* Set the rhs values to h^2 and the solution to zero */
   {
      double *rhs_values, *x_values;
      HYPRE_Int *rows;

      rhs_values = calloc(local_size, sizeof(double));
      x_values = calloc(local_size, sizeof(double));
      rows = calloc(local_size, sizeof(HYPRE_Int));

      for (i=0; i<local_size; i++)
      {
         rhs_values[i] = h2;
         x_values[i] = 0.0;
         rows[i] = ilower + i;
      }

#ifdef HYPRE_FORTRAN
      HYPRE_IJVectorSetValues(&b, &local_size, &rows[0], &rhs_values[0]);
      HYPRE_IJVectorSetValues(&x, &local_size, &rows[0], &x_values[0]);
#else
      HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values);
      HYPRE_IJVectorSetValues(x, local_size, rows, x_values);
#endif

      free(x_values);
      free(rhs_values);
      free(rows);
   }

#ifdef HYPRE_FORTRAN
   HYPRE_IJVectorAssemble(&b);
   HYPRE_IJVectorGetObject(&b, &par_b);
#else
   HYPRE_IJVectorAssemble(b);
   HYPRE_IJVectorGetObject(b, (void **) &par_b);
#endif

#ifdef HYPRE_FORTRAN
   HYPRE_IJVectorAssemble(&x);
   HYPRE_IJVectorGetObject(&x, &par_x);
#else
   HYPRE_IJVectorAssemble(x);
   HYPRE_IJVectorGetObject(x, (void **) &par_x);
#endif

   /* Choose a solver and solve the system */

   /* AMG */
   if (solver_id == 0)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      /* Create solver (same call in both interface flavours) */
      HYPRE_BoomerAMGCreate(&solver);

      /* Set some parameters (See Reference Manual for more parameters) */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGSetPrintLevel(&solver, &three); /* print solve info + parameters */
      HYPRE_BoomerAMGSetCoarsenType(&solver, &six); /* Falgout coarsening */
      HYPRE_BoomerAMGSetRelaxType(&solver, &three); /* G-S/Jacobi hybrid relaxation */
      HYPRE_BoomerAMGSetNumSweeps(&solver, &one); /* Sweeps on each level */
      HYPRE_BoomerAMGSetMaxLevels(&solver, &twenty); /* maximum number of levels */
      HYPRE_BoomerAMGSetTol(&solver, &tol); /* conv. tolerance */
#else
      HYPRE_BoomerAMGSetPrintLevel(solver, 3); /* print solve info + parameters */
      HYPRE_BoomerAMGSetCoarsenType(solver, 6); /* Falgout coarsening */
      HYPRE_BoomerAMGSetRelaxType(solver, 3); /* G-S/Jacobi hybrid relaxation */
      HYPRE_BoomerAMGSetNumSweeps(solver, 1); /* Sweeps on each level */
      HYPRE_BoomerAMGSetMaxLevels(solver, 20); /* maximum number of levels */
      HYPRE_BoomerAMGSetTol(solver, 1e-7); /* conv. tolerance */
#endif

      /* Now setup and solve! */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGSetup(&solver, &parcsr_A, &par_b, &par_x);
      HYPRE_BoomerAMGSolve(&solver, &parcsr_A, &par_b, &par_x);
#else
      HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x);
#endif

      /* Run info - needed logging turned on */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGGetNumIterations(&solver, &num_iterations);
      HYPRE_BoomerAMGGetFinalRelativeResidualNorm(&solver, &final_res_norm);
#else
      HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations);
      HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#endif

      if (myid == 0)
      {
         hypre_printf("\n");
         hypre_printf("Iterations = %d\n", num_iterations);
         hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
         hypre_printf("\n");
      }

      /* Destroy solver */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGDestroy(&solver);
#else
      HYPRE_BoomerAMGDestroy(solver);
#endif
   }
   /* PCG */
   else if (solver_id == 50)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      /* Create solver */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGCreate(&temp_COMM, &solver);
#else
      HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver);
#endif

      /* Set some parameters (See Reference Manual for more parameters) */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */
      HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */
      HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */
      HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* prints out the iteration info */
#else
      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
      HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */
      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
#endif

      /* Now setup and solve! */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x);
      HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x);
#else
      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);
#endif

      /* Run info - needed logging turned on */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations);
      HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm);
#else
      HYPRE_PCGGetNumIterations(solver, &num_iterations);
      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#endif

      if (myid == 0)
      {
         hypre_printf("\n");
         hypre_printf("Iterations = %d\n", num_iterations);
         hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
         hypre_printf("\n");
      }

      /* Destroy solver */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGDestroy(&solver);
#else
      HYPRE_ParCSRPCGDestroy(solver);
#endif
   }
   /* PCG with AMG preconditioner */
   else if (solver_id == 1)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      /* Create solver */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGCreate(&temp_COMM, &solver);
#else
      HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver);
#endif

      /* Set some parameters (See Reference Manual for more parameters) */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */
      HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */
      HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */
      HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* print solve info */
#else
      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
      HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
#endif

      /* Now set up the AMG preconditioner and specify any parameters */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGCreate(&precond);
      HYPRE_BoomerAMGSetPrintLevel(&precond, &one); /* print amg solution info*/
      HYPRE_BoomerAMGSetCoarsenType(&precond, &six);
      HYPRE_BoomerAMGSetRelaxType(&precond, &three);
      HYPRE_BoomerAMGSetNumSweeps(&precond, &one);
      HYPRE_BoomerAMGSetTol(&precond, &oo1);
#else
      HYPRE_BoomerAMGCreate(&precond);
      HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info*/
      HYPRE_BoomerAMGSetCoarsenType(precond, 6);
      HYPRE_BoomerAMGSetRelaxType(precond, 3);
      HYPRE_BoomerAMGSetNumSweeps(precond, 1);
      HYPRE_BoomerAMGSetTol(precond, 1e-3);
#endif

      /* Set the PCG preconditioner */
#ifdef HYPRE_FORTRAN
      precond_id = 2;
      HYPRE_ParCSRPCGSetPrecond(&solver, &precond_id, &precond);
#else
      HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
                          (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond);
#endif

      /* Now setup and solve! */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x);
      HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x);
#else
      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);
#endif

      /* Run info - needed logging turned on */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations);
      HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm);
#else
      HYPRE_PCGGetNumIterations(solver, &num_iterations);
      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#endif

      if (myid == 0)
      {
         hypre_printf("\n");
         hypre_printf("Iterations = %d\n", num_iterations);
         hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
         hypre_printf("\n");
      }

      /* Destroy solver and preconditioner */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGDestroy(&solver);
      HYPRE_BoomerAMGDestroy(&precond);
#else
      HYPRE_ParCSRPCGDestroy(solver);
      HYPRE_BoomerAMGDestroy(precond);
#endif
   }
   /* PCG with Parasails Preconditioner */
   else if (solver_id == 8)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      HYPRE_Int sai_max_levels = 1;
      double sai_threshold = 0.1;
      double sai_filter = 0.05;
      HYPRE_Int sai_sym = 1;

      /* Create solver */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGCreate(&temp_COMM, &solver);
#else
      HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver);
#endif

      /* Set some parameters (See Reference Manual for more parameters) */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */
      HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */
      HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */
      HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* print solve info */
#else
      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
      HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
#endif

      /* Now set up the ParaSails preconditioner and specify any parameters */
#ifdef HYPRE_FORTRAN
      HYPRE_ParaSailsCreate(&temp_COMM, &precond);
#else
      HYPRE_ParaSailsCreate(hypre_MPI_COMM_WORLD, &precond);
#endif

      /* Set some parameters (See Reference Manual for more parameters) */
#ifdef HYPRE_FORTRAN
      HYPRE_ParaSailsSetParams(&precond, &sai_threshold, &sai_max_levels);
      HYPRE_ParaSailsSetFilter(&precond, &sai_filter);
      HYPRE_ParaSailsSetSym(&precond, &sai_sym);
      HYPRE_ParaSailsSetLogging(&precond, &three);
#else
      HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels);
      HYPRE_ParaSailsSetFilter(precond, sai_filter);
      HYPRE_ParaSailsSetSym(precond, sai_sym);
      HYPRE_ParaSailsSetLogging(precond, 3);
#endif

      /* Set the PCG preconditioner */
#ifdef HYPRE_FORTRAN
      precond_id = 4;
      HYPRE_ParCSRPCGSetPrecond(&solver, &precond_id, &precond);
#else
      HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve,
                          (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond);
#endif

      /* Now setup and solve! */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x);
      HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x);
#else
      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);
#endif

      /* Run info - needed logging turned on */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations);
      HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm);
#else
      HYPRE_PCGGetNumIterations(solver, &num_iterations);
      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#endif

      if (myid == 0)
      {
         hypre_printf("\n");
         hypre_printf("Iterations = %d\n", num_iterations);
         hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
         hypre_printf("\n");
      }

      /* Destroy solver and preconditioner */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGDestroy(&solver);
      HYPRE_ParaSailsDestroy(&precond);
#else
      HYPRE_ParCSRPCGDestroy(solver);
      HYPRE_ParaSailsDestroy(precond);
#endif
   }
   else
   {
      if (myid ==0) hypre_printf("Invalid solver id specified.\n");
   }

   /* Print the solution */
#ifdef HYPRE_FORTRAN
   if (print_solution)
      HYPRE_IJVectorPrint(&x, "ij.out.x");
#else
   if (print_solution)
      HYPRE_IJVectorPrint(x, "ij.out.x");
#endif

   /* Clean up */
#ifdef HYPRE_FORTRAN
   HYPRE_IJMatrixDestroy(&A);
   HYPRE_IJVectorDestroy(&b);
   HYPRE_IJVectorDestroy(&x);
#else
   HYPRE_IJMatrixDestroy(A);
   HYPRE_IJVectorDestroy(b);
   HYPRE_IJVectorDestroy(x);
#endif

   /* Finalize MPI*/
   hypre_MPI_Finalize();

   return(0);
}
/*--------------------------------------------------------------------------
 * hypre_StructGridAssembleWithAP
 *
 * Assembles the struct grid `grid`.
 *
 * NOTE(review): only the beginning of this definition is present here:
 * the declarations, Step 1 (initializations) and the first part of Step 2,
 * which accumulates the local volume, the smallest/largest local box
 * volume and the extents of the local bounding box.  The remaining steps
 * and the return statement are not visible; many of the locals declared
 * below are only used in that missing part.
 *--------------------------------------------------------------------------*/
int hypre_StructGridAssembleWithAP( hypre_StructGrid *grid )
{
   int ierr = 0;
   int tmp_i;

   int size, global_num_boxes, num_local_boxes;
   int i, j, d, k, index;
   int num_procs, myid;
   int *sendbuf8, *recvbuf8, *sendbuf2, *recvbuf2;
   int min_box_size, max_box_size;
   int global_min_box_size, global_max_box_size;
   int *ids;
   int max_regions, max_refinements, ologp;
   double gamma;
   hypre_Index min_index, max_index;

   int prune;

   hypre_Box *box;

   /* Values read from the grid object on entry. */
   MPI_Comm comm = hypre_StructGridComm(grid);
   hypre_Box *bounding_box = hypre_StructGridBoundingBox(grid);
   hypre_BoxArray *local_boxes = hypre_StructGridBoxes(grid);
   int dim = hypre_StructGridDim(grid);
   hypre_BoxNeighbors *neighbors = hypre_StructGridNeighbors(grid);
   int max_distance = hypre_StructGridMaxDistance(grid);
   hypre_IndexRef periodic = hypre_StructGridPeriodic(grid);

   int *local_boxnums;

   double dbl_global_size, tmp_dbl;

   hypre_BoxArray *my_partition;
   int *part_ids, *part_boxnums;

   int *proc_array, proc_count, proc_alloc, count;
   int *tmp_proc_ids = NULL;

   int max_response_size;
   int *ap_proc_ids, *send_buf, *send_buf_starts;
   int *response_buf, *response_buf_starts;

   hypre_BoxArray *neighbor_boxes, *n_boxes_copy;
   int *neighbor_proc_ids, *neighbor_boxnums;

   int *order_index, *delete_array;
   int tmp_id, start, first_local;

   int grow, grow_array[6];
   hypre_Box *grow_box;

   int *numghost;
   int ghostsize;
   hypre_Box *ghostbox;

   hypre_StructAssumedPart *assumed_part;
   hypre_DataExchangeResponse response_obj;

   /* Per-direction periodicity flag: 1 when the periodic entry is non-zero. */
   int px = hypre_IndexX(periodic);
   int py = hypre_IndexY(periodic);
   int pz = hypre_IndexZ(periodic);

   int i_periodic = px ? 1 : 0;
   int j_periodic = py ? 1 : 0;
   int k_periodic = pz ? 1 : 0;

   int num_periods, multiple_ap, p;
   hypre_Box *result_box, *period_box;
   hypre_Index *pshifts;

   hypre_IndexRef pshift;

#if NEIGH_PRINT
   double start_time, end_time;
#endif

   /*---------------------------------------------
     Step 1: Initializations
     -----------------------------------------------*/

   prune = 1; /* default is to prune */

   num_local_boxes = hypre_BoxArraySize(local_boxes);

   /* Each periodic direction multiplies the count by 3, each non-periodic
      direction by 1. */
   num_periods = (1+2*i_periodic) * (1+2*j_periodic) * (1+2*k_periodic);

   MPI_Comm_size(comm, &num_procs);
   MPI_Comm_rank(comm, &myid);

   /*---------------------------------------------
     Step 2: Determine the global size, total number of boxes,
     and global bounding box.
     Also get the min and max box sizes
     since it is convenient to do so.
     -----------------------------------------------*/

   if (neighbors == NULL)
   {
      /*these may not be needed - check later */
      ids = hypre_TAlloc(int, num_local_boxes);

      /* for the vol and number of boxes */
      sendbuf2 = hypre_CTAlloc(int, 2);
      recvbuf2 = hypre_CTAlloc(int, 2);
      size = 0;

      /* Fresh boxes replace the bounding box read from the grid above. */
      bounding_box = hypre_BoxCreate();
      grow_box = hypre_BoxCreate();

      if (num_local_boxes)
      {
         min_box_size = hypre_BoxVolume( hypre_BoxArrayBox(local_boxes, 0));
         max_box_size = hypre_BoxVolume( hypre_BoxArrayBox(local_boxes, 0));

         /* initialize min and max (+/- 2^30 act as sentinels) */
         for (d=0; d<3; d++)
         {
            hypre_IndexD(min_index, d) = pow(2,30);
            hypre_IndexD(max_index, d) = -pow(2,30);
         }

         hypre_ForBoxI(i, local_boxes)
         {
            box = hypre_BoxArrayBox(local_boxes, i);
            /* get global size and number of boxes */
            tmp_i = hypre_BoxVolume(box);
            size += tmp_i;
            min_box_size = hypre_min(min_box_size, tmp_i);
            max_box_size = hypre_max(max_box_size, tmp_i);

            /* set id */
            ids[i] = i;

            /* 1/3/05 we need this for the case of holes in the domain. (I had
               commented it out on 12/04 - as I thought this was not necessary. */

            /* zero volume boxes - still look at for getting the bounding box */
            if (hypre_BoxVolume(box) == 0) /* zero volume boxes - still count */
            {
               hypre_CopyBox(box, grow_box);
               for (d = 0; d < 3; d++)
               {
                  if(!hypre_BoxSizeD(box, d))
                  {
                     /* growth amount taken from the gap between imin and
                        imax in this direction, applied to both sides */
                     grow = (hypre_BoxIMinD(box, d) - hypre_BoxIMaxD(box, d) + 1)/2;
                     grow_array[2*d] = grow;
                     grow_array[2*d+1] = grow;
                  }
                  else
                  {
                     grow_array[2*d] = 0;
                     grow_array[2*d+1] = 0;
                  }
               }
               /* expand the box */
               hypre_BoxExpand(grow_box, grow_array);
               box = grow_box; /*pointer copy*/
            }
            /*now we have a vol > 0 box */

            for (d = 0; d < dim; d++) /* for each dimension */
            {
               hypre_IndexD(min_index, d) = hypre_min( hypre_IndexD(min_index, d),
                                                       hypre_BoxIMinD(box, d));
               hypre_IndexD(max_index, d) = hypre_max( hypre_IndexD(max_index, d),
                                                       hypre_BoxIMaxD(box, d));
            }

         }/*end for each box loop */

         /* bounding box extents */
         hypre_BoxSetExtents(bounding_box, min_index, max_index);
      }
HYPRE_ParCSRMatrix GenerateSysLaplacianVCoef( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r, HYPRE_Int num_fun, HYPRE_Real *mtrx, HYPRE_Real *value ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; HYPRE_Real *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; HYPRE_Real *offd_data; HYPRE_Int *global_part; HYPRE_Int ix, iy, iz; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int row_index, row, col; HYPRE_Int index, diag_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local, nz_local; HYPRE_Int nx_size, ny_size, nz_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int local_grid_size; HYPRE_Int first_j, j_ind; HYPRE_Int num_coeffs, num_offd_coeffs; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int *nz_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy, R_busy; HYPRE_Real val; /* for indexing in values */ HYPRE_Int sz = num_fun*num_fun; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny*nz; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); hypre_GeneratePartitioning(nz,R,&nz_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1); global_part[0] = 0; cnt = 1; for (iz = 0; iz < R; iz++) { nz_size = nz_part[iz+1]-nz_part[iz]; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size*nz_size; } } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; nz_local = nz_part[r+1] - nz_part[r]; my_id = r*(P*Q) + q*P + p; num_procs = P*Q*R; local_grid_size = nx_local*ny_local*nz_local; local_num_rows = num_fun*local_grid_size; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = 
hypre_min(nx,P); Q_busy = hypre_min(ny,Q); R_busy = hypre_min(nz,R); num_cols_offd = 0; if (p) num_cols_offd += ny_local*nz_local; if (p < P_busy-1) num_cols_offd += ny_local*nz_local; if (q) num_cols_offd += nx_local*nz_local; if (q < Q_busy-1) num_cols_offd += nx_local*nz_local; if (r) num_cols_offd += nx_local*ny_local; if (r < R_busy-1) num_cols_offd += nx_local*ny_local; num_cols_offd *= num_fun; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 1; diag_i[0] = 0; offd_i[0] = 0; for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_i[cnt] = diag_i[cnt-1]; offd_i[cnt] = offd_i[cnt-1]; diag_i[cnt] += num_fun; if (iz > nz_part[r]) diag_i[cnt] += num_fun; else { if (iz) { offd_i[cnt] += num_fun; } } if (iy > ny_part[q]) diag_i[cnt] += num_fun; else { if (iy) { offd_i[cnt] += num_fun; } } if (ix > nx_part[p]) diag_i[cnt] += num_fun; else { if (ix) { offd_i[cnt] += num_fun; } } if (ix+1 < nx_part[p+1]) diag_i[cnt] += num_fun; else { if (ix+1 < nx) { offd_i[cnt] += num_fun; } } if (iy+1 < ny_part[q+1]) diag_i[cnt] += num_fun; else { if (iy+1 < ny) { offd_i[cnt] += num_fun; } } if (iz+1 < nz_part[r+1]) diag_i[cnt] += num_fun; else { if (iz+1 < nz) { offd_i[cnt] += num_fun; } } num_coeffs = diag_i[cnt]-diag_i[cnt-1]; num_offd_coeffs = offd_i[cnt]-offd_i[cnt-1]; cnt++; for (i=1; i < num_fun; i++) { diag_i[cnt] = diag_i[cnt-1]+num_coeffs; offd_i[cnt] = offd_i[cnt-1]+num_offd_coeffs; cnt++; } } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]); } row_index = 0; for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { cnt = 
diag_i[row_index];; o_cnt = offd_i[row_index];; num_coeffs = diag_i[row_index+1]-diag_i[row_index]; num_offd_coeffs = offd_i[row_index+1]-offd_i[row_index]; first_j = row_index; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[0*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; if (iz > nz_part[r]) { first_j = row_index-nx_local*ny_local*num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (iz) { first_j = num_fun*hypre_map(ix,iy,iz-1,p,q,r-1,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } if (iy > ny_part[q]) { first_j = row_index-nx_local*num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (iy) { first_j = num_fun*hypre_map(ix,iy-1,iz,p,q-1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } if (ix > nx_part[p]) { first_j = row_index-num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (ix) { first_j = num_fun*hypre_map(ix-1,iy,iz,p-1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = 
o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } if (ix+1 < nx_part[p+1]) { first_j = row_index+num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (ix+1 < nx) { first_j = num_fun*hypre_map(ix+1,iy,iz,p+1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } if (iy+1 < ny_part[q+1]) { first_j = row_index+nx_local*num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (iy+1 < ny) { first_j = num_fun*hypre_map(ix,iy+1,iz,p,q+1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } if (iz+1 < nz_part[r+1]) { first_j = row_index+nx_local*ny_local*num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (iz+1 < nz) { first_j = num_fun*hypre_map(ix,iy,iz+1,p,q,r+1,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } row_index += num_fun; } } } if (num_procs > 1) { cnt = 0; for (i=0; i < 
local_num_rows; i+=num_fun) { for (j=offd_i[i]; j < offd_i[i+1]; j++) { col_map_offd[cnt++] = offd_j[j]; } } hypre_qsort0(col_map_offd, 0, num_cols_offd-1); for (i=0; i < num_fun*num_cols_offd; i++) for (j=hypre_min(0,abs(i-num_fun)); j < num_cols_offd; j++) if (offd_j[i] == col_map_offd[j]) { offd_j[i] = j; break; } } for (i=0; i < num_procs+1; i++) global_part[i] *= num_fun; for (j=1; j< num_fun; j++) { for (i=0; i<local_grid_size; i++) { row = i*num_fun+j; diag_index = diag_i[row]; index = diag_index+j; val = diag_data[diag_index]; col = diag_j[diag_index]; diag_data[diag_index] = diag_data[index]; diag_j[diag_index] = diag_j[index]; diag_data[index] = val; diag_j[index] = col; } } #ifdef HYPRE_NO_GLOBAL_PARTITION /* ideally we would use less storage earlier in this function, but this is fine for testing */ { HYPRE_Int tmp1, tmp2; tmp1 = global_part[my_id]; tmp2 = global_part[my_id + 1]; hypre_TFree(global_part); global_part = hypre_CTAlloc(HYPRE_Int, 2); global_part[0] = tmp1; global_part[1] = tmp2; } #endif A = hypre_ParCSRMatrixCreate(comm, num_fun*grid_size, num_fun*grid_size, global_part, global_part, num_cols_offd, diag_i[local_num_rows], offd_i[local_num_rows]); hypre_ParCSRMatrixColMapOffd(A) = col_map_offd; diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrixI(diag) = diag_i; hypre_CSRMatrixJ(diag) = diag_j; hypre_CSRMatrixData(diag) = diag_data; offd = hypre_ParCSRMatrixOffd(A); hypre_CSRMatrixI(offd) = offd_i; if (num_cols_offd) { hypre_CSRMatrixJ(offd) = offd_j; hypre_CSRMatrixData(offd) = offd_data; } hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(nz_part); return (HYPRE_ParCSRMatrix) A; }
HYPRE_ParCSRMatrix GenerateLaplacian( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r, HYPRE_Real *value ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; HYPRE_Real *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j = NULL; HYPRE_Real *offd_data = NULL; HYPRE_Int *global_part; HYPRE_Int ix, iy, iz; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local, nz_local; HYPRE_Int nx_size, ny_size, nz_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int *nz_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy, R_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny*nz; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); hypre_GeneratePartitioning(nz,R,&nz_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1); global_part[0] = 0; cnt = 1; for (iz = 0; iz < R; iz++) { nz_size = nz_part[iz+1]-nz_part[iz]; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size*nz_size; } } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; nz_local = nz_part[r+1] - nz_part[r]; my_id = r*(P*Q) + q*P + p; num_procs = P*Q*R; local_num_rows = nx_local*ny_local*nz_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); R_busy = hypre_min(nz,R); num_cols_offd = 0; if (p) num_cols_offd += ny_local*nz_local; if (p < P_busy-1) num_cols_offd += ny_local*nz_local; if (q) num_cols_offd += nx_local*nz_local; if (q < Q_busy-1) num_cols_offd += nx_local*nz_local; if 
(r) num_cols_offd += nx_local*ny_local; if (r < R_busy-1) num_cols_offd += nx_local*ny_local; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 1; o_cnt = 1; diag_i[0] = 0; offd_i[0] = 0; for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iz > nz_part[r]) diag_i[cnt]++; else { if (iz) { offd_i[o_cnt]++; } } if (iy > ny_part[q]) diag_i[cnt]++; else { if (iy) { offd_i[o_cnt]++; } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) diag_i[cnt]++; else { if (iy+1 < ny) { offd_i[o_cnt]++; } } if (iz+1 < nz_part[r+1]) diag_i[cnt]++; else { if (iz+1 < nz) { offd_i[o_cnt]++; } } cnt++; o_cnt++; } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]); } row_index = 0; cnt = 0; o_cnt = 0; for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_j[cnt] = row_index; diag_data[cnt++] = value[0]; if (iz > nz_part[r]) { diag_j[cnt] = row_index-nx_local*ny_local; diag_data[cnt++] = value[3]; } else { if (iz) { offd_j[o_cnt] = hypre_map(ix,iy,iz-1,p,q,r-1,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[3]; } } if (iy > ny_part[q]) { diag_j[cnt] = row_index-nx_local; diag_data[cnt++] = value[2]; } else { if (iy) { offd_j[o_cnt] = hypre_map(ix,iy-1,iz,p,q-1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[2]; } } if (ix > nx_part[p]) { diag_j[cnt] = row_index-1; 
diag_data[cnt++] = value[1]; } else { if (ix) { offd_j[o_cnt] = hypre_map(ix-1,iy,iz,p-1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[1]; } } if (ix+1 < nx_part[p+1]) { diag_j[cnt] = row_index+1; diag_data[cnt++] = value[1]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map(ix+1,iy,iz,p+1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[1]; } } if (iy+1 < ny_part[q+1]) { diag_j[cnt] = row_index+nx_local; diag_data[cnt++] = value[2]; } else { if (iy+1 < ny) { offd_j[o_cnt] = hypre_map(ix,iy+1,iz,p,q+1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[2]; } } if (iz+1 < nz_part[r+1]) { diag_j[cnt] = row_index+nx_local*ny_local; diag_data[cnt++] = value[3]; } else { if (iz+1 < nz) { offd_j[o_cnt] = hypre_map(ix,iy,iz+1,p,q,r+1,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[3]; } } row_index++; } } } if (num_procs > 1) { for (i=0; i < num_cols_offd; i++) col_map_offd[i] = offd_j[i]; hypre_qsort0(col_map_offd, 0, num_cols_offd-1); for (i=0; i < num_cols_offd; i++) for (j=0; j < num_cols_offd; j++) if (offd_j[i] == col_map_offd[j]) { offd_j[i] = j; break; } } #ifdef HYPRE_NO_GLOBAL_PARTITION /* ideally we would use less storage earlier in this function, but this is fine for testing */ { HYPRE_Int tmp1, tmp2; tmp1 = global_part[my_id]; tmp2 = global_part[my_id + 1]; hypre_TFree(global_part); global_part = hypre_CTAlloc(HYPRE_Int, 2); global_part[0] = tmp1; global_part[1] = tmp2; } #endif A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size, global_part, global_part, num_cols_offd, diag_i[local_num_rows], offd_i[local_num_rows]); hypre_ParCSRMatrixColMapOffd(A) = col_map_offd; diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrixI(diag) = diag_i; hypre_CSRMatrixJ(diag) = diag_j; hypre_CSRMatrixData(diag) = diag_data; offd = hypre_ParCSRMatrixOffd(A); hypre_CSRMatrixI(offd) = offd_i; if (num_cols_offd) { hypre_CSRMatrixJ(offd) = offd_j; hypre_CSRMatrixData(offd) = 
offd_data; } hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(nz_part); return (HYPRE_ParCSRMatrix) A; }
/**
 * Partition the input so that
 * a1[0:*out1) and a2[0:*out2) contain the smallest k elements
 *
 * @param out1 receives the number of elements taken from a1
 * @param out2 receives the number of elements taken from a2
 * @param a1   first input sequence (the caller in this file passes sorted data)
 * @param a2   second input sequence (likewise)
 * @param n1   length of a1
 * @param n2   length of a2
 * @param k    number of smallest elements to select; values of
 *             n1 + n2 or more select everything
 */
static void kth_element(
   HYPRE_Int *out1, HYPRE_Int *out2,
   HYPRE_Int *a1, HYPRE_Int *a2,
   HYPRE_Int n1, HYPRE_Int n2, HYPRE_Int k)
{
   // k covers everything: checked first so that the empty-input shortcuts
   // below can never report more elements than an input actually has
   if (k >= n1 + n2)
   {
      *out1 = n1; *out2 = n2;
      return;
   }

   // either of the inputs is empty
   if (n1 == 0)
   {
      *out1 = 0; *out2 = k;
      return;
   }
   if (n2 == 0)
   {
      *out1 = k; *out2 = 0;
      return;
   }

   // one is greater than the other
   if (k < n1 && a1[k] <= a2[0])
   {
      *out1 = k; *out2 = 0;
      return;
   }
   if (k - n1 >= 0 && a2[k - n1] >= a1[n1 - 1])
   {
      *out1 = n1; *out2 = k - n1;
      return;
   }
   if (k < n2 && a2[k] <= a1[0])
   {
      *out1 = 0; *out2 = k;
      return;
   }
   if (k - n2 >= 0 && a1[k - n2] >= a2[n2 - 1])
   {
      *out1 = k - n2; *out2 = n2;
      return;
   }
   // now k > 0
   // (for k == 0 one of a1[0] <= a2[0] or a2[0] <= a1[0] holds, so one of
   // the shortcuts above has already returned)

   // faster to do binary search on the shorter sequence
   if (n1 > n2)
   {
      SWAP(HYPRE_Int, n1, n2);
      SWAP(HYPRE_Int *, a1, a2);
      SWAP(HYPRE_Int *, out1, out2);
   }

   if (k < (n1 + n2)/2)
   {
      kth_element_(out1, out2, a1, a2, 0, hypre_min(n1 - 1, k), n1, n2, k);
   }
   else
   {
      // when k is big, faster to find (n1 + n2 - k)th biggest element
      // the first offset1/offset2 elements must be part of the answer, so
      // skip them and search only in the remaining windows
      HYPRE_Int offset1 = hypre_max(k - n2, 0), offset2 = hypre_max(k - n1, 0);
      HYPRE_Int new_k = k - offset1 - offset2;

      HYPRE_Int new_n1 = hypre_min(n1 - offset1, new_k + 1);
      HYPRE_Int new_n2 = hypre_min(n2 - offset2, new_k + 1);
      kth_element_(out1, out2, a1 + offset1, a2 + offset2, 0, new_n1 - 1, new_n1, new_n2, new_k);

      // translate the result back to positions in the full sequences
      *out1 += offset1;
      *out2 += offset2;
   }
#ifdef DBG_MERGE_SORT
   assert(*out1 + *out2 == k);
#endif
}
/*--------------------------------------------------------------------------
 * hypre_PFMGSetup
 *
 * Setup routine for the PFMG solver whose state is passed in pfmg_vdata
 * (used as a hypre_PFMGData pointer).
 *
 *   pfmg_vdata   solver data; max_levels is read and written back, and
 *                dxyz is passed to hypre_PFMGComputeDxyz
 *   A            structured matrix; supplies the grid
 *   b, x         right-hand side and solution vectors (not referenced in
 *                the part of the routine visible here)
 *
 * NOTE(review): the definition stops after the "compute dxyz" step in this
 * file - the remaining setup, the return statement and the closing brace of
 * the function are not present.  Most of the locals declared below are
 * therefore unused in the visible part.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_PFMGSetup( void               *pfmg_vdata,
                 hypre_StructMatrix *A,
                 hypre_StructVector *b,
                 hypre_StructVector *x        )
{
   hypre_PFMGData       *pfmg_data = pfmg_vdata;

   /* snapshot of the user-controlled solver options */
   MPI_Comm              comm = (pfmg_data -> comm);

   HYPRE_Int             relax_type = (pfmg_data -> relax_type);
   HYPRE_Int             usr_jacobi_weight= (pfmg_data -> usr_jacobi_weight);
   double                jacobi_weight = (pfmg_data -> jacobi_weight);
   HYPRE_Int             skip_relax = (pfmg_data -> skip_relax);
   double               *dxyz = (pfmg_data -> dxyz);
   HYPRE_Int             rap_type;

   HYPRE_Int             max_iter;
   HYPRE_Int             max_levels;

   HYPRE_Int             num_levels;

   hypre_Index           cindex;
   hypre_Index           findex;
   hypre_Index           stride;

   hypre_Index           coarsen;

   HYPRE_Int            *cdir_l;
   HYPRE_Int            *active_l;
   hypre_StructGrid    **grid_l;
   hypre_StructGrid    **P_grid_l;

   double               *data;
   HYPRE_Int             data_size = 0;
   double               *relax_weights;
   double               *mean, *deviation;
   double                alpha, beta;

   hypre_StructMatrix  **A_l;
   hypre_StructMatrix  **P_l;
   hypre_StructMatrix  **RT_l;
   hypre_StructVector  **b_l;
   hypre_StructVector  **x_l;

   /* temp vectors */
   hypre_StructVector  **tx_l;
   hypre_StructVector  **r_l;
   hypre_StructVector  **e_l;

   void                **relax_data_l;
   void                **matvec_data_l;
   void                **restrict_data_l;
   void                **interp_data_l;

   hypre_StructGrid     *grid;
   HYPRE_Int             dim;

   hypre_Box            *cbox;

   double                min_dxyz;
   HYPRE_Int             cdir, periodic, cmaxsize;
   HYPRE_Int             d, l;
   HYPRE_Int             dxyz_flag;

   HYPRE_Int             b_num_ghost[]  = {0, 0, 0, 0, 0, 0};
   HYPRE_Int             x_num_ghost[]  = {1, 1, 1, 1, 1, 1};

   HYPRE_Int             ierr = 0;
#if DEBUG
   char                  filename[255];
#endif

   /*-----------------------------------------------------
    * Set up coarse grids
    *-----------------------------------------------------*/

   grid  = hypre_StructMatrixGrid(A);
   dim   = hypre_StructGridDim(grid);

   /* Compute a new max_levels value based on the grid */
   /* (sum over the three directions of log2(bounding box size) + 2) */
   cbox = hypre_BoxDuplicate(hypre_StructGridBoundingBox(grid));
   max_levels =
      hypre_Log2(hypre_BoxSizeD(cbox, 0)) + 2 +
      hypre_Log2(hypre_BoxSizeD(cbox, 1)) + 2 +
      hypre_Log2(hypre_BoxSizeD(cbox, 2)) + 2;
   /* a positive user-supplied limit can only lower the computed bound */
   if ((pfmg_data -> max_levels) > 0)
   {
      max_levels = hypre_min(max_levels, (pfmg_data -> max_levels));
   }
   /* store the effective limit back into the solver data */
   (pfmg_data -> max_levels) = max_levels;

   /* compute dxyz */
   /* only done when at least one of the three dxyz entries is still zero;
      note that dxyz_flag is assigned only inside this branch */
   if ((dxyz[0] == 0) || (dxyz[1] == 0) || (dxyz[2] == 0))
   {
      mean = hypre_CTAlloc(double, 3);
      deviation = hypre_CTAlloc(double, 3);
      hypre_PFMGComputeDxyz(A, dxyz, mean, deviation);

      dxyz_flag= 0;
      for (d = 0; d < dim; d++)
      {
         /* presumably deviation[d] holds a mean of squares on entry, making
            this a variance - TODO confirm against hypre_PFMGComputeDxyz */
         deviation[d] -= mean[d]*mean[d];
         /* square of coeff. of variation */
         /* the flag is raised as soon as one direction exceeds 0.1 */
         if (deviation[d]/(mean[d]*mean[d]) > .1)
         {
            dxyz_flag= 1;
            break;
         }
      }

      hypre_TFree(mean);
      hypre_TFree(deviation);
   }
void hypre_BoomerAMGJacobiInterp_1( hypre_ParCSRMatrix * A, hypre_ParCSRMatrix ** P, hypre_ParCSRMatrix * S, HYPRE_Int * CF_marker, HYPRE_Int level, HYPRE_Real truncation_threshold, HYPRE_Real truncation_threshold_minus, HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd, HYPRE_Real weight_AF) /* One step of Jacobi interpolation: A is the linear system. P is an interpolation matrix, input and output CF_marker identifies coarse and fine points If we imagine P and A as split into coarse and fine submatrices, [ AFF AFC ] [ AF ] [ IFC ] A = [ ] = [ ] , P = [ ] [ ACF ACC ] [ AC ] [ ICC ] (note that ICC is an identity matrix, applied to coarse points only) then this function computes IFCnew = IFCold - DFF(-1) * ( AFF*IFCold + AFC ) = IFCold - DFF(-1) * AF * Pold) where DFF is the diagonal of AFF, (-1) represents the inverse, and where "old" denotes a value on entry to this function, "new" a returned value. */ { hypre_ParCSRMatrix * Pnew; hypre_ParCSRMatrix * C; hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(*P); hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(*P); HYPRE_Real *P_diag_data = hypre_CSRMatrixData(P_diag); HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag); HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag); HYPRE_Real *P_offd_data = hypre_CSRMatrixData(P_offd); HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd); hypre_CSRMatrix *C_diag; hypre_CSRMatrix *C_offd; hypre_CSRMatrix *Pnew_diag; hypre_CSRMatrix *Pnew_offd; HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag); HYPRE_Int i; HYPRE_Int Jnochanges=0, Jchanges, Pnew_num_nonzeros; HYPRE_Int CF_coarse=0; HYPRE_Int * J_marker = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P ); HYPRE_Int nc, ncmax, ncmin, nc1; HYPRE_Int num_procs, my_id; MPI_Comm comm = hypre_ParCSRMatrixComm( A ); #ifdef HYPRE_JACINT_PRINT_ROW_SUMS HYPRE_Int m, nmav, npav; HYPRE_Real PIi, PIimax, PIimin, PIimav, PIipav, randthresh; HYPRE_Real eps = 1.0e-17; #endif #ifdef HYPRE_JACINT_PRINT_MATRICES char filename[80]; HYPRE_Int i_dummy, j_dummy; 
HYPRE_Int *base_i_ptr = &i_dummy; HYPRE_Int *base_j_ptr = &j_dummy; #endif #ifdef HYPRE_JACINT_PRINT_SOME_ROWS HYPRE_Int sample_rows[50], n_sample_rows=0, isamp; #endif hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm,&my_id); for ( i=0; i<num_rows_diag_P; ++i ) { J_marker[i] = CF_marker[i]; if (CF_marker[i]>=0) ++CF_coarse; } #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1, P has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(P_diag), hypre_CSRMatrixNumNonzeros(P_offd), hypre_CSRMatrixNumNonzeros(P_diag)+hypre_CSRMatrixNumNonzeros(P_offd), hypre_ParCSRMatrixLocalSumElts(*P) ); #endif /* row sum computations, for output */ #ifdef HYPRE_JACINT_PRINT_ROW_SUMS PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0; nmav=0, npav=0; for ( i=0; i<num_rows_diag_P; ++i ) { PIi = 0; /* i-th value of P*1, i.e. sum of row i of P */ for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) PIi += P_diag_data[m]; for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m ) PIi += P_offd_data[m]; if (CF_marker[i]<0) { PIimax = hypre_max( PIimax, PIi ); PIimin = hypre_min( PIimin, PIi ); if (PIi<=1-eps) { PIimav+=PIi; ++nmav; }; if (PIi>=1+eps) { PIipav+=PIi; ++npav; }; } } if ( nmav>0 ) PIimav = PIimav/nmav; if ( npav>0 ) PIipav = PIipav/npav; hypre_printf("%i %i P in max,min row sums %e %e\n", my_id, level, PIimax, PIimin ); #endif ncmax=0; ncmin=num_rows_diag_P; nc1=0; for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc<=1) { ++nc1; } ncmax = hypre_max( nc, ncmax ); ncmin = hypre_min( nc, ncmin ); } #if 0 /* a very agressive reduction in how much the Jacobi step does: */ for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc>ncmin+1) /*if ( nc > ncmin + 0.5*(ncmax-ncmin) )*/ { J_marker[i] = 1; ++Jnochanges; } } #endif Jchanges = num_rows_diag_P - Jnochanges - CF_coarse; #ifdef HYPRE_JACINT_PRINT_SOME_ROWS hypre_printf("some rows to be changed: "); 
randthresh = 15/(HYPRE_Real)Jchanges; for ( i=0; i<num_rows_diag_P; ++i ) { if ( J_marker[i]<0 ) { if ( ((HYPRE_Real)rand())/RAND_MAX < randthresh ) { hypre_printf( "%i: ", i ); for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] ); hypre_printf("; "); sample_rows[n_sample_rows] = i; ++n_sample_rows; } } } hypre_printf("\n"); #endif #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i P has %i rows, %i changeable, %i don't change-good, %i coarse\n", my_id, level, num_rows_diag_P, Jchanges, Jnochanges, CF_coarse ); hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 ); #endif #ifdef HYPRE_JACINT_PRINT_MATRICES if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) { hypre_sprintf( filename, "Ain%i", level ); hypre_ParCSRMatrixPrintIJ( A,0,0,filename); hypre_sprintf( filename, "Sin%i", level ); hypre_ParCSRMatrixPrintIJ( S,0,0,filename); hypre_sprintf( filename, "Pin%i", level ); hypre_ParCSRMatrixPrintIJ( *P,0,0,filename); } #endif C = hypre_ParMatmul_FC( A, *P, J_marker, dof_func, dof_func_offd ); /* hypre_parMatmul_FC creates and returns C, a variation of the matrix product A*P in which only the "Fine"-designated rows have been computed. (all columns are Coarse because all columns of P are). "Fine" is defined solely by the marker array, and for example could be a proper subset of the fine points of a multigrid hierarchy. As a matrix, C is the size of A*P. But only the marked rows have been computed. 
*/ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "C%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename); #endif C_diag = hypre_ParCSRMatrixDiag(C); C_offd = hypre_ParCSRMatrixOffd(C); #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1 after matmul, C has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(C_diag), hypre_CSRMatrixNumNonzeros(C_offd), hypre_CSRMatrixNumNonzeros(C_diag)+hypre_CSRMatrixNumNonzeros(C_offd), hypre_ParCSRMatrixLocalSumElts(C) ); #endif hypre_ParMatScaleDiagInv_F( C, A, weight_AF, J_marker ); /* hypre_ParMatScaleDiagInv scales of its first argument by premultiplying with a submatrix of the inverse of the diagonal of its second argument. The marker array determines which diagonal elements are used. The marker array should select exactly the right number of diagonal elements (the number of rows of AP_FC). */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "Cout%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename); #endif Pnew = hypre_ParMatMinus_F( *P, C, J_marker ); /* hypre_ParMatMinus_F subtracts rows of its second argument from selected rows of its first argument. The marker array determines which rows of the first argument are affected, and they should exactly correspond to all the rows of the second argument. */ Pnew_diag = hypre_ParCSRMatrixDiag(Pnew); Pnew_offd = hypre_ParCSRMatrixOffd(Pnew); Pnew_num_nonzeros = hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd); #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1 after MatMinus, Pnew has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd), Pnew_num_nonzeros, hypre_ParCSRMatrixLocalSumElts(Pnew) ); #endif /* Transfer ownership of col_starts from P to Pnew ... 
*/ if ( hypre_ParCSRMatrixColStarts(*P) && hypre_ParCSRMatrixColStarts(*P)==hypre_ParCSRMatrixColStarts(Pnew) ) { if ( hypre_ParCSRMatrixOwnsColStarts(*P) && !hypre_ParCSRMatrixOwnsColStarts(Pnew) ) { hypre_ParCSRMatrixSetColStartsOwner(*P,0); hypre_ParCSRMatrixSetColStartsOwner(Pnew,1); } } hypre_ParCSRMatrixDestroy( C ); hypre_ParCSRMatrixDestroy( *P ); /* Note that I'm truncating all the fine rows, not just the J-marked ones. */ #if 0 if ( Pnew_num_nonzeros < 10000 ) /* a fixed number like this makes it no.procs.-depdendent */ { /* ad-hoc attempt to reduce zero-matrix problems seen in testing..*/ truncation_threshold = 1.0e-6 * truncation_threshold; truncation_threshold_minus = 1.0e-6 * truncation_threshold_minus; } #endif hypre_BoomerAMGTruncateInterp( Pnew, truncation_threshold, truncation_threshold_minus, CF_marker ); hypre_MatvecCommPkgCreate ( Pnew ); *P = Pnew; P_diag = hypre_ParCSRMatrixDiag(*P); P_offd = hypre_ParCSRMatrixOffd(*P); P_diag_data = hypre_CSRMatrixData(P_diag); P_diag_i = hypre_CSRMatrixI(P_diag); P_diag_j = hypre_CSRMatrixJ(P_diag); P_offd_data = hypre_CSRMatrixData(P_offd); P_offd_i = hypre_CSRMatrixI(P_offd); /* row sum computations, for output */ #ifdef HYPRE_JACINT_PRINT_ROW_SUMS PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0; nmav=0, npav=0; for ( i=0; i<num_rows_diag_P; ++i ) { PIi = 0; /* i-th value of P*1, i.e. 
sum of row i of P */ for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) PIi += P_diag_data[m]; for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m ) PIi += P_offd_data[m]; if (CF_marker[i]<0) { PIimax = hypre_max( PIimax, PIi ); PIimin = hypre_min( PIimin, PIi ); if (PIi<=1-eps) { PIimav+=PIi; ++nmav; }; if (PIi>=1+eps) { PIipav+=PIi; ++npav; }; } } if ( nmav>0 ) PIimav = PIimav/nmav; if ( npav>0 ) PIipav = PIipav/npav; hypre_printf("%i %i P out max,min row sums %e %e\n", my_id, level, PIimax, PIimin ); #endif #ifdef HYPRE_JACINT_PRINT_SOME_ROWS hypre_printf("some changed rows: "); for ( isamp=0; isamp<n_sample_rows; ++isamp ) { i = sample_rows[isamp]; hypre_printf( "%i: ", i ); for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] ); hypre_printf("; "); } hypre_printf("\n"); #endif ncmax=0; ncmin=num_rows_diag_P; nc1=0; for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc<=1) ++nc1; ncmax = hypre_max( nc, ncmax ); ncmin = hypre_min( nc, ncmin ); } #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i P has %i rows, %i changeable, %i too good, %i coarse\n", my_id, level, num_rows_diag_P, num_rows_diag_P-Jnochanges-CF_coarse, Jnochanges, CF_coarse ); hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 ); hypre_printf("%i %i Jacobi_Interp_1 after truncation (%e), Pnew has %i+%i=%i nonzeros, local sum %e\n", my_id, level, truncation_threshold, hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd), hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd), hypre_ParCSRMatrixLocalSumElts(Pnew) ); #endif /* Programming Notes: 1. Judging by around line 299 of par_interp.c, they typical use of CF_marker is that CF_marker>=0 means Coarse, CF_marker<0 means Fine. 
*/ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "Pout%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( *P,0,0,filename); #endif hypre_TFree( J_marker ); }
int main (int argc, char *argv[]) { HYPRE_Int i; int myid, num_procs; int N, n; HYPRE_Int ilower, iupper; HYPRE_Int local_size, extra; int solver_id; int print_solution, print_system; double h, h2; HYPRE_IJMatrix A; HYPRE_ParCSRMatrix parcsr_A; HYPRE_IJVector b; HYPRE_ParVector par_b; HYPRE_IJVector x; HYPRE_ParVector par_x; HYPRE_Solver solver, precond; /* Initialize MPI */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); /* Default problem parameters */ n = 33; solver_id = 0; print_solution = 0; print_system = 0; /* Parse command line */ { int arg_index = 0; int print_usage = 0; while (arg_index < argc) { if ( strcmp(argv[arg_index], "-n") == 0 ) { arg_index++; n = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-solver") == 0 ) { arg_index++; solver_id = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-print_solution") == 0 ) { arg_index++; print_solution = 1; } else if ( strcmp(argv[arg_index], "-print_system") == 0 ) { arg_index++; print_system = 1; } else if ( strcmp(argv[arg_index], "-help") == 0 ) { print_usage = 1; break; } else { arg_index++; } } if ((print_usage) && (myid == 0)) { printf("\n"); printf("Usage: %s [<options>]\n", argv[0]); printf("\n"); printf(" -n <n> : problem size in each direction (default: 33)\n"); printf(" -solver <ID> : solver ID\n"); printf(" 0 - AMG (default) \n"); printf(" 1 - AMG-PCG\n"); printf(" 8 - ParaSails-PCG\n"); printf(" 50 - PCG\n"); printf(" 61 - AMG-FlexGMRES\n"); printf(" -print_solution : print the solution vector\n"); printf(" -print_system : print the matrix and rhs\n"); printf("\n"); } if (print_usage) { MPI_Finalize(); return (0); } } /* Preliminaries: want at least one processor per row */ if (n*n < num_procs) n = sqrt(num_procs) + 1; N = n*n; /* global number of rows */ h = 1.0/(n+1); /* mesh size*/ h2 = h*h; /* Each processor knows only of its own rows - the range is denoted by ilower and upper. Here we partition the rows. 
We account for the fact that N may not divide evenly by the number of processors. */ local_size = N/num_procs; extra = N - local_size*num_procs; ilower = local_size*myid; ilower += hypre_min(myid, extra); iupper = local_size*(myid+1); iupper += hypre_min(myid+1, extra); iupper = iupper - 1; /* How many rows do I have? */ local_size = iupper - ilower + 1; /* Create the matrix. Note that this is a square matrix, so we indicate the row partition size twice (since number of rows = number of cols) */ HYPRE_IJMatrixCreate(MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A); /* Choose a parallel csr format storage (see the User's Manual) */ HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR); /* Initialize before setting coefficients */ HYPRE_IJMatrixInitialize(A); /* Now go through my local rows and set the matrix entries. Each row has at most 5 entries. For example, if n=3: A = [M -I 0; -I M -I; 0 -I M] M = [4 -1 0; -1 4 -1; 0 -1 4] Note that here we are setting one row at a time, though one could set all the rows together (see the User's Manual). */ { HYPRE_Int nnz; double values[5]; HYPRE_Int cols[5]; for (i = ilower; i <= iupper; i++) { nnz = 0; /* The left identity block:position i-n */ if ((i-n)>=0) { cols[nnz] = i-n; values[nnz] = -1.0; nnz++; } /* The left -1: position i-1 */ if (i%n) { cols[nnz] = i-1; values[nnz] = -1.0; nnz++; } /* Set the diagonal: position i */ cols[nnz] = i; values[nnz] = 4.0; nnz++; /* The right -1: position i+1 */ if ((i+1)%n) { cols[nnz] = i+1; values[nnz] = -1.0; nnz++; } /* The right identity block:position i+n */ if ((i+n)< N) { cols[nnz] = i+n; values[nnz] = -1.0; nnz++; } /* Set the values for row i */ HYPRE_IJMatrixSetValues(A, 1, &nnz, &i, cols, values); } } /* Assemble after setting the coefficients */ HYPRE_IJMatrixAssemble(A); /* Note: for the testing of small problems, one may wish to read in a matrix in IJ format (for the format, see the output files from the -print_system option). 
In this case, one would use the following routine: HYPRE_IJMatrixRead( <filename>, MPI_COMM_WORLD, HYPRE_PARCSR, &A ); <filename> = IJ.A.out to read in what has been printed out by -print_system (processor numbers are omitted). A call to HYPRE_IJMatrixRead is an *alternative* to the following sequence of HYPRE_IJMatrix calls: Create, SetObjectType, Initialize, SetValues, and Assemble */ /* Get the parcsr matrix object to use */ HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A); /* Create the rhs and solution */ HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper,&b); HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR); HYPRE_IJVectorInitialize(b); HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper,&x); HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR); HYPRE_IJVectorInitialize(x); /* Set the rhs values to h^2 and the solution to zero */ { double *rhs_values, *x_values; HYPRE_Int *rows; rhs_values = calloc(local_size, sizeof(double)); x_values = calloc(local_size, sizeof(double)); rows = calloc(local_size, sizeof(HYPRE_Int)); for (i=0; i<local_size; i++) { rhs_values[i] = h2; x_values[i] = 0.0; rows[i] = ilower + i; } HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values); HYPRE_IJVectorSetValues(x, local_size, rows, x_values); free(x_values); free(rhs_values); free(rows); } HYPRE_IJVectorAssemble(b); /* As with the matrix, for testing purposes, one may wish to read in a rhs: HYPRE_IJVectorRead( <filename>, MPI_COMM_WORLD, HYPRE_PARCSR, &b ); as an alternative to the following sequence of HYPRE_IJVectors calls: Create, SetObjectType, Initialize, SetValues, and Assemble */ HYPRE_IJVectorGetObject(b, (void **) &par_b); HYPRE_IJVectorAssemble(x); HYPRE_IJVectorGetObject(x, (void **) &par_x); /* Print out the system - files names will be IJ.out.A.XXXXX and IJ.out.b.XXXXX, where XXXXX = processor id */ if (print_system) { HYPRE_IJMatrixPrint(A, "IJ.out.A"); HYPRE_IJVectorPrint(b, "IJ.out.b"); } /* Choose a solver and solve the system */ /* AMG */ if (solver_id == 0) { HYPRE_Int 
num_iterations; double final_res_norm; /* Create solver */ HYPRE_BoomerAMGCreate(&solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_BoomerAMGSetPrintLevel(solver, 3); /* print solve info + parameters */ HYPRE_BoomerAMGSetCoarsenType(solver, 6); /* Falgout coarsening */ HYPRE_BoomerAMGSetRelaxType(solver, 3); /* G-S/Jacobi hybrid relaxation */ HYPRE_BoomerAMGSetNumSweeps(solver, 1); /* Sweeeps on each level */ HYPRE_BoomerAMGSetMaxLevels(solver, 20); /* maximum number of levels */ HYPRE_BoomerAMGSetTol(solver, 1e-7); /* conv. tolerance */ /* Now setup and solve! */ HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x); HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations); HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destroy solver */ HYPRE_BoomerAMGDestroy(solver); } /* PCG */ else if (solver_id == 50) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ /* Now setup and solve! 
*/ HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destroy solver */ HYPRE_ParCSRPCGDestroy(solver); } /* PCG with AMG preconditioner */ else if (solver_id == 1) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ /* Now set up the AMG preconditioner and specify any parameters */ HYPRE_BoomerAMGCreate(&precond); HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */ HYPRE_BoomerAMGSetCoarsenType(precond, 6); HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid */ HYPRE_BoomerAMGSetNumSweeps(precond, 1); HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */ HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */ /* Set the PCG preconditioner */ HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); /* Now setup and solve! 
*/ HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destroy solver and preconditioner */ HYPRE_ParCSRPCGDestroy(solver); HYPRE_BoomerAMGDestroy(precond); } /* PCG with Parasails Preconditioner */ else if (solver_id == 8) { HYPRE_Int num_iterations; double final_res_norm; int sai_max_levels = 1; double sai_threshold = 0.1; double sai_filter = 0.05; int sai_sym = 1; /* Create solver */ HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ /* Now set up the ParaSails preconditioner and specify any parameters */ HYPRE_ParaSailsCreate(MPI_COMM_WORLD, &precond); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels); HYPRE_ParaSailsSetFilter(precond, sai_filter); HYPRE_ParaSailsSetSym(precond, sai_sym); HYPRE_ParaSailsSetLogging(precond, 3); /* Set the PCG preconditioner */ HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond); /* Now setup and solve! 
*/ HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destory solver and preconditioner */ HYPRE_ParCSRPCGDestroy(solver); HYPRE_ParaSailsDestroy(precond); } /* Flexible GMRES with AMG Preconditioner */ else if (solver_id == 61) { HYPRE_Int num_iterations; double final_res_norm; int restart = 30; int modify = 1; /* Create solver */ HYPRE_ParCSRFlexGMRESCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_FlexGMRESSetKDim(solver, restart); HYPRE_FlexGMRESSetMaxIter(solver, 1000); /* max iterations */ HYPRE_FlexGMRESSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_FlexGMRESSetPrintLevel(solver, 2); /* print solve info */ HYPRE_FlexGMRESSetLogging(solver, 1); /* needed to get run info later */ /* Now set up the AMG preconditioner and specify any parameters */ HYPRE_BoomerAMGCreate(&precond); HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */ HYPRE_BoomerAMGSetCoarsenType(precond, 6); HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid */ HYPRE_BoomerAMGSetNumSweeps(precond, 1); HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */ HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */ /* Set the FlexGMRES preconditioner */ HYPRE_FlexGMRESSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); if (modify) /* this is an optional call - if you don't call it, hypre_FlexGMRESModifyPCDefault is used - which does nothing. 
Otherwise, you can define your own, similar to the one used here */ HYPRE_FlexGMRESSetModifyPC( solver, (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCAMGExample); /* Now setup and solve! */ HYPRE_ParCSRFlexGMRESSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRFlexGMRESSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_FlexGMRESGetNumIterations(solver, &num_iterations); HYPRE_FlexGMRESGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destory solver and preconditioner */ HYPRE_ParCSRFlexGMRESDestroy(solver); HYPRE_BoomerAMGDestroy(precond); } else { if (myid ==0) printf("Invalid solver id specified.\n"); } /* Print the solution */ if (print_solution) HYPRE_IJVectorPrint(x, "ij.out.x"); /* Clean up */ HYPRE_IJMatrixDestroy(A); HYPRE_IJVectorDestroy(b); HYPRE_IJVectorDestroy(x); /* Finalize MPI*/ MPI_Finalize(); return(0); }
HYPRE_ParCSRMatrix GenerateRotate7pt( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int p, HYPRE_Int q, double alpha, double eps ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; double *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; double *offd_data; double *value; double ac, bc, cc, s, c, pi, x; HYPRE_Int *global_part; HYPRE_Int ix, iy; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int *work; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local; HYPRE_Int nx_size, ny_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny; value = hypre_CTAlloc(double,4); pi = 4.0*atan(1.0); x = pi*alpha/180.0; s = sin(x); c = cos(x); ac = -(c*c + eps*s*s); bc = 2.0*(1.0 - eps)*s*c; cc = -(s*s + eps*c*c); value[0] = -2*(2*ac+bc+2*cc); value[1] = 2*ac+bc; value[2] = bc+2*cc; value[3] = -bc; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1); global_part[0] = 0; cnt = 1; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size; } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; my_id = q*P + p; num_procs = P*Q; local_num_rows = nx_local*ny_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); num_cols_offd = 0; if (p) num_cols_offd += ny_local; if (p < P_busy-1) num_cols_offd += ny_local; if (q) num_cols_offd += nx_local; if (q < Q_busy-1) num_cols_offd += nx_local; if (p && q) num_cols_offd++; if (p && 
q < Q_busy-1 ) num_cols_offd++; if (p < P_busy-1 && q ) num_cols_offd++; if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 0; o_cnt = 0; diag_i[0] = 0; offd_i[0] = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { cnt++; o_cnt++; diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iy > ny_part[q]) { diag_i[cnt]++; if (ix > nx_part[p]) { diag_i[cnt]++; } else { if (ix) offd_i[o_cnt]++; } } else { if (iy) { offd_i[o_cnt]++; if (ix > nx_part[p]) { offd_i[o_cnt]++; } else if (ix) { offd_i[o_cnt]++; } } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) { diag_i[cnt]++; if (ix < nx_part[p+1]-1) { diag_i[cnt]++; } else { if (ix+1 < nx) offd_i[o_cnt]++; } } else { if (iy+1 < ny) { offd_i[o_cnt]++; if (ix < nx_part[p+1]-1) { offd_i[o_cnt]++; } else if (ix < nx-1) { offd_i[o_cnt]++; } } } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(double, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(double, offd_i[local_num_rows]); }
HYPRE_ParCSRMatrix GenerateRotate7pt( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int p, HYPRE_Int q, HYPRE_Real alpha, HYPRE_Real eps ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; HYPRE_Real *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; HYPRE_Real *offd_data; HYPRE_Real *value; HYPRE_Real ac, bc, cc, s, c, pi, x; HYPRE_Int *global_part; HYPRE_Int ix, iy; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int *work; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local; HYPRE_Int nx_size, ny_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny; value = hypre_CTAlloc(HYPRE_Real,4); pi = 4.0*atan(1.0); x = pi*alpha/180.0; s = sin(x); c = cos(x); ac = -(c*c + eps*s*s); bc = 2.0*(1.0 - eps)*s*c; cc = -(s*s + eps*c*c); value[0] = -2*(2*ac+bc+2*cc); value[1] = 2*ac+bc; value[2] = bc+2*cc; value[3] = -bc; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1); global_part[0] = 0; cnt = 1; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size; } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; my_id = q*P + p; num_procs = P*Q; local_num_rows = nx_local*ny_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); num_cols_offd = 0; if (p) num_cols_offd += ny_local; if (p < P_busy-1) num_cols_offd += ny_local; if (q) num_cols_offd += nx_local; if (q < Q_busy-1) num_cols_offd += nx_local; if (p && 
q) num_cols_offd++; if (p && q < Q_busy-1 ) num_cols_offd++; if (p < P_busy-1 && q ) num_cols_offd++; if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 0; o_cnt = 0; diag_i[0] = 0; offd_i[0] = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { cnt++; o_cnt++; diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iy > ny_part[q]) { diag_i[cnt]++; if (ix > nx_part[p]) { diag_i[cnt]++; } else { if (ix) offd_i[o_cnt]++; } } else { if (iy) { offd_i[o_cnt]++; if (ix > nx_part[p]) { offd_i[o_cnt]++; } else if (ix) { offd_i[o_cnt]++; } } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) { diag_i[cnt]++; if (ix < nx_part[p+1]-1) { diag_i[cnt]++; } else { if (ix+1 < nx) offd_i[o_cnt]++; } } else { if (iy+1 < ny) { offd_i[o_cnt]++; if (ix < nx_part[p+1]-1) { offd_i[o_cnt]++; } else if (ix < nx-1) { offd_i[o_cnt]++; } } } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]); } row_index = 0; cnt = 0; o_cnt = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_j[cnt] = row_index; diag_data[cnt++] = value[0]; if (iy > ny_part[q]) { if (ix > nx_part[p]) { diag_j[cnt] = row_index-nx_local-1 ; diag_data[cnt++] = value[3]; } else { if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } diag_j[cnt] = row_index-nx_local; diag_data[cnt++] = value[2]; } else { if (iy) { if (ix > nx_part[p]) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p,q-1,P,Q, 
nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } else if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } offd_j[o_cnt] = hypre_map2(ix,iy-1,p,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[2]; } } if (ix > nx_part[p]) { diag_j[cnt] = row_index-1; diag_data[cnt++] = value[1]; } else { if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy,p-1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[1]; } } if (ix+1 < nx_part[p+1]) { diag_j[cnt] = row_index+1; diag_data[cnt++] = value[1]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map2(ix+1,iy,p+1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[1]; } } if (iy+1 < ny_part[q+1]) { diag_j[cnt] = row_index+nx_local; diag_data[cnt++] = value[2]; if (ix < nx_part[p+1]-1) { diag_j[cnt] = row_index+nx_local+1 ; diag_data[cnt++] = value[3]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } } else { if (iy+1 < ny) { offd_j[o_cnt] = hypre_map2(ix,iy+1,p,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[2]; if (ix < nx_part[p+1]-1) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } else if (ix < nx-1) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } } row_index++; } } if (num_procs > 1) { work = hypre_CTAlloc(HYPRE_Int,o_cnt); for (i=0; i < o_cnt; i++) work[i] = offd_j[i]; qsort0(work, 0, o_cnt-1); col_map_offd[0] = work[0]; cnt = 0; for (i=0; i < o_cnt; i++) { if (work[i] > col_map_offd[cnt]) { cnt++; col_map_offd[cnt] = work[i]; } } for (i=0; i < o_cnt; i++) { for (j=0; j < num_cols_offd; j++) { if (offd_j[i] == col_map_offd[j]) { offd_j[i] = j; break; } } } hypre_TFree(work); } A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size, global_part, global_part, num_cols_offd, 
diag_i[local_num_rows], offd_i[local_num_rows]); hypre_ParCSRMatrixColMapOffd(A) = col_map_offd; diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrixI(diag) = diag_i; hypre_CSRMatrixJ(diag) = diag_j; hypre_CSRMatrixData(diag) = diag_data; offd = hypre_ParCSRMatrixOffd(A); hypre_CSRMatrixI(offd) = offd_i; if (num_cols_offd) { hypre_CSRMatrixJ(offd) = offd_j; hypre_CSRMatrixData(offd) = offd_data; } hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(value); return (HYPRE_ParCSRMatrix) A; }
/*--------------------------------------------------------------------------
 * hypre_BoomerAMGSetupStats
 *
 * Prints a summary of a BoomerAMG setup: the setup parameters, one
 * statistics row per level for the operators in A_array and for the
 * interpolation matrices in P_array, the grid / operator complexities,
 * and the solver (cycle and relaxation) parameters.
 *
 * Every process contributes its local row statistics through MPI_Reduce
 * (HYPRE_NO_GLOBAL_PARTITION) or MPI_Gather (otherwise); only rank 0 of
 * the matrix communicator prints.
 *
 * Parameters:
 *   amg_vdata   pointer to the hypre_ParAMGData being reported
 *   A           matrix whose communicator is used for the collectives
 *
 * Returns 0.
 *--------------------------------------------------------------------------*/
int
hypre_BoomerAMGSetupStats( void               *amg_vdata,
                           hypre_ParCSRMatrix *A )
{
   MPI_Comm          comm = hypre_ParCSRMatrixComm(A);

   hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata;

   /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/

   /* Data Structure variables */

   hypre_ParCSRMatrix **A_array;
   hypre_ParCSRMatrix **P_array;

   hypre_CSRMatrix *A_diag;
   double          *A_diag_data;
   int             *A_diag_i;

   hypre_CSRMatrix *A_offd;
   double          *A_offd_data;
   int             *A_offd_i;

   hypre_CSRMatrix *P_diag;
   double          *P_diag_data;
   int             *P_diag_i;

   hypre_CSRMatrix *P_offd;
   double          *P_offd_data;
   int             *P_offd_i;

   int              numrows;

   HYPRE_BigInt    *row_starts;

   int      num_levels;
   int      coarsen_type;
   int      interp_type;
   int      measure_type;
   double   global_nonzeros;

   double  *send_buff;
   double  *gather_buff;

   /* Local variables */

   int          level;
   int          j;
   HYPRE_BigInt fine_size;

   int      min_entries;
   int      max_entries;

   int      num_procs, my_id, num_threads;

   double   min_rowsum;
   double   max_rowsum;
   double   sparse;

   int          i;

   HYPRE_BigInt coarse_size;
   int          entries;

   double   avg_entries;
   double   rowsum;

   double   min_weight;
   double   max_weight;

   int      global_min_e;
   int      global_max_e;
   double   global_min_rsum;
   double   global_max_rsum;
   double   global_min_wt;
   double   global_max_wt;

   double  *num_coeffs;
   double  *num_variables;
   double   total_variables;
   double   operat_cmplxty;
   double   grid_cmplxty;

   /* amg solve params */
   int      max_iter;
   int      cycle_type;
   int     *num_grid_sweeps;
   int     *grid_relax_type;
   int      relax_order;
   int    **grid_relax_points;
   double  *relax_weight;
   double  *omega;
   double   tol;

   int      one = 1;
   int      minus_one = -1;
   int      zero = 0;
   int      smooth_type;
   int      smooth_num_levels;
   int      agg_num_levels;
   /*int seq_cg = 0;*/

   /*if (seq_data) seq_cg = 1;*/

   MPI_Comm_size(comm, &num_procs);
   MPI_Comm_rank(comm,&my_id);
   num_threads = hypre_NumThreads();

   if (my_id == 0)
      printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads);

   /*----------------------------------------------------------
    * Get the amg_data data
    *----------------------------------------------------------*/

   A_array = hypre_ParAMGDataAArray(amg_data);
   P_array = hypre_ParAMGDataPArray(amg_data);
   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   coarsen_type = hypre_ParAMGDataCoarsenType(amg_data);
   interp_type = hypre_ParAMGDataInterpType(amg_data);
   measure_type = hypre_ParAMGDataMeasureType(amg_data);
   smooth_type = hypre_ParAMGDataSmoothType(amg_data);
   smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data);
   agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data);

   max_iter = hypre_ParAMGDataMaxIter(amg_data);
   cycle_type = hypre_ParAMGDataCycleType(amg_data);
   num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data);
   grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_weight = hypre_ParAMGDataRelaxWeight(amg_data);
   relax_order = hypre_ParAMGDataRelaxOrder(amg_data);
   omega = hypre_ParAMGDataOmega(amg_data);
   tol = hypre_ParAMGDataTol(amg_data);

   /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/

   /* Up to 6 statistics per process: MPI_Reduce needs one receive slot
    * set, MPI_Gather one per process. */
   send_buff = hypre_CTAlloc(double, 6);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   gather_buff = hypre_CTAlloc(double,6);
#else
   gather_buff = hypre_CTAlloc(double,6*num_procs);
#endif

   if (my_id==0)
   {
      printf("\nBoomerAMG SETUP PARAMETERS:\n\n");
      printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data));
      printf(" Num levels = %d\n\n",num_levels);
      printf(" Strength Threshold = %f\n",
             hypre_ParAMGDataStrongThreshold(amg_data));
      printf(" Interpolation Truncation Factor = %f\n",
             hypre_ParAMGDataTruncFactor(amg_data));
      printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n",
             hypre_ParAMGDataMaxRowSum(amg_data));

      /* The sign of coarsen_type only selects hybrid coarsening (reported
       * below); the name depends on its magnitude. */
      switch (abs(coarsen_type))
      {
         case 0:
            printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n");
            break;
         case 1:
            printf(" Coarsening Type = Ruge\n");
            break;
         case 2:
            printf(" Coarsening Type = Ruge2B\n");
            break;
         case 3:
            printf(" Coarsening Type = Ruge3\n");
            break;
         case 4:
            printf(" Coarsening Type = Ruge 3c \n");
            break;
         case 5:
            printf(" Coarsening Type = Ruge relax special points \n");
            break;
         case 6:
            printf(" Coarsening Type = Falgout-CLJP \n");
            break;
         case 7:
            printf(" Coarsening Type = CLJP, fixed random \n");
            break;
         case 8:
            printf(" Coarsening Type = PMIS \n");
            break;
         case 9:
            printf(" Coarsening Type = PMIS fixed random \n");
            break;
         case 10:
            printf(" Coarsening Type = HMIS \n");
            break;
         case 11:
            printf(" Coarsening Type = Ruge 1st pass only \n");
            break;
         default:
            break;
      }

      if (coarsen_type > 0)
      {
         printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n");
      }

      if (coarsen_type)
         printf(" measures are determined %s\n\n",
                (measure_type ? "globally" : "locally"));

      if (agg_num_levels)
         printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels);

#ifdef HYPRE_NO_GLOBAL_PARTITION
      printf( "\n No global partition option chosen.\n\n");
#endif

      switch (interp_type)
      {
         case 0:
            printf(" Interpolation = modified classical interpolation\n");
            break;
         case 1:
            printf(" Interpolation = LS interpolation \n");
            break;
         case 2:
            printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n");
            break;
         case 3:
            printf(" Interpolation = direct interpolation with separation of weights\n");
            break;
         case 4:
            printf(" Interpolation = multipass interpolation\n");
            break;
         case 5:
            printf(" Interpolation = multipass interpolation with separation of weights\n");
            break;
         case 6:
            printf(" Interpolation = extended+i interpolation\n");
            break;
         case 7:
            printf(" Interpolation = extended+i interpolation (only when needed)\n");
            break;
         case 8:
            printf(" Interpolation = standard interpolation\n");
            break;
         case 9:
            printf(" Interpolation = standard interpolation with separation of weights\n");
            break;
         case 12:
            printf(" FF interpolation \n");
            break;
         case 13:
            printf(" FF1 interpolation \n");
            break;
         default:
            break;
      }

      printf( "\nOperator Matrix Information:\n\n");

      /* Tested with #ifdef so the header follows the same switch as the
       * data rows printed in the loop below. */
#ifdef HYPRE_LONG_LONG
      printf(" nonzero entries p");
      printf("er row row sums\n");
      printf("lev rows entries sparse min max ");
      printf("avg min max\n");
      printf("=======================================");
      printf("==================================\n");
#else
      printf(" nonzero entries p");
      printf("er row row sums\n");
      printf("lev rows entries sparse min max ");
      printf("avg min max\n");
      printf("=======================================");
      printf("============================\n");
#endif
   }

   /*-----------------------------------------------------
    * Operator statistics, one row per level
    *-----------------------------------------------------*/

   num_coeffs = hypre_CTAlloc(double,num_levels);

   num_variables = hypre_CTAlloc(double,num_levels);

   for (level = 0; level < num_levels; level++)
   {
      A_diag = hypre_ParCSRMatrixDiag(A_array[level]);
      A_diag_data = hypre_CSRMatrixData(A_diag);
      A_diag_i = hypre_CSRMatrixI(A_diag);

      A_offd = hypre_ParCSRMatrixOffd(A_array[level]);
      A_offd_data = hypre_CSRMatrixData(A_offd);
      A_offd_i = hypre_CSRMatrixI(A_offd);

      row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]);

      fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]);
      global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]);
      num_coeffs[level] = global_nonzeros;
      num_variables[level] = (double) fine_size;

      sparse = global_nonzeros /((double) fine_size * (double) fine_size);

      min_entries = 0;
      max_entries = 0;
      min_rowsum = 0.0;
      max_rowsum = 0.0;

      if (hypre_CSRMatrixNumRows(A_diag))
      {
         /* seed the minima / maxima with the first local row */
         min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]);
         for (j = A_diag_i[0]; j < A_diag_i[1]; j++)
            min_rowsum += A_diag_data[j];
         for (j = A_offd_i[0]; j < A_offd_i[1]; j++)
            min_rowsum += A_offd_data[j];

         max_rowsum = min_rowsum;

         for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++)
         {
            entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]);
            min_entries = hypre_min(entries, min_entries);
            max_entries = hypre_max(entries, max_entries);

            rowsum = 0.0;
            for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++)
               rowsum += A_diag_data[i];

            for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++)
               rowsum += A_offd_data[i];

            min_rowsum = hypre_min(rowsum, min_rowsum);
            max_rowsum = hypre_max(rowsum, max_rowsum);
         }
      }
      avg_entries = global_nonzeros / ((double) fine_size);

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum = 1.0e7;
      }

      /* minima are negated so that a single MPI_MAX reduction yields
       * both the global minima and the global maxima */
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;

      MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id ==0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = - gather_buff[2];
         global_max_rsum = gather_buff[3];
#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#else
         printf( "%2d %7d %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#endif
         printf(" %4.1f %10.3e %10.3e\n", avg_entries,
                global_min_rsum, global_max_rsum);
      }

#else

      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;

      MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm);

      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         for (j = 0; j < num_procs; j++)
         {
            /* processes without rows do not count toward the minima */
            numrows = row_starts[j+1]-row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]);
         }

#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#else
         printf( "%2d %7d %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#endif
         printf(" %4.1f %10.3e %10.3e\n", avg_entries,
                global_min_rsum, global_max_rsum);
      }

#endif
   }

   if (my_id == 0)
   {
      printf( "\n\nInterpolation Matrix Information:\n\n");

#ifdef HYPRE_LONG_LONG
      printf(" entries/row min max");
      printf(" row sums\n");
      printf("lev rows x cols min max ");
      printf(" weight weight min max \n");
      printf("=======================================");
      printf("======================================\n");
#else
      printf(" entries/row min max");
      printf(" row sums\n");
      printf("lev rows cols min max ");
      printf(" weight weight min max \n");
      printf("=======================================");
      printf("==========================\n");
#endif
   }

   /*-----------------------------------------------------
    * Interpolation statistics, one row per level
    *-----------------------------------------------------*/

   for (level = 0; level < num_levels-1; level++)
   {
      P_diag = hypre_ParCSRMatrixDiag(P_array[level]);
      P_diag_data = hypre_CSRMatrixData(P_diag);
      P_diag_i = hypre_CSRMatrixI(P_diag);

      P_offd = hypre_ParCSRMatrixOffd(P_array[level]);
      P_offd_data = hypre_CSRMatrixData(P_offd);
      P_offd_i = hypre_CSRMatrixI(P_offd);

      row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]);

      fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]);
      coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]);
      global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]);

      min_weight = 1.0;
      max_weight = 0.0;
      max_rowsum = 0.0;
      min_rowsum = 0.0;
      min_entries = 0;
      max_entries = 0;

      if (hypre_CSRMatrixNumRows(P_diag))
      {
         /* Seed min_weight with the first stored diag entry, but only
          * if the diag block actually stores one. */
         if (hypre_CSRMatrixNumCols(P_diag) &&
             P_diag_i[hypre_CSRMatrixNumRows(P_diag)] > 0)
            min_weight = P_diag_data[0];

         for (j = P_diag_i[0]; j < P_diag_i[1]; j++)
         {
            min_weight = hypre_min(min_weight, P_diag_data[j]);
            /* weights equal to 1.0 are left out of the maximum */
            if (P_diag_data[j] != 1.0)
               max_weight = hypre_max(max_weight, P_diag_data[j]);
            min_rowsum += P_diag_data[j];
         }
         for (j = P_offd_i[0]; j < P_offd_i[1]; j++)
         {
            min_weight = hypre_min(min_weight, P_offd_data[j]);
            if (P_offd_data[j] != 1.0)
               max_weight = hypre_max(max_weight, P_offd_data[j]);
            min_rowsum += P_offd_data[j];
         }

         max_rowsum = min_rowsum;

         min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]);
         max_entries = 0;

         for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++)
         {
            entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]);
            min_entries = hypre_min(entries, min_entries);
            max_entries = hypre_max(entries, max_entries);

            rowsum = 0.0;
            for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++)
            {
               min_weight = hypre_min(min_weight, P_diag_data[i]);
               if (P_diag_data[i] != 1.0)
                  max_weight = hypre_max(max_weight, P_diag_data[i]);
               rowsum += P_diag_data[i];
            }

            for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++)
            {
               min_weight = hypre_min(min_weight, P_offd_data[i]);
               if (P_offd_data[i] != 1.0)
                  max_weight = hypre_max(max_weight, P_offd_data[i]);
               rowsum += P_offd_data[i];
            }

            min_rowsum = hypre_min(rowsum, min_rowsum);
            max_rowsum = hypre_max(rowsum, max_rowsum);
         }
      }
      avg_entries = ((double) global_nonzeros) / ((double) fine_size);

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum = 1.0e7;
         min_weight = 1.0e7;
      }

      /* minima negated so one MPI_MAX reduction covers min and max */
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = - min_weight;
      send_buff[5] = max_weight;

      MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id == 0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = -gather_buff[2];
         global_max_rsum = gather_buff[3];
         global_min_wt = -gather_buff[4];
         global_max_wt = gather_buff[5];

#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#else
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#endif
         printf(" %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt,
                global_min_rsum, global_max_rsum);
      }

#else

      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = min_weight;
      send_buff[5] = max_weight;

      MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm);

      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         global_min_wt = 1.0e7;
         global_max_wt = 0.0;

         for (j = 0; j < num_procs; j++)
         {
            /* processes without rows do not count toward the minima */
            numrows = row_starts[j+1] - row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]);
               global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]);
            global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]);
         }

#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#else
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#endif
         printf(" %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt,
                global_min_rsum, global_max_rsum);
      }

#endif
   }

   /*-----------------------------------------------------
    * Complexities, relative to the finest level.  Both start at 0 so
    * that a defined value is printed when there are no levels or the
    * finest level is empty.
    *-----------------------------------------------------*/

   total_variables = 0;
   operat_cmplxty = 0;
   grid_cmplxty = 0;
   if (num_levels > 0)
   {
      for (j = 0; j < num_levels; j++)
      {
         if (num_coeffs[0] != 0)
            operat_cmplxty += num_coeffs[j] / num_coeffs[0];
         total_variables += num_variables[j];
      }
      if (num_variables[0] != 0)
         grid_cmplxty = total_variables / num_variables[0];
   }

   if (my_id == 0 )
   {
      printf("\n\n Complexity: grid = %f\n",grid_cmplxty);
      printf(" operator = %f\n",operat_cmplxty);
   }

   if (my_id == 0) printf("\n\n");

   if (my_id == 0)
   {
      printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n");
      printf( " Maximum number of cycles: %d \n",max_iter);
      printf( " Stopping Tolerance: %e \n",tol);
      printf( " Cycle type (1 = V, 2 = W, etc.): %d\n\n", cycle_type);
      printf( " Relaxation Parameters:\n");
      printf( " Visiting Grid: down up coarse\n");
      printf( " Number of partial sweeps: %4d %2d %4d \n",
              num_grid_sweeps[1],
              num_grid_sweeps[2],num_grid_sweeps[3]);
      printf( " Type 0=Jac, 3=hGS, 6=hSGS, 9=GE: %4d %2d %4d \n",
              grid_relax_type[1],
              grid_relax_type[2],grid_relax_type[3]);

      /* TO DO: may not want this to print if CG in the coarse grid */
      printf( " Point types, partial sweeps (1=C, -1=F):\n");
      if (grid_relax_points)
      {
         /* explicit point ordering supplied per sweep */
         printf( " Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
            printf(" %2d", grid_relax_points[1][j]);
         printf( "\n");
         printf( " Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
            printf(" %2d", grid_relax_points[2][j]);
         printf( "\n");
         printf( " Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
            printf(" %2d", grid_relax_points[3][j]);
         printf( "\n\n");
      }
      else if (relax_order == 1)
      {
         /* reported as C then F on the way down, F then C on the way up */
         printf( " Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
            printf(" %2d %2d", one, minus_one);
         printf( "\n");
         printf( " Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
            printf(" %2d %2d", minus_one, one);
         printf( "\n");
         printf( " Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
            printf(" %2d", zero);
         printf( "\n\n");
      }
      else
      {
         printf( " Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
            printf(" %2d", zero);
         printf( "\n");
         printf( " Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
            printf(" %2d", zero);
         printf( "\n");
         printf( " Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
            printf(" %2d", zero);
         printf( "\n\n");
      }

      if (smooth_type == 6)
         for (j=0; j < smooth_num_levels; j++)
            printf( " Schwarz Relaxation Weight %f level %d\n",
                    hypre_ParAMGDataSchwarzRlxWeight(amg_data),j);

      /* only weights that differ from 1 are reported */
      for (j=0; j < num_levels; j++)
         if (relax_weight[j] != 1)
            printf( " Relaxation Weight %f level %d\n",relax_weight[j],j);
      for (j=0; j < num_levels; j++)
         if (omega[j] != 1)
            printf( " Outer relaxation weight %f level %d\n",omega[j],j);
   }

   /*if (seq_cg)
   {
      hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0],
                             operat_cmplxty, grid_cmplxty );
   }*/

   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);
   hypre_TFree(send_buff);
   hypre_TFree(gather_buff);

   return(0);
}
HYPRE_Int hypre_AMGSetupStats( void *amg_vdata ) { hypre_AMGData *amg_data = amg_vdata; /* Data Structure variables */ hypre_CSRMatrix **A_array; hypre_CSRMatrix **P_array; HYPRE_Int num_levels; HYPRE_Int num_nonzeros; /* HYPRE_Int amg_ioutdat; char *log_file_name; */ /* Local variables */ HYPRE_Int *A_i; double *A_data; HYPRE_Int *P_i; double *P_data; HYPRE_Int level; HYPRE_Int i,j; HYPRE_Int fine_size; HYPRE_Int coarse_size; HYPRE_Int entries; HYPRE_Int total_entries; HYPRE_Int min_entries; HYPRE_Int max_entries; double avg_entries; double rowsum; double min_rowsum; double max_rowsum; double sparse; double min_weight; double max_weight; double op_complxty=0; double grid_complxty=0; double num_nz0; double num_var0; A_array = hypre_AMGDataAArray(amg_data); P_array = hypre_AMGDataPArray(amg_data); num_levels = hypre_AMGDataNumLevels(amg_data); /* amg_ioutdat = hypre_AMGDataIOutDat(amg_data); log_file_name = hypre_AMGDataLogFileName(amg_data); */ hypre_printf("\n AMG SETUP PARAMETERS:\n\n"); hypre_printf(" Strength threshold = %f\n",hypre_AMGDataStrongThreshold(amg_data)); hypre_printf(" Max levels = %d\n",hypre_AMGDataMaxLevels(amg_data)); hypre_printf(" Num levels = %d\n\n",num_levels); hypre_printf( "\nOperator Matrix Information:\n\n"); hypre_printf(" nonzero entries p"); hypre_printf("er row row sums\n"); hypre_printf("lev rows entries sparse min max "); hypre_printf("avg min max\n"); hypre_printf("======================================="); hypre_printf("==========================\n"); /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ num_var0 = (double) hypre_CSRMatrixNumRows(A_array[0]); num_nz0 = (double) hypre_CSRMatrixNumNonzeros(A_array[0]); for (level = 0; level < num_levels; level++) { A_i = hypre_CSRMatrixI(A_array[level]); A_data = hypre_CSRMatrixData(A_array[level]); fine_size = hypre_CSRMatrixNumRows(A_array[level]); num_nonzeros = 
hypre_CSRMatrixNumNonzeros(A_array[level]); sparse = num_nonzeros /((double) fine_size * (double) fine_size); op_complxty += ((double)num_nonzeros/num_nz0); grid_complxty += ((double)fine_size/num_var0); min_entries = A_i[1]-A_i[0]; max_entries = 0; total_entries = 0; min_rowsum = 0.0; max_rowsum = 0.0; for (j = A_i[0]; j < A_i[1]; j++) min_rowsum += A_data[j]; max_rowsum = min_rowsum; for (j = 0; j < fine_size; j++) { entries = A_i[j+1] - A_i[j]; min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); total_entries += entries; rowsum = 0.0; for (i = A_i[j]; i < A_i[j+1]; i++) rowsum += A_data[i]; min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } avg_entries = ((double) total_entries) / ((double) fine_size); hypre_printf( "%2d %5d %7d %0.3f %3d %3d", level, fine_size, num_nonzeros, sparse, min_entries, max_entries); hypre_printf(" %4.1f %10.3e %10.3e\n", avg_entries, min_rowsum, max_rowsum); } hypre_printf( "\n\nInterpolation Matrix Information:\n\n"); hypre_printf(" entries/row min max"); hypre_printf(" row sums\n"); hypre_printf("lev rows cols min max "); hypre_printf(" weight weight min max \n"); hypre_printf("======================================="); hypre_printf("==========================\n"); /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ for (level = 0; level < num_levels-1; level++) { P_i = hypre_CSRMatrixI(P_array[level]); P_data = hypre_CSRMatrixData(P_array[level]); fine_size = hypre_CSRMatrixNumRows(P_array[level]); coarse_size = hypre_CSRMatrixNumCols(P_array[level]); num_nonzeros = hypre_CSRMatrixNumNonzeros(P_array[level]); min_entries = P_i[1]-P_i[0]; max_entries = 0; total_entries = 0; min_rowsum = 0.0; max_rowsum = 0.0; min_weight = P_data[0]; max_weight = 0.0; for (j = P_i[0]; j < P_i[1]; j++) min_rowsum += P_data[j]; max_rowsum = min_rowsum; for (j = 0; j < num_nonzeros; j++) { 
if (P_data[j] != 1.0) { min_weight = hypre_min(min_weight,P_data[j]); max_weight = hypre_max(max_weight,P_data[j]); } } for (j = 0; j < fine_size; j++) { entries = P_i[j+1] - P_i[j]; min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); total_entries += entries; rowsum = 0.0; for (i = P_i[j]; i < P_i[j+1]; i++) rowsum += P_data[i]; min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } hypre_printf( "%2d %5d x %-5d %3d %3d", level, fine_size, coarse_size, min_entries, max_entries); hypre_printf(" %5.3e %5.3e %5.3e %5.3e\n", min_weight, max_weight, min_rowsum, max_rowsum); } hypre_printf("\n Operator Complexity: %8.3f\n", op_complxty); hypre_printf(" Grid Complexity: %8.3f\n", grid_complxty); hypre_WriteSolverParams(amg_data); return(0); }
int hypre_StructCoarsen( hypre_StructGrid *fgrid, hypre_Index index, hypre_Index stride, int prune, hypre_StructGrid **cgrid_ptr ) { int ierr = 0; hypre_StructGrid *cgrid; MPI_Comm comm; int dim; hypre_BoxNeighbors *neighbors; hypre_BoxArray *hood_boxes; int num_hood; int *hood_procs; int *hood_ids; int first_local; int num_local; int num_periodic; int max_distance; hypre_Box *bounding_box; hypre_Index periodic; MPI_Request *send_requests; MPI_Status *send_status; int *send_buffer; int send_size; MPI_Request *recv_requests; MPI_Status *recv_status; int **recv_buffers; int *recv_sizes; int my_rank; int *send_procs; int *recv_procs; int num_sends; int num_recvs; hypre_BoxArray *new_hood_boxes; int new_num_hood; int *new_hood_procs; int *new_hood_ids; int new_first_local; int new_num_local; int new_num_periodic; hypre_Box *box; hypre_Box *local_box; hypre_Box *neighbor_box; hypre_Box *local_cbox; hypre_Box *neighbor_cbox; hypre_Index imin; hypre_Index imax; int alloc_size; double perimeter_count, cperimeter_count; /*double diff, distance, perimeter_count, cperimeter_count;*/ int *iarray; int *jrecv; int i, j, d, ilocal; int data_id, min_id, jj; /*----------------------------------------- * Copy needed info from fgrid *-----------------------------------------*/ comm = hypre_StructGridComm(fgrid); dim = hypre_StructGridDim(fgrid); neighbors = hypre_StructGridNeighbors(fgrid); hood_boxes = hypre_BoxArrayDuplicate(hypre_BoxNeighborsBoxes(neighbors)); num_hood = hypre_BoxArraySize(hood_boxes); iarray = hypre_BoxNeighborsProcs(neighbors); hood_procs = hypre_TAlloc(int, num_hood); for (i = 0; i < num_hood; i++) { hood_procs[i] = iarray[i]; } iarray = hypre_BoxNeighborsIDs(neighbors); hood_ids = hypre_TAlloc(int, num_hood); for (i = 0; i < num_hood; i++) { hood_ids[i] = iarray[i]; } first_local = hypre_BoxNeighborsFirstLocal(neighbors); num_local = hypre_BoxNeighborsNumLocal(neighbors); num_periodic = hypre_BoxNeighborsNumPeriodic(neighbors); max_distance = 
hypre_StructGridMaxDistance(fgrid); bounding_box = hypre_BoxDuplicate(hypre_StructGridBoundingBox(fgrid)); hypre_CopyIndex(hypre_StructGridPeriodic(fgrid), periodic); MPI_Comm_rank(comm, &my_rank); #if DEBUG sprintf(filename, "zcoarsen.%05d", my_rank); if ((file = fopen(filename, "a")) == NULL) { printf("Error: can't open output file %s\n", filename); exit(1); } fprintf(file, "\n\n============================\n\n"); fprintf(file, "\n\n%d\n\n", debug_count++); fprintf(file, "num_hood = %d\n", num_hood); for (i = 0; i < num_hood; i++) { box = hypre_BoxArrayBox(hood_boxes, i); fprintf(file, "(%d,%d,%d) X (%d,%d,%d) ; (%d,%d); %d\n", hypre_BoxIMinX(box),hypre_BoxIMinY(box),hypre_BoxIMinZ(box), hypre_BoxIMaxX(box),hypre_BoxIMaxY(box),hypre_BoxIMaxZ(box), hood_procs[i], hood_ids[i], hypre_BoxVolume(box)); } fprintf(file, "first_local = %d\n", first_local); fprintf(file, "num_local = %d\n", num_local); fprintf(file, "num_periodic = %d\n", num_periodic); #endif /*----------------------------------------- * Coarsen bounding box *-----------------------------------------*/ hypre_StructCoarsenBox(bounding_box, index, stride); /*----------------------------------------- * Coarsen neighborhood boxes & determine * send / recv procs * * NOTE: Currently, this always communicates * with all neighboring processes. *-----------------------------------------*/ local_cbox = hypre_BoxCreate(); neighbor_cbox = hypre_BoxCreate(); num_recvs = 0; num_sends = 0; recv_procs = NULL; send_procs = NULL; for (i = 0; i < num_hood; i++) { if (hood_procs[i] != my_rank) { for (j = 0; j < num_local; j++) { ilocal = first_local + j; local_box = hypre_BoxArrayBox(hood_boxes, ilocal); neighbor_box = hypre_BoxArrayBox(hood_boxes, i); /* coarsen boxes being considered */ hypre_CopyBox(local_box, local_cbox); hypre_StructCoarsenBox(local_cbox, index, stride); hypre_CopyBox(neighbor_box, neighbor_cbox); hypre_StructCoarsenBox(neighbor_cbox, index, stride); /*----------------------- * Receive info? 
*-----------------------*/ /* always communicate */ #if 0 perimeter_count = 0; cperimeter_count = 0; for (d = 0; d < 3; d++) { distance = max_distance; diff = hypre_BoxIMaxD(neighbor_box, d) - hypre_BoxIMaxD(local_box, d); if (diff > 0) { distance = hypre_min(distance, diff); } diff = hypre_BoxIMinD(local_box, d) - hypre_BoxIMinD(neighbor_box, d); if (diff > 0) { distance = hypre_min(distance, diff); } if (distance < max_distance) { perimeter_count++; } distance = max_distance; diff = hypre_BoxIMaxD(neighbor_cbox, d) - hypre_BoxIMaxD(local_cbox, d); if (diff > 0) { distance = hypre_min(distance, diff); } diff = hypre_BoxIMinD(local_cbox, d) - hypre_BoxIMinD(neighbor_cbox, d); if (diff > 0) { distance = hypre_min(distance, diff); } if (distance < max_distance) { cperimeter_count++; } } #else perimeter_count = 0; cperimeter_count = 1; #endif if (cperimeter_count > perimeter_count) { if (num_recvs == 0) { recv_procs = hypre_TAlloc(int, num_hood); recv_procs[num_recvs] = hood_procs[i]; num_recvs++; } else if (hood_procs[i] != recv_procs[num_recvs-1]) { recv_procs[num_recvs] = hood_procs[i]; num_recvs++; } } /*----------------------- * Send info? 
*-----------------------*/ /* always communicate */ #if 0 perimeter_count = 0; cperimeter_count = 0; for (d = 0; d < 3; d++) { distance = max_distance; diff = hypre_BoxIMaxD(local_box, d) - hypre_BoxIMaxD(neighbor_box, d); if (diff > 0) { distance = hypre_min(distance, diff); } diff = hypre_BoxIMinD(neighbor_box, d) - hypre_BoxIMinD(local_box, d); if (diff > 0) { distance = hypre_min(distance, diff); } if (distance < max_distance) { perimeter_count++; } distance = max_distance; diff = hypre_BoxIMaxD(local_cbox, d) - hypre_BoxIMaxD(neighbor_cbox, d); if (diff > 0) { distance = hypre_min(distance, diff); } diff = hypre_BoxIMinD(neighbor_cbox, d) - hypre_BoxIMinD(local_cbox, d); if (diff > 0) { distance = hypre_min(distance, diff); } if (distance < max_distance) { cperimeter_count++; } } #else perimeter_count = 0; cperimeter_count = 1; #endif if (cperimeter_count > perimeter_count) { if (num_sends == 0) { send_procs = hypre_TAlloc(int, num_hood); send_procs[num_sends] = hood_procs[i]; num_sends++; } else if (hood_procs[i] != send_procs[num_sends-1]) { send_procs[num_sends] = hood_procs[i]; num_sends++; } }